xref: /illumos-gate/usr/src/uts/common/io/scsi/targets/sd.c (revision d88e498a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * SCSI disk target driver.
29  */
30 #include <sys/scsi/scsi.h>
31 #include <sys/dkbad.h>
32 #include <sys/dklabel.h>
33 #include <sys/dkio.h>
34 #include <sys/fdio.h>
35 #include <sys/cdio.h>
36 #include <sys/mhd.h>
37 #include <sys/vtoc.h>
38 #include <sys/dktp/fdisk.h>
39 #include <sys/kstat.h>
40 #include <sys/vtrace.h>
41 #include <sys/note.h>
42 #include <sys/thread.h>
43 #include <sys/proc.h>
44 #include <sys/efi_partition.h>
45 #include <sys/var.h>
46 #include <sys/aio_req.h>
47 
48 #ifdef __lock_lint
49 #define	_LP64
50 #define	__amd64
51 #endif
52 
53 #if (defined(__fibre))
54 /* Note: is there a leadville version of the following? */
55 #include <sys/fc4/fcal_linkapp.h>
56 #endif
57 #include <sys/taskq.h>
58 #include <sys/uuid.h>
59 #include <sys/byteorder.h>
60 #include <sys/sdt.h>
61 
62 #include "sd_xbuf.h"
63 
64 #include <sys/scsi/targets/sddef.h>
65 #include <sys/cmlb.h>
66 #include <sys/sysevent/eventdefs.h>
67 #include <sys/sysevent/dev.h>
68 
69 #include <sys/fm/protocol.h>
70 
71 /*
72  * Loadable module info.
73  */
74 #if (defined(__fibre))
75 #define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver"
76 char _depends_on[]	= "misc/scsi misc/cmlb drv/fcp";
77 #else /* !__fibre */
78 #define	SD_MODULE_NAME	"SCSI Disk Driver"
79 char _depends_on[]	= "misc/scsi misc/cmlb";
80 #endif /* !__fibre */
81 
82 /*
83  * Define the interconnect type, to allow the driver to distinguish
84  * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
85  *
86  * This is really for backward compatibility. In the future, the driver
87  * should actually check the "interconnect-type" property as reported by
88  * the HBA; however at present this property is not defined by all HBAs,
89  * so we will use this #define (1) to permit the driver to run in
90  * backward-compatibility mode; and (2) to print a notification message
91  * if an FC HBA does not support the "interconnect-type" property.  The
92  * behavior of the driver will be to assume parallel SCSI behaviors unless
93  * the "interconnect-type" property is defined by the HBA **AND** has a
94  * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
95  * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
96  * Channel behaviors (as per the old ssd).  (Note that the
97  * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
98  * will result in the driver assuming parallel SCSI behaviors.)
99  *
100  * (see common/sys/scsi/impl/services.h)
101  *
102  * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default
103  * since some FC HBAs may already support that, and there is some code in
104  * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
105  * default would confuse that code, and besides things should work fine
106  * anyways if the FC HBA already reports INTERCONNECT_FABRIC for the
107  * "interconnect_type" property.
108  *
109  */
110 #if (defined(__fibre))
111 #define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE
112 #else
113 #define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
114 #endif
115 
116 /*
117  * The name of the driver, established from the module name in _init.
118  */
119 static	char *sd_label			= NULL;
120 
121 /*
122  * Driver name is unfortunately prefixed on some driver.conf properties.
123  */
124 #if (defined(__fibre))
125 #define	sd_max_xfer_size		ssd_max_xfer_size
126 #define	sd_config_list			ssd_config_list
127 static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
128 static	char *sd_config_list		= "ssd-config-list";
129 #else
130 static	char *sd_max_xfer_size		= "sd_max_xfer_size";
131 static	char *sd_config_list		= "sd-config-list";
132 #endif
133 
134 /*
135  * Driver global variables
136  */
137 
138 #if (defined(__fibre))
139 /*
140  * These #defines are to avoid namespace collisions that occur because this
141  * code is currently used to compile two separate driver modules: sd and ssd.
142  * All global variables need to be treated this way (even if declared static)
143  * in order to allow the debugger to resolve the names properly.
144  * It is anticipated that in the near future the ssd module will be obsoleted,
145  * at which time this namespace issue should go away.
146  */
147 #define	sd_state			ssd_state
148 #define	sd_io_time			ssd_io_time
149 #define	sd_failfast_enable		ssd_failfast_enable
150 #define	sd_ua_retry_count		ssd_ua_retry_count
151 #define	sd_report_pfa			ssd_report_pfa
152 #define	sd_max_throttle			ssd_max_throttle
153 #define	sd_min_throttle			ssd_min_throttle
154 #define	sd_rot_delay			ssd_rot_delay
155 
156 #define	sd_retry_on_reservation_conflict	\
157 					ssd_retry_on_reservation_conflict
158 #define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
159 #define	sd_resv_conflict_name		ssd_resv_conflict_name
160 
161 #define	sd_component_mask		ssd_component_mask
162 #define	sd_level_mask			ssd_level_mask
163 #define	sd_debug_un			ssd_debug_un
164 #define	sd_error_level			ssd_error_level
165 
166 #define	sd_xbuf_active_limit		ssd_xbuf_active_limit
167 #define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit
168 
169 #define	sd_tr				ssd_tr
170 #define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
171 #define	sd_qfull_throttle_timeout	ssd_qfull_throttle_timeout
172 #define	sd_qfull_throttle_enable	ssd_qfull_throttle_enable
173 #define	sd_check_media_time		ssd_check_media_time
174 #define	sd_wait_cmds_complete		ssd_wait_cmds_complete
175 #define	sd_label_mutex			ssd_label_mutex
176 #define	sd_detach_mutex			ssd_detach_mutex
177 #define	sd_log_buf			ssd_log_buf
178 #define	sd_log_mutex			ssd_log_mutex
179 
180 #define	sd_disk_table			ssd_disk_table
181 #define	sd_disk_table_size		ssd_disk_table_size
182 #define	sd_sense_mutex			ssd_sense_mutex
183 #define	sd_cdbtab			ssd_cdbtab
184 
185 #define	sd_cb_ops			ssd_cb_ops
186 #define	sd_ops				ssd_ops
187 #define	sd_additional_codes		ssd_additional_codes
188 #define	sd_tgops			ssd_tgops
189 
190 #define	sd_minor_data			ssd_minor_data
191 #define	sd_minor_data_efi		ssd_minor_data_efi
192 
193 #define	sd_tq				ssd_tq
194 #define	sd_wmr_tq			ssd_wmr_tq
195 #define	sd_taskq_name			ssd_taskq_name
196 #define	sd_wmr_taskq_name		ssd_wmr_taskq_name
197 #define	sd_taskq_minalloc		ssd_taskq_minalloc
198 #define	sd_taskq_maxalloc		ssd_taskq_maxalloc
199 
200 #define	sd_dump_format_string		ssd_dump_format_string
201 
202 #define	sd_iostart_chain		ssd_iostart_chain
203 #define	sd_iodone_chain			ssd_iodone_chain
204 
205 #define	sd_pm_idletime			ssd_pm_idletime
206 
207 #define	sd_force_pm_supported		ssd_force_pm_supported
208 
209 #define	sd_dtype_optical_bind		ssd_dtype_optical_bind
210 
211 #define	sd_ssc_init			ssd_ssc_init
212 #define	sd_ssc_send			ssd_ssc_send
213 #define	sd_ssc_fini			ssd_ssc_fini
214 #define	sd_ssc_assessment		ssd_ssc_assessment
215 #define	sd_ssc_post			ssd_ssc_post
216 #define	sd_ssc_print			ssd_ssc_print
217 #define	sd_ssc_ereport_post		ssd_ssc_ereport_post
218 #define	sd_ssc_set_info			ssd_ssc_set_info
219 #define	sd_ssc_extract_info		ssd_ssc_extract_info
220 
221 #endif
222 
223 #ifdef	SDDEBUG
224 int	sd_force_pm_supported		= 0;	/* debug knob; remapped to ssd_* above for the ssd build */
225 #endif	/* SDDEBUG */
226 
/*
 * Patchable driver globals.  These are deliberately given external linkage
 * (except where marked static) so they can be tuned via /etc/system or a
 * kernel debugger; defaults come from the SD_* macros in sddef.h.
 */
227 void *sd_state				= NULL;	/* soft-state anchor for all sd instances */
228 int sd_io_time				= SD_IO_TIME;	/* base command timeout */
229 int sd_failfast_enable			= 1;	/* failfast behavior on by default */
230 int sd_ua_retry_count			= SD_UA_RETRY_COUNT;	/* Unit Attention retry count */
231 int sd_report_pfa			= 1;	/* report PFA (predictive failure) events */
232 int sd_max_throttle			= SD_MAX_THROTTLE;	/* max outstanding cmds per unit */
233 int sd_min_throttle			= SD_MIN_THROTTLE;	/* floor when throttling down */
234 int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
235 int sd_qfull_throttle_enable		= TRUE;	/* enable throttling on STATUS_QFULL */
236 
237 int sd_retry_on_reservation_conflict	= 1;	/* retry instead of failing on resv conflict */
238 int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
239 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))
240 
241 static int sd_dtype_optical_bind	= -1;	/* -1 == not configured; TODO confirm semantics at use site */
242 
243 /* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
244 static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";
245 
246 /*
247  * Global data for debug logging. To enable debug printing, sd_component_mask
248  * and sd_level_mask should be set to the desired bit patterns as outlined in
249  * sddef.h.
250  */
251 uint_t	sd_component_mask		= 0x0;	/* which driver components log */
252 uint_t	sd_level_mask			= 0x0;	/* which severities log */
253 struct	sd_lun *sd_debug_un		= NULL;	/* restrict logging to one unit if set */
254 uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;
255 
256 /* Note: these may go away in the future... */
257 static uint32_t	sd_xbuf_active_limit	= 512;	/* max concurrently active xbufs */
258 static uint32_t sd_xbuf_reserve_limit	= 16;	/* xbufs held back for reserve pool */
259 
260 static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };
261 
262 /*
263  * Timer value used to reset the throttle after it has been reduced
264  * (typically in response to TRAN_BUSY or STATUS_QFULL)
265  */
266 static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
267 static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;
268 
269 /*
270  * Interval value associated with the media change scsi watch.
271  */
272 static int sd_check_media_time		= 3000000;
273 
274 /*
275  * Wait value used for in progress operations during a DDI_SUSPEND
276  */
277 static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;
278 
279 /*
280  * sd_label_mutex protects a static buffer used in the disk label
281  * component of the driver
282  */
283 static kmutex_t sd_label_mutex;
284 
285 /*
286  * sd_detach_mutex protects un_layer_count, un_detach_count, and
287  * un_opens_in_progress in the sd_lun structure.
288  */
289 static kmutex_t sd_detach_mutex;
290 
291 _NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
292 	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))
293 
294 /*
295  * Global buffer and mutex for debug logging
296  */
297 static char	sd_log_buf[1024];
298 static kmutex_t	sd_log_mutex;
299 
300 /*
301  * Structs and globals for recording attached lun information.
302  * This maintains a chain. Each node in the chain represents a SCSI controller.
303  * The structure records the number of luns attached to each target connected
304  * with the controller.
305  * For parallel scsi device only.
306  */
307 struct sd_scsi_hba_tgt_lun {
308 	struct sd_scsi_hba_tgt_lun	*next;	/* next controller node in the chain */
309 	dev_info_t			*pdip;	/* parent HBA devinfo; key for the node */
310 	int				nlun[NTARGETS_WIDE];	/* attached-LUN count per target */
311 };
312 
313 /*
314  * Flag to indicate the lun is attached or detached
315  */
316 #define	SD_SCSI_LUN_ATTACH	0
317 #define	SD_SCSI_LUN_DETACH	1
318 
319 static kmutex_t	sd_scsi_target_lun_mutex;
320 static struct sd_scsi_hba_tgt_lun	*sd_scsi_target_lun_head = NULL;
321 
322 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
323     sd_scsi_hba_tgt_lun::next sd_scsi_hba_tgt_lun::pdip))
324 
325 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
326     sd_scsi_target_lun_head))
327 
328 /*
329  * "Smart" Probe Caching structs, globals, #defines, etc.
330  * For parallel scsi and non-self-identify device only.
331  */
332 
333 /*
334  * The following resources and routines are implemented to support
335  * "smart" probing, which caches the scsi_probe() results in an array,
336  * in order to help avoid long probe times.
337  */
338 struct sd_scsi_probe_cache {
339 	struct	sd_scsi_probe_cache	*next;	/* next HBA node in the cache chain */
340 	dev_info_t	*pdip;	/* parent HBA devinfo; key for the node */
341 	int		cache[NTARGETS_WIDE];	/* cached scsi_probe() result per target */
342 };
343 
344 static kmutex_t	sd_scsi_probe_cache_mutex;
345 static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;
346 
347 /*
348  * Really we only need protection on the head of the linked list, but
349  * better safe than sorry.
350  */
351 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
352     sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))
353 
354 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
355     sd_scsi_probe_cache_head))
356 
357 
358 /*
359  * Vendor specific data name property declarations
360  */
361 
362 #if defined(__fibre) || defined(__i386) ||defined(__amd64)
363 
364 static sd_tunables seagate_properties = {
365 	SEAGATE_THROTTLE_VALUE,	/* throttle */
366 	0,			/* ctype */
367 	0,			/* not-ready retry count */
368 	0,			/* busy retry count */
369 	0,			/* reset retry count */
370 	0,			/* reserve/release time */
371 	0,			/* min throttle */
372 	0,			/* disksort disabled */
373 	0			/* LUN reset enabled */
374 };
375 
376 
377 static sd_tunables fujitsu_properties = {
378 	FUJITSU_THROTTLE_VALUE,	/* throttle */
379 	0,			/* ctype */
380 	0,			/* not-ready retry count */
381 	0,			/* busy retry count */
382 	0,			/* reset retry count */
383 	0,			/* reserve/release time */
384 	0,			/* min throttle */
385 	0,			/* disksort disabled */
386 	0			/* LUN reset enabled */
387 };
388 
389 static sd_tunables ibm_properties = {
390 	IBM_THROTTLE_VALUE,	/* throttle */
391 	0,			/* ctype */
392 	0,			/* not-ready retry count */
393 	0,			/* busy retry count */
394 	0,			/* reset retry count */
395 	0,			/* reserve/release time */
396 	0,			/* min throttle */
397 	0,			/* disksort disabled */
398 	0			/* LUN reset enabled */
399 };
400 
401 static sd_tunables purple_properties = {
402 	PURPLE_THROTTLE_VALUE,	/* throttle */
403 	0,			/* ctype */
404 	0,			/* not-ready retry count */
405 	PURPLE_BUSY_RETRIES,	/* busy retry count */
406 	PURPLE_RESET_RETRY_COUNT,	/* reset retry count */
407 	PURPLE_RESERVE_RELEASE_TIME,	/* reserve/release time */
408 	0,			/* min throttle */
409 	0,			/* disksort disabled */
410 	0			/* LUN reset enabled */
411 };
412 
413 static sd_tunables sve_properties = {
414 	SVE_THROTTLE_VALUE,	/* throttle */
415 	0,			/* ctype */
416 	0,			/* not-ready retry count */
417 	SVE_BUSY_RETRIES,	/* busy retry count */
418 	SVE_RESET_RETRY_COUNT,	/* reset retry count */
419 	SVE_RESERVE_RELEASE_TIME,	/* reserve/release time */
420 	SVE_MIN_THROTTLE_VALUE,	/* min throttle */
421 	SVE_DISKSORT_DISABLED_FLAG,	/* disksort disabled */
422 	0			/* LUN reset enabled */
423 };
424 
425 static sd_tunables maserati_properties = {
426 	0,			/* throttle */
427 	0,			/* ctype */
428 	0,			/* not-ready retry count */
429 	0,			/* busy retry count */
430 	0,			/* reset retry count */
431 	0,			/* reserve/release time */
432 	0,			/* min throttle */
433 	MASERATI_DISKSORT_DISABLED_FLAG,	/* disksort disabled */
434 	MASERATI_LUN_RESET_ENABLED_FLAG		/* LUN reset enabled */
435 };
436 
437 static sd_tunables pirus_properties = {
438 	PIRUS_THROTTLE_VALUE,	/* throttle */
439 	0,			/* ctype */
440 	PIRUS_NRR_COUNT,	/* not-ready retry count */
441 	PIRUS_BUSY_RETRIES,	/* busy retry count */
442 	PIRUS_RESET_RETRY_COUNT,	/* reset retry count */
443 	0,			/* reserve/release time */
444 	PIRUS_MIN_THROTTLE_VALUE,	/* min throttle */
445 	PIRUS_DISKSORT_DISABLED_FLAG,	/* disksort disabled */
446 	PIRUS_LUN_RESET_ENABLED_FLAG	/* LUN reset enabled */
447 };
448 
449 #endif
450 
451 #if (defined(__sparc) && !defined(__fibre)) || \
452 	(defined(__i386) || defined(__amd64))
453 
454 
455 static sd_tunables elite_properties = {
456 	ELITE_THROTTLE_VALUE,	/* throttle */
457 	0,			/* ctype */
458 	0,			/* not-ready retry count */
459 	0,			/* busy retry count */
460 	0,			/* reset retry count */
461 	0,			/* reserve/release time */
462 	0,			/* min throttle */
463 	0,			/* disksort disabled */
464 	0			/* LUN reset enabled */
465 };
466 
467 static sd_tunables st31200n_properties = {
468 	ST31200N_THROTTLE_VALUE,	/* throttle */
469 	0,			/* ctype */
470 	0,			/* not-ready retry count */
471 	0,			/* busy retry count */
472 	0,			/* reset retry count */
473 	0,			/* reserve/release time */
474 	0,			/* min throttle */
475 	0,			/* disksort disabled */
476 	0			/* LUN reset enabled */
477 };
478 
479 #endif /* Fibre or not */
480 
481 static sd_tunables lsi_properties_scsi = {
482 	LSI_THROTTLE_VALUE,	/* throttle */
483 	0,			/* ctype */
484 	LSI_NOTREADY_RETRIES,	/* not-ready retry count */
485 	0,			/* busy retry count */
486 	0,			/* reset retry count */
487 	0,			/* reserve/release time */
488 	0,			/* min throttle */
489 	0,			/* disksort disabled */
490 	0			/* LUN reset enabled */
491 };
492 
493 static sd_tunables symbios_properties = {
494 	SYMBIOS_THROTTLE_VALUE,	/* throttle */
495 	0,			/* ctype */
496 	SYMBIOS_NOTREADY_RETRIES,	/* not-ready retry count */
497 	0,			/* busy retry count */
498 	0,			/* reset retry count */
499 	0,			/* reserve/release time */
500 	0,			/* min throttle */
501 	0,			/* disksort disabled */
502 	0			/* LUN reset enabled */
503 };
504 
505 static sd_tunables lsi_properties = {
506 	0,			/* throttle */
507 	0,			/* ctype */
508 	LSI_NOTREADY_RETRIES,	/* not-ready retry count */
509 	0,			/* busy retry count */
510 	0,			/* reset retry count */
511 	0,			/* reserve/release time */
512 	0,			/* min throttle */
513 	0,			/* disksort disabled */
514 	0			/* LUN reset enabled */
515 };
516 
517 static sd_tunables lsi_oem_properties = {
518 	0,			/* throttle */
519 	0,			/* ctype */
520 	LSI_OEM_NOTREADY_RETRIES,	/* not-ready retry count */
521 	0,			/* busy retry count */
522 	0,			/* reset retry count */
523 	0,			/* reserve/release time */
524 	0,			/* min throttle */
525 	0,			/* disksort disabled */
526 	0,			/* LUN reset enabled */
527 	1	/* NOTE(review): 10th sd_tunables member, left implicitly 0 by the other initializers -- confirm its name in sddef.h */
528 };
529 
530 
531 
532 #if (defined(SD_PROP_TST))
533 
534 #define	SD_TST_CTYPE_VAL	CTYPE_CDROM
535 #define	SD_TST_THROTTLE_VAL	16
536 #define	SD_TST_NOTREADY_VAL	12
537 #define	SD_TST_BUSY_VAL		60
538 #define	SD_TST_RST_RETRY_VAL	36
539 #define	SD_TST_RSV_REL_TIME	60
540 
/* Exercises every tunable slot; compiled only when SD_PROP_TST is defined. */
541 static sd_tunables tst_properties = {
542 	SD_TST_THROTTLE_VAL,	/* throttle */
543 	SD_TST_CTYPE_VAL,	/* ctype */
544 	SD_TST_NOTREADY_VAL,	/* not-ready retry count */
545 	SD_TST_BUSY_VAL,	/* busy retry count */
546 	SD_TST_RST_RETRY_VAL,	/* reset retry count */
547 	SD_TST_RSV_REL_TIME,	/* reserve/release time */
548 	0,			/* min throttle */
549 	0,			/* disksort disabled */
550 	0			/* LUN reset enabled */
551 };
552 #endif
553 
554 /* This is similar to the ANSI toupper implementation */
555 #define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))
556 
557 /*
558  * Static Driver Configuration Table
559  *
560  * This is the table of disks which need throttle adjustment (or, perhaps
561  * something else as defined by the flags at a future time.)  device_id
562  * is a string consisting of concatenated vid (vendor), pid (product/model)
563  * and revision strings as defined in the scsi_inquiry structure.  Offsets of
564  * the parts of the string are as defined by the sizes in the scsi_inquiry
565  * structure.  Device type is searched as far as the device_id string is
566  * defined.  Flags defines which values are to be set in the driver from the
567  * properties list.
568  *
569  * Entries below which begin and end with a "*" are a special case.
570  * These do not have a specific vendor, and the string which follows
571  * can appear anywhere in the 16 byte PID portion of the inquiry data.
572  *
573  * Entries below which begin and end with a " " (blank) are a special
574  * case. The comparison function will treat multiple consecutive blanks
575  * as equivalent to a single blank. For example, this causes a
576  * sd_disk_table entry of " NEC CDROM " to match a device's id string
577  * of  "NEC       CDROM".
578  *
579  * Note: The MD21 controller type has been obsoleted.
580  *	 ST318202F is a Legacy device
581  *	 MAM3182FC, MAM3364FC, MAM3738FC do not appear to have ever been
582  *	 made with an FC connection. The entries here are a legacy.
583  */
584 static sd_disk_config_t sd_disk_table[] = {
585 #if defined(__fibre) || defined(__i386) || defined(__amd64)
	/* FC and x86: throttle tuning for legacy FC disks; NRR bumps for LSI-based OEM arrays */
586 	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
587 	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
588 	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
589 	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
590 	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
591 	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
592 	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
593 	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
594 	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
595 	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
596 	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
597 	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
598 	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
599 	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
600 	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
601 	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
602 	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
603 	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
604 	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
605 	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
606 	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
607 	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
608 	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
609 	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
610 	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
611 	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
612 	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
613 	{ "IBM     1724-100",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
614 	{ "IBM     1726-2xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
615 	{ "IBM     1726-22x",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
616 	{ "IBM     1726-4xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
617 	{ "IBM     1726-42x",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
618 	{ "IBM     1726-3xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
619 	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
620 	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
621 	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
622 	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
623 	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
624 	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
625 	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
626 	{ "IBM     1814",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
627 	{ "IBM     1814-200",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
628 	{ "IBM     1818",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
629 	{ "DELL    MD3000",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
630 	{ "DELL    MD3000i",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
631 	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
632 	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
633 	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
634 	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
635 	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT |
636 			SD_CONF_BSET_CACHE_IS_NV, &lsi_oem_properties },
637 	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT |
638 			SD_CONF_BSET_CACHE_IS_NV, &lsi_oem_properties },
639 	{ "Fujitsu SX300",	SD_CONF_BSET_THROTTLE,  &lsi_oem_properties },
640 	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
	/* Sun storage arrays: T3/T4 (purple), SESS01 (sve), SVE01 (maserati), Pirus-based 6x20/6x40/6330/PSX1000 */
641 	{ "SUN     T3", SD_CONF_BSET_THROTTLE |
642 			SD_CONF_BSET_BSY_RETRY_COUNT|
643 			SD_CONF_BSET_RST_RETRIES|
644 			SD_CONF_BSET_RSV_REL_TIME,
645 		&purple_properties },
646 	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
647 		SD_CONF_BSET_BSY_RETRY_COUNT|
648 		SD_CONF_BSET_RST_RETRIES|
649 		SD_CONF_BSET_RSV_REL_TIME|
650 		SD_CONF_BSET_MIN_THROTTLE|
651 		SD_CONF_BSET_DISKSORT_DISABLED,
652 		&sve_properties },
653 	{ "SUN     T4", SD_CONF_BSET_THROTTLE |
654 			SD_CONF_BSET_BSY_RETRY_COUNT|
655 			SD_CONF_BSET_RST_RETRIES|
656 			SD_CONF_BSET_RSV_REL_TIME,
657 		&purple_properties },
658 	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
659 		SD_CONF_BSET_LUN_RESET_ENABLED,
660 		&maserati_properties },
661 	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
662 		SD_CONF_BSET_NRR_COUNT|
663 		SD_CONF_BSET_BSY_RETRY_COUNT|
664 		SD_CONF_BSET_RST_RETRIES|
665 		SD_CONF_BSET_MIN_THROTTLE|
666 		SD_CONF_BSET_DISKSORT_DISABLED|
667 		SD_CONF_BSET_LUN_RESET_ENABLED,
668 		&pirus_properties },
669 	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
670 		SD_CONF_BSET_NRR_COUNT|
671 		SD_CONF_BSET_BSY_RETRY_COUNT|
672 		SD_CONF_BSET_RST_RETRIES|
673 		SD_CONF_BSET_MIN_THROTTLE|
674 		SD_CONF_BSET_DISKSORT_DISABLED|
675 		SD_CONF_BSET_LUN_RESET_ENABLED,
676 		&pirus_properties },
677 	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
678 		SD_CONF_BSET_NRR_COUNT|
679 		SD_CONF_BSET_BSY_RETRY_COUNT|
680 		SD_CONF_BSET_RST_RETRIES|
681 		SD_CONF_BSET_MIN_THROTTLE|
682 		SD_CONF_BSET_DISKSORT_DISABLED|
683 		SD_CONF_BSET_LUN_RESET_ENABLED,
684 		&pirus_properties },
685 	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
686 		SD_CONF_BSET_NRR_COUNT|
687 		SD_CONF_BSET_BSY_RETRY_COUNT|
688 		SD_CONF_BSET_RST_RETRIES|
689 		SD_CONF_BSET_MIN_THROTTLE|
690 		SD_CONF_BSET_DISKSORT_DISABLED|
691 		SD_CONF_BSET_LUN_RESET_ENABLED,
692 		&pirus_properties },
693 	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
694 		SD_CONF_BSET_NRR_COUNT|
695 		SD_CONF_BSET_BSY_RETRY_COUNT|
696 		SD_CONF_BSET_RST_RETRIES|
697 		SD_CONF_BSET_MIN_THROTTLE|
698 		SD_CONF_BSET_DISKSORT_DISABLED|
699 		SD_CONF_BSET_LUN_RESET_ENABLED,
700 		&pirus_properties },
701 	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
702 		SD_CONF_BSET_NRR_COUNT|
703 		SD_CONF_BSET_BSY_RETRY_COUNT|
704 		SD_CONF_BSET_RST_RETRIES|
705 		SD_CONF_BSET_MIN_THROTTLE|
706 		SD_CONF_BSET_DISKSORT_DISABLED|
707 		SD_CONF_BSET_LUN_RESET_ENABLED,
708 		&pirus_properties },
709 	{ "SUN     STK6580_6780", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
710 	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
711 	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
712 	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
713 	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
714 	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
715 #endif /* fibre or NON-sparc platforms */
716 #if ((defined(__sparc) && !defined(__fibre)) ||\
717 	(defined(__i386) || defined(__amd64)))
	/* Parallel SCSI (and x86): legacy disks, fabricated devids, BCD CD-ROM quirks */
718 	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
719 	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
720 	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
721 	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
722 	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
723 	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
724 	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
725 	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
726 	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
727 	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
728 	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
729 	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
730 	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
731 	    &symbios_properties },
732 	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
733 	    &lsi_properties_scsi },
734 #if defined(__i386) || defined(__amd64)
735 	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
736 				    | SD_CONF_BSET_READSUB_BCD
737 				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
738 				    | SD_CONF_BSET_NO_READ_HEADER
739 				    | SD_CONF_BSET_READ_CD_XD4), NULL },
740 
741 	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
742 				    | SD_CONF_BSET_READSUB_BCD
743 				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
744 				    | SD_CONF_BSET_NO_READ_HEADER
745 				    | SD_CONF_BSET_READ_CD_XD4), NULL },
746 #endif /* __i386 || __amd64 */
747 #endif /* sparc NON-fibre or NON-sparc platforms */
748 
749 #if (defined(SD_PROP_TST))
	/* Test entry exercising every SD_CONF_BSET_* flag; built only with SD_PROP_TST */
750 	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
751 				| SD_CONF_BSET_CTYPE
752 				| SD_CONF_BSET_NRR_COUNT
753 				| SD_CONF_BSET_FAB_DEVID
754 				| SD_CONF_BSET_NOCACHE
755 				| SD_CONF_BSET_BSY_RETRY_COUNT
756 				| SD_CONF_BSET_PLAYMSF_BCD
757 				| SD_CONF_BSET_READSUB_BCD
758 				| SD_CONF_BSET_READ_TOC_TRK_BCD
759 				| SD_CONF_BSET_READ_TOC_ADDR_BCD
760 				| SD_CONF_BSET_NO_READ_HEADER
761 				| SD_CONF_BSET_READ_CD_XD4
762 				| SD_CONF_BSET_RST_RETRIES
763 				| SD_CONF_BSET_RSV_REL_TIME
764 				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
765 #endif
766 };
767 
/* Number of entries in sd_disk_table (standard array-length idiom). */
768 static const int sd_disk_table_size =
769 	sizeof (sd_disk_table)/ sizeof (sd_disk_config_t);
770 
771 
772 
773 #define	SD_INTERCONNECT_PARALLEL	0
774 #define	SD_INTERCONNECT_FABRIC		1
775 #define	SD_INTERCONNECT_FIBRE		2
776 #define	SD_INTERCONNECT_SSA		3
777 #define	SD_INTERCONNECT_SATA		4
778 #define	SD_INTERCONNECT_SAS		5
779 
780 #define	SD_IS_PARALLEL_SCSI(un)		\
781 	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)
782 #define	SD_IS_SERIAL(un)		\
783 	(((un)->un_interconnect_type == SD_INTERCONNECT_SATA) ||\
784 	((un)->un_interconnect_type == SD_INTERCONNECT_SAS))
785 
786 /*
787  * Definitions used by device id registration routines
788  */
789 #define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
790 #define	VPD_PAGE_LENGTH		3	/* offset for page length data */
791 #define	VPD_MODE_PAGE		1	/* offset into vpd pg for "page code" */
792 
793 static kmutex_t sd_sense_mutex = {0};
794 
795 /*
796  * Macros for updates of the driver state
797  */
798 #define	New_state(un, s)        \
799 	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
800 #define	Restore_state(un)	\
801 	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }
802 
/*
 * Per-CDB-group limits, ordered smallest to largest: each row appears to be
 * { CDB length, opcode group bits, max addressable LBA, max transfer count }
 * (e.g. 0x1FFFFF is the 21-bit LBA limit of 6-byte Group 0 CDBs).
 * NOTE(review): field names live in struct sd_cdbinfo (sddef.h) -- confirm
 * the ordering there before relying on this interpretation.
 */
803 static struct sd_cdbinfo sd_cdbtab[] = {
804 	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },
805 	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },
806 	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },
807 	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },
808 };
809 
810 /*
811  * Specifies the number of seconds that must have elapsed since the last
812  * cmd. has completed for a device to be declared idle to the PM framework.
813  */
814 static int sd_pm_idletime = 1;
815 
816 /*
817  * Internal function prototypes
818  */
819 
820 #if (defined(__fibre))
821 /*
822  * These #defines are to avoid namespace collisions that occur because this
823  * code is currently used to compile two separate driver modules: sd and ssd.
824  * All function names need to be treated this way (even if declared static)
825  * in order to allow the debugger to resolve the names properly.
826  * It is anticipated that in the near future the ssd module will be obsoleted,
827  * at which time this ugliness should go away.
828  */
829 #define	sd_log_trace			ssd_log_trace
830 #define	sd_log_info			ssd_log_info
831 #define	sd_log_err			ssd_log_err
832 #define	sdprobe				ssdprobe
833 #define	sdinfo				ssdinfo
834 #define	sd_prop_op			ssd_prop_op
835 #define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
836 #define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
837 #define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
838 #define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
839 #define	sd_scsi_target_lun_init		ssd_scsi_target_lun_init
840 #define	sd_scsi_target_lun_fini		ssd_scsi_target_lun_fini
841 #define	sd_scsi_get_target_lun_count	ssd_scsi_get_target_lun_count
842 #define	sd_scsi_update_lun_on_target	ssd_scsi_update_lun_on_target
843 #define	sd_spin_up_unit			ssd_spin_up_unit
844 #define	sd_enable_descr_sense		ssd_enable_descr_sense
845 #define	sd_reenable_dsense_task		ssd_reenable_dsense_task
846 #define	sd_set_mmc_caps			ssd_set_mmc_caps
847 #define	sd_read_unit_properties		ssd_read_unit_properties
848 #define	sd_process_sdconf_file		ssd_process_sdconf_file
849 #define	sd_process_sdconf_table		ssd_process_sdconf_table
850 #define	sd_sdconf_id_match		ssd_sdconf_id_match
851 #define	sd_blank_cmp			ssd_blank_cmp
852 #define	sd_chk_vers1_data		ssd_chk_vers1_data
853 #define	sd_set_vers1_properties		ssd_set_vers1_properties
854 
855 #define	sd_get_physical_geometry	ssd_get_physical_geometry
856 #define	sd_get_virtual_geometry		ssd_get_virtual_geometry
857 #define	sd_update_block_info		ssd_update_block_info
858 #define	sd_register_devid		ssd_register_devid
859 #define	sd_get_devid			ssd_get_devid
860 #define	sd_create_devid			ssd_create_devid
861 #define	sd_write_deviceid		ssd_write_deviceid
862 #define	sd_check_vpd_page_support	ssd_check_vpd_page_support
863 #define	sd_setup_pm			ssd_setup_pm
864 #define	sd_create_pm_components		ssd_create_pm_components
865 #define	sd_ddi_suspend			ssd_ddi_suspend
866 #define	sd_ddi_pm_suspend		ssd_ddi_pm_suspend
867 #define	sd_ddi_resume			ssd_ddi_resume
868 #define	sd_ddi_pm_resume		ssd_ddi_pm_resume
869 #define	sdpower				ssdpower
870 #define	sdattach			ssdattach
871 #define	sddetach			ssddetach
872 #define	sd_unit_attach			ssd_unit_attach
873 #define	sd_unit_detach			ssd_unit_detach
874 #define	sd_set_unit_attributes		ssd_set_unit_attributes
875 #define	sd_create_errstats		ssd_create_errstats
876 #define	sd_set_errstats			ssd_set_errstats
877 #define	sd_set_pstats			ssd_set_pstats
878 #define	sddump				ssddump
879 #define	sd_scsi_poll			ssd_scsi_poll
880 #define	sd_send_polled_RQS		ssd_send_polled_RQS
881 #define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
882 #define	sd_init_event_callbacks		ssd_init_event_callbacks
883 #define	sd_event_callback		ssd_event_callback
884 #define	sd_cache_control		ssd_cache_control
885 #define	sd_get_write_cache_enabled	ssd_get_write_cache_enabled
886 #define	sd_get_nv_sup			ssd_get_nv_sup
887 #define	sd_make_device			ssd_make_device
888 #define	sdopen				ssdopen
889 #define	sdclose				ssdclose
890 #define	sd_ready_and_valid		ssd_ready_and_valid
891 #define	sdmin				ssdmin
892 #define	sdread				ssdread
893 #define	sdwrite				ssdwrite
894 #define	sdaread				ssdaread
895 #define	sdawrite			ssdawrite
896 #define	sdstrategy			ssdstrategy
897 #define	sdioctl				ssdioctl
898 #define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
899 #define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
900 #define	sd_checksum_iostart		ssd_checksum_iostart
901 #define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
902 #define	sd_pm_iostart			ssd_pm_iostart
903 #define	sd_core_iostart			ssd_core_iostart
904 #define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
905 #define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
906 #define	sd_checksum_iodone		ssd_checksum_iodone
907 #define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
908 #define	sd_pm_iodone			ssd_pm_iodone
909 #define	sd_initpkt_for_buf		ssd_initpkt_for_buf
910 #define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
911 #define	sd_setup_rw_pkt			ssd_setup_rw_pkt
912 #define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
913 #define	sd_buf_iodone			ssd_buf_iodone
914 #define	sd_uscsi_strategy		ssd_uscsi_strategy
915 #define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
916 #define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
917 #define	sd_uscsi_iodone			ssd_uscsi_iodone
918 #define	sd_xbuf_strategy		ssd_xbuf_strategy
919 #define	sd_xbuf_init			ssd_xbuf_init
920 #define	sd_pm_entry			ssd_pm_entry
921 #define	sd_pm_exit			ssd_pm_exit
922 
923 #define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
924 #define	sd_pm_timeout_handler		ssd_pm_timeout_handler
925 
926 #define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
927 #define	sdintr				ssdintr
928 #define	sd_start_cmds			ssd_start_cmds
929 #define	sd_send_scsi_cmd		ssd_send_scsi_cmd
930 #define	sd_bioclone_alloc		ssd_bioclone_alloc
931 #define	sd_bioclone_free		ssd_bioclone_free
932 #define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
933 #define	sd_shadow_buf_free		ssd_shadow_buf_free
934 #define	sd_print_transport_rejected_message	\
935 					ssd_print_transport_rejected_message
936 #define	sd_retry_command		ssd_retry_command
937 #define	sd_set_retry_bp			ssd_set_retry_bp
938 #define	sd_send_request_sense_command	ssd_send_request_sense_command
939 #define	sd_start_retry_command		ssd_start_retry_command
940 #define	sd_start_direct_priority_command	\
941 					ssd_start_direct_priority_command
942 #define	sd_return_failed_command	ssd_return_failed_command
943 #define	sd_return_failed_command_no_restart	\
944 					ssd_return_failed_command_no_restart
945 #define	sd_return_command		ssd_return_command
946 #define	sd_sync_with_callback		ssd_sync_with_callback
947 #define	sdrunout			ssdrunout
948 #define	sd_mark_rqs_busy		ssd_mark_rqs_busy
949 #define	sd_mark_rqs_idle		ssd_mark_rqs_idle
950 #define	sd_reduce_throttle		ssd_reduce_throttle
951 #define	sd_restore_throttle		ssd_restore_throttle
952 #define	sd_print_incomplete_msg		ssd_print_incomplete_msg
953 #define	sd_init_cdb_limits		ssd_init_cdb_limits
954 #define	sd_pkt_status_good		ssd_pkt_status_good
955 #define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
956 #define	sd_pkt_status_busy		ssd_pkt_status_busy
957 #define	sd_pkt_status_reservation_conflict	\
958 					ssd_pkt_status_reservation_conflict
959 #define	sd_pkt_status_qfull		ssd_pkt_status_qfull
960 #define	sd_handle_request_sense		ssd_handle_request_sense
961 #define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
962 #define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
963 #define	sd_validate_sense_data		ssd_validate_sense_data
964 #define	sd_decode_sense			ssd_decode_sense
965 #define	sd_print_sense_msg		ssd_print_sense_msg
966 #define	sd_sense_key_no_sense		ssd_sense_key_no_sense
967 #define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
968 #define	sd_sense_key_not_ready		ssd_sense_key_not_ready
969 #define	sd_sense_key_medium_or_hardware_error	\
970 					ssd_sense_key_medium_or_hardware_error
971 #define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
972 #define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
973 #define	sd_sense_key_fail_command	ssd_sense_key_fail_command
974 #define	sd_sense_key_blank_check	ssd_sense_key_blank_check
975 #define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
976 #define	sd_sense_key_default		ssd_sense_key_default
977 #define	sd_print_retry_msg		ssd_print_retry_msg
978 #define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
979 #define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
980 #define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
981 #define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
982 #define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
983 #define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
984 #define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
985 #define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
986 #define	sd_pkt_reason_default		ssd_pkt_reason_default
987 #define	sd_reset_target			ssd_reset_target
988 #define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
989 #define	sd_start_stop_unit_task		ssd_start_stop_unit_task
990 #define	sd_taskq_create			ssd_taskq_create
991 #define	sd_taskq_delete			ssd_taskq_delete
992 #define	sd_target_change_task		ssd_target_change_task
993 #define	sd_log_lun_expansion_event	ssd_log_lun_expansion_event
994 #define	sd_media_change_task		ssd_media_change_task
995 #define	sd_handle_mchange		ssd_handle_mchange
996 #define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
997 #define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
998 #define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
999 #define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
/*
 * BUG FIX: this entry previously expanded to itself, so the __fibre (ssd)
 * build kept the "sd_" name instead of renaming it to "ssd_" like every
 * other entry in this table.  Map it to the "ssd_" name as intended.
 */
#define	sd_send_scsi_feature_GET_CONFIGURATION	\
					ssd_send_scsi_feature_GET_CONFIGURATION
1002 #define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
1003 #define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
1004 #define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
1005 #define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
1006 					ssd_send_scsi_PERSISTENT_RESERVE_IN
1007 #define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
1008 					ssd_send_scsi_PERSISTENT_RESERVE_OUT
1009 #define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
1010 #define	sd_send_scsi_SYNCHRONIZE_CACHE_biodone	\
1011 					ssd_send_scsi_SYNCHRONIZE_CACHE_biodone
1012 #define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
1013 #define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
1014 #define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
1015 #define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
1016 #define	sd_alloc_rqs			ssd_alloc_rqs
1017 #define	sd_free_rqs			ssd_free_rqs
1018 #define	sd_dump_memory			ssd_dump_memory
1019 #define	sd_get_media_info		ssd_get_media_info
1020 #define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
1021 #define	sd_nvpair_str_decode		ssd_nvpair_str_decode
1022 #define	sd_strtok_r			ssd_strtok_r
1023 #define	sd_set_properties		ssd_set_properties
1024 #define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
1025 #define	sd_setup_next_xfer		ssd_setup_next_xfer
1026 #define	sd_dkio_get_temp		ssd_dkio_get_temp
1027 #define	sd_check_mhd			ssd_check_mhd
1028 #define	sd_mhd_watch_cb			ssd_mhd_watch_cb
1029 #define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
1030 #define	sd_sname			ssd_sname
1031 #define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
1032 #define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
1033 #define	sd_take_ownership		ssd_take_ownership
1034 #define	sd_reserve_release		ssd_reserve_release
1035 #define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
1036 #define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
1037 #define	sd_persistent_reservation_in_read_keys	\
1038 					ssd_persistent_reservation_in_read_keys
1039 #define	sd_persistent_reservation_in_read_resv	\
1040 					ssd_persistent_reservation_in_read_resv
1041 #define	sd_mhdioc_takeown		ssd_mhdioc_takeown
1042 #define	sd_mhdioc_failfast		ssd_mhdioc_failfast
1043 #define	sd_mhdioc_release		ssd_mhdioc_release
1044 #define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
1045 #define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
1046 #define	sd_mhdioc_inresv		ssd_mhdioc_inresv
1047 #define	sr_change_blkmode		ssr_change_blkmode
1048 #define	sr_change_speed			ssr_change_speed
1049 #define	sr_atapi_change_speed		ssr_atapi_change_speed
1050 #define	sr_pause_resume			ssr_pause_resume
1051 #define	sr_play_msf			ssr_play_msf
1052 #define	sr_play_trkind			ssr_play_trkind
1053 #define	sr_read_all_subcodes		ssr_read_all_subcodes
1054 #define	sr_read_subchannel		ssr_read_subchannel
1055 #define	sr_read_tocentry		ssr_read_tocentry
1056 #define	sr_read_tochdr			ssr_read_tochdr
1057 #define	sr_read_cdda			ssr_read_cdda
1058 #define	sr_read_cdxa			ssr_read_cdxa
1059 #define	sr_read_mode1			ssr_read_mode1
1060 #define	sr_read_mode2			ssr_read_mode2
1061 #define	sr_read_cd_mode2		ssr_read_cd_mode2
1062 #define	sr_sector_mode			ssr_sector_mode
1063 #define	sr_eject			ssr_eject
1064 #define	sr_ejected			ssr_ejected
1065 #define	sr_check_wp			ssr_check_wp
1066 #define	sd_check_media			ssd_check_media
1067 #define	sd_media_watch_cb		ssd_media_watch_cb
1068 #define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
1069 #define	sr_volume_ctrl			ssr_volume_ctrl
1070 #define	sr_read_sony_session_offset	ssr_read_sony_session_offset
1071 #define	sd_log_page_supported		ssd_log_page_supported
1072 #define	sd_check_for_writable_cd	ssd_check_for_writable_cd
1073 #define	sd_wm_cache_constructor		ssd_wm_cache_constructor
1074 #define	sd_wm_cache_destructor		ssd_wm_cache_destructor
1075 #define	sd_range_lock			ssd_range_lock
1076 #define	sd_get_range			ssd_get_range
1077 #define	sd_free_inlist_wmap		ssd_free_inlist_wmap
1078 #define	sd_range_unlock			ssd_range_unlock
1079 #define	sd_read_modify_write_task	ssd_read_modify_write_task
1080 #define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw
1081 
1082 #define	sd_iostart_chain		ssd_iostart_chain
1083 #define	sd_iodone_chain			ssd_iodone_chain
1084 #define	sd_initpkt_map			ssd_initpkt_map
1085 #define	sd_destroypkt_map		ssd_destroypkt_map
1086 #define	sd_chain_type_map		ssd_chain_type_map
1087 #define	sd_chain_index_map		ssd_chain_index_map
1088 
1089 #define	sd_failfast_flushctl		ssd_failfast_flushctl
1090 #define	sd_failfast_flushq		ssd_failfast_flushq
1091 #define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback
1092 
1093 #define	sd_is_lsi			ssd_is_lsi
1094 #define	sd_tg_rdwr			ssd_tg_rdwr
1095 #define	sd_tg_getinfo			ssd_tg_getinfo
1096 
1097 #endif	/* #if (defined(__fibre)) */
1098 
1099 
/* Loadable-module entry points (see _init(9E)/_fini(9E)/_info(9E)). */
int _init(void);
int _fini(void);
int _info(struct modinfo *modinfop);

/*
 * Component-masked logging helpers at trace/info/error severity.
 * The PRINTFLIKE3 lint hints mark "fmt" as a printf-style format string.
 */
/*PRINTFLIKE3*/
static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);

/* DDI probe, getinfo and property-op entry points. */
static int sdprobe(dev_info_t *devi);
static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
    void **result);
static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
    int mod_flags, char *name, caddr_t valuep, int *lengthp);

/*
 * Smart probe for parallel scsi
 */
static void sd_scsi_probe_cache_init(void);
static void sd_scsi_probe_cache_fini(void);
static void sd_scsi_clear_probe_cache(void);
static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());

/*
 * Attached luns on target for parallel scsi
 */
static void sd_scsi_target_lun_init(void);
static void sd_scsi_target_lun_fini(void);
static int  sd_scsi_get_target_lun_count(dev_info_t *dip, int target);
static void sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag);
1134 
1135 /*
1136  * Using sd_ssc_init to establish sd_ssc_t struct
1137  * Using sd_ssc_send to send uscsi internal command
1138  * Using sd_ssc_fini to free sd_ssc_t struct
1139  */
1140 static sd_ssc_t *sd_ssc_init(struct sd_lun *un);
1141 static int sd_ssc_send(sd_ssc_t *ssc, struct uscsi_cmd *incmd,
1142     int flag, enum uio_seg dataspace, int path_flag);
1143 static void sd_ssc_fini(sd_ssc_t *ssc);
1144 
1145 /*
1146  * Using sd_ssc_assessment to set correct type-of-assessment
1147  * Using sd_ssc_post to post ereport & system log
1148  *       sd_ssc_post will call sd_ssc_print to print system log
1149  *       sd_ssc_post will call sd_ssd_ereport_post to post ereport
1150  */
1151 static void sd_ssc_assessment(sd_ssc_t *ssc,
1152     enum sd_type_assessment tp_assess);
1153 
1154 static void sd_ssc_post(sd_ssc_t *ssc, enum sd_driver_assessment sd_assess);
1155 static void sd_ssc_print(sd_ssc_t *ssc, int sd_severity);
1156 static void sd_ssc_ereport_post(sd_ssc_t *ssc,
1157     enum sd_driver_assessment drv_assess);
1158 
1159 /*
1160  * Using sd_ssc_set_info to mark an un-decodable-data error.
1161  * Using sd_ssc_extract_info to transfer information from internal
1162  *       data structures to sd_ssc_t.
1163  */
1164 static void sd_ssc_set_info(sd_ssc_t *ssc, int ssc_flags, uint_t comp,
1165     const char *fmt, ...);
1166 static void sd_ssc_extract_info(sd_ssc_t *ssc, struct sd_lun *un,
1167     struct scsi_pkt *pktp, struct buf *bp, struct sd_xbuf *xp);
1168 
1169 static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
1170     enum uio_seg dataspace, int path_flag);
1171 
1172 #ifdef _LP64
1173 static void	sd_enable_descr_sense(sd_ssc_t *ssc);
1174 static void	sd_reenable_dsense_task(void *arg);
1175 #endif /* _LP64 */
1176 
static void	sd_set_mmc_caps(sd_ssc_t *ssc);

/* Per-unit configuration: sd.conf property parsing and the built-in table. */
static void sd_read_unit_properties(struct sd_lun *un);
static int  sd_process_sdconf_file(struct sd_lun *un);
static void sd_nvpair_str_decode(struct sd_lun *un, char *nvpair_str);
static char *sd_strtok_r(char *string, const char *sepset, char **lasts);
static void sd_set_properties(struct sd_lun *un, char *name, char *value);
static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
    int *data_list, sd_tunables *values);
static void sd_process_sdconf_table(struct sd_lun *un);
static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
	int list_len, char *dataname_ptr);
static void sd_set_vers1_properties(struct sd_lun *un, int flags,
    sd_tunables *prop_list);

/* Device-id (devid) creation, lookup and registration. */
static void sd_register_devid(sd_ssc_t *ssc, dev_info_t *devi,
    int reservation_flag);
static int  sd_get_devid(sd_ssc_t *ssc);
static ddi_devid_t sd_create_devid(sd_ssc_t *ssc);
static int  sd_write_deviceid(sd_ssc_t *ssc);
static int  sd_get_devid_page(struct sd_lun *un, uchar_t *wwn, int *len);
static int  sd_check_vpd_page_support(sd_ssc_t *ssc);

/* Power-management setup. */
static void sd_setup_pm(sd_ssc_t *ssc, dev_info_t *devi);
static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);

/* DDI suspend/resume and power entry points. */
static int  sd_ddi_suspend(dev_info_t *devi);
static int  sd_ddi_pm_suspend(struct sd_lun *un);
static int  sd_ddi_resume(dev_info_t *devi);
static int  sd_ddi_pm_resume(struct sd_lun *un);
static int  sdpower(dev_info_t *devi, int component, int level);

/* attach(9E)/detach(9E) entry points and their per-unit workers. */
static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
static int  sd_unit_attach(dev_info_t *devi);
static int  sd_unit_detach(dev_info_t *devi);

/* Unit attribute and kstat (error/partition statistics) setup. */
static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
static void sd_create_errstats(struct sd_lun *un, int instance);
static void sd_set_errstats(struct sd_lun *un);
static void sd_set_pstats(struct sd_lun *un);

/* Crash-dump entry point and polled (non-interrupt) command execution. */
static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
static int  sd_send_polled_RQS(struct sd_lun *un);
static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);
1225 
#if (defined(__fibre))
/*
 * Event callbacks (photon)
 */
static void sd_init_event_callbacks(struct sd_lun *un);
static void  sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
#endif

/*
 * Defines for sd_cache_control: flag values accepted for its
 * rcd_flag and wce_flag arguments.
 */

#define	SD_CACHE_ENABLE		1
#define	SD_CACHE_DISABLE	0
#define	SD_CACHE_NOCHANGE	-1

static int   sd_cache_control(sd_ssc_t *ssc, int rcd_flag, int wce_flag);
static int   sd_get_write_cache_enabled(sd_ssc_t *ssc, int *is_enabled);
static void  sd_get_nv_sup(sd_ssc_t *ssc);
static dev_t sd_make_device(dev_info_t *devi);

static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
	uint64_t capacity);
1249 
1250 /*
1251  * Driver entry point functions.
1252  */
1253 static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
1254 static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
1255 static int  sd_ready_and_valid(sd_ssc_t *ssc, int part);
1256 
1257 static void sdmin(struct buf *bp);
1258 static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
1259 static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
1260 static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1261 static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1262 
1263 static int sdstrategy(struct buf *bp);
1264 static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);
1265 
1266 /*
1267  * Function prototypes for layering functions in the iostart chain.
1268  */
1269 static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
1270 	struct buf *bp);
1271 static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
1272 	struct buf *bp);
1273 static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
1274 static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
1275 	struct buf *bp);
1276 static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
1277 static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);
1278 
1279 /*
1280  * Function prototypes for layering functions in the iodone chain.
1281  */
1282 static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
1283 static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
1284 static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
1285 	struct buf *bp);
1286 static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
1287 	struct buf *bp);
1288 static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
1289 static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
1290 	struct buf *bp);
1291 static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);
1292 
1293 /*
1294  * Prototypes for functions to support buf(9S) based IO.
1295  */
1296 static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
1297 static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
1298 static void sd_destroypkt_for_buf(struct buf *);
1299 static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
1300 	struct buf *bp, int flags,
1301 	int (*callback)(caddr_t), caddr_t callback_arg,
1302 	diskaddr_t lba, uint32_t blockcount);
1303 static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
1304 	struct buf *bp, diskaddr_t lba, uint32_t blockcount);
1305 
1306 /*
1307  * Prototypes for functions to support USCSI IO.
1308  */
1309 static int sd_uscsi_strategy(struct buf *bp);
1310 static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
1311 static void sd_destroypkt_for_uscsi(struct buf *);
1312 
1313 static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
1314 	uchar_t chain_type, void *pktinfop);
1315 
1316 static int  sd_pm_entry(struct sd_lun *un);
1317 static void sd_pm_exit(struct sd_lun *un);
1318 
1319 static void sd_pm_idletimeout_handler(void *arg);
1320 
1321 /*
1322  * sd_core internal functions (used at the sd_core_io layer).
1323  */
1324 static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
1325 static void sdintr(struct scsi_pkt *pktp);
1326 static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);
1327 
1328 static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
1329 	enum uio_seg dataspace, int path_flag);
1330 
/* buf(9S) cloning and shadow-buffer helpers. */
static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
	daddr_t blkno, int (*func)(struct buf *));
static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
	uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
static void sd_bioclone_free(struct buf *bp);
static void sd_shadow_buf_free(struct buf *bp);

/* Message printers used as user_funcp callbacks by sd_retry_command(). */
static void sd_print_transport_rejected_message(struct sd_lun *un,
	struct sd_xbuf *xp, int code);
static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);
static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);
static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);

/* Command retry framework. */
static void sd_retry_command(struct sd_lun *un, struct buf *bp,
	int retry_check_flag,
	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
		int c),
	void *user_arg, int failure_code,  clock_t retry_delay,
	void (*statp)(kstat_io_t *));

static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
	clock_t retry_delay, void (*statp)(kstat_io_t *));

static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
	struct scsi_pkt *pktp);
static void sd_start_retry_command(void *arg);
static void sd_start_direct_priority_command(void *arg);
static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
	int errcode);
static void sd_return_failed_command_no_restart(struct sd_lun *un,
	struct buf *bp, int errcode);
static void sd_return_command(struct sd_lun *un, struct buf *bp);
static void sd_sync_with_callback(struct sd_lun *un);
static int sdrunout(caddr_t arg);

/* Request-sense (RQS) packet busy/idle bookkeeping. */
static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);

/* Throttle (outstanding-command limit) adjustment. */
static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
static void sd_restore_throttle(void *arg);

static void sd_init_cdb_limits(struct sd_lun *un);

static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

/*
 * Error handling functions
 */
static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1391 
/* Request-sense retrieval and validation. */
static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, size_t actual_len);
static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
	void *arg, int code);

/* One handler per SCSI sense key, dispatched from sd_decode_sense(). */
static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_recoverable_error(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_not_ready(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_unit_attention(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_default(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
	void *arg, int flag);

/* One handler per scsi_pkt pkt_reason (transport completion) code. */
static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1449 
static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);

static void sd_start_stop_unit_callback(void *arg);
static void sd_start_stop_unit_task(void *arg);

/* Driver task queues and the tasks dispatched onto them. */
static void sd_taskq_create(void);
static void sd_taskq_delete(void);
static void sd_target_change_task(void *arg);
static void sd_log_lun_expansion_event(struct sd_lun *un, int km_flag);
static void sd_media_change_task(void *arg);

static int sd_handle_mchange(struct sd_lun *un);

/*
 * sd_send_scsi_<CMD> helpers: each issues the named SCSI command
 * internally via the uscsi path.
 */
static int sd_send_scsi_DOORLOCK(sd_ssc_t *ssc, int flag, int path_flag);
static int sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp,
	uint32_t *lbap, int path_flag);
static int sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp,
	uint32_t *lbap, int path_flag);
static int sd_send_scsi_START_STOP_UNIT(sd_ssc_t *ssc, int flag,
	int path_flag);
static int sd_send_scsi_INQUIRY(sd_ssc_t *ssc, uchar_t *bufaddr,
	size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
static int sd_send_scsi_TEST_UNIT_READY(sd_ssc_t *ssc, int flag);
static int sd_send_scsi_PERSISTENT_RESERVE_IN(sd_ssc_t *ssc,
	uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
static int sd_send_scsi_PERSISTENT_RESERVE_OUT(sd_ssc_t *ssc,
	uchar_t usr_cmd, uchar_t *usr_bufp);
static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
	struct dk_callback *dkc);
static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
static int sd_send_scsi_GET_CONFIGURATION(sd_ssc_t *ssc,
	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
	uchar_t *bufaddr, uint_t buflen, int path_flag);
static int sd_send_scsi_feature_GET_CONFIGURATION(sd_ssc_t *ssc,
	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
	uchar_t *bufaddr, uint_t buflen, char feature, int path_flag);
static int sd_send_scsi_MODE_SENSE(sd_ssc_t *ssc, int cdbsize,
	uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
static int sd_send_scsi_MODE_SELECT(sd_ssc_t *ssc, int cdbsize,
	uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
static int sd_send_scsi_RDWR(sd_ssc_t *ssc, uchar_t cmd, void *bufaddr,
	size_t buflen, daddr_t start_block, int path_flag);
/* Convenience wrappers around sd_send_scsi_RDWR() for read and write. */
#define	sd_send_scsi_READ(ssc, bufaddr, buflen, start_block, path_flag)	\
	sd_send_scsi_RDWR(ssc, SCMD_READ, bufaddr, buflen, start_block, \
	path_flag)
#define	sd_send_scsi_WRITE(ssc, bufaddr, buflen, start_block, path_flag)\
	sd_send_scsi_RDWR(ssc, SCMD_WRITE, bufaddr, buflen, start_block,\
	path_flag)

static int sd_send_scsi_LOG_SENSE(sd_ssc_t *ssc, uchar_t *bufaddr,
	uint16_t buflen, uchar_t page_code, uchar_t page_control,
	uint16_t param_ptr, int path_flag);

/* Request-sense (RQS) resources allocated per unit. */
static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
static void sd_free_rqs(struct sd_lun *un);

static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
	uchar_t *data, int len, int fmt);
static void sd_panic_for_res_conflict(struct sd_lun *un);

/*
 * Disk Ioctl Function Prototypes
 */
static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);

/*
 * Multi-host Ioctl Prototypes
 */
static int sd_check_mhd(dev_t dev, int interval);
static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
static char *sd_sname(uchar_t status);
static void sd_mhd_resvd_recover(void *arg);
/*
 * NOTE(review): was declared with an unprototyped empty parameter list
 * "()"; declared as "(void)" so the compiler can type-check call sites.
 * Compatible with an old-style no-parameter definition (C11 6.7.6.3).
 */
static void sd_resv_reclaim_thread(void);
/* Reservation ownership and the MHIOC* ioctl workers. */
static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
static int sd_reserve_release(dev_t dev, int cmd);
static void sd_rmv_resv_reclaim_req(dev_t dev);
static void sd_mhd_reset_notify_cb(caddr_t arg);
static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
	mhioc_inkeys_t *usrp, int flag);
static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
	mhioc_inresvs_t *usrp, int flag);
static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
static int sd_mhdioc_release(dev_t dev);
static int sd_mhdioc_register_devid(dev_t dev);
static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);

/*
 * SCSI removable prototypes
 */
static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
static int sr_pause_resume(dev_t dev, int mode);
static int sr_play_msf(dev_t dev, caddr_t data, int flag);
static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
static int sr_sector_mode(dev_t dev, uint32_t blksize);
static int sr_eject(dev_t dev);
1560 static void sr_ejected(register struct sd_lun *un);
1561 static int sr_check_wp(dev_t dev);
1562 static int sd_check_media(dev_t dev, enum dkio_state state);
1563 static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1564 static void sd_delayed_cv_broadcast(void *arg);
1565 static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1566 static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1567 
1568 static int sd_log_page_supported(sd_ssc_t *ssc, int log_page);
1569 
1570 /*
1571  * Function Prototype for the non-512 support (DVDRAM, MO etc.) functions.
1572  */
1573 static void sd_check_for_writable_cd(sd_ssc_t *ssc, int path_flag);
1574 static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1575 static void sd_wm_cache_destructor(void *wm, void *un);
1576 static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1577 	daddr_t endb, ushort_t typ);
1578 static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1579 	daddr_t endb);
1580 static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1581 static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1582 static void sd_read_modify_write_task(void * arg);
1583 static int
1584 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
1585 	struct buf **bpp);
1586 
1587 
1588 /*
1589  * Function prototypes for failfast support.
1590  */
1591 static void sd_failfast_flushq(struct sd_lun *un);
1592 static int sd_failfast_flushq_callback(struct buf *bp);
1593 
1594 /*
1595  * Function prototypes to check for lsi devices
1596  */
1597 static void sd_is_lsi(struct sd_lun *un);
1598 
1599 /*
1600  * Function prototypes for partial DMA support
1601  */
1602 static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1603 		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1604 
1605 
1606 /* Function prototypes for cmlb */
1607 static int sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
1608     diskaddr_t start_block, size_t reqlength, void *tg_cookie);
1609 
1610 static int sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie);
1611 
1612 /*
1613  * Constants for failfast support:
1614  *
1615  * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1616  * failfast processing being performed.
1617  *
1618  * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1619  * failfast processing on all bufs with B_FAILFAST set.
1620  */
1621 
1622 #define	SD_FAILFAST_INACTIVE		0
1623 #define	SD_FAILFAST_ACTIVE		1
1624 
1625 /*
1626  * Bitmask to control behavior of buf(9S) flushes when a transition to
1627  * the failfast state occurs. Optional bits include:
1628  *
1629  * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1630  * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1631  * be flushed.
1632  *
1633  * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1634  * driver, in addition to the regular wait queue. This includes the xbuf
1635  * queues. When clear, only the driver's wait queue will be flushed.
1636  */
1637 #define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1638 #define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1639 
1640 /*
1641  * The default behavior is to only flush bufs that have B_FAILFAST set, but
1642  * to flush all queues within the driver.
1643  */
1644 static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
1645 
1646 
1647 /*
1648  * SD Testing Fault Injection
1649  */
1650 #ifdef SD_FAULT_INJECTION
1651 static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1652 static void sd_faultinjection(struct scsi_pkt *pktp);
1653 static void sd_injection_log(char *buf, struct sd_lun *un);
1654 #endif
1655 
1656 /*
1657  * Device driver ops vector
1658  */
static struct cb_ops sd_cb_ops = {
	/* Character/block driver entry points; see cb_ops(9S). */
	sdopen,			/* open */
	sdclose,		/* close */
	sdstrategy,		/* strategy */
	nodev,			/* print */
	sddump,			/* dump */
	sdread,			/* read */
	sdwrite,		/* write */
	sdioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	sd_prop_op,		/* cb_prop_op */
	0,			/* streamtab  */
	/* D_64BIT: handles 64-bit offsets; D_MP: MT-safe; D_HOTPLUG: DR aware */
	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
	CB_REV,			/* cb_rev */
	sdaread, 		/* async I/O read entry point */
	sdawrite		/* async I/O write entry point */
};
1679 
struct dev_ops sd_ops = {
	/* Device operations vector; see dev_ops(9S). */
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	sdinfo,			/* info */
	nulldev,		/* identify */
	sdprobe,		/* probe */
	sdattach,		/* attach */
	sddetach,		/* detach */
	nodev,			/* reset */
	&sd_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	sdpower,		/* power */
	ddi_quiesce_not_needed,		/* quiesce */
};
1694 
1695 /*
1696  * This is the loadable module wrapper.
1697  */
1698 #include <sys/modctl.h>
1699 
#ifndef XPV_HVM_DRIVER
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module. This one is a driver */
	SD_MODULE_NAME,		/* Module name. */
	&sd_ops			/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

#else /* XPV_HVM_DRIVER */
/*
 * In the Xen HVM build the module is loaded as a misc module (no dev_ops);
 * the module name gains an "hvm_" prefix that _init() strips for sd_label.
 */
static struct modlmisc modlmisc = {
	&mod_miscops,		/* Type of module. This one is a misc */
	"HVM " SD_MODULE_NAME,		/* Module name. */
};

static struct modlinkage modlinkage = {
	MODREV_1, &modlmisc, NULL
};

#endif /* XPV_HVM_DRIVER */
1722 
static cmlb_tg_ops_t sd_tgops = {
	/* Target-glue ops handed to cmlb for label/partition management. */
	TG_DK_OPS_VERSION_1,
	sd_tg_rdwr,		/* media read/write on behalf of cmlb */
	sd_tg_getinfo		/* capacity/geometry queries from cmlb */
};
1728 
1729 static struct scsi_asq_key_strings sd_additional_codes[] = {
1730 	0x81, 0, "Logical Unit is Reserved",
1731 	0x85, 0, "Audio Address Not Valid",
1732 	0xb6, 0, "Media Load Mechanism Failed",
1733 	0xB9, 0, "Audio Play Operation Aborted",
1734 	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
1735 	0x53, 2, "Medium removal prevented",
1736 	0x6f, 0, "Authentication failed during key exchange",
1737 	0x6f, 1, "Key not present",
1738 	0x6f, 2, "Key not established",
1739 	0x6f, 3, "Read without proper authentication",
1740 	0x6f, 4, "Mismatched region to this logical unit",
1741 	0x6f, 5, "Region reset count error",
1742 	0xffff, 0x0, NULL
1743 };
1744 
1745 
1746 /*
1747  * Struct for passing printing information for sense data messages
1748  */
struct sd_sense_info {
	int	ssi_severity;	/* message severity (SCSI_ERR_* level) */
	int	ssi_pfa_flag;	/* non-zero: predictive failure analysis msg */
};
1753 
1754 /*
1755  * Table of function pointers for iostart-side routines. Separate "chains"
1756  * of layered function calls are formed by placing the function pointers
1757  * sequentially in the desired order. Functions are called according to an
1758  * incrementing table index ordering. The last function in each chain must
1759  * be sd_core_iostart(). The corresponding iodone-side routines are expected
1760  * in the sd_iodone_chain[] array.
1761  *
1762  * Note: It may seem more natural to organize both the iostart and iodone
1763  * functions together, into an array of structures (or some similar
1764  * organization) with a common index, rather than two separate arrays which
1765  * must be maintained in synchronization. The purpose of this division is
1766  * to achieve improved performance: individual arrays allows for more
1767  * effective cache line utilization on certain platforms.
1768  */
1769 
1770 typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1771 
1772 
static sd_chain_t sd_iostart_chain[] = {

	/*
	 * NB: the index positions here must stay in lock-step with
	 * sd_iodone_chain[], sd_initpkt_map[], sd_destroypkt_map[],
	 * sd_chain_type_map[] and the SD_CHAIN_*_IOSTART macros below.
	 */

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_mapblockaddr_iostart,	/* Index: 0 */
	sd_pm_iostart,			/* Index: 1 */
	sd_core_iostart,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_mapblockaddr_iostart,	/* Index: 3 */
	sd_core_iostart,		/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	sd_mapblockaddr_iostart,	/* Index: 5 */
	sd_mapblocksize_iostart,	/* Index: 6 */
	sd_pm_iostart,			/* Index: 7 */
	sd_core_iostart,		/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	sd_mapblockaddr_iostart,	/* Index: 9 */
	sd_mapblocksize_iostart,	/* Index: 10 */
	sd_core_iostart,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_mapblockaddr_iostart,	/* Index: 12 */
	sd_checksum_iostart,		/* Index: 13 */
	sd_pm_iostart,			/* Index: 14 */
	sd_core_iostart,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_mapblockaddr_iostart,	/* Index: 16 */
	sd_checksum_iostart,		/* Index: 17 */
	sd_core_iostart,		/* Index: 18 */

	/* Chain for USCSI commands (all targets) */
	sd_pm_iostart,			/* Index: 19 */
	sd_core_iostart,		/* Index: 20 */

	/* Chain for checksumming USCSI commands (all targets) */
	sd_checksum_uscsi_iostart,	/* Index: 21 */
	sd_pm_iostart,			/* Index: 22 */
	sd_core_iostart,		/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_core_iostart,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_core_iostart,		/* Index: 25 */
};
1821 
1822 /*
1823  * Macros to locate the first function of each iostart chain in the
1824  * sd_iostart_chain[] array. These are located by the index in the array.
1825  */
1826 #define	SD_CHAIN_DISK_IOSTART			0
1827 #define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1828 #define	SD_CHAIN_RMMEDIA_IOSTART		5
1829 #define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1830 #define	SD_CHAIN_CHKSUM_IOSTART			12
1831 #define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1832 #define	SD_CHAIN_USCSI_CMD_IOSTART		19
1833 #define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1834 #define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1835 #define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
1836 
1837 
1838 /*
1839  * Table of function pointers for the iodone-side routines for the driver-
1840  * internal layering mechanism.  The calling sequence for iodone routines
1841  * uses a decrementing table index, so the last routine called in a chain
1842  * must be at the lowest array index location for that chain.  The last
1843  * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1844  * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1845  * of the functions in an iodone side chain must correspond to the ordering
1846  * of the iostart routines for that chain.  Note that there is no iodone
1847  * side routine that corresponds to sd_core_iostart(), so there is no
1848  * entry in the table for this.
1849  */
1850 
static sd_chain_t sd_iodone_chain[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_buf_iodone,			/* Index: 0 */
	sd_mapblockaddr_iodone,		/* Index: 1 */
	sd_pm_iodone,			/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_buf_iodone,			/* Index: 3 */
	sd_mapblockaddr_iodone,		/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	sd_buf_iodone,			/* Index: 5 */
	sd_mapblockaddr_iodone,		/* Index: 6 */
	sd_mapblocksize_iodone,		/* Index: 7 */
	sd_pm_iodone,			/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	sd_buf_iodone,			/* Index: 9 */
	sd_mapblockaddr_iodone,		/* Index: 10 */
	sd_mapblocksize_iodone,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_buf_iodone,			/* Index: 12 */
	sd_mapblockaddr_iodone,		/* Index: 13 */
	sd_checksum_iodone,		/* Index: 14 */
	sd_pm_iodone,			/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_buf_iodone,			/* Index: 16 */
	sd_mapblockaddr_iodone,		/* Index: 17 */
	sd_checksum_iodone,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_uscsi_iodone,		/* Index: 19 */
	sd_pm_iodone,			/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_uscsi_iodone,		/* Index: 21 */
	sd_checksum_uscsi_iodone,	/* Index: 22 */
	sd_pm_iodone,			/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_uscsi_iodone,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_uscsi_iodone,		/* Index: 25 */
};
1899 
1900 
1901 /*
1902  * Macros to locate the "first" function in the sd_iodone_chain[] array for
1903  * each iodone-side chain. These are located by the array index, but as the
1904  * iodone side functions are called in a decrementing-index order, the
1905  * highest index number in each chain must be specified (as these correspond
1906  * to the first function in the iodone chain that will be called by the core
1907  * at IO completion time).
1908  */
1909 
1910 #define	SD_CHAIN_DISK_IODONE			2
1911 #define	SD_CHAIN_DISK_IODONE_NO_PM		4
1912 #define	SD_CHAIN_RMMEDIA_IODONE			8
1913 #define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
1914 #define	SD_CHAIN_CHKSUM_IODONE			15
1915 #define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
1916 #define	SD_CHAIN_USCSI_CMD_IODONE		20
1917 #define	SD_CHAIN_USCSI_CHKSUM_IODONE		22
1918 #define	SD_CHAIN_DIRECT_CMD_IODONE		24
1919 #define	SD_CHAIN_PRIORITY_CMD_IODONE		25
1920 
1921 
1922 
1923 
1924 /*
1925  * Array to map a layering chain index to the appropriate initpkt routine.
1926  * The redundant entries are present so that the index used for accessing
1927  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1928  * with this table as well.
1929  */
1930 typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
1931 
static sd_initpkt_t	sd_initpkt_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_initpkt_for_buf,		/* Index: 0 */
	sd_initpkt_for_buf,		/* Index: 1 */
	sd_initpkt_for_buf,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_initpkt_for_buf,		/* Index: 3 */
	sd_initpkt_for_buf,		/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	sd_initpkt_for_buf,		/* Index: 5 */
	sd_initpkt_for_buf,		/* Index: 6 */
	sd_initpkt_for_buf,		/* Index: 7 */
	sd_initpkt_for_buf,		/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	sd_initpkt_for_buf,		/* Index: 9 */
	sd_initpkt_for_buf,		/* Index: 10 */
	sd_initpkt_for_buf,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_initpkt_for_buf,		/* Index: 12 */
	sd_initpkt_for_buf,		/* Index: 13 */
	sd_initpkt_for_buf,		/* Index: 14 */
	sd_initpkt_for_buf,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_initpkt_for_buf,		/* Index: 16 */
	sd_initpkt_for_buf,		/* Index: 17 */
	sd_initpkt_for_buf,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_initpkt_for_uscsi,		/* Index: 19 */
	sd_initpkt_for_uscsi,		/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_initpkt_for_uscsi,		/* Index: 21 */
	sd_initpkt_for_uscsi,		/* Index: 22 */
	sd_initpkt_for_uscsi,		/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_initpkt_for_uscsi,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_initpkt_for_uscsi,		/* Index: 25 */

};
1981 
1982 
1983 /*
 * Array to map a layering chain index to the appropriate destroypkt routine.
1985  * The redundant entries are present so that the index used for accessing
1986  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1987  * with this table as well.
1988  */
1989 typedef void (*sd_destroypkt_t)(struct buf *);
1990 
static sd_destroypkt_t	sd_destroypkt_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_destroypkt_for_buf,		/* Index: 0 */
	sd_destroypkt_for_buf,		/* Index: 1 */
	sd_destroypkt_for_buf,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_destroypkt_for_buf,		/* Index: 3 */
	sd_destroypkt_for_buf,		/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	sd_destroypkt_for_buf,		/* Index: 5 */
	sd_destroypkt_for_buf,		/* Index: 6 */
	sd_destroypkt_for_buf,		/* Index: 7 */
	sd_destroypkt_for_buf,		/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	sd_destroypkt_for_buf,		/* Index: 9 */
	sd_destroypkt_for_buf,		/* Index: 10 */
	sd_destroypkt_for_buf,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_destroypkt_for_buf,		/* Index: 12 */
	sd_destroypkt_for_buf,		/* Index: 13 */
	sd_destroypkt_for_buf,		/* Index: 14 */
	sd_destroypkt_for_buf,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_destroypkt_for_buf,		/* Index: 16 */
	sd_destroypkt_for_buf,		/* Index: 17 */
	sd_destroypkt_for_buf,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_destroypkt_for_uscsi,	/* Index: 19 */
	sd_destroypkt_for_uscsi,	/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_destroypkt_for_uscsi,	/* Index: 21 */
	sd_destroypkt_for_uscsi,	/* Index: 22 */
	sd_destroypkt_for_uscsi,	/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_destroypkt_for_uscsi,	/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_destroypkt_for_uscsi,	/* Index: 25 */

};
2040 
2041 
2042 
2043 /*
2044  * Array to map a layering chain index to the appropriate chain "type".
2045  * The chain type indicates a specific property/usage of the chain.
2046  * The redundant entries are present so that the index used for accessing
2047  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2048  * with this table as well.
2049  */
2050 
2051 #define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
2052 #define	SD_CHAIN_BUFIO			1	/* regular buf IO */
2053 #define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
2054 #define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
2055 #define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
2056 						/* (for error recovery) */
2057 
static int sd_chain_type_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	SD_CHAIN_BUFIO,			/* Index: 0 */
	SD_CHAIN_BUFIO,			/* Index: 1 */
	SD_CHAIN_BUFIO,			/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	SD_CHAIN_BUFIO,			/* Index: 3 */
	SD_CHAIN_BUFIO,			/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	SD_CHAIN_BUFIO,			/* Index: 5 */
	SD_CHAIN_BUFIO,			/* Index: 6 */
	SD_CHAIN_BUFIO,			/* Index: 7 */
	SD_CHAIN_BUFIO,			/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	SD_CHAIN_BUFIO,			/* Index: 9 */
	SD_CHAIN_BUFIO,			/* Index: 10 */
	SD_CHAIN_BUFIO,			/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	SD_CHAIN_BUFIO,			/* Index: 12 */
	SD_CHAIN_BUFIO,			/* Index: 13 */
	SD_CHAIN_BUFIO,			/* Index: 14 */
	SD_CHAIN_BUFIO,			/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	SD_CHAIN_BUFIO,			/* Index: 16 */
	SD_CHAIN_BUFIO,			/* Index: 17 */
	SD_CHAIN_BUFIO,			/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	SD_CHAIN_USCSI,			/* Index: 19 */
	SD_CHAIN_USCSI,			/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	SD_CHAIN_USCSI,			/* Index: 21 */
	SD_CHAIN_USCSI,			/* Index: 22 */
	SD_CHAIN_USCSI,			/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	SD_CHAIN_DIRECT,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */
};
2106 
2107 
2108 /* Macro to return TRUE if the IO has come from the sd_buf_iostart() chain. */
2109 #define	SD_IS_BUFIO(xp)			\
2110 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
2111 
2112 /* Macro to return TRUE if the IO has come from the "direct priority" chain. */
2113 #define	SD_IS_DIRECT_PRIORITY(xp)	\
2114 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
2115 
2116 
2117 
2118 /*
2119  * Struct, array, and macros to map a specific chain to the appropriate
2120  * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2121  *
2122  * The sd_chain_index_map[] array is used at attach time to set the various
2123  * un_xxx_chain type members of the sd_lun softstate to the specific layering
2124  * chain to be used with the instance. This allows different instances to use
2125  * different chain for buf IO, uscsi IO, etc.. Also, since the xb_chain_iostart
2126  * and xb_chain_iodone index values in the sd_xbuf are initialized to these
2127  * values at sd_xbuf init time, this allows (1) layering chains may be changed
2128  * dynamically & without the use of locking; and (2) a layer may update the
2129  * xb_chain_io[start|done] member in a given xbuf with its current index value,
2130  * to allow for deferred processing of an IO within the same chain from a
2131  * different execution context.
2132  */
2133 
2134 struct sd_chain_index {
2135 	int	sci_iostart_index;
2136 	int	sci_iodone_index;
2137 };
2138 
static struct sd_chain_index	sd_chain_index_map[] = {
	/*
	 * Row order must match the SD_CHAIN_INFO_* index macros defined
	 * below (SD_CHAIN_INFO_DISK == 0, ..., SD_CHAIN_INFO_PRIORITY_CMD
	 * == 9).
	 */
	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
};
2151 
2152 
2153 /*
2154  * The following are indexes into the sd_chain_index_map[] array.
2155  */
2156 
2157 /* un->un_buf_chain_type must be set to one of these */
2158 #define	SD_CHAIN_INFO_DISK		0
2159 #define	SD_CHAIN_INFO_DISK_NO_PM	1
2160 #define	SD_CHAIN_INFO_RMMEDIA		2
2161 #define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
2162 #define	SD_CHAIN_INFO_CHKSUM		4
2163 #define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
2164 
2165 /* un->un_uscsi_chain_type must be set to one of these */
2166 #define	SD_CHAIN_INFO_USCSI_CMD		6
2167 /* USCSI with PM disabled is the same as DIRECT */
2168 #define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
2169 #define	SD_CHAIN_INFO_USCSI_CHKSUM	7
2170 
2171 /* un->un_direct_chain_type must be set to one of these */
2172 #define	SD_CHAIN_INFO_DIRECT_CMD	8
2173 
2174 /* un->un_priority_chain_type must be set to one of these */
2175 #define	SD_CHAIN_INFO_PRIORITY_CMD	9
2176 
2177 /* size for devid inquiries */
2178 #define	MAX_INQUIRY_SIZE		0xF0
2179 
2180 /*
2181  * Macros used by functions to pass a given buf(9S) struct along to the
2182  * next function in the layering chain for further processing.
2183  *
2184  * In the following macros, passing more than three arguments to the called
2185  * routines causes the optimizer for the SPARC compiler to stop doing tail
2186  * call elimination which results in significant performance degradation.
2187  */
2188 #define	SD_BEGIN_IOSTART(index, un, bp)	\
2189 	((*(sd_iostart_chain[index]))(index, un, bp))
2190 
2191 #define	SD_BEGIN_IODONE(index, un, bp)	\
2192 	((*(sd_iodone_chain[index]))(index, un, bp))
2193 
2194 #define	SD_NEXT_IOSTART(index, un, bp)				\
2195 	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))
2196 
2197 #define	SD_NEXT_IODONE(index, un, bp)				\
2198 	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
2199 
2200 /*
2201  *    Function: _init
2202  *
2203  * Description: This is the driver _init(9E) entry point.
2204  *
2205  * Return Code: Returns the value from mod_install(9F) or
2206  *		ddi_soft_state_init(9F) as appropriate.
2207  *
2208  *     Context: Called when driver module loaded.
2209  */
2210 
int
_init(void)
{
	int	err;

	/* establish driver name from module name */
	sd_label = (char *)mod_modname(&modlinkage);

#ifndef XPV_HVM_DRIVER
	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
	    SD_MAXUNIT);
	if (err != 0) {
		/* Nothing else initialized yet; safe to bail directly. */
		return (err);
	}

#else /* XPV_HVM_DRIVER */
	/* Remove the leading "hvm_" from the module name */
	ASSERT(strncmp(sd_label, "hvm_", strlen("hvm_")) == 0);
	sd_label += strlen("hvm_");

#endif /* XPV_HVM_DRIVER */

	/* Global locks shared by all instances of the driver. */
	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);

	/* Reservation-reclaim thread synchronization state. */
	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);

	/*
	 * it's ok to init here even for fibre device
	 */
	sd_scsi_probe_cache_init();

	sd_scsi_target_lun_init();

	/*
	 * Creating taskq before mod_install ensures that all callers (threads)
	 * that enter the module after a successful mod_install encounter
	 * a valid taskq.
	 */
	sd_taskq_create();

	err = mod_install(&modlinkage);
	if (err != 0) {
		/*
		 * Install failed: unwind everything initialized above,
		 * in reverse dependency order.
		 */
		/* delete taskq if install fails */
		sd_taskq_delete();

		mutex_destroy(&sd_detach_mutex);
		mutex_destroy(&sd_log_mutex);
		mutex_destroy(&sd_label_mutex);

		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
		cv_destroy(&sd_tr.srq_inprocess_cv);

		sd_scsi_probe_cache_fini();

		sd_scsi_target_lun_fini();

#ifndef XPV_HVM_DRIVER
		ddi_soft_state_fini(&sd_state);
#endif /* !XPV_HVM_DRIVER */
		return (err);
	}

	/* err == 0 here: module successfully installed. */
	return (err);
}
2280 
2281 
2282 /*
2283  *    Function: _fini
2284  *
2285  * Description: This is the driver _fini(9E) entry point.
2286  *
2287  * Return Code: Returns the value from mod_remove(9F)
2288  *
2289  *     Context: Called when driver module is unloaded.
2290  */
2291 
int
_fini(void)
{
	int err;

	/*
	 * mod_remove() fails if any instance is still attached; in that
	 * case leave all global state intact and report the error.
	 */
	if ((err = mod_remove(&modlinkage)) != 0) {
		return (err);
	}

	/* Tear down global state in roughly the reverse order of _init(). */
	sd_taskq_delete();

	mutex_destroy(&sd_detach_mutex);
	mutex_destroy(&sd_log_mutex);
	mutex_destroy(&sd_label_mutex);
	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);

	sd_scsi_probe_cache_fini();

	sd_scsi_target_lun_fini();

	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
	cv_destroy(&sd_tr.srq_inprocess_cv);

#ifndef XPV_HVM_DRIVER
	ddi_soft_state_fini(&sd_state);
#endif /* !XPV_HVM_DRIVER */

	return (err);
}
2321 
2322 
2323 /*
2324  *    Function: _info
2325  *
2326  * Description: This is the driver _info(9E) entry point.
2327  *
2328  *   Arguments: modinfop - pointer to the driver modinfo structure
2329  *
2330  * Return Code: Returns the value from mod_info(9F).
2331  *
2332  *     Context: Kernel thread context
2333  */
2334 
2335 int
2336 _info(struct modinfo *modinfop)
2337 {
2338 	return (mod_info(&modlinkage, modinfop));
2339 }
2340 
2341 
2342 /*
2343  * The following routines implement the driver message logging facility.
2344  * They provide component- and level- based debug output filtering.
2345  * Output may also be restricted to messages for a single instance by
2346  * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2347  * to NULL, then messages for all instances are printed.
2348  *
2349  * These routines have been cloned from each other due to the language
2350  * constraints of macros and variable argument list processing.
2351  */
2352 
2353 
2354 /*
2355  *    Function: sd_log_err
2356  *
2357  * Description: This routine is called by the SD_ERROR macro for debug
2358  *		logging of error conditions.
2359  *
2360  *   Arguments: comp - driver component being logged
 *		un   - pointer to the driver soft state (unit) structure
2362  *		fmt  - error string and format to be logged
2363  */
2364 
static void
sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
{
	va_list		ap;
	dev_info_t	*dev;

	ASSERT(un != NULL);
	dev = SD_DEVINFO(un);
	ASSERT(dev != NULL);

	/*
	 * Filter messages based on the global component and level masks.
	 * Also print if un matches the value of sd_debug_un, or if
	 * sd_debug_un is set to NULL.
	 */
	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
		/* sd_log_mutex serializes all use of the shared sd_log_buf. */
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		/*
		 * NOTE(review): vsprintf() is unbounded; callers must keep
		 * formatted messages within sd_log_buf's size -- TODO confirm
		 * the buffer is sized generously at its definition.
		 */
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
		mutex_exit(&sd_log_mutex);
	}
#ifdef SD_FAULT_INJECTION
	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
	/* Mirror the message into the fault-injection log when enabled. */
	if (un->sd_injection_mask & comp) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		sd_injection_log(sd_log_buf, un);
		mutex_exit(&sd_log_mutex);
	}
#endif
}
2401 
2402 
2403 /*
2404  *    Function: sd_log_info
2405  *
2406  * Description: This routine is called by the SD_INFO macro for debug
2407  *		logging of general purpose informational conditions.
2408  *
2409  *   Arguments: comp - driver component being logged
 *		un   - pointer to the driver soft state (unit) structure
2411  *		fmt  - info string and format to be logged
2412  */
2413 
static void
sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
{
	va_list		ap;
	dev_info_t	*dev;

	ASSERT(un != NULL);
	dev = SD_DEVINFO(un);
	ASSERT(dev != NULL);

	/*
	 * Filter messages based on the global component and level masks.
	 * Also print if un matches the value of sd_debug_un, or if
	 * sd_debug_un is set to NULL.
	 */
	if ((sd_component_mask & component) &&
	    (sd_level_mask & SD_LOGMASK_INFO) &&
	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		/*
		 * Format into the shared sd_log_buf under sd_log_mutex.
		 * NOTE(review): vsprintf() is unbounded -- assumes the
		 * message fits in sd_log_buf.
		 */
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
		mutex_exit(&sd_log_mutex);
	}
#ifdef SD_FAULT_INJECTION
	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
	/* Also route a copy of the message to the fault-injection log. */
	if (un->sd_injection_mask & component) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		sd_injection_log(sd_log_buf, un);
		mutex_exit(&sd_log_mutex);
	}
#endif
}
2451 
2452 
2453 /*
2454  *    Function: sd_log_trace
2455  *
2456  * Description: This routine is called by the SD_TRACE macro for debug
2457  *		logging of trace conditions (i.e. function entry/exit).
2458  *
2459  *   Arguments: comp - driver component being logged
2460  *		dev  - pointer to driver info structure
2461  *		fmt  - trace string and format to be logged
2462  */
2463 
static void
sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
{
	va_list		ap;
	dev_info_t	*dev;

	ASSERT(un != NULL);
	dev = SD_DEVINFO(un);
	ASSERT(dev != NULL);

	/*
	 * Filter messages based on the global component and level masks.
	 * Also print if un matches the value of sd_debug_un, or if
	 * sd_debug_un is set to NULL.
	 */
	if ((sd_component_mask & component) &&
	    (sd_level_mask & SD_LOGMASK_TRACE) &&
	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		/*
		 * Format into the shared sd_log_buf under sd_log_mutex.
		 * NOTE(review): vsprintf() is unbounded -- assumes the
		 * message fits in sd_log_buf.
		 */
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
		mutex_exit(&sd_log_mutex);
	}
#ifdef SD_FAULT_INJECTION
	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
	/* Also route a copy of the message to the fault-injection log. */
	if (un->sd_injection_mask & component) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		sd_injection_log(sd_log_buf, un);
		mutex_exit(&sd_log_mutex);
	}
#endif
}
2501 
2502 
2503 /*
2504  *    Function: sdprobe
2505  *
2506  * Description: This is the driver probe(9e) entry point function.
2507  *
2508  *   Arguments: devi - opaque device info handle
2509  *
2510  * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2511  *              DDI_PROBE_FAILURE: If the probe failed.
2512  *              DDI_PROBE_PARTIAL: If the instance is not present now,
2513  *				   but may be present in the future.
2514  */
2515 
2516 static int
2517 sdprobe(dev_info_t *devi)
2518 {
2519 	struct scsi_device	*devp;
2520 	int			rval;
2521 #ifndef XPV_HVM_DRIVER
2522 	int			instance = ddi_get_instance(devi);
2523 #endif /* !XPV_HVM_DRIVER */
2524 
2525 	/*
2526 	 * if it wasn't for pln, sdprobe could actually be nulldev
2527 	 * in the "__fibre" case.
2528 	 */
2529 	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2530 		return (DDI_PROBE_DONTCARE);
2531 	}
2532 
2533 	devp = ddi_get_driver_private(devi);
2534 
2535 	if (devp == NULL) {
2536 		/* Ooops... nexus driver is mis-configured... */
2537 		return (DDI_PROBE_FAILURE);
2538 	}
2539 
2540 #ifndef XPV_HVM_DRIVER
2541 	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2542 		return (DDI_PROBE_PARTIAL);
2543 	}
2544 #endif /* !XPV_HVM_DRIVER */
2545 
2546 	/*
2547 	 * Call the SCSA utility probe routine to see if we actually
2548 	 * have a target at this SCSI nexus.
2549 	 */
2550 	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2551 	case SCSIPROBE_EXISTS:
2552 		switch (devp->sd_inq->inq_dtype) {
2553 		case DTYPE_DIRECT:
2554 			rval = DDI_PROBE_SUCCESS;
2555 			break;
2556 		case DTYPE_RODIRECT:
2557 			/* CDs etc. Can be removable media */
2558 			rval = DDI_PROBE_SUCCESS;
2559 			break;
2560 		case DTYPE_OPTICAL:
2561 			/*
2562 			 * Rewritable optical driver HP115AA
2563 			 * Can also be removable media
2564 			 */
2565 
2566 			/*
2567 			 * Do not attempt to bind to  DTYPE_OPTICAL if
2568 			 * pre solaris 9 sparc sd behavior is required
2569 			 *
2570 			 * If first time through and sd_dtype_optical_bind
2571 			 * has not been set in /etc/system check properties
2572 			 */
2573 
2574 			if (sd_dtype_optical_bind  < 0) {
2575 				sd_dtype_optical_bind = ddi_prop_get_int
2576 				    (DDI_DEV_T_ANY, devi, 0,
2577 				    "optical-device-bind", 1);
2578 			}
2579 
2580 			if (sd_dtype_optical_bind == 0) {
2581 				rval = DDI_PROBE_FAILURE;
2582 			} else {
2583 				rval = DDI_PROBE_SUCCESS;
2584 			}
2585 			break;
2586 
2587 		case DTYPE_NOTPRESENT:
2588 		default:
2589 			rval = DDI_PROBE_FAILURE;
2590 			break;
2591 		}
2592 		break;
2593 	default:
2594 		rval = DDI_PROBE_PARTIAL;
2595 		break;
2596 	}
2597 
2598 	/*
2599 	 * This routine checks for resource allocation prior to freeing,
2600 	 * so it will take care of the "smart probing" case where a
2601 	 * scsi_probe() may or may not have been issued and will *not*
2602 	 * free previously-freed resources.
2603 	 */
2604 	scsi_unprobe(devp);
2605 	return (rval);
2606 }
2607 
2608 
2609 /*
2610  *    Function: sdinfo
2611  *
2612  * Description: This is the driver getinfo(9e) entry point function.
2613  * 		Given the device number, return the devinfo pointer from
2614  *		the scsi_device structure or the instance number
2615  *		associated with the dev_t.
2616  *
2617  *   Arguments: dip     - pointer to device info structure
2618  *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2619  *			  DDI_INFO_DEVT2INSTANCE)
2620  *		arg     - driver dev_t
2621  *		resultp - user buffer for request response
2622  *
2623  * Return Code: DDI_SUCCESS
2624  *              DDI_FAILURE
2625  */
2626 /* ARGSUSED */
2627 static int
2628 sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2629 {
2630 	struct sd_lun	*un;
2631 	dev_t		dev;
2632 	int		instance;
2633 	int		error;
2634 
2635 	switch (infocmd) {
2636 	case DDI_INFO_DEVT2DEVINFO:
2637 		dev = (dev_t)arg;
2638 		instance = SDUNIT(dev);
2639 		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2640 			return (DDI_FAILURE);
2641 		}
2642 		*result = (void *) SD_DEVINFO(un);
2643 		error = DDI_SUCCESS;
2644 		break;
2645 	case DDI_INFO_DEVT2INSTANCE:
2646 		dev = (dev_t)arg;
2647 		instance = SDUNIT(dev);
2648 		*result = (void *)(uintptr_t)instance;
2649 		error = DDI_SUCCESS;
2650 		break;
2651 	default:
2652 		error = DDI_FAILURE;
2653 	}
2654 	return (error);
2655 }
2656 
2657 /*
2658  *    Function: sd_prop_op
2659  *
2660  * Description: This is the driver prop_op(9e) entry point function.
2661  *		Return the number of blocks for the partition in question
2662  *		or forward the request to the property facilities.
2663  *
2664  *   Arguments: dev       - device number
2665  *		dip       - pointer to device info structure
2666  *		prop_op   - property operator
2667  *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2668  *		name      - pointer to property name
2669  *		valuep    - pointer or address of the user buffer
2670  *		lengthp   - property length
2671  *
2672  * Return Code: DDI_PROP_SUCCESS
2673  *              DDI_PROP_NOT_FOUND
2674  *              DDI_PROP_UNDEFINED
2675  *              DDI_PROP_NO_MEMORY
2676  *              DDI_PROP_BUF_TOO_SMALL
2677  */
2678 
2679 static int
2680 sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2681 	char *name, caddr_t valuep, int *lengthp)
2682 {
2683 	struct sd_lun	*un;
2684 
2685 	if ((un = ddi_get_soft_state(sd_state, ddi_get_instance(dip))) == NULL)
2686 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2687 		    name, valuep, lengthp));
2688 
2689 	return (cmlb_prop_op(un->un_cmlbhandle,
2690 	    dev, dip, prop_op, mod_flags, name, valuep, lengthp,
2691 	    SDPART(dev), (void *)SD_PATH_DIRECT));
2692 }
2693 
2694 /*
2695  * The following functions are for smart probing:
2696  * sd_scsi_probe_cache_init()
2697  * sd_scsi_probe_cache_fini()
2698  * sd_scsi_clear_probe_cache()
2699  * sd_scsi_probe_with_cache()
2700  */
2701 
2702 /*
2703  *    Function: sd_scsi_probe_cache_init
2704  *
2705  * Description: Initializes the probe response cache mutex and head pointer.
2706  *
2707  *     Context: Kernel thread context
2708  */
2709 
2710 static void
2711 sd_scsi_probe_cache_init(void)
2712 {
2713 	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2714 	sd_scsi_probe_cache_head = NULL;
2715 }
2716 
2717 
2718 /*
2719  *    Function: sd_scsi_probe_cache_fini
2720  *
2721  * Description: Frees all resources associated with the probe response cache.
2722  *
2723  *     Context: Kernel thread context
2724  */
2725 
2726 static void
2727 sd_scsi_probe_cache_fini(void)
2728 {
2729 	struct sd_scsi_probe_cache *cp;
2730 	struct sd_scsi_probe_cache *ncp;
2731 
2732 	/* Clean up our smart probing linked list */
2733 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2734 		ncp = cp->next;
2735 		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2736 	}
2737 	sd_scsi_probe_cache_head = NULL;
2738 	mutex_destroy(&sd_scsi_probe_cache_mutex);
2739 }
2740 
2741 
2742 /*
2743  *    Function: sd_scsi_clear_probe_cache
2744  *
2745  * Description: This routine clears the probe response cache. This is
2746  *		done when open() returns ENXIO so that when deferred
2747  *		attach is attempted (possibly after a device has been
2748  *		turned on) we will retry the probe. Since we don't know
2749  *		which target we failed to open, we just clear the
2750  *		entire cache.
2751  *
2752  *     Context: Kernel thread context
2753  */
2754 
2755 static void
2756 sd_scsi_clear_probe_cache(void)
2757 {
2758 	struct sd_scsi_probe_cache	*cp;
2759 	int				i;
2760 
2761 	mutex_enter(&sd_scsi_probe_cache_mutex);
2762 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2763 		/*
2764 		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2765 		 * force probing to be performed the next time
2766 		 * sd_scsi_probe_with_cache is called.
2767 		 */
2768 		for (i = 0; i < NTARGETS_WIDE; i++) {
2769 			cp->cache[i] = SCSIPROBE_EXISTS;
2770 		}
2771 	}
2772 	mutex_exit(&sd_scsi_probe_cache_mutex);
2773 }
2774 
2775 
2776 /*
2777  *    Function: sd_scsi_probe_with_cache
2778  *
2779  * Description: This routine implements support for a scsi device probe
2780  *		with cache. The driver maintains a cache of the target
2781  *		responses to scsi probes. If we get no response from a
2782  *		target during a probe inquiry, we remember that, and we
2783  *		avoid additional calls to scsi_probe on non-zero LUNs
2784  *		on the same target until the cache is cleared. By doing
2785  *		so we avoid the 1/4 sec selection timeout for nonzero
2786  *		LUNs. lun0 of a target is always probed.
2787  *
2788  *   Arguments: devp     - Pointer to a scsi_device(9S) structure
2789  *              waitfunc - indicates what the allocator routines should
2790  *			   do when resources are not available. This value
2791  *			   is passed on to scsi_probe() when that routine
2792  *			   is called.
2793  *
2794  * Return Code: SCSIPROBE_NORESP if a NORESP in probe response cache;
2795  *		otherwise the value returned by scsi_probe(9F).
2796  *
2797  *     Context: Kernel thread context
2798  */
2799 
static int
sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
{
	struct sd_scsi_probe_cache	*cp;
	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
	int		lun, tgt;

	/* Target/lun address of the device being probed. */
	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
	    SCSI_ADDR_PROP_LUN, 0);
	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
	    SCSI_ADDR_PROP_TARGET, -1);

	/* Make sure caching enabled and target in range */
	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
		/* do it the old way (no cache) */
		return (scsi_probe(devp, waitfn));
	}

	mutex_enter(&sd_scsi_probe_cache_mutex);

	/* Find the cache for this scsi bus instance */
	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
		if (cp->pdip == pdip) {
			break;
		}
	}

	/* If we can't find a cache for this pdip, create one */
	if (cp == NULL) {
		int i;

		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
		    KM_SLEEP);
		cp->pdip = pdip;
		cp->next = sd_scsi_probe_cache_head;
		sd_scsi_probe_cache_head = cp;
		for (i = 0; i < NTARGETS_WIDE; i++) {
			cp->cache[i] = SCSIPROBE_EXISTS;
		}
	}

	mutex_exit(&sd_scsi_probe_cache_mutex);

	/*
	 * NOTE(review): cp->cache[tgt] is read and written below without
	 * holding sd_scsi_probe_cache_mutex -- presumably safe because
	 * probes of a given target are serialized by the DDI framework;
	 * confirm before reusing this pattern in new code paths.
	 */

	/* Recompute the cache for this target if LUN zero */
	if (lun == 0) {
		cp->cache[tgt] = SCSIPROBE_EXISTS;
	}

	/* Don't probe if cache remembers a NORESP from a previous LUN. */
	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
		return (SCSIPROBE_NORESP);
	}

	/* Do the actual probe; save & return the result */
	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
}
2856 
2857 
2858 /*
2859  *    Function: sd_scsi_target_lun_init
2860  *
2861  * Description: Initializes the attached lun chain mutex and head pointer.
2862  *
2863  *     Context: Kernel thread context
2864  */
2865 
2866 static void
2867 sd_scsi_target_lun_init(void)
2868 {
2869 	mutex_init(&sd_scsi_target_lun_mutex, NULL, MUTEX_DRIVER, NULL);
2870 	sd_scsi_target_lun_head = NULL;
2871 }
2872 
2873 
2874 /*
2875  *    Function: sd_scsi_target_lun_fini
2876  *
2877  * Description: Frees all resources associated with the attached lun
2878  *              chain
2879  *
2880  *     Context: Kernel thread context
2881  */
2882 
2883 static void
2884 sd_scsi_target_lun_fini(void)
2885 {
2886 	struct sd_scsi_hba_tgt_lun	*cp;
2887 	struct sd_scsi_hba_tgt_lun	*ncp;
2888 
2889 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = ncp) {
2890 		ncp = cp->next;
2891 		kmem_free(cp, sizeof (struct sd_scsi_hba_tgt_lun));
2892 	}
2893 	sd_scsi_target_lun_head = NULL;
2894 	mutex_destroy(&sd_scsi_target_lun_mutex);
2895 }
2896 
2897 
2898 /*
2899  *    Function: sd_scsi_get_target_lun_count
2900  *
2901  * Description: This routine will check in the attached lun chain to see
2902  * 		how many luns are attached on the required SCSI controller
2903  * 		and target. Currently, some capabilities like tagged queue
2904  *		are supported per target based by HBA. So all luns in a
2905  *		target have the same capabilities. Based on this assumption,
2906  * 		sd should only set these capabilities once per target. This
2907  *		function is called when sd needs to decide how many luns
2908  *		already attached on a target.
2909  *
2910  *   Arguments: dip	- Pointer to the system's dev_info_t for the SCSI
2911  *			  controller device.
2912  *              target	- The target ID on the controller's SCSI bus.
2913  *
2914  * Return Code: The number of luns attached on the required target and
2915  *		controller.
2916  *		-1 if target ID is not in parallel SCSI scope or the given
2917  * 		dip is not in the chain.
2918  *
2919  *     Context: Kernel thread context
2920  */
2921 
2922 static int
2923 sd_scsi_get_target_lun_count(dev_info_t *dip, int target)
2924 {
2925 	struct sd_scsi_hba_tgt_lun	*cp;
2926 
2927 	if ((target < 0) || (target >= NTARGETS_WIDE)) {
2928 		return (-1);
2929 	}
2930 
2931 	mutex_enter(&sd_scsi_target_lun_mutex);
2932 
2933 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
2934 		if (cp->pdip == dip) {
2935 			break;
2936 		}
2937 	}
2938 
2939 	mutex_exit(&sd_scsi_target_lun_mutex);
2940 
2941 	if (cp == NULL) {
2942 		return (-1);
2943 	}
2944 
2945 	return (cp->nlun[target]);
2946 }
2947 
2948 
2949 /*
2950  *    Function: sd_scsi_update_lun_on_target
2951  *
2952  * Description: This routine is used to update the attached lun chain when a
2953  *		lun is attached or detached on a target.
2954  *
2955  *   Arguments: dip     - Pointer to the system's dev_info_t for the SCSI
2956  *                        controller device.
2957  *              target  - The target ID on the controller's SCSI bus.
2958  *		flag	- Indicate the lun is attached or detached.
2959  *
2960  *     Context: Kernel thread context
2961  */
2962 
2963 static void
2964 sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag)
2965 {
2966 	struct sd_scsi_hba_tgt_lun	*cp;
2967 
2968 	mutex_enter(&sd_scsi_target_lun_mutex);
2969 
2970 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
2971 		if (cp->pdip == dip) {
2972 			break;
2973 		}
2974 	}
2975 
2976 	if ((cp == NULL) && (flag == SD_SCSI_LUN_ATTACH)) {
2977 		cp = kmem_zalloc(sizeof (struct sd_scsi_hba_tgt_lun),
2978 		    KM_SLEEP);
2979 		cp->pdip = dip;
2980 		cp->next = sd_scsi_target_lun_head;
2981 		sd_scsi_target_lun_head = cp;
2982 	}
2983 
2984 	mutex_exit(&sd_scsi_target_lun_mutex);
2985 
2986 	if (cp != NULL) {
2987 		if (flag == SD_SCSI_LUN_ATTACH) {
2988 			cp->nlun[target] ++;
2989 		} else {
2990 			cp->nlun[target] --;
2991 		}
2992 	}
2993 }
2994 
2995 
2996 /*
2997  *    Function: sd_spin_up_unit
2998  *
2999  * Description: Issues the following commands to spin-up the device:
3000  *		START STOP UNIT, and INQUIRY.
3001  *
3002  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
3003  *                      structure for this target.
3004  *
3005  * Return Code: 0 - success
3006  *		EIO - failure
3007  *		EACCES - reservation conflict
3008  *
3009  *     Context: Kernel thread context
3010  */
3011 
3012 static int
3013 sd_spin_up_unit(sd_ssc_t *ssc)
3014 {
3015 	size_t	resid		= 0;
3016 	int	has_conflict	= FALSE;
3017 	uchar_t *bufaddr;
3018 	int 	status;
3019 	struct sd_lun	*un;
3020 
3021 	ASSERT(ssc != NULL);
3022 	un = ssc->ssc_un;
3023 	ASSERT(un != NULL);
3024 
3025 	/*
3026 	 * Send a throwaway START UNIT command.
3027 	 *
3028 	 * If we fail on this, we don't care presently what precisely
3029 	 * is wrong.  EMC's arrays will also fail this with a check
3030 	 * condition (0x2/0x4/0x3) if the device is "inactive," but
3031 	 * we don't want to fail the attach because it may become
3032 	 * "active" later.
3033 	 */
3034 	status = sd_send_scsi_START_STOP_UNIT(ssc, SD_TARGET_START,
3035 	    SD_PATH_DIRECT);
3036 
3037 	if (status != 0) {
3038 		if (status == EACCES)
3039 			has_conflict = TRUE;
3040 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3041 	}
3042 
3043 	/*
3044 	 * Send another INQUIRY command to the target. This is necessary for
3045 	 * non-removable media direct access devices because their INQUIRY data
3046 	 * may not be fully qualified until they are spun up (perhaps via the
3047 	 * START command above).  Note: This seems to be needed for some
3048 	 * legacy devices only.) The INQUIRY command should succeed even if a
3049 	 * Reservation Conflict is present.
3050 	 */
3051 	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
3052 
3053 	if (sd_send_scsi_INQUIRY(ssc, bufaddr, SUN_INQSIZE, 0, 0, &resid)
3054 	    != 0) {
3055 		kmem_free(bufaddr, SUN_INQSIZE);
3056 		sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
3057 		return (EIO);
3058 	}
3059 
3060 	/*
3061 	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
3062 	 * Note that this routine does not return a failure here even if the
3063 	 * INQUIRY command did not return any data.  This is a legacy behavior.
3064 	 */
3065 	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
3066 		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
3067 	}
3068 
3069 	kmem_free(bufaddr, SUN_INQSIZE);
3070 
3071 	/* If we hit a reservation conflict above, tell the caller. */
3072 	if (has_conflict == TRUE) {
3073 		return (EACCES);
3074 	}
3075 
3076 	return (0);
3077 }
3078 
3079 #ifdef _LP64
3080 /*
3081  *    Function: sd_enable_descr_sense
3082  *
3083  * Description: This routine attempts to select descriptor sense format
3084  *		using the Control mode page.  Devices that support 64 bit
3085  *		LBAs (for >2TB luns) should also implement descriptor
3086  *		sense data so we will call this function whenever we see
3087  *		a lun larger than 2TB.  If for some reason the device
3088  *		supports 64 bit LBAs but doesn't support descriptor sense
3089  *		presumably the mode select will fail.  Everything will
3090  *		continue to work normally except that we will not get
3091  *		complete sense data for commands that fail with an LBA
3092  *		larger than 32 bits.
3093  *
3094  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
3095  *                      structure for this target.
3096  *
3097  *     Context: Kernel thread context only
3098  */
3099 
3100 static void
3101 sd_enable_descr_sense(sd_ssc_t *ssc)
3102 {
3103 	uchar_t			*header;
3104 	struct mode_control_scsi3 *ctrl_bufp;
3105 	size_t			buflen;
3106 	size_t			bd_len;
3107 	int			status;
3108 	struct sd_lun		*un;
3109 
3110 	ASSERT(ssc != NULL);
3111 	un = ssc->ssc_un;
3112 	ASSERT(un != NULL);
3113 
3114 	/*
3115 	 * Read MODE SENSE page 0xA, Control Mode Page
3116 	 */
3117 	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
3118 	    sizeof (struct mode_control_scsi3);
3119 	header = kmem_zalloc(buflen, KM_SLEEP);
3120 
3121 	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, header, buflen,
3122 	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT);
3123 
3124 	if (status != 0) {
3125 		SD_ERROR(SD_LOG_COMMON, un,
3126 		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
3127 		goto eds_exit;
3128 	}
3129 
3130 	/*
3131 	 * Determine size of Block Descriptors in order to locate
3132 	 * the mode page data. ATAPI devices return 0, SCSI devices
3133 	 * should return MODE_BLK_DESC_LENGTH.
3134 	 */
3135 	bd_len  = ((struct mode_header *)header)->bdesc_length;
3136 
3137 	/* Clear the mode data length field for MODE SELECT */
3138 	((struct mode_header *)header)->length = 0;
3139 
3140 	ctrl_bufp = (struct mode_control_scsi3 *)
3141 	    (header + MODE_HEADER_LENGTH + bd_len);
3142 
3143 	/*
3144 	 * If the page length is smaller than the expected value,
3145 	 * the target device doesn't support D_SENSE. Bail out here.
3146 	 */
3147 	if (ctrl_bufp->mode_page.length <
3148 	    sizeof (struct mode_control_scsi3) - 2) {
3149 		SD_ERROR(SD_LOG_COMMON, un,
3150 		    "sd_enable_descr_sense: enable D_SENSE failed\n");
3151 		goto eds_exit;
3152 	}
3153 
3154 	/*
3155 	 * Clear PS bit for MODE SELECT
3156 	 */
3157 	ctrl_bufp->mode_page.ps = 0;
3158 
3159 	/*
3160 	 * Set D_SENSE to enable descriptor sense format.
3161 	 */
3162 	ctrl_bufp->d_sense = 1;
3163 
3164 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3165 
3166 	/*
3167 	 * Use MODE SELECT to commit the change to the D_SENSE bit
3168 	 */
3169 	status = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, header,
3170 	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT);
3171 
3172 	if (status != 0) {
3173 		SD_INFO(SD_LOG_COMMON, un,
3174 		    "sd_enable_descr_sense: mode select ctrl page failed\n");
3175 	} else {
3176 		kmem_free(header, buflen);
3177 		return;
3178 	}
3179 
3180 eds_exit:
3181 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3182 	kmem_free(header, buflen);
3183 }
3184 
3185 /*
3186  *    Function: sd_reenable_dsense_task
3187  *
3188  * Description: Re-enable descriptor sense after device or bus reset
3189  *
3190  *     Context: Executes in a taskq() thread context
3191  */
3192 static void
3193 sd_reenable_dsense_task(void *arg)
3194 {
3195 	struct	sd_lun	*un = arg;
3196 	sd_ssc_t	*ssc;
3197 
3198 	ASSERT(un != NULL);
3199 
3200 	ssc = sd_ssc_init(un);
3201 	sd_enable_descr_sense(ssc);
3202 	sd_ssc_fini(ssc);
3203 }
3204 #endif /* _LP64 */
3205 
3206 /*
3207  *    Function: sd_set_mmc_caps
3208  *
3209  * Description: This routine determines if the device is MMC compliant and if
3210  *		the device supports CDDA via a mode sense of the CDVD
3211  *		capabilities mode page. Also checks if the device is a
3212  *		dvdram writable device.
3213  *
3214  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
3215  *                      structure for this target.
3216  *
3217  *     Context: Kernel thread context only
3218  */
3219 
static void
sd_set_mmc_caps(sd_ssc_t *ssc)
{
	struct mode_header_grp2		*sense_mhp;
	uchar_t				*sense_page;
	caddr_t				buf;
	int				bd_len;
	int				status;
	struct uscsi_cmd		com;
	int				rtn;
	uchar_t				*out_data_rw, *out_data_hd;
	uchar_t				*rqbuf_rw, *rqbuf_hd;
	struct sd_lun			*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);

	/*
	 * The flags which will be set in this function are - mmc compliant,
	 * dvdram writable device, cdda support. Initialize them to FALSE
	 * and if a capability is detected - it will be set to TRUE.
	 */
	un->un_f_mmc_cap = FALSE;
	un->un_f_dvdram_writable_device = FALSE;
	un->un_f_cfg_cdda = FALSE;

	/* Request the CD/DVD capabilities mode page (0x2A). */
	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, (uchar_t *)buf,
	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);

	sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	if (status != 0) {
		/* command failed; just return (all flags remain FALSE) */
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	/*
	 * If the mode sense request for the CDROM CAPABILITIES
	 * page (0x2A) succeeds the device is assumed to be MMC.
	 */
	un->un_f_mmc_cap = TRUE;

	/* Get to the page data */
	sense_mhp = (struct mode_header_grp2 *)buf;
	bd_len = (sense_mhp->bdesc_length_hi << 8) |
	    sense_mhp->bdesc_length_lo;
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		/*
		 * We did not get back the expected block descriptor
		 * length so we cannot determine if the device supports
		 * CDDA. However, we still indicate the device is MMC
		 * according to the successful response to the page
		 * 0x2A mode sense request.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_set_mmc_caps: Mode Sense returned "
		    "invalid block descriptor length\n");
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/* See if read CDDA is supported (page byte 5, bit 0). */
	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
	    bd_len);
	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;

	/* See if writing DVD RAM is supported (page byte 3, bit 5). */
	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
	if (un->un_f_dvdram_writable_device == TRUE) {
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/*
	 * If the device presents DVD or CD capabilities in the mode
	 * page, we can return here since a RRD will not have
	 * these capabilities.
	 */
	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);

	/*
	 * If un->un_f_dvdram_writable_device is still FALSE,
	 * check for a Removable Rigid Disk (RRD).  A RRD
	 * device is identified by the features RANDOM_WRITABLE and
	 * HARDWARE_DEFECT_MANAGEMENT.
	 */
	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_rw,
	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
	    RANDOM_WRITABLE, SD_PATH_STANDARD);

	sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	if (rtn != 0) {
		/* Feature query failed; device cannot be shown to be RRD. */
		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
		kmem_free(rqbuf_rw, SENSE_LENGTH);
		return;
	}

	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_hd,
	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
	    HARDWARE_DEFECT_MANAGEMENT, SD_PATH_STANDARD);

	sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	if (rtn == 0) {
		/*
		 * We have good information, check for random writable
		 * and hardware defect features.
		 */
		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
			un->un_f_dvdram_writable_device = TRUE;
		}
	}

	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_rw, SENSE_LENGTH);
	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_hd, SENSE_LENGTH);
}
3352 
3353 /*
3354  *    Function: sd_check_for_writable_cd
3355  *
3356  * Description: This routine determines if the media in the device is
3357  *		writable or not. It uses the get configuration command (0x46)
3358  *		to determine if the media is writable
3359  *
 *   Arguments: ssc - ssc contains pointer to driver soft state (unit)
 *                    structure for this target
3361  *              path_flag - SD_PATH_DIRECT to use the USCSI "direct"
3362  *                           chain and the normal command waitq, or
3363  *                           SD_PATH_DIRECT_PRIORITY to use the USCSI
3364  *                           "direct" chain and bypass the normal command
3365  *                           waitq.
3366  *
3367  *     Context: Never called at interrupt context.
3368  */
3369 
3370 static void
3371 sd_check_for_writable_cd(sd_ssc_t *ssc, int path_flag)
3372 {
3373 	struct uscsi_cmd		com;
3374 	uchar_t				*out_data;
3375 	uchar_t				*rqbuf;
3376 	int				rtn;
3377 	uchar_t				*out_data_rw, *out_data_hd;
3378 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3379 	struct mode_header_grp2		*sense_mhp;
3380 	uchar_t				*sense_page;
3381 	caddr_t				buf;
3382 	int				bd_len;
3383 	int				status;
3384 	struct sd_lun			*un;
3385 
3386 	ASSERT(ssc != NULL);
3387 	un = ssc->ssc_un;
3388 	ASSERT(un != NULL);
3389 	ASSERT(mutex_owned(SD_MUTEX(un)));
3390 
3391 	/*
3392 	 * Initialize the writable media to false, if configuration info.
3393 	 * tells us otherwise then only we will set it.
3394 	 */
3395 	un->un_f_mmc_writable_media = FALSE;
3396 	mutex_exit(SD_MUTEX(un));
3397 
3398 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
3399 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3400 
3401 	rtn = sd_send_scsi_GET_CONFIGURATION(ssc, &com, rqbuf, SENSE_LENGTH,
3402 	    out_data, SD_PROFILE_HEADER_LEN, path_flag);
3403 
3404 	if (rtn != 0)
3405 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3406 
3407 	mutex_enter(SD_MUTEX(un));
3408 	if (rtn == 0) {
3409 		/*
3410 		 * We have good information, check for writable DVD.
3411 		 */
3412 		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
3413 			un->un_f_mmc_writable_media = TRUE;
3414 			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3415 			kmem_free(rqbuf, SENSE_LENGTH);
3416 			return;
3417 		}
3418 	}
3419 
3420 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3421 	kmem_free(rqbuf, SENSE_LENGTH);
3422 
3423 	/*
3424 	 * Determine if this is a RRD type device.
3425 	 */
3426 	mutex_exit(SD_MUTEX(un));
3427 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3428 	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, (uchar_t *)buf,
3429 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, path_flag);
3430 
3431 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3432 
3433 	mutex_enter(SD_MUTEX(un));
3434 	if (status != 0) {
3435 		/* command failed; just return */
3436 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3437 		return;
3438 	}
3439 
3440 	/* Get to the page data */
3441 	sense_mhp = (struct mode_header_grp2 *)buf;
3442 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
3443 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3444 		/*
3445 		 * We did not get back the expected block descriptor length so
3446 		 * we cannot check the mode page.
3447 		 */
3448 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3449 		    "sd_check_for_writable_cd: Mode Sense returned "
3450 		    "invalid block descriptor length\n");
3451 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3452 		return;
3453 	}
3454 
3455 	/*
3456 	 * If the device presents DVD or CD capabilities in the mode
3457 	 * page, we can return here since a RRD device will not have
3458 	 * these capabilities.
3459 	 */
3460 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
3461 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3462 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3463 		return;
3464 	}
3465 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3466 
3467 	/*
3468 	 * If un->un_f_mmc_writable_media is still FALSE,
3469 	 * check for RRD type media.  A RRD device is identified
3470 	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
3471 	 */
3472 	mutex_exit(SD_MUTEX(un));
3473 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3474 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3475 
3476 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_rw,
3477 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3478 	    RANDOM_WRITABLE, path_flag);
3479 
3480 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3481 	if (rtn != 0) {
3482 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3483 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3484 		mutex_enter(SD_MUTEX(un));
3485 		return;
3486 	}
3487 
3488 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3489 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3490 
3491 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_hd,
3492 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3493 	    HARDWARE_DEFECT_MANAGEMENT, path_flag);
3494 
3495 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3496 	mutex_enter(SD_MUTEX(un));
3497 	if (rtn == 0) {
3498 		/*
3499 		 * We have good information, check for random writable
3500 		 * and hardware defect features as current.
3501 		 */
3502 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3503 		    (out_data_rw[10] & 0x1) &&
3504 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
3505 		    (out_data_hd[10] & 0x1)) {
3506 			un->un_f_mmc_writable_media = TRUE;
3507 		}
3508 	}
3509 
3510 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3511 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3512 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3513 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3514 }
3515 
3516 /*
3517  *    Function: sd_read_unit_properties
3518  *
3519  * Description: The following implements a property lookup mechanism.
3520  *		Properties for particular disks (keyed on vendor, model
3521  *		and rev numbers) are sought in the sd.conf file via
3522  *		sd_process_sdconf_file(), and if not found there, are
3523  *		looked for in a list hardcoded in this driver via
3524  *		sd_process_sdconf_table() Once located the properties
3525  *		are used to update the driver unit structure.
3526  *
3527  *   Arguments: un - driver soft state (unit) structure
3528  */
3529 
3530 static void
3531 sd_read_unit_properties(struct sd_lun *un)
3532 {
3533 	/*
3534 	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3535 	 * the "sd-config-list" property (from the sd.conf file) or if
3536 	 * there was not a match for the inquiry vid/pid. If this event
3537 	 * occurs the static driver configuration table is searched for
3538 	 * a match.
3539 	 */
3540 	ASSERT(un != NULL);
3541 	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3542 		sd_process_sdconf_table(un);
3543 	}
3544 
3545 	/* check for LSI device */
3546 	sd_is_lsi(un);
3547 
3548 
3549 }
3550 
3551 
3552 /*
3553  *    Function: sd_process_sdconf_file
3554  *
3555  * Description: Use ddi_prop_lookup(9F) to obtain the properties from the
3556  *		driver's config file (ie, sd.conf) and update the driver
3557  *		soft state structure accordingly.
3558  *
3559  *   Arguments: un - driver soft state (unit) structure
3560  *
3561  * Return Code: SD_SUCCESS - The properties were successfully set according
3562  *			     to the driver configuration file.
3563  *		SD_FAILURE - The driver config list was not obtained or
3564  *			     there was no vid/pid match. This indicates that
3565  *			     the static config table should be used.
3566  *
3567  * The config file has a property, "sd-config-list". Currently we support
3568  * two kinds of formats. For both formats, the value of this property
3569  * is a list of duplets:
3570  *
3571  *  sd-config-list=
 *	<duplet>
 *	[,<duplet>]*;
3574  *
3575  * For the improved format, where
3576  *
3577  *     <duplet>:= "<vid+pid>","<tunable-list>"
3578  *
3579  * and
3580  *
3581  *     <tunable-list>:=   <tunable> [, <tunable> ]*;
3582  *     <tunable> =        <name> : <value>
3583  *
3584  * The <vid+pid> is the string that is returned by the target device on a
3585  * SCSI inquiry command, the <tunable-list> contains one or more tunables
3586  * to apply to all target devices with the specified <vid+pid>.
3587  *
3588  * Each <tunable> is a "<name> : <value>" pair.
3589  *
3590  * For the old format, the structure of each duplet is as follows:
3591  *
3592  *  <duplet>:= "<vid+pid>","<data-property-name_list>"
3593  *
3594  * The first entry of the duplet is the device ID string (the concatenated
3595  * vid & pid; not to be confused with a device_id).  This is defined in
3596  * the same way as in the sd_disk_table.
3597  *
3598  * The second part of the duplet is a string that identifies a
3599  * data-property-name-list. The data-property-name-list is defined as
3600  * follows:
3601  *
3602  *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3603  *
3604  * The syntax of <data-property-name> depends on the <version> field.
3605  *
3606  * If version = SD_CONF_VERSION_1 we have the following syntax:
3607  *
3608  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3609  *
3610  * where the prop0 value will be used to set prop0 if bit0 set in the
3611  * flags, prop1 if bit1 set, etc. and N = SD_CONF_MAX_ITEMS -1
3612  *
3613  */
3614 
static int
sd_process_sdconf_file(struct sd_lun *un)
{
	char	**config_list = NULL;
	uint_t	nelements;
	char	*vidptr;
	int	vidlen;
	char	*dnlist_ptr;
	char	*dataname_ptr;
	char	*dataname_lasts;
	int	*data_list = NULL;
	uint_t	data_list_len;
	int	rval = SD_FAILURE;
	int	i;

	ASSERT(un != NULL);

	/* Obtain the configuration list associated with the .conf file */
	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, SD_DEVINFO(un),
	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, sd_config_list,
	    &config_list, &nelements) != DDI_PROP_SUCCESS) {
		return (SD_FAILURE);
	}

	/*
	 * Compare vids in each duplet to the inquiry vid - if a match is
	 * made, get the data value and update the soft state structure
	 * accordingly.
	 *
	 * Each duplet should show as a pair of strings, return SD_FAILURE
	 * otherwise.
	 */
	if (nelements & 1) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd-config-list should show as pairs of strings.\n");
		if (config_list)
			ddi_prop_free(config_list);
		return (SD_FAILURE);
	}

	/*
	 * Walk the duplets: config_list[i] is the vid/pid string and
	 * config_list[i + 1] is the associated tunable/data description.
	 * (nelements is even here, so i never indexes past the array.)
	 */
	for (i = 0; i < nelements; i += 2) {
		/*
		 * Note: The assumption here is that each vid entry is on
		 * a unique line from its associated duplet.
		 */
		vidptr = config_list[i];
		vidlen = (int)strlen(vidptr);
		if ((vidlen == 0) ||
		    (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS)) {
			continue;
		}

		/*
		 * dnlist contains 1 or more blank separated
		 * data-property-name entries
		 */
		dnlist_ptr = config_list[i + 1];

		if (strchr(dnlist_ptr, ':') != NULL) {
			/*
			 * Decode the improved format sd-config-list.
			 * A ':' can only appear in the "<name> : <value>"
			 * tunable syntax, so its presence selects the
			 * improved format.
			 */
			sd_nvpair_str_decode(un, dnlist_ptr);
		} else {
			/*
			 * The old format sd-config-list, loop through all
			 * data-property-name entries in the
			 * data-property-name-list
			 * setting the properties for each.
			 */
			for (dataname_ptr = sd_strtok_r(dnlist_ptr, " \t",
			    &dataname_lasts); dataname_ptr != NULL;
			    dataname_ptr = sd_strtok_r(NULL, " \t",
			    &dataname_lasts)) {
				int version;

				SD_INFO(SD_LOG_ATTACH_DETACH, un,
				    "sd_process_sdconf_file: disk:%s, "
				    "data:%s\n", vidptr, dataname_ptr);

				/* Get the data list */
				if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY,
				    SD_DEVINFO(un), 0, dataname_ptr, &data_list,
				    &data_list_len) != DDI_PROP_SUCCESS) {
					SD_INFO(SD_LOG_ATTACH_DETACH, un,
					    "sd_process_sdconf_file: data "
					    "property (%s) has no value\n",
					    dataname_ptr);
					continue;
				}

				/* data_list[0] holds the layout version */
				version = data_list[0];

				if (version == SD_CONF_VERSION_1) {
					sd_tunables values;

					/* Set the properties */
					if (sd_chk_vers1_data(un, data_list[1],
					    &data_list[2], data_list_len,
					    dataname_ptr) == SD_SUCCESS) {
						sd_get_tunables_from_conf(un,
						    data_list[1], &data_list[2],
						    &values);
						sd_set_vers1_properties(un,
						    data_list[1], &values);
						rval = SD_SUCCESS;
					} else {
						rval = SD_FAILURE;
					}
				} else {
					scsi_log(SD_DEVINFO(un), sd_label,
					    CE_WARN, "data property %s version "
					    "0x%x is invalid.",
					    dataname_ptr, version);
					rval = SD_FAILURE;
				}
				/*
				 * Release this data list before looking up
				 * the next data-property-name entry.
				 */
				if (data_list)
					ddi_prop_free(data_list);
			}
		}
	}

	/* free up the memory allocated by ddi_prop_lookup_string_array(). */
	if (config_list) {
		ddi_prop_free(config_list);
	}

	return (rval);
}
3744 
3745 /*
3746  *    Function: sd_nvpair_str_decode()
3747  *
3748  * Description: Parse the improved format sd-config-list to get
3749  *    each entry of tunable, which includes a name-value pair.
3750  *    Then call sd_set_properties() to set the property.
3751  *
3752  *   Arguments: un - driver soft state (unit) structure
3753  *    nvpair_str - the tunable list
3754  */
3755 static void
3756 sd_nvpair_str_decode(struct sd_lun *un, char *nvpair_str)
3757 {
3758 	char	*nv, *name, *value, *token;
3759 	char	*nv_lasts, *v_lasts, *x_lasts;
3760 
3761 	for (nv = sd_strtok_r(nvpair_str, ",", &nv_lasts); nv != NULL;
3762 	    nv = sd_strtok_r(NULL, ",", &nv_lasts)) {
3763 		token = sd_strtok_r(nv, ":", &v_lasts);
3764 		name  = sd_strtok_r(token, " \t", &x_lasts);
3765 		token = sd_strtok_r(NULL, ":", &v_lasts);
3766 		value = sd_strtok_r(token, " \t", &x_lasts);
3767 		if (name == NULL || value == NULL) {
3768 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3769 			    "sd_nvpair_str_decode: "
3770 			    "name or value is not valid!\n");
3771 		} else {
3772 			sd_set_properties(un, name, value);
3773 		}
3774 	}
3775 }
3776 
3777 /*
3778  *    Function: sd_strtok_r()
3779  *
3780  * Description: This function uses strpbrk and strspn to break
3781  *    string into tokens on sequentially subsequent calls. Return
3782  *    NULL when no non-separator characters remain. The first
3783  *    argument is NULL for subsequent calls.
3784  */
3785 static char *
3786 sd_strtok_r(char *string, const char *sepset, char **lasts)
3787 {
3788 	char	*q, *r;
3789 
3790 	/* First or subsequent call */
3791 	if (string == NULL)
3792 		string = *lasts;
3793 
3794 	if (string == NULL)
3795 		return (NULL);
3796 
3797 	/* Skip leading separators */
3798 	q = string + strspn(string, sepset);
3799 
3800 	if (*q == '\0')
3801 		return (NULL);
3802 
3803 	if ((r = strpbrk(q, sepset)) == NULL)
3804 		*lasts = NULL;
3805 	else {
3806 		*r = '\0';
3807 		*lasts = r + 1;
3808 	}
3809 	return (q);
3810 }
3811 
3812 /*
3813  *    Function: sd_set_properties()
3814  *
3815  * Description: Set device properties based on the improved
3816  *    format sd-config-list.
3817  *
3818  *   Arguments: un - driver soft state (unit) structure
3819  *    name  - supported tunable name
3820  *    value - tunable value
3821  */
static void
sd_set_properties(struct sd_lun *un, char *name, char *value)
{
	/* endptr is required by ddi_strtol() but is otherwise unused here */
	char	*endptr = NULL;
	long	val = 0;

	/*
	 * Each recognized tunable name is handled by one branch below.
	 * Boolean tunables accept "true"/"false" (case insensitive);
	 * numeric tunables are parsed with ddi_strtol() using base 0
	 * (auto-detected decimal/octal/hex).
	 */
	if (strcasecmp(name, "cache-nonvolatile") == 0) {
		if (strcasecmp(value, "true") == 0) {
			un->un_f_suppress_cache_flush = TRUE;
		} else if (strcasecmp(value, "false") == 0) {
			un->un_f_suppress_cache_flush = FALSE;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "suppress_cache_flush flag set to %d\n",
		    un->un_f_suppress_cache_flush);
		return;
	}

	if (strcasecmp(name, "controller-type") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_ctype = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "ctype set to %d\n", un->un_ctype);
		return;
	}

	if (strcasecmp(name, "delay-busy") == 0) {
		/*
		 * NOTE(review): val is divided by 1000 before being passed
		 * to drv_usectohz(), which takes microseconds; this implies
		 * the property value is expressed in nanoseconds -- confirm
		 * the intended units against the sd.conf documentation.
		 */
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_busy_timeout = drv_usectohz(val / 1000);
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "busy_timeout set to %d\n", un->un_busy_timeout);
		return;
	}

	if (strcasecmp(name, "disksort") == 0) {
		/* "disksort" is stated positively; the un flag is inverted */
		if (strcasecmp(value, "true") == 0) {
			un->un_f_disksort_disabled = FALSE;
		} else if (strcasecmp(value, "false") == 0) {
			un->un_f_disksort_disabled = TRUE;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "disksort disabled flag set to %d\n",
		    un->un_f_disksort_disabled);
		return;
	}

	if (strcasecmp(name, "timeout-releasereservation") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_reserve_release_time = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "reservation release timeout set to %d\n",
		    un->un_reserve_release_time);
		return;
	}

	if (strcasecmp(name, "reset-lun") == 0) {
		if (strcasecmp(value, "true") == 0) {
			un->un_f_lun_reset_enabled = TRUE;
		} else if (strcasecmp(value, "false") == 0) {
			un->un_f_lun_reset_enabled = FALSE;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "lun reset enabled flag set to %d\n",
		    un->un_f_lun_reset_enabled);
		return;
	}

	if (strcasecmp(name, "retries-busy") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_busy_retry_count = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "busy retry count set to %d\n", un->un_busy_retry_count);
		return;
	}

	if (strcasecmp(name, "retries-timeout") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_retry_count = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "timeout retry count set to %d\n", un->un_retry_count);
		return;
	}

	if (strcasecmp(name, "retries-notready") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_notready_retry_count = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "notready retry count set to %d\n",
		    un->un_notready_retry_count);
		return;
	}

	if (strcasecmp(name, "retries-reset") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_reset_retry_count = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "reset retry count set to %d\n",
		    un->un_reset_retry_count);
		return;
	}

	/*
	 * The two throttle branches deliberately do NOT return: they fall
	 * through to the validation block below so that throttle-max and
	 * throttle-min are cross-checked against each other.
	 */
	if (strcasecmp(name, "throttle-max") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_saved_throttle = un->un_throttle = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "throttle set to %d\n", un->un_throttle);
	}

	if (strcasecmp(name, "throttle-min") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_min_throttle = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "min throttle set to %d\n", un->un_min_throttle);
	}

	/*
	 * Validate the throttle values.
	 * If any of the numbers are invalid, set everything to defaults.
	 *
	 * NOTE(review): an unrecognized property name also reaches this
	 * point and returns silently -- consider logging unknown names.
	 */
	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
	    (un->un_min_throttle > un->un_throttle)) {
		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
		un->un_min_throttle = sd_min_throttle;
	}
	return;

value_invalid:
	SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
	    "value of prop %s is invalid\n", name);
}
3986 
3987 /*
3988  *    Function: sd_get_tunables_from_conf()
3989  *
3990  *
3991  *    This function reads the data list from the sd.conf file and pulls
3992  *    the values that can have numeric values as arguments and places
3993  *    the values in the appropriate sd_tunables member.
3994  *    Since the order of the data list members varies across platforms
3995  *    This function reads them from the data list in a platform specific
3996  *    order and places them into the correct sd_tunable member that is
3997  *    consistent across all platforms.
3998  */
3999 static void
4000 sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
4001     sd_tunables *values)
4002 {
4003 	int i;
4004 	int mask;
4005 
4006 	bzero(values, sizeof (sd_tunables));
4007 
4008 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
4009 
4010 		mask = 1 << i;
4011 		if (mask > flags) {
4012 			break;
4013 		}
4014 
4015 		switch (mask & flags) {
4016 		case 0:	/* This mask bit not set in flags */
4017 			continue;
4018 		case SD_CONF_BSET_THROTTLE:
4019 			values->sdt_throttle = data_list[i];
4020 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4021 			    "sd_get_tunables_from_conf: throttle = %d\n",
4022 			    values->sdt_throttle);
4023 			break;
4024 		case SD_CONF_BSET_CTYPE:
4025 			values->sdt_ctype = data_list[i];
4026 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4027 			    "sd_get_tunables_from_conf: ctype = %d\n",
4028 			    values->sdt_ctype);
4029 			break;
4030 		case SD_CONF_BSET_NRR_COUNT:
4031 			values->sdt_not_rdy_retries = data_list[i];
4032 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4033 			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
4034 			    values->sdt_not_rdy_retries);
4035 			break;
4036 		case SD_CONF_BSET_BSY_RETRY_COUNT:
4037 			values->sdt_busy_retries = data_list[i];
4038 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4039 			    "sd_get_tunables_from_conf: busy_retries = %d\n",
4040 			    values->sdt_busy_retries);
4041 			break;
4042 		case SD_CONF_BSET_RST_RETRIES:
4043 			values->sdt_reset_retries = data_list[i];
4044 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4045 			    "sd_get_tunables_from_conf: reset_retries = %d\n",
4046 			    values->sdt_reset_retries);
4047 			break;
4048 		case SD_CONF_BSET_RSV_REL_TIME:
4049 			values->sdt_reserv_rel_time = data_list[i];
4050 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4051 			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
4052 			    values->sdt_reserv_rel_time);
4053 			break;
4054 		case SD_CONF_BSET_MIN_THROTTLE:
4055 			values->sdt_min_throttle = data_list[i];
4056 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4057 			    "sd_get_tunables_from_conf: min_throttle = %d\n",
4058 			    values->sdt_min_throttle);
4059 			break;
4060 		case SD_CONF_BSET_DISKSORT_DISABLED:
4061 			values->sdt_disk_sort_dis = data_list[i];
4062 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4063 			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
4064 			    values->sdt_disk_sort_dis);
4065 			break;
4066 		case SD_CONF_BSET_LUN_RESET_ENABLED:
4067 			values->sdt_lun_reset_enable = data_list[i];
4068 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4069 			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
4070 			    "\n", values->sdt_lun_reset_enable);
4071 			break;
4072 		case SD_CONF_BSET_CACHE_IS_NV:
4073 			values->sdt_suppress_cache_flush = data_list[i];
4074 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4075 			    "sd_get_tunables_from_conf: \
4076 			    suppress_cache_flush = %d"
4077 			    "\n", values->sdt_suppress_cache_flush);
4078 			break;
4079 		}
4080 	}
4081 }
4082 
4083 /*
4084  *    Function: sd_process_sdconf_table
4085  *
4086  * Description: Search the static configuration table for a match on the
4087  *		inquiry vid/pid and update the driver soft state structure
4088  *		according to the table property values for the device.
4089  *
4090  *		The form of a configuration table entry is:
4091  *		  <vid+pid>,<flags>,<property-data>
4092  *		  "SEAGATE ST42400N",1,0x40000,
4093  *		  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1;
4094  *
4095  *   Arguments: un - driver soft state (unit) structure
4096  */
4097 
4098 static void
4099 sd_process_sdconf_table(struct sd_lun *un)
4100 {
4101 	char	*id = NULL;
4102 	int	table_index;
4103 	int	idlen;
4104 
4105 	ASSERT(un != NULL);
4106 	for (table_index = 0; table_index < sd_disk_table_size;
4107 	    table_index++) {
4108 		id = sd_disk_table[table_index].device_id;
4109 		idlen = strlen(id);
4110 		if (idlen == 0) {
4111 			continue;
4112 		}
4113 
4114 		/*
4115 		 * The static configuration table currently does not
4116 		 * implement version 10 properties. Additionally,
4117 		 * multiple data-property-name entries are not
4118 		 * implemented in the static configuration table.
4119 		 */
4120 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4121 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4122 			    "sd_process_sdconf_table: disk %s\n", id);
4123 			sd_set_vers1_properties(un,
4124 			    sd_disk_table[table_index].flags,
4125 			    sd_disk_table[table_index].properties);
4126 			break;
4127 		}
4128 	}
4129 }
4130 
4131 
4132 /*
4133  *    Function: sd_sdconf_id_match
4134  *
4135  * Description: This local function implements a case sensitive vid/pid
4136  *		comparison as well as the boundary cases of wild card and
4137  *		multiple blanks.
4138  *
4139  *		Note: An implicit assumption made here is that the scsi
4140  *		inquiry structure will always keep the vid, pid and
4141  *		revision strings in consecutive sequence, so they can be
4142  *		read as a single string. If this assumption is not the
4143  *		case, a separate string, to be used for the check, needs
4144  *		to be built with these strings concatenated.
4145  *
4146  *   Arguments: un - driver soft state (unit) structure
4147  *		id - table or config file vid/pid
4148  *		idlen  - length of the vid/pid (bytes)
4149  *
4150  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
4151  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
4152  */
4153 
4154 static int
4155 sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
4156 {
4157 	struct scsi_inquiry	*sd_inq;
4158 	int 			rval = SD_SUCCESS;
4159 
4160 	ASSERT(un != NULL);
4161 	sd_inq = un->un_sd->sd_inq;
4162 	ASSERT(id != NULL);
4163 
4164 	/*
4165 	 * We use the inq_vid as a pointer to a buffer containing the
4166 	 * vid and pid and use the entire vid/pid length of the table
4167 	 * entry for the comparison. This works because the inq_pid
4168 	 * data member follows inq_vid in the scsi_inquiry structure.
4169 	 */
4170 	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
4171 		/*
4172 		 * The user id string is compared to the inquiry vid/pid
4173 		 * using a case insensitive comparison and ignoring
4174 		 * multiple spaces.
4175 		 */
4176 		rval = sd_blank_cmp(un, id, idlen);
4177 		if (rval != SD_SUCCESS) {
4178 			/*
4179 			 * User id strings that start and end with a "*"
4180 			 * are a special case. These do not have a
4181 			 * specific vendor, and the product string can
4182 			 * appear anywhere in the 16 byte PID portion of
4183 			 * the inquiry data. This is a simple strstr()
4184 			 * type search for the user id in the inquiry data.
4185 			 */
4186 			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
4187 				char	*pidptr = &id[1];
4188 				int	i;
4189 				int	j;
4190 				int	pidstrlen = idlen - 2;
4191 				j = sizeof (SD_INQUIRY(un)->inq_pid) -
4192 				    pidstrlen;
4193 
4194 				if (j < 0) {
4195 					return (SD_FAILURE);
4196 				}
4197 				for (i = 0; i < j; i++) {
4198 					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
4199 					    pidptr, pidstrlen) == 0) {
4200 						rval = SD_SUCCESS;
4201 						break;
4202 					}
4203 				}
4204 			}
4205 		}
4206 	}
4207 	return (rval);
4208 }
4209 
4210 
4211 /*
4212  *    Function: sd_blank_cmp
4213  *
4214  * Description: If the id string starts and ends with a space, treat
4215  *		multiple consecutive spaces as equivalent to a single
4216  *		space. For example, this causes a sd_disk_table entry
4217  *		of " NEC CDROM " to match a device's id string of
4218  *		"NEC       CDROM".
4219  *
4220  *		Note: The success exit condition for this routine is if
4221  *		the pointer to the table entry is '\0' and the cnt of
4222  *		the inquiry length is zero. This will happen if the inquiry
4223  *		string returned by the device is padded with spaces to be
4224  *		exactly 24 bytes in length (8 byte vid + 16 byte pid). The
4225  *		SCSI spec states that the inquiry string is to be padded with
4226  *		spaces.
4227  *
4228  *   Arguments: un - driver soft state (unit) structure
4229  *		id - table or config file vid/pid
4230  *		idlen  - length of the vid/pid (bytes)
4231  *
4232  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
4233  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
4234  */
4235 
static int
sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
{
	char		*p1;
	char		*p2;
	int		cnt;
	/*
	 * cnt is the total inquiry vid+pid length (8 + 16 = 24 bytes).
	 * sizeof is unevaluated, so SD_INQUIRY(un) is not dereferenced
	 * here even though this runs before the ASSERT below.
	 */
	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
	    sizeof (SD_INQUIRY(un)->inq_pid);

	ASSERT(un != NULL);
	/* p2 walks the device's inquiry vid+pid; p1 walks the table id */
	p2 = un->un_sd->sd_inq->inq_vid;
	ASSERT(id != NULL);
	p1 = id;

	/* Blank-folding only applies to ids framed by leading/trailing blanks */
	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
		/*
		 * Note: string p1 is terminated by a NUL but string p2
		 * isn't.  The end of p2 is determined by cnt.
		 */
		for (;;) {
			/* skip over any extra blanks in both strings */
			while ((*p1 != '\0') && (*p1 == ' ')) {
				p1++;
			}
			while ((cnt != 0) && (*p2 == ' ')) {
				p2++;
				cnt--;
			}

			/* compare the two strings */
			if ((cnt == 0) ||
			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
				break;
			}
			/*
			 * Advance both pointers while the (case folded)
			 * characters keep matching; the next pass of the
			 * outer loop re-folds any blank runs.
			 */
			while ((cnt > 0) &&
			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
				p1++;
				p2++;
				cnt--;
			}
		}
	}

	/* return SD_SUCCESS if both strings match */
	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
}
4282 
4283 
4284 /*
4285  *    Function: sd_chk_vers1_data
4286  *
4287  * Description: Verify the version 1 device properties provided by the
4288  *		user via the configuration file
4289  *
4290  *   Arguments: un	     - driver soft state (unit) structure
4291  *		flags	     - integer mask indicating properties to be set
4292  *		prop_list    - integer list of property values
4293  *		list_len     - number of the elements
4294  *
4295  * Return Code: SD_SUCCESS - Indicates the user provided data is valid
4296  *		SD_FAILURE - Indicates the user provided data is invalid
4297  */
4298 
4299 static int
4300 sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
4301     int list_len, char *dataname_ptr)
4302 {
4303 	int i;
4304 	int mask = 1;
4305 	int index = 0;
4306 
4307 	ASSERT(un != NULL);
4308 
4309 	/* Check for a NULL property name and list */
4310 	if (dataname_ptr == NULL) {
4311 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4312 		    "sd_chk_vers1_data: NULL data property name.");
4313 		return (SD_FAILURE);
4314 	}
4315 	if (prop_list == NULL) {
4316 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4317 		    "sd_chk_vers1_data: %s NULL data property list.",
4318 		    dataname_ptr);
4319 		return (SD_FAILURE);
4320 	}
4321 
4322 	/* Display a warning if undefined bits are set in the flags */
4323 	if (flags & ~SD_CONF_BIT_MASK) {
4324 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4325 		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
4326 		    "Properties not set.",
4327 		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
4328 		return (SD_FAILURE);
4329 	}
4330 
4331 	/*
4332 	 * Verify the length of the list by identifying the highest bit set
4333 	 * in the flags and validating that the property list has a length
4334 	 * up to the index of this bit.
4335 	 */
4336 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
4337 		if (flags & mask) {
4338 			index++;
4339 		}
4340 		mask = 1 << i;
4341 	}
4342 	if (list_len < (index + 2)) {
4343 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4344 		    "sd_chk_vers1_data: "
4345 		    "Data property list %s size is incorrect. "
4346 		    "Properties not set.", dataname_ptr);
4347 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
4348 		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
4349 		return (SD_FAILURE);
4350 	}
4351 	return (SD_SUCCESS);
4352 }
4353 
4354 
4355 /*
4356  *    Function: sd_set_vers1_properties
4357  *
4358  * Description: Set version 1 device properties based on a property list
4359  *		retrieved from the driver configuration file or static
4360  *		configuration table. Version 1 properties have the format:
4361  *
4362  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
4363  *
4364  *		where the prop0 value will be used to set prop0 if bit0
4365  *		is set in the flags
4366  *
4367  *   Arguments: un	     - driver soft state (unit) structure
4368  *		flags	     - integer mask indicating properties to be set
4369  *		prop_list    - integer list of property values
4370  */
4371 
4372 static void
4373 sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
4374 {
4375 	ASSERT(un != NULL);
4376 
4377 	/*
4378 	 * Set the flag to indicate cache is to be disabled. An attempt
4379 	 * to disable the cache via sd_cache_control() will be made
4380 	 * later during attach once the basic initialization is complete.
4381 	 */
4382 	if (flags & SD_CONF_BSET_NOCACHE) {
4383 		un->un_f_opt_disable_cache = TRUE;
4384 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4385 		    "sd_set_vers1_properties: caching disabled flag set\n");
4386 	}
4387 
4388 	/* CD-specific configuration parameters */
4389 	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
4390 		un->un_f_cfg_playmsf_bcd = TRUE;
4391 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4392 		    "sd_set_vers1_properties: playmsf_bcd set\n");
4393 	}
4394 	if (flags & SD_CONF_BSET_READSUB_BCD) {
4395 		un->un_f_cfg_readsub_bcd = TRUE;
4396 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4397 		    "sd_set_vers1_properties: readsub_bcd set\n");
4398 	}
4399 	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
4400 		un->un_f_cfg_read_toc_trk_bcd = TRUE;
4401 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4402 		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
4403 	}
4404 	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
4405 		un->un_f_cfg_read_toc_addr_bcd = TRUE;
4406 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4407 		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
4408 	}
4409 	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
4410 		un->un_f_cfg_no_read_header = TRUE;
4411 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4412 		    "sd_set_vers1_properties: no_read_header set\n");
4413 	}
4414 	if (flags & SD_CONF_BSET_READ_CD_XD4) {
4415 		un->un_f_cfg_read_cd_xd4 = TRUE;
4416 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4417 		    "sd_set_vers1_properties: read_cd_xd4 set\n");
4418 	}
4419 
4420 	/* Support for devices which do not have valid/unique serial numbers */
4421 	if (flags & SD_CONF_BSET_FAB_DEVID) {
4422 		un->un_f_opt_fab_devid = TRUE;
4423 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4424 		    "sd_set_vers1_properties: fab_devid bit set\n");
4425 	}
4426 
4427 	/* Support for user throttle configuration */
4428 	if (flags & SD_CONF_BSET_THROTTLE) {
4429 		ASSERT(prop_list != NULL);
4430 		un->un_saved_throttle = un->un_throttle =
4431 		    prop_list->sdt_throttle;
4432 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4433 		    "sd_set_vers1_properties: throttle set to %d\n",
4434 		    prop_list->sdt_throttle);
4435 	}
4436 
4437 	/* Set the per disk retry count according to the conf file or table. */
4438 	if (flags & SD_CONF_BSET_NRR_COUNT) {
4439 		ASSERT(prop_list != NULL);
4440 		if (prop_list->sdt_not_rdy_retries) {
4441 			un->un_notready_retry_count =
4442 			    prop_list->sdt_not_rdy_retries;
4443 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4444 			    "sd_set_vers1_properties: not ready retry count"
4445 			    " set to %d\n", un->un_notready_retry_count);
4446 		}
4447 	}
4448 
4449 	/* The controller type is reported for generic disk driver ioctls */
4450 	if (flags & SD_CONF_BSET_CTYPE) {
4451 		ASSERT(prop_list != NULL);
4452 		switch (prop_list->sdt_ctype) {
4453 		case CTYPE_CDROM:
4454 			un->un_ctype = prop_list->sdt_ctype;
4455 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4456 			    "sd_set_vers1_properties: ctype set to "
4457 			    "CTYPE_CDROM\n");
4458 			break;
4459 		case CTYPE_CCS:
4460 			un->un_ctype = prop_list->sdt_ctype;
4461 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4462 			    "sd_set_vers1_properties: ctype set to "
4463 			    "CTYPE_CCS\n");
4464 			break;
4465 		case CTYPE_ROD:		/* RW optical */
4466 			un->un_ctype = prop_list->sdt_ctype;
4467 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4468 			    "sd_set_vers1_properties: ctype set to "
4469 			    "CTYPE_ROD\n");
4470 			break;
4471 		default:
4472 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4473 			    "sd_set_vers1_properties: Could not set "
4474 			    "invalid ctype value (%d)",
4475 			    prop_list->sdt_ctype);
4476 		}
4477 	}
4478 
4479 	/* Purple failover timeout */
4480 	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
4481 		ASSERT(prop_list != NULL);
4482 		un->un_busy_retry_count =
4483 		    prop_list->sdt_busy_retries;
4484 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4485 		    "sd_set_vers1_properties: "
4486 		    "busy retry count set to %d\n",
4487 		    un->un_busy_retry_count);
4488 	}
4489 
4490 	/* Purple reset retry count */
4491 	if (flags & SD_CONF_BSET_RST_RETRIES) {
4492 		ASSERT(prop_list != NULL);
4493 		un->un_reset_retry_count =
4494 		    prop_list->sdt_reset_retries;
4495 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4496 		    "sd_set_vers1_properties: "
4497 		    "reset retry count set to %d\n",
4498 		    un->un_reset_retry_count);
4499 	}
4500 
4501 	/* Purple reservation release timeout */
4502 	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
4503 		ASSERT(prop_list != NULL);
4504 		un->un_reserve_release_time =
4505 		    prop_list->sdt_reserv_rel_time;
4506 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4507 		    "sd_set_vers1_properties: "
4508 		    "reservation release timeout set to %d\n",
4509 		    un->un_reserve_release_time);
4510 	}
4511 
4512 	/*
4513 	 * Driver flag telling the driver to verify that no commands are pending
4514 	 * for a device before issuing a Test Unit Ready. This is a workaround
4515 	 * for a firmware bug in some Seagate eliteI drives.
4516 	 */
4517 	if (flags & SD_CONF_BSET_TUR_CHECK) {
4518 		un->un_f_cfg_tur_check = TRUE;
4519 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4520 		    "sd_set_vers1_properties: tur queue check set\n");
4521 	}
4522 
4523 	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
4524 		un->un_min_throttle = prop_list->sdt_min_throttle;
4525 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4526 		    "sd_set_vers1_properties: min throttle set to %d\n",
4527 		    un->un_min_throttle);
4528 	}
4529 
4530 	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
4531 		un->un_f_disksort_disabled =
4532 		    (prop_list->sdt_disk_sort_dis != 0) ?
4533 		    TRUE : FALSE;
4534 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4535 		    "sd_set_vers1_properties: disksort disabled "
4536 		    "flag set to %d\n",
4537 		    prop_list->sdt_disk_sort_dis);
4538 	}
4539 
4540 	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
4541 		un->un_f_lun_reset_enabled =
4542 		    (prop_list->sdt_lun_reset_enable != 0) ?
4543 		    TRUE : FALSE;
4544 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4545 		    "sd_set_vers1_properties: lun reset enabled "
4546 		    "flag set to %d\n",
4547 		    prop_list->sdt_lun_reset_enable);
4548 	}
4549 
4550 	if (flags & SD_CONF_BSET_CACHE_IS_NV) {
4551 		un->un_f_suppress_cache_flush =
4552 		    (prop_list->sdt_suppress_cache_flush != 0) ?
4553 		    TRUE : FALSE;
4554 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4555 		    "sd_set_vers1_properties: suppress_cache_flush "
4556 		    "flag set to %d\n",
4557 		    prop_list->sdt_suppress_cache_flush);
4558 	}
4559 
4560 	/*
4561 	 * Validate the throttle values.
4562 	 * If any of the numbers are invalid, set everything to defaults.
4563 	 */
4564 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4565 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4566 	    (un->un_min_throttle > un->un_throttle)) {
4567 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4568 		un->un_min_throttle = sd_min_throttle;
4569 	}
4570 }
4571 
4572 /*
4573  *   Function: sd_is_lsi()
4574  *
4575  *   Description: Check for lsi devices, step through the static device
4576  *	table to match vid/pid.
4577  *
4578  *   Args: un - ptr to sd_lun
4579  *
4580  *   Notes:  When creating new LSI property, need to add the new LSI property
4581  *		to this function.
4582  */
4583 static void
4584 sd_is_lsi(struct sd_lun *un)
4585 {
4586 	char	*id = NULL;
4587 	int	table_index;
4588 	int	idlen;
4589 	void	*prop;
4590 
4591 	ASSERT(un != NULL);
4592 	for (table_index = 0; table_index < sd_disk_table_size;
4593 	    table_index++) {
4594 		id = sd_disk_table[table_index].device_id;
4595 		idlen = strlen(id);
4596 		if (idlen == 0) {
4597 			continue;
4598 		}
4599 
4600 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4601 			prop = sd_disk_table[table_index].properties;
4602 			if (prop == &lsi_properties ||
4603 			    prop == &lsi_oem_properties ||
4604 			    prop == &lsi_properties_scsi ||
4605 			    prop == &symbios_properties) {
4606 				un->un_f_cfg_is_lsi = TRUE;
4607 			}
4608 			break;
4609 		}
4610 	}
4611 }
4612 
4613 /*
4614  *    Function: sd_get_physical_geometry
4615  *
4616  * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4617  *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4618  *		target, and use this information to initialize the physical
4619  *		geometry cache specified by pgeom_p.
4620  *
4621  *		MODE SENSE is an optional command, so failure in this case
4622  *		does not necessarily denote an error. We want to use the
4623  *		MODE SENSE commands to derive the physical geometry of the
4624  *		device, but if either command fails, the logical geometry is
4625  *		used as the fallback for disk label geometry in cmlb.
4626  *
4627  *		This requires that un->un_blockcount and un->un_tgt_blocksize
4628  *		have already been initialized for the current target and
4629  *		that the current values be passed as args so that we don't
4630  *		end up ever trying to use -1 as a valid value. This could
4631  *		happen if either value is reset while we're not holding
4632  *		the mutex.
4633  *
4634  *   Arguments: un - driver soft state (unit) structure
4635  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4636  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4637  *			to use the USCSI "direct" chain and bypass the normal
4638  *			command waitq.
4639  *
4640  *     Context: Kernel thread only (can sleep).
4641  */
4642 
static int
sd_get_physical_geometry(struct sd_lun *un, cmlb_geom_t *pgeom_p,
	diskaddr_t capacity, int lbasize, int path_flag)
{
	struct	mode_format	*page3p;
	struct	mode_geometry	*page4p;
	struct	mode_header	*headerp;
	int	sector_size;
	int	nsect;
	int	nhead;
	int	ncyl;
	int	intrlv;
	int	spc;		/* sectors per cylinder (nhead * nsect) */
	diskaddr_t	modesense_capacity;
	int	rpm;
	int	bd_len;
	int	mode_header_length;
	uchar_t	*p3bufp;
	uchar_t	*p4bufp;
	int	cdbsize;
	int 	ret = EIO;
	sd_ssc_t *ssc;
	int	status;

	ASSERT(un != NULL);

	/* A zero lbasize means "use the default": 2K for CD media. */
	if (lbasize == 0) {
		if (ISCD(un)) {
			lbasize = 2048;
		} else {
			lbasize = un->un_sys_blocksize;
		}
	}
	pgeom_p->g_secsize = (unsigned short)lbasize;

	/*
	 * If the unit is a cd/dvd drive MODE SENSE page three
	 * and MODE SENSE page four are reserved (see SBC spec
	 * and MMC spec). To prevent soft errors just return
	 * using the default LBA size.
	 */
	if (ISCD(un))
		return (ret);

	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;

	/*
	 * Retrieve MODE SENSE page 3 - Format Device Page
	 */
	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
	ssc = sd_ssc_init(un);
	status = sd_send_scsi_MODE_SENSE(ssc, cdbsize, p3bufp,
	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag);
	if (status != 0) {
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_get_physical_geometry: mode sense page 3 failed\n");
		goto page3_exit;
	}

	/*
	 * Determine size of Block Descriptors in order to locate the mode
	 * page data.  ATAPI devices return 0, SCSI devices should return
	 * MODE_BLK_DESC_LENGTH.
	 */
	headerp = (struct mode_header *)p3bufp;
	if (un->un_f_cfg_is_atapi == TRUE) {
		struct mode_header_grp2 *mhp =
		    (struct mode_header_grp2 *)headerp;
		mode_header_length = MODE_HEADER_LENGTH_GRP2;
		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
	} else {
		mode_header_length = MODE_HEADER_LENGTH;
		bd_len = ((struct mode_header *)headerp)->bdesc_length;
	}

	if (bd_len > MODE_BLK_DESC_LENGTH) {
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
		    "sd_get_physical_geometry: received unexpected bd_len "
		    "of %d, page3\n", bd_len);
		status = EIO;
		goto page3_exit;
	}

	page3p = (struct mode_format *)
	    ((caddr_t)headerp + mode_header_length + bd_len);

	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
		    "sd_get_physical_geometry: mode sense pg3 code mismatch "
		    "%d\n", page3p->mode_page.code);
		status = EIO;
		goto page3_exit;
	}

	/*
	 * Use this physical geometry data only if BOTH MODE SENSE commands
	 * complete successfully; otherwise, revert to the logical geometry.
	 * So, we need to save everything in temporary variables.
	 */
	sector_size = BE_16(page3p->data_bytes_sect);

	/*
	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
	 */
	if (sector_size == 0) {
		sector_size = un->un_sys_blocksize;
	} else {
		/*
		 * Round down to a multiple of the system block size
		 * (assumes un_sys_blocksize is a power of two).
		 */
		sector_size &= ~(un->un_sys_blocksize - 1);
	}

	nsect  = BE_16(page3p->sect_track);
	intrlv = BE_16(page3p->interleave);

	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   mode page: %d; nsect: %d; sector size: %d;\n",
	    page3p->mode_page.code, nsect, sector_size);
	SD_INFO(SD_LOG_COMMON, un,
	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
	    BE_16(page3p->track_skew),
	    BE_16(page3p->cylinder_skew));

	sd_ssc_assessment(ssc, SD_FMT_STANDARD);

	/*
	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
	 */
	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
	status = sd_send_scsi_MODE_SENSE(ssc, cdbsize, p4bufp,
	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag);
	if (status != 0) {
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_get_physical_geometry: mode sense page 4 failed\n");
		goto page4_exit;
	}

	/*
	 * Determine size of Block Descriptors in order to locate the mode
	 * page data.  ATAPI devices return 0, SCSI devices should return
	 * MODE_BLK_DESC_LENGTH.
	 */
	headerp = (struct mode_header *)p4bufp;
	if (un->un_f_cfg_is_atapi == TRUE) {
		struct mode_header_grp2 *mhp =
		    (struct mode_header_grp2 *)headerp;
		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
	} else {
		bd_len = ((struct mode_header *)headerp)->bdesc_length;
	}

	if (bd_len > MODE_BLK_DESC_LENGTH) {
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
		    "sd_get_physical_geometry: received unexpected bd_len of "
		    "%d, page4\n", bd_len);
		status = EIO;
		goto page4_exit;
	}

	/*
	 * NOTE(review): mode_header_length still holds the value computed
	 * during the page 3 pass above; it is not recomputed here. That is
	 * correct because un_f_cfg_is_atapi cannot have changed in between,
	 * but the dependence on the earlier pass is easy to miss.
	 */
	page4p = (struct mode_geometry *)
	    ((caddr_t)headerp + mode_header_length + bd_len);

	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
		    "sd_get_physical_geometry: mode sense pg4 code mismatch "
		    "%d\n", page4p->mode_page.code);
		status = EIO;
		goto page4_exit;
	}

	/*
	 * Stash the data now, after we know that both commands completed.
	 */


	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
	spc   = nhead * nsect;
	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
	rpm   = BE_16(page4p->rpm);

	modesense_capacity = spc * ncyl;

	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
	SD_INFO(SD_LOG_COMMON, un,
	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
	    (void *)pgeom_p, capacity);

	/*
	 * Compensate if the drive's geometry is not rectangular, i.e.,
	 * the product of C * H * S returned by MODE SENSE >= that returned
	 * by read capacity. This is an idiosyncrasy of the original x86
	 * disk subsystem.
	 */
	if (modesense_capacity >= capacity) {
		SD_INFO(SD_LOG_COMMON, un,
		    "sd_get_physical_geometry: adjusting acyl; "
		    "old: %d; new: %d\n", pgeom_p->g_acyl,
		    (modesense_capacity - capacity + spc - 1) / spc);
		if (sector_size != 0) {
			/* 1243403: NEC D38x7 drives don't support sec size */
			pgeom_p->g_secsize = (unsigned short)sector_size;
		}
		pgeom_p->g_nsect    = (unsigned short)nsect;
		pgeom_p->g_nhead    = (unsigned short)nhead;
		pgeom_p->g_capacity = capacity;
		/*
		 * Round up: surplus MODE SENSE cylinders beyond the read
		 * capacity become alternate cylinders.
		 */
		pgeom_p->g_acyl	    =
		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
	}

	pgeom_p->g_rpm    = (unsigned short)rpm;
	pgeom_p->g_intrlv = (unsigned short)intrlv;
	ret = 0;

	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: mode sense geometry:\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   nsect: %d; sector size: %d; interlv: %d\n",
	    nsect, sector_size, intrlv);
	SD_INFO(SD_LOG_COMMON, un,
	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
	    nhead, ncyl, rpm, modesense_capacity);
	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: (cached)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
	    pgeom_p->g_ncyl,  pgeom_p->g_acyl,
	    pgeom_p->g_nhead, pgeom_p->g_nsect);
	SD_INFO(SD_LOG_COMMON, un,
	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
	    pgeom_p->g_secsize, pgeom_p->g_capacity,
	    pgeom_p->g_intrlv, pgeom_p->g_rpm);
	sd_ssc_assessment(ssc, SD_FMT_STANDARD);

page4_exit:
	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);

page3_exit:
	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);

	if (status != 0) {
		if (status == EIO) {
			/*
			 * Some disks do not support mode sense(6), we
			 * should ignore this kind of error(sense key is
			 * 0x5 - illegal request).
			 */
			uint8_t *sensep;
			int senlen;

			sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
			senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
			    ssc->ssc_uscsi_cmd->uscsi_rqresid);

			if (senlen > 0 &&
			    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
				sd_ssc_assessment(ssc,
				    SD_FMT_IGNORE_COMPROMISE);
			} else {
				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
			}
		} else {
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		}
	}
	sd_ssc_fini(ssc);
	return (ret);
}
4915 
4916 /*
4917  *    Function: sd_get_virtual_geometry
4918  *
4919  * Description: Ask the controller to tell us about the target device.
4920  *
4921  *   Arguments: un - pointer to softstate
4922  *		capacity - disk capacity in #blocks
4923  *		lbasize - disk block size in bytes
4924  *
4925  *     Context: Kernel thread only
4926  */
4927 
4928 static int
4929 sd_get_virtual_geometry(struct sd_lun *un, cmlb_geom_t *lgeom_p,
4930     diskaddr_t capacity, int lbasize)
4931 {
4932 	uint_t	geombuf;
4933 	int	spc;
4934 
4935 	ASSERT(un != NULL);
4936 
4937 	/* Set sector size, and total number of sectors */
4938 	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
4939 	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
4940 
4941 	/* Let the HBA tell us its geometry */
4942 	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
4943 
4944 	/* A value of -1 indicates an undefined "geometry" property */
4945 	if (geombuf == (-1)) {
4946 		return (EINVAL);
4947 	}
4948 
4949 	/* Initialize the logical geometry cache. */
4950 	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
4951 	lgeom_p->g_nsect   = geombuf & 0xffff;
4952 	lgeom_p->g_secsize = un->un_sys_blocksize;
4953 
4954 	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
4955 
4956 	/*
4957 	 * Note: The driver originally converted the capacity value from
4958 	 * target blocks to system blocks. However, the capacity value passed
4959 	 * to this routine is already in terms of system blocks (this scaling
4960 	 * is done when the READ CAPACITY command is issued and processed).
4961 	 * This 'error' may have gone undetected because the usage of g_ncyl
4962 	 * (which is based upon g_capacity) is very limited within the driver
4963 	 */
4964 	lgeom_p->g_capacity = capacity;
4965 
4966 	/*
4967 	 * Set ncyl to zero if the hba returned a zero nhead or nsect value. The
4968 	 * hba may return zero values if the device has been removed.
4969 	 */
4970 	if (spc == 0) {
4971 		lgeom_p->g_ncyl = 0;
4972 	} else {
4973 		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
4974 	}
4975 	lgeom_p->g_acyl = 0;
4976 
4977 	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
4978 	return (0);
4979 
4980 }
4981 /*
4982  *    Function: sd_update_block_info
4983  *
4984  * Description: Calculate a byte count to sector count bitshift value
4985  *		from sector size.
4986  *
4987  *   Arguments: un: unit struct.
4988  *		lbasize: new target sector size
4989  *		capacity: new target capacity, ie. block count
4990  *
4991  *     Context: Kernel thread context
4992  */
4993 
4994 static void
4995 sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
4996 {
4997 	if (lbasize != 0) {
4998 		un->un_tgt_blocksize = lbasize;
4999 		un->un_f_tgt_blocksize_is_valid	= TRUE;
5000 	}
5001 
5002 	if (capacity != 0) {
5003 		un->un_blockcount		= capacity;
5004 		un->un_f_blockcount_is_valid	= TRUE;
5005 	}
5006 }
5007 
5008 
5009 /*
5010  *    Function: sd_register_devid
5011  *
5012  * Description: This routine will obtain the device id information from the
5013  *		target, obtain the serial number, and register the device
5014  *		id with the ddi framework.
5015  *
 *   Arguments: ssc - access handle containing a pointer to the driver
 *		soft state (unit) structure
 *		devi - the system's dev_info_t for the device.
5018  *		reservation_flag - indicates if a reservation conflict
5019  *		occurred during attach
5020  *
5021  *     Context: Kernel Thread
5022  */
static void
sd_register_devid(sd_ssc_t *ssc, dev_info_t *devi, int reservation_flag)
{
	int		rval		= 0;
	uchar_t		*inq80		= NULL;
	size_t		inq80_len	= MAX_INQUIRY_SIZE;
	size_t		inq80_resid	= 0;
	uchar_t		*inq83		= NULL;
	size_t		inq83_len	= MAX_INQUIRY_SIZE;
	size_t		inq83_resid	= 0;
	int		dlen, len;
	char		*sn;
	struct sd_lun	*un;

	/* Entered and exited with SD_MUTEX held (see ASSERT below). */
	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT((SD_DEVINFO(un)) == devi);


	/*
	 * We check the availability of the World Wide Name (0x83) and Unit
	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), and using
	 * un_vpd_page_mask from them, we decide which way to get the WWN.  If
	 * 0x83 is available, that is the best choice.  Our next choice is
	 * 0x80.  If neither are available, we munge the devid from the device
	 * vid/pid/serial # for Sun qualified disks, or use the ddi framework
	 * to fabricate a devid for non-Sun qualified disks.
	 */
	if (sd_check_vpd_page_support(ssc) == 0) {
		/* collect page 80 data if available */
		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {

			/* Drop the mutex around the blocking INQUIRY. */
			mutex_exit(SD_MUTEX(un));
			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);

			rval = sd_send_scsi_INQUIRY(ssc, inq80, inq80_len,
			    0x01, 0x80, &inq80_resid);

			if (rval != 0) {
				/* On failure, free and NULL so the encode
				 * step below sees "no page 80 data". */
				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
				kmem_free(inq80, inq80_len);
				inq80 = NULL;
				inq80_len = 0;
			} else if (ddi_prop_exists(
			    DDI_DEV_T_NONE, SD_DEVINFO(un),
			    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
			    INQUIRY_SERIAL_NO) == 0) {
				/*
				 * If we don't already have a serial number
				 * property, do quick verify of data returned
				 * and define property.
				 */
				dlen = inq80_len - inq80_resid;
				/* inq80[3] is the VPD page length field,
				 * i.e. the serial number length. */
				len = (size_t)inq80[3];
				if ((dlen >= 4) && ((len + 4) <= dlen)) {
					/*
					 * Ensure sn termination, skip leading
					 * blanks, and create property
					 * 'inquiry-serial-no'.
					 */
					sn = (char *)&inq80[4];
					sn[len] = 0;
					while (*sn && (*sn == ' '))
						sn++;
					if (*sn) {
						(void) ddi_prop_update_string(
						    DDI_DEV_T_NONE,
						    SD_DEVINFO(un),
						    INQUIRY_SERIAL_NO, sn);
					}
				}
			}
			mutex_enter(SD_MUTEX(un));
		}

		/* collect page 83 data if available */
		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
			/* Again drop the mutex around the blocking INQUIRY. */
			mutex_exit(SD_MUTEX(un));
			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);

			rval = sd_send_scsi_INQUIRY(ssc, inq83, inq83_len,
			    0x01, 0x83, &inq83_resid);

			if (rval != 0) {
				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
				kmem_free(inq83, inq83_len);
				inq83 = NULL;
				inq83_len = 0;
			}
			mutex_enter(SD_MUTEX(un));
		}
	}

	/*
	 * If transport has already registered a devid for this target
	 * then that takes precedence over the driver's determination
	 * of the devid.
	 *
	 * NOTE: The reason this check is done here instead of at the beginning
	 * of the function is to allow the code above to create the
	 * 'inquiry-serial-no' property.
	 */
	if (ddi_devid_get(SD_DEVINFO(un), &un->un_devid) == DDI_SUCCESS) {
		ASSERT(un->un_devid);
		un->un_f_devid_transport_defined = TRUE;
		goto cleanup; /* use devid registered by the transport */
	}

	/*
	 * This is the case of antiquated Sun disk drives that have the
	 * FAB_DEVID property set in the disk_table.  These drives
	 * manage the devid's by storing them in last 2 available sectors
	 * on the drive and have them fabricated by the ddi layer by calling
	 * ddi_devid_init and passing the DEVID_FAB flag.
	 */
	if (un->un_f_opt_fab_devid == TRUE) {
		/*
		 * Depending on EINVAL isn't reliable, since a reserved disk
		 * may result in invalid geometry, so check to make sure a
		 * reservation conflict did not occur during attach.
		 */
		if ((sd_get_devid(ssc) == EINVAL) &&
		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
			/*
			 * The devid is invalid AND there is no reservation
			 * conflict.  Fabricate a new devid.
			 */
			(void) sd_create_devid(ssc);
		}

		/* Register the devid if it exists */
		if (un->un_devid != NULL) {
			(void) ddi_devid_register(SD_DEVINFO(un),
			    un->un_devid);
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_register_devid: Devid Fabricated\n");
		}
		goto cleanup;
	}

	/* encode best devid possible based on data available */
	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
	    (char *)ddi_driver_name(SD_DEVINFO(un)),
	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {

		/* devid successfully encoded, register devid */
		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);

	} else {
		/*
		 * Unable to encode a devid based on data available.
		 * This is not a Sun qualified disk.  Older Sun disk
		 * drives that have the SD_FAB_DEVID property
		 * set in the disk_table and non Sun qualified
		 * disks are treated in the same manner.  These
		 * drives manage the devid's by storing them in
		 * last 2 available sectors on the drive and
		 * have them fabricated by the ddi layer by
		 * calling ddi_devid_init and passing the
		 * DEVID_FAB flag.
		 * Create a fabricate devid only if there's no
		 * fabricate devid existed.
		 */
		if (sd_get_devid(ssc) == EINVAL) {
			(void) sd_create_devid(ssc);
		}
		un->un_f_opt_fab_devid = TRUE;

		/* Register the devid if it exists */
		if (un->un_devid != NULL) {
			(void) ddi_devid_register(SD_DEVINFO(un),
			    un->un_devid);
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_register_devid: devid fabricated using "
			    "ddi framework\n");
		}
	}

cleanup:
	/* clean up resources */
	if (inq80 != NULL) {
		kmem_free(inq80, inq80_len);
	}
	if (inq83 != NULL) {
		kmem_free(inq83, inq83_len);
	}
}
5214 
5215 
5216 
5217 /*
5218  *    Function: sd_get_devid
5219  *
5220  * Description: This routine will return 0 if a valid device id has been
5221  *		obtained from the target and stored in the soft state. If a
5222  *		valid device id has not been previously read and stored, a
5223  *		read attempt will be made.
5224  *
 *   Arguments: ssc - ssc contains a pointer to the driver soft state
 *		(unit) structure
5226  *
5227  * Return Code: 0 if we successfully get the device id
5228  *
5229  *     Context: Kernel Thread
5230  */
5231 
static int
sd_get_devid(sd_ssc_t *ssc)
{
	struct dk_devid		*dkdevid;
	ddi_devid_t		tmpid;
	uint_t			*ip;
	size_t			sz;
	diskaddr_t		blk;
	int			status;
	int			chksum;
	int			i;
	size_t			buffer_size;
	struct sd_lun		*un;

	/*
	 * Entered with SD_MUTEX held; every return path below re-acquires
	 * it, so the caller's lock state is preserved on exit.
	 */
	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
	    un);

	/* Already cached: nothing to read. */
	if (un->un_devid != NULL) {
		return (0);
	}

	mutex_exit(SD_MUTEX(un));
	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
	    (void *)SD_PATH_DIRECT) != 0) {
		mutex_enter(SD_MUTEX(un));
		return (EINVAL);
	}

	/*
	 * Read and verify device id, stored in the reserved cylinders at the
	 * end of the disk. Backup label is on the odd sectors of the last
	 * track of the last cylinder. Device id will be on track of the next
	 * to last cylinder.
	 */
	mutex_enter(SD_MUTEX(un));
	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
	mutex_exit(SD_MUTEX(un));
	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
	status = sd_send_scsi_READ(ssc, dkdevid, buffer_size, blk,
	    SD_PATH_DIRECT);

	if (status != 0) {
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		goto error;
	}

	/* Validate the revision */
	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
		status = EINVAL;
		goto error;
	}

	/*
	 * Calculate the checksum: XOR of every word in the first system
	 * block except the final word, which holds the stored checksum
	 * (compared below via DKD_GETCHKSUM).
	 */
	chksum = 0;
	ip = (uint_t *)dkdevid;
	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
	    i++) {
		chksum ^= ip[i];
	}

	/* Compare the checksums */
	if (DKD_GETCHKSUM(dkdevid) != chksum) {
		status = EINVAL;
		goto error;
	}

	/* Validate the device id */
	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
		status = EINVAL;
		goto error;
	}

	/*
	 * Store the device id in the driver soft state
	 */
	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
	tmpid = kmem_alloc(sz, KM_SLEEP);

	mutex_enter(SD_MUTEX(un));

	un->un_devid = tmpid;
	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);

	kmem_free(dkdevid, buffer_size);

	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);

	/* status is 0 here: the read succeeded and all checks passed. */
	return (status);
error:
	mutex_enter(SD_MUTEX(un));
	kmem_free(dkdevid, buffer_size);
	return (status);
}
5331 
5332 
5333 /*
5334  *    Function: sd_create_devid
5335  *
5336  * Description: This routine will fabricate the device id and write it
5337  *		to the disk.
5338  *
 *   Arguments: ssc - ssc contains pointer to driver soft state (unit) structure
5340  *
5341  * Return Code: value of the fabricated device id
5342  *
5343  *     Context: Kernel Thread
5344  */
5345 
5346 static ddi_devid_t
5347 sd_create_devid(sd_ssc_t *ssc)
5348 {
5349 	struct sd_lun	*un;
5350 
5351 	ASSERT(ssc != NULL);
5352 	un = ssc->ssc_un;
5353 	ASSERT(un != NULL);
5354 
5355 	/* Fabricate the devid */
5356 	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
5357 	    == DDI_FAILURE) {
5358 		return (NULL);
5359 	}
5360 
5361 	/* Write the devid to disk */
5362 	if (sd_write_deviceid(ssc) != 0) {
5363 		ddi_devid_free(un->un_devid);
5364 		un->un_devid = NULL;
5365 	}
5366 
5367 	return (un->un_devid);
5368 }
5369 
5370 
5371 /*
5372  *    Function: sd_write_deviceid
5373  *
5374  * Description: This routine will write the device id to the disk
5375  *		reserved sector.
5376  *
 *   Arguments: ssc - ssc contains pointer to driver soft state (unit) structure
5378  *
5379  * Return Code: EINVAL
5380  *		value returned by sd_send_scsi_cmd
5381  *
5382  *     Context: Kernel Thread
5383  */
5384 
static int
sd_write_deviceid(sd_ssc_t *ssc)
{
	struct dk_devid		*dkdevid;
	diskaddr_t		blk;	/* block number of the devid sector */
	uint_t			*ip, chksum;
	int			status;
	int			i;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* cmlb_get_devid_block() may block; drop SD_MUTEX around the call */
	mutex_exit(SD_MUTEX(un));
	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
	    (void *)SD_PATH_DIRECT) != 0) {
		mutex_enter(SD_MUTEX(un));
		return (-1);
	}


	/* Allocate the buffer (one system block, zeroed) */
	dkdevid = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);

	/* Fill in the revision */
	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;

	/* Copy in the device id; hold SD_MUTEX while reading un_devid */
	mutex_enter(SD_MUTEX(un));
	bcopy(un->un_devid, &dkdevid->dkd_devid,
	    ddi_devid_sizeof(un->un_devid));
	mutex_exit(SD_MUTEX(un));

	/*
	 * Calculate the checksum: XOR of the sector's uint_t words,
	 * excluding the trailing word that will hold the checksum itself.
	 */
	chksum = 0;
	ip = (uint_t *)dkdevid;
	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
	    i++) {
		chksum ^= ip[i];
	}

	/* Fill-in checksum */
	DKD_FORMCHKSUM(chksum, dkdevid);

	/* Write the reserved sector */
	status = sd_send_scsi_WRITE(ssc, dkdevid, un->un_sys_blocksize, blk,
	    SD_PATH_DIRECT);
	if (status != 0)
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	kmem_free(dkdevid, un->un_sys_blocksize);

	/* Re-acquire SD_MUTEX: the caller expects it held on return */
	mutex_enter(SD_MUTEX(un));
	return (status);
}
5443 
5444 
5445 /*
5446  *    Function: sd_check_vpd_page_support
5447  *
5448  * Description: This routine sends an inquiry command with the EVPD bit set and
5449  *		a page code of 0x00 to the device. It is used to determine which
5450  *		vital product pages are available to find the devid. We are
5451  *		looking for pages 0x83 or 0x80.  If we return a negative 1, the
5452  *		device does not support that command.
5453  *
 *   Arguments: ssc - ssc contains pointer to driver soft state (unit)
 *		structure
 *
 * Return Code: 0 - success; the supported pages are recorded in
 *		un_vpd_page_mask
 *		-1 - the device does not implement VPD pages, or the
 *		INQUIRY for the supported-pages list failed
5458  *
5459  *     Context: This routine can sleep.
5460  */
5461 
static int
sd_check_vpd_page_support(sd_ssc_t *ssc)
{
	uchar_t	*page_list	= NULL;
	uchar_t	page_length	= 0xff;	/* Use max possible length */
	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
	int    	rval		= 0;
	int	counter;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* The INQUIRY may sleep; drop SD_MUTEX for the duration */
	mutex_exit(SD_MUTEX(un));

	/*
	 * We'll set the page length to the maximum to save figuring it out
	 * with an additional call.
	 */
	page_list =  kmem_zalloc(page_length, KM_SLEEP);

	rval = sd_send_scsi_INQUIRY(ssc, page_list, page_length, evpd,
	    page_code, NULL);

	if (rval != 0)
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	mutex_enter(SD_MUTEX(un));

	/*
	 * Now we must validate that the device accepted the command, as some
	 * drives do not support it.  If the drive does support it, we will
	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
	 * not, we return -1.
	 */
	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
		/* Loop to find one of the 2 pages we need */
		counter = 4;  /* Supported pages start at byte 4, with 0x00 */

		/*
		 * Pages are returned in ascending order, and 0x83 is what we
		 * are hoping for.
		 *
		 * NOTE(review): the bound below is page_list[VPD_PAGE_LENGTH]
		 * + VPD_HEAD_OFFSET; if a device reports a page length close
		 * to 0xff this can index at or past the end of the 0xff-byte
		 * buffer — confirm against the VPD_* macro values in sddef.h.
		 */
		while ((page_list[counter] <= 0x86) &&
		    (counter <= (page_list[VPD_PAGE_LENGTH] +
		    VPD_HEAD_OFFSET))) {
			/*
			 * Add 3 because page_list[3] is the number of
			 * pages minus 3
			 */

			/* Record each recognized page code in the mask */
			switch (page_list[counter]) {
			case 0x00:
				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
				break;
			case 0x80:
				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
				break;
			case 0x81:
				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
				break;
			case 0x82:
				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
				break;
			case 0x83:
				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
				break;
			case 0x86:
				un->un_vpd_page_mask |= SD_VPD_EXTENDED_DATA_PG;
				break;
			}
			counter++;
		}

	} else {
		rval = -1;

		SD_INFO(SD_LOG_ATTACH_DETACH, un,
		    "sd_check_vpd_page_support: This drive does not implement "
		    "VPD pages.\n");
	}

	kmem_free(page_list, page_length);

	return (rval);
}
5551 
5552 
5553 /*
5554  *    Function: sd_setup_pm
5555  *
5556  * Description: Initialize Power Management on the device
5557  *
5558  *     Context: Kernel Thread
5559  */
5560 
static void
sd_setup_pm(sd_ssc_t *ssc, dev_info_t *devi)
{
	uint_t		log_page_size;
	uchar_t		*log_page_data;
	int		rval = 0;
	struct sd_lun	*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);

	/*
	 * Since we are called from attach, holding a mutex for
	 * un is unnecessary. Because some of the routines called
	 * from here require SD_MUTEX to not be held, assert this
	 * right up front.
	 */
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	/*
	 * Since the sd device does not have the 'reg' property,
	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
	 * The following code is to tell cpr that this device
	 * DOES need to be suspended and resumed.
	 */
	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
	    "pm-hardware-state", "needs-suspend-resume");

	/*
	 * This complies with the new power management framework
	 * for certain desktop machines. Create the pm_components
	 * property as a string array property.
	 */
	if (un->un_f_pm_supported) {
		/*
		 * not all devices have a motor, try it first.
		 * some devices may return ILLEGAL REQUEST, some
		 * will hang
		 * The following START_STOP_UNIT is used to check if target
		 * device has a motor.
		 */
		un->un_f_start_stop_supported = TRUE;
		rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_TARGET_START,
		    SD_PATH_DIRECT);

		if (rval != 0) {
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
			un->un_f_start_stop_supported = FALSE;
		}

		/*
		 * create pm properties anyways otherwise the parent can't
		 * go to sleep
		 */
		(void) sd_create_pm_components(devi, un);
		un->un_f_pm_is_enabled = TRUE;
		return;
	}

	/* Without LOG SENSE there is no way to probe for PM support below */
	if (!un->un_f_log_sense_supported) {
		un->un_power_level = SD_SPINDLE_ON;
		un->un_f_pm_is_enabled = FALSE;
		return;
	}

	rval = sd_log_page_supported(ssc, START_STOP_CYCLE_PAGE);

#ifdef	SDDEBUG
	if (sd_force_pm_supported) {
		/* Force a successful result */
		rval = 1;
	}
#endif

	/*
	 * If the start-stop cycle counter log page is not supported
	 * or if the pm-capable property is SD_PM_CAPABLE_FALSE (0)
	 * then we should not create the pm_components property.
	 */
	if (rval == -1) {
		/*
		 * Error.
		 * Reading log sense failed, most likely this is
		 * an older drive that does not support log sense.
		 * If this fails auto-pm is not supported.
		 */
		un->un_power_level = SD_SPINDLE_ON;
		un->un_f_pm_is_enabled = FALSE;

	} else if (rval == 0) {
		/*
		 * Page not found.
		 * The start stop cycle counter is implemented as page
		 * START_STOP_CYCLE_PAGE_VU_PAGE (0x31) in older disks. For
		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
		 */
		if (sd_log_page_supported(ssc, START_STOP_CYCLE_VU_PAGE) == 1) {
			/*
			 * Page found, use this one.
			 */
			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
			un->un_f_pm_is_enabled = TRUE;
		} else {
			/*
			 * Error or page not found.
			 * auto-pm is not supported for this device.
			 */
			un->un_power_level = SD_SPINDLE_ON;
			un->un_f_pm_is_enabled = FALSE;
		}
	} else {
		/*
		 * Page found, use it.
		 */
		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
		un->un_f_pm_is_enabled = TRUE;
	}


	if (un->un_f_pm_is_enabled == TRUE) {
		/* Read the cycle-counter page to confirm PM really works */
		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);

		rval = sd_send_scsi_LOG_SENSE(ssc, log_page_data,
		    log_page_size, un->un_start_stop_cycle_page,
		    0x01, 0, SD_PATH_DIRECT);

		if (rval != 0) {
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		}

#ifdef	SDDEBUG
		if (sd_force_pm_supported) {
			/* Force a successful result */
			rval = 0;
		}
#endif

		/*
		 * If the Log sense for Page( Start/stop cycle counter page)
		 * succeeds, then power management is supported and we can
		 * enable auto-pm.
		 */
		if (rval == 0)  {
			(void) sd_create_pm_components(devi, un);
		} else {
			un->un_power_level = SD_SPINDLE_ON;
			un->un_f_pm_is_enabled = FALSE;
		}

		kmem_free(log_page_data, log_page_size);
	}
}
5714 
5715 
5716 /*
5717  *    Function: sd_create_pm_components
5718  *
5719  * Description: Initialize PM property.
5720  *
5721  *     Context: Kernel thread context
5722  */
5723 
static void
sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
{
	/* Single component "spindle-motor" with two power levels: off/on */
	char *pm_comp[] = { "NAME=spindle-motor", "0=off", "1=on", NULL };

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
	    "pm-components", pm_comp, 3) == DDI_PROP_SUCCESS) {
		/*
		 * When components are initially created they are idle,
		 * power up any non-removables.
		 * Note: the return value of pm_raise_power can't be used
		 * for determining if PM should be enabled for this device.
		 * Even if you check the return values and remove this
		 * property created above, the PM framework will not honor the
		 * change after the first call to pm_raise_power. Hence,
		 * removal of that property does not help if pm_raise_power
		 * fails. In the case of removable media, the start/stop
		 * will fail if the media is not present.
		 */
		if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
		    SD_SPINDLE_ON) == DDI_SUCCESS)) {
			/* Lock order: SD_MUTEX first, then un_pm_mutex */
			mutex_enter(SD_MUTEX(un));
			un->un_power_level = SD_SPINDLE_ON;
			mutex_enter(&un->un_pm_mutex);
			/* Set to on and not busy. */
			un->un_pm_count = 0;
		} else {
			mutex_enter(SD_MUTEX(un));
			un->un_power_level = SD_SPINDLE_OFF;
			mutex_enter(&un->un_pm_mutex);
			/* Set to off. */
			un->un_pm_count = -1;
		}
		mutex_exit(&un->un_pm_mutex);
		mutex_exit(SD_MUTEX(un));
	} else {
		/* Property creation failed: leave spindle on, disable PM */
		un->un_power_level = SD_SPINDLE_ON;
		un->un_f_pm_is_enabled = FALSE;
	}
}
5766 
5767 
5768 /*
5769  *    Function: sd_ddi_suspend
5770  *
5771  * Description: Performs system power-down operations. This includes
5772  *		setting the drive state to indicate its suspended so
5773  *		that no new commands will be accepted. Also, wait for
5774  *		all commands that are in transport or queued to a timer
5775  *		for retry to complete. All timeout threads are cancelled.
5776  *
5777  * Return Code: DDI_FAILURE or DDI_SUCCESS
5778  *
5779  *     Context: Kernel thread context
5780  */
5781 
static int
sd_ddi_suspend(dev_info_t *devi)
{
	struct	sd_lun	*un;
	clock_t		wait_cmds_complete;	/* absolute lbolt deadline */

	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
	if (un == NULL) {
		return (DDI_FAILURE);
	}

	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");

	mutex_enter(SD_MUTEX(un));

	/* Return success if the device is already suspended. */
	if (un->un_state == SD_STATE_SUSPENDED) {
		mutex_exit(SD_MUTEX(un));
		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
		    "device already suspended, exiting\n");
		return (DDI_SUCCESS);
	}

	/* Return failure if the device is being used by HA */
	if (un->un_resvd_status &
	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
		mutex_exit(SD_MUTEX(un));
		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
		    "device in use by HA, exiting\n");
		return (DDI_FAILURE);
	}

	/*
	 * Return failure if the device is in a resource wait
	 * or power changing state.
	 */
	if ((un->un_state == SD_STATE_RWAIT) ||
	    (un->un_state == SD_STATE_PM_CHANGING)) {
		mutex_exit(SD_MUTEX(un));
		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
		    "device in resource wait state, exiting\n");
		return (DDI_FAILURE);
	}


	/* Save the current state so a failed/aborted suspend can restore it */
	un->un_save_state = un->un_last_state;
	New_state(un, SD_STATE_SUSPENDED);

	/*
	 * Wait for all commands that are in transport or queued to a timer
	 * for retry to complete.
	 *
	 * While waiting, no new commands will be accepted or sent because of
	 * the new state we set above.
	 *
	 * Wait till current operation has completed. If we are in the resource
	 * wait state (with an intr outstanding) then we need to wait till the
	 * intr completes and starts the next cmd. We want to wait for
	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
	 */
	wait_cmds_complete = ddi_get_lbolt() +
	    (sd_wait_cmds_complete * drv_usectohz(1000000));

	while (un->un_ncmds_in_transport != 0) {
		/*
		 * Fail if commands do not finish in the specified time.
		 */
		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
		    wait_cmds_complete) == -1) {
			/*
			 * Undo the state changes made above. Everything
			 * must go back to it's original value.
			 */
			Restore_state(un);
			un->un_last_state = un->un_save_state;
			/* Wake up any threads that might be waiting. */
			cv_broadcast(&un->un_suspend_cv);
			mutex_exit(SD_MUTEX(un));
			SD_ERROR(SD_LOG_IO_PM, un,
			    "sd_ddi_suspend: failed due to outstanding cmds\n");
			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
			return (DDI_FAILURE);
		}
	}

	/*
	 * Cancel SCSI watch thread and timeouts, if any are active
	 *
	 * Each timeout below follows the same pattern: capture the id,
	 * clear the softstate field, then drop SD_MUTEX across the
	 * untimeout(9F) call so a concurrently-firing callback that
	 * grabs SD_MUTEX cannot deadlock against us.
	 */

	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
		opaque_t temp_token = un->un_swr_token;
		mutex_exit(SD_MUTEX(un));
		scsi_watch_suspend(temp_token);
		mutex_enter(SD_MUTEX(un));
	}

	if (un->un_reset_throttle_timeid != NULL) {
		timeout_id_t temp_id = un->un_reset_throttle_timeid;
		un->un_reset_throttle_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	if (un->un_dcvb_timeid != NULL) {
		timeout_id_t temp_id = un->un_dcvb_timeid;
		un->un_dcvb_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	mutex_enter(&un->un_pm_mutex);
	if (un->un_pm_timeid != NULL) {
		timeout_id_t temp_id = un->un_pm_timeid;
		un->un_pm_timeid = NULL;
		mutex_exit(&un->un_pm_mutex);
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	} else {
		mutex_exit(&un->un_pm_mutex);
	}

	if (un->un_retry_timeid != NULL) {
		timeout_id_t temp_id = un->un_retry_timeid;
		un->un_retry_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));

		/*
		 * The cancelled retry command, if any, goes back onto the
		 * head of the wait queue so it is not lost across suspend.
		 */
		if (un->un_retry_bp != NULL) {
			un->un_retry_bp->av_forw = un->un_waitq_headp;
			un->un_waitq_headp = un->un_retry_bp;
			if (un->un_waitq_tailp == NULL) {
				un->un_waitq_tailp = un->un_retry_bp;
			}
			un->un_retry_bp = NULL;
			un->un_retry_statp = NULL;
		}
	}

	if (un->un_direct_priority_timeid != NULL) {
		timeout_id_t temp_id = un->un_direct_priority_timeid;
		un->un_direct_priority_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	if (un->un_f_is_fibre == TRUE) {
		/*
		 * Remove callbacks for insert and remove events
		 */
		if (un->un_insert_event != NULL) {
			mutex_exit(SD_MUTEX(un));
			(void) ddi_remove_event_handler(un->un_insert_cb_id);
			mutex_enter(SD_MUTEX(un));
			un->un_insert_event = NULL;
		}

		if (un->un_remove_event != NULL) {
			mutex_exit(SD_MUTEX(un));
			(void) ddi_remove_event_handler(un->un_remove_cb_id);
			mutex_enter(SD_MUTEX(un));
			un->un_remove_event = NULL;
		}
	}

	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");

	return (DDI_SUCCESS);
}
5957 
5958 
5959 /*
5960  *    Function: sd_ddi_pm_suspend
5961  *
5962  * Description: Set the drive state to low power.
5963  *		Someone else is required to actually change the drive
5964  *		power level.
5965  *
5966  *   Arguments: un - driver soft state (unit) structure
5967  *
5968  * Return Code: DDI_FAILURE or DDI_SUCCESS
5969  *
5970  *     Context: Kernel thread context
5971  */
5972 
5973 static int
5974 sd_ddi_pm_suspend(struct sd_lun *un)
5975 {
5976 	ASSERT(un != NULL);
5977 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: entry\n");
5978 
5979 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5980 	mutex_enter(SD_MUTEX(un));
5981 
5982 	/*
5983 	 * Exit if power management is not enabled for this device, or if
5984 	 * the device is being used by HA.
5985 	 */
5986 	if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
5987 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
5988 		mutex_exit(SD_MUTEX(un));
5989 		SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exiting\n");
5990 		return (DDI_SUCCESS);
5991 	}
5992 
5993 	SD_INFO(SD_LOG_POWER, un, "sd_ddi_pm_suspend: un_ncmds_in_driver=%ld\n",
5994 	    un->un_ncmds_in_driver);
5995 
5996 	/*
5997 	 * See if the device is not busy, ie.:
5998 	 *    - we have no commands in the driver for this device
5999 	 *    - not waiting for resources
6000 	 */
6001 	if ((un->un_ncmds_in_driver == 0) &&
6002 	    (un->un_state != SD_STATE_RWAIT)) {
6003 		/*
6004 		 * The device is not busy, so it is OK to go to low power state.
6005 		 * Indicate low power, but rely on someone else to actually
6006 		 * change it.
6007 		 */
6008 		mutex_enter(&un->un_pm_mutex);
6009 		un->un_pm_count = -1;
6010 		mutex_exit(&un->un_pm_mutex);
6011 		un->un_power_level = SD_SPINDLE_OFF;
6012 	}
6013 
6014 	mutex_exit(SD_MUTEX(un));
6015 
6016 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exit\n");
6017 
6018 	return (DDI_SUCCESS);
6019 }
6020 
6021 
6022 /*
6023  *    Function: sd_ddi_resume
6024  *
6025  * Description: Performs system power-up operations..
6026  *
6027  * Return Code: DDI_SUCCESS
6028  *		DDI_FAILURE
6029  *
6030  *     Context: Kernel thread context
6031  */
6032 
static int
sd_ddi_resume(dev_info_t *devi)
{
	struct	sd_lun	*un;

	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
	if (un == NULL) {
		return (DDI_FAILURE);
	}

	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");

	mutex_enter(SD_MUTEX(un));
	Restore_state(un);

	/*
	 * Restore the state which was saved at suspend time to give
	 * the right state in un_last_state
	 */
	un->un_last_state = un->un_save_state;
	/*
	 * Note: throttle comes back at full.
	 * Also note: this MUST be done before calling pm_raise_power
	 * otherwise the system can get hung in biowait. The scenario where
	 * this'll happen is under cpr suspend. Writing of the system
	 * state goes through sddump, which writes 0 to un_throttle. If
	 * writing the system state then fails, example if the partition is
	 * too small, then cpr attempts a resume. If throttle isn't restored
	 * from the saved value until after calling pm_raise_power then
	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
	 * in biowait.
	 */
	un->un_throttle = un->un_saved_throttle;

	/*
	 * The chance of failure is very rare as the only command done in power
	 * entry point is START command when you transition from 0->1 or
	 * unknown->1. Put it to SPINDLE ON state irrespective of the state at
	 * which suspend was done. Ignore the return value as the resume should
	 * not be failed. In the case of removable media the media need not be
	 * inserted and hence there is a chance that raise power will fail with
	 * media not present.
	 */
	if (un->un_f_attach_spinup) {
		/* pm_raise_power must be called without SD_MUTEX held */
		mutex_exit(SD_MUTEX(un));
		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
		mutex_enter(SD_MUTEX(un));
	}

	/*
	 * Don't broadcast to the suspend cv and therefore possibly
	 * start I/O until after power has been restored.
	 */
	cv_broadcast(&un->un_suspend_cv);
	cv_broadcast(&un->un_state_cv);

	/* restart thread */
	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
		scsi_watch_resume(un->un_swr_token);
	}

#if (defined(__fibre))
	if (un->un_f_is_fibre == TRUE) {
		/*
		 * Add callbacks for insert and remove events
		 */
		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
			sd_init_event_callbacks(un);
		}
	}
#endif

	/*
	 * Transport any pending commands to the target.
	 *
	 * If this is a low-activity device commands in queue will have to wait
	 * until new commands come in, which may take awhile. Also, we
	 * specifically don't check un_ncmds_in_transport because we know that
	 * there really are no commands in progress after the unit was
	 * suspended and we could have reached the throttle level, been
	 * suspended, and have no new commands coming in for awhile. Highly
	 * unlikely, but so is the low-activity disk scenario.
	 */
	ddi_xbuf_dispatch(un->un_xbuf_attr);

	sd_start_cmds(un, NULL);
	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");

	return (DDI_SUCCESS);
}
6125 
6126 
6127 /*
6128  *    Function: sd_ddi_pm_resume
6129  *
6130  * Description: Set the drive state to powered on.
6131  *		Someone else is required to actually change the drive
6132  *		power level.
6133  *
6134  *   Arguments: un - driver soft state (unit) structure
6135  *
6136  * Return Code: DDI_SUCCESS
6137  *
6138  *     Context: Kernel thread context
6139  */
6140 
static int
sd_ddi_pm_resume(struct sd_lun *un)
{
	ASSERT(un != NULL);

	ASSERT(!mutex_owned(SD_MUTEX(un)));
	mutex_enter(SD_MUTEX(un));
	/* Record the new level; the actual power change is done elsewhere */
	un->un_power_level = SD_SPINDLE_ON;

	ASSERT(!mutex_owned(&un->un_pm_mutex));
	mutex_enter(&un->un_pm_mutex);
	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
		un->un_pm_count++;
		/* Coming out of low power, the count must land at 0 (idle) */
		ASSERT(un->un_pm_count == 0);
		/*
		 * Note: no longer do the cv_broadcast on un_suspend_cv. The
		 * un_suspend_cv is for a system resume, not a power management
		 * device resume. (4297749)
		 *	 cv_broadcast(&un->un_suspend_cv);
		 */
	}
	mutex_exit(&un->un_pm_mutex);
	mutex_exit(SD_MUTEX(un));

	return (DDI_SUCCESS);
}
6167 
6168 
6169 /*
6170  *    Function: sd_pm_idletimeout_handler
6171  *
6172  * Description: A timer routine that's active only while a device is busy.
6173  *		The purpose is to extend slightly the pm framework's busy
6174  *		view of the device to prevent busy/idle thrashing for
6175  *		back-to-back commands. Do this by comparing the current time
6176  *		to the time at which the last command completed and when the
6177  *		difference is greater than sd_pm_idletime, call
6178  *		pm_idle_component. In addition to indicating idle to the pm
6179  *		framework, update the chain type to again use the internal pm
6180  *		layers of the driver.
6181  *
6182  *   Arguments: arg - driver soft state (unit) structure
6183  *
6184  *     Context: Executes in a timeout(9F) thread context
6185  */
6186 
static void
sd_pm_idletimeout_handler(void *arg)
{
	struct sd_lun *un = arg;

	time_t	now;

	mutex_enter(&sd_detach_mutex);
	if (un->un_detach_count != 0) {
		/* Abort if the instance is detaching */
		mutex_exit(&sd_detach_mutex);
		return;
	}
	mutex_exit(&sd_detach_mutex);

	/*
	 * NOTE(review): ddi_get_time() is wall-clock time and can jump if
	 * the time-of-day is changed; confirm whether un_pm_idle_time is
	 * recorded with the same source elsewhere in this file before
	 * considering a monotonic alternative.
	 */
	now = ddi_get_time();
	/*
	 * Grab both mutexes, in the proper order, since we're accessing
	 * both PM and softstate variables.
	 */
	mutex_enter(SD_MUTEX(un));
	mutex_enter(&un->un_pm_mutex);
	if (((now - un->un_pm_idle_time) > sd_pm_idletime) &&
	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
		/*
		 * Update the chain types.
		 * This takes affect on the next new command received.
		 */
		if (un->un_f_non_devbsize_supported) {
			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
		} else {
			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
		}
		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;

		SD_TRACE(SD_LOG_IO_PM, un,
		    "sd_pm_idletimeout_handler: idling device\n");
		(void) pm_idle_component(SD_DEVINFO(un), 0);
		un->un_pm_idle_timeid = NULL;
	} else {
		/* Not idle yet (or busy): re-arm and check again later */
		un->un_pm_idle_timeid =
		    timeout(sd_pm_idletimeout_handler, un,
		    (drv_usectohz((clock_t)300000))); /* 300 ms. */
	}
	mutex_exit(&un->un_pm_mutex);
	mutex_exit(SD_MUTEX(un));
}
6234 
6235 
6236 /*
6237  *    Function: sd_pm_timeout_handler
6238  *
6239  * Description: Callback to tell framework we are idle.
6240  *
6241  *     Context: timeout(9f) thread context.
6242  */
6243 
6244 static void
6245 sd_pm_timeout_handler(void *arg)
6246 {
6247 	struct sd_lun *un = arg;
6248 
6249 	(void) pm_idle_component(SD_DEVINFO(un), 0);
6250 	mutex_enter(&un->un_pm_mutex);
6251 	un->un_pm_timeid = NULL;
6252 	mutex_exit(&un->un_pm_mutex);
6253 }
6254 
6255 
6256 /*
6257  *    Function: sdpower
6258  *
6259  * Description: PM entry point.
6260  *
6261  * Return Code: DDI_SUCCESS
6262  *		DDI_FAILURE
6263  *
6264  *     Context: Kernel thread context
6265  */
6266 
6267 static int
6268 sdpower(dev_info_t *devi, int component, int level)
6269 {
6270 	struct sd_lun	*un;
6271 	int		instance;
6272 	int		rval = DDI_SUCCESS;
6273 	uint_t		i, log_page_size, maxcycles, ncycles;
6274 	uchar_t		*log_page_data;
6275 	int		log_sense_page;
6276 	int		medium_present;
6277 	time_t		intvlp;
6278 	dev_t		dev;
6279 	struct pm_trans_data	sd_pm_tran_data;
6280 	uchar_t		save_state;
6281 	int		sval;
6282 	uchar_t		state_before_pm;
6283 	int		got_semaphore_here;
6284 	sd_ssc_t	*ssc;
6285 
6286 	instance = ddi_get_instance(devi);
6287 
6288 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
6289 	    (SD_SPINDLE_OFF > level) || (level > SD_SPINDLE_ON) ||
6290 	    component != 0) {
6291 		return (DDI_FAILURE);
6292 	}
6293 
6294 	dev = sd_make_device(SD_DEVINFO(un));
6295 	ssc = sd_ssc_init(un);
6296 
6297 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
6298 
6299 	/*
6300 	 * Must synchronize power down with close.
6301 	 * Attempt to decrement/acquire the open/close semaphore,
6302 	 * but do NOT wait on it. If it's not greater than zero,
6303 	 * ie. it can't be decremented without waiting, then
6304 	 * someone else, either open or close, already has it
6305 	 * and the try returns 0. Use that knowledge here to determine
6306 	 * if it's OK to change the device power level.
6307 	 * Also, only increment it on exit if it was decremented, ie. gotten,
6308 	 * here.
6309 	 */
6310 	got_semaphore_here = sema_tryp(&un->un_semoclose);
6311 
6312 	mutex_enter(SD_MUTEX(un));
6313 
6314 	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
6315 	    un->un_ncmds_in_driver);
6316 
6317 	/*
6318 	 * If un_ncmds_in_driver is non-zero it indicates commands are
6319 	 * already being processed in the driver, or if the semaphore was
6320 	 * not gotten here it indicates an open or close is being processed.
6321 	 * At the same time somebody is requesting to go low power which
6322 	 * can't happen, therefore we need to return failure.
6323 	 */
6324 	if ((level == SD_SPINDLE_OFF) &&
6325 	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
6326 		mutex_exit(SD_MUTEX(un));
6327 
6328 		if (got_semaphore_here != 0) {
6329 			sema_v(&un->un_semoclose);
6330 		}
6331 		SD_TRACE(SD_LOG_IO_PM, un,
6332 		    "sdpower: exit, device has queued cmds.\n");
6333 
6334 		goto sdpower_failed;
6335 	}
6336 
6337 	/*
6338 	 * if it is OFFLINE that means the disk is completely dead
6339 	 * in our case we have to put the disk in on or off by sending commands
6340 	 * Of course that will fail anyway so return back here.
6341 	 *
6342 	 * Power changes to a device that's OFFLINE or SUSPENDED
6343 	 * are not allowed.
6344 	 */
6345 	if ((un->un_state == SD_STATE_OFFLINE) ||
6346 	    (un->un_state == SD_STATE_SUSPENDED)) {
6347 		mutex_exit(SD_MUTEX(un));
6348 
6349 		if (got_semaphore_here != 0) {
6350 			sema_v(&un->un_semoclose);
6351 		}
6352 		SD_TRACE(SD_LOG_IO_PM, un,
6353 		    "sdpower: exit, device is off-line.\n");
6354 
6355 		goto sdpower_failed;
6356 	}
6357 
6358 	/*
6359 	 * Change the device's state to indicate it's power level
6360 	 * is being changed. Do this to prevent a power off in the
6361 	 * middle of commands, which is especially bad on devices
6362 	 * that are really powered off instead of just spun down.
6363 	 */
6364 	state_before_pm = un->un_state;
6365 	un->un_state = SD_STATE_PM_CHANGING;
6366 
6367 	mutex_exit(SD_MUTEX(un));
6368 
6369 	/*
6370 	 * If "pm-capable" property is set to TRUE by HBA drivers,
6371 	 * bypass the following checking, otherwise, check the log
6372 	 * sense information for this device
6373 	 */
6374 	if ((level == SD_SPINDLE_OFF) && un->un_f_log_sense_supported) {
6375 		/*
6376 		 * Get the log sense information to understand whether the
6377 		 * the powercycle counts have gone beyond the threshhold.
6378 		 */
6379 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
6380 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
6381 
6382 		mutex_enter(SD_MUTEX(un));
6383 		log_sense_page = un->un_start_stop_cycle_page;
6384 		mutex_exit(SD_MUTEX(un));
6385 
6386 		rval = sd_send_scsi_LOG_SENSE(ssc, log_page_data,
6387 		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
6388 
6389 		if (rval != 0) {
6390 			if (rval == EIO)
6391 				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
6392 			else
6393 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6394 		}
6395 
6396 #ifdef	SDDEBUG
6397 		if (sd_force_pm_supported) {
6398 			/* Force a successful result */
6399 			rval = 0;
6400 		}
6401 #endif
6402 		if (rval != 0) {
6403 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
6404 			    "Log Sense Failed\n");
6405 
6406 			kmem_free(log_page_data, log_page_size);
6407 			/* Cannot support power management on those drives */
6408 
6409 			if (got_semaphore_here != 0) {
6410 				sema_v(&un->un_semoclose);
6411 			}
6412 			/*
6413 			 * On exit put the state back to it's original value
6414 			 * and broadcast to anyone waiting for the power
6415 			 * change completion.
6416 			 */
6417 			mutex_enter(SD_MUTEX(un));
6418 			un->un_state = state_before_pm;
6419 			cv_broadcast(&un->un_suspend_cv);
6420 			mutex_exit(SD_MUTEX(un));
6421 			SD_TRACE(SD_LOG_IO_PM, un,
6422 			    "sdpower: exit, Log Sense Failed.\n");
6423 
6424 			goto sdpower_failed;
6425 		}
6426 
6427 		/*
6428 		 * From the page data - Convert the essential information to
6429 		 * pm_trans_data
6430 		 */
6431 		maxcycles =
6432 		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
6433 		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
6434 
6435 		sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
6436 
6437 		ncycles =
6438 		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
6439 		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
6440 
6441 		sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
6442 
6443 		for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
6444 			sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
6445 			    log_page_data[8+i];
6446 		}
6447 
6448 		kmem_free(log_page_data, log_page_size);
6449 
6450 		/*
6451 		 * Call pm_trans_check routine to get the Ok from
6452 		 * the global policy
6453 		 */
6454 
6455 		sd_pm_tran_data.format = DC_SCSI_FORMAT;
6456 		sd_pm_tran_data.un.scsi_cycles.flag = 0;
6457 
6458 		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
6459 #ifdef	SDDEBUG
6460 		if (sd_force_pm_supported) {
6461 			/* Force a successful result */
6462 			rval = 1;
6463 		}
6464 #endif
6465 		switch (rval) {
6466 		case 0:
6467 			/*
6468 			 * Not Ok to Power cycle or error in parameters passed
6469 			 * Would have given the advised time to consider power
6470 			 * cycle. Based on the new intvlp parameter we are
6471 			 * supposed to pretend we are busy so that pm framework
6472 			 * will never call our power entry point. Because of
6473 			 * that install a timeout handler and wait for the
6474 			 * recommended time to elapse so that power management
6475 			 * can be effective again.
6476 			 *
6477 			 * To effect this behavior, call pm_busy_component to
6478 			 * indicate to the framework this device is busy.
6479 			 * By not adjusting un_pm_count the rest of PM in
6480 			 * the driver will function normally, and independent
6481 			 * of this but because the framework is told the device
6482 			 * is busy it won't attempt powering down until it gets
6483 			 * a matching idle. The timeout handler sends this.
6484 			 * Note: sd_pm_entry can't be called here to do this
6485 			 * because sdpower may have been called as a result
6486 			 * of a call to pm_raise_power from within sd_pm_entry.
6487 			 *
6488 			 * If a timeout handler is already active then
6489 			 * don't install another.
6490 			 */
6491 			mutex_enter(&un->un_pm_mutex);
6492 			if (un->un_pm_timeid == NULL) {
6493 				un->un_pm_timeid =
6494 				    timeout(sd_pm_timeout_handler,
6495 				    un, intvlp * drv_usectohz(1000000));
6496 				mutex_exit(&un->un_pm_mutex);
6497 				(void) pm_busy_component(SD_DEVINFO(un), 0);
6498 			} else {
6499 				mutex_exit(&un->un_pm_mutex);
6500 			}
6501 			if (got_semaphore_here != 0) {
6502 				sema_v(&un->un_semoclose);
6503 			}
6504 			/*
6505 			 * On exit put the state back to it's original value
6506 			 * and broadcast to anyone waiting for the power
6507 			 * change completion.
6508 			 */
6509 			mutex_enter(SD_MUTEX(un));
6510 			un->un_state = state_before_pm;
6511 			cv_broadcast(&un->un_suspend_cv);
6512 			mutex_exit(SD_MUTEX(un));
6513 
6514 			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
6515 			    "trans check Failed, not ok to power cycle.\n");
6516 
6517 			goto sdpower_failed;
6518 		case -1:
6519 			if (got_semaphore_here != 0) {
6520 				sema_v(&un->un_semoclose);
6521 			}
6522 			/*
6523 			 * On exit put the state back to it's original value
6524 			 * and broadcast to anyone waiting for the power
6525 			 * change completion.
6526 			 */
6527 			mutex_enter(SD_MUTEX(un));
6528 			un->un_state = state_before_pm;
6529 			cv_broadcast(&un->un_suspend_cv);
6530 			mutex_exit(SD_MUTEX(un));
6531 			SD_TRACE(SD_LOG_IO_PM, un,
6532 			    "sdpower: exit, trans check command Failed.\n");
6533 
6534 			goto sdpower_failed;
6535 		}
6536 	}
6537 
6538 	if (level == SD_SPINDLE_OFF) {
6539 		/*
6540 		 * Save the last state... if the STOP FAILS we need it
6541 		 * for restoring
6542 		 */
6543 		mutex_enter(SD_MUTEX(un));
6544 		save_state = un->un_last_state;
6545 		/*
6546 		 * There must not be any cmds. getting processed
6547 		 * in the driver when we get here. Power to the
6548 		 * device is potentially going off.
6549 		 */
6550 		ASSERT(un->un_ncmds_in_driver == 0);
6551 		mutex_exit(SD_MUTEX(un));
6552 
6553 		/*
6554 		 * For now suspend the device completely before spindle is
6555 		 * turned off
6556 		 */
6557 		if ((rval = sd_ddi_pm_suspend(un)) == DDI_FAILURE) {
6558 			if (got_semaphore_here != 0) {
6559 				sema_v(&un->un_semoclose);
6560 			}
6561 			/*
6562 			 * On exit put the state back to it's original value
6563 			 * and broadcast to anyone waiting for the power
6564 			 * change completion.
6565 			 */
6566 			mutex_enter(SD_MUTEX(un));
6567 			un->un_state = state_before_pm;
6568 			cv_broadcast(&un->un_suspend_cv);
6569 			mutex_exit(SD_MUTEX(un));
6570 			SD_TRACE(SD_LOG_IO_PM, un,
6571 			    "sdpower: exit, PM suspend Failed.\n");
6572 
6573 			goto sdpower_failed;
6574 		}
6575 	}
6576 
6577 	/*
6578 	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
6579 	 * close, or strategy. Dump no long uses this routine, it uses it's
6580 	 * own code so it can be done in polled mode.
6581 	 */
6582 
6583 	medium_present = TRUE;
6584 
6585 	/*
6586 	 * When powering up, issue a TUR in case the device is at unit
6587 	 * attention.  Don't do retries. Bypass the PM layer, otherwise
6588 	 * a deadlock on un_pm_busy_cv will occur.
6589 	 */
6590 	if (level == SD_SPINDLE_ON) {
6591 		sval = sd_send_scsi_TEST_UNIT_READY(ssc,
6592 		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
6593 		if (sval != 0)
6594 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6595 	}
6596 
6597 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
6598 	    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
6599 
6600 	sval = sd_send_scsi_START_STOP_UNIT(ssc,
6601 	    ((level == SD_SPINDLE_ON) ? SD_TARGET_START : SD_TARGET_STOP),
6602 	    SD_PATH_DIRECT);
6603 	if (sval != 0) {
6604 		if (sval == EIO)
6605 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
6606 		else
6607 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6608 	}
6609 
6610 	/* Command failed, check for media present. */
6611 	if ((sval == ENXIO) && un->un_f_has_removable_media) {
6612 		medium_present = FALSE;
6613 	}
6614 
6615 	/*
6616 	 * The conditions of interest here are:
6617 	 *   if a spindle off with media present fails,
6618 	 *	then restore the state and return an error.
6619 	 *   else if a spindle on fails,
6620 	 *	then return an error (there's no state to restore).
6621 	 * In all other cases we setup for the new state
6622 	 * and return success.
6623 	 */
6624 	switch (level) {
6625 	case SD_SPINDLE_OFF:
6626 		if ((medium_present == TRUE) && (sval != 0)) {
6627 			/* The stop command from above failed */
6628 			rval = DDI_FAILURE;
6629 			/*
6630 			 * The stop command failed, and we have media
6631 			 * present. Put the level back by calling the
6632 			 * sd_pm_resume() and set the state back to
6633 			 * it's previous value.
6634 			 */
6635 			(void) sd_ddi_pm_resume(un);
6636 			mutex_enter(SD_MUTEX(un));
6637 			un->un_last_state = save_state;
6638 			mutex_exit(SD_MUTEX(un));
6639 			break;
6640 		}
6641 		/*
6642 		 * The stop command from above succeeded.
6643 		 */
6644 		if (un->un_f_monitor_media_state) {
6645 			/*
6646 			 * Terminate watch thread in case of removable media
6647 			 * devices going into low power state. This is as per
6648 			 * the requirements of pm framework, otherwise commands
6649 			 * will be generated for the device (through watch
6650 			 * thread), even when the device is in low power state.
6651 			 */
6652 			mutex_enter(SD_MUTEX(un));
6653 			un->un_f_watcht_stopped = FALSE;
6654 			if (un->un_swr_token != NULL) {
6655 				opaque_t temp_token = un->un_swr_token;
6656 				un->un_f_watcht_stopped = TRUE;
6657 				un->un_swr_token = NULL;
6658 				mutex_exit(SD_MUTEX(un));
6659 				(void) scsi_watch_request_terminate(temp_token,
6660 				    SCSI_WATCH_TERMINATE_ALL_WAIT);
6661 			} else {
6662 				mutex_exit(SD_MUTEX(un));
6663 			}
6664 		}
6665 		break;
6666 
6667 	default:	/* The level requested is spindle on... */
6668 		/*
6669 		 * Legacy behavior: return success on a failed spinup
6670 		 * if there is no media in the drive.
6671 		 * Do this by looking at medium_present here.
6672 		 */
6673 		if ((sval != 0) && medium_present) {
6674 			/* The start command from above failed */
6675 			rval = DDI_FAILURE;
6676 			break;
6677 		}
6678 		/*
6679 		 * The start command from above succeeded
6680 		 * Resume the devices now that we have
6681 		 * started the disks
6682 		 */
6683 		(void) sd_ddi_pm_resume(un);
6684 
6685 		/*
6686 		 * Resume the watch thread since it was suspended
6687 		 * when the device went into low power mode.
6688 		 */
6689 		if (un->un_f_monitor_media_state) {
6690 			mutex_enter(SD_MUTEX(un));
6691 			if (un->un_f_watcht_stopped == TRUE) {
6692 				opaque_t temp_token;
6693 
6694 				un->un_f_watcht_stopped = FALSE;
6695 				mutex_exit(SD_MUTEX(un));
6696 				temp_token = scsi_watch_request_submit(
6697 				    SD_SCSI_DEVP(un),
6698 				    sd_check_media_time,
6699 				    SENSE_LENGTH, sd_media_watch_cb,
6700 				    (caddr_t)dev);
6701 				mutex_enter(SD_MUTEX(un));
6702 				un->un_swr_token = temp_token;
6703 			}
6704 			mutex_exit(SD_MUTEX(un));
6705 		}
6706 	}
6707 	if (got_semaphore_here != 0) {
6708 		sema_v(&un->un_semoclose);
6709 	}
6710 	/*
6711 	 * On exit put the state back to it's original value
6712 	 * and broadcast to anyone waiting for the power
6713 	 * change completion.
6714 	 */
6715 	mutex_enter(SD_MUTEX(un));
6716 	un->un_state = state_before_pm;
6717 	cv_broadcast(&un->un_suspend_cv);
6718 	mutex_exit(SD_MUTEX(un));
6719 
6720 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
6721 
6722 	sd_ssc_fini(ssc);
6723 	return (rval);
6724 
6725 sdpower_failed:
6726 
6727 	sd_ssc_fini(ssc);
6728 	return (DDI_FAILURE);
6729 }
6730 
6731 
6732 
6733 /*
6734  *    Function: sdattach
6735  *
6736  * Description: Driver's attach(9e) entry point function.
6737  *
6738  *   Arguments: devi - opaque device info handle
6739  *		cmd  - attach  type
6740  *
6741  * Return Code: DDI_SUCCESS
6742  *		DDI_FAILURE
6743  *
6744  *     Context: Kernel thread context
6745  */
6746 
6747 static int
6748 sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
6749 {
6750 	switch (cmd) {
6751 	case DDI_ATTACH:
6752 		return (sd_unit_attach(devi));
6753 	case DDI_RESUME:
6754 		return (sd_ddi_resume(devi));
6755 	default:
6756 		break;
6757 	}
6758 	return (DDI_FAILURE);
6759 }
6760 
6761 
6762 /*
6763  *    Function: sddetach
6764  *
6765  * Description: Driver's detach(9E) entry point function.
6766  *
6767  *   Arguments: devi - opaque device info handle
6768  *		cmd  - detach  type
6769  *
6770  * Return Code: DDI_SUCCESS
6771  *		DDI_FAILURE
6772  *
6773  *     Context: Kernel thread context
6774  */
6775 
6776 static int
6777 sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
6778 {
6779 	switch (cmd) {
6780 	case DDI_DETACH:
6781 		return (sd_unit_detach(devi));
6782 	case DDI_SUSPEND:
6783 		return (sd_ddi_suspend(devi));
6784 	default:
6785 		break;
6786 	}
6787 	return (DDI_FAILURE);
6788 }
6789 
6790 
6791 /*
6792  *     Function: sd_sync_with_callback
6793  *
6794  *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
6795  *		 state while the callback routine is active.
6796  *
6797  *    Arguments: un: softstate structure for the instance
6798  *
6799  *	Context: Kernel thread context
6800  */
6801 
6802 static void
6803 sd_sync_with_callback(struct sd_lun *un)
6804 {
6805 	ASSERT(un != NULL);
6806 
6807 	mutex_enter(SD_MUTEX(un));
6808 
6809 	ASSERT(un->un_in_callback >= 0);
6810 
6811 	while (un->un_in_callback > 0) {
6812 		mutex_exit(SD_MUTEX(un));
6813 		delay(2);
6814 		mutex_enter(SD_MUTEX(un));
6815 	}
6816 
6817 	mutex_exit(SD_MUTEX(un));
6818 }
6819 
6820 /*
6821  *    Function: sd_unit_attach
6822  *
6823  * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
6824  *		the soft state structure for the device and performs
6825  *		all necessary structure and device initializations.
6826  *
6827  *   Arguments: devi: the system's dev_info_t for the device.
6828  *
6829  * Return Code: DDI_SUCCESS if attach is successful.
6830  *		DDI_FAILURE if any part of the attach fails.
6831  *
6832  *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
6833  *		Kernel thread context only.  Can sleep.
6834  */
6835 
6836 static int
6837 sd_unit_attach(dev_info_t *devi)
6838 {
6839 	struct	scsi_device	*devp;
6840 	struct	sd_lun		*un;
6841 	char			*variantp;
6842 	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
6843 	int	instance;
6844 	int	rval;
6845 	int	wc_enabled;
6846 	int	tgt;
6847 	uint64_t	capacity;
6848 	uint_t		lbasize = 0;
6849 	dev_info_t	*pdip = ddi_get_parent(devi);
6850 	int		offbyone = 0;
6851 	int		geom_label_valid = 0;
6852 	sd_ssc_t	*ssc;
6853 	int		status;
6854 	struct sd_fm_internal	*sfip = NULL;
6855 	int		max_xfer_size;
6856 
6857 	/*
6858 	 * Retrieve the target driver's private data area. This was set
6859 	 * up by the HBA.
6860 	 */
6861 	devp = ddi_get_driver_private(devi);
6862 
6863 	/*
6864 	 * Retrieve the target ID of the device.
6865 	 */
6866 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
6867 	    SCSI_ADDR_PROP_TARGET, -1);
6868 
6869 	/*
6870 	 * Since we have no idea what state things were left in by the last
6871 	 * user of the device, set up some 'default' settings, ie. turn 'em
6872 	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
6873 	 * Do this before the scsi_probe, which sends an inquiry.
6874 	 * This is a fix for bug (4430280).
6875 	 * Of special importance is wide-xfer. The drive could have been left
6876 	 * in wide transfer mode by the last driver to communicate with it,
6877 	 * this includes us. If that's the case, and if the following is not
6878 	 * setup properly or we don't re-negotiate with the drive prior to
6879 	 * transferring data to/from the drive, it causes bus parity errors,
6880 	 * data overruns, and unexpected interrupts. This first occurred when
6881 	 * the fix for bug (4378686) was made.
6882 	 */
6883 	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
6884 	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
6885 	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
6886 
6887 	/*
6888 	 * Currently, scsi_ifsetcap sets tagged-qing capability for all LUNs
6889 	 * on a target. Setting it per lun instance actually sets the
6890 	 * capability of this target, which affects those luns already
6891 	 * attached on the same target. So during attach, we can only disable
6892 	 * this capability only when no other lun has been attached on this
6893 	 * target. By doing this, we assume a target has the same tagged-qing
6894 	 * capability for every lun. The condition can be removed when HBA
6895 	 * is changed to support per lun based tagged-qing capability.
6896 	 */
6897 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
6898 		(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
6899 	}
6900 
6901 	/*
6902 	 * Use scsi_probe() to issue an INQUIRY command to the device.
6903 	 * This call will allocate and fill in the scsi_inquiry structure
6904 	 * and point the sd_inq member of the scsi_device structure to it.
6905 	 * If the attach succeeds, then this memory will not be de-allocated
6906 	 * (via scsi_unprobe()) until the instance is detached.
6907 	 */
6908 	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
6909 		goto probe_failed;
6910 	}
6911 
6912 	/*
6913 	 * Check the device type as specified in the inquiry data and
6914 	 * claim it if it is of a type that we support.
6915 	 */
6916 	switch (devp->sd_inq->inq_dtype) {
6917 	case DTYPE_DIRECT:
6918 		break;
6919 	case DTYPE_RODIRECT:
6920 		break;
6921 	case DTYPE_OPTICAL:
6922 		break;
6923 	case DTYPE_NOTPRESENT:
6924 	default:
6925 		/* Unsupported device type; fail the attach. */
6926 		goto probe_failed;
6927 	}
6928 
6929 	/*
6930 	 * Allocate the soft state structure for this unit.
6931 	 *
6932 	 * We rely upon this memory being set to all zeroes by
6933 	 * ddi_soft_state_zalloc().  We assume that any member of the
6934 	 * soft state structure that is not explicitly initialized by
6935 	 * this routine will have a value of zero.
6936 	 */
6937 	instance = ddi_get_instance(devp->sd_dev);
6938 #ifndef XPV_HVM_DRIVER
6939 	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
6940 		goto probe_failed;
6941 	}
6942 #endif /* !XPV_HVM_DRIVER */
6943 
6944 	/*
6945 	 * Retrieve a pointer to the newly-allocated soft state.
6946 	 *
6947 	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
6948 	 * was successful, unless something has gone horribly wrong and the
6949 	 * ddi's soft state internals are corrupt (in which case it is
6950 	 * probably better to halt here than just fail the attach....)
6951 	 */
6952 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
6953 		panic("sd_unit_attach: NULL soft state on instance:0x%x",
6954 		    instance);
6955 		/*NOTREACHED*/
6956 	}
6957 
6958 	/*
6959 	 * Link the back ptr of the driver soft state to the scsi_device
6960 	 * struct for this lun.
6961 	 * Save a pointer to the softstate in the driver-private area of
6962 	 * the scsi_device struct.
6963 	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
6964 	 * we first set un->un_sd below.
6965 	 */
6966 	un->un_sd = devp;
6967 	devp->sd_private = (opaque_t)un;
6968 
6969 	/*
6970 	 * The following must be after devp is stored in the soft state struct.
6971 	 */
6972 #ifdef SDDEBUG
6973 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6974 	    "%s_unit_attach: un:0x%p instance:%d\n",
6975 	    ddi_driver_name(devi), un, instance);
6976 #endif
6977 
6978 	/*
6979 	 * Set up the device type and node type (for the minor nodes).
6980 	 * By default we assume that the device can at least support the
6981 	 * Common Command Set. Call it a CD-ROM if it reports itself
6982 	 * as a RODIRECT device.
6983 	 */
6984 	switch (devp->sd_inq->inq_dtype) {
6985 	case DTYPE_RODIRECT:
6986 		un->un_node_type = DDI_NT_CD_CHAN;
6987 		un->un_ctype	 = CTYPE_CDROM;
6988 		break;
6989 	case DTYPE_OPTICAL:
6990 		un->un_node_type = DDI_NT_BLOCK_CHAN;
6991 		un->un_ctype	 = CTYPE_ROD;
6992 		break;
6993 	default:
6994 		un->un_node_type = DDI_NT_BLOCK_CHAN;
6995 		un->un_ctype	 = CTYPE_CCS;
6996 		break;
6997 	}
6998 
6999 	/*
7000 	 * Try to read the interconnect type from the HBA.
7001 	 *
7002 	 * Note: This driver is currently compiled as two binaries, a parallel
7003 	 * scsi version (sd) and a fibre channel version (ssd). All functional
7004 	 * differences are determined at compile time. In the future a single
7005 	 * binary will be provided and the interconnect type will be used to
7006 	 * differentiate between fibre and parallel scsi behaviors. At that time
7007 	 * it will be necessary for all fibre channel HBAs to support this
7008 	 * property.
7009 	 *
7010 	 * set un_f_is_fiber to TRUE ( default fiber )
7011 	 */
7012 	un->un_f_is_fibre = TRUE;
7013 	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
7014 	case INTERCONNECT_SSA:
7015 		un->un_interconnect_type = SD_INTERCONNECT_SSA;
7016 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7017 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
7018 		break;
7019 	case INTERCONNECT_PARALLEL:
7020 		un->un_f_is_fibre = FALSE;
7021 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7022 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7023 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
7024 		break;
7025 	case INTERCONNECT_SAS:
7026 		un->un_f_is_fibre = FALSE;
7027 		un->un_interconnect_type = SD_INTERCONNECT_SAS;
7028 		un->un_node_type = DDI_NT_BLOCK_SAS;
7029 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7030 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SAS\n", un);
7031 		break;
7032 	case INTERCONNECT_SATA:
7033 		un->un_f_is_fibre = FALSE;
7034 		un->un_interconnect_type = SD_INTERCONNECT_SATA;
7035 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7036 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SATA\n", un);
7037 		break;
7038 	case INTERCONNECT_FIBRE:
7039 		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
7040 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7041 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
7042 		break;
7043 	case INTERCONNECT_FABRIC:
7044 		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
7045 		un->un_node_type = DDI_NT_BLOCK_FABRIC;
7046 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7047 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
7048 		break;
7049 	default:
7050 #ifdef SD_DEFAULT_INTERCONNECT_TYPE
7051 		/*
7052 		 * The HBA does not support the "interconnect-type" property
7053 		 * (or did not provide a recognized type).
7054 		 *
7055 		 * Note: This will be obsoleted when a single fibre channel
7056 		 * and parallel scsi driver is delivered. In the meantime the
7057 		 * interconnect type will be set to the platform default.If that
7058 		 * type is not parallel SCSI, it means that we should be
7059 		 * assuming "ssd" semantics. However, here this also means that
7060 		 * the FC HBA is not supporting the "interconnect-type" property
7061 		 * like we expect it to, so log this occurrence.
7062 		 */
7063 		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
7064 		if (!SD_IS_PARALLEL_SCSI(un)) {
7065 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7066 			    "sd_unit_attach: un:0x%p Assuming "
7067 			    "INTERCONNECT_FIBRE\n", un);
7068 		} else {
7069 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7070 			    "sd_unit_attach: un:0x%p Assuming "
7071 			    "INTERCONNECT_PARALLEL\n", un);
7072 			un->un_f_is_fibre = FALSE;
7073 		}
7074 #else
7075 		/*
7076 		 * Note: This source will be implemented when a single fibre
7077 		 * channel and parallel scsi driver is delivered. The default
7078 		 * will be to assume that if a device does not support the
7079 		 * "interconnect-type" property it is a parallel SCSI HBA and
7080 		 * we will set the interconnect type for parallel scsi.
7081 		 */
7082 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7083 		un->un_f_is_fibre = FALSE;
7084 #endif
7085 		break;
7086 	}
7087 
7088 	if (un->un_f_is_fibre == TRUE) {
7089 		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
7090 		    SCSI_VERSION_3) {
7091 			switch (un->un_interconnect_type) {
7092 			case SD_INTERCONNECT_FIBRE:
7093 			case SD_INTERCONNECT_SSA:
7094 				un->un_node_type = DDI_NT_BLOCK_WWN;
7095 				break;
7096 			default:
7097 				break;
7098 			}
7099 		}
7100 	}
7101 
7102 	/*
7103 	 * Initialize the Request Sense command for the target
7104 	 */
7105 	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
7106 		goto alloc_rqs_failed;
7107 	}
7108 
7109 	/*
7110 	 * Set un_retry_count with SD_RETRY_COUNT, this is ok for Sparc
7111 	 * with separate binary for sd and ssd.
7112 	 *
7113 	 * x86 has 1 binary, un_retry_count is set base on connection type.
7114 	 * The hardcoded values will go away when Sparc uses 1 binary
7115 	 * for sd and ssd.  This hardcoded values need to match
7116 	 * SD_RETRY_COUNT in sddef.h
7117 	 * The value used is base on interconnect type.
7118 	 * fibre = 3, parallel = 5
7119 	 */
7120 #if defined(__i386) || defined(__amd64)
7121 	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
7122 #else
7123 	un->un_retry_count = SD_RETRY_COUNT;
7124 #endif
7125 
7126 	/*
7127 	 * Set the per disk retry count to the default number of retries
7128 	 * for disks and CDROMs. This value can be overridden by the
7129 	 * disk property list or an entry in sd.conf.
7130 	 */
7131 	un->un_notready_retry_count =
7132 	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un)
7133 	    : DISK_NOT_READY_RETRY_COUNT(un);
7134 
7135 	/*
7136 	 * Set the busy retry count to the default value of un_retry_count.
7137 	 * This can be overridden by entries in sd.conf or the device
7138 	 * config table.
7139 	 */
7140 	un->un_busy_retry_count = un->un_retry_count;
7141 
7142 	/*
7143 	 * Init the reset threshold for retries.  This number determines
7144 	 * how many retries must be performed before a reset can be issued
7145 	 * (for certain error conditions). This can be overridden by entries
7146 	 * in sd.conf or the device config table.
7147 	 */
7148 	un->un_reset_retry_count = (un->un_retry_count / 2);
7149 
7150 	/*
7151 	 * Set the victim_retry_count to the default un_retry_count
7152 	 */
7153 	un->un_victim_retry_count = (2 * un->un_retry_count);
7154 
7155 	/*
7156 	 * Set the reservation release timeout to the default value of
7157 	 * 5 seconds. This can be overridden by entries in ssd.conf or the
7158 	 * device config table.
7159 	 */
7160 	un->un_reserve_release_time = 5;
7161 
7162 	/*
7163 	 * Set up the default maximum transfer size. Note that this may
7164 	 * get updated later in the attach, when setting up default wide
7165 	 * operations for disks.
7166 	 */
7167 #if defined(__i386) || defined(__amd64)
7168 	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
7169 	un->un_partial_dma_supported = 1;
7170 #else
7171 	un->un_max_xfer_size = (uint_t)maxphys;
7172 #endif
7173 
7174 	/*
7175 	 * Get "allow bus device reset" property (defaults to "enabled" if
7176 	 * the property was not defined). This is to disable bus resets for
7177 	 * certain kinds of error recovery. Note: In the future when a run-time
7178 	 * fibre check is available the soft state flag should default to
7179 	 * enabled.
7180 	 */
7181 	if (un->un_f_is_fibre == TRUE) {
7182 		un->un_f_allow_bus_device_reset = TRUE;
7183 	} else {
7184 		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7185 		    "allow-bus-device-reset", 1) != 0) {
7186 			un->un_f_allow_bus_device_reset = TRUE;
7187 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7188 			    "sd_unit_attach: un:0x%p Bus device reset "
7189 			    "enabled\n", un);
7190 		} else {
7191 			un->un_f_allow_bus_device_reset = FALSE;
7192 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7193 			    "sd_unit_attach: un:0x%p Bus device reset "
7194 			    "disabled\n", un);
7195 		}
7196 	}
7197 
7198 	/*
7199 	 * Check if this is an ATAPI device. ATAPI devices use Group 1
7200 	 * Read/Write commands and Group 2 Mode Sense/Select commands.
7201 	 *
7202 	 * Note: The "obsolete" way of doing this is to check for the "atapi"
7203 	 * property. The new "variant" property with a value of "atapi" has been
7204 	 * introduced so that future 'variants' of standard SCSI behavior (like
7205 	 * atapi) could be specified by the underlying HBA drivers by supplying
7206 	 * a new value for the "variant" property, instead of having to define a
7207 	 * new property.
7208 	 */
7209 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
7210 		un->un_f_cfg_is_atapi = TRUE;
7211 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7212 		    "sd_unit_attach: un:0x%p Atapi device\n", un);
7213 	}
7214 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
7215 	    &variantp) == DDI_PROP_SUCCESS) {
7216 		if (strcmp(variantp, "atapi") == 0) {
7217 			un->un_f_cfg_is_atapi = TRUE;
7218 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7219 			    "sd_unit_attach: un:0x%p Atapi device\n", un);
7220 		}
7221 		ddi_prop_free(variantp);
7222 	}
7223 
7224 	un->un_cmd_timeout	= SD_IO_TIME;
7225 
7226 	un->un_busy_timeout  = SD_BSY_TIMEOUT;
7227 
7228 	/* Info on current states, statuses, etc. (Updated frequently) */
7229 	un->un_state		= SD_STATE_NORMAL;
7230 	un->un_last_state	= SD_STATE_NORMAL;
7231 
7232 	/* Control & status info for command throttling */
7233 	un->un_throttle		= sd_max_throttle;
7234 	un->un_saved_throttle	= sd_max_throttle;
7235 	un->un_min_throttle	= sd_min_throttle;
7236 
7237 	if (un->un_f_is_fibre == TRUE) {
7238 		un->un_f_use_adaptive_throttle = TRUE;
7239 	} else {
7240 		un->un_f_use_adaptive_throttle = FALSE;
7241 	}
7242 
7243 	/* Removable media support. */
7244 	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
7245 	un->un_mediastate		= DKIO_NONE;
7246 	un->un_specified_mediastate	= DKIO_NONE;
7247 
7248 	/* CVs for suspend/resume (PM or DR) */
7249 	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
7250 	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
7251 
7252 	/* Power management support. */
7253 	un->un_power_level = SD_SPINDLE_UNINIT;
7254 
7255 	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
7256 	un->un_f_wcc_inprog = 0;
7257 
7258 	/*
7259 	 * The open/close semaphore is used to serialize threads executing
7260 	 * in the driver's open & close entry point routines for a given
7261 	 * instance.
7262 	 */
7263 	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
7264 
7265 	/*
7266 	 * The conf file entry and softstate variable is a forceful override,
7267 	 * meaning a non-zero value must be entered to change the default.
7268 	 */
7269 	un->un_f_disksort_disabled = FALSE;
7270 
7271 	/*
7272 	 * Retrieve the properties from the static driver table or the driver
7273 	 * configuration file (.conf) for this unit and update the soft state
7274 	 * for the device as needed for the indicated properties.
7275 	 * Note: the property configuration needs to occur here as some of the
7276 	 * following routines may have dependencies on soft state flags set
7277 	 * as part of the driver property configuration.
7278 	 */
7279 	sd_read_unit_properties(un);
7280 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7281 	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
7282 
7283 	/*
7284 	 * Only if a device has "hotpluggable" property, it is
7285 	 * treated as hotpluggable device. Otherwise, it is
7286 	 * regarded as non-hotpluggable one.
7287 	 */
7288 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
7289 	    -1) != -1) {
7290 		un->un_f_is_hotpluggable = TRUE;
7291 	}
7292 
7293 	/*
7294 	 * set unit's attributes(flags) according to "hotpluggable" and
7295 	 * RMB bit in INQUIRY data.
7296 	 */
7297 	sd_set_unit_attributes(un, devi);
7298 
7299 	/*
7300 	 * By default, we mark the capacity, lbasize, and geometry
7301 	 * as invalid. Only if we successfully read a valid capacity
7302 	 * will we update the un_blockcount and un_tgt_blocksize with the
7303 	 * valid values (the geometry will be validated later).
7304 	 */
7305 	un->un_f_blockcount_is_valid	= FALSE;
7306 	un->un_f_tgt_blocksize_is_valid	= FALSE;
7307 
7308 	/*
7309 	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
7310 	 * otherwise.
7311 	 */
7312 	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
7313 	un->un_blockcount = 0;
7314 
7315 	/*
7316 	 * Set up the per-instance info needed to determine the correct
7317 	 * CDBs and other info for issuing commands to the target.
7318 	 */
7319 	sd_init_cdb_limits(un);
7320 
7321 	/*
7322 	 * Set up the IO chains to use, based upon the target type.
7323 	 */
7324 	if (un->un_f_non_devbsize_supported) {
7325 		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
7326 	} else {
7327 		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
7328 	}
7329 	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
7330 	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
7331 	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
7332 
7333 	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
7334 	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
7335 	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
7336 	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
7337 
7338 
7339 	if (ISCD(un)) {
7340 		un->un_additional_codes = sd_additional_codes;
7341 	} else {
7342 		un->un_additional_codes = NULL;
7343 	}
7344 
7345 	/*
7346 	 * Create the kstats here so they can be available for attach-time
7347 	 * routines that send commands to the unit (either polled or via
7348 	 * sd_send_scsi_cmd).
7349 	 *
7350 	 * Note: This is a critical sequence that needs to be maintained:
7351 	 *	1) Instantiate the kstats here, before any routines using the
7352 	 *	   iopath (i.e. sd_send_scsi_cmd).
7353 	 *	2) Instantiate and initialize the partition stats
7354 	 *	   (sd_set_pstats).
7355 	 *	3) Initialize the error stats (sd_set_errstats), following
7356 	 *	   sd_validate_geometry(),sd_register_devid(),
7357 	 *	   and sd_cache_control().
7358 	 */
7359 
7360 	un->un_stats = kstat_create(sd_label, instance,
7361 	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
7362 	if (un->un_stats != NULL) {
7363 		un->un_stats->ks_lock = SD_MUTEX(un);
7364 		kstat_install(un->un_stats);
7365 	}
7366 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7367 	    "sd_unit_attach: un:0x%p un_stats created\n", un);
7368 
7369 	sd_create_errstats(un, instance);
7370 	if (un->un_errstats == NULL) {
7371 		goto create_errstats_failed;
7372 	}
7373 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7374 	    "sd_unit_attach: un:0x%p errstats created\n", un);
7375 
7376 	/*
7377 	 * The following if/else code was relocated here from below as part
7378 	 * of the fix for bug (4430280). However with the default setup added
7379 	 * on entry to this routine, it's no longer absolutely necessary for
7380 	 * this to be before the call to sd_spin_up_unit.
7381 	 */
7382 	if (SD_IS_PARALLEL_SCSI(un) || SD_IS_SERIAL(un)) {
7383 		int tq_trigger_flag = (((devp->sd_inq->inq_ansi == 4) ||
7384 		    (devp->sd_inq->inq_ansi == 5)) &&
7385 		    devp->sd_inq->inq_bque) || devp->sd_inq->inq_cmdque;
7386 
7387 		/*
7388 		 * If tagged queueing is supported by the target
7389 		 * and by the host adapter then we will enable it
7390 		 */
7391 		un->un_tagflags = 0;
7392 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) && tq_trigger_flag &&
7393 		    (un->un_f_arq_enabled == TRUE)) {
7394 			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
7395 			    1, 1) == 1) {
7396 				un->un_tagflags = FLAG_STAG;
7397 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7398 				    "sd_unit_attach: un:0x%p tag queueing "
7399 				    "enabled\n", un);
7400 			} else if (scsi_ifgetcap(SD_ADDRESS(un),
7401 			    "untagged-qing", 0) == 1) {
7402 				un->un_f_opt_queueing = TRUE;
7403 				un->un_saved_throttle = un->un_throttle =
7404 				    min(un->un_throttle, 3);
7405 			} else {
7406 				un->un_f_opt_queueing = FALSE;
7407 				un->un_saved_throttle = un->un_throttle = 1;
7408 			}
7409 		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
7410 		    == 1) && (un->un_f_arq_enabled == TRUE)) {
7411 			/* The Host Adapter supports internal queueing. */
7412 			un->un_f_opt_queueing = TRUE;
7413 			un->un_saved_throttle = un->un_throttle =
7414 			    min(un->un_throttle, 3);
7415 		} else {
7416 			un->un_f_opt_queueing = FALSE;
7417 			un->un_saved_throttle = un->un_throttle = 1;
7418 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7419 			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
7420 		}
7421 
7422 		/*
7423 		 * Enable large transfers for SATA/SAS drives
7424 		 */
7425 		if (SD_IS_SERIAL(un)) {
7426 			un->un_max_xfer_size =
7427 			    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7428 			    sd_max_xfer_size, SD_MAX_XFER_SIZE);
7429 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7430 			    "sd_unit_attach: un:0x%p max transfer "
7431 			    "size=0x%x\n", un, un->un_max_xfer_size);
7432 
7433 		}
7434 
7435 		/* Setup or tear down default wide operations for disks */
7436 
7437 		/*
7438 		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
7439 		 * and "ssd_max_xfer_size" to exist simultaneously on the same
7440 		 * system and be set to different values. In the future this
7441 		 * code may need to be updated when the ssd module is
7442 		 * obsoleted and removed from the system. (4299588)
7443 		 */
7444 		if (SD_IS_PARALLEL_SCSI(un) &&
7445 		    (devp->sd_inq->inq_rdf == RDF_SCSI2) &&
7446 		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
7447 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
7448 			    1, 1) == 1) {
7449 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7450 				    "sd_unit_attach: un:0x%p Wide Transfer "
7451 				    "enabled\n", un);
7452 			}
7453 
7454 			/*
7455 			 * If tagged queuing has also been enabled, then
7456 			 * enable large xfers
7457 			 */
7458 			if (un->un_saved_throttle == sd_max_throttle) {
7459 				un->un_max_xfer_size =
7460 				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7461 				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
7462 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7463 				    "sd_unit_attach: un:0x%p max transfer "
7464 				    "size=0x%x\n", un, un->un_max_xfer_size);
7465 			}
7466 		} else {
7467 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
7468 			    0, 1) == 1) {
7469 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7470 				    "sd_unit_attach: un:0x%p "
7471 				    "Wide Transfer disabled\n", un);
7472 			}
7473 		}
7474 	} else {
7475 		un->un_tagflags = FLAG_STAG;
7476 		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
7477 		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
7478 	}
7479 
7480 	/*
7481 	 * If this target supports LUN reset, try to enable it.
7482 	 */
7483 	if (un->un_f_lun_reset_enabled) {
7484 		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
7485 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7486 			    "un:0x%p lun_reset capability set\n", un);
7487 		} else {
7488 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7489 			    "un:0x%p lun-reset capability not set\n", un);
7490 		}
7491 	}
7492 
7493 	/*
7494 	 * Adjust the maximum transfer size. This is to fix
7495 	 * the problem of partial DMA support on SPARC. Some
7496 	 * HBA driver, like aac, has very small dma_attr_maxxfer
7497 	 * size, which requires partial DMA support on SPARC.
7498 	 * In the future the SPARC pci nexus driver may solve
7499 	 * the problem instead of this fix.
7500 	 */
7501 	max_xfer_size = scsi_ifgetcap(SD_ADDRESS(un), "dma-max", 1);
7502 	if ((max_xfer_size > 0) && (max_xfer_size < un->un_max_xfer_size)) {
7503 		/* We need DMA partial even on sparc to ensure sddump() works */
7504 		un->un_max_xfer_size = max_xfer_size;
7505 		if (un->un_partial_dma_supported == 0)
7506 			un->un_partial_dma_supported = 1;
7507 	}
7508 	if (ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
7509 	    DDI_PROP_DONTPASS, "buf_break", 0) == 1) {
7510 		if (ddi_xbuf_attr_setup_brk(un->un_xbuf_attr,
7511 		    un->un_max_xfer_size) == 1) {
7512 			un->un_buf_breakup_supported = 1;
7513 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7514 			    "un:0x%p Buf breakup enabled\n", un);
7515 		}
7516 	}
7517 
7518 	/*
7519 	 * Set PKT_DMA_PARTIAL flag.
7520 	 */
7521 	if (un->un_partial_dma_supported == 1) {
7522 		un->un_pkt_flags = PKT_DMA_PARTIAL;
7523 	} else {
7524 		un->un_pkt_flags = 0;
7525 	}
7526 
7527 	/* Initialize sd_ssc_t for internal uscsi commands */
7528 	ssc = sd_ssc_init(un);
7529 	scsi_fm_init(devp);
7530 
7531 	/*
7532 	 * Allocate memory for SCSI FMA stuffs.
7533 	 */
7534 	un->un_fm_private =
7535 	    kmem_zalloc(sizeof (struct sd_fm_internal), KM_SLEEP);
7536 	sfip = (struct sd_fm_internal *)un->un_fm_private;
7537 	sfip->fm_ssc.ssc_uscsi_cmd = &sfip->fm_ucmd;
7538 	sfip->fm_ssc.ssc_uscsi_info = &sfip->fm_uinfo;
7539 	sfip->fm_ssc.ssc_un = un;
7540 
7541 	if (ISCD(un) ||
7542 	    un->un_f_has_removable_media ||
7543 	    devp->sd_fm_capable == DDI_FM_NOT_CAPABLE) {
7544 		/*
7545 		 * We don't touch CDROM or the DDI_FM_NOT_CAPABLE device.
7546 		 * Their log are unchanged.
7547 		 */
7548 		sfip->fm_log_level = SD_FM_LOG_NSUP;
7549 	} else {
7550 		/*
7551 		 * If enter here, it should be non-CDROM and FM-capable
7552 		 * device, and it will not keep the old scsi_log as before
7553 		 * in /var/adm/messages. However, the property
7554 		 * "fm-scsi-log" will control whether the FM telemetry will
7555 		 * be logged in /var/adm/messages.
7556 		 */
7557 		int fm_scsi_log;
7558 		fm_scsi_log = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
7559 		    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "fm-scsi-log", 0);
7560 
7561 		if (fm_scsi_log)
7562 			sfip->fm_log_level = SD_FM_LOG_EREPORT;
7563 		else
7564 			sfip->fm_log_level = SD_FM_LOG_SILENT;
7565 	}
7566 
7567 	/*
7568 	 * At this point in the attach, we have enough info in the
7569 	 * soft state to be able to issue commands to the target.
7570 	 *
7571 	 * All command paths used below MUST issue their commands as
7572 	 * SD_PATH_DIRECT. This is important as intermediate layers
7573 	 * are not all initialized yet (such as PM).
7574 	 */
7575 
7576 	/*
7577 	 * Send a TEST UNIT READY command to the device. This should clear
7578 	 * any outstanding UNIT ATTENTION that may be present.
7579 	 *
7580 	 * Note: Don't check for success, just track if there is a reservation,
7581 	 * this is a throw away command to clear any unit attentions.
7582 	 *
7583 	 * Note: This MUST be the first command issued to the target during
7584 	 * attach to ensure power on UNIT ATTENTIONS are cleared.
7585 	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
7586 	 * with attempts at spinning up a device with no media.
7587 	 */
7588 	status = sd_send_scsi_TEST_UNIT_READY(ssc, SD_DONT_RETRY_TUR);
7589 	if (status != 0) {
7590 		if (status == EACCES)
7591 			reservation_flag = SD_TARGET_IS_RESERVED;
7592 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7593 	}
7594 
7595 	/*
7596 	 * If the device is NOT a removable media device, attempt to spin
7597 	 * it up (using the START_STOP_UNIT command) and read its capacity
7598 	 * (using the READ CAPACITY command).  Note, however, that either
7599 	 * of these could fail and in some cases we would continue with
7600 	 * the attach despite the failure (see below).
7601 	 */
7602 	if (un->un_f_descr_format_supported) {
7603 
7604 		switch (sd_spin_up_unit(ssc)) {
7605 		case 0:
7606 			/*
7607 			 * Spin-up was successful; now try to read the
7608 			 * capacity.  If successful then save the results
7609 			 * and mark the capacity & lbasize as valid.
7610 			 */
7611 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7612 			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
7613 
7614 			status = sd_send_scsi_READ_CAPACITY(ssc, &capacity,
7615 			    &lbasize, SD_PATH_DIRECT);
7616 
7617 			switch (status) {
7618 			case 0: {
7619 				if (capacity > DK_MAX_BLOCKS) {
7620 #ifdef _LP64
7621 					if ((capacity + 1) >
7622 					    SD_GROUP1_MAX_ADDRESS) {
7623 						/*
7624 						 * Enable descriptor format
7625 						 * sense data so that we can
7626 						 * get 64 bit sense data
7627 						 * fields.
7628 						 */
7629 						sd_enable_descr_sense(ssc);
7630 					}
7631 #else
7632 					/* 32-bit kernels can't handle this */
7633 					scsi_log(SD_DEVINFO(un),
7634 					    sd_label, CE_WARN,
7635 					    "disk has %llu blocks, which "
7636 					    "is too large for a 32-bit "
7637 					    "kernel", capacity);
7638 
7639 #if defined(__i386) || defined(__amd64)
7640 					/*
7641 					 * 1TB disk was treated as (1T - 512)B
7642 					 * in the past, so that it might have
7643 					 * valid VTOC and solaris partitions,
7644 					 * we have to allow it to continue to
7645 					 * work.
7646 					 */
7647 					if (capacity -1 > DK_MAX_BLOCKS)
7648 #endif
7649 					goto spinup_failed;
7650 #endif
7651 				}
7652 
7653 				/*
7654 				 * Here it's not necessary to check the case:
7655 				 * the capacity of the device is bigger than
7656 				 * what the max hba cdb can support. Because
7657 				 * sd_send_scsi_READ_CAPACITY will retrieve
7658 				 * the capacity by sending USCSI command, which
7659 				 * is constrained by the max hba cdb. Actually,
7660 				 * sd_send_scsi_READ_CAPACITY will return
7661 				 * EINVAL when using bigger cdb than required
7662 				 * cdb length. Will handle this case in
7663 				 * "case EINVAL".
7664 				 */
7665 
7666 				/*
7667 				 * The following relies on
7668 				 * sd_send_scsi_READ_CAPACITY never
7669 				 * returning 0 for capacity and/or lbasize.
7670 				 */
7671 				sd_update_block_info(un, lbasize, capacity);
7672 
7673 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7674 				    "sd_unit_attach: un:0x%p capacity = %ld "
7675 				    "blocks; lbasize= %ld.\n", un,
7676 				    un->un_blockcount, un->un_tgt_blocksize);
7677 
7678 				break;
7679 			}
7680 			case EINVAL:
7681 				/*
7682 				 * In the case where the max-cdb-length property
7683 				 * is smaller than the required CDB length for
7684 				 * a SCSI device, a target driver can fail to
7685 				 * attach to that device.
7686 				 */
7687 				scsi_log(SD_DEVINFO(un),
7688 				    sd_label, CE_WARN,
7689 				    "disk capacity is too large "
7690 				    "for current cdb length");
7691 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7692 
7693 				goto spinup_failed;
7694 			case EACCES:
7695 				/*
7696 				 * Should never get here if the spin-up
7697 				 * succeeded, but code it in anyway.
7698 				 * From here, just continue with the attach...
7699 				 */
7700 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7701 				    "sd_unit_attach: un:0x%p "
7702 				    "sd_send_scsi_READ_CAPACITY "
7703 				    "returned reservation conflict\n", un);
7704 				reservation_flag = SD_TARGET_IS_RESERVED;
7705 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7706 				break;
7707 			default:
7708 				/*
7709 				 * Likewise, should never get here if the
7710 				 * spin-up succeeded. Just continue with
7711 				 * the attach...
7712 				 */
7713 				if (status == EIO)
7714 					sd_ssc_assessment(ssc,
7715 					    SD_FMT_STATUS_CHECK);
7716 				else
7717 					sd_ssc_assessment(ssc,
7718 					    SD_FMT_IGNORE);
7719 				break;
7720 			}
7721 			break;
7722 		case EACCES:
7723 			/*
7724 			 * Device is reserved by another host.  In this case
7725 			 * we could not spin it up or read the capacity, but
7726 			 * we continue with the attach anyway.
7727 			 */
7728 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7729 			    "sd_unit_attach: un:0x%p spin-up reservation "
7730 			    "conflict.\n", un);
7731 			reservation_flag = SD_TARGET_IS_RESERVED;
7732 			break;
7733 		default:
7734 			/* Fail the attach if the spin-up failed. */
7735 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7736 			    "sd_unit_attach: un:0x%p spin-up failed.", un);
7737 			goto spinup_failed;
7738 		}
7739 
7740 	}
7741 
7742 	/*
7743 	 * Check to see if this is a MMC drive
7744 	 */
7745 	if (ISCD(un)) {
7746 		sd_set_mmc_caps(ssc);
7747 	}
7748 
7749 
7750 	/*
7751 	 * Add a zero-length attribute to tell the world we support
7752 	 * kernel ioctls (for layered drivers)
7753 	 */
7754 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
7755 	    DDI_KERNEL_IOCTL, NULL, 0);
7756 
7757 	/*
7758 	 * Add a boolean property to tell the world we support
7759 	 * the B_FAILFAST flag (for layered drivers)
7760 	 */
7761 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
7762 	    "ddi-failfast-supported", NULL, 0);
7763 
7764 	/*
7765 	 * Initialize power management
7766 	 */
7767 	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
7768 	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
7769 	sd_setup_pm(ssc, devi);
7770 	if (un->un_f_pm_is_enabled == FALSE) {
7771 		/*
7772 		 * For performance, point to a jump table that does
7773 		 * not include pm.
7774 		 * The direct and priority chains don't change with PM.
7775 		 *
7776 		 * Note: this is currently done based on individual device
7777 		 * capabilities. When an interface for determining system
7778 		 * power enabled state becomes available, or when additional
7779 		 * layers are added to the command chain, these values will
7780 		 * have to be re-evaluated for correctness.
7781 		 */
7782 		if (un->un_f_non_devbsize_supported) {
7783 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
7784 		} else {
7785 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
7786 		}
7787 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
7788 	}
7789 
7790 	/*
7791 	 * This property is set to 0 by HA software to avoid retries
7792 	 * on a reserved disk. (The preferred property name is
7793 	 * "retry-on-reservation-conflict") (1189689)
7794 	 *
7795 	 * Note: The use of a global here can have unintended consequences. A
7796 	 * per instance variable is preferable to match the capabilities of
7797 	 * different underlying hba's (4402600)
7798 	 */
7799 	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
7800 	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
7801 	    sd_retry_on_reservation_conflict);
7802 	if (sd_retry_on_reservation_conflict != 0) {
7803 		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
7804 		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
7805 		    sd_retry_on_reservation_conflict);
7806 	}
7807 
7808 	/* Set up options for QFULL handling. */
7809 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7810 	    "qfull-retries", -1)) != -1) {
7811 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
7812 		    rval, 1);
7813 	}
7814 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7815 	    "qfull-retry-interval", -1)) != -1) {
7816 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
7817 		    rval, 1);
7818 	}
7819 
7820 	/*
7821 	 * This just prints a message that announces the existence of the
7822 	 * device. The message is always printed in the system logfile, but
7823 	 * only appears on the console if the system is booted with the
7824 	 * -v (verbose) argument.
7825 	 */
7826 	ddi_report_dev(devi);
7827 
7828 	un->un_mediastate = DKIO_NONE;
7829 
7830 	cmlb_alloc_handle(&un->un_cmlbhandle);
7831 
7832 #if defined(__i386) || defined(__amd64)
7833 	/*
7834 	 * On x86, compensate for off-by-1 legacy error
7835 	 */
7836 	if (!un->un_f_has_removable_media && !un->un_f_is_hotpluggable &&
7837 	    (lbasize == un->un_sys_blocksize))
7838 		offbyone = CMLB_OFF_BY_ONE;
7839 #endif
7840 
7841 	if (cmlb_attach(devi, &sd_tgops, (int)devp->sd_inq->inq_dtype,
7842 	    VOID2BOOLEAN(un->un_f_has_removable_media != 0),
7843 	    VOID2BOOLEAN(un->un_f_is_hotpluggable != 0),
7844 	    un->un_node_type, offbyone, un->un_cmlbhandle,
7845 	    (void *)SD_PATH_DIRECT) != 0) {
7846 		goto cmlb_attach_failed;
7847 	}
7848 
7849 
7850 	/*
7851 	 * Read and validate the device's geometry (ie, disk label)
7852 	 * A new unformatted drive will not have a valid geometry, but
7853 	 * the driver needs to successfully attach to this device so
7854 	 * the drive can be formatted via ioctls.
7855 	 */
7856 	geom_label_valid = (cmlb_validate(un->un_cmlbhandle, 0,
7857 	    (void *)SD_PATH_DIRECT) == 0) ? 1: 0;
7858 
7859 	mutex_enter(SD_MUTEX(un));
7860 
7861 	/*
7862 	 * Read and initialize the devid for the unit.
7863 	 */
7864 	if (un->un_f_devid_supported) {
7865 		sd_register_devid(ssc, devi, reservation_flag);
7866 	}
7867 	mutex_exit(SD_MUTEX(un));
7868 
7869 #if (defined(__fibre))
7870 	/*
7871 	 * Register callbacks for fibre only.  You can't do this solely
7872 	 * on the basis of the devid_type because this is hba specific.
7873 	 * We need to query our hba capabilities to find out whether to
7874 	 * register or not.
7875 	 */
7876 	if (un->un_f_is_fibre) {
7877 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
7878 			sd_init_event_callbacks(un);
7879 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7880 			    "sd_unit_attach: un:0x%p event callbacks inserted",
7881 			    un);
7882 		}
7883 	}
7884 #endif
7885 
7886 	if (un->un_f_opt_disable_cache == TRUE) {
7887 		/*
7888 		 * Disable both read cache and write cache.  This is
7889 		 * the historic behavior of the keywords in the config file.
7890 		 */
7891 		if (sd_cache_control(ssc, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
7892 		    0) {
7893 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7894 			    "sd_unit_attach: un:0x%p Could not disable "
7895 			    "caching", un);
7896 			goto devid_failed;
7897 		}
7898 	}
7899 
7900 	/*
7901 	 * Check the value of the WCE bit now and
7902 	 * set un_f_write_cache_enabled accordingly.
7903 	 */
7904 	(void) sd_get_write_cache_enabled(ssc, &wc_enabled);
7905 	mutex_enter(SD_MUTEX(un));
7906 	un->un_f_write_cache_enabled = (wc_enabled != 0);
7907 	mutex_exit(SD_MUTEX(un));
7908 
7909 	/*
7910 	 * Check the value of the NV_SUP bit and set
7911 	 * un_f_suppress_cache_flush accordingly.
7912 	 */
7913 	sd_get_nv_sup(ssc);
7914 
7915 	/*
7916 	 * Find out what type of reservation this disk supports.
7917 	 */
7918 	status = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_KEYS, 0, NULL);
7919 
7920 	switch (status) {
7921 	case 0:
7922 		/*
7923 		 * SCSI-3 reservations are supported.
7924 		 */
7925 		un->un_reservation_type = SD_SCSI3_RESERVATION;
7926 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7927 		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
7928 		break;
7929 	case ENOTSUP:
7930 		/*
7931 		 * The PERSISTENT RESERVE IN command would not be recognized by
7932 		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
7933 		 */
7934 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7935 		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
7936 		un->un_reservation_type = SD_SCSI2_RESERVATION;
7937 
7938 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7939 		break;
7940 	default:
7941 		/*
7942 		 * default to SCSI-3 reservations
7943 		 */
7944 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7945 		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
7946 		un->un_reservation_type = SD_SCSI3_RESERVATION;
7947 
7948 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7949 		break;
7950 	}
7951 
7952 	/*
7953 	 * Set the pstat and error stat values here, so data obtained during the
7954 	 * previous attach-time routines is available.
7955 	 *
7956 	 * Note: This is a critical sequence that needs to be maintained:
7957 	 *	1) Instantiate the kstats before any routines using the iopath
7958 	 *	   (i.e. sd_send_scsi_cmd).
	 *	2) Initialize the error stats (sd_set_errstats) and partition
	 *	   stats (sd_set_pstats) here, following
7961 	 *	   cmlb_validate_geometry(), sd_register_devid(), and
7962 	 *	   sd_cache_control().
7963 	 */
7964 
7965 	if (un->un_f_pkstats_enabled && geom_label_valid) {
7966 		sd_set_pstats(un);
7967 		SD_TRACE(SD_LOG_IO_PARTITION, un,
7968 		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
7969 	}
7970 
7971 	sd_set_errstats(un);
7972 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7973 	    "sd_unit_attach: un:0x%p errstats set\n", un);
7974 
7975 
7976 	/*
7977 	 * After successfully attaching an instance, we record the information
7978 	 * of how many luns have been attached on the relative target and
7979 	 * controller for parallel SCSI. This information is used when sd tries
7980 	 * to set the tagged queuing capability in HBA.
7981 	 */
7982 	if (SD_IS_PARALLEL_SCSI(un) && (tgt >= 0) && (tgt < NTARGETS_WIDE)) {
7983 		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_ATTACH);
7984 	}
7985 
7986 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7987 	    "sd_unit_attach: un:0x%p exit success\n", un);
7988 
7989 	/* Uninitialize sd_ssc_t pointer */
7990 	sd_ssc_fini(ssc);
7991 
7992 	return (DDI_SUCCESS);
7993 
7994 	/*
7995 	 * An error occurred during the attach; clean up & return failure.
7996 	 */
7997 
7998 devid_failed:
7999 
8000 setup_pm_failed:
8001 	ddi_remove_minor_node(devi, NULL);
8002 
8003 cmlb_attach_failed:
8004 	/*
8005 	 * Cleanup from the scsi_ifsetcap() calls (437868)
8006 	 */
8007 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8008 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8009 
8010 	/*
8011 	 * Refer to the comments of setting tagged-qing in the beginning of
8012 	 * sd_unit_attach. We can only disable tagged queuing when there is
8013 	 * no lun attached on the target.
8014 	 */
8015 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
8016 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8017 	}
8018 
8019 	if (un->un_f_is_fibre == FALSE) {
8020 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8021 	}
8022 
8023 spinup_failed:
8024 
8025 	/* Uninitialize sd_ssc_t pointer */
8026 	sd_ssc_fini(ssc);
8027 
8028 	mutex_enter(SD_MUTEX(un));
8029 
8030 	/* Deallocate SCSI FMA memory spaces */
8031 	kmem_free(un->un_fm_private, sizeof (struct sd_fm_internal));
8032 
8033 	/* Cancel callback for SD_PATH_DIRECT_PRIORITY cmd. restart */
8034 	if (un->un_direct_priority_timeid != NULL) {
8035 		timeout_id_t temp_id = un->un_direct_priority_timeid;
8036 		un->un_direct_priority_timeid = NULL;
8037 		mutex_exit(SD_MUTEX(un));
8038 		(void) untimeout(temp_id);
8039 		mutex_enter(SD_MUTEX(un));
8040 	}
8041 
8042 	/* Cancel any pending start/stop timeouts */
8043 	if (un->un_startstop_timeid != NULL) {
8044 		timeout_id_t temp_id = un->un_startstop_timeid;
8045 		un->un_startstop_timeid = NULL;
8046 		mutex_exit(SD_MUTEX(un));
8047 		(void) untimeout(temp_id);
8048 		mutex_enter(SD_MUTEX(un));
8049 	}
8050 
8051 	/* Cancel any pending reset-throttle timeouts */
8052 	if (un->un_reset_throttle_timeid != NULL) {
8053 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8054 		un->un_reset_throttle_timeid = NULL;
8055 		mutex_exit(SD_MUTEX(un));
8056 		(void) untimeout(temp_id);
8057 		mutex_enter(SD_MUTEX(un));
8058 	}
8059 
8060 	/* Cancel any pending retry timeouts */
8061 	if (un->un_retry_timeid != NULL) {
8062 		timeout_id_t temp_id = un->un_retry_timeid;
8063 		un->un_retry_timeid = NULL;
8064 		mutex_exit(SD_MUTEX(un));
8065 		(void) untimeout(temp_id);
8066 		mutex_enter(SD_MUTEX(un));
8067 	}
8068 
8069 	/* Cancel any pending delayed cv broadcast timeouts */
8070 	if (un->un_dcvb_timeid != NULL) {
8071 		timeout_id_t temp_id = un->un_dcvb_timeid;
8072 		un->un_dcvb_timeid = NULL;
8073 		mutex_exit(SD_MUTEX(un));
8074 		(void) untimeout(temp_id);
8075 		mutex_enter(SD_MUTEX(un));
8076 	}
8077 
8078 	mutex_exit(SD_MUTEX(un));
8079 
8080 	/* There should not be any in-progress I/O so ASSERT this check */
8081 	ASSERT(un->un_ncmds_in_transport == 0);
8082 	ASSERT(un->un_ncmds_in_driver == 0);
8083 
8084 	/* Do not free the softstate if the callback routine is active */
8085 	sd_sync_with_callback(un);
8086 
8087 	/*
8088 	 * Partition stats apparently are not used with removables. These would
8089 	 * not have been created during attach, so no need to clean them up...
8090 	 */
8091 	if (un->un_errstats != NULL) {
8092 		kstat_delete(un->un_errstats);
8093 		un->un_errstats = NULL;
8094 	}
8095 
8096 create_errstats_failed:
8097 
8098 	if (un->un_stats != NULL) {
8099 		kstat_delete(un->un_stats);
8100 		un->un_stats = NULL;
8101 	}
8102 
8103 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
8104 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
8105 
8106 	ddi_prop_remove_all(devi);
8107 	sema_destroy(&un->un_semoclose);
8108 	cv_destroy(&un->un_state_cv);
8109 
8110 getrbuf_failed:
8111 
8112 	sd_free_rqs(un);
8113 
8114 alloc_rqs_failed:
8115 
8116 	devp->sd_private = NULL;
8117 	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
8118 
8119 get_softstate_failed:
8120 	/*
8121 	 * Note: the man pages are unclear as to whether or not doing a
8122 	 * ddi_soft_state_free(sd_state, instance) is the right way to
8123 	 * clean up after the ddi_soft_state_zalloc() if the subsequent
8124 	 * ddi_get_soft_state() fails.  The implication seems to be
8125 	 * that the get_soft_state cannot fail if the zalloc succeeds.
8126 	 */
8127 #ifndef XPV_HVM_DRIVER
8128 	ddi_soft_state_free(sd_state, instance);
8129 #endif /* !XPV_HVM_DRIVER */
8130 
8131 probe_failed:
8132 	scsi_unprobe(devp);
8133 
8134 	return (DDI_FAILURE);
8135 }
8136 
8137 
8138 /*
8139  *    Function: sd_unit_detach
8140  *
8141  * Description: Performs DDI_DETACH processing for sddetach().
8142  *
8143  * Return Code: DDI_SUCCESS
8144  *		DDI_FAILURE
8145  *
8146  *     Context: Kernel thread context
8147  */
8148 
8149 static int
8150 sd_unit_detach(dev_info_t *devi)
8151 {
8152 	struct scsi_device	*devp;
8153 	struct sd_lun		*un;
8154 	int			i;
8155 	int			tgt;
8156 	dev_t			dev;
8157 	dev_info_t		*pdip = ddi_get_parent(devi);
8158 #ifndef XPV_HVM_DRIVER
8159 	int			instance = ddi_get_instance(devi);
8160 #endif /* !XPV_HVM_DRIVER */
8161 
8162 	mutex_enter(&sd_detach_mutex);
8163 
8164 	/*
8165 	 * Fail the detach for any of the following:
8166 	 *  - Unable to get the sd_lun struct for the instance
8167 	 *  - A layered driver has an outstanding open on the instance
8168 	 *  - Another thread is already detaching this instance
8169 	 *  - Another thread is currently performing an open
8170 	 */
8171 	devp = ddi_get_driver_private(devi);
8172 	if ((devp == NULL) ||
8173 	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
8174 	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
8175 	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
8176 		mutex_exit(&sd_detach_mutex);
8177 		return (DDI_FAILURE);
8178 	}
8179 
8180 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);
8181 
8182 	/*
8183 	 * Mark this instance as currently in a detach, to inhibit any
8184 	 * opens from a layered driver.
8185 	 */
8186 	un->un_detach_count++;
8187 	mutex_exit(&sd_detach_mutex);
8188 
8189 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
8190 	    SCSI_ADDR_PROP_TARGET, -1);
8191 
8192 	dev = sd_make_device(SD_DEVINFO(un));
8193 
8194 #ifndef lint
8195 	_NOTE(COMPETING_THREADS_NOW);
8196 #endif
8197 
8198 	mutex_enter(SD_MUTEX(un));
8199 
8200 	/*
8201 	 * Fail the detach if there are any outstanding layered
8202 	 * opens on this device.
8203 	 */
8204 	for (i = 0; i < NDKMAP; i++) {
8205 		if (un->un_ocmap.lyropen[i] != 0) {
8206 			goto err_notclosed;
8207 		}
8208 	}
8209 
8210 	/*
8211 	 * Verify there are NO outstanding commands issued to this device.
8212 	 * ie, un_ncmds_in_transport == 0.
8213 	 * It's possible to have outstanding commands through the physio
8214 	 * code path, even though everything's closed.
8215 	 */
8216 	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
8217 	    (un->un_direct_priority_timeid != NULL) ||
8218 	    (un->un_state == SD_STATE_RWAIT)) {
8219 		mutex_exit(SD_MUTEX(un));
8220 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8221 		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
8222 		goto err_stillbusy;
8223 	}
8224 
8225 	/*
8226 	 * If we have the device reserved, release the reservation.
8227 	 */
8228 	if ((un->un_resvd_status & SD_RESERVE) &&
8229 	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
8230 		mutex_exit(SD_MUTEX(un));
8231 		/*
8232 		 * Note: sd_reserve_release sends a command to the device
8233 		 * via the sd_ioctlcmd() path, and can sleep.
8234 		 */
8235 		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
8236 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8237 			    "sd_dr_detach: Cannot release reservation \n");
8238 		}
8239 	} else {
8240 		mutex_exit(SD_MUTEX(un));
8241 	}
8242 
8243 	/*
8244 	 * Untimeout any reserve recover, throttle reset, restart unit
8245 	 * and delayed broadcast timeout threads. Protect the timeout pointer
8246 	 * from getting nulled by their callback functions.
8247 	 */
8248 	mutex_enter(SD_MUTEX(un));
8249 	if (un->un_resvd_timeid != NULL) {
8250 		timeout_id_t temp_id = un->un_resvd_timeid;
8251 		un->un_resvd_timeid = NULL;
8252 		mutex_exit(SD_MUTEX(un));
8253 		(void) untimeout(temp_id);
8254 		mutex_enter(SD_MUTEX(un));
8255 	}
8256 
8257 	if (un->un_reset_throttle_timeid != NULL) {
8258 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8259 		un->un_reset_throttle_timeid = NULL;
8260 		mutex_exit(SD_MUTEX(un));
8261 		(void) untimeout(temp_id);
8262 		mutex_enter(SD_MUTEX(un));
8263 	}
8264 
8265 	if (un->un_startstop_timeid != NULL) {
8266 		timeout_id_t temp_id = un->un_startstop_timeid;
8267 		un->un_startstop_timeid = NULL;
8268 		mutex_exit(SD_MUTEX(un));
8269 		(void) untimeout(temp_id);
8270 		mutex_enter(SD_MUTEX(un));
8271 	}
8272 
8273 	if (un->un_dcvb_timeid != NULL) {
8274 		timeout_id_t temp_id = un->un_dcvb_timeid;
8275 		un->un_dcvb_timeid = NULL;
8276 		mutex_exit(SD_MUTEX(un));
8277 		(void) untimeout(temp_id);
8278 	} else {
8279 		mutex_exit(SD_MUTEX(un));
8280 	}
8281 
8282 	/* Remove any pending reservation reclaim requests for this device */
8283 	sd_rmv_resv_reclaim_req(dev);
8284 
8285 	mutex_enter(SD_MUTEX(un));
8286 
8287 	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
8288 	if (un->un_direct_priority_timeid != NULL) {
8289 		timeout_id_t temp_id = un->un_direct_priority_timeid;
8290 		un->un_direct_priority_timeid = NULL;
8291 		mutex_exit(SD_MUTEX(un));
8292 		(void) untimeout(temp_id);
8293 		mutex_enter(SD_MUTEX(un));
8294 	}
8295 
8296 	/* Cancel any active multi-host disk watch thread requests */
8297 	if (un->un_mhd_token != NULL) {
8298 		mutex_exit(SD_MUTEX(un));
8299 		 _NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
8300 		if (scsi_watch_request_terminate(un->un_mhd_token,
8301 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
8302 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8303 			    "sd_dr_detach: Cannot cancel mhd watch request\n");
8304 			/*
8305 			 * Note: We are returning here after having removed
8306 			 * some driver timeouts above. This is consistent with
8307 			 * the legacy implementation but perhaps the watch
8308 			 * terminate call should be made with the wait flag set.
8309 			 */
8310 			goto err_stillbusy;
8311 		}
8312 		mutex_enter(SD_MUTEX(un));
8313 		un->un_mhd_token = NULL;
8314 	}
8315 
8316 	if (un->un_swr_token != NULL) {
8317 		mutex_exit(SD_MUTEX(un));
8318 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
8319 		if (scsi_watch_request_terminate(un->un_swr_token,
8320 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
8321 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8322 			    "sd_dr_detach: Cannot cancel swr watch request\n");
8323 			/*
8324 			 * Note: We are returning here after having removed
8325 			 * some driver timeouts above. This is consistent with
8326 			 * the legacy implementation but perhaps the watch
8327 			 * terminate call should be made with the wait flag set.
8328 			 */
8329 			goto err_stillbusy;
8330 		}
8331 		mutex_enter(SD_MUTEX(un));
8332 		un->un_swr_token = NULL;
8333 	}
8334 
8335 	mutex_exit(SD_MUTEX(un));
8336 
8337 	/*
8338 	 * Clear any scsi_reset_notifies. We clear the reset notifies
8339 	 * if we have not registered one.
8340 	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
8341 	 */
8342 	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
8343 	    sd_mhd_reset_notify_cb, (caddr_t)un);
8344 
8345 	/*
8346 	 * protect the timeout pointers from getting nulled by
8347 	 * their callback functions during the cancellation process.
8348 	 * In such a scenario untimeout can be invoked with a null value.
8349 	 */
8350 	_NOTE(NO_COMPETING_THREADS_NOW);
8351 
8352 	mutex_enter(&un->un_pm_mutex);
8353 	if (un->un_pm_idle_timeid != NULL) {
8354 		timeout_id_t temp_id = un->un_pm_idle_timeid;
8355 		un->un_pm_idle_timeid = NULL;
8356 		mutex_exit(&un->un_pm_mutex);
8357 
8358 		/*
8359 		 * Timeout is active; cancel it.
8360 		 * Note that it'll never be active on a device
8361 		 * that does not support PM therefore we don't
8362 		 * have to check before calling pm_idle_component.
8363 		 */
8364 		(void) untimeout(temp_id);
8365 		(void) pm_idle_component(SD_DEVINFO(un), 0);
8366 		mutex_enter(&un->un_pm_mutex);
8367 	}
8368 
8369 	/*
8370 	 * Check whether there is already a timeout scheduled for power
	 * management. If yes, then don't lower the power here; that's
	 * the timeout handler's job.
8373 	 */
8374 	if (un->un_pm_timeid != NULL) {
8375 		timeout_id_t temp_id = un->un_pm_timeid;
8376 		un->un_pm_timeid = NULL;
8377 		mutex_exit(&un->un_pm_mutex);
8378 		/*
8379 		 * Timeout is active; cancel it.
8380 		 * Note that it'll never be active on a device
8381 		 * that does not support PM therefore we don't
8382 		 * have to check before calling pm_idle_component.
8383 		 */
8384 		(void) untimeout(temp_id);
8385 		(void) pm_idle_component(SD_DEVINFO(un), 0);
8386 
8387 	} else {
8388 		mutex_exit(&un->un_pm_mutex);
8389 		if ((un->un_f_pm_is_enabled == TRUE) &&
8390 		    (pm_lower_power(SD_DEVINFO(un), 0, SD_SPINDLE_OFF) !=
8391 		    DDI_SUCCESS)) {
8392 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8393 		    "sd_dr_detach: Lower power request failed, ignoring.\n");
8394 			/*
8395 			 * Fix for bug: 4297749, item # 13
8396 			 * The above test now includes a check to see if PM is
8397 			 * supported by this device before call
8398 			 * pm_lower_power().
8399 			 * Note, the following is not dead code. The call to
8400 			 * pm_lower_power above will generate a call back into
8401 			 * our sdpower routine which might result in a timeout
8402 			 * handler getting activated. Therefore the following
8403 			 * code is valid and necessary.
8404 			 */
8405 			mutex_enter(&un->un_pm_mutex);
8406 			if (un->un_pm_timeid != NULL) {
8407 				timeout_id_t temp_id = un->un_pm_timeid;
8408 				un->un_pm_timeid = NULL;
8409 				mutex_exit(&un->un_pm_mutex);
8410 				(void) untimeout(temp_id);
8411 				(void) pm_idle_component(SD_DEVINFO(un), 0);
8412 			} else {
8413 				mutex_exit(&un->un_pm_mutex);
8414 			}
8415 		}
8416 	}
8417 
8418 	/*
8419 	 * Cleanup from the scsi_ifsetcap() calls (437868)
8420 	 * Relocated here from above to be after the call to
8421 	 * pm_lower_power, which was getting errors.
8422 	 */
8423 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8424 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8425 
8426 	/*
8427 	 * Currently, tagged queuing is supported per target based by HBA.
8428 	 * Setting this per lun instance actually sets the capability of this
8429 	 * target in HBA, which affects those luns already attached on the
	 * same target. So during detach, we can only disable this capability
	 * when this is the only lun left on this target. By doing
8432 	 * this, we assume a target has the same tagged queuing capability
8433 	 * for every lun. The condition can be removed when HBA is changed to
8434 	 * support per lun based tagged queuing capability.
8435 	 */
8436 	if (sd_scsi_get_target_lun_count(pdip, tgt) <= 1) {
8437 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8438 	}
8439 
8440 	if (un->un_f_is_fibre == FALSE) {
8441 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8442 	}
8443 
8444 	/*
8445 	 * Remove any event callbacks, fibre only
8446 	 */
8447 	if (un->un_f_is_fibre == TRUE) {
8448 		if ((un->un_insert_event != NULL) &&
8449 		    (ddi_remove_event_handler(un->un_insert_cb_id) !=
8450 		    DDI_SUCCESS)) {
8451 			/*
8452 			 * Note: We are returning here after having done
8453 			 * substantial cleanup above. This is consistent
8454 			 * with the legacy implementation but this may not
8455 			 * be the right thing to do.
8456 			 */
8457 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8458 			    "sd_dr_detach: Cannot cancel insert event\n");
8459 			goto err_remove_event;
8460 		}
8461 		un->un_insert_event = NULL;
8462 
8463 		if ((un->un_remove_event != NULL) &&
8464 		    (ddi_remove_event_handler(un->un_remove_cb_id) !=
8465 		    DDI_SUCCESS)) {
8466 			/*
8467 			 * Note: We are returning here after having done
8468 			 * substantial cleanup above. This is consistent
8469 			 * with the legacy implementation but this may not
8470 			 * be the right thing to do.
8471 			 */
8472 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8473 			    "sd_dr_detach: Cannot cancel remove event\n");
8474 			goto err_remove_event;
8475 		}
8476 		un->un_remove_event = NULL;
8477 	}
8478 
8479 	/* Do not free the softstate if the callback routine is active */
8480 	sd_sync_with_callback(un);
8481 
8482 	cmlb_detach(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
8483 	cmlb_free_handle(&un->un_cmlbhandle);
8484 
8485 	/*
8486 	 * Hold the detach mutex here, to make sure that no other threads ever
8487 	 * can access a (partially) freed soft state structure.
8488 	 */
8489 	mutex_enter(&sd_detach_mutex);
8490 
8491 	/*
8492 	 * Clean up the soft state struct.
8493 	 * Cleanup is done in reverse order of allocs/inits.
8494 	 * At this point there should be no competing threads anymore.
8495 	 */
8496 
8497 	scsi_fm_fini(devp);
8498 
8499 	/*
8500 	 * Deallocate memory for SCSI FMA.
8501 	 */
8502 	kmem_free(un->un_fm_private, sizeof (struct sd_fm_internal));
8503 
8504 	/*
8505 	 * Unregister and free device id if it was not registered
8506 	 * by the transport.
8507 	 */
8508 	if (un->un_f_devid_transport_defined == FALSE) {
8509 		ddi_devid_unregister(devi);
8510 		if (un->un_devid) {
8511 			ddi_devid_free(un->un_devid);
8512 			un->un_devid = NULL;
8513 		}
8514 	}
8515 
8516 	/*
8517 	 * Destroy wmap cache if it exists.
8518 	 */
8519 	if (un->un_wm_cache != NULL) {
8520 		kmem_cache_destroy(un->un_wm_cache);
8521 		un->un_wm_cache = NULL;
8522 	}
8523 
8524 	/*
8525 	 * kstat cleanup is done in detach for all device types (4363169).
8526 	 * We do not want to fail detach if the device kstats are not deleted
8527 	 * since there is a confusion about the devo_refcnt for the device.
8528 	 * We just delete the kstats and let detach complete successfully.
8529 	 */
8530 	if (un->un_stats != NULL) {
8531 		kstat_delete(un->un_stats);
8532 		un->un_stats = NULL;
8533 	}
8534 	if (un->un_errstats != NULL) {
8535 		kstat_delete(un->un_errstats);
8536 		un->un_errstats = NULL;
8537 	}
8538 
8539 	/* Remove partition stats */
8540 	if (un->un_f_pkstats_enabled) {
8541 		for (i = 0; i < NSDMAP; i++) {
8542 			if (un->un_pstats[i] != NULL) {
8543 				kstat_delete(un->un_pstats[i]);
8544 				un->un_pstats[i] = NULL;
8545 			}
8546 		}
8547 	}
8548 
8549 	/* Remove xbuf registration */
8550 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
8551 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
8552 
8553 	/* Remove driver properties */
8554 	ddi_prop_remove_all(devi);
8555 
8556 	mutex_destroy(&un->un_pm_mutex);
8557 	cv_destroy(&un->un_pm_busy_cv);
8558 
8559 	cv_destroy(&un->un_wcc_cv);
8560 
8561 	/* Open/close semaphore */
8562 	sema_destroy(&un->un_semoclose);
8563 
8564 	/* Removable media condvar. */
8565 	cv_destroy(&un->un_state_cv);
8566 
8567 	/* Suspend/resume condvar. */
8568 	cv_destroy(&un->un_suspend_cv);
8569 	cv_destroy(&un->un_disk_busy_cv);
8570 
8571 	sd_free_rqs(un);
8572 
8573 	/* Free up soft state */
8574 	devp->sd_private = NULL;
8575 
8576 	bzero(un, sizeof (struct sd_lun));
8577 #ifndef XPV_HVM_DRIVER
8578 	ddi_soft_state_free(sd_state, instance);
8579 #endif /* !XPV_HVM_DRIVER */
8580 
8581 	mutex_exit(&sd_detach_mutex);
8582 
8583 	/* This frees up the INQUIRY data associated with the device. */
8584 	scsi_unprobe(devp);
8585 
8586 	/*
8587 	 * After successfully detaching an instance, we update the information
8588 	 * of how many luns have been attached in the relative target and
8589 	 * controller for parallel SCSI. This information is used when sd tries
8590 	 * to set the tagged queuing capability in HBA.
8591 	 * Since un has been released, we can't use SD_IS_PARALLEL_SCSI(un) to
8592 	 * check if the device is parallel SCSI. However, we don't need to
8593 	 * check here because we've already checked during attach. No device
8594 	 * that is not parallel SCSI is in the chain.
8595 	 */
8596 	if ((tgt >= 0) && (tgt < NTARGETS_WIDE)) {
8597 		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_DETACH);
8598 	}
8599 
8600 	return (DDI_SUCCESS);
8601 
8602 err_notclosed:
8603 	mutex_exit(SD_MUTEX(un));
8604 
8605 err_stillbusy:
8606 	_NOTE(NO_COMPETING_THREADS_NOW);
8607 
8608 err_remove_event:
8609 	mutex_enter(&sd_detach_mutex);
8610 	un->un_detach_count--;
8611 	mutex_exit(&sd_detach_mutex);
8612 
8613 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
8614 	return (DDI_FAILURE);
8615 }
8616 
8617 
8618 /*
8619  *    Function: sd_create_errstats
8620  *
8621  * Description: This routine instantiates the device error stats.
8622  *
8623  *		Note: During attach the stats are instantiated first so they are
8624  *		available for attach-time routines that utilize the driver
8625  *		iopath to send commands to the device. The stats are initialized
8626  *		separately so data obtained during some attach-time routines is
8627  *		available. (4362483)
8628  *
8629  *   Arguments: un - driver soft state (unit) structure
8630  *		instance - driver instance
8631  *
8632  *     Context: Kernel thread context
8633  */
8634 
8635 static void
8636 sd_create_errstats(struct sd_lun *un, int instance)
8637 {
8638 	struct	sd_errstats	*stp;
8639 	char	kstatmodule_err[KSTAT_STRLEN];
8640 	char	kstatname[KSTAT_STRLEN];
8641 	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
8642 
8643 	ASSERT(un != NULL);
8644 
8645 	if (un->un_errstats != NULL) {
8646 		return;
8647 	}
8648 
8649 	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
8650 	    "%serr", sd_label);
8651 	(void) snprintf(kstatname, sizeof (kstatname),
8652 	    "%s%d,err", sd_label, instance);
8653 
8654 	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
8655 	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
8656 
8657 	if (un->un_errstats == NULL) {
8658 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8659 		    "sd_create_errstats: Failed kstat_create\n");
8660 		return;
8661 	}
8662 
8663 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
8664 	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
8665 	    KSTAT_DATA_UINT32);
8666 	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
8667 	    KSTAT_DATA_UINT32);
8668 	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
8669 	    KSTAT_DATA_UINT32);
8670 	kstat_named_init(&stp->sd_vid,		"Vendor",
8671 	    KSTAT_DATA_CHAR);
8672 	kstat_named_init(&stp->sd_pid,		"Product",
8673 	    KSTAT_DATA_CHAR);
8674 	kstat_named_init(&stp->sd_revision,	"Revision",
8675 	    KSTAT_DATA_CHAR);
8676 	kstat_named_init(&stp->sd_serial,	"Serial No",
8677 	    KSTAT_DATA_CHAR);
8678 	kstat_named_init(&stp->sd_capacity,	"Size",
8679 	    KSTAT_DATA_ULONGLONG);
8680 	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
8681 	    KSTAT_DATA_UINT32);
8682 	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
8683 	    KSTAT_DATA_UINT32);
8684 	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
8685 	    KSTAT_DATA_UINT32);
8686 	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
8687 	    KSTAT_DATA_UINT32);
8688 	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
8689 	    KSTAT_DATA_UINT32);
8690 	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
8691 	    KSTAT_DATA_UINT32);
8692 
8693 	un->un_errstats->ks_private = un;
8694 	un->un_errstats->ks_update  = nulldev;
8695 
8696 	kstat_install(un->un_errstats);
8697 }
8698 
8699 
8700 /*
8701  *    Function: sd_set_errstats
8702  *
8703  * Description: This routine sets the value of the vendor id, product id,
8704  *		revision, serial number, and capacity device error stats.
8705  *
8706  *		Note: During attach the stats are instantiated first so they are
8707  *		available for attach-time routines that utilize the driver
8708  *		iopath to send commands to the device. The stats are initialized
8709  *		separately so data obtained during some attach-time routines is
8710  *		available. (4362483)
8711  *
8712  *   Arguments: un - driver soft state (unit) structure
8713  *
8714  *     Context: Kernel thread context
8715  */
8716 
8717 static void
8718 sd_set_errstats(struct sd_lun *un)
8719 {
8720 	struct	sd_errstats	*stp;
8721 
8722 	ASSERT(un != NULL);
8723 	ASSERT(un->un_errstats != NULL);
8724 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
8725 	ASSERT(stp != NULL);
8726 	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
8727 	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
8728 	(void) strncpy(stp->sd_revision.value.c,
8729 	    un->un_sd->sd_inq->inq_revision, 4);
8730 
8731 	/*
8732 	 * All the errstats are persistent across detach/attach,
8733 	 * so reset all the errstats here in case of the hot
8734 	 * replacement of disk drives, except for not changed
8735 	 * Sun qualified drives.
8736 	 */
8737 	if ((bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) != 0) ||
8738 	    (bcmp(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
8739 	    sizeof (SD_INQUIRY(un)->inq_serial)) != 0)) {
8740 		stp->sd_softerrs.value.ui32 = 0;
8741 		stp->sd_harderrs.value.ui32 = 0;
8742 		stp->sd_transerrs.value.ui32 = 0;
8743 		stp->sd_rq_media_err.value.ui32 = 0;
8744 		stp->sd_rq_ntrdy_err.value.ui32 = 0;
8745 		stp->sd_rq_nodev_err.value.ui32 = 0;
8746 		stp->sd_rq_recov_err.value.ui32 = 0;
8747 		stp->sd_rq_illrq_err.value.ui32 = 0;
8748 		stp->sd_rq_pfa_err.value.ui32 = 0;
8749 	}
8750 
8751 	/*
8752 	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
8753 	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
8754 	 * (4376302))
8755 	 */
8756 	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
8757 		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
8758 		    sizeof (SD_INQUIRY(un)->inq_serial));
8759 	}
8760 
8761 	if (un->un_f_blockcount_is_valid != TRUE) {
8762 		/*
8763 		 * Set capacity error stat to 0 for no media. This ensures
8764 		 * a valid capacity is displayed in response to 'iostat -E'
8765 		 * when no media is present in the device.
8766 		 */
8767 		stp->sd_capacity.value.ui64 = 0;
8768 	} else {
8769 		/*
8770 		 * Multiply un_blockcount by un->un_sys_blocksize to get
8771 		 * capacity.
8772 		 *
8773 		 * Note: for non-512 blocksize devices "un_blockcount" has been
8774 		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
8775 		 * (un_tgt_blocksize / un->un_sys_blocksize).
8776 		 */
8777 		stp->sd_capacity.value.ui64 = (uint64_t)
8778 		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
8779 	}
8780 }
8781 
8782 
8783 /*
8784  *    Function: sd_set_pstats
8785  *
8786  * Description: This routine instantiates and initializes the partition
8787  *              stats for each partition with more than zero blocks.
8788  *		(4363169)
8789  *
8790  *   Arguments: un - driver soft state (unit) structure
8791  *
8792  *     Context: Kernel thread context
8793  */
8794 
8795 static void
8796 sd_set_pstats(struct sd_lun *un)
8797 {
8798 	char	kstatname[KSTAT_STRLEN];
8799 	int	instance;
8800 	int	i;
8801 	diskaddr_t	nblks = 0;
8802 	char	*partname = NULL;
8803 
8804 	ASSERT(un != NULL);
8805 
8806 	instance = ddi_get_instance(SD_DEVINFO(un));
8807 
8808 	/* Note:x86: is this a VTOC8/VTOC16 difference? */
8809 	for (i = 0; i < NSDMAP; i++) {
8810 
8811 		if (cmlb_partinfo(un->un_cmlbhandle, i,
8812 		    &nblks, NULL, &partname, NULL, (void *)SD_PATH_DIRECT) != 0)
8813 			continue;
8814 		mutex_enter(SD_MUTEX(un));
8815 
8816 		if ((un->un_pstats[i] == NULL) &&
8817 		    (nblks != 0)) {
8818 
8819 			(void) snprintf(kstatname, sizeof (kstatname),
8820 			    "%s%d,%s", sd_label, instance,
8821 			    partname);
8822 
8823 			un->un_pstats[i] = kstat_create(sd_label,
8824 			    instance, kstatname, "partition", KSTAT_TYPE_IO,
8825 			    1, KSTAT_FLAG_PERSISTENT);
8826 			if (un->un_pstats[i] != NULL) {
8827 				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
8828 				kstat_install(un->un_pstats[i]);
8829 			}
8830 		}
8831 		mutex_exit(SD_MUTEX(un));
8832 	}
8833 }
8834 
8835 
8836 #if (defined(__fibre))
8837 /*
8838  *    Function: sd_init_event_callbacks
8839  *
8840  * Description: This routine initializes the insertion and removal event
8841  *		callbacks. (fibre only)
8842  *
8843  *   Arguments: un - driver soft state (unit) structure
8844  *
8845  *     Context: Kernel thread context
8846  */
8847 
8848 static void
8849 sd_init_event_callbacks(struct sd_lun *un)
8850 {
8851 	ASSERT(un != NULL);
8852 
8853 	if ((un->un_insert_event == NULL) &&
8854 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
8855 	    &un->un_insert_event) == DDI_SUCCESS)) {
8856 		/*
8857 		 * Add the callback for an insertion event
8858 		 */
8859 		(void) ddi_add_event_handler(SD_DEVINFO(un),
8860 		    un->un_insert_event, sd_event_callback, (void *)un,
8861 		    &(un->un_insert_cb_id));
8862 	}
8863 
8864 	if ((un->un_remove_event == NULL) &&
8865 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
8866 	    &un->un_remove_event) == DDI_SUCCESS)) {
8867 		/*
8868 		 * Add the callback for a removal event
8869 		 */
8870 		(void) ddi_add_event_handler(SD_DEVINFO(un),
8871 		    un->un_remove_event, sd_event_callback, (void *)un,
8872 		    &(un->un_remove_cb_id));
8873 	}
8874 }
8875 
8876 
/*
 *    Function: sd_event_callback
 *
 * Description: This routine handles insert/remove events (photon). The
 *		state is changed to OFFLINE which can be used to suppress
 *		error msgs. (fibre only)
 *
 *   Arguments: dip - device info node (unused)
 *		event - cookie identifying the delivered event
 *		arg - driver soft state (unit) structure
 *		bus_impldata - bus-specific event payload (unused)
 *
 *     Context: Callout thread context
 */
/* ARGSUSED */
static void
sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
    void *bus_impldata)
{
	struct sd_lun *un = (struct sd_lun *)arg;

	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
	if (event == un->un_insert_event) {
		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
		mutex_enter(SD_MUTEX(un));
		/* Only leave OFFLINE; other states are left untouched. */
		if (un->un_state == SD_STATE_OFFLINE) {
			if (un->un_last_state != SD_STATE_SUSPENDED) {
				un->un_state = un->un_last_state;
			} else {
				/*
				 * We have gone through SUSPEND/RESUME while
				 * we were offline. Restore the last state
				 */
				un->un_state = un->un_save_state;
			}
		}
		mutex_exit(SD_MUTEX(un));

	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
	} else if (event == un->un_remove_event) {
		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
		mutex_enter(SD_MUTEX(un));
		/*
		 * We need to handle an event callback that occurs during
		 * the suspend operation, since we don't prevent it.
		 */
		if (un->un_state != SD_STATE_OFFLINE) {
			if (un->un_state != SD_STATE_SUSPENDED) {
				New_state(un, SD_STATE_OFFLINE);
			} else {
				/* Deferred: applied when the unit resumes. */
				un->un_last_state = SD_STATE_OFFLINE;
			}
		}
		mutex_exit(SD_MUTEX(un));
	} else {
		/* Cookie matched neither registered event; just log it. */
		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
		    "!Unknown event\n");
	}

}
8934 #endif
8935 
8936 /*
8937  *    Function: sd_cache_control()
8938  *
8939  * Description: This routine is the driver entry point for setting
8940  *		read and write caching by modifying the WCE (write cache
8941  *		enable) and RCD (read cache disable) bits of mode
8942  *		page 8 (MODEPAGE_CACHING).
8943  *
8944  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
8945  *                      structure for this target.
8946  *		rcd_flag - flag for controlling the read cache
8947  *		wce_flag - flag for controlling the write cache
8948  *
8949  * Return Code: EIO
8950  *		code returned by sd_send_scsi_MODE_SENSE and
8951  *		sd_send_scsi_MODE_SELECT
8952  *
8953  *     Context: Kernel Thread
8954  */
8955 
static int
sd_cache_control(sd_ssc_t *ssc, int rcd_flag, int wce_flag)
{
	struct mode_caching	*mode_caching_page;
	uchar_t			*header;
	size_t			buflen;
	int			hdrlen;
	int			bd_len;
	int			rval = 0;
	struct mode_header_grp2	*mhp;
	struct sd_lun		*un;
	int			status;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);

	/*
	 * Do a test unit ready, otherwise a mode sense may not work if this
	 * is the first command sent to the device after boot.
	 * A TUR failure is deliberately ignored (best-effort warm-up).
	 */
	status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
	if (status != 0)
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	/* ATAPI devices use the 10-byte (group 1) mode command headers. */
	if (un->un_f_cfg_is_atapi == TRUE) {
		hdrlen = MODE_HEADER_LENGTH_GRP2;
	} else {
		hdrlen = MODE_HEADER_LENGTH;
	}

	/*
	 * Allocate memory for the retrieved mode page and its headers.  Set
	 * a pointer to the page itself.  Use mode_cache_scsi3 to insure
	 * we get all of the mode sense data otherwise, the mode select
	 * will fail.  mode_cache_scsi3 is a superset of mode_caching.
	 */
	buflen = hdrlen + MODE_BLK_DESC_LENGTH +
	    sizeof (struct mode_cache_scsi3);

	header = kmem_zalloc(buflen, KM_SLEEP);

	/* Get the information from the device. */
	if (un->un_f_cfg_is_atapi == TRUE) {
		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, header, buflen,
		    MODEPAGE_CACHING, SD_PATH_DIRECT);
	} else {
		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, header, buflen,
		    MODEPAGE_CACHING, SD_PATH_DIRECT);
	}

	if (rval != 0) {
		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
		    "sd_cache_control: Mode Sense Failed\n");
		goto mode_sense_failed;
	}

	/*
	 * Determine size of Block Descriptors in order to locate
	 * the mode page data. ATAPI devices return 0, SCSI devices
	 * should return MODE_BLK_DESC_LENGTH.
	 */
	if (un->un_f_cfg_is_atapi == TRUE) {
		mhp	= (struct mode_header_grp2 *)header;
		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
	} else {
		bd_len  = ((struct mode_header *)header)->bdesc_length;
	}

	/* A descriptor longer than the buffer layout allows is bogus. */
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, 0,
		    "sd_cache_control: Mode Sense returned invalid block "
		    "descriptor length\n");
		rval = EIO;
		goto mode_sense_failed;
	}

	/* The caching page follows the header and any block descriptors. */
	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
		    "sd_cache_control: Mode Sense caching page code mismatch "
		    "%d\n", mode_caching_page->mode_page.code);
		rval = EIO;
		goto mode_sense_failed;
	}

	/*
	 * Check the relevant bits on successful mode sense.
	 * The mode select is only issued when at least one of the
	 * requested flags differs from the device's current setting.
	 */
	if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
	    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
	    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
	    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {

		size_t sbuflen;
		uchar_t save_pg;

		/*
		 * Construct select buffer length based on the
		 * length of the sense data returned.
		 *
		 * NOTE(review): this always budgets a full
		 * MODE_BLK_DESC_LENGTH block descriptor even though
		 * bd_len may be 0 (ATAPI); it looks like the select
		 * could then send trailing zero bytes past the page --
		 * confirm against the SPC MODE SELECT requirements.
		 */
		sbuflen =  hdrlen + MODE_BLK_DESC_LENGTH +
		    sizeof (struct mode_page) +
		    (int)mode_caching_page->mode_page.length;

		/*
		 * Set the caching bits as requested.
		 * rcd is "read cache disable", hence the inverted sense
		 * relative to wce ("write cache enable").
		 */
		if (rcd_flag == SD_CACHE_ENABLE)
			mode_caching_page->rcd = 0;
		else if (rcd_flag == SD_CACHE_DISABLE)
			mode_caching_page->rcd = 1;

		if (wce_flag == SD_CACHE_ENABLE)
			mode_caching_page->wce = 1;
		else if (wce_flag == SD_CACHE_DISABLE)
			mode_caching_page->wce = 0;

		/*
		 * Save the page if the mode sense says the
		 * drive supports it.
		 */
		save_pg = mode_caching_page->mode_page.ps ?
		    SD_SAVE_PAGE : SD_DONTSAVE_PAGE;

		/* Clear reserved bits before mode select. */
		mode_caching_page->mode_page.ps = 0;

		/*
		 * Clear out mode header for mode select.
		 * The rest of the retrieved page will be reused.
		 */
		bzero(header, hdrlen);

		/* Restore the descriptor length the device reported. */
		if (un->un_f_cfg_is_atapi == TRUE) {
			mhp = (struct mode_header_grp2 *)header;
			mhp->bdesc_length_hi = bd_len >> 8;
			mhp->bdesc_length_lo = (uchar_t)bd_len & 0xff;
		} else {
			((struct mode_header *)header)->bdesc_length = bd_len;
		}

		sd_ssc_assessment(ssc, SD_FMT_IGNORE);

		/* Issue mode select to change the cache settings */
		if (un->un_f_cfg_is_atapi == TRUE) {
			rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP1, header,
			    sbuflen, save_pg, SD_PATH_DIRECT);
		} else {
			rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, header,
			    sbuflen, save_pg, SD_PATH_DIRECT);
		}

	}


mode_sense_failed:

	kmem_free(header, buflen);

	/* Map the final status onto an FMA assessment for the ssc. */
	if (rval != 0) {
		if (rval == EIO)
			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
		else
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	}
	return (rval);
}
9122 
9123 
9124 /*
9125  *    Function: sd_get_write_cache_enabled()
9126  *
9127  * Description: This routine is the driver entry point for determining if
9128  *		write caching is enabled.  It examines the WCE (write cache
9129  *		enable) bits of mode page 8 (MODEPAGE_CACHING).
9130  *
9131  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
9132  *                      structure for this target.
9133  *		is_enabled - pointer to int where write cache enabled state
9134  *		is returned (non-zero -> write cache enabled)
9135  *
9136  *
9137  * Return Code: EIO
9138  *		code returned by sd_send_scsi_MODE_SENSE
9139  *
9140  *     Context: Kernel Thread
9141  *
9142  * NOTE: If ioctl is added to disable write cache, this sequence should
9143  * be followed so that no locking is required for accesses to
9144  * un->un_f_write_cache_enabled:
9145  * 	do mode select to clear wce
9146  * 	do synchronize cache to flush cache
9147  * 	set un->un_f_write_cache_enabled = FALSE
9148  *
9149  * Conversely, an ioctl to enable the write cache should be done
9150  * in this order:
9151  * 	set un->un_f_write_cache_enabled = TRUE
9152  * 	do mode select to set wce
9153  */
9154 
static int
sd_get_write_cache_enabled(sd_ssc_t *ssc, int *is_enabled)
{
	struct mode_caching	*mode_caching_page;
	uchar_t			*header;
	size_t			buflen;
	int			hdrlen;
	int			bd_len;
	int			rval = 0;
	struct sd_lun		*un;
	int			status;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(is_enabled != NULL);

	/* in case of error, flag as enabled */
	*is_enabled = TRUE;

	/*
	 * Do a test unit ready, otherwise a mode sense may not work if this
	 * is the first command sent to the device after boot.
	 */
	status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);

	/* The TUR is advisory only; a failure must not raise an FMA event. */
	if (status != 0)
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	/* ATAPI devices use the larger group 2 mode header (10-byte CDBs). */
	if (un->un_f_cfg_is_atapi == TRUE) {
		hdrlen = MODE_HEADER_LENGTH_GRP2;
	} else {
		hdrlen = MODE_HEADER_LENGTH;
	}

	/*
	 * Allocate memory for the retrieved mode page and its headers.  Set
	 * a pointer to the page itself.
	 */
	buflen = hdrlen + MODE_BLK_DESC_LENGTH + sizeof (struct mode_caching);
	header = kmem_zalloc(buflen, KM_SLEEP);

	/* Get the information from the device. */
	if (un->un_f_cfg_is_atapi == TRUE) {
		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, header, buflen,
		    MODEPAGE_CACHING, SD_PATH_DIRECT);
	} else {
		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, header, buflen,
		    MODEPAGE_CACHING, SD_PATH_DIRECT);
	}

	if (rval != 0) {
		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
		    "sd_get_write_cache_enabled: Mode Sense Failed\n");
		goto mode_sense_failed;
	}

	/*
	 * Determine size of Block Descriptors in order to locate
	 * the mode page data. ATAPI devices return 0, SCSI devices
	 * should return MODE_BLK_DESC_LENGTH.
	 */
	if (un->un_f_cfg_is_atapi == TRUE) {
		struct mode_header_grp2	*mhp;
		mhp	= (struct mode_header_grp2 *)header;
		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
	} else {
		bd_len  = ((struct mode_header *)header)->bdesc_length;
	}

	if (bd_len > MODE_BLK_DESC_LENGTH) {
		/* FMA should make upset complain here */
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, 0,
		    "sd_get_write_cache_enabled: Mode Sense returned invalid "
		    "block descriptor length\n");
		rval = EIO;
		goto mode_sense_failed;
	}

	/* The caching page follows the mode header and block descriptors. */
	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
		/* FMA could make upset complain here */
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
		    "sd_get_write_cache_enabled: Mode Sense caching page "
		    "code mismatch %d\n", mode_caching_page->mode_page.code);
		rval = EIO;
		goto mode_sense_failed;
	}
	*is_enabled = mode_caching_page->wce;

mode_sense_failed:
	/*
	 * Map the outcome to an FMA assessment: success is standard; an EIO
	 * caused by ILLEGAL REQUEST (device lacks mode sense support) is a
	 * compromise; any other failure is either a status check or ignored.
	 */
	if (rval == 0) {
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
	} else if (rval == EIO) {
		/*
		 * Some disks do not support mode sense(6), we
		 * should ignore this kind of error(sense key is
		 * 0x5 - illegal request).
		 */
		uint8_t *sensep;
		int senlen;

		sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
		senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
		    ssc->ssc_uscsi_cmd->uscsi_rqresid);

		if (senlen > 0 &&
		    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
		} else {
			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
		}
	} else {
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	}
	kmem_free(header, buflen);
	return (rval);
}
9273 
9274 /*
9275  *    Function: sd_get_nv_sup()
9276  *
9277  * Description: This routine is the driver entry point for
9278  * determining whether non-volatile cache is supported. This
9279  * determination process works as follows:
9280  *
9281  * 1. sd first queries sd.conf on whether
9282  * suppress_cache_flush bit is set for this device.
9283  *
9284  * 2. if not there, then queries the internal disk table.
9285  *
9286  * 3. if either sd.conf or internal disk table specifies
9287  * cache flush be suppressed, we don't bother checking
9288  * NV_SUP bit.
9289  *
9290  * If SUPPRESS_CACHE_FLUSH bit is not set to 1, sd queries
9291  * the optional INQUIRY VPD page 0x86. If the device
9292  * supports VPD page 0x86, sd examines the NV_SUP
9293  * (non-volatile cache support) bit in the INQUIRY VPD page
9294  * 0x86:
9295  *   o If NV_SUP bit is set, sd assumes the device has a
9296  *   non-volatile cache and set the
9297  *   un_f_sync_nv_supported to TRUE.
9298  *   o Otherwise cache is not non-volatile,
9299  *   un_f_sync_nv_supported is set to FALSE.
9300  *
 * Arguments: ssc - ssc contains a pointer to the driver soft state (unit)
 *                  structure for this target
9302  *
9303  * Return Code:
9304  *
9305  *     Context: Kernel Thread
9306  */
9307 
static void
sd_get_nv_sup(sd_ssc_t *ssc)
{
	int		rval		= 0;
	uchar_t		*inq86		= NULL;
	size_t		inq86_len	= MAX_INQUIRY_SIZE;
	size_t		inq86_resid	= 0;
	struct		dk_callback *dkc;
	struct sd_lun	*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);

	mutex_enter(SD_MUTEX(un));

	/*
	 * Be conservative on the device's support of
	 * SYNC_NV bit: un_f_sync_nv_supported is
	 * initialized to be false.
	 */
	un->un_f_sync_nv_supported = FALSE;

	/*
	 * If either sd.conf or internal disk table
	 * specifies cache flush be suppressed, then
	 * we don't bother checking NV_SUP bit.
	 */
	if (un->un_f_suppress_cache_flush == TRUE) {
		mutex_exit(SD_MUTEX(un));
		return;
	}

	if (sd_check_vpd_page_support(ssc) == 0 &&
	    un->un_vpd_page_mask & SD_VPD_EXTENDED_DATA_PG) {
		mutex_exit(SD_MUTEX(un));
		/* collect page 86 data if available */
		inq86 = kmem_zalloc(inq86_len, KM_SLEEP);

		/* EVPD INQUIRY (0x01) for the Extended INQUIRY Data page */
		rval = sd_send_scsi_INQUIRY(ssc, inq86, inq86_len,
		    0x01, 0x86, &inq86_resid);

		if (rval == 0 && (inq86_len - inq86_resid > 6)) {
			SD_TRACE(SD_LOG_COMMON, un,
			    "sd_get_nv_sup: \
			    successfully get VPD page: %x \
			    PAGE LENGTH: %x BYTE 6: %x\n",
			    inq86[1], inq86[3], inq86[6]);

			mutex_enter(SD_MUTEX(un));
			/*
			 * check the value of NV_SUP bit: only if the device
			 * reports NV_SUP bit to be 1, the
			 * un_f_sync_nv_supported bit will be set to true.
			 */
			if (inq86[6] & SD_VPD_NV_SUP) {
				un->un_f_sync_nv_supported = TRUE;
			}
			mutex_exit(SD_MUTEX(un));
		} else if (rval != 0) {
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		}

		kmem_free(inq86, inq86_len);
	} else {
		mutex_exit(SD_MUTEX(un));
	}

	/*
	 * Send a SYNC CACHE command to check whether
	 * SYNC_NV bit is supported. This command should have
	 * un_f_sync_nv_supported set to correct value.
	 */
	mutex_enter(SD_MUTEX(un));
	if (un->un_f_sync_nv_supported) {
		mutex_exit(SD_MUTEX(un));
		/* dk_callback carries only the FLUSH_VOLATILE flag here */
		dkc = kmem_zalloc(sizeof (struct dk_callback), KM_SLEEP);
		dkc->dkc_flag = FLUSH_VOLATILE;
		(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);

		/*
		 * Send a TEST UNIT READY command to the device. This should
		 * clear any outstanding UNIT ATTENTION that may be present.
		 */
		rval = sd_send_scsi_TEST_UNIT_READY(ssc, SD_DONT_RETRY_TUR);
		if (rval != 0)
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);

		kmem_free(dkc, sizeof (struct dk_callback));
	} else {
		mutex_exit(SD_MUTEX(un));
	}

	SD_TRACE(SD_LOG_COMMON, un, "sd_get_nv_sup: \
	    un_f_suppress_cache_flush is set to %d\n",
	    un->un_f_suppress_cache_flush);
}
9405 
9406 /*
9407  *    Function: sd_make_device
9408  *
9409  * Description: Utility routine to return the Solaris device number from
9410  *		the data in the device's dev_info structure.
9411  *
9412  * Return Code: The Solaris device number
9413  *
9414  *     Context: Any
9415  */
9416 
9417 static dev_t
9418 sd_make_device(dev_info_t *devi)
9419 {
9420 	return (makedevice(ddi_driver_major(devi),
9421 	    ddi_get_instance(devi) << SDUNIT_SHIFT));
9422 }
9423 
9424 
9425 /*
9426  *    Function: sd_pm_entry
9427  *
9428  * Description: Called at the start of a new command to manage power
9429  *		and busy status of a device. This includes determining whether
9430  *		the current power state of the device is sufficient for
9431  *		performing the command or whether it must be changed.
9432  *		The PM framework is notified appropriately.
9433  *		Only with a return status of DDI_SUCCESS will the
9434  *		component be busy to the framework.
9435  *
9436  *		All callers of sd_pm_entry must check the return status
9437  *		and only call sd_pm_exit it it was DDI_SUCCESS. A status
9438  *		of DDI_FAILURE indicates the device failed to power up.
9439  *		In this case un_pm_count has been adjusted so the result
9440  *		on exit is still powered down, ie. count is less than 0.
9441  *		Calling sd_pm_exit with this count value hits an ASSERT.
9442  *
9443  * Return Code: DDI_SUCCESS or DDI_FAILURE
9444  *
9445  *     Context: Kernel thread context.
9446  */
9447 
static int
sd_pm_entry(struct sd_lun *un)
{
	int return_status = DDI_SUCCESS;

	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(!mutex_owned(&un->un_pm_mutex));

	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");

	if (un->un_f_pm_is_enabled == FALSE) {
		SD_TRACE(SD_LOG_IO_PM, un,
		    "sd_pm_entry: exiting, PM not enabled\n");
		return (return_status);
	}

	/*
	 * Just increment a counter if PM is enabled. On the transition from
	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
	 * the count with each IO and mark the device as idle when the count
	 * hits 0.
	 *
	 * If the count is less than 0 the device is powered down. If a powered
	 * down device is successfully powered up then the count must be
	 * incremented to reflect the power up. Note that it'll get incremented
	 * a second time to become busy.
	 *
	 * Because the following has the potential to change the device state
	 * and must release the un_pm_mutex to do so, only one thread can be
	 * allowed through at a time.
	 */

	mutex_enter(&un->un_pm_mutex);
	while (un->un_pm_busy == TRUE) {
		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
	}
	/* This thread now exclusively owns the power-state transition. */
	un->un_pm_busy = TRUE;

	if (un->un_pm_count < 1) {

		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");

		/*
		 * Indicate we are now busy so the framework won't attempt to
		 * power down the device. This call will only fail if either
		 * we passed a bad component number or the device has no
		 * components. Neither of these should ever happen.
		 */
		mutex_exit(&un->un_pm_mutex);
		return_status = pm_busy_component(SD_DEVINFO(un), 0);
		ASSERT(return_status == DDI_SUCCESS);

		mutex_enter(&un->un_pm_mutex);

		if (un->un_pm_count < 0) {
			mutex_exit(&un->un_pm_mutex);

			SD_TRACE(SD_LOG_IO_PM, un,
			    "sd_pm_entry: power up component\n");

			/*
			 * pm_raise_power will cause sdpower to be called
			 * which brings the device power level to the
			 * desired state, ON in this case. If successful,
			 * un_pm_count and un_power_level will be updated
			 * appropriately.
			 */
			return_status = pm_raise_power(SD_DEVINFO(un), 0,
			    SD_SPINDLE_ON);

			mutex_enter(&un->un_pm_mutex);

			if (return_status != DDI_SUCCESS) {
				/*
				 * Power up failed.
				 * Idle the device and adjust the count
				 * so the result on exit is that we're
				 * still powered down, ie. count is less than 0.
				 */
				SD_TRACE(SD_LOG_IO_PM, un,
				    "sd_pm_entry: power up failed,"
				    " idle the component\n");

				(void) pm_idle_component(SD_DEVINFO(un), 0);
				un->un_pm_count--;
			} else {
				/*
				 * Device is powered up, verify the
				 * count is non-negative.
				 * This is debug only.
				 */
				ASSERT(un->un_pm_count == 0);
			}
		}

		if (return_status == DDI_SUCCESS) {
			/*
			 * For performance, now that the device has been tagged
			 * as busy, and it's known to be powered up, update the
			 * chain types to use jump tables that do not include
			 * pm. This significantly lowers the overhead and
			 * therefore improves performance.
			 */

			mutex_exit(&un->un_pm_mutex);
			mutex_enter(SD_MUTEX(un));
			SD_TRACE(SD_LOG_IO_PM, un,
			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
			    un->un_uscsi_chain_type);

			if (un->un_f_non_devbsize_supported) {
				un->un_buf_chain_type =
				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
			} else {
				un->un_buf_chain_type =
				    SD_CHAIN_INFO_DISK_NO_PM;
			}
			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;

			SD_TRACE(SD_LOG_IO_PM, un,
			    "             changed  uscsi_chain_type to   %d\n",
			    un->un_uscsi_chain_type);
			mutex_exit(SD_MUTEX(un));
			mutex_enter(&un->un_pm_mutex);

			if (un->un_pm_idle_timeid == NULL) {
				/* 300 ms. */
				un->un_pm_idle_timeid =
				    timeout(sd_pm_idletimeout_handler, un,
				    (drv_usectohz((clock_t)300000)));
				/*
				 * Include an extra call to busy which keeps the
				 * device busy with-respect-to the PM layer
				 * until the timer fires, at which time it'll
				 * get the extra idle call.
				 */
				(void) pm_busy_component(SD_DEVINFO(un), 0);
			}
		}
	}
	/* Release the transition lock and wake the next waiter, if any. */
	un->un_pm_busy = FALSE;
	/* Next... */
	cv_signal(&un->un_pm_busy_cv);

	/* Count this command; the matching decrement is in sd_pm_exit(). */
	un->un_pm_count++;

	SD_TRACE(SD_LOG_IO_PM, un,
	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);

	mutex_exit(&un->un_pm_mutex);

	return (return_status);
}
9601 
9602 
9603 /*
9604  *    Function: sd_pm_exit
9605  *
9606  * Description: Called at the completion of a command to manage busy
9607  *		status for the device. If the device becomes idle the
9608  *		PM framework is notified.
9609  *
9610  *     Context: Kernel thread context
9611  */
9612 
9613 static void
9614 sd_pm_exit(struct sd_lun *un)
9615 {
9616 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9617 	ASSERT(!mutex_owned(&un->un_pm_mutex));
9618 
9619 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
9620 
9621 	/*
9622 	 * After attach the following flag is only read, so don't
9623 	 * take the penalty of acquiring a mutex for it.
9624 	 */
9625 	if (un->un_f_pm_is_enabled == TRUE) {
9626 
9627 		mutex_enter(&un->un_pm_mutex);
9628 		un->un_pm_count--;
9629 
9630 		SD_TRACE(SD_LOG_IO_PM, un,
9631 		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
9632 
9633 		ASSERT(un->un_pm_count >= 0);
9634 		if (un->un_pm_count == 0) {
9635 			mutex_exit(&un->un_pm_mutex);
9636 
9637 			SD_TRACE(SD_LOG_IO_PM, un,
9638 			    "sd_pm_exit: idle component\n");
9639 
9640 			(void) pm_idle_component(SD_DEVINFO(un), 0);
9641 
9642 		} else {
9643 			mutex_exit(&un->un_pm_mutex);
9644 		}
9645 	}
9646 
9647 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
9648 }
9649 
9650 
9651 /*
9652  *    Function: sdopen
9653  *
9654  * Description: Driver's open(9e) entry point function.
9655  *
 *   Arguments: dev_p   - pointer to device number
9657  *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
9658  *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
9659  *		cred_p  - user credential pointer
9660  *
9661  * Return Code: EINVAL
9662  *		ENXIO
9663  *		EIO
9664  *		EROFS
9665  *		EBUSY
9666  *
9667  *     Context: Kernel thread context
9668  */
9669 /* ARGSUSED */
static int
sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
{
	struct sd_lun	*un;
	int		nodelay;
	int		part;
	uint64_t	partmask;
	int		instance;
	dev_t		dev;
	int		rval = EIO;
	diskaddr_t	nblks = 0;
	diskaddr_t	label_cap;

	/* Validate the open type */
	if (otyp >= OTYPCNT) {
		return (EINVAL);
	}

	dev = *dev_p;
	instance = SDUNIT(dev);
	mutex_enter(&sd_detach_mutex);

	/*
	 * Fail the open if there is no softstate for the instance, or
	 * if another thread somewhere is trying to detach the instance.
	 */
	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
	    (un->un_detach_count != 0)) {
		mutex_exit(&sd_detach_mutex);
		/*
		 * The probe cache only needs to be cleared when open (9e) fails
		 * with ENXIO (4238046).
		 */
		/*
		 * un-conditionally clearing probe cache is ok with
		 * separate sd/ssd binaries
		 * x86 platform can be an issue with both parallel
		 * and fibre in 1 binary
		 */
		sd_scsi_clear_probe_cache();
		return (ENXIO);
	}

	/*
	 * The un_layer_count is to prevent another thread in specfs from
	 * trying to detach the instance, which can happen when we are
	 * called from a higher-layer driver instead of thru specfs.
	 * This will not be needed when DDI provides a layered driver
	 * interface that allows specfs to know that an instance is in
	 * use by a layered driver & should not be detached.
	 *
	 * Note: the semantics for layered driver opens are exactly one
	 * close for every open.
	 */
	if (otyp == OTYP_LYR) {
		un->un_layer_count++;
	}

	/*
	 * Keep a count of the current # of opens in progress. This is because
	 * some layered drivers try to call us as a regular open. This can
	 * cause problems that we cannot prevent, however by keeping this count
	 * we can at least keep our open and detach routines from racing against
	 * each other under such conditions.
	 */
	un->un_opens_in_progress++;
	mutex_exit(&sd_detach_mutex);

	nodelay  = (flag & (FNDELAY | FNONBLOCK));
	part	 = SDPART(dev);
	partmask = 1 << part;

	/*
	 * We use a semaphore here in order to serialize
	 * open and close requests on the device.
	 */
	sema_p(&un->un_semoclose);

	mutex_enter(SD_MUTEX(un));

	/*
	 * All device accesses go thru sdstrategy() where we check
	 * on suspend status but there could be a scsi_poll command,
	 * which bypasses sdstrategy(), so we need to check pm
	 * status.
	 */

	if (!nodelay) {
		/* Wait out any suspend or power-level transition. */
		while ((un->un_state == SD_STATE_SUSPENDED) ||
		    (un->un_state == SD_STATE_PM_CHANGING)) {
			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
		}

		mutex_exit(SD_MUTEX(un));
		if (sd_pm_entry(un) != DDI_SUCCESS) {
			rval = EIO;
			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
			    "sdopen: sd_pm_entry failed\n");
			goto open_failed_with_pm;
		}
		mutex_enter(SD_MUTEX(un));
	}

	/* check for previous exclusive open */
	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);

	if (un->un_exclopen & (partmask)) {
		goto excl_open_fail;
	}

	if (flag & FEXCL) {
		/* FEXCL requires no existing open of any type on this part. */
		int i;
		if (un->un_ocmap.lyropen[part]) {
			goto excl_open_fail;
		}
		for (i = 0; i < (OTYPCNT - 1); i++) {
			if (un->un_ocmap.regopen[i] & (partmask)) {
				goto excl_open_fail;
			}
		}
	}

	/*
	 * Check the write permission if this is a removable media device,
	 * NDELAY has not been set, and writable permission is requested.
	 *
	 * Note: If NDELAY was set and this is write-protected media the WRITE
	 * attempt will fail with EIO as part of the I/O processing. This is a
	 * more permissive implementation that allows the open to succeed and
	 * WRITE attempts to fail when appropriate.
	 */
	if (un->un_f_chk_wp_open) {
		if ((flag & FWRITE) && (!nodelay)) {
			mutex_exit(SD_MUTEX(un));
			/*
			 * Defer the check for write permission on writable
			 * DVD drive till sdstrategy and will not fail open even
			 * if FWRITE is set as the device can be writable
			 * depending upon the media and the media can change
			 * after the call to open().
			 */
			if (un->un_f_dvdram_writable_device == FALSE) {
				if (ISCD(un) || sr_check_wp(dev)) {
				rval = EROFS;
				mutex_enter(SD_MUTEX(un));
				SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
				    "write to cd or write protected media\n");
				goto open_fail;
				}
			}
			mutex_enter(SD_MUTEX(un));
		}
	}

	/*
	 * If opening in NDELAY/NONBLOCK mode, just return.
	 * Check if disk is ready and has a valid geometry later.
	 */
	if (!nodelay) {
		sd_ssc_t	*ssc;

		mutex_exit(SD_MUTEX(un));
		ssc = sd_ssc_init(un);
		rval = sd_ready_and_valid(ssc, part);
		sd_ssc_fini(ssc);
		mutex_enter(SD_MUTEX(un));
		/*
		 * Fail if device is not ready or if the number of disk
		 * blocks is zero or negative for non CD devices.
		 */

		nblks = 0;

		if (rval == SD_READY_VALID && (!ISCD(un))) {
			/* if cmlb_partinfo fails, nblks remains 0 */
			mutex_exit(SD_MUTEX(un));
			(void) cmlb_partinfo(un->un_cmlbhandle, part, &nblks,
			    NULL, NULL, NULL, (void *)SD_PATH_DIRECT);
			mutex_enter(SD_MUTEX(un));
		}

		if ((rval != SD_READY_VALID) ||
		    (!ISCD(un) && nblks <= 0)) {
			rval = un->un_f_has_removable_media ? ENXIO : EIO;
			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
			    "device not ready or invalid disk block value\n");
			goto open_fail;
		}
#if defined(__i386) || defined(__amd64)
	} else {
		uchar_t *cp;
		/*
		 * x86 requires special nodelay handling, so that p0 is
		 * always defined and accessible.
		 * Invalidate geometry only if device is not already open.
		 */
		cp = &un->un_ocmap.chkd[0];
		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
			if (*cp != (uchar_t)0) {
				break;
			}
			cp++;
		}
		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
			mutex_exit(SD_MUTEX(un));
			cmlb_invalidate(un->un_cmlbhandle,
			    (void *)SD_PATH_DIRECT);
			mutex_enter(SD_MUTEX(un));
		}

#endif
	}

	/* Record this open in the per-unit open map. */
	if (otyp == OTYP_LYR) {
		un->un_ocmap.lyropen[part]++;
	} else {
		un->un_ocmap.regopen[otyp] |= partmask;
	}

	/* Set up open and exclusive open flags */
	if (flag & FEXCL) {
		un->un_exclopen |= (partmask);
	}

	/*
	 * If the lun is EFI labeled and lun capacity is greater than the
	 * capacity contained in the label, log a sys-event to notify the
	 * interested module.
	 * To avoid an infinite loop of logging sys-event, we only log the
	 * event when the lun is not opened in NDELAY mode. The event handler
	 * should open the lun in NDELAY mode.
	 */
	if (!(flag & FNDELAY)) {
		mutex_exit(SD_MUTEX(un));
		if (cmlb_efi_label_capacity(un->un_cmlbhandle, &label_cap,
		    (void*)SD_PATH_DIRECT) == 0) {
			mutex_enter(SD_MUTEX(un));
			if (un->un_f_blockcount_is_valid &&
			    un->un_blockcount > label_cap) {
				mutex_exit(SD_MUTEX(un));
				sd_log_lun_expansion_event(un,
				    (nodelay ? KM_NOSLEEP : KM_SLEEP));
				mutex_enter(SD_MUTEX(un));
			}
		} else {
			mutex_enter(SD_MUTEX(un));
		}
	}

	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
	    "open of part %d type %d\n", part, otyp);

	mutex_exit(SD_MUTEX(un));
	if (!nodelay) {
		sd_pm_exit(un);
	}

	sema_v(&un->un_semoclose);

	mutex_enter(&sd_detach_mutex);
	un->un_opens_in_progress--;
	mutex_exit(&sd_detach_mutex);

	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
	/* DDI_SUCCESS is 0, the expected open(9E) success return */
	return (DDI_SUCCESS);

excl_open_fail:
	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
	rval = EBUSY;

open_fail:
	mutex_exit(SD_MUTEX(un));

	/*
	 * On a failed open we must exit the pm management.
	 */
	if (!nodelay) {
		sd_pm_exit(un);
	}
open_failed_with_pm:
	sema_v(&un->un_semoclose);

	mutex_enter(&sd_detach_mutex);
	un->un_opens_in_progress--;
	/* Undo the layered-open count taken at the top of this routine. */
	if (otyp == OTYP_LYR) {
		un->un_layer_count--;
	}
	mutex_exit(&sd_detach_mutex);

	return (rval);
}
9963 }
9964 
9965 
9966 /*
9967  *    Function: sdclose
9968  *
9969  * Description: Driver's close(9e) entry point function.
9970  *
9971  *   Arguments: dev    - device number
9972  *		flag   - file status flag, informational only
9973  *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
9974  *		cred_p - user credential pointer
9975  *
9976  * Return Code: ENXIO
9977  *
9978  *     Context: Kernel thread context
9979  */
9980 /* ARGSUSED */
9981 static int
9982 sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
9983 {
9984 	struct sd_lun	*un;
9985 	uchar_t		*cp;
9986 	int		part;
9987 	int		nodelay;
9988 	int		rval = 0;
9989 
9990 	/* Validate the open type */
9991 	if (otyp >= OTYPCNT) {
9992 		return (ENXIO);
9993 	}
9994 
9995 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9996 		return (ENXIO);
9997 	}
9998 
9999 	part = SDPART(dev);
10000 	nodelay = flag & (FNDELAY | FNONBLOCK);
10001 
10002 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10003 	    "sdclose: close of part %d type %d\n", part, otyp);
10004 
10005 	/*
10006 	 * We use a semaphore here in order to serialize
10007 	 * open and close requests on the device.
10008 	 */
10009 	sema_p(&un->un_semoclose);
10010 
10011 	mutex_enter(SD_MUTEX(un));
10012 
10013 	/* Don't proceed if power is being changed. */
10014 	while (un->un_state == SD_STATE_PM_CHANGING) {
10015 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10016 	}
10017 
10018 	if (un->un_exclopen & (1 << part)) {
10019 		un->un_exclopen &= ~(1 << part);
10020 	}
10021 
10022 	/* Update the open partition map */
10023 	if (otyp == OTYP_LYR) {
10024 		un->un_ocmap.lyropen[part] -= 1;
10025 	} else {
10026 		un->un_ocmap.regopen[otyp] &= ~(1 << part);
10027 	}
10028 
10029 	cp = &un->un_ocmap.chkd[0];
10030 	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10031 		if (*cp != NULL) {
10032 			break;
10033 		}
10034 		cp++;
10035 	}
10036 
10037 	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10038 		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
10039 
10040 		/*
10041 		 * We avoid persistance upon the last close, and set
10042 		 * the throttle back to the maximum.
10043 		 */
10044 		un->un_throttle = un->un_saved_throttle;
10045 
10046 		if (un->un_state == SD_STATE_OFFLINE) {
10047 			if (un->un_f_is_fibre == FALSE) {
10048 				scsi_log(SD_DEVINFO(un), sd_label,
10049 				    CE_WARN, "offline\n");
10050 			}
10051 			mutex_exit(SD_MUTEX(un));
10052 			cmlb_invalidate(un->un_cmlbhandle,
10053 			    (void *)SD_PATH_DIRECT);
10054 			mutex_enter(SD_MUTEX(un));
10055 
10056 		} else {
10057 			/*
10058 			 * Flush any outstanding writes in NVRAM cache.
10059 			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
10060 			 * cmd, it may not work for non-Pluto devices.
10061 			 * SYNCHRONIZE CACHE is not required for removables,
10062 			 * except DVD-RAM drives.
10063 			 *
10064 			 * Also note: because SYNCHRONIZE CACHE is currently
10065 			 * the only command issued here that requires the
10066 			 * drive be powered up, only do the power up before
10067 			 * sending the Sync Cache command. If additional
10068 			 * commands are added which require a powered up
10069 			 * drive, the following sequence may have to change.
10070 			 *
10071 			 * And finally, note that parallel SCSI on SPARC
10072 			 * only issues a Sync Cache to DVD-RAM, a newly
10073 			 * supported device.
10074 			 */
10075 #if defined(__i386) || defined(__amd64)
10076 			if ((un->un_f_sync_cache_supported &&
10077 			    un->un_f_sync_cache_required) ||
10078 			    un->un_f_dvdram_writable_device == TRUE) {
10079 #else
10080 			if (un->un_f_dvdram_writable_device == TRUE) {
10081 #endif
10082 				mutex_exit(SD_MUTEX(un));
10083 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10084 					rval =
10085 					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
10086 					    NULL);
10087 					/* ignore error if not supported */
10088 					if (rval == ENOTSUP) {
10089 						rval = 0;
10090 					} else if (rval != 0) {
10091 						rval = EIO;
10092 					}
10093 					sd_pm_exit(un);
10094 				} else {
10095 					rval = EIO;
10096 				}
10097 				mutex_enter(SD_MUTEX(un));
10098 			}
10099 
10100 			/*
10101 			 * For devices which supports DOOR_LOCK, send an ALLOW
10102 			 * MEDIA REMOVAL command, but don't get upset if it
10103 			 * fails. We need to raise the power of the drive before
10104 			 * we can call sd_send_scsi_DOORLOCK()
10105 			 */
10106 			if (un->un_f_doorlock_supported) {
10107 				mutex_exit(SD_MUTEX(un));
10108 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10109 					sd_ssc_t	*ssc;
10110 
10111 					ssc = sd_ssc_init(un);
10112 					rval = sd_send_scsi_DOORLOCK(ssc,
10113 					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
10114 					if (rval != 0)
10115 						sd_ssc_assessment(ssc,
10116 						    SD_FMT_IGNORE);
10117 					sd_ssc_fini(ssc);
10118 
10119 					sd_pm_exit(un);
10120 					if (ISCD(un) && (rval != 0) &&
10121 					    (nodelay != 0)) {
10122 						rval = ENXIO;
10123 					}
10124 				} else {
10125 					rval = EIO;
10126 				}
10127 				mutex_enter(SD_MUTEX(un));
10128 			}
10129 
10130 			/*
10131 			 * If a device has removable media, invalidate all
10132 			 * parameters related to media, such as geometry,
10133 			 * blocksize, and blockcount.
10134 			 */
10135 			if (un->un_f_has_removable_media) {
10136 				sr_ejected(un);
10137 			}
10138 
10139 			/*
10140 			 * Destroy the cache (if it exists) which was
10141 			 * allocated for the write maps since this is
10142 			 * the last close for this media.
10143 			 */
10144 			if (un->un_wm_cache) {
10145 				/*
10146 				 * Check if there are pending commands.
10147 				 * and if there are give a warning and
10148 				 * do not destroy the cache.
10149 				 */
10150 				if (un->un_ncmds_in_driver > 0) {
10151 					scsi_log(SD_DEVINFO(un),
10152 					    sd_label, CE_WARN,
10153 					    "Unable to clean up memory "
10154 					    "because of pending I/O\n");
10155 				} else {
10156 					kmem_cache_destroy(
10157 					    un->un_wm_cache);
10158 					un->un_wm_cache = NULL;
10159 				}
10160 			}
10161 		}
10162 	}
10163 
10164 	mutex_exit(SD_MUTEX(un));
10165 	sema_v(&un->un_semoclose);
10166 
10167 	if (otyp == OTYP_LYR) {
10168 		mutex_enter(&sd_detach_mutex);
10169 		/*
10170 		 * The detach routine may run when the layer count
10171 		 * drops to zero.
10172 		 */
10173 		un->un_layer_count--;
10174 		mutex_exit(&sd_detach_mutex);
10175 	}
10176 
10177 	return (rval);
10178 }
10179 
10180 
10181 /*
10182  *    Function: sd_ready_and_valid
10183  *
10184  * Description: Test if device is ready and has a valid geometry.
10185  *
10186  *   Arguments: ssc - sd_ssc_t will contain un
10187  *		un  - driver soft state (unit) structure
10188  *
10189  * Return Code: SD_READY_VALID		ready and valid label
10190  *		SD_NOT_READY_VALID	not ready, no label
10191  *		SD_RESERVED_BY_OTHERS	reservation conflict
10192  *
10193  *     Context: Never called at interrupt context.
10194  */
10195 
static int
sd_ready_and_valid(sd_ssc_t *ssc, int part)
{
	struct sd_errstats	*stp;
	uint64_t		capacity;
	uint_t			lbasize;
	int			rval = SD_READY_VALID;
	char			name_str[48];
	boolean_t		is_valid;
	struct sd_lun		*un;
	int			status;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	mutex_enter(SD_MUTEX(un));
	/*
	 * If a device has removable media, we must check if media is
	 * ready when checking if this device is ready and valid.
	 * Note: SD_MUTEX is dropped across each SCSI command below and
	 * reacquired afterwards; the un_f_* flags are only read/written
	 * with the mutex held.
	 */
	if (un->un_f_has_removable_media) {
		mutex_exit(SD_MUTEX(un));
		status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);

		if (status != 0) {
			rval = SD_NOT_READY_VALID;
			mutex_enter(SD_MUTEX(un));

			/* Ignore all failed status for removable media */
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);

			goto done;
		}

		/* Label validity is sampled while SD_MUTEX is dropped. */
		is_valid = SD_IS_VALID_LABEL(un);
		mutex_enter(SD_MUTEX(un));
		if (!is_valid ||
		    (un->un_f_blockcount_is_valid == FALSE) ||
		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {

			/* capacity has to be read every open. */
			mutex_exit(SD_MUTEX(un));
			status = sd_send_scsi_READ_CAPACITY(ssc, &capacity,
			    &lbasize, SD_PATH_DIRECT);

			if (status != 0) {
				sd_ssc_assessment(ssc, SD_FMT_IGNORE);

				/*
				 * Capacity read failed: invalidate the label
				 * so nothing trusts the stale geometry.
				 */
				cmlb_invalidate(un->un_cmlbhandle,
				    (void *)SD_PATH_DIRECT);
				mutex_enter(SD_MUTEX(un));
				rval = SD_NOT_READY_VALID;

				goto done;
			} else {
				mutex_enter(SD_MUTEX(un));
				sd_update_block_info(un, lbasize, capacity);
			}
		}

		/*
		 * Check if the media in the device is writable or not.
		 */
		if (!is_valid && ISCD(un)) {
			sd_check_for_writable_cd(ssc, SD_PATH_DIRECT);
		}

	} else {
		/*
		 * Do a test unit ready to clear any unit attention from non-cd
		 * devices.
		 */
		mutex_exit(SD_MUTEX(un));

		status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
		if (status != 0) {
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		}

		mutex_enter(SD_MUTEX(un));
	}


	/*
	 * If this is a non 512 block device, allocate space for
	 * the wmap cache. This is being done here since every time
	 * a media is changed this routine will be called and the
	 * block size is a function of media rather than device.
	 */
	if (un->un_f_non_devbsize_supported && NOT_DEVBSIZE(un)) {
		if (!(un->un_wm_cache)) {
			(void) snprintf(name_str, sizeof (name_str),
			    "%s%d_cache",
			    ddi_driver_name(SD_DEVINFO(un)),
			    ddi_get_instance(SD_DEVINFO(un)));
			un->un_wm_cache = kmem_cache_create(
			    name_str, sizeof (struct sd_w_map),
			    8, sd_wm_cache_constructor,
			    sd_wm_cache_destructor, NULL,
			    (void *)un, NULL, 0);
			if (!(un->un_wm_cache)) {
				/*
				 * NOTE(review): ENOMEM is not one of the
				 * documented SD_* return codes; callers that
				 * only test against SD_READY_VALID still
				 * treat this as a failure, but the mix of
				 * namespaces looks worth confirming.
				 */
				rval = ENOMEM;
				goto done;
			}
		}
	}

	if (un->un_state == SD_STATE_NORMAL) {
		/*
		 * If the target is not yet ready here (defined by a TUR
		 * failure), invalidate the geometry and print an 'offline'
		 * message. This is a legacy message, as the state of the
		 * target is not actually changed to SD_STATE_OFFLINE.
		 *
		 * If the TUR fails for EACCES (Reservation Conflict),
		 * SD_RESERVED_BY_OTHERS will be returned to indicate
		 * reservation conflict. If the TUR fails for other
		 * reasons, SD_NOT_READY_VALID will be returned.
		 */
		int err;

		mutex_exit(SD_MUTEX(un));
		err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
		mutex_enter(SD_MUTEX(un));

		if (err != 0) {
			mutex_exit(SD_MUTEX(un));
			cmlb_invalidate(un->un_cmlbhandle,
			    (void *)SD_PATH_DIRECT);
			mutex_enter(SD_MUTEX(un));
			if (err == EACCES) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "reservation conflict\n");
				rval = SD_RESERVED_BY_OTHERS;
				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
			} else {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "drive offline\n");
				rval = SD_NOT_READY_VALID;
				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
			}
			goto done;
		}
	}

	/*
	 * Re-validate the label and the requested partition, unless a
	 * format is in progress (formatting invalidates the geometry).
	 */
	if (un->un_f_format_in_progress == FALSE) {
		mutex_exit(SD_MUTEX(un));

		(void) cmlb_validate(un->un_cmlbhandle, 0,
		    (void *)SD_PATH_DIRECT);
		if (cmlb_partinfo(un->un_cmlbhandle, part, NULL, NULL, NULL,
		    NULL, (void *) SD_PATH_DIRECT) != 0) {
			rval = SD_NOT_READY_VALID;
			mutex_enter(SD_MUTEX(un));

			goto done;
		}
		if (un->un_f_pkstats_enabled) {
			sd_set_pstats(un);
			SD_TRACE(SD_LOG_IO_PARTITION, un,
			    "sd_ready_and_valid: un:0x%p pstats created and "
			    "set\n", un);
		}
		mutex_enter(SD_MUTEX(un));
	}

	/*
	 * If this device supports DOOR_LOCK command, try and send
	 * this command to PREVENT MEDIA REMOVAL, but don't get upset
	 * if it fails. For a CD, however, it is an error
	 */
	if (un->un_f_doorlock_supported) {
		mutex_exit(SD_MUTEX(un));
		status = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
		    SD_PATH_DIRECT);

		if ((status != 0) && ISCD(un)) {
			rval = SD_NOT_READY_VALID;
			mutex_enter(SD_MUTEX(un));

			sd_ssc_assessment(ssc, SD_FMT_IGNORE);

			goto done;
		} else if (status != 0)
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		mutex_enter(SD_MUTEX(un));
	}

	/* The state has changed, inform the media watch routines */
	un->un_mediastate = DKIO_INSERTED;
	cv_broadcast(&un->un_state_cv);
	rval = SD_READY_VALID;

done:

	/*
	 * Initialize the capacity kstat value, if no media previously
	 * (capacity kstat is 0) and a media has been inserted
	 * (un_blockcount > 0).
	 */
	if (un->un_errstats != NULL) {
		stp = (struct sd_errstats *)un->un_errstats->ks_data;
		if ((stp->sd_capacity.value.ui64 == 0) &&
		    (un->un_f_blockcount_is_valid == TRUE)) {
			stp->sd_capacity.value.ui64 =
			    (uint64_t)((uint64_t)un->un_blockcount *
			    un->un_sys_blocksize);
		}
	}

	mutex_exit(SD_MUTEX(un));
	return (rval);
}
10411 
10412 
10413 /*
10414  *    Function: sdmin
10415  *
10416  * Description: Routine to limit the size of a data transfer. Used in
10417  *		conjunction with physio(9F).
10418  *
10419  *   Arguments: bp - pointer to the indicated buf(9S) struct.
10420  *
10421  *     Context: Kernel thread context.
10422  */
10423 
10424 static void
10425 sdmin(struct buf *bp)
10426 {
10427 	struct sd_lun	*un;
10428 	int		instance;
10429 
10430 	instance = SDUNIT(bp->b_edev);
10431 
10432 	un = ddi_get_soft_state(sd_state, instance);
10433 	ASSERT(un != NULL);
10434 
10435 	/*
10436 	 * We depend on DMA partial or buf breakup to restrict
10437 	 * IO size if any of them enabled.
10438 	 */
10439 	if (un->un_partial_dma_supported ||
10440 	    un->un_buf_breakup_supported) {
10441 		return;
10442 	}
10443 
10444 	if (bp->b_bcount > un->un_max_xfer_size) {
10445 		bp->b_bcount = un->un_max_xfer_size;
10446 	}
10447 }
10448 
10449 
10450 /*
10451  *    Function: sdread
10452  *
10453  * Description: Driver's read(9e) entry point function.
10454  *
10455  *   Arguments: dev   - device number
10456  *		uio   - structure pointer describing where data is to be stored
10457  *			in user's space
10458  *		cred_p  - user credential pointer
10459  *
10460  * Return Code: ENXIO
10461  *		EIO
10462  *		EINVAL
10463  *		value returned by physio
10464  *
10465  *     Context: Kernel thread context.
10466  */
10467 /* ARGSUSED */
10468 static int
10469 sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
10470 {
10471 	struct sd_lun	*un = NULL;
10472 	int		secmask;
10473 	int		err = 0;
10474 	sd_ssc_t	*ssc;
10475 
10476 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10477 		return (ENXIO);
10478 	}
10479 
10480 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10481 
10482 
10483 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
10484 		mutex_enter(SD_MUTEX(un));
10485 		/*
10486 		 * Because the call to sd_ready_and_valid will issue I/O we
10487 		 * must wait here if either the device is suspended or
10488 		 * if it's power level is changing.
10489 		 */
10490 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10491 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10492 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10493 		}
10494 		un->un_ncmds_in_driver++;
10495 		mutex_exit(SD_MUTEX(un));
10496 
10497 		/* Initialize sd_ssc_t for internal uscsi commands */
10498 		ssc = sd_ssc_init(un);
10499 		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
10500 			err = EIO;
10501 		} else {
10502 			err = 0;
10503 		}
10504 		sd_ssc_fini(ssc);
10505 
10506 		mutex_enter(SD_MUTEX(un));
10507 		un->un_ncmds_in_driver--;
10508 		ASSERT(un->un_ncmds_in_driver >= 0);
10509 		mutex_exit(SD_MUTEX(un));
10510 		if (err != 0)
10511 			return (err);
10512 	}
10513 
10514 	/*
10515 	 * Read requests are restricted to multiples of the system block size.
10516 	 */
10517 	secmask = un->un_sys_blocksize - 1;
10518 
10519 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10520 		SD_ERROR(SD_LOG_READ_WRITE, un,
10521 		    "sdread: file offset not modulo %d\n",
10522 		    un->un_sys_blocksize);
10523 		err = EINVAL;
10524 	} else if (uio->uio_iov->iov_len & (secmask)) {
10525 		SD_ERROR(SD_LOG_READ_WRITE, un,
10526 		    "sdread: transfer length not modulo %d\n",
10527 		    un->un_sys_blocksize);
10528 		err = EINVAL;
10529 	} else {
10530 		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
10531 	}
10532 
10533 	return (err);
10534 }
10535 
10536 
10537 /*
10538  *    Function: sdwrite
10539  *
10540  * Description: Driver's write(9e) entry point function.
10541  *
10542  *   Arguments: dev   - device number
10543  *		uio   - structure pointer describing where data is stored in
10544  *			user's space
10545  *		cred_p  - user credential pointer
10546  *
10547  * Return Code: ENXIO
10548  *		EIO
10549  *		EINVAL
10550  *		value returned by physio
10551  *
10552  *     Context: Kernel thread context.
10553  */
10554 /* ARGSUSED */
10555 static int
10556 sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
10557 {
10558 	struct sd_lun	*un = NULL;
10559 	int		secmask;
10560 	int		err = 0;
10561 	sd_ssc_t	*ssc;
10562 
10563 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10564 		return (ENXIO);
10565 	}
10566 
10567 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10568 
10569 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
10570 		mutex_enter(SD_MUTEX(un));
10571 		/*
10572 		 * Because the call to sd_ready_and_valid will issue I/O we
10573 		 * must wait here if either the device is suspended or
10574 		 * if it's power level is changing.
10575 		 */
10576 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10577 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10578 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10579 		}
10580 		un->un_ncmds_in_driver++;
10581 		mutex_exit(SD_MUTEX(un));
10582 
10583 		/* Initialize sd_ssc_t for internal uscsi commands */
10584 		ssc = sd_ssc_init(un);
10585 		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
10586 			err = EIO;
10587 		} else {
10588 			err = 0;
10589 		}
10590 		sd_ssc_fini(ssc);
10591 
10592 		mutex_enter(SD_MUTEX(un));
10593 		un->un_ncmds_in_driver--;
10594 		ASSERT(un->un_ncmds_in_driver >= 0);
10595 		mutex_exit(SD_MUTEX(un));
10596 		if (err != 0)
10597 			return (err);
10598 	}
10599 
10600 	/*
10601 	 * Write requests are restricted to multiples of the system block size.
10602 	 */
10603 	secmask = un->un_sys_blocksize - 1;
10604 
10605 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10606 		SD_ERROR(SD_LOG_READ_WRITE, un,
10607 		    "sdwrite: file offset not modulo %d\n",
10608 		    un->un_sys_blocksize);
10609 		err = EINVAL;
10610 	} else if (uio->uio_iov->iov_len & (secmask)) {
10611 		SD_ERROR(SD_LOG_READ_WRITE, un,
10612 		    "sdwrite: transfer length not modulo %d\n",
10613 		    un->un_sys_blocksize);
10614 		err = EINVAL;
10615 	} else {
10616 		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
10617 	}
10618 
10619 	return (err);
10620 }
10621 
10622 
10623 /*
10624  *    Function: sdaread
10625  *
10626  * Description: Driver's aread(9e) entry point function.
10627  *
10628  *   Arguments: dev   - device number
10629  *		aio   - structure pointer describing where data is to be stored
10630  *		cred_p  - user credential pointer
10631  *
10632  * Return Code: ENXIO
10633  *		EIO
10634  *		EINVAL
10635  *		value returned by aphysio
10636  *
10637  *     Context: Kernel thread context.
10638  */
10639 /* ARGSUSED */
10640 static int
10641 sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
10642 {
10643 	struct sd_lun	*un = NULL;
10644 	struct uio	*uio = aio->aio_uio;
10645 	int		secmask;
10646 	int		err = 0;
10647 	sd_ssc_t	*ssc;
10648 
10649 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10650 		return (ENXIO);
10651 	}
10652 
10653 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10654 
10655 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
10656 		mutex_enter(SD_MUTEX(un));
10657 		/*
10658 		 * Because the call to sd_ready_and_valid will issue I/O we
10659 		 * must wait here if either the device is suspended or
10660 		 * if it's power level is changing.
10661 		 */
10662 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10663 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10664 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10665 		}
10666 		un->un_ncmds_in_driver++;
10667 		mutex_exit(SD_MUTEX(un));
10668 
10669 		/* Initialize sd_ssc_t for internal uscsi commands */
10670 		ssc = sd_ssc_init(un);
10671 		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
10672 			err = EIO;
10673 		} else {
10674 			err = 0;
10675 		}
10676 		sd_ssc_fini(ssc);
10677 
10678 		mutex_enter(SD_MUTEX(un));
10679 		un->un_ncmds_in_driver--;
10680 		ASSERT(un->un_ncmds_in_driver >= 0);
10681 		mutex_exit(SD_MUTEX(un));
10682 		if (err != 0)
10683 			return (err);
10684 	}
10685 
10686 	/*
10687 	 * Read requests are restricted to multiples of the system block size.
10688 	 */
10689 	secmask = un->un_sys_blocksize - 1;
10690 
10691 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10692 		SD_ERROR(SD_LOG_READ_WRITE, un,
10693 		    "sdaread: file offset not modulo %d\n",
10694 		    un->un_sys_blocksize);
10695 		err = EINVAL;
10696 	} else if (uio->uio_iov->iov_len & (secmask)) {
10697 		SD_ERROR(SD_LOG_READ_WRITE, un,
10698 		    "sdaread: transfer length not modulo %d\n",
10699 		    un->un_sys_blocksize);
10700 		err = EINVAL;
10701 	} else {
10702 		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
10703 	}
10704 
10705 	return (err);
10706 }
10707 
10708 
10709 /*
10710  *    Function: sdawrite
10711  *
10712  * Description: Driver's awrite(9e) entry point function.
10713  *
10714  *   Arguments: dev   - device number
10715  *		aio   - structure pointer describing where data is stored
10716  *		cred_p  - user credential pointer
10717  *
10718  * Return Code: ENXIO
10719  *		EIO
10720  *		EINVAL
10721  *		value returned by aphysio
10722  *
10723  *     Context: Kernel thread context.
10724  */
10725 /* ARGSUSED */
10726 static int
10727 sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
10728 {
10729 	struct sd_lun	*un = NULL;
10730 	struct uio	*uio = aio->aio_uio;
10731 	int		secmask;
10732 	int		err = 0;
10733 	sd_ssc_t	*ssc;
10734 
10735 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10736 		return (ENXIO);
10737 	}
10738 
10739 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10740 
10741 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
10742 		mutex_enter(SD_MUTEX(un));
10743 		/*
10744 		 * Because the call to sd_ready_and_valid will issue I/O we
10745 		 * must wait here if either the device is suspended or
10746 		 * if it's power level is changing.
10747 		 */
10748 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10749 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10750 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10751 		}
10752 		un->un_ncmds_in_driver++;
10753 		mutex_exit(SD_MUTEX(un));
10754 
10755 		/* Initialize sd_ssc_t for internal uscsi commands */
10756 		ssc = sd_ssc_init(un);
10757 		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
10758 			err = EIO;
10759 		} else {
10760 			err = 0;
10761 		}
10762 		sd_ssc_fini(ssc);
10763 
10764 		mutex_enter(SD_MUTEX(un));
10765 		un->un_ncmds_in_driver--;
10766 		ASSERT(un->un_ncmds_in_driver >= 0);
10767 		mutex_exit(SD_MUTEX(un));
10768 		if (err != 0)
10769 			return (err);
10770 	}
10771 
10772 	/*
10773 	 * Write requests are restricted to multiples of the system block size.
10774 	 */
10775 	secmask = un->un_sys_blocksize - 1;
10776 
10777 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10778 		SD_ERROR(SD_LOG_READ_WRITE, un,
10779 		    "sdawrite: file offset not modulo %d\n",
10780 		    un->un_sys_blocksize);
10781 		err = EINVAL;
10782 	} else if (uio->uio_iov->iov_len & (secmask)) {
10783 		SD_ERROR(SD_LOG_READ_WRITE, un,
10784 		    "sdawrite: transfer length not modulo %d\n",
10785 		    un->un_sys_blocksize);
10786 		err = EINVAL;
10787 	} else {
10788 		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
10789 	}
10790 
10791 	return (err);
10792 }
10793 
10794 
10795 
10796 
10797 
10798 /*
10799  * Driver IO processing follows the following sequence:
10800  *
10801  *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
10802  *         |                |                     ^
10803  *         v                v                     |
10804  * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
10805  *         |                |                     |                   |
10806  *         v                |                     |                   |
10807  * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
10808  *         |                |                     ^                   ^
10809  *         v                v                     |                   |
10810  * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
10811  *         |                |                     |                   |
10812  *     +---+                |                     +------------+      +-------+
10813  *     |                    |                                  |              |
10814  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
10815  *     |                    v                                  |              |
10816  *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
10817  *     |                    |                                  ^              |
10818  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
10819  *     |                    v                                  |              |
10820  *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
10821  *     |                    |                                  ^              |
10822  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
10823  *     |                    v                                  |              |
10824  *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
10825  *     |                    |                                  ^              |
10826  *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
10827  *     |                    v                                  |              |
10828  *     |              sd_pm_iostart()                     sd_pm_iodone()      |
10829  *     |                    |                                  ^              |
10830  *     |                    |                                  |              |
10831  *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
10832  *                          |                           ^
10833  *                          v                           |
10834  *                   sd_core_iostart()                  |
10835  *                          |                           |
10836  *                          |                           +------>(*destroypkt)()
10837  *                          +-> sd_start_cmds() <-+     |           |
10838  *                          |                     |     |           v
10839  *                          |                     |     |  scsi_destroy_pkt(9F)
10840  *                          |                     |     |
10841  *                          +->(*initpkt)()       +- sdintr()
10842  *                          |  |                        |  |
10843  *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
10844  *                          |  +-> scsi_setup_cdb(9F)   |
10845  *                          |                           |
10846  *                          +--> scsi_transport(9F)     |
10847  *                                     |                |
10848  *                                     +----> SCSA ---->+
10849  *
10850  *
10851  * This code is based upon the following presumptions:
10852  *
10853  *   - iostart and iodone functions operate on buf(9S) structures. These
10854  *     functions perform the necessary operations on the buf(9S) and pass
10855  *     them along to the next function in the chain by using the macros
10856  *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
10857  *     (for iodone side functions).
10858  *
10859  *   - The iostart side functions may sleep. The iodone side functions
10860  *     are called under interrupt context and may NOT sleep. Therefore
10861  *     iodone side functions also may not call iostart side functions.
10862  *     (NOTE: iostart side functions should NOT sleep for memory, as
10863  *     this could result in deadlock.)
10864  *
10865  *   - An iostart side function may call its corresponding iodone side
10866  *     function directly (if necessary).
10867  *
10868  *   - In the event of an error, an iostart side function can return a buf(9S)
10869  *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
10870  *     b_error in the usual way of course).
10871  *
10872  *   - The taskq mechanism may be used by the iodone side functions to dispatch
10873  *     requests to the iostart side functions.  The iostart side functions in
10874  *     this case would be called under the context of a taskq thread, so it's
10875  *     OK for them to block/sleep/spin in this case.
10876  *
10877  *   - iostart side functions may allocate "shadow" buf(9S) structs and
10878  *     pass them along to the next function in the chain.  The corresponding
10879  *     iodone side functions must coalesce the "shadow" bufs and return
10880  *     the "original" buf to the next higher layer.
10881  *
10882  *   - The b_private field of the buf(9S) struct holds a pointer to
10883  *     an sd_xbuf struct, which contains information needed to
10884  *     construct the scsi_pkt for the command.
10885  *
10886  *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
10887  *     layer must acquire & release the SD_MUTEX(un) as needed.
10888  */
10889 
10890 
10891 /*
10892  * Create taskq for all targets in the system. This is created at
10893  * _init(9E) and destroyed at _fini(9E).
10894  *
10895  * Note: here we set the minalloc to a reasonably high number to ensure that
10896  * we will have an adequate supply of task entries available at interrupt time.
10897  * This is used in conjunction with the TASKQ_PREPOPULATE flag in
 * sd_taskq_create().  Since we do not want to sleep for allocations at
10899  * interrupt time, set maxalloc equal to minalloc. That way we will just fail
10900  * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
10901  * requests any one instant in time.
10902  */
10903 #define	SD_TASKQ_NUMTHREADS	8
10904 #define	SD_TASKQ_MINALLOC	256
10905 #define	SD_TASKQ_MAXALLOC	256
10906 
10907 static taskq_t	*sd_tq = NULL;
10908 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))
10909 
10910 static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
10911 static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
10912 
10913 /*
10914  * The following task queue is being created for the write part of
10915  * read-modify-write of non-512 block size devices.
10916  * Limit the number of threads to 1 for now. This number has been chosen
10917  * considering the fact that it applies only to dvd ram drives/MO drives
10918  * currently. Performance for which is not main criteria at this stage.
10919  * Note: It needs to be explored if we can use a single taskq in future
10920  */
10921 #define	SD_WMR_TASKQ_NUMTHREADS	1
10922 static taskq_t	*sd_wmr_tq = NULL;
10923 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
10924 
10925 /*
10926  *    Function: sd_taskq_create
10927  *
10928  * Description: Create taskq thread(s) and preallocate task entries
10929  *
10930  * Return Code: Returns a pointer to the allocated taskq_t.
10931  *
10932  *     Context: Can sleep. Requires blockable context.
10933  *
10934  *       Notes: - The taskq() facility currently is NOT part of the DDI.
 *		  (definitely NOT recommended for 3rd-party drivers!) :-)
10936  *		- taskq_create() will block for memory, also it will panic
10937  *		  if it cannot create the requested number of threads.
10938  *		- Currently taskq_create() creates threads that cannot be
10939  *		  swapped.
10940  *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
10941  *		  supply of taskq entries at interrupt time (ie, so that we
10942  *		  do not have to sleep for memory)
10943  */
10944 
10945 static void
10946 sd_taskq_create(void)
10947 {
10948 	char	taskq_name[TASKQ_NAMELEN];
10949 
10950 	ASSERT(sd_tq == NULL);
10951 	ASSERT(sd_wmr_tq == NULL);
10952 
10953 	(void) snprintf(taskq_name, sizeof (taskq_name),
10954 	    "%s_drv_taskq", sd_label);
10955 	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
10956 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
10957 	    TASKQ_PREPOPULATE));
10958 
10959 	(void) snprintf(taskq_name, sizeof (taskq_name),
10960 	    "%s_rmw_taskq", sd_label);
10961 	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
10962 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
10963 	    TASKQ_PREPOPULATE));
10964 }
10965 
10966 
10967 /*
10968  *    Function: sd_taskq_delete
10969  *
10970  * Description: Complementary cleanup routine for sd_taskq_create().
10971  *
10972  *     Context: Kernel thread context.
10973  */
10974 
10975 static void
10976 sd_taskq_delete(void)
10977 {
10978 	ASSERT(sd_tq != NULL);
10979 	ASSERT(sd_wmr_tq != NULL);
10980 	taskq_destroy(sd_tq);
10981 	taskq_destroy(sd_wmr_tq);
10982 	sd_tq = NULL;
10983 	sd_wmr_tq = NULL;
10984 }
10985 
10986 
10987 /*
10988  *    Function: sdstrategy
10989  *
10990  * Description: Driver's strategy (9E) entry point function.
10991  *
10992  *   Arguments: bp - pointer to buf(9S)
10993  *
10994  * Return Code: Always returns zero
10995  *
10996  *     Context: Kernel thread context.
10997  */
10998 
10999 static int
11000 sdstrategy(struct buf *bp)
11001 {
11002 	struct sd_lun *un;
11003 
11004 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
11005 	if (un == NULL) {
11006 		bioerror(bp, EIO);
11007 		bp->b_resid = bp->b_bcount;
11008 		biodone(bp);
11009 		return (0);
11010 	}
11011 	/* As was done in the past, fail new cmds. if state is dumping. */
11012 	if (un->un_state == SD_STATE_DUMPING) {
11013 		bioerror(bp, ENXIO);
11014 		bp->b_resid = bp->b_bcount;
11015 		biodone(bp);
11016 		return (0);
11017 	}
11018 
11019 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11020 
11021 	/*
11022 	 * Commands may sneak in while we released the mutex in
11023 	 * DDI_SUSPEND, we should block new commands. However, old
11024 	 * commands that are still in the driver at this point should
11025 	 * still be allowed to drain.
11026 	 */
11027 	mutex_enter(SD_MUTEX(un));
11028 	/*
11029 	 * Must wait here if either the device is suspended or
11030 	 * if it's power level is changing.
11031 	 */
11032 	while ((un->un_state == SD_STATE_SUSPENDED) ||
11033 	    (un->un_state == SD_STATE_PM_CHANGING)) {
11034 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11035 	}
11036 
11037 	un->un_ncmds_in_driver++;
11038 
11039 	/*
11040 	 * atapi: Since we are running the CD for now in PIO mode we need to
11041 	 * call bp_mapin here to avoid bp_mapin called interrupt context under
11042 	 * the HBA's init_pkt routine.
11043 	 */
11044 	if (un->un_f_cfg_is_atapi == TRUE) {
11045 		mutex_exit(SD_MUTEX(un));
11046 		bp_mapin(bp);
11047 		mutex_enter(SD_MUTEX(un));
11048 	}
11049 	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
11050 	    un->un_ncmds_in_driver);
11051 
11052 	if (bp->b_flags & B_WRITE)
11053 		un->un_f_sync_cache_required = TRUE;
11054 
11055 	mutex_exit(SD_MUTEX(un));
11056 
11057 	/*
11058 	 * This will (eventually) allocate the sd_xbuf area and
11059 	 * call sd_xbuf_strategy().  We just want to return the
11060 	 * result of ddi_xbuf_qstrategy so that we have an opt-
11061 	 * imized tail call which saves us a stack frame.
11062 	 */
11063 	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
11064 }
11065 
11066 
11067 /*
11068  *    Function: sd_xbuf_strategy
11069  *
11070  * Description: Function for initiating IO operations via the
11071  *		ddi_xbuf_qstrategy() mechanism.
11072  *
11073  *     Context: Kernel thread context.
11074  */
11075 
11076 static void
11077 sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
11078 {
11079 	struct sd_lun *un = arg;
11080 
11081 	ASSERT(bp != NULL);
11082 	ASSERT(xp != NULL);
11083 	ASSERT(un != NULL);
11084 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11085 
11086 	/*
11087 	 * Initialize the fields in the xbuf and save a pointer to the
11088 	 * xbuf in bp->b_private.
11089 	 */
11090 	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
11091 
11092 	/* Send the buf down the iostart chain */
11093 	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
11094 }
11095 
11096 
11097 /*
11098  *    Function: sd_xbuf_init
11099  *
11100  * Description: Prepare the given sd_xbuf struct for use.
11101  *
11102  *   Arguments: un - ptr to softstate
11103  *		bp - ptr to associated buf(9S)
11104  *		xp - ptr to associated sd_xbuf
11105  *		chain_type - IO chain type to use:
11106  *			SD_CHAIN_NULL
11107  *			SD_CHAIN_BUFIO
11108  *			SD_CHAIN_USCSI
11109  *			SD_CHAIN_DIRECT
11110  *			SD_CHAIN_DIRECT_PRIORITY
11111  *		pktinfop - ptr to private data struct for scsi_pkt(9S)
11112  *			initialization; may be NULL if none.
11113  *
11114  *     Context: Kernel thread context
11115  */
11116 
11117 static void
11118 sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
11119 	uchar_t chain_type, void *pktinfop)
11120 {
11121 	int index;
11122 
11123 	ASSERT(un != NULL);
11124 	ASSERT(bp != NULL);
11125 	ASSERT(xp != NULL);
11126 
11127 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
11128 	    bp, chain_type);
11129 
11130 	xp->xb_un	= un;
11131 	xp->xb_pktp	= NULL;
11132 	xp->xb_pktinfo	= pktinfop;
11133 	xp->xb_private	= bp->b_private;
11134 	xp->xb_blkno	= (daddr_t)bp->b_blkno;
11135 
11136 	/*
11137 	 * Set up the iostart and iodone chain indexes in the xbuf, based
11138 	 * upon the specified chain type to use.
11139 	 */
11140 	switch (chain_type) {
11141 	case SD_CHAIN_NULL:
11142 		/*
11143 		 * Fall thru to just use the values for the buf type, even
11144 		 * tho for the NULL chain these values will never be used.
11145 		 */
11146 		/* FALLTHRU */
11147 	case SD_CHAIN_BUFIO:
11148 		index = un->un_buf_chain_type;
11149 		break;
11150 	case SD_CHAIN_USCSI:
11151 		index = un->un_uscsi_chain_type;
11152 		break;
11153 	case SD_CHAIN_DIRECT:
11154 		index = un->un_direct_chain_type;
11155 		break;
11156 	case SD_CHAIN_DIRECT_PRIORITY:
11157 		index = un->un_priority_chain_type;
11158 		break;
11159 	default:
11160 		/* We're really broken if we ever get here... */
11161 		panic("sd_xbuf_init: illegal chain type!");
11162 		/*NOTREACHED*/
11163 	}
11164 
11165 	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
11166 	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;
11167 
11168 	/*
11169 	 * It might be a bit easier to simply bzero the entire xbuf above,
11170 	 * but it turns out that since we init a fair number of members anyway,
11171 	 * we save a fair number cycles by doing explicit assignment of zero.
11172 	 */
11173 	xp->xb_pkt_flags	= 0;
11174 	xp->xb_dma_resid	= 0;
11175 	xp->xb_retry_count	= 0;
11176 	xp->xb_victim_retry_count = 0;
11177 	xp->xb_ua_retry_count	= 0;
11178 	xp->xb_nr_retry_count	= 0;
11179 	xp->xb_sense_bp		= NULL;
11180 	xp->xb_sense_status	= 0;
11181 	xp->xb_sense_state	= 0;
11182 	xp->xb_sense_resid	= 0;
11183 	xp->xb_ena		= 0;
11184 
11185 	bp->b_private	= xp;
11186 	bp->b_flags	&= ~(B_DONE | B_ERROR);
11187 	bp->b_resid	= 0;
11188 	bp->av_forw	= NULL;
11189 	bp->av_back	= NULL;
11190 	bioerror(bp, 0);
11191 
11192 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
11193 }
11194 
11195 
11196 /*
11197  *    Function: sd_uscsi_strategy
11198  *
11199  * Description: Wrapper for calling into the USCSI chain via physio(9F)
11200  *
11201  *   Arguments: bp - buf struct ptr
11202  *
11203  * Return Code: Always returns 0
11204  *
11205  *     Context: Kernel thread context
11206  */
11207 
static int
sd_uscsi_strategy(struct buf *bp)
{
	struct sd_lun		*un;
	struct sd_uscsi_info	*uip;
	struct sd_xbuf		*xp;
	uchar_t			chain_type;
	uchar_t			cmd;

	ASSERT(bp != NULL);

	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
	if (un == NULL) {
		/* No soft state for this instance: fail the buf with EIO. */
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);

	/*
	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
	 */
	ASSERT(bp->b_private != NULL);
	uip = (struct sd_uscsi_info *)bp->b_private;
	/* First CDB byte is the SCSI opcode; used for the cache test below. */
	cmd = ((struct uscsi_cmd *)(uip->ui_cmdp))->uscsi_cdb[0];

	mutex_enter(SD_MUTEX(un));
	/*
	 * atapi: Since we are running the CD for now in PIO mode we need to
	 * call bp_mapin here to avoid bp_mapin called interrupt context under
	 * the HBA's init_pkt routine.
	 */
	if (un->un_f_cfg_is_atapi == TRUE) {
		mutex_exit(SD_MUTEX(un));
		bp_mapin(bp);
		mutex_enter(SD_MUTEX(un));
	}
	un->un_ncmds_in_driver++;	/* decremented in sd_uscsi_iodone() */
	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
	    un->un_ncmds_in_driver);

	/*
	 * A data-out command other than MODE SELECT may dirty the device's
	 * write cache, so flag that a cache sync may be required.
	 */
	if ((bp->b_flags & B_WRITE) && (bp->b_bcount != 0) &&
	    (cmd != SCMD_MODE_SELECT) && (cmd != SCMD_MODE_SELECT_G1))
		un->un_f_sync_cache_required = TRUE;

	mutex_exit(SD_MUTEX(un));

	/* Select the IO chain matching the caller's requested path. */
	switch (uip->ui_flags) {
	case SD_PATH_DIRECT:
		chain_type = SD_CHAIN_DIRECT;
		break;
	case SD_PATH_DIRECT_PRIORITY:
		chain_type = SD_CHAIN_DIRECT_PRIORITY;
		break;
	default:
		chain_type = SD_CHAIN_USCSI;
		break;
	}

	/*
	 * We may allocate extra buf for external USCSI commands. If the
	 * application asks for bigger than 20-byte sense data via USCSI,
	 * SCSA layer will allocate 252 bytes sense buf for that command.
	 * Note: sd_uscsi_iodone() frees the xbuf using the same rqlen
	 * test, so the two size computations must stay in sync.
	 */
	if (((struct uscsi_cmd *)(uip->ui_cmdp))->uscsi_rqlen >
	    SENSE_LENGTH) {
		xp = kmem_zalloc(sizeof (struct sd_xbuf) - SENSE_LENGTH +
		    MAX_SENSE_LENGTH, KM_SLEEP);
	} else {
		xp = kmem_zalloc(sizeof (struct sd_xbuf), KM_SLEEP);
	}

	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);

	/* Use the index obtained within xbuf_init */
	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);

	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);

	return (0);
}
11293 
11294 /*
11295  *    Function: sd_send_scsi_cmd
11296  *
11297  * Description: Runs a USCSI command for user (when called thru sdioctl),
11298  *		or for the driver
11299  *
11300  *   Arguments: dev - the dev_t for the device
11301  *		incmd - ptr to a valid uscsi_cmd struct
11302  *		flag - bit flag, indicating open settings, 32/64 bit type
11303  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11304  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11305  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11306  *			to use the USCSI "direct" chain and bypass the normal
11307  *			command waitq.
11308  *
11309  * Return Code: 0 -  successful completion of the given command
11310  *		EIO - scsi_uscsi_handle_command() failed
11311  *		ENXIO  - soft state not found for specified dev
11312  *		EINVAL
11313  *		EFAULT - copyin/copyout error
11314  *		return code of scsi_uscsi_handle_command():
11315  *			EIO
11316  *			ENXIO
11317  *			EACCES
11318  *
11319  *     Context: Waits for command to complete. Can sleep.
11320  */
11321 
11322 static int
11323 sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
11324 	enum uio_seg dataspace, int path_flag)
11325 {
11326 	struct sd_lun	*un;
11327 	sd_ssc_t	*ssc;
11328 	int		rval;
11329 
11330 	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
11331 	if (un == NULL) {
11332 		return (ENXIO);
11333 	}
11334 
11335 	/*
11336 	 * Using sd_ssc_send to handle uscsi cmd
11337 	 */
11338 	ssc = sd_ssc_init(un);
11339 	rval = sd_ssc_send(ssc, incmd, flag, dataspace, path_flag);
11340 	sd_ssc_fini(ssc);
11341 
11342 	return (rval);
11343 }
11344 
11345 /*
11346  *    Function: sd_ssc_init
11347  *
11348  * Description: Uscsi end-user call this function to initialize necessary
11349  *              fields, such as uscsi_cmd and sd_uscsi_info struct.
11350  *
11351  *              The return value of sd_send_scsi_cmd will be treated as a
11352  *              fault in various conditions. Even it is not Zero, some
11353  *              callers may ignore the return value. That is to say, we can
11354  *              not make an accurate assessment in sdintr, since if a
11355  *              command is failed in sdintr it does not mean the caller of
11356  *              sd_send_scsi_cmd will treat it as a real failure.
11357  *
11358  *              To avoid printing too many error logs for a failed uscsi
11359  *              packet that the caller may not treat it as a failure, the
11360  *              sd will keep silent for handling all uscsi commands.
11361  *
11362  *              During detach->attach and attach-open, for some types of
11363  *              problems, the driver should be providing information about
11364  *              the problem encountered. Device use USCSI_SILENT, which
11365  *              suppresses all driver information. The result is that no
11366  *              information about the problem is available. Being
11367  *              completely silent during this time is inappropriate. The
11368  *              driver needs a more selective filter than USCSI_SILENT, so
11369  *              that information related to faults is provided.
11370  *
11371  *              To make the accurate accessment, the caller  of
11372  *              sd_send_scsi_USCSI_CMD should take the ownership and
11373  *              get necessary information to print error messages.
11374  *
11375  *              If we want to print necessary info of uscsi command, we need to
11376  *              keep the uscsi_cmd and sd_uscsi_info till we can make the
11377  *              assessment. We use sd_ssc_init to alloc necessary
11378  *              structs for sending an uscsi command and we are also
11379  *              responsible for free the memory by calling
11380  *              sd_ssc_fini.
11381  *
11382  *              The calling secquences will look like:
11383  *              sd_ssc_init->
11384  *
11385  *                  ...
11386  *
11387  *                  sd_send_scsi_USCSI_CMD->
11388  *                      sd_ssc_send-> - - - sdintr
11389  *                  ...
11390  *
11391  *                  if we think the return value should be treated as a
11392  *                  failure, we make the accessment here and print out
11393  *                  necessary by retrieving uscsi_cmd and sd_uscsi_info'
11394  *
11395  *                  ...
11396  *
11397  *              sd_ssc_fini
11398  *
11399  *
11400  *   Arguments: un - pointer to driver soft state (unit) structure for this
11401  *                   target.
11402  *
11403  * Return code: sd_ssc_t - pointer to allocated sd_ssc_t struct, it contains
11404  *                         uscsi_cmd and sd_uscsi_info.
11405  *                  NULL - if can not alloc memory for sd_ssc_t struct
11406  *
11407  *     Context: Kernel Thread.
11408  */
11409 static sd_ssc_t *
11410 sd_ssc_init(struct sd_lun *un)
11411 {
11412 	sd_ssc_t		*ssc;
11413 	struct uscsi_cmd	*ucmdp;
11414 	struct sd_uscsi_info	*uip;
11415 
11416 	ASSERT(un != NULL);
11417 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11418 
11419 	/*
11420 	 * Allocate sd_ssc_t structure
11421 	 */
11422 	ssc = kmem_zalloc(sizeof (sd_ssc_t), KM_SLEEP);
11423 
11424 	/*
11425 	 * Allocate uscsi_cmd by calling scsi_uscsi_alloc common routine
11426 	 */
11427 	ucmdp = scsi_uscsi_alloc();
11428 
11429 	/*
11430 	 * Allocate sd_uscsi_info structure
11431 	 */
11432 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
11433 
11434 	ssc->ssc_uscsi_cmd = ucmdp;
11435 	ssc->ssc_uscsi_info = uip;
11436 	ssc->ssc_un = un;
11437 
11438 	return (ssc);
11439 }
11440 
11441 /*
11442  * Function: sd_ssc_fini
11443  *
11444  * Description: To free sd_ssc_t and it's hanging off
11445  *
11446  * Arguments: ssc - struct pointer of sd_ssc_t.
11447  */
11448 static void
11449 sd_ssc_fini(sd_ssc_t *ssc)
11450 {
11451 	scsi_uscsi_free(ssc->ssc_uscsi_cmd);
11452 
11453 	if (ssc->ssc_uscsi_info != NULL) {
11454 		kmem_free(ssc->ssc_uscsi_info, sizeof (struct sd_uscsi_info));
11455 		ssc->ssc_uscsi_info = NULL;
11456 	}
11457 
11458 	kmem_free(ssc, sizeof (sd_ssc_t));
11459 	ssc = NULL;
11460 }
11461 
11462 /*
11463  * Function: sd_ssc_send
11464  *
11465  * Description: Runs a USCSI command for user when called through sdioctl,
11466  *              or for the driver.
11467  *
11468  *   Arguments: ssc - the struct of sd_ssc_t will bring uscsi_cmd and
11469  *                    sd_uscsi_info in.
11470  *		incmd - ptr to a valid uscsi_cmd struct
11471  *		flag - bit flag, indicating open settings, 32/64 bit type
11472  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11473  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11474  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11475  *			to use the USCSI "direct" chain and bypass the normal
11476  *			command waitq.
11477  *
11478  * Return Code: 0 -  successful completion of the given command
11479  *		EIO - scsi_uscsi_handle_command() failed
11480  *		ENXIO  - soft state not found for specified dev
11481  *		EINVAL
11482  *		EFAULT - copyin/copyout error
11483  *		return code of scsi_uscsi_handle_command():
11484  *			EIO
11485  *			ENXIO
11486  *			EACCES
11487  *
11488  *     Context: Kernel Thread;
11489  *              Waits for command to complete. Can sleep.
11490  */
11491 static int
11492 sd_ssc_send(sd_ssc_t *ssc, struct uscsi_cmd *incmd, int flag,
11493 	enum uio_seg dataspace, int path_flag)
11494 {
11495 	struct sd_uscsi_info	*uip;
11496 	struct uscsi_cmd	*uscmd;
11497 	struct sd_lun		*un;
11498 	dev_t			dev;
11499 
11500 	int	format = 0;
11501 	int	rval;
11502 
11503 	ASSERT(ssc != NULL);
11504 	un = ssc->ssc_un;
11505 	ASSERT(un != NULL);
11506 	uscmd = ssc->ssc_uscsi_cmd;
11507 	ASSERT(uscmd != NULL);
11508 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11509 	if (ssc->ssc_flags & SSC_FLAGS_NEED_ASSESSMENT) {
11510 		/*
11511 		 * If enter here, it indicates that the previous uscsi
11512 		 * command has not been processed by sd_ssc_assessment.
11513 		 * This is violating our rules of FMA telemetry processing.
11514 		 * We should print out this message and the last undisposed
11515 		 * uscsi command.
11516 		 */
11517 		if (uscmd->uscsi_cdb != NULL) {
11518 			SD_INFO(SD_LOG_SDTEST, un,
11519 			    "sd_ssc_send is missing the alternative "
11520 			    "sd_ssc_assessment when running command 0x%x.\n",
11521 			    uscmd->uscsi_cdb[0]);
11522 		}
11523 		/*
11524 		 * Set the ssc_flags to SSC_FLAGS_UNKNOWN, which should be
11525 		 * the initial status.
11526 		 */
11527 		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
11528 	}
11529 
11530 	/*
11531 	 * We need to make sure sd_ssc_send will have sd_ssc_assessment
11532 	 * followed to avoid missing FMA telemetries.
11533 	 */
11534 	ssc->ssc_flags |= SSC_FLAGS_NEED_ASSESSMENT;
11535 
11536 #ifdef SDDEBUG
11537 	switch (dataspace) {
11538 	case UIO_USERSPACE:
11539 		SD_TRACE(SD_LOG_IO, un,
11540 		    "sd_ssc_send: entry: un:0x%p UIO_USERSPACE\n", un);
11541 		break;
11542 	case UIO_SYSSPACE:
11543 		SD_TRACE(SD_LOG_IO, un,
11544 		    "sd_ssc_send: entry: un:0x%p UIO_SYSSPACE\n", un);
11545 		break;
11546 	default:
11547 		SD_TRACE(SD_LOG_IO, un,
11548 		    "sd_ssc_send: entry: un:0x%p UNEXPECTED SPACE\n", un);
11549 		break;
11550 	}
11551 #endif
11552 
11553 	rval = scsi_uscsi_copyin((intptr_t)incmd, flag,
11554 	    SD_ADDRESS(un), &uscmd);
11555 	if (rval != 0) {
11556 		SD_TRACE(SD_LOG_IO, un, "sd_sense_scsi_cmd: "
11557 		    "scsi_uscsi_alloc_and_copyin failed\n", un);
11558 		return (rval);
11559 	}
11560 
11561 	if ((uscmd->uscsi_cdb != NULL) &&
11562 	    (uscmd->uscsi_cdb[0] == SCMD_FORMAT)) {
11563 		mutex_enter(SD_MUTEX(un));
11564 		un->un_f_format_in_progress = TRUE;
11565 		mutex_exit(SD_MUTEX(un));
11566 		format = 1;
11567 	}
11568 
11569 	/*
11570 	 * Allocate an sd_uscsi_info struct and fill it with the info
11571 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
11572 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
11573 	 * since we allocate the buf here in this function, we do not
11574 	 * need to preserve the prior contents of b_private.
11575 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
11576 	 */
11577 	uip = ssc->ssc_uscsi_info;
11578 	uip->ui_flags = path_flag;
11579 	uip->ui_cmdp = uscmd;
11580 
11581 	/*
11582 	 * Commands sent with priority are intended for error recovery
11583 	 * situations, and do not have retries performed.
11584 	 */
11585 	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
11586 		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
11587 	}
11588 	uscmd->uscsi_flags &= ~USCSI_NOINTR;
11589 
11590 	dev = SD_GET_DEV(un);
11591 	rval = scsi_uscsi_handle_cmd(dev, dataspace, uscmd,
11592 	    sd_uscsi_strategy, NULL, uip);
11593 
11594 	/*
11595 	 * mark ssc_flags right after handle_cmd to make sure
11596 	 * the uscsi has been sent
11597 	 */
11598 	ssc->ssc_flags |= SSC_FLAGS_CMD_ISSUED;
11599 
11600 #ifdef SDDEBUG
11601 	SD_INFO(SD_LOG_IO, un, "sd_ssc_send: "
11602 	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
11603 	    uscmd->uscsi_status, uscmd->uscsi_resid);
11604 	if (uscmd->uscsi_bufaddr != NULL) {
11605 		SD_INFO(SD_LOG_IO, un, "sd_ssc_send: "
11606 		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
11607 		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
11608 		if (dataspace == UIO_SYSSPACE) {
11609 			SD_DUMP_MEMORY(un, SD_LOG_IO,
11610 			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
11611 			    uscmd->uscsi_buflen, SD_LOG_HEX);
11612 		}
11613 	}
11614 #endif
11615 
11616 	if (format == 1) {
11617 		mutex_enter(SD_MUTEX(un));
11618 		un->un_f_format_in_progress = FALSE;
11619 		mutex_exit(SD_MUTEX(un));
11620 	}
11621 
11622 	(void) scsi_uscsi_copyout((intptr_t)incmd, uscmd);
11623 
11624 	return (rval);
11625 }
11626 
11627 /*
11628  *     Function: sd_ssc_print
11629  *
11630  * Description: Print information available to the console.
11631  *
11632  * Arguments: ssc - the struct of sd_ssc_t will bring uscsi_cmd and
11633  *                    sd_uscsi_info in.
11634  *            sd_severity - log level.
11635  *     Context: Kernel thread or interrupt context.
11636  */
11637 static void
11638 sd_ssc_print(sd_ssc_t *ssc, int sd_severity)
11639 {
11640 	struct uscsi_cmd	*ucmdp;
11641 	struct scsi_device	*devp;
11642 	dev_info_t 		*devinfo;
11643 	uchar_t			*sensep;
11644 	int			senlen;
11645 	union scsi_cdb		*cdbp;
11646 	uchar_t			com;
11647 	extern struct scsi_key_strings scsi_cmds[];
11648 
11649 	ASSERT(ssc != NULL);
11650 	ASSERT(ssc->ssc_un != NULL);
11651 
11652 	if (SD_FM_LOG(ssc->ssc_un) != SD_FM_LOG_EREPORT)
11653 		return;
11654 	ucmdp = ssc->ssc_uscsi_cmd;
11655 	devp = SD_SCSI_DEVP(ssc->ssc_un);
11656 	devinfo = SD_DEVINFO(ssc->ssc_un);
11657 	ASSERT(ucmdp != NULL);
11658 	ASSERT(devp != NULL);
11659 	ASSERT(devinfo != NULL);
11660 	sensep = (uint8_t *)ucmdp->uscsi_rqbuf;
11661 	senlen = ucmdp->uscsi_rqlen - ucmdp->uscsi_rqresid;
11662 	cdbp = (union scsi_cdb *)ucmdp->uscsi_cdb;
11663 
11664 	/* In certain case (like DOORLOCK), the cdb could be NULL. */
11665 	if (cdbp == NULL)
11666 		return;
11667 	/* We don't print log if no sense data available. */
11668 	if (senlen == 0)
11669 		sensep = NULL;
11670 	com = cdbp->scc_cmd;
11671 	scsi_generic_errmsg(devp, sd_label, sd_severity, 0, 0, com,
11672 	    scsi_cmds, sensep, ssc->ssc_un->un_additional_codes, NULL);
11673 }
11674 
11675 /*
11676  *     Function: sd_ssc_assessment
11677  *
11678  * Description: We use this function to make an assessment at the point
11679  *              where SD driver may encounter a potential error.
11680  *
11681  * Arguments: ssc - the struct of sd_ssc_t will bring uscsi_cmd and
11682  *                  sd_uscsi_info in.
11683  *            tp_assess - a hint of strategy for ereport posting.
11684  *            Possible values of tp_assess include:
11685  *                SD_FMT_IGNORE - we don't post any ereport because we're
11686  *                sure that it is ok to ignore the underlying problems.
11687  *                SD_FMT_IGNORE_COMPROMISE - we don't post any ereport for now
11688  *                but it might be not correct to ignore the underlying hardware
11689  *                error.
11690  *                SD_FMT_STATUS_CHECK - we will post an ereport with the
11691  *                payload driver-assessment of value "fail" or
11692  *                "fatal"(depending on what information we have here). This
11693  *                assessment value is usually set when SD driver think there
11694  *                is a potential error occurred(Typically, when return value
11695  *                of the SCSI command is EIO).
11696  *                SD_FMT_STANDARD - we will post an ereport with the payload
11697  *                driver-assessment of value "info". This assessment value is
11698  *                set when the SCSI command returned successfully and with
11699  *                sense data sent back.
11700  *
11701  *     Context: Kernel thread.
11702  */
static void
sd_ssc_assessment(sd_ssc_t *ssc, enum sd_type_assessment tp_assess)
{
	int senlen = 0;
	struct uscsi_cmd *ucmdp = NULL;
	struct sd_lun *un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ucmdp = ssc->ssc_uscsi_cmd;
	ASSERT(ucmdp != NULL);

	/*
	 * sd_ssc_send sets SSC_FLAGS_NEED_ASSESSMENT for every command it
	 * issues; if the flag is clear here, send/assessment were not
	 * called in matched pairs.
	 */
	if (ssc->ssc_flags & SSC_FLAGS_NEED_ASSESSMENT) {
		ssc->ssc_flags &= ~SSC_FLAGS_NEED_ASSESSMENT;
	} else {
		/*
		 * If enter here, it indicates that we have a wrong
		 * calling sequence of sd_ssc_send and sd_ssc_assessment,
		 * both of which should be called in a pair in case of
		 * loss of FMA telemetries.
		 */
		if (ucmdp->uscsi_cdb != NULL) {
			SD_INFO(SD_LOG_SDTEST, un,
			    "sd_ssc_assessment is missing the "
			    "alternative sd_ssc_send when running 0x%x, "
			    "or there are superfluous sd_ssc_assessment for "
			    "the same sd_ssc_send.\n",
			    ucmdp->uscsi_cdb[0]);
		}
		/*
		 * Set the ssc_flags to the initial value to avoid passing
		 * down dirty flags to the following sd_ssc_send function.
		 */
		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
		return;
	}

	/*
	 * Only handle an issued command which is waiting for assessment.
	 * A command which is not issued will not have
	 * SSC_FLAGS_INVALID_DATA set, so it'ok we just return here.
	 */
	if (!(ssc->ssc_flags & SSC_FLAGS_CMD_ISSUED)) {
		sd_ssc_print(ssc, SCSI_ERR_INFO);
		return;
	} else {
		/*
		 * For an issued command, we should clear this flag in
		 * order to make the sd_ssc_t structure be used off
		 * multiple uscsi commands.
		 */
		ssc->ssc_flags &= ~SSC_FLAGS_CMD_ISSUED;
	}

	/*
	 * We will not deal with non-retryable(flag USCSI_DIAGNOSE set)
	 * commands here. And we should clear the ssc_flags before return.
	 */
	if (ucmdp->uscsi_flags & USCSI_DIAGNOSE) {
		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
		return;
	}

	switch (tp_assess) {
	case SD_FMT_IGNORE:
	case SD_FMT_IGNORE_COMPROMISE:
		/* Caller decided the underlying condition can be ignored. */
		break;
	case SD_FMT_STATUS_CHECK:
		/*
		 * For a failed command(including the succeeded command
		 * with invalid data sent back).
		 */
		sd_ssc_post(ssc, SD_FM_DRV_FATAL);
		break;
	case SD_FMT_STANDARD:
		/*
		 * Always for the succeeded commands probably with sense
		 * data sent back.
		 * Limitation:
		 *	We can only handle a succeeded command with sense
		 *	data sent back when auto-request-sense is enabled.
		 */
		/* Valid sense length = requested length minus residual. */
		senlen = ssc->ssc_uscsi_cmd->uscsi_rqlen -
		    ssc->ssc_uscsi_cmd->uscsi_rqresid;
		if ((ssc->ssc_uscsi_info->ui_pkt_state & STATE_ARQ_DONE) &&
		    (un->un_f_arq_enabled == TRUE) &&
		    senlen > 0 &&
		    ssc->ssc_uscsi_cmd->uscsi_rqbuf != NULL) {
			sd_ssc_post(ssc, SD_FM_DRV_NOTICE);
		}
		break;
	default:
		/*
		 * Should not have other type of assessment.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
		    "sd_ssc_assessment got wrong "
		    "sd_type_assessment %d.\n", tp_assess);
		break;
	}
	/*
	 * Clear up the ssc_flags before return.
	 */
	ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
}
11809 
11810 /*
11811  *    Function: sd_ssc_post
11812  *
11813  * Description: 1. read the driver property to get fm-scsi-log flag.
11814  *              2. print log if fm_log_capable is non-zero.
11815  *              3. call sd_ssc_ereport_post to post ereport if possible.
11816  *
11817  *    Context: May be called from kernel thread or interrupt context.
11818  */
11819 static void
11820 sd_ssc_post(sd_ssc_t *ssc, enum sd_driver_assessment sd_assess)
11821 {
11822 	struct sd_lun	*un;
11823 	int		sd_severity;
11824 
11825 	ASSERT(ssc != NULL);
11826 	un = ssc->ssc_un;
11827 	ASSERT(un != NULL);
11828 
11829 	/*
11830 	 * We may enter here from sd_ssc_assessment(for USCSI command) or
11831 	 * by directly called from sdintr context.
11832 	 * We don't handle a non-disk drive(CD-ROM, removable media).
11833 	 * Clear the ssc_flags before return in case we've set
11834 	 * SSC_FLAGS_INVALID_XXX which should be skipped for a non-disk
11835 	 * driver.
11836 	 */
11837 	if (ISCD(un) || un->un_f_has_removable_media) {
11838 		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
11839 		return;
11840 	}
11841 
11842 	switch (sd_assess) {
11843 		case SD_FM_DRV_FATAL:
11844 			sd_severity = SCSI_ERR_FATAL;
11845 			break;
11846 		case SD_FM_DRV_RECOVERY:
11847 			sd_severity = SCSI_ERR_RECOVERED;
11848 			break;
11849 		case SD_FM_DRV_RETRY:
11850 			sd_severity = SCSI_ERR_RETRYABLE;
11851 			break;
11852 		case SD_FM_DRV_NOTICE:
11853 			sd_severity = SCSI_ERR_INFO;
11854 			break;
11855 		default:
11856 			sd_severity = SCSI_ERR_UNKNOWN;
11857 	}
11858 	/* print log */
11859 	sd_ssc_print(ssc, sd_severity);
11860 
11861 	/* always post ereport */
11862 	sd_ssc_ereport_post(ssc, sd_assess);
11863 }
11864 
11865 /*
11866  *    Function: sd_ssc_set_info
11867  *
11868  * Description: Mark ssc_flags and set ssc_info which would be the
11869  *              payload of uderr ereport. This function will cause
11870  *              sd_ssc_ereport_post to post uderr ereport only.
11871  *              Besides, when ssc_flags == SSC_FLAGS_INVALID_DATA(USCSI),
11872  *              the function will also call SD_ERROR or scsi_log for a
11873  *              CDROM/removable-media/DDI_FM_NOT_CAPABLE device.
11874  *
11875  * Arguments: ssc - the struct of sd_ssc_t will bring uscsi_cmd and
11876  *                  sd_uscsi_info in.
11877  *            ssc_flags - indicate the sub-category of a uderr.
11878  *            comp - this argument is meaningful only when
11879  *                   ssc_flags == SSC_FLAGS_INVALID_DATA, and its possible
11880  *                   values include:
11881  *                   > 0, SD_ERROR is used with comp as the driver logging
11882  *                   component;
11883  *                   = 0, scsi-log is used to log error telemetries;
11884  *                   < 0, no log available for this telemetry.
11885  *
11886  *    Context: Kernel thread or interrupt context
11887  */
11888 static void
11889 sd_ssc_set_info(sd_ssc_t *ssc, int ssc_flags, uint_t comp, const char *fmt, ...)
11890 {
11891 	va_list	ap;
11892 
11893 	ASSERT(ssc != NULL);
11894 	ASSERT(ssc->ssc_un != NULL);
11895 
11896 	ssc->ssc_flags |= ssc_flags;
11897 	va_start(ap, fmt);
11898 	(void) vsnprintf(ssc->ssc_info, sizeof (ssc->ssc_info), fmt, ap);
11899 	va_end(ap);
11900 
11901 	/*
11902 	 * If SSC_FLAGS_INVALID_DATA is set, it should be a uscsi command
11903 	 * with invalid data sent back. For non-uscsi command, the
11904 	 * following code will be bypassed.
11905 	 */
11906 	if (ssc_flags & SSC_FLAGS_INVALID_DATA) {
11907 		if (SD_FM_LOG(ssc->ssc_un) == SD_FM_LOG_NSUP) {
11908 			/*
11909 			 * If the error belong to certain component and we
11910 			 * do not want it to show up on the console, we
11911 			 * will use SD_ERROR, otherwise scsi_log is
11912 			 * preferred.
11913 			 */
11914 			if (comp > 0) {
11915 				SD_ERROR(comp, ssc->ssc_un, ssc->ssc_info);
11916 			} else if (comp == 0) {
11917 				scsi_log(SD_DEVINFO(ssc->ssc_un), sd_label,
11918 				    CE_WARN, ssc->ssc_info);
11919 			}
11920 		}
11921 	}
11922 }
11923 
11924 /*
11925  *    Function: sd_buf_iodone
11926  *
11927  * Description: Frees the sd_xbuf & returns the buf to its originator.
11928  *
11929  *     Context: May be called from interrupt context.
11930  */
/* ARGSUSED */
static void
sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
{
	struct sd_xbuf *xp;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");

	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	/* xbuf is gone after this */
	/*
	 * NOTE(review): the accounting below runs only when ddi_xbuf_done()
	 * returns nonzero -- presumably once the xbuf teardown for this buf
	 * has fully completed; confirm against the ddi_xbuf implementation.
	 */
	if (ddi_xbuf_done(bp, un->un_xbuf_attr)) {
		mutex_enter(SD_MUTEX(un));

		/*
		 * Grab time when the cmd completed.
		 * This is used for determining if the system has been
		 * idle long enough to make it idle to the PM framework.
		 * This is for lowering the overhead, and therefore improving
		 * performance per I/O operation.
		 */
		un->un_pm_idle_time = ddi_get_time();

		/* Balance the increment performed when the cmd entered sd. */
		un->un_ncmds_in_driver--;
		ASSERT(un->un_ncmds_in_driver >= 0);
		SD_INFO(SD_LOG_IO, un,
		    "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
		    un->un_ncmds_in_driver);

		mutex_exit(SD_MUTEX(un));
	}

	biodone(bp);				/* bp is gone after this */

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
}
11972 
11973 
11974 /*
11975  *    Function: sd_uscsi_iodone
11976  *
11977  * Description: Frees the sd_xbuf & returns the buf to its originator.
11978  *
11979  *     Context: May be called from interrupt context.
11980  */
/* ARGSUSED */
static void
sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
{
	struct sd_xbuf *xp;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);

	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");

	/* Restore the caller's b_private, saved by sd_xbuf_init(). */
	bp->b_private = xp->xb_private;

	mutex_enter(SD_MUTEX(un));

	/*
	 * Grab time when the cmd completed.
	 * This is used for determining if the system has been
	 * idle long enough to make it idle to the PM framework.
	 * This is for lowering the overhead, and therefore improving
	 * performance per I/O operation.
	 */
	un->un_pm_idle_time = ddi_get_time();

	/* Balance the increment performed in sd_uscsi_strategy(). */
	un->un_ncmds_in_driver--;
	ASSERT(un->un_ncmds_in_driver >= 0);
	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
	    un->un_ncmds_in_driver);

	mutex_exit(SD_MUTEX(un));

	/*
	 * Free the xbuf using the same size computation as the allocation
	 * in sd_uscsi_strategy(): a request for more than SENSE_LENGTH
	 * bytes of sense data gets an enlarged xbuf, so the rqlen test
	 * here must match the one there exactly.
	 */
	if (((struct uscsi_cmd *)(xp->xb_pktinfo))->uscsi_rqlen >
	    SENSE_LENGTH) {
		kmem_free(xp, sizeof (struct sd_xbuf) - SENSE_LENGTH +
		    MAX_SENSE_LENGTH);
	} else {
		kmem_free(xp, sizeof (struct sd_xbuf));
	}

	biodone(bp);

	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
}
12028 
12029 
12030 /*
12031  *    Function: sd_mapblockaddr_iostart
12032  *
12033  * Description: Verify request lies within the partition limits for
12034  *		the indicated minor device.  Issue "overrun" buf if
12035  *		request would exceed partition range.  Converts
12036  *		partition-relative block address to absolute.
12037  *
12038  *     Context: Can sleep
12039  *
12040  *      Issues: This follows what the old code did, in terms of accessing
12041  *		some of the partition info in the unit struct without holding
12042  *		the mutext.  This is a general issue, if the partition info
12043  *		can be altered while IO is in progress... as soon as we send
12044  *		a buf, its partitioning can be invalid before it gets to the
12045  *		device.  Probably the right fix is to move partitioning out
12046  *		of the driver entirely.
12047  */
12048 
static void
sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
{
	diskaddr_t	nblocks;	/* #blocks in the given partition */
	daddr_t	blocknum;	/* Block number specified by the buf */
	size_t	requested_nblocks;
	size_t	available_nblocks;
	int	partition;
	diskaddr_t	partition_offset;
	struct sd_xbuf *xp;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);

	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	/*
	 * If the geometry is not indicated as valid, attempt to access
	 * the unit & verify the geometry/label. This can be the case for
	 * removable-media devices, or if the device was opened in
	 * NDELAY/NONBLOCK mode.
	 */
	partition = SDPART(bp->b_edev);

	if (!SD_IS_VALID_LABEL(un)) {
		sd_ssc_t *ssc;
		/*
		 * Initialize sd_ssc_t for internal uscsi commands.
		 * To avoid a potential performance hit, allocate this
		 * memory only when the label is found to be invalid.
		 */
		ssc = sd_ssc_init(un);

		if (sd_ready_and_valid(ssc, partition) != SD_READY_VALID) {
			/*
			 * For removable devices it is possible to start an
			 * I/O without a media by opening the device in nodelay
			 * mode. Also for writable CDs there can be many
			 * scenarios where there is no geometry yet but volume
			 * manager is trying to issue a read() just because
			 * it can see TOC on the CD. So do not print a message
			 * for removables.
			 */
			if (!un->un_f_has_removable_media) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "i/o to invalid geometry\n");
			}
			/* Fail the buf and send it back up the iodone chain. */
			bioerror(bp, EIO);
			bp->b_resid = bp->b_bcount;
			SD_BEGIN_IODONE(index, un, bp);

			sd_ssc_fini(ssc);
			return;
		}
		sd_ssc_fini(ssc);
	}

	/* Look up the size and offset of the target partition. */
	nblocks = 0;
	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
	    &nblocks, &partition_offset, NULL, NULL, (void *)SD_PATH_DIRECT);

	/*
	 * blocknum is the starting block number of the request. At this
	 * point it is still relative to the start of the minor device.
	 */
	blocknum = xp->xb_blkno;

	/*
	 * Legacy: If the starting block number is one past the last block
	 * in the partition, do not set B_ERROR in the buf.
	 */
	if (blocknum == nblocks)  {
		goto error_exit;
	}

	/*
	 * Confirm that the first block of the request lies within the
	 * partition limits. Also the requested number of bytes must be
	 * a multiple of the system block size.
	 */
	if ((blocknum < 0) || (blocknum >= nblocks) ||
	    ((bp->b_bcount & (un->un_sys_blocksize - 1)) != 0)) {
		bp->b_flags |= B_ERROR;
		goto error_exit;
	}

	/*
	 * If the requested # blocks exceeds the available # blocks, that
	 * is an overrun of the partition.
	 */
	requested_nblocks = SD_BYTES2SYSBLOCKS(un, bp->b_bcount);
	available_nblocks = (size_t)(nblocks - blocknum);
	ASSERT(nblocks >= blocknum);

	if (requested_nblocks > available_nblocks) {
		/*
		 * Allocate an "overrun" buf to allow the request to proceed
		 * for the amount of space available in the partition. The
		 * amount not transferred will be added into the b_resid
		 * when the operation is complete. The overrun buf
		 * replaces the original buf here, and the original buf
		 * is saved inside the overrun buf, for later use.
		 */
		size_t resid = SD_SYSBLOCKS2BYTES(un,
		    (offset_t)(requested_nblocks - available_nblocks));
		size_t count = bp->b_bcount - resid;
		/*
		 * Note: count is an unsigned entity thus it'll NEVER
		 * be less than 0 so ASSERT the original values are
		 * correct.
		 */
		ASSERT(bp->b_bcount >= resid);

		bp = sd_bioclone_alloc(bp, count, blocknum,
		    (int (*)(struct buf *)) sd_mapblockaddr_iodone);
		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
		ASSERT(xp != NULL);
	}

	/* At this point there should be no residual for this buf. */
	ASSERT(bp->b_resid == 0);

	/* Convert the block number to an absolute address. */
	xp->xb_blkno += partition_offset;

	SD_NEXT_IOSTART(index, un, bp);

	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);

	return;

error_exit:
	bp->b_resid = bp->b_bcount;
	SD_BEGIN_IODONE(index, un, bp);
	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
}
12192 
12193 
12194 /*
12195  *    Function: sd_mapblockaddr_iodone
12196  *
12197  * Description: Completion-side processing for partition management.
12198  *
12199  *     Context: May be called under interrupt context
12200  */
12201 
static void
sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
{
	/* int	partition; */	/* Not used, see below. */
	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);

	/*
	 * An overrun buf (allocated by sd_mapblockaddr_iostart) is
	 * identified by its b_iodone pointing at this function.
	 */
	if (bp->b_iodone == (int (*)(struct buf *)) sd_mapblockaddr_iodone) {
		/*
		 * We have an "overrun" buf to deal with...
		 */
		struct sd_xbuf	*xp;
		struct buf	*obp;	/* ptr to the original buf */

		xp = SD_GET_XBUF(bp);
		ASSERT(xp != NULL);

		/* Retrieve the pointer to the original buf */
		obp = (struct buf *)xp->xb_private;
		ASSERT(obp != NULL);

		/*
		 * Account for the truncated portion of the request: the
		 * bytes actually transferred through the clone, plus the
		 * amount that never fit in the partition, make up the
		 * original buf's residual.
		 */
		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
		bioerror(obp, bp->b_error);

		sd_bioclone_free(bp);

		/*
		 * Get back the original buf.
		 * Note that since the restoration of xb_blkno below
		 * was removed, the sd_xbuf is not needed.
		 */
		bp = obp;
		/*
		 * xp = SD_GET_XBUF(bp);
		 * ASSERT(xp != NULL);
		 */
	}

	/*
	 * Convert sd->xb_blkno back to a minor-device relative value.
	 * Note: this has been commented out, as it is not needed in the
	 * current implementation of the driver (ie, since this function
	 * is at the top of the layering chains, so the info will be
	 * discarded) and it is in the "hot" IO path.
	 *
	 * partition = getminor(bp->b_edev) & SDPART_MASK;
	 * xp->xb_blkno -= un->un_offset[partition];
	 */

	SD_NEXT_IODONE(index, un, bp);

	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
}
12260 
12261 
12262 /*
12263  *    Function: sd_mapblocksize_iostart
12264  *
12265  * Description: Convert between system block size (un->un_sys_blocksize)
12266  *		and target block size (un->un_tgt_blocksize).
12267  *
12268  *     Context: Can sleep to allocate resources.
12269  *
12270  * Assumptions: A higher layer has already performed any partition validation,
12271  *		and converted the xp->xb_blkno to an absolute value relative
12272  *		to the start of the device.
12273  *
12274  *		It is also assumed that the higher layer has implemented
12275  *		an "overrun" mechanism for the case where the request would
12276  *		read/write beyond the end of a partition.  In this case we
12277  *		assume (and ASSERT) that bp->b_resid == 0.
12278  *
12279  *		Note: The implementation for this routine assumes the target
12280  *		block size remains constant between allocation and transport.
12281  */
12282 
static void
sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
{
	struct sd_mapblocksize_info	*bsp;
	struct sd_xbuf			*xp;
	offset_t first_byte;
	daddr_t	start_block, end_block;
	daddr_t	request_bytes;
	ushort_t is_aligned = FALSE;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(bp->b_resid == 0);

	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);

	/*
	 * For a non-writable CD, a write request is an error
	 */
	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
	    (un->un_f_mmc_writable_media == FALSE)) {
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		SD_BEGIN_IODONE(index, un, bp);
		return;
	}

	/*
	 * We do not need a shadow buf if the device is using
	 * un->un_sys_blocksize as its block size or if bcount == 0.
	 * In this case there is no layer-private data block allocated.
	 */
	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
	    (bp->b_bcount == 0)) {
		goto done;
	}

#if defined(__i386) || defined(__amd64)
	/* We do not support non-block-aligned transfers for ROD devices */
	ASSERT(!ISROD(un));
#endif

	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
	    un->un_tgt_blocksize, un->un_sys_blocksize);
	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
	    "request start block:0x%x\n", xp->xb_blkno);
	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
	    "request len:0x%x\n", bp->b_bcount);

	/*
	 * Allocate the layer-private data area for the mapblocksize layer.
	 * Layers are allowed to use the xp_private member of the sd_xbuf
	 * struct to store the pointer to their layer-private data block, but
	 * each layer also has the responsibility of restoring the prior
	 * contents of xb_private before returning the buf/xbuf to the
	 * higher layer that sent it.
	 *
	 * Here we save the prior contents of xp->xb_private into the
	 * bsp->mbs_oprivate field of our layer-private data area. This value
	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
	 * the layer-private area and returning the buf/xbuf to the layer
	 * that sent it.
	 *
	 * Note that here we use kmem_zalloc for the allocation as there are
	 * parts of the mapblocksize code that expect certain fields to be
	 * zero unless explicitly set to a required value.
	 */
	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
	bsp->mbs_oprivate = xp->xb_private;
	xp->xb_private = bsp;

	/*
	 * This treats the data on the disk (target) as an array of bytes.
	 * first_byte is the byte offset, from the beginning of the device,
	 * to the location of the request. This is converted from a
	 * un->un_sys_blocksize block address to a byte offset, and then back
	 * to a block address based upon a un->un_tgt_blocksize block size.
	 *
	 * xp->xb_blkno should be absolute upon entry into this function,
	 * but, but it is based upon partitions that use the "system"
	 * block size. It must be adjusted to reflect the block size of
	 * the target.
	 *
	 * Note that end_block is actually the block that follows the last
	 * block of the request, but that's what is needed for the computation.
	 */
	first_byte  = SD_SYSBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
	start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
	end_block   = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) /
	    un->un_tgt_blocksize;

	/* request_bytes is rounded up to a multiple of the target block size */
	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;

	/*
	 * See if the starting address of the request and the request
	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
	 * then we do not need to allocate a shadow buf to handle the request.
	 */
	if (((first_byte   % un->un_tgt_blocksize) == 0) &&
	    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
		is_aligned = TRUE;
	}

	if ((bp->b_flags & B_READ) == 0) {
		/*
		 * Lock the range for a write operation. An aligned request is
		 * considered a simple write; otherwise the request must be a
		 * read-modify-write.
		 */
		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
	}

	/*
	 * Alloc a shadow buf if the request is not aligned. Also, this is
	 * where the READ command is generated for a read-modify-write. (The
	 * write phase is deferred until after the read completes.)
	 */
	if (is_aligned == FALSE) {

		struct sd_mapblocksize_info	*shadow_bsp;
		struct sd_xbuf	*shadow_xp;
		struct buf	*shadow_bp;

		/*
		 * Allocate the shadow buf and its associated xbuf. Note that
		 * after this call the xb_blkno value in both the original
		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
		 * same: absolute relative to the start of the device, and
		 * adjusted for the target block size. The b_blkno in the
		 * shadow buf will also be set to this value. We should never
		 * change b_blkno in the original bp however.
		 *
		 * Note also that the shadow buf will always need to be a
		 * READ command, regardless of whether the incoming command
		 * is a READ or a WRITE.
		 */
		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
		    xp->xb_blkno,
		    (int (*)(struct buf *)) sd_mapblocksize_iodone);

		shadow_xp = SD_GET_XBUF(shadow_bp);

		/*
		 * Allocate the layer-private data for the shadow buf.
		 * (No need to preserve xb_private in the shadow xbuf.)
		 */
		shadow_xp->xb_private = shadow_bsp =
		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);

		/*
		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
		 * to figure out where the start of the user data is (based upon
		 * the system block size) in the data returned by the READ
		 * command (which will be based upon the target blocksize). Note
		 * that this is only really used if the request is unaligned.
		 */
		bsp->mbs_copy_offset = (ssize_t)(first_byte -
		    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
		ASSERT((bsp->mbs_copy_offset >= 0) &&
		    (bsp->mbs_copy_offset < un->un_tgt_blocksize));

		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;

		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;

		/* Transfer the wmap (if any) to the shadow buf */
		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
		bsp->mbs_wmp = NULL;

		/*
		 * The shadow buf goes on from here in place of the
		 * original buf.
		 */
		shadow_bsp->mbs_orig_bp = bp;
		bp = shadow_bp;
	}

	SD_INFO(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
	SD_INFO(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
	    request_bytes);
	SD_INFO(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: shadow buf:0x%x\n", bp);

done:
	SD_NEXT_IOSTART(index, un, bp);

	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
}
12482 
12483 
12484 /*
12485  *    Function: sd_mapblocksize_iodone
12486  *
12487  * Description: Completion side processing for block-size mapping.
12488  *
12489  *     Context: May be called under interrupt context
12490  */
12491 
static void
sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
{
	struct sd_mapblocksize_info	*bsp;
	struct sd_xbuf	*xp;
	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
	struct buf	*orig_bp;	/* ptr to the original buf */
	offset_t	shadow_end;
	offset_t	request_end;
	offset_t	shadow_start;
	ssize_t		copy_offset;
	size_t		copy_length;
	size_t		shortfall;
	uint_t		is_write;	/* TRUE if this bp is a WRITE */
	uint_t		has_wmap;	/* TRUE if this bp has a wmap */

	ASSERT(un != NULL);
	ASSERT(bp != NULL);

	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);

	/*
	 * There is no shadow buf or layer-private data if the target is
	 * using un->un_sys_blocksize as its block size or if bcount == 0.
	 */
	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
	    (bp->b_bcount == 0)) {
		goto exit;
	}

	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	/* Retrieve the pointer to the layer-private data area from the xbuf. */
	bsp = xp->xb_private;

	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;

	if (is_write) {
		/*
		 * For a WRITE request we must free up the block range that
		 * we have locked up.  This holds regardless of whether this is
		 * an aligned write request or a read-modify-write request.
		 */
		sd_range_unlock(un, bsp->mbs_wmp);
		bsp->mbs_wmp = NULL;
	}

	if ((bp->b_iodone != (int(*)(struct buf *))sd_mapblocksize_iodone)) {
		/*
		 * An aligned read or write command will have no shadow buf;
		 * there is not much else to do with it.
		 */
		goto done;
	}

	orig_bp = bsp->mbs_orig_bp;
	ASSERT(orig_bp != NULL);
	orig_xp = SD_GET_XBUF(orig_bp);
	ASSERT(orig_xp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	if (!is_write && has_wmap) {
		/*
		 * A READ with a wmap means this is the READ phase of a
		 * read-modify-write. If an error occurred on the READ then
		 * we do not proceed with the WRITE phase or copy any data.
		 * Just release the write maps and return with an error.
		 */
		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
			orig_bp->b_resid = orig_bp->b_bcount;
			bioerror(orig_bp, bp->b_error);
			sd_range_unlock(un, bsp->mbs_wmp);
			goto freebuf_done;
		}
	}

	/*
	 * Here is where we set up to copy the data from the shadow buf
	 * into the space associated with the original buf.
	 *
	 * To deal with the conversion between block sizes, these
	 * computations treat the data as an array of bytes, with the
	 * first byte (byte 0) corresponding to the first byte in the
	 * first block on the disk.
	 */

	/*
	 * shadow_start and shadow_len indicate the location and size of
	 * the data returned with the shadow IO request.
	 */
	shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;

	/*
	 * copy_offset gives the offset (in bytes) from the start of the first
	 * block of the READ request to the beginning of the data.  We retrieve
	 * this value from mbs_copy_offset in the shadow buf's layer-private
	 * data, as it was saved there by sd_mapblocksize_iostart().
	 * copy_length gives the amount of data to be copied (in bytes).
	 */
	copy_offset  = bsp->mbs_copy_offset;
	ASSERT((copy_offset >= 0) && (copy_offset < un->un_tgt_blocksize));
	copy_length  = orig_bp->b_bcount;
	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;

	/*
	 * Set up the resid and error fields of orig_bp as appropriate.
	 */
	if (shadow_end >= request_end) {
		/* We got all the requested data; set resid to zero */
		orig_bp->b_resid = 0;
	} else {
		/*
		 * We failed to get enough data to fully satisfy the original
		 * request. Just copy back whatever data we got and set
		 * up the residual and error code as required.
		 *
		 * 'shortfall' is the amount by which the data received with the
		 * shadow buf has "fallen short" of the requested amount.
		 */
		shortfall = (size_t)(request_end - shadow_end);

		if (shortfall > orig_bp->b_bcount) {
			/*
			 * We did not get enough data to even partially
			 * fulfill the original request.  The residual is
			 * equal to the amount requested.
			 */
			orig_bp->b_resid = orig_bp->b_bcount;
		} else {
			/*
			 * We did not get all the data that we requested
			 * from the device, but we will try to return what
			 * portion we did get.
			 */
			orig_bp->b_resid = shortfall;
		}
		ASSERT(copy_length >= orig_bp->b_resid);
		copy_length  -= orig_bp->b_resid;
	}

	/* Propagate the error code from the shadow buf to the original buf */
	bioerror(orig_bp, bp->b_error);

	if (is_write) {
		goto freebuf_done;	/* No data copying for a WRITE */
	}

	if (has_wmap) {
		/*
		 * This is a READ command from the READ phase of a
		 * read-modify-write request. We have to copy the data given
		 * by the user OVER the data returned by the READ command,
		 * then convert the command from a READ to a WRITE and send
		 * it back to the target.
		 */
		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
		    copy_length);

		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */

		/*
		 * Dispatch the WRITE command to the taskq thread, which
		 * will in turn send the command to the target. When the
		 * WRITE command completes, we (sd_mapblocksize_iodone())
		 * will get called again as part of the iodone chain
		 * processing for it. Note that we will still be dealing
		 * with the shadow buf at that point.
		 */
		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
		    KM_NOSLEEP) != 0) {
			/*
			 * Dispatch was successful so we are done. Return
			 * without going any higher up the iodone chain. Do
			 * not free up any layer-private data until after the
			 * WRITE completes.
			 */
			return;
		}

		/*
		 * Dispatch of the WRITE command failed; set up the error
		 * condition and send this IO back up the iodone chain.
		 */
		bioerror(orig_bp, EIO);
		orig_bp->b_resid = orig_bp->b_bcount;

	} else {
		/*
		 * This is a regular READ request (ie, not a RMW). Copy the
		 * data from the shadow buf into the original buf. The
		 * copy_offset compensates for any "misalignment" between the
		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
		 * original buf (with its un->un_sys_blocksize blocks).
		 */
		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
		    copy_length);
	}

freebuf_done:

	/*
	 * At this point we still have both the shadow buf AND the original
	 * buf to deal with, as well as the layer-private data area in each.
	 * Local variables are as follows:
	 *
	 * bp -- points to shadow buf
	 * xp -- points to xbuf of shadow buf
	 * bsp -- points to layer-private data area of shadow buf
	 * orig_bp -- points to original buf
	 *
	 * First free the shadow buf and its associated xbuf, then free the
	 * layer-private data area from the shadow buf. There is no need to
	 * restore xb_private in the shadow xbuf.
	 */
	sd_shadow_buf_free(bp);
	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));

	/*
	 * Now update the local variables to point to the original buf, xbuf,
	 * and layer-private area.
	 */
	bp = orig_bp;
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	ASSERT(xp == orig_xp);
	bsp = xp->xb_private;
	ASSERT(bsp != NULL);

done:
	/*
	 * Restore xb_private to whatever it was set to by the next higher
	 * layer in the chain, then free the layer-private data area.
	 */
	xp->xb_private = bsp->mbs_oprivate;
	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));

exit:
	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);

	SD_NEXT_IODONE(index, un, bp);
}
12738 
12739 
12740 /*
12741  *    Function: sd_checksum_iostart
12742  *
12743  * Description: A stub function for a layer that's currently not used.
12744  *		For now just a placeholder.
12745  *
12746  *     Context: Kernel thread context
12747  */
12748 
12749 static void
12750 sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
12751 {
12752 	ASSERT(un != NULL);
12753 	ASSERT(bp != NULL);
12754 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12755 	SD_NEXT_IOSTART(index, un, bp);
12756 }
12757 
12758 
12759 /*
12760  *    Function: sd_checksum_iodone
12761  *
12762  * Description: A stub function for a layer that's currently not used.
12763  *		For now just a placeholder.
12764  *
12765  *     Context: May be called under interrupt context
12766  */
12767 
12768 static void
12769 sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
12770 {
12771 	ASSERT(un != NULL);
12772 	ASSERT(bp != NULL);
12773 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12774 	SD_NEXT_IODONE(index, un, bp);
12775 }
12776 
12777 
12778 /*
12779  *    Function: sd_checksum_uscsi_iostart
12780  *
12781  * Description: A stub function for a layer that's currently not used.
12782  *		For now just a placeholder.
12783  *
12784  *     Context: Kernel thread context
12785  */
12786 
12787 static void
12788 sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
12789 {
12790 	ASSERT(un != NULL);
12791 	ASSERT(bp != NULL);
12792 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12793 	SD_NEXT_IOSTART(index, un, bp);
12794 }
12795 
12796 
12797 /*
12798  *    Function: sd_checksum_uscsi_iodone
12799  *
12800  * Description: A stub function for a layer that's currently not used.
12801  *		For now just a placeholder.
12802  *
12803  *     Context: May be called under interrupt context
12804  */
12805 
12806 static void
12807 sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
12808 {
12809 	ASSERT(un != NULL);
12810 	ASSERT(bp != NULL);
12811 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12812 	SD_NEXT_IODONE(index, un, bp);
12813 }
12814 
12815 
12816 /*
12817  *    Function: sd_pm_iostart
12818  *
 * Description: iostart-side routine for power management.
12820  *
12821  *     Context: Kernel thread context
12822  */
12823 
12824 static void
12825 sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
12826 {
12827 	ASSERT(un != NULL);
12828 	ASSERT(bp != NULL);
12829 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12830 	ASSERT(!mutex_owned(&un->un_pm_mutex));
12831 
12832 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
12833 
12834 	if (sd_pm_entry(un) != DDI_SUCCESS) {
12835 		/*
12836 		 * Set up to return the failed buf back up the 'iodone'
12837 		 * side of the calling chain.
12838 		 */
12839 		bioerror(bp, EIO);
12840 		bp->b_resid = bp->b_bcount;
12841 
12842 		SD_BEGIN_IODONE(index, un, bp);
12843 
12844 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
12845 		return;
12846 	}
12847 
12848 	SD_NEXT_IOSTART(index, un, bp);
12849 
12850 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
12851 }
12852 
12853 
12854 /*
12855  *    Function: sd_pm_iodone
12856  *
 * Description: iodone-side routine for power management.
12858  *
12859  *     Context: may be called from interrupt context
12860  */
12861 
12862 static void
12863 sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
12864 {
12865 	ASSERT(un != NULL);
12866 	ASSERT(bp != NULL);
12867 	ASSERT(!mutex_owned(&un->un_pm_mutex));
12868 
12869 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
12870 
12871 	/*
12872 	 * After attach the following flag is only read, so don't
12873 	 * take the penalty of acquiring a mutex for it.
12874 	 */
12875 	if (un->un_f_pm_is_enabled == TRUE) {
12876 		sd_pm_exit(un);
12877 	}
12878 
12879 	SD_NEXT_IODONE(index, un, bp);
12880 
12881 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
12882 }
12883 
12884 
12885 /*
12886  *    Function: sd_core_iostart
12887  *
12888  * Description: Primary driver function for enqueuing buf(9S) structs from
12889  *		the system and initiating IO to the target device
12890  *
12891  *     Context: Kernel thread context. Can sleep.
12892  *
12893  * Assumptions:  - The given xp->xb_blkno is absolute
12894  *		   (ie, relative to the start of the device).
12895  *		 - The IO is to be done using the native blocksize of
12896  *		   the device, as specified in un->un_tgt_blocksize.
12897  */
12898 /* ARGSUSED */
static void
sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
{
	struct sd_xbuf *xp;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(bp->b_resid == 0);

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);

	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	/* The mutex protects the failfast state and the wait queue. */
	mutex_enter(SD_MUTEX(un));

	/*
	 * If we are currently in the failfast state, fail any new IO
	 * that has B_FAILFAST set, then return.
	 */
	if ((bp->b_flags & B_FAILFAST) &&
	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
		mutex_exit(SD_MUTEX(un));
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		SD_BEGIN_IODONE(index, un, bp);
		return;
	}

	if (SD_IS_DIRECT_PRIORITY(xp)) {
		/*
		 * Priority command -- transport it immediately.
		 *
		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
		 * because all direct priority commands should be associated
		 * with error recovery actions which we don't want to retry.
		 */
		sd_start_cmds(un, bp);
	} else {
		/*
		 * Normal command -- add it to the wait queue, then start
		 * transporting commands from the wait queue.
		 */
		sd_add_buf_to_waitq(un, bp);
		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
		sd_start_cmds(un, NULL);
	}

	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
}
12952 
12953 
12954 /*
12955  *    Function: sd_init_cdb_limits
12956  *
12957  * Description: This is to handle scsi_pkt initialization differences
12958  *		between the driver platforms.
12959  *
12960  *		Legacy behaviors:
12961  *
12962  *		If the block number or the sector count exceeds the
12963  *		capabilities of a Group 0 command, shift over to a
12964  *		Group 1 command. We don't blindly use Group 1
12965  *		commands because a) some drives (CDC Wren IVs) get a
12966  *		bit confused, and b) there is probably a fair amount
12967  *		of speed difference for a target to receive and decode
12968  *		a 10 byte command instead of a 6 byte command.
12969  *
12970  *		The xfer time difference of 6 vs 10 byte CDBs is
12971  *		still significant so this code is still worthwhile.
12972  *		10 byte CDBs are very inefficient with the fas HBA driver
12973  *		and older disks. Each CDB byte took 1 usec with some
12974  *		popular disks.
12975  *
12976  *     Context: Must be called at attach time
12977  */
12978 
12979 static void
12980 sd_init_cdb_limits(struct sd_lun *un)
12981 {
12982 	int hba_cdb_limit;
12983 
12984 	/*
12985 	 * Use CDB_GROUP1 commands for most devices except for
12986 	 * parallel SCSI fixed drives in which case we get better
12987 	 * performance using CDB_GROUP0 commands (where applicable).
12988 	 */
12989 	un->un_mincdb = SD_CDB_GROUP1;
12990 #if !defined(__fibre)
12991 	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
12992 	    !un->un_f_has_removable_media) {
12993 		un->un_mincdb = SD_CDB_GROUP0;
12994 	}
12995 #endif
12996 
12997 	/*
12998 	 * Try to read the max-cdb-length supported by HBA.
12999 	 */
13000 	un->un_max_hba_cdb = scsi_ifgetcap(SD_ADDRESS(un), "max-cdb-length", 1);
13001 	if (0 >= un->un_max_hba_cdb) {
13002 		un->un_max_hba_cdb = CDB_GROUP4;
13003 		hba_cdb_limit = SD_CDB_GROUP4;
13004 	} else if (0 < un->un_max_hba_cdb &&
13005 	    un->un_max_hba_cdb < CDB_GROUP1) {
13006 		hba_cdb_limit = SD_CDB_GROUP0;
13007 	} else if (CDB_GROUP1 <= un->un_max_hba_cdb &&
13008 	    un->un_max_hba_cdb < CDB_GROUP5) {
13009 		hba_cdb_limit = SD_CDB_GROUP1;
13010 	} else if (CDB_GROUP5 <= un->un_max_hba_cdb &&
13011 	    un->un_max_hba_cdb < CDB_GROUP4) {
13012 		hba_cdb_limit = SD_CDB_GROUP5;
13013 	} else {
13014 		hba_cdb_limit = SD_CDB_GROUP4;
13015 	}
13016 
13017 	/*
13018 	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
13019 	 * commands for fixed disks unless we are building for a 32 bit
13020 	 * kernel.
13021 	 */
13022 #ifdef _LP64
13023 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13024 	    min(hba_cdb_limit, SD_CDB_GROUP4);
13025 #else
13026 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13027 	    min(hba_cdb_limit, SD_CDB_GROUP1);
13028 #endif
13029 
13030 	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
13031 	    ? sizeof (struct scsi_arq_status) : 1);
13032 	un->un_cmd_timeout = (ushort_t)sd_io_time;
13033 	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
13034 }
13035 
13036 
13037 /*
13038  *    Function: sd_initpkt_for_buf
13039  *
13040  * Description: Allocate and initialize for transport a scsi_pkt struct,
13041  *		based upon the info specified in the given buf struct.
13042  *
13043  *		Assumes the xb_blkno in the request is absolute (ie,
13044  *		relative to the start of the device (NOT partition!).
13045  *		Also assumes that the request is using the native block
13046  *		size of the device (as returned by the READ CAPACITY
13047  *		command).
13048  *
13049  * Return Code: SD_PKT_ALLOC_SUCCESS
13050  *		SD_PKT_ALLOC_FAILURE
13051  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13052  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13053  *
13054  *     Context: Kernel thread and may be called from software interrupt context
13055  *		as part of a sdrunout callback. This function may not block or
13056  *		call routines that block
13057  */
13058 
static int
sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
{
	struct sd_xbuf	*xp;
	struct scsi_pkt *pktp = NULL;
	struct sd_lun	*un;
	size_t		blockcount;
	daddr_t		startblock;
	int		rval;
	int		cmd_flags;

	ASSERT(bp != NULL);
	ASSERT(pktpp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp->b_resid == 0);

	SD_TRACE(SD_LOG_IO_CORE, un,
	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);

	/*
	 * Drop the per-lun mutex across packet setup; it is re-acquired
	 * before every return path below, restoring the caller's locking
	 * expectations.
	 */
	mutex_exit(SD_MUTEX(un));

#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
		/*
		 * Already have a scsi_pkt -- just need DMA resources.
		 * We must recompute the CDB in case the mapping returns
		 * a nonzero pkt_resid.
		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
		 * that is being retried, the unmap/remap of the DMA resources
		 * will result in the entire transfer starting over again
		 * from the very first block.
		 */
		ASSERT(xp->xb_pktp != NULL);
		pktp = xp->xb_pktp;
	} else {
		pktp = NULL;
	}
#endif /* __i386 || __amd64 */

	startblock = xp->xb_blkno;	/* Absolute block num. */
	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);

	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);

	/*
	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
	 * call scsi_init_pkt, and build the CDB.
	 */
	rval = sd_setup_rw_pkt(un, &pktp, bp,
	    cmd_flags, sdrunout, (caddr_t)un,
	    startblock, blockcount);

	if (rval == 0) {
		/*
		 * Success.
		 *
		 * If partial DMA is being used and required for this transfer.
		 * set it up here.
		 */
		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
		    (pktp->pkt_resid != 0)) {

			/*
			 * Save the CDB length and pkt_resid for the
			 * next xfer
			 */
			xp->xb_dma_resid = pktp->pkt_resid;

			/* rezero resid */
			pktp->pkt_resid = 0;

		} else {
			xp->xb_dma_resid = 0;
		}

		pktp->pkt_flags = un->un_tagflags;
		pktp->pkt_time  = un->un_cmd_timeout;
		pktp->pkt_comp  = sdintr;

		pktp->pkt_private = bp;
		*pktpp = pktp;

		SD_TRACE(SD_LOG_IO_CORE, un,
		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);

#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
#endif

		mutex_enter(SD_MUTEX(un));
		return (SD_PKT_ALLOC_SUCCESS);

	}

	/*
	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
	 * from sd_setup_rw_pkt.
	 */
	ASSERT(rval == SD_PKT_ALLOC_FAILURE);

	if (rval == SD_PKT_ALLOC_FAILURE) {
		*pktpp = NULL;
		/*
		 * Set the driver state to RWAIT to indicate the driver
		 * is waiting on resource allocations. The driver will not
		 * suspend, pm_suspend, or detach while the state is RWAIT.
		 */
		mutex_enter(SD_MUTEX(un));
		New_state(un, SD_STATE_RWAIT);

		SD_ERROR(SD_LOG_IO_CORE, un,
		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);

		/*
		 * NOTE(review): B_ERROR here is presumably set by the DMA
		 * mapping layer to flag a hard failure (vs. a transient
		 * resource shortage) -- confirm against scsi_init_pkt(9F).
		 */
		if ((bp->b_flags & B_ERROR) != 0) {
			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
		}
		return (SD_PKT_ALLOC_FAILURE);
	} else {
		/*
		 * PKT_ALLOC_FAILURE_CDB_TOO_SMALL
		 *
		 * This should never happen.  Maybe someone messed with the
		 * kernel's minphys?
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "Request rejected: too large for CDB: "
		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
		SD_ERROR(SD_LOG_IO_CORE, un,
		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
		mutex_enter(SD_MUTEX(un));
		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);

	}
}
13197 
13198 
13199 /*
13200  *    Function: sd_destroypkt_for_buf
13201  *
13202  * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
13203  *
13204  *     Context: Kernel thread or interrupt context
13205  */
13206 
13207 static void
13208 sd_destroypkt_for_buf(struct buf *bp)
13209 {
13210 	ASSERT(bp != NULL);
13211 	ASSERT(SD_GET_UN(bp) != NULL);
13212 
13213 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13214 	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
13215 
13216 	ASSERT(SD_GET_PKTP(bp) != NULL);
13217 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13218 
13219 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13220 	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
13221 }
13222 
13223 /*
13224  *    Function: sd_setup_rw_pkt
13225  *
13226  * Description: Determines appropriate CDB group for the requested LBA
13227  *		and transfer length, calls scsi_init_pkt, and builds
13228  *		the CDB.  Do not use for partial DMA transfers except
13229  *		for the initial transfer since the CDB size must
13230  *		remain constant.
13231  *
13232  *     Context: Kernel thread and may be called from software interrupt
13233  *		context as part of a sdrunout callback. This function may not
13234  *		block or call routines that block
13235  */
13236 
13237 
int
sd_setup_rw_pkt(struct sd_lun *un,
    struct scsi_pkt **pktpp, struct buf *bp, int flags,
    int (*callback)(caddr_t), caddr_t callback_arg,
    diskaddr_t lba, uint32_t blockcount)
{
	struct scsi_pkt *return_pktp;
	union scsi_cdb *cdbp;
	struct sd_cdbinfo *cp = NULL;
	int i;

	/*
	 * See which size CDB to use, based upon the request.
	 * The table is scanned from the smallest allowed CDB group
	 * (un->un_mincdb) up to the largest (un->un_maxcdb).
	 */
	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {

		/*
		 * Check lba and block count against sd_cdbtab limits.
		 * In the partial DMA case, we have to use the same size
		 * CDB for all the transfers.  Check lba + blockcount
		 * against the max LBA so we know that segment of the
		 * transfer can use the CDB we select.
		 */
		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {

			/*
			 * The command will fit into the CDB type
			 * specified by sd_cdbtab[i].
			 */
			cp = sd_cdbtab + i;

			/*
			 * Call scsi_init_pkt so we can fill in the
			 * CDB.
			 */
			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
			    bp, cp->sc_grpcode, un->un_status_len, 0,
			    flags, callback, callback_arg);

			if (return_pktp != NULL) {

				/*
				 * Return new value of pkt
				 */
				*pktpp = return_pktp;

				/*
				 * To be safe, zero the CDB insuring there is
				 * no leftover data from a previous command.
				 * (sc_grpcode doubles as the CDB length in
				 * bytes, per the CDB_GROUP* values.)
				 */
				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);

				/*
				 * Handle partial DMA mapping
				 */
				if (return_pktp->pkt_resid != 0) {

					/*
					 * Not going to xfer as many blocks as
					 * originally expected
					 */
					blockcount -=
					    SD_BYTES2TGTBLOCKS(un,
					    return_pktp->pkt_resid);
				}

				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;

				/*
				 * Set command byte based on the CDB
				 * type we matched.
				 */
				cdbp->scc_cmd = cp->sc_grpmask |
				    ((bp->b_flags & B_READ) ?
				    SCMD_READ : SCMD_WRITE);

				SD_FILL_SCSI1_LUN(un, return_pktp);

				/*
				 * Fill in LBA and length
				 */
				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
				    (cp->sc_grpcode == CDB_GROUP4) ||
				    (cp->sc_grpcode == CDB_GROUP0) ||
				    (cp->sc_grpcode == CDB_GROUP5));

				if (cp->sc_grpcode == CDB_GROUP1) {
					FORMG1ADDR(cdbp, lba);
					FORMG1COUNT(cdbp, blockcount);
					return (0);
				} else if (cp->sc_grpcode == CDB_GROUP4) {
					FORMG4LONGADDR(cdbp, lba);
					FORMG4COUNT(cdbp, blockcount);
					return (0);
				} else if (cp->sc_grpcode == CDB_GROUP0) {
					FORMG0ADDR(cdbp, lba);
					FORMG0COUNT(cdbp, blockcount);
					return (0);
				} else if (cp->sc_grpcode == CDB_GROUP5) {
					FORMG5ADDR(cdbp, lba);
					FORMG5COUNT(cdbp, blockcount);
					return (0);
				}

				/*
				 * It should be impossible to not match one
				 * of the CDB types above, so we should never
				 * reach this point.  Set the CDB command byte
				 * to test-unit-ready to avoid writing
				 * to somewhere we don't intend.
				 */
				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
			} else {
				/*
				 * Couldn't get scsi_pkt
				 */
				return (SD_PKT_ALLOC_FAILURE);
			}
		}
	}

	/*
	 * None of the available CDB types were suitable.  This really
	 * should never happen:  on a 64 bit system we support
	 * READ16/WRITE16 which will hold an entire 64 bit disk address
	 * and on a 32 bit system we will refuse to bind to a device
	 * larger than 2TB so addresses will never be larger than 32 bits.
	 */
	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
}
13370 
13371 /*
13372  *    Function: sd_setup_next_rw_pkt
13373  *
13374  * Description: Setup packet for partial DMA transfers, except for the
13375  * 		initial transfer.  sd_setup_rw_pkt should be used for
13376  *		the initial transfer.
13377  *
13378  *     Context: Kernel thread and may be called from interrupt context.
13379  */
13380 
int
sd_setup_next_rw_pkt(struct sd_lun *un,
    struct scsi_pkt *pktp, struct buf *bp,
    diskaddr_t lba, uint32_t blockcount)
{
	uchar_t com;
	union scsi_cdb *cdbp;
	uchar_t cdb_group_id;

	ASSERT(pktp != NULL);
	ASSERT(pktp->pkt_cdbp != NULL);

	/*
	 * Reuse the CDB group chosen by sd_setup_rw_pkt for the initial
	 * transfer; a partial-DMA sequence must keep the CDB size
	 * constant across all of its segments.
	 */
	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
	com = cdbp->scc_cmd;
	cdb_group_id = CDB_GROUPID(com);

	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
	    (cdb_group_id == CDB_GROUPID_1) ||
	    (cdb_group_id == CDB_GROUPID_4) ||
	    (cdb_group_id == CDB_GROUPID_5));

	/*
	 * Move pkt to the next portion of the xfer.
	 * func is NULL_FUNC so we do not have to release
	 * the disk mutex here.
	 */
	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
	    NULL_FUNC, NULL) == pktp) {
		/* Success.  Handle partial DMA */
		if (pktp->pkt_resid != 0) {
			/* This segment will move fewer blocks than asked. */
			blockcount -=
			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
		}

		/* Rebuild the CDB with the same command and the new range. */
		cdbp->scc_cmd = com;
		SD_FILL_SCSI1_LUN(un, pktp);
		if (cdb_group_id == CDB_GROUPID_1) {
			FORMG1ADDR(cdbp, lba);
			FORMG1COUNT(cdbp, blockcount);
			return (0);
		} else if (cdb_group_id == CDB_GROUPID_4) {
			FORMG4LONGADDR(cdbp, lba);
			FORMG4COUNT(cdbp, blockcount);
			return (0);
		} else if (cdb_group_id == CDB_GROUPID_0) {
			FORMG0ADDR(cdbp, lba);
			FORMG0COUNT(cdbp, blockcount);
			return (0);
		} else if (cdb_group_id == CDB_GROUPID_5) {
			FORMG5ADDR(cdbp, lba);
			FORMG5COUNT(cdbp, blockcount);
			return (0);
		}

		/* Unreachable */
		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
	}

	/*
	 * Error setting up next portion of cmd transfer.
	 * Something is definitely very wrong and this
	 * should not happen.
	 */
	return (SD_PKT_ALLOC_FAILURE);
}
13446 
13447 /*
13448  *    Function: sd_initpkt_for_uscsi
13449  *
13450  * Description: Allocate and initialize for transport a scsi_pkt struct,
13451  *		based upon the info specified in the given uscsi_cmd struct.
13452  *
13453  * Return Code: SD_PKT_ALLOC_SUCCESS
13454  *		SD_PKT_ALLOC_FAILURE
13455  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13456  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13457  *
13458  *     Context: Kernel thread and may be called from software interrupt context
13459  *		as part of a sdrunout callback. This function may not block or
13460  *		call routines that block
13461  */
13462 
static int
sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
{
	struct uscsi_cmd *uscmd;
	struct sd_xbuf	*xp;
	struct scsi_pkt	*pktp;
	struct sd_lun	*un;
	uint32_t	flags = 0;

	ASSERT(bp != NULL);
	ASSERT(pktpp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
	ASSERT(uscmd != NULL);

	SD_TRACE(SD_LOG_IO_CORE, un,
	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);

	/*
	 * Allocate the scsi_pkt for the command.
	 * Note: If PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
	 *	 during scsi_init_pkt time and will continue to use the
	 *	 same path as long as the same scsi_pkt is used without
	 *	 intervening scsi_dma_free(). Since uscsi command does
	 *	 not call scsi_dmafree() before retry failed command, it
	 *	 is necessary to make sure PKT_DMA_PARTIAL flag is NOT
	 *	 set such that scsi_vhci can use other available path for
	 *	 retry. Besides, uscsi command does not allow DMA breakup,
	 *	 so there is no need to set PKT_DMA_PARTIAL flag.
	 *
	 * Requests expecting more sense data than SENSE_LENGTH use the
	 * extended-ARQ (PKT_XARQ) variant with a larger status area.
	 */
	if (uscmd->uscsi_rqlen > SENSE_LENGTH) {
		pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
		    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
		    ((int)(uscmd->uscsi_rqlen) + sizeof (struct scsi_arq_status)
		    - sizeof (struct scsi_extended_sense)), 0,
		    (un->un_pkt_flags & ~PKT_DMA_PARTIAL) | PKT_XARQ,
		    sdrunout, (caddr_t)un);
	} else {
		pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
		    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
		    sizeof (struct scsi_arq_status), 0,
		    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
		    sdrunout, (caddr_t)un);
	}

	if (pktp == NULL) {
		*pktpp = NULL;
		/*
		 * Set the driver state to RWAIT to indicate the driver
		 * is waiting on resource allocations. The driver will not
		 * suspend, pm_suspend, or detach while the state is RWAIT.
		 */
		New_state(un, SD_STATE_RWAIT);

		SD_ERROR(SD_LOG_IO_CORE, un,
		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);

		if ((bp->b_flags & B_ERROR) != 0) {
			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
		}
		return (SD_PKT_ALLOC_FAILURE);
	}

	/*
	 * We do not do DMA breakup for USCSI commands, so return failure
	 * here if all the needed DMA resources were not allocated.
	 */
	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
		scsi_destroy_pkt(pktp);
		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
	}

	/* Init the cdb from the given uscsi struct */
	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
	    uscmd->uscsi_cdb[0], 0, 0, 0);

	SD_FILL_SCSI1_LUN(un, pktp);

	/*
	 * Set up the optional USCSI flags. See the uscsi (7I) man page
	 * for listing of the supported flags.
	 */

	if (uscmd->uscsi_flags & USCSI_SILENT) {
		flags |= FLAG_SILENT;
	}

	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
		flags |= FLAG_DIAGNOSE;
	}

	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
		flags |= FLAG_ISOLATE;
	}

	if (un->un_f_is_fibre == FALSE) {
		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
		}
	}

	/*
	 * Set the pkt flags here so we save time later.
	 * Note: These flags are NOT in the uscsi man page!!!
	 */
	if (uscmd->uscsi_flags & USCSI_HEAD) {
		flags |= FLAG_HEAD;
	}

	if (uscmd->uscsi_flags & USCSI_NOINTR) {
		flags |= FLAG_NOINTR;
	}

	/*
	 * For tagged queueing, things get a bit complicated.
	 * Check first for head of queue and last for ordered queue.
	 * If neither head nor order, use the default driver tag flags.
	 */
	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
		if (uscmd->uscsi_flags & USCSI_HTAG) {
			flags |= FLAG_HTAG;
		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
			flags |= FLAG_OTAG;
		} else {
			flags |= un->un_tagflags & FLAG_TAGMASK;
		}
	}

	if (uscmd->uscsi_flags & USCSI_NODISCON) {
		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
	}

	pktp->pkt_flags = flags;

	/* Transfer uscsi information to scsi_pkt */
	(void) scsi_uscsi_pktinit(uscmd, pktp);

	/* Copy the caller's CDB into the pkt... */
	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);

	/* A zero uscsi timeout means "use the driver default". */
	if (uscmd->uscsi_timeout == 0) {
		pktp->pkt_time = un->un_uscsi_timeout;
	} else {
		pktp->pkt_time = uscmd->uscsi_timeout;
	}

	/* need it later to identify USCSI request in sdintr */
	xp->xb_pkt_flags |= SD_XB_USCSICMD;

	xp->xb_sense_resid = uscmd->uscsi_rqresid;

	pktp->pkt_private = bp;
	pktp->pkt_comp = sdintr;
	*pktpp = pktp;

	SD_TRACE(SD_LOG_IO_CORE, un,
	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);

	return (SD_PKT_ALLOC_SUCCESS);
}
13632 
13633 
13634 /*
13635  *    Function: sd_destroypkt_for_uscsi
13636  *
13637  * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
13638  *		IOs.. Also saves relevant info into the associated uscsi_cmd
13639  *		struct.
13640  *
13641  *     Context: May be called under interrupt context
13642  */
13643 
static void
sd_destroypkt_for_uscsi(struct buf *bp)
{
	struct uscsi_cmd *uscmd;
	struct sd_xbuf	*xp;
	struct scsi_pkt	*pktp;
	struct sd_lun	*un;
	struct sd_uscsi_info *suip;

	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO_CORE, un,
	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);

	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
	ASSERT(uscmd != NULL);

	/* Save the status and the residual into the uscsi_cmd struct */
	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
	uscmd->uscsi_resid  = bp->b_resid;

	/* Transfer scsi_pkt information to uscsi */
	(void) scsi_uscsi_pktfini(pktp, uscmd);

	/*
	 * If enabled, copy any saved sense data into the area specified
	 * by the uscsi command.
	 */
	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
		/*
		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
		 *
		 * NOTE(review): when uscsi_rqlen > SENSE_LENGTH a full
		 * MAX_SENSE_LENGTH bytes are copied regardless of the exact
		 * rqlen value -- this assumes rqbuf can hold MAX_SENSE_LENGTH
		 * in that case; verify against sd_send_scsi_cmd().
		 */
		uscmd->uscsi_rqstatus = xp->xb_sense_status;
		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
		if (uscmd->uscsi_rqlen > SENSE_LENGTH) {
			bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf,
			    MAX_SENSE_LENGTH);
		} else {
			bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf,
			    SENSE_LENGTH);
		}
	}
	/*
	 * The following assignments are for SCSI FMA.
	 */
	ASSERT(xp->xb_private != NULL);
	suip = (struct sd_uscsi_info *)xp->xb_private;
	suip->ui_pkt_reason = pktp->pkt_reason;
	suip->ui_pkt_state = pktp->pkt_state;
	suip->ui_pkt_statistics = pktp->pkt_statistics;
	suip->ui_lba = (uint64_t)SD_GET_BLKNO(bp);

	/* We are done with the scsi_pkt; free it now */
	ASSERT(SD_GET_PKTP(bp) != NULL);
	scsi_destroy_pkt(SD_GET_PKTP(bp));

	SD_TRACE(SD_LOG_IO_CORE, un,
	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
}
13713 
13714 
13715 /*
13716  *    Function: sd_bioclone_alloc
13717  *
13718  * Description: Allocate a buf(9S) and init it as per the given buf
13719  *		and the various arguments.  The associated sd_xbuf
13720  *		struct is (nearly) duplicated.  The struct buf *bp
13721  *		argument is saved in new_xp->xb_private.
13722  *
 *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
13724  *		datalen - size of data area for the shadow bp
13725  *		blkno - starting LBA
13726  *		func - function pointer for b_iodone in the shadow buf. (May
13727  *			be NULL if none.)
13728  *
 * Return Code: Pointer to the allocated buf(9S) struct
13730  *
13731  *     Context: Can sleep.
13732  */
13733 
13734 static struct buf *
13735 sd_bioclone_alloc(struct buf *bp, size_t datalen,
13736 	daddr_t blkno, int (*func)(struct buf *))
13737 {
13738 	struct	sd_lun	*un;
13739 	struct	sd_xbuf	*xp;
13740 	struct	sd_xbuf	*new_xp;
13741 	struct	buf	*new_bp;
13742 
13743 	ASSERT(bp != NULL);
13744 	xp = SD_GET_XBUF(bp);
13745 	ASSERT(xp != NULL);
13746 	un = SD_GET_UN(bp);
13747 	ASSERT(un != NULL);
13748 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13749 
13750 	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
13751 	    NULL, KM_SLEEP);
13752 
13753 	new_bp->b_lblkno	= blkno;
13754 
13755 	/*
13756 	 * Allocate an xbuf for the shadow bp and copy the contents of the
13757 	 * original xbuf into it.
13758 	 */
13759 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
13760 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
13761 
13762 	/*
13763 	 * The given bp is automatically saved in the xb_private member
13764 	 * of the new xbuf.  Callers are allowed to depend on this.
13765 	 */
13766 	new_xp->xb_private = bp;
13767 
13768 	new_bp->b_private  = new_xp;
13769 
13770 	return (new_bp);
13771 }
13772 
13773 /*
13774  *    Function: sd_shadow_buf_alloc
13775  *
13776  * Description: Allocate a buf(9S) and init it as per the given buf
13777  *		and the various arguments.  The associated sd_xbuf
13778  *		struct is (nearly) duplicated.  The struct buf *bp
13779  *		argument is saved in new_xp->xb_private.
13780  *
 *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
13782  *		datalen - size of data area for the shadow bp
13783  *		bflags - B_READ or B_WRITE (pseudo flag)
13784  *		blkno - starting LBA
13785  *		func - function pointer for b_iodone in the shadow buf. (May
13786  *			be NULL if none.)
13787  *
 * Return Code: Pointer to the allocated buf(9S) struct
13789  *
13790  *     Context: Can sleep.
13791  */
13792 
13793 static struct buf *
13794 sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
13795 	daddr_t blkno, int (*func)(struct buf *))
13796 {
13797 	struct	sd_lun	*un;
13798 	struct	sd_xbuf	*xp;
13799 	struct	sd_xbuf	*new_xp;
13800 	struct	buf	*new_bp;
13801 
13802 	ASSERT(bp != NULL);
13803 	xp = SD_GET_XBUF(bp);
13804 	ASSERT(xp != NULL);
13805 	un = SD_GET_UN(bp);
13806 	ASSERT(un != NULL);
13807 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13808 
13809 	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
13810 		bp_mapin(bp);
13811 	}
13812 
13813 	bflags &= (B_READ | B_WRITE);
13814 #if defined(__i386) || defined(__amd64)
13815 	new_bp = getrbuf(KM_SLEEP);
13816 	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
13817 	new_bp->b_bcount = datalen;
13818 	new_bp->b_flags = bflags |
13819 	    (bp->b_flags & ~(B_PAGEIO | B_PHYS | B_REMAPPED | B_SHADOW));
13820 #else
13821 	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
13822 	    datalen, bflags, SLEEP_FUNC, NULL);
13823 #endif
13824 	new_bp->av_forw	= NULL;
13825 	new_bp->av_back	= NULL;
13826 	new_bp->b_dev	= bp->b_dev;
13827 	new_bp->b_blkno	= blkno;
13828 	new_bp->b_iodone = func;
13829 	new_bp->b_edev	= bp->b_edev;
13830 	new_bp->b_resid	= 0;
13831 
13832 	/* We need to preserve the B_FAILFAST flag */
13833 	if (bp->b_flags & B_FAILFAST) {
13834 		new_bp->b_flags |= B_FAILFAST;
13835 	}
13836 
13837 	/*
13838 	 * Allocate an xbuf for the shadow bp and copy the contents of the
13839 	 * original xbuf into it.
13840 	 */
13841 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
13842 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
13843 
13844 	/* Need later to copy data between the shadow buf & original buf! */
13845 	new_xp->xb_pkt_flags |= PKT_CONSISTENT;
13846 
13847 	/*
13848 	 * The given bp is automatically saved in the xb_private member
13849 	 * of the new xbuf.  Callers are allowed to depend on this.
13850 	 */
13851 	new_xp->xb_private = bp;
13852 
13853 	new_bp->b_private  = new_xp;
13854 
13855 	return (new_bp);
13856 }
13857 
13858 /*
13859  *    Function: sd_bioclone_free
13860  *
13861  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
13862  *		in the larger than partition operation.
13863  *
13864  *     Context: May be called under interrupt context
13865  */
13866 
13867 static void
13868 sd_bioclone_free(struct buf *bp)
13869 {
13870 	struct sd_xbuf	*xp;
13871 
13872 	ASSERT(bp != NULL);
13873 	xp = SD_GET_XBUF(bp);
13874 	ASSERT(xp != NULL);
13875 
13876 	/*
13877 	 * Call bp_mapout() before freeing the buf,  in case a lower
13878 	 * layer or HBA  had done a bp_mapin().  we must do this here
13879 	 * as we are the "originator" of the shadow buf.
13880 	 */
13881 	bp_mapout(bp);
13882 
13883 	/*
13884 	 * Null out b_iodone before freeing the bp, to ensure that the driver
13885 	 * never gets confused by a stale value in this field. (Just a little
13886 	 * extra defensiveness here.)
13887 	 */
13888 	bp->b_iodone = NULL;
13889 
13890 	freerbuf(bp);
13891 
13892 	kmem_free(xp, sizeof (struct sd_xbuf));
13893 }
13894 
13895 /*
13896  *    Function: sd_shadow_buf_free
13897  *
13898  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
13899  *
13900  *     Context: May be called under interrupt context
13901  */
13902 
13903 static void
13904 sd_shadow_buf_free(struct buf *bp)
13905 {
13906 	struct sd_xbuf	*xp;
13907 
13908 	ASSERT(bp != NULL);
13909 	xp = SD_GET_XBUF(bp);
13910 	ASSERT(xp != NULL);
13911 
13912 #if defined(__sparc)
13913 	/*
13914 	 * Call bp_mapout() before freeing the buf,  in case a lower
13915 	 * layer or HBA  had done a bp_mapin().  we must do this here
13916 	 * as we are the "originator" of the shadow buf.
13917 	 */
13918 	bp_mapout(bp);
13919 #endif
13920 
13921 	/*
13922 	 * Null out b_iodone before freeing the bp, to ensure that the driver
13923 	 * never gets confused by a stale value in this field. (Just a little
13924 	 * extra defensiveness here.)
13925 	 */
13926 	bp->b_iodone = NULL;
13927 
13928 #if defined(__i386) || defined(__amd64)
13929 	kmem_free(bp->b_un.b_addr, bp->b_bcount);
13930 	freerbuf(bp);
13931 #else
13932 	scsi_free_consistent_buf(bp);
13933 #endif
13934 
13935 	kmem_free(xp, sizeof (struct sd_xbuf));
13936 }
13937 
13938 
13939 /*
13940  *    Function: sd_print_transport_rejected_message
13941  *
13942  * Description: This implements the ludicrously complex rules for printing
13943  *		a "transport rejected" message.  This is to address the
13944  *		specific problem of having a flood of this error message
13945  *		produced when a failover occurs.
13946  *
13947  *     Context: Any.
13948  */
13949 
13950 static void
13951 sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
13952 	int code)
13953 {
13954 	ASSERT(un != NULL);
13955 	ASSERT(mutex_owned(SD_MUTEX(un)));
13956 	ASSERT(xp != NULL);
13957 
13958 	/*
13959 	 * Print the "transport rejected" message under the following
13960 	 * conditions:
13961 	 *
13962 	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
13963 	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
13964 	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
13965 	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
13966 	 *   scsi_transport(9F) (which indicates that the target might have
13967 	 *   gone off-line).  This uses the un->un_tran_fatal_count
13968 	 *   count, which is incremented whenever a TRAN_FATAL_ERROR is
13969 	 *   received, and reset to zero whenver a TRAN_ACCEPT is returned
13970 	 *   from scsi_transport().
13971 	 *
13972 	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
13973 	 * the preceeding cases in order for the message to be printed.
13974 	 */
13975 	if (((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) &&
13976 	    (SD_FM_LOG(un) == SD_FM_LOG_NSUP)) {
13977 		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
13978 		    (code != TRAN_FATAL_ERROR) ||
13979 		    (un->un_tran_fatal_count == 1)) {
13980 			switch (code) {
13981 			case TRAN_BADPKT:
13982 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13983 				    "transport rejected bad packet\n");
13984 				break;
13985 			case TRAN_FATAL_ERROR:
13986 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13987 				    "transport rejected fatal error\n");
13988 				break;
13989 			default:
13990 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13991 				    "transport rejected (%d)\n", code);
13992 				break;
13993 			}
13994 		}
13995 	}
13996 }
13997 
13998 
13999 /*
14000  *    Function: sd_add_buf_to_waitq
14001  *
14002  * Description: Add the given buf(9S) struct to the wait queue for the
14003  *		instance.  If sorting is enabled, then the buf is added
14004  *		to the queue via an elevator sort algorithm (a la
14005  *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
14006  *		If sorting is not enabled, then the buf is just added
14007  *		to the end of the wait queue.
14008  *
14009  * Return Code: void
14010  *
14011  *     Context: Does not sleep/block, therefore technically can be called
14012  *		from any context.  However if sorting is enabled then the
14013  *		execution time is indeterminate, and may take long if
14014  *		the wait queue grows large.
14015  */
14016 
static void
sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
{
	struct buf *ap;

	ASSERT(bp != NULL);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* If the queue is empty, add the buf as the only entry & return. */
	if (un->un_waitq_headp == NULL) {
		ASSERT(un->un_waitq_tailp == NULL);
		un->un_waitq_headp = un->un_waitq_tailp = bp;
		bp->av_forw = NULL;
		return;
	}

	ASSERT(un->un_waitq_tailp != NULL);

	/*
	 * If sorting is disabled, just add the buf to the tail end of
	 * the wait queue and return.
	 */
	if (un->un_f_disksort_disabled) {
		un->un_waitq_tailp->av_forw = bp;
		un->un_waitq_tailp = bp;
		bp->av_forw = NULL;
		return;
	}

	/*
	 * Sort thru the list of requests currently on the wait queue
	 * and add the new buf request at the appropriate position.
	 *
	 * The un->un_waitq_headp is an activity chain pointer on which
	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
	 * first queue holds those requests which are positioned after
	 * the current SD_GET_BLKNO() (in the first request); the second holds
	 * requests which came in after their SD_GET_BLKNO() number was passed.
	 * Thus we implement a one way scan, retracting after reaching
	 * the end of the drive to the first request on the second
	 * queue, at which time it becomes the first queue.
	 * A one-way scan is natural because of the way UNIX read-ahead
	 * blocks are allocated.
	 *
	 * If we lie after the first request, then we must locate the
	 * second request list and add ourselves to it.
	 *
	 * The two sorted sublists are stored back-to-back on a single
	 * av_forw chain; the boundary between them is detected as an
	 * "inversion" (a drop in block number between adjacent entries).
	 */
	ap = un->un_waitq_headp;
	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
		/* Our block number is behind the head: belongs in list 2. */
		while (ap->av_forw != NULL) {
			/*
			 * Look for an "inversion" in the (normally
			 * ascending) block numbers. This indicates
			 * the start of the second request list.
			 */
			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
				/*
				 * Search the second request list for the
				 * first request at a larger block number.
				 * We go before that; however if there is
				 * no such request, we go at the end.
				 */
				do {
					if (SD_GET_BLKNO(bp) <
					    SD_GET_BLKNO(ap->av_forw)) {
						goto insert;
					}
					ap = ap->av_forw;
				} while (ap->av_forw != NULL);
				goto insert;		/* after last */
			}
			ap = ap->av_forw;
		}

		/*
		 * No inversions... we will go after the last, and
		 * be the first request in the second request list.
		 */
		goto insert;
	}

	/*
	 * Request is at/after the current request...
	 * sort in the first request list.
	 */
	while (ap->av_forw != NULL) {
		/*
		 * We want to go after the current request (1) if
		 * there is an inversion after it (i.e. it is the end
		 * of the first request list), or (2) if the next
		 * request is a larger block no. than our request.
		 */
		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
			goto insert;
		}
		ap = ap->av_forw;
	}

	/*
	 * Neither a second list nor a larger request, therefore
	 * we go at the end of the first list (which is the same
	 * as the end of the whole schebang).
	 */
insert:
	/* Common insertion point: link bp in immediately after ap. */
	bp->av_forw = ap->av_forw;
	ap->av_forw = bp;

	/*
	 * If we inserted onto the tail end of the waitq, make sure the
	 * tail pointer is updated.
	 */
	if (ap == un->un_waitq_tailp) {
		un->un_waitq_tailp = bp;
	}
}
14134 
14135 
14136 /*
14137  *    Function: sd_start_cmds
14138  *
14139  * Description: Remove and transport cmds from the driver queues.
14140  *
14141  *   Arguments: un - pointer to the unit (soft state) struct for the target.
14142  *
14143  *		immed_bp - ptr to a buf to be transported immediately. Only
14144  *		the immed_bp is transported; bufs on the waitq are not
14145  *		processed and the un_retry_bp is not checked.  If immed_bp is
14146  *		NULL, then normal queue processing is performed.
14147  *
14148  *     Context: May be called from kernel thread context, interrupt context,
14149  *		or runout callback context. This function may not block or
14150  *		call routines that block.
14151  */
14152 
static void
sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
{
	struct	sd_xbuf	*xp;
	struct	buf	*bp;
	void	(*statp)(kstat_io_t *);
#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
	void	(*saved_statp)(kstat_io_t *);
#endif
	int	rval;
	struct sd_fm_internal *sfip = NULL;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_ncmds_in_transport >= 0);
	ASSERT(un->un_throttle >= 0);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");

	/*
	 * Main dispatch loop: runs until either the immed_bp (if any) has
	 * been transported, or the waitq is drained / a stop condition
	 * (throttle, pending retry, pending callback) is hit.
	 */
	do {
#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
		saved_statp = NULL;
#endif

		/*
		 * If we are syncing or dumping, fail the command to
		 * avoid recursively calling back into scsi_transport().
		 * The dump I/O itself uses a separate code path so this
		 * only prevents non-dump I/O from being sent while dumping.
		 * File system sync takes place before dumping begins.
		 * During panic, filesystem I/O is allowed provided
		 * un_in_callback is <= 1.  This is to prevent recursion
		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
		 * sd_start_cmds and so on.  See panic.c for more information
		 * about the states the system can be in during panic.
		 */
		if ((un->un_state == SD_STATE_DUMPING) ||
		    (ddi_in_panic() && (un->un_in_callback > 1))) {
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_start_cmds: panicking\n");
			goto exit;
		}

		if ((bp = immed_bp) != NULL) {
			/*
			 * We have a bp that must be transported immediately.
			 * It's OK to transport the immed_bp here without doing
			 * the throttle limit check because the immed_bp is
			 * always used in a retry/recovery case. This means
			 * that we know we are not at the throttle limit by
			 * virtue of the fact that to get here we must have
			 * already gotten a command back via sdintr(). This also
			 * relies on (1) the command on un_retry_bp preventing
			 * further commands from the waitq from being issued;
			 * and (2) the code in sd_retry_command checking the
			 * throttle limit before issuing a delayed or immediate
			 * retry. This holds even if the throttle limit is
			 * currently ratcheted down from its maximum value.
			 */
			statp = kstat_runq_enter;
			if (bp == un->un_retry_bp) {
				ASSERT((un->un_retry_statp == NULL) ||
				    (un->un_retry_statp == kstat_waitq_enter) ||
				    (un->un_retry_statp ==
				    kstat_runq_back_to_waitq));
				/*
				 * If the waitq kstat was incremented when
				 * sd_set_retry_bp() queued this bp for a retry,
				 * then we must set up statp so that the waitq
				 * count will get decremented correctly below.
				 * Also we must clear un->un_retry_statp to
				 * ensure that we do not act on a stale value
				 * in this field.
				 */
				if ((un->un_retry_statp == kstat_waitq_enter) ||
				    (un->un_retry_statp ==
				    kstat_runq_back_to_waitq)) {
					statp = kstat_waitq_to_runq;
				}
#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
				saved_statp = un->un_retry_statp;
#endif
				un->un_retry_statp = NULL;

				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
				    "un_throttle:%d un_ncmds_in_transport:%d\n",
				    un, un->un_retry_bp, un->un_throttle,
				    un->un_ncmds_in_transport);
			} else {
				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
				    "processing priority bp:0x%p\n", bp);
			}

		} else if ((bp = un->un_waitq_headp) != NULL) {
			/*
			 * A command on the waitq is ready to go, but do not
			 * send it if:
			 *
			 * (1) the throttle limit has been reached, or
			 * (2) a retry is pending, or
			 * (3) a START_STOP_UNIT callback pending, or
			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
			 *	command is pending.
			 *
			 * For all of these conditions, IO processing will
			 * restart after the condition is cleared.
			 */
			if (un->un_ncmds_in_transport >= un->un_throttle) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: exiting, "
				    "throttle limit reached!\n");
				goto exit;
			}
			if (un->un_retry_bp != NULL) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: exiting, retry pending!\n");
				goto exit;
			}
			if (un->un_startstop_timeid != NULL) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: exiting, "
				    "START_STOP pending!\n");
				goto exit;
			}
			if (un->un_direct_priority_timeid != NULL) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: exiting, "
				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
				goto exit;
			}

			/* Dequeue the command */
			un->un_waitq_headp = bp->av_forw;
			if (un->un_waitq_headp == NULL) {
				un->un_waitq_tailp = NULL;
			}
			bp->av_forw = NULL;
			statp = kstat_waitq_to_runq;
			SD_TRACE(SD_LOG_IO_CORE, un,
			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);

		} else {
			/* No work to do so bail out now */
			SD_TRACE(SD_LOG_IO_CORE, un,
			    "sd_start_cmds: no more work, exiting!\n");
			goto exit;
		}

		/*
		 * Reset the state to normal. This is the mechanism by which
		 * the state transitions from either SD_STATE_RWAIT or
		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
		 * If state is SD_STATE_PM_CHANGING then this command is
		 * part of the device power control and the state must
		 * not be put back to normal. Doing so would
		 * allow new commands to proceed when they shouldn't,
		 * the device may be going off.
		 */
		if ((un->un_state != SD_STATE_SUSPENDED) &&
		    (un->un_state != SD_STATE_PM_CHANGING)) {
			New_state(un, SD_STATE_NORMAL);
		}

		xp = SD_GET_XBUF(bp);
		ASSERT(xp != NULL);

#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
		/*
		 * Allocate the scsi_pkt if we need one, or attach DMA
		 * resources if we have a scsi_pkt that needs them. The
		 * latter should only occur for commands that are being
		 * retried.
		 */
		if ((xp->xb_pktp == NULL) ||
		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
#else
		if (xp->xb_pktp == NULL) {
#endif
			/*
			 * There is no scsi_pkt allocated for this buf. Call
			 * the initpkt function to allocate & init one.
			 *
			 * The scsi_init_pkt runout callback functionality is
			 * implemented as follows:
			 *
			 * 1) The initpkt function always calls
			 *    scsi_init_pkt(9F) with sdrunout specified as the
			 *    callback routine.
			 * 2) A successful packet allocation is initialized and
			 *    the I/O is transported.
			 * 3) The I/O associated with an allocation resource
			 *    failure is left on its queue to be retried via
			 *    runout or the next I/O.
			 * 4) The I/O associated with a DMA error is removed
			 *    from the queue and failed with EIO. Processing of
			 *    the transport queues is also halted to be
			 *    restarted via runout or the next I/O.
			 * 5) The I/O associated with a CDB size or packet
			 *    size error is removed from the queue and failed
			 *    with EIO. Processing of the transport queues is
			 *    continued.
			 *
			 * Note: there is no interface for canceling a runout
			 * callback. To prevent the driver from detaching or
			 * suspending while a runout is pending the driver
			 * state is set to SD_STATE_RWAIT
			 *
			 * Note: using the scsi_init_pkt callback facility can
			 * result in an I/O request persisting at the head of
			 * the list which cannot be satisfied even after
			 * multiple retries. In the future the driver may
			 * implement some kind of maximum runout count before
			 * failing an I/O.
			 *
			 * Note: the use of funcp below may seem superfluous,
			 * but it helps warlock figure out the correct
			 * initpkt function calls (see [s]sd.wlcmd).
			 */
			struct scsi_pkt	*pktp;
			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);

			ASSERT(bp != un->un_rqs_bp);

			funcp = sd_initpkt_map[xp->xb_chain_iostart];
			switch ((*funcp)(bp, &pktp)) {
			case  SD_PKT_ALLOC_SUCCESS:
				xp->xb_pktp = pktp;
				SD_TRACE(SD_LOG_IO_CORE, un,
				    "sd_start_cmd: SD_PKT_ALLOC_SUCCESS 0x%p\n",
				    pktp);
				goto got_pkt;

			case SD_PKT_ALLOC_FAILURE:
				/*
				 * Temporary (hopefully) resource depletion.
				 * Since retries and RQS commands always have a
				 * scsi_pkt allocated, these cases should never
				 * get here. So the only cases this needs to
				 * handle is a bp from the waitq (which we put
				 * back onto the waitq for sdrunout), or a bp
				 * sent as an immed_bp (which we just fail).
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");

#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */

				if (bp == immed_bp) {
					/*
					 * If SD_XB_DMA_FREED is clear, then
					 * this is a failure to allocate a
					 * scsi_pkt, and we must fail the
					 * command.
					 */
					if ((xp->xb_pkt_flags &
					    SD_XB_DMA_FREED) == 0) {
						break;
					}

					/*
					 * If this immediate command is NOT our
					 * un_retry_bp, then we must fail it.
					 */
					if (bp != un->un_retry_bp) {
						break;
					}

					/*
					 * We get here if this cmd is our
					 * un_retry_bp that was DMAFREED, but
					 * scsi_init_pkt() failed to reallocate
					 * DMA resources when we attempted to
					 * retry it. This can happen when an
					 * mpxio failover is in progress, but
					 * we don't want to just fail the
					 * command in this case.
					 *
					 * Use timeout(9F) to restart it after
					 * a 100ms delay.  We don't want to
					 * let sdrunout() restart it, because
					 * sdrunout() is just supposed to start
					 * commands that are sitting on the
					 * wait queue.  The un_retry_bp stays
					 * set until the command completes, but
					 * sdrunout can be called many times
					 * before that happens.  Since sdrunout
					 * cannot tell if the un_retry_bp is
					 * already in the transport, it could
					 * end up calling scsi_transport() for
					 * the un_retry_bp multiple times.
					 *
					 * Also: don't schedule the callback
					 * if some other callback is already
					 * pending.
					 */
					if (un->un_retry_statp == NULL) {
						/*
						 * restore the kstat pointer to
						 * keep kstat counts coherent
						 * when we do retry the command.
						 */
						un->un_retry_statp =
						    saved_statp;
					}

					if ((un->un_startstop_timeid == NULL) &&
					    (un->un_retry_timeid == NULL) &&
					    (un->un_direct_priority_timeid ==
					    NULL)) {

						un->un_retry_timeid =
						    timeout(
						    sd_start_retry_command,
						    un, SD_RESTART_TIMEOUT);
					}
					goto exit;
				}

#else
				if (bp == immed_bp) {
					break;	/* Just fail the command */
				}
#endif

				/* Add the buf back to the head of the waitq */
				bp->av_forw = un->un_waitq_headp;
				un->un_waitq_headp = bp;
				if (un->un_waitq_tailp == NULL) {
					un->un_waitq_tailp = bp;
				}
				goto exit;

			case SD_PKT_ALLOC_FAILURE_NO_DMA:
				/*
				 * HBA DMA resource failure. Fail the command
				 * and continue processing of the queues.
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: "
				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
				break;

			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
				/*
				 * Note:x86: Partial DMA mapping not supported
				 * for USCSI commands, and all the needed DMA
				 * resources were not allocated.
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: "
				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
				break;

			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
				/*
				 * Note:x86: Request cannot fit into CDB based
				 * on lba and len.
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: "
				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
				break;

			default:
				/* Should NEVER get here! */
				panic("scsi_initpkt error");
				/*NOTREACHED*/
			}

			/*
			 * Fatal error in allocating a scsi_pkt for this buf.
			 * Update kstats & return the buf with an error code.
			 * We must use sd_return_failed_command_no_restart() to
			 * avoid a recursive call back into sd_start_cmds().
			 * However this also means that we must keep processing
			 * the waitq here in order to avoid stalling.
			 */
			if (statp == kstat_waitq_to_runq) {
				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
			}
			sd_return_failed_command_no_restart(un, bp, EIO);
			if (bp == immed_bp) {
				/* immed_bp is gone by now, so clear this */
				immed_bp = NULL;
			}
			continue;
		}
got_pkt:
		/* A scsi_pkt is attached to this bp; it is ready to send. */
		if (bp == immed_bp) {
			/* goto the head of the class.... */
			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
		}

		un->un_ncmds_in_transport++;
		SD_UPDATE_KSTATS(un, statp, bp);

		/*
		 * Call scsi_transport() to send the command to the target.
		 * According to SCSA architecture, we must drop the mutex here
		 * before calling scsi_transport() in order to avoid deadlock.
		 * Note that the scsi_pkt's completion routine can be executed
		 * (from interrupt context) even before the call to
		 * scsi_transport() returns.
		 */
		SD_TRACE(SD_LOG_IO_CORE, un,
		    "sd_start_cmds: calling scsi_transport()\n");
		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);

		mutex_exit(SD_MUTEX(un));
		rval = scsi_transport(xp->xb_pktp);
		mutex_enter(SD_MUTEX(un));

		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_start_cmds: scsi_transport() returned %d\n", rval);

		switch (rval) {
		case TRAN_ACCEPT:
			/* Clear this with every pkt accepted by the HBA */
			un->un_tran_fatal_count = 0;
			break;	/* Success; try the next cmd (if any) */

		case TRAN_BUSY:
			un->un_ncmds_in_transport--;
			ASSERT(un->un_ncmds_in_transport >= 0);

			/*
			 * Don't retry request sense, the sense data
			 * is lost when another request is sent.
			 * Free up the rqs buf and retry
			 * the original failed cmd.  Update kstat.
			 */
			if (bp == un->un_rqs_bp) {
				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
				bp = sd_mark_rqs_idle(un, xp);
				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
				    NULL, NULL, EIO, un->un_busy_timeout / 500,
				    kstat_waitq_enter);
				goto exit;
			}

#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
			/*
			 * Free the DMA resources for the  scsi_pkt. This will
			 * allow mpxio to select another path the next time
			 * we call scsi_transport() with this scsi_pkt.
			 * See sdintr() for the rationalization behind this.
			 */
			if ((un->un_f_is_fibre == TRUE) &&
			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
				scsi_dmafree(xp->xb_pktp);
				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
			}
#endif

			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
				/*
				 * Commands that are SD_PATH_DIRECT_PRIORITY
				 * are for error recovery situations. These do
				 * not use the normal command waitq, so if they
				 * get a TRAN_BUSY we cannot put them back onto
				 * the waitq for later retry. One possible
				 * problem is that there could already be some
				 * other command on un_retry_bp that is waiting
				 * for this one to complete, so we would be
				 * deadlocked if we put this command back onto
				 * the waitq for later retry (since un_retry_bp
				 * must complete before the driver gets back to
				 * commands on the waitq).
				 *
				 * To avoid deadlock we must schedule a callback
				 * that will restart this command after a set
				 * interval.  This should keep retrying for as
				 * long as the underlying transport keeps
				 * returning TRAN_BUSY (just like for other
				 * commands).  Use the same timeout interval as
				 * for the ordinary TRAN_BUSY retry.
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: scsi_transport() returned "
				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");

				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
				un->un_direct_priority_timeid =
				    timeout(sd_start_direct_priority_command,
				    bp, un->un_busy_timeout / 500);

				goto exit;
			}

			/*
			 * For TRAN_BUSY, we want to reduce the throttle value,
			 * unless we are retrying a command.
			 */
			if (bp != un->un_retry_bp) {
				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
			}

			/*
			 * Set up the bp to be tried again 10 ms later.
			 * Note:x86: Is there a timeout value in the sd_lun
			 * for this condition?
			 */
			sd_set_retry_bp(un, bp, un->un_busy_timeout / 500,
			    kstat_runq_back_to_waitq);
			goto exit;

		case TRAN_FATAL_ERROR:
			un->un_tran_fatal_count++;
			/* FALLTHRU */

		case TRAN_BADPKT:
		default:
			un->un_ncmds_in_transport--;
			ASSERT(un->un_ncmds_in_transport >= 0);

			/*
			 * If this is our REQUEST SENSE command with a
			 * transport error, we must get back the pointers
			 * to the original buf, and mark the REQUEST
			 * SENSE command as "available".
			 */
			if (bp == un->un_rqs_bp) {
				bp = sd_mark_rqs_idle(un, xp);
				xp = SD_GET_XBUF(bp);
			} else {
				/*
				 * Legacy behavior: do not update transport
				 * error count for request sense commands.
				 */
				SD_UPDATE_ERRSTATS(un, sd_transerrs);
			}

			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
			sd_print_transport_rejected_message(un, xp, rval);

			/*
			 * This command will be terminated by SD driver due
			 * to a fatal transport error. We should post
			 * ereport.io.scsi.cmd.disk.tran with driver-assessment
			 * of "fail" for any command to indicate this
			 * situation.
			 */
			if (xp->xb_ena > 0) {
				ASSERT(un->un_fm_private != NULL);
				sfip = un->un_fm_private;
				sfip->fm_ssc.ssc_flags |= SSC_FLAGS_TRAN_ABORT;
				sd_ssc_extract_info(&sfip->fm_ssc, un,
				    xp->xb_pktp, bp, xp);
				sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_FATAL);
			}

			/*
			 * We must use sd_return_failed_command_no_restart() to
			 * avoid a recursive call back into sd_start_cmds().
			 * However this also means that we must keep processing
			 * the waitq here in order to avoid stalling.
			 */
			sd_return_failed_command_no_restart(un, bp, EIO);

			/*
			 * Notify any threads waiting in sd_ddi_suspend() that
			 * a command completion has occurred.
			 */
			if (un->un_state == SD_STATE_SUSPENDED) {
				cv_broadcast(&un->un_disk_busy_cv);
			}

			if (bp == immed_bp) {
				/* immed_bp is gone by now, so clear this */
				immed_bp = NULL;
			}
			break;
		}

	} while (immed_bp == NULL);

exit:
	/* All exits must hold SD_MUTEX; it was reacquired after transport. */
	ASSERT(mutex_owned(SD_MUTEX(un)));
	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
}
14735 
14736 
14737 /*
14738  *    Function: sd_return_command
14739  *
14740  * Description: Returns a command to its originator (with or without an
14741  *		error).  Also starts commands waiting to be transported
14742  *		to the target.
14743  *
14744  *     Context: May be called from interrupt, kernel, or timeout context
14745  */
14746 
14747 static void
14748 sd_return_command(struct sd_lun *un, struct buf *bp)
14749 {
14750 	struct sd_xbuf *xp;
14751 	struct scsi_pkt *pktp;
14752 	struct sd_fm_internal *sfip;
14753 
14754 	ASSERT(bp != NULL);
14755 	ASSERT(un != NULL);
14756 	ASSERT(mutex_owned(SD_MUTEX(un)));
14757 	ASSERT(bp != un->un_rqs_bp);
14758 	xp = SD_GET_XBUF(bp);
14759 	ASSERT(xp != NULL);
14760 
14761 	pktp = SD_GET_PKTP(bp);
14762 	sfip = (struct sd_fm_internal *)un->un_fm_private;
14763 	ASSERT(sfip != NULL);
14764 
14765 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");
14766 
14767 	/*
14768 	 * Note: check for the "sdrestart failed" case.
14769 	 */
14770 	if ((un->un_partial_dma_supported == 1) &&
14771 	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
14772 	    (geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
14773 	    (xp->xb_pktp->pkt_resid == 0)) {
14774 
14775 		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
14776 			/*
14777 			 * Successfully set up next portion of cmd
14778 			 * transfer, try sending it
14779 			 */
14780 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
14781 			    NULL, NULL, 0, (clock_t)0, NULL);
14782 			sd_start_cmds(un, NULL);
14783 			return;	/* Note:x86: need a return here? */
14784 		}
14785 	}
14786 
14787 	/*
14788 	 * If this is the failfast bp, clear it from un_failfast_bp. This
14789 	 * can happen if upon being re-tried the failfast bp either
14790 	 * succeeded or encountered another error (possibly even a different
14791 	 * error than the one that precipitated the failfast state, but in
14792 	 * that case it would have had to exhaust retries as well). Regardless,
14793 	 * this should not occur whenever the instance is in the active
14794 	 * failfast state.
14795 	 */
14796 	if (bp == un->un_failfast_bp) {
14797 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
14798 		un->un_failfast_bp = NULL;
14799 	}
14800 
14801 	/*
14802 	 * Clear the failfast state upon successful completion of ANY cmd.
14803 	 */
14804 	if (bp->b_error == 0) {
14805 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
14806 		/*
14807 		 * If this is a successful command, but used to be retried,
14808 		 * we will take it as a recovered command and post an
14809 		 * ereport with driver-assessment of "recovered".
14810 		 */
14811 		if (xp->xb_ena > 0) {
14812 			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
14813 			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_RECOVERY);
14814 		}
14815 	} else {
14816 		/*
14817 		 * If this is a failed non-USCSI command we will post an
14818 		 * ereport with driver-assessment set accordingly("fail" or
14819 		 * "fatal").
14820 		 */
14821 		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
14822 			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
14823 			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_FATAL);
14824 		}
14825 	}
14826 
14827 	/*
14828 	 * This is used if the command was retried one or more times. Show that
14829 	 * we are done with it, and allow processing of the waitq to resume.
14830 	 */
14831 	if (bp == un->un_retry_bp) {
14832 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14833 		    "sd_return_command: un:0x%p: "
14834 		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
14835 		un->un_retry_bp = NULL;
14836 		un->un_retry_statp = NULL;
14837 	}
14838 
14839 	SD_UPDATE_RDWR_STATS(un, bp);
14840 	SD_UPDATE_PARTITION_STATS(un, bp);
14841 
14842 	switch (un->un_state) {
14843 	case SD_STATE_SUSPENDED:
14844 		/*
14845 		 * Notify any threads waiting in sd_ddi_suspend() that
14846 		 * a command completion has occurred.
14847 		 */
14848 		cv_broadcast(&un->un_disk_busy_cv);
14849 		break;
14850 	default:
14851 		sd_start_cmds(un, NULL);
14852 		break;
14853 	}
14854 
14855 	/* Return this command up the iodone chain to its originator. */
14856 	mutex_exit(SD_MUTEX(un));
14857 
14858 	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
14859 	xp->xb_pktp = NULL;
14860 
14861 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
14862 
14863 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14864 	mutex_enter(SD_MUTEX(un));
14865 
14866 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
14867 }
14868 
14869 
14870 /*
14871  *    Function: sd_return_failed_command
14872  *
14873  * Description: Command completion when an error occurred.
14874  *
14875  *     Context: May be called from interrupt context
14876  */
14877 
static void
sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
{
	ASSERT(bp != NULL);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_return_failed_command: entry\n");

	/*
	 * Flag the buf with the given errno via SD_BIOERROR, then send it
	 * through the normal completion path (sd_return_command).
	 *
	 * b_resid could already be nonzero due to a partial data
	 * transfer, so do not change it here.
	 */
	SD_BIOERROR(bp, errcode);

	sd_return_command(un, bp);
	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_return_failed_command: exit\n");
}
14898 
14899 
14900 /*
14901  *    Function: sd_return_failed_command_no_restart
14902  *
14903  * Description: Same as sd_return_failed_command, but ensures that no
14904  *		call back into sd_start_cmds will be issued.
14905  *
14906  *     Context: May be called from interrupt context
14907  */
14908 
static void
sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
	int errcode)
{
	struct sd_xbuf *xp;

	ASSERT(bp != NULL);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	ASSERT(errcode != 0);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_return_failed_command_no_restart: entry\n");

	/*
	 * b_resid could already be nonzero due to a partial data
	 * transfer, so do not change it here.
	 */
	SD_BIOERROR(bp, errcode);

	/*
	 * If this is the failfast bp, clear it. This can happen if the
	 * failfast bp encountered a fatal error when we attempted to
	 * re-try it (such as a scsi_transport(9F) failure).  However
	 * we should NOT be in an active failfast state if the failfast
	 * bp is not NULL.
	 */
	if (bp == un->un_failfast_bp) {
		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
		un->un_failfast_bp = NULL;
	}

	if (bp == un->un_retry_bp) {
		/*
		 * This command was retried one or more times. Show that we are
		 * done with it, and allow processing of the waitq to resume.
		 */
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_return_failed_command_no_restart: "
		    " un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
		un->un_retry_bp = NULL;
		un->un_retry_statp = NULL;
	}

	SD_UPDATE_RDWR_STATS(un, bp);
	SD_UPDATE_PARTITION_STATS(un, bp);

	/*
	 * Drop SD_MUTEX before destroying the pkt and running the iodone
	 * chain; unlike sd_return_command(), sd_start_cmds() is deliberately
	 * NOT called here, so no new commands are issued on this path.
	 */
	mutex_exit(SD_MUTEX(un));

	/* The pkt may already be gone on some error paths; destroy only once */
	if (xp->xb_pktp != NULL) {
		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
		xp->xb_pktp = NULL;
	}

	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_return_failed_command_no_restart: exit\n");
}
14972 
14973 
14974 /*
14975  *    Function: sd_retry_command
14976  *
14977  * Description: queue up a command for retry, or (optionally) fail it
14978  *		if retry counts are exhausted.
14979  *
14980  *   Arguments: un - Pointer to the sd_lun struct for the target.
14981  *
14982  *		bp - Pointer to the buf for the command to be retried.
14983  *
14984  *		retry_check_flag - Flag to see which (if any) of the retry
14985  *		   counts should be decremented/checked. If the indicated
14986  *		   retry count is exhausted, then the command will not be
14987  *		   retried; it will be failed instead. This should use a
14988  *		   value equal to one of the following:
14989  *
14990  *			SD_RETRIES_NOCHECK
14991  *			SD_RESD_RETRIES_STANDARD
14992  *			SD_RETRIES_VICTIM
14993  *
14994  *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
14995  *		   if the check should be made to see of FLAG_ISOLATE is set
14996  *		   in the pkt. If FLAG_ISOLATE is set, then the command is
14997  *		   not retried, it is simply failed.
14998  *
14999  *		user_funcp - Ptr to function to call before dispatching the
15000  *		   command. May be NULL if no action needs to be performed.
15001  *		   (Primarily intended for printing messages.)
15002  *
15003  *		user_arg - Optional argument to be passed along to
15004  *		   the user_funcp call.
15005  *
15006  *		failure_code - errno return code to set in the bp if the
15007  *		   command is going to be failed.
15008  *
15009  *		retry_delay - Retry delay interval in (clock_t) units. May
15010  *		   be zero which indicates that the retry should be retried
15011  *		   immediately (ie, without an intervening delay).
15012  *
15013  *		statp - Ptr to kstat function to be updated if the command
15014  *		   is queued for a delayed retry. May be NULL if no kstat
15015  *		   update is desired.
15016  *
15017  *     Context: May be called from interrupt context.
15018  */
15019 
static void
sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int
	code), void *user_arg, int failure_code,  clock_t retry_delay,
	void (*statp)(kstat_io_t *))
{
	struct sd_xbuf	*xp;
	struct scsi_pkt	*pktp;
	struct sd_fm_internal *sfip;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);

	sfip = (struct sd_fm_internal *)un->un_fm_private;
	ASSERT(sfip != NULL);

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);

	/*
	 * If we are syncing or dumping, fail the command to avoid
	 * recursively calling back into scsi_transport().
	 */
	if (ddi_in_panic()) {
		goto fail_command_no_log;
	}

	/*
	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
	 * log an error and fail the command.
	 */
	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
		sd_dump_memory(un, SD_LOG_IO, "CDB",
		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
		goto fail_command;
	}

	/*
	 * If we are suspended, then put the command onto head of the
	 * wait queue since we don't want to start more commands, and
	 * clear the un_retry_bp. Next time when we are resumed, will
	 * handle the command in the wait queue.
	 */
	switch (un->un_state) {
	case SD_STATE_SUSPENDED:
	case SD_STATE_DUMPING:
		/* Push bp onto the head of the wait queue. */
		bp->av_forw = un->un_waitq_headp;
		un->un_waitq_headp = bp;
		if (un->un_waitq_tailp == NULL) {
			un->un_waitq_tailp = bp;
		}
		if (bp == un->un_retry_bp) {
			un->un_retry_bp = NULL;
			un->un_retry_statp = NULL;
		}
		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
		return;
	default:
		break;
	}

	/*
	 * If the caller wants us to check FLAG_ISOLATE, then see if that
	 * is set; if it is then we do not want to retry the command.
	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
	 */
	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
			goto fail_command;
		}
	}


	/*
	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
	 * command timeout or a selection timeout has occurred. This means
	 * that we were unable to establish any kind of communication with
	 * the target, and subsequent retries and/or commands are likely
	 * to encounter similar results and take a long time to complete.
	 *
	 * If this is a failfast error condition, we need to update the
	 * failfast state, even if this bp does not have B_FAILFAST set.
	 */
	if (retry_check_flag & SD_RETRIES_FAILFAST) {
		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
			ASSERT(un->un_failfast_bp == NULL);
			/*
			 * If we are already in the active failfast state, and
			 * another failfast error condition has been detected,
			 * then fail this command if it has B_FAILFAST set.
			 * If B_FAILFAST is clear, then maintain the legacy
			 * behavior of retrying heroically, even tho this will
			 * take a lot more time to fail the command.
			 */
			if (bp->b_flags & B_FAILFAST) {
				goto fail_command;
			}
		} else {
			/*
			 * We're not in the active failfast state, but we
			 * have a failfast error condition, so we must begin
			 * transition to the next state. We do this regardless
			 * of whether or not this bp has B_FAILFAST set.
			 */
			if (un->un_failfast_bp == NULL) {
				/*
				 * This is the first bp to meet a failfast
				 * condition so save it on un_failfast_bp &
				 * do normal retry processing. Do not enter
				 * active failfast state yet. This marks
				 * entry into the "failfast pending" state.
				 */
				un->un_failfast_bp = bp;

			} else if (un->un_failfast_bp == bp) {
				/*
				 * This is the second time *this* bp has
				 * encountered a failfast error condition,
				 * so enter active failfast state & flush
				 * queues as appropriate.
				 */
				un->un_failfast_state = SD_FAILFAST_ACTIVE;
				un->un_failfast_bp = NULL;
				sd_failfast_flushq(un);

				/*
				 * Fail this bp now if B_FAILFAST set;
				 * otherwise continue with retries. (It would
				 * be pretty ironic if this bp succeeded on a
				 * subsequent retry after we just flushed all
				 * the queues).
				 */
				if (bp->b_flags & B_FAILFAST) {
					goto fail_command;
				}

#if !defined(lint) && !defined(__lint)
			} else {
				/*
				 * If neither of the preceding conditionals
				 * was true, it means that there is some
				 * *other* bp that has met an initial failfast
				 * condition and is currently either being
				 * retried or is waiting to be retried. In
				 * that case we should perform normal retry
				 * processing on *this* bp, since there is a
				 * chance that the current failfast condition
				 * is transient and recoverable. If that does
				 * not turn out to be the case, then retries
				 * will be cleared when the wait queue is
				 * flushed anyway.
				 */
#endif
			}
		}
	} else {
		/*
		 * SD_RETRIES_FAILFAST is clear, which indicates that we
		 * likely were able to at least establish some level of
		 * communication with the target and subsequent commands
		 * and/or retries are likely to get through to the target,
		 * In this case we want to be aggressive about clearing
		 * the failfast state. Note that this does not affect
		 * the "failfast pending" condition.
		 */
		un->un_failfast_state = SD_FAILFAST_INACTIVE;
	}


	/*
	 * Check the specified retry count to see if we can still do
	 * any retries with this pkt before we should fail it.
	 */
	switch (retry_check_flag & SD_RETRIES_MASK) {
	case SD_RETRIES_VICTIM:
		/*
		 * Check the victim retry count. If exhausted, then fall
		 * thru & check against the standard retry count.
		 */
		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
			/* Increment count & proceed with the retry */
			xp->xb_victim_retry_count++;
			break;
		}
		/* Victim retries exhausted, fall back to std. retries... */
		/* FALLTHRU */

	case SD_RETRIES_STANDARD:
		if (xp->xb_retry_count >= un->un_retry_count) {
			/* Retries exhausted, fail the command */
			SD_TRACE(SD_LOG_IO_CORE, un,
			    "sd_retry_command: retries exhausted!\n");
			/*
			 * update b_resid for failed SCMD_READ & SCMD_WRITE
			 * commands with nonzero pkt_resid.
			 */
			if ((pktp->pkt_reason == CMD_CMPLT) &&
			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
			    (pktp->pkt_resid != 0)) {
				/* Low 5 bits of opcode byte == command code */
				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
					SD_UPDATE_B_RESID(bp, pktp);
				}
			}
			goto fail_command;
		}
		xp->xb_retry_count++;
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
		break;

	case SD_RETRIES_UA:
		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
			/* Retries exhausted, fail the command */
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Unit Attention retries exhausted. "
			    "Check the target.\n");
			goto fail_command;
		}
		xp->xb_ua_retry_count++;
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_retry_command: retry count:%d\n",
		    xp->xb_ua_retry_count);
		break;

	case SD_RETRIES_BUSY:
		if (xp->xb_retry_count >= un->un_busy_retry_count) {
			/* Retries exhausted, fail the command */
			SD_TRACE(SD_LOG_IO_CORE, un,
			    "sd_retry_command: retries exhausted!\n");
			goto fail_command;
		}
		xp->xb_retry_count++;
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
		break;

	case SD_RETRIES_NOCHECK:
	default:
		/* No retry count to check. Just proceed with the retry */
		break;
	}

	/* Retried commands go to the head of the HBA queue. */
	xp->xb_pktp->pkt_flags |= FLAG_HEAD;

	/*
	 * If this is a non-USCSI command being retried
	 * during execution last time, we should post an ereport with
	 * driver-assessment of the value "retry".
	 * For partial DMA, request sense and STATUS_QFULL, there are no
	 * hardware errors, we bypass ereport posting.
	 */
	if (failure_code != 0) {
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_RETRY);
		}
	}

	/*
	 * If we were given a zero timeout, we must attempt to retry the
	 * command immediately (ie, without a delay).
	 */
	if (retry_delay == 0) {
		/*
		 * Check some limiting conditions to see if we can actually
		 * do the immediate retry.  If we cannot, then we must
		 * fall back to queueing up a delayed retry.
		 */
		if (un->un_ncmds_in_transport >= un->un_throttle) {
			/*
			 * We are at the throttle limit for the target,
			 * fall back to delayed retry.
			 */
			retry_delay = un->un_busy_timeout;
			statp = kstat_waitq_enter;
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_retry_command: immed. retry hit "
			    "throttle!\n");
		} else {
			/*
			 * We're clear to proceed with the immediate retry.
			 * First call the user-provided function (if any)
			 */
			if (user_funcp != NULL) {
				(*user_funcp)(un, bp, user_arg,
				    SD_IMMEDIATE_RETRY_ISSUED);
#ifdef __lock_lint
				sd_print_incomplete_msg(un, bp, user_arg,
				    SD_IMMEDIATE_RETRY_ISSUED);
				sd_print_cmd_incomplete_msg(un, bp, user_arg,
				    SD_IMMEDIATE_RETRY_ISSUED);
				sd_print_sense_failed_msg(un, bp, user_arg,
				    SD_IMMEDIATE_RETRY_ISSUED);
#endif
			}

			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_retry_command: issuing immediate retry\n");

			/*
			 * Call sd_start_cmds() to transport the command to
			 * the target.
			 */
			sd_start_cmds(un, bp);

			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_retry_command exit\n");
			return;
		}
	}

	/*
	 * Set up to retry the command after a delay.
	 * First call the user-provided function (if any)
	 */
	if (user_funcp != NULL) {
		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
	}

	sd_set_retry_bp(un, bp, retry_delay, statp);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
	return;

fail_command:

	/* Notify the caller's function that no retry will be issued. */
	if (user_funcp != NULL) {
		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
	}

fail_command_no_log:

	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_retry_command: returning failed command\n");

	sd_return_failed_command(un, bp, failure_code);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
}
15371 
15372 
15373 /*
15374  *    Function: sd_set_retry_bp
15375  *
15376  * Description: Set up the given bp for retry.
15377  *
15378  *   Arguments: un - ptr to associated softstate
15379  *		bp - ptr to buf(9S) for the command
15380  *		retry_delay - time interval before issuing retry (may be 0)
15381  *		statp - optional pointer to kstat function
15382  *
15383  *     Context: May be called under interrupt context
15384  */
15385 
static void
sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
	void (*statp)(kstat_io_t *))
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);

	/*
	 * Indicate that the command is being retried. This will not allow any
	 * other commands on the wait queue to be transported to the target
	 * until this command has been completed (success or failure). The
	 * "retry command" is not transported to the target until the given
	 * time delay expires, unless the user specified a 0 retry_delay.
	 *
	 * Note: the timeout(9F) callback routine is what actually calls
	 * sd_start_cmds() to transport the command, with the exception of a
	 * zero retry_delay. The only current implementor of a zero retry delay
	 * is the case where a START_STOP_UNIT is sent to spin-up a device.
	 */
	if (un->un_retry_bp == NULL) {
		/* No retry currently pending; this bp becomes the retry bp. */
		ASSERT(un->un_retry_statp == NULL);
		un->un_retry_bp = bp;

		/*
		 * If the user has not specified a delay the command should
		 * be queued and no timeout should be scheduled.
		 */
		if (retry_delay == 0) {
			/*
			 * Save the kstat pointer that will be used in the
			 * call to SD_UPDATE_KSTATS() below, so that
			 * sd_start_cmds() can correctly decrement the waitq
			 * count when it is time to transport this command.
			 */
			un->un_retry_statp = statp;
			goto done;
		}
	}

	if (un->un_retry_bp == bp) {
		/*
		 * Save the kstat pointer that will be used in the call to
		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
		 * correctly decrement the waitq count when it is time to
		 * transport this command.
		 */
		un->un_retry_statp = statp;

		/*
		 * Schedule a timeout if:
		 *   1) The user has specified a delay.
		 *   2) There is not a START_STOP_UNIT callback pending.
		 *
		 * If no delay has been specified, then it is up to the caller
		 * to ensure that IO processing continues without stalling.
		 * Effectively, this means that the caller will issue the
		 * required call to sd_start_cmds(). The START_STOP_UNIT
		 * callback does this after the START STOP UNIT command has
		 * completed. In either of these cases we should not schedule
		 * a timeout callback here.  Also don't schedule the timeout if
		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
		 */
		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
		    (un->un_direct_priority_timeid == NULL)) {
			un->un_retry_timeid =
			    timeout(sd_start_retry_command, un, retry_delay);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_set_retry_bp: setting timeout: un: 0x%p"
			    " bp:0x%p un_retry_timeid:0x%p\n",
			    un, bp, un->un_retry_timeid);
		}
	} else {
		/*
		 * We only get in here if there is already another command
		 * waiting to be retried.  In this case, we just put the
		 * given command onto the wait queue, so it can be transported
		 * after the current retry command has completed.
		 *
		 * Also we have to make sure that if the command at the head
		 * of the wait queue is the un_failfast_bp, that we do not
		 * put ahead of it any other commands that are to be retried.
		 */
		if ((un->un_failfast_bp != NULL) &&
		    (un->un_failfast_bp == un->un_waitq_headp)) {
			/*
			 * Enqueue this command AFTER the first command on
			 * the wait queue (which is also un_failfast_bp).
			 */
			bp->av_forw = un->un_waitq_headp->av_forw;
			un->un_waitq_headp->av_forw = bp;
			if (un->un_waitq_headp == un->un_waitq_tailp) {
				un->un_waitq_tailp = bp;
			}
		} else {
			/* Enqueue this command at the head of the waitq. */
			bp->av_forw = un->un_waitq_headp;
			un->un_waitq_headp = bp;
			if (un->un_waitq_tailp == NULL) {
				un->un_waitq_tailp = bp;
			}
		}

		/* Commands parked on the waitq are accounted as waiting. */
		if (statp == NULL) {
			statp = kstat_waitq_enter;
		}
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
	}

done:
	if (statp != NULL) {
		SD_UPDATE_KSTATS(un, statp, bp);
	}

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_set_retry_bp: exit un:0x%p\n", un);
}
15507 
15508 
15509 /*
15510  *    Function: sd_start_retry_command
15511  *
15512  * Description: Start the command that has been waiting on the target's
15513  *		retry queue.  Called from timeout(9F) context after the
15514  *		retry delay interval has expired.
15515  *
15516  *   Arguments: arg - pointer to associated softstate for the device.
15517  *
15518  *     Context: timeout(9F) thread context.  May not sleep.
15519  */
15520 
15521 static void
15522 sd_start_retry_command(void *arg)
15523 {
15524 	struct sd_lun *un = arg;
15525 
15526 	ASSERT(un != NULL);
15527 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15528 
15529 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15530 	    "sd_start_retry_command: entry\n");
15531 
15532 	mutex_enter(SD_MUTEX(un));
15533 
15534 	un->un_retry_timeid = NULL;
15535 
15536 	if (un->un_retry_bp != NULL) {
15537 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15538 		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
15539 		    un, un->un_retry_bp);
15540 		sd_start_cmds(un, un->un_retry_bp);
15541 	}
15542 
15543 	mutex_exit(SD_MUTEX(un));
15544 
15545 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15546 	    "sd_start_retry_command: exit\n");
15547 }
15548 
15549 
15550 /*
15551  *    Function: sd_start_direct_priority_command
15552  *
15553  * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
15554  *		received TRAN_BUSY when we called scsi_transport() to send it
15555  *		to the underlying HBA. This function is called from timeout(9F)
15556  *		context after the delay interval has expired.
15557  *
15558  *   Arguments: arg - pointer to associated buf(9S) to be restarted.
15559  *
15560  *     Context: timeout(9F) thread context.  May not sleep.
15561  */
15562 
15563 static void
15564 sd_start_direct_priority_command(void *arg)
15565 {
15566 	struct buf	*priority_bp = arg;
15567 	struct sd_lun	*un;
15568 
15569 	ASSERT(priority_bp != NULL);
15570 	un = SD_GET_UN(priority_bp);
15571 	ASSERT(un != NULL);
15572 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15573 
15574 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15575 	    "sd_start_direct_priority_command: entry\n");
15576 
15577 	mutex_enter(SD_MUTEX(un));
15578 	un->un_direct_priority_timeid = NULL;
15579 	sd_start_cmds(un, priority_bp);
15580 	mutex_exit(SD_MUTEX(un));
15581 
15582 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15583 	    "sd_start_direct_priority_command: exit\n");
15584 }
15585 
15586 
15587 /*
15588  *    Function: sd_send_request_sense_command
15589  *
15590  * Description: Sends a REQUEST SENSE command to the target
15591  *
15592  *     Context: May be called from interrupt context.
15593  */
15594 
static void
sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
	struct scsi_pkt *pktp)
{
	ASSERT(bp != NULL);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
	    "entry: buf:0x%p\n", bp);

	/*
	 * If we are syncing or dumping, then fail the command to avoid a
	 * recursive callback into scsi_transport(). Also fail the command
	 * if we are suspended (legacy behavior).
	 */
	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
	    (un->un_state == SD_STATE_DUMPING)) {
		sd_return_failed_command(un, bp, EIO);
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_send_request_sense_command: syncing/dumping, exit\n");
		return;
	}

	/*
	 * Retry the failed command and don't issue the request sense if:
	 *    1) the sense buf is busy
	 *    2) we have 1 or more outstanding commands on the target
	 *    (the sense data will be cleared or invalidated any way)
	 *
	 * Note: There could be an issue with not checking a retry limit here,
	 * the problem is determining which retry limit to check.
	 */
	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
		/* Don't retry if the command is flagged as non-retryable */
		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
			    NULL, NULL, 0, un->un_busy_timeout,
			    kstat_waitq_enter);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_send_request_sense_command: "
			    "at full throttle, retrying exit\n");
		} else {
			/* FLAG_DIAGNOSE commands are never retried; fail. */
			sd_return_failed_command(un, bp, EIO);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_send_request_sense_command: "
			    "at full throttle, non-retryable exit\n");
		}
		return;
	}

	/*
	 * Claim the per-instance RQS buf for this command and transport
	 * the REQUEST SENSE packet.
	 */
	sd_mark_rqs_busy(un, bp);
	sd_start_cmds(un, un->un_rqs_bp);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_send_request_sense_command: exit\n");
}
15652 
15653 
15654 /*
15655  *    Function: sd_mark_rqs_busy
15656  *
15657  * Description: Indicate that the request sense bp for this instance is
15658  *		in use.
15659  *
15660  *     Context: May be called under interrupt context
15661  */
15662 
static void
sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
{
	struct sd_xbuf	*sense_xp;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_sense_isbusy == 0);

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);

	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
	ASSERT(sense_xp != NULL);

	SD_INFO(SD_LOG_IO, un,
	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);

	/* The RQS pkt is pre-built with FLAG_SENSING | FLAG_HEAD set. */
	ASSERT(sense_xp->xb_pktp != NULL);
	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
	    == (FLAG_SENSING | FLAG_HEAD));

	/* Claim the RQS buf and reset its completion state. */
	un->un_sense_isbusy = 1;
	un->un_rqs_bp->b_resid = 0;
	sense_xp->xb_pktp->pkt_resid  = 0;
	sense_xp->xb_pktp->pkt_reason = 0;

	/* So we can get back the bp at interrupt time! */
	sense_xp->xb_sense_bp = bp;

	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);

	/*
	 * Mark this buf as awaiting sense data. (This is already set in
	 * the pkt_flags for the RQS packet.)
	 */
	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;

	/* Request sense down same path */
	if (scsi_pkt_allocated_correctly((SD_GET_XBUF(bp))->xb_pktp) &&
	    ((SD_GET_XBUF(bp))->xb_pktp)->pkt_path_instance)
		sense_xp->xb_pktp->pkt_path_instance =
		    ((SD_GET_XBUF(bp))->xb_pktp)->pkt_path_instance;

	/* Start the RQS command with a clean slate of retry counts. */
	sense_xp->xb_retry_count	= 0;
	sense_xp->xb_victim_retry_count = 0;
	sense_xp->xb_ua_retry_count	= 0;
	sense_xp->xb_nr_retry_count 	= 0;
	sense_xp->xb_dma_resid  = 0;

	/* Clean up the fields for auto-request sense */
	sense_xp->xb_sense_status = 0;
	sense_xp->xb_sense_state  = 0;
	sense_xp->xb_sense_resid  = 0;
	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
}
15722 
15723 
15724 /*
15725  *    Function: sd_mark_rqs_idle
15726  *
15727  * Description: SD_MUTEX must be held continuously through this routine
15728  *		to prevent reuse of the rqs struct before the caller can
15729  *		complete it's processing.
15730  *
15731  * Return Code: Pointer to the RQS buf
15732  *
15733  *     Context: May be called under interrupt context
15734  */
15735 
15736 static struct buf *
15737 sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
15738 {
15739 	struct buf *bp;
15740 	ASSERT(un != NULL);
15741 	ASSERT(sense_xp != NULL);
15742 	ASSERT(mutex_owned(SD_MUTEX(un)));
15743 	ASSERT(un->un_sense_isbusy != 0);
15744 
15745 	un->un_sense_isbusy = 0;
15746 	bp = sense_xp->xb_sense_bp;
15747 	sense_xp->xb_sense_bp = NULL;
15748 
15749 	/* This pkt is no longer interested in getting sense data */
15750 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
15751 
15752 	return (bp);
15753 }
15754 
15755 
15756 
15757 /*
15758  *    Function: sd_alloc_rqs
15759  *
15760  * Description: Set up the unit to receive auto request sense data
15761  *
15762  * Return Code: DDI_SUCCESS or DDI_FAILURE
15763  *
15764  *     Context: Called under attach(9E) context
15765  */
15766 
15767 static int
15768 sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
15769 {
15770 	struct sd_xbuf *xp;
15771 
15772 	ASSERT(un != NULL);
15773 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15774 	ASSERT(un->un_rqs_bp == NULL);
15775 	ASSERT(un->un_rqs_pktp == NULL);
15776 
15777 	/*
15778 	 * First allocate the required buf and scsi_pkt structs, then set up
15779 	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
15780 	 */
15781 	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
15782 	    MAX_SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
15783 	if (un->un_rqs_bp == NULL) {
15784 		return (DDI_FAILURE);
15785 	}
15786 
15787 	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
15788 	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);
15789 
15790 	if (un->un_rqs_pktp == NULL) {
15791 		sd_free_rqs(un);
15792 		return (DDI_FAILURE);
15793 	}
15794 
15795 	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
15796 	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
15797 	    SCMD_REQUEST_SENSE, 0, MAX_SENSE_LENGTH, 0);
15798 
15799 	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);
15800 
15801 	/* Set up the other needed members in the ARQ scsi_pkt. */
15802 	un->un_rqs_pktp->pkt_comp   = sdintr;
15803 	un->un_rqs_pktp->pkt_time   = sd_io_time;
15804 	un->un_rqs_pktp->pkt_flags |=
15805 	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */
15806 
15807 	/*
15808 	 * Allocate  & init the sd_xbuf struct for the RQS command. Do not
15809 	 * provide any intpkt, destroypkt routines as we take care of
15810 	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
15811 	 */
15812 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
15813 	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
15814 	xp->xb_pktp = un->un_rqs_pktp;
15815 	SD_INFO(SD_LOG_ATTACH_DETACH, un,
15816 	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
15817 	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);
15818 
15819 	/*
15820 	 * Save the pointer to the request sense private bp so it can
15821 	 * be retrieved in sdintr.
15822 	 */
15823 	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
15824 	ASSERT(un->un_rqs_bp->b_private == xp);
15825 
15826 	/*
15827 	 * See if the HBA supports auto-request sense for the specified
15828 	 * target/lun. If it does, then try to enable it (if not already
15829 	 * enabled).
15830 	 *
15831 	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
15832 	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
15833 	 * return success.  However, in both of these cases ARQ is always
15834 	 * enabled and scsi_ifgetcap will always return true. The best approach
15835 	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
15836 	 *
15837 	 * The 3rd case is the HBA (adp) always return enabled on
15838 	 * scsi_ifgetgetcap even when it's not enable, the best approach
15839 	 * is issue a scsi_ifsetcap then a scsi_ifgetcap
15840 	 * Note: this case is to circumvent the Adaptec bug. (x86 only)
15841 	 */
15842 
15843 	if (un->un_f_is_fibre == TRUE) {
15844 		un->un_f_arq_enabled = TRUE;
15845 	} else {
15846 #if defined(__i386) || defined(__amd64)
15847 		/*
15848 		 * Circumvent the Adaptec bug, remove this code when
15849 		 * the bug is fixed
15850 		 */
15851 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
15852 #endif
15853 		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
15854 		case 0:
15855 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
15856 			    "sd_alloc_rqs: HBA supports ARQ\n");
15857 			/*
15858 			 * ARQ is supported by this HBA but currently is not
15859 			 * enabled. Attempt to enable it and if successful then
15860 			 * mark this instance as ARQ enabled.
15861 			 */
15862 			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
15863 			    == 1) {
15864 				/* Successfully enabled ARQ in the HBA */
15865 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
15866 				    "sd_alloc_rqs: ARQ enabled\n");
15867 				un->un_f_arq_enabled = TRUE;
15868 			} else {
15869 				/* Could not enable ARQ in the HBA */
15870 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
15871 				    "sd_alloc_rqs: failed ARQ enable\n");
15872 				un->un_f_arq_enabled = FALSE;
15873 			}
15874 			break;
15875 		case 1:
15876 			/*
15877 			 * ARQ is supported by this HBA and is already enabled.
15878 			 * Just mark ARQ as enabled for this instance.
15879 			 */
15880 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
15881 			    "sd_alloc_rqs: ARQ already enabled\n");
15882 			un->un_f_arq_enabled = TRUE;
15883 			break;
15884 		default:
15885 			/*
15886 			 * ARQ is not supported by this HBA; disable it for this
15887 			 * instance.
15888 			 */
15889 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
15890 			    "sd_alloc_rqs: HBA does not support ARQ\n");
15891 			un->un_f_arq_enabled = FALSE;
15892 			break;
15893 		}
15894 	}
15895 
15896 	return (DDI_SUCCESS);
15897 }
15898 
15899 
15900 /*
15901  *    Function: sd_free_rqs
15902  *
15903  * Description: Cleanup for the pre-instance RQS command.
15904  *
15905  *     Context: Kernel thread context
15906  */
15907 
15908 static void
15909 sd_free_rqs(struct sd_lun *un)
15910 {
15911 	ASSERT(un != NULL);
15912 
15913 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
15914 
15915 	/*
15916 	 * If consistent memory is bound to a scsi_pkt, the pkt
15917 	 * has to be destroyed *before* freeing the consistent memory.
15918 	 * Don't change the sequence of this operations.
15919 	 * scsi_destroy_pkt() might access memory, which isn't allowed,
15920 	 * after it was freed in scsi_free_consistent_buf().
15921 	 */
15922 	if (un->un_rqs_pktp != NULL) {
15923 		scsi_destroy_pkt(un->un_rqs_pktp);
15924 		un->un_rqs_pktp = NULL;
15925 	}
15926 
15927 	if (un->un_rqs_bp != NULL) {
15928 		struct sd_xbuf *xp = SD_GET_XBUF(un->un_rqs_bp);
15929 		if (xp != NULL) {
15930 			kmem_free(xp, sizeof (struct sd_xbuf));
15931 		}
15932 		scsi_free_consistent_buf(un->un_rqs_bp);
15933 		un->un_rqs_bp = NULL;
15934 	}
15935 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
15936 }
15937 
15938 
15939 
15940 /*
15941  *    Function: sd_reduce_throttle
15942  *
15943  * Description: Reduces the maximum # of outstanding commands on a
15944  *		target to the current number of outstanding commands.
15945  *		Queues a tiemout(9F) callback to restore the limit
15946  *		after a specified interval has elapsed.
15947  *		Typically used when we get a TRAN_BUSY return code
15948  *		back from scsi_transport().
15949  *
15950  *   Arguments: un - ptr to the sd_lun softstate struct
15951  *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
15952  *
15953  *     Context: May be called from interrupt context
15954  */
15955 
15956 static void
15957 sd_reduce_throttle(struct sd_lun *un, int throttle_type)
15958 {
15959 	ASSERT(un != NULL);
15960 	ASSERT(mutex_owned(SD_MUTEX(un)));
15961 	ASSERT(un->un_ncmds_in_transport >= 0);
15962 
15963 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
15964 	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
15965 	    un, un->un_throttle, un->un_ncmds_in_transport);
15966 
15967 	if (un->un_throttle > 1) {
15968 		if (un->un_f_use_adaptive_throttle == TRUE) {
15969 			switch (throttle_type) {
15970 			case SD_THROTTLE_TRAN_BUSY:
15971 				if (un->un_busy_throttle == 0) {
15972 					un->un_busy_throttle = un->un_throttle;
15973 				}
15974 				break;
15975 			case SD_THROTTLE_QFULL:
15976 				un->un_busy_throttle = 0;
15977 				break;
15978 			default:
15979 				ASSERT(FALSE);
15980 			}
15981 
15982 			if (un->un_ncmds_in_transport > 0) {
15983 				un->un_throttle = un->un_ncmds_in_transport;
15984 			}
15985 
15986 		} else {
15987 			if (un->un_ncmds_in_transport == 0) {
15988 				un->un_throttle = 1;
15989 			} else {
15990 				un->un_throttle = un->un_ncmds_in_transport;
15991 			}
15992 		}
15993 	}
15994 
15995 	/* Reschedule the timeout if none is currently active */
15996 	if (un->un_reset_throttle_timeid == NULL) {
15997 		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
15998 		    un, SD_THROTTLE_RESET_INTERVAL);
15999 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16000 		    "sd_reduce_throttle: timeout scheduled!\n");
16001 	}
16002 
16003 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
16004 	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
16005 }
16006 
16007 
16008 
16009 /*
16010  *    Function: sd_restore_throttle
16011  *
16012  * Description: Callback function for timeout(9F).  Resets the current
16013  *		value of un->un_throttle to its default.
16014  *
16015  *   Arguments: arg - pointer to associated softstate for the device.
16016  *
16017  *     Context: May be called from interrupt context
16018  */
16019 
16020 static void
16021 sd_restore_throttle(void *arg)
16022 {
16023 	struct sd_lun	*un = arg;
16024 
16025 	ASSERT(un != NULL);
16026 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16027 
16028 	mutex_enter(SD_MUTEX(un));
16029 
16030 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
16031 	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);
16032 
16033 	un->un_reset_throttle_timeid = NULL;
16034 
16035 	if (un->un_f_use_adaptive_throttle == TRUE) {
16036 		/*
16037 		 * If un_busy_throttle is nonzero, then it contains the
16038 		 * value that un_throttle was when we got a TRAN_BUSY back
16039 		 * from scsi_transport(). We want to revert back to this
16040 		 * value.
16041 		 *
16042 		 * In the QFULL case, the throttle limit will incrementally
16043 		 * increase until it reaches max throttle.
16044 		 */
16045 		if (un->un_busy_throttle > 0) {
16046 			un->un_throttle = un->un_busy_throttle;
16047 			un->un_busy_throttle = 0;
16048 		} else {
16049 			/*
16050 			 * increase throttle by 10% open gate slowly, schedule
16051 			 * another restore if saved throttle has not been
16052 			 * reached
16053 			 */
16054 			short throttle;
16055 			if (sd_qfull_throttle_enable) {
16056 				throttle = un->un_throttle +
16057 				    max((un->un_throttle / 10), 1);
16058 				un->un_throttle =
16059 				    (throttle < un->un_saved_throttle) ?
16060 				    throttle : un->un_saved_throttle;
16061 				if (un->un_throttle < un->un_saved_throttle) {
16062 					un->un_reset_throttle_timeid =
16063 					    timeout(sd_restore_throttle,
16064 					    un,
16065 					    SD_QFULL_THROTTLE_RESET_INTERVAL);
16066 				}
16067 			}
16068 		}
16069 
16070 		/*
16071 		 * If un_throttle has fallen below the low-water mark, we
16072 		 * restore the maximum value here (and allow it to ratchet
16073 		 * down again if necessary).
16074 		 */
16075 		if (un->un_throttle < un->un_min_throttle) {
16076 			un->un_throttle = un->un_saved_throttle;
16077 		}
16078 	} else {
16079 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
16080 		    "restoring limit from 0x%x to 0x%x\n",
16081 		    un->un_throttle, un->un_saved_throttle);
16082 		un->un_throttle = un->un_saved_throttle;
16083 	}
16084 
16085 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16086 	    "sd_restore_throttle: calling sd_start_cmds!\n");
16087 
16088 	sd_start_cmds(un, NULL);
16089 
16090 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16091 	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
16092 	    un, un->un_throttle);
16093 
16094 	mutex_exit(SD_MUTEX(un));
16095 
16096 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
16097 }
16098 
16099 /*
16100  *    Function: sdrunout
16101  *
16102  * Description: Callback routine for scsi_init_pkt when a resource allocation
16103  *		fails.
16104  *
16105  *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
16106  *		soft state instance.
16107  *
16108  * Return Code: The scsi_init_pkt routine allows for the callback function to
16109  *		return a 0 indicating the callback should be rescheduled or a 1
16110  *		indicating not to reschedule. This routine always returns 1
16111  *		because the driver always provides a callback function to
16112  *		scsi_init_pkt. This results in a callback always being scheduled
16113  *		(via the scsi_init_pkt callback implementation) if a resource
16114  *		failure occurs.
16115  *
16116  *     Context: This callback function may not block or call routines that block
16117  *
16118  *        Note: Using the scsi_init_pkt callback facility can result in an I/O
16119  *		request persisting at the head of the list which cannot be
16120  *		satisfied even after multiple retries. In the future the driver
16121  *		may implement some time of maximum runout count before failing
16122  *		an I/O.
16123  */
16124 
16125 static int
16126 sdrunout(caddr_t arg)
16127 {
16128 	struct sd_lun	*un = (struct sd_lun *)arg;
16129 
16130 	ASSERT(un != NULL);
16131 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16132 
16133 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
16134 
16135 	mutex_enter(SD_MUTEX(un));
16136 	sd_start_cmds(un, NULL);
16137 	mutex_exit(SD_MUTEX(un));
16138 	/*
16139 	 * This callback routine always returns 1 (i.e. do not reschedule)
16140 	 * because we always specify sdrunout as the callback handler for
16141 	 * scsi_init_pkt inside the call to sd_start_cmds.
16142 	 */
16143 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
16144 	return (1);
16145 }
16146 
16147 
16148 /*
16149  *    Function: sdintr
16150  *
16151  * Description: Completion callback routine for scsi_pkt(9S) structs
16152  *		sent to the HBA driver via scsi_transport(9F).
16153  *
16154  *     Context: Interrupt context
16155  */
16156 
16157 static void
16158 sdintr(struct scsi_pkt *pktp)
16159 {
16160 	struct buf	*bp;
16161 	struct sd_xbuf	*xp;
16162 	struct sd_lun	*un;
16163 	size_t		actual_len;
16164 	sd_ssc_t	*sscp;
16165 
16166 	ASSERT(pktp != NULL);
16167 	bp = (struct buf *)pktp->pkt_private;
16168 	ASSERT(bp != NULL);
16169 	xp = SD_GET_XBUF(bp);
16170 	ASSERT(xp != NULL);
16171 	ASSERT(xp->xb_pktp != NULL);
16172 	un = SD_GET_UN(bp);
16173 	ASSERT(un != NULL);
16174 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16175 
16176 #ifdef SD_FAULT_INJECTION
16177 
16178 	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
16179 	/* SD FaultInjection */
16180 	sd_faultinjection(pktp);
16181 
16182 #endif /* SD_FAULT_INJECTION */
16183 
16184 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
16185 	    " xp:0x%p, un:0x%p\n", bp, xp, un);
16186 
16187 	mutex_enter(SD_MUTEX(un));
16188 
16189 	ASSERT(un->un_fm_private != NULL);
16190 	sscp = &((struct sd_fm_internal *)(un->un_fm_private))->fm_ssc;
16191 	ASSERT(sscp != NULL);
16192 
16193 	/* Reduce the count of the #commands currently in transport */
16194 	un->un_ncmds_in_transport--;
16195 	ASSERT(un->un_ncmds_in_transport >= 0);
16196 
16197 	/* Increment counter to indicate that the callback routine is active */
16198 	un->un_in_callback++;
16199 
16200 	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
16201 
16202 #ifdef	SDDEBUG
16203 	if (bp == un->un_retry_bp) {
16204 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
16205 		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
16206 		    un, un->un_retry_bp, un->un_ncmds_in_transport);
16207 	}
16208 #endif
16209 
16210 	/*
16211 	 * If pkt_reason is CMD_DEV_GONE, fail the command, and update the media
16212 	 * state if needed.
16213 	 */
16214 	if (pktp->pkt_reason == CMD_DEV_GONE) {
16215 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16216 		    "Command failed to complete...Device is gone\n");
16217 		if (un->un_mediastate != DKIO_DEV_GONE) {
16218 			un->un_mediastate = DKIO_DEV_GONE;
16219 			cv_broadcast(&un->un_state_cv);
16220 		}
16221 		sd_return_failed_command(un, bp, EIO);
16222 		goto exit;
16223 	}
16224 
16225 	if (pktp->pkt_state & STATE_XARQ_DONE) {
16226 		SD_TRACE(SD_LOG_COMMON, un,
16227 		    "sdintr: extra sense data received. pkt=%p\n", pktp);
16228 	}
16229 
16230 	/*
16231 	 * First see if the pkt has auto-request sense data with it....
16232 	 * Look at the packet state first so we don't take a performance
16233 	 * hit looking at the arq enabled flag unless absolutely necessary.
16234 	 */
16235 	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
16236 	    (un->un_f_arq_enabled == TRUE)) {
16237 		/*
16238 		 * The HBA did an auto request sense for this command so check
16239 		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
16240 		 * driver command that should not be retried.
16241 		 */
16242 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
16243 			/*
16244 			 * Save the relevant sense info into the xp for the
16245 			 * original cmd.
16246 			 */
16247 			struct scsi_arq_status *asp;
16248 			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16249 			xp->xb_sense_status =
16250 			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
16251 			xp->xb_sense_state  = asp->sts_rqpkt_state;
16252 			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16253 			if (pktp->pkt_state & STATE_XARQ_DONE) {
16254 				actual_len = MAX_SENSE_LENGTH -
16255 				    xp->xb_sense_resid;
16256 				bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16257 				    MAX_SENSE_LENGTH);
16258 			} else {
16259 				if (xp->xb_sense_resid > SENSE_LENGTH) {
16260 					actual_len = MAX_SENSE_LENGTH -
16261 					    xp->xb_sense_resid;
16262 				} else {
16263 					actual_len = SENSE_LENGTH -
16264 					    xp->xb_sense_resid;
16265 				}
16266 				if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
16267 					if ((((struct uscsi_cmd *)
16268 					    (xp->xb_pktinfo))->uscsi_rqlen) >
16269 					    actual_len) {
16270 						xp->xb_sense_resid =
16271 						    (((struct uscsi_cmd *)
16272 						    (xp->xb_pktinfo))->
16273 						    uscsi_rqlen) - actual_len;
16274 					} else {
16275 						xp->xb_sense_resid = 0;
16276 					}
16277 				}
16278 				bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16279 				    SENSE_LENGTH);
16280 			}
16281 
16282 			/* fail the command */
16283 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16284 			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
16285 			sd_return_failed_command(un, bp, EIO);
16286 			goto exit;
16287 		}
16288 
16289 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
16290 		/*
16291 		 * We want to either retry or fail this command, so free
16292 		 * the DMA resources here.  If we retry the command then
16293 		 * the DMA resources will be reallocated in sd_start_cmds().
16294 		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
16295 		 * causes the *entire* transfer to start over again from the
16296 		 * beginning of the request, even for PARTIAL chunks that
16297 		 * have already transferred successfully.
16298 		 */
16299 		if ((un->un_f_is_fibre == TRUE) &&
16300 		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
16301 		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
16302 			scsi_dmafree(pktp);
16303 			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
16304 		}
16305 #endif
16306 
16307 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16308 		    "sdintr: arq done, sd_handle_auto_request_sense\n");
16309 
16310 		sd_handle_auto_request_sense(un, bp, xp, pktp);
16311 		goto exit;
16312 	}
16313 
16314 	/* Next see if this is the REQUEST SENSE pkt for the instance */
16315 	if (pktp->pkt_flags & FLAG_SENSING)  {
16316 		/* This pktp is from the unit's REQUEST_SENSE command */
16317 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16318 		    "sdintr: sd_handle_request_sense\n");
16319 		sd_handle_request_sense(un, bp, xp, pktp);
16320 		goto exit;
16321 	}
16322 
16323 	/*
16324 	 * Check to see if the command successfully completed as requested;
16325 	 * this is the most common case (and also the hot performance path).
16326 	 *
16327 	 * Requirements for successful completion are:
16328 	 * pkt_reason is CMD_CMPLT and packet status is status good.
16329 	 * In addition:
16330 	 * - A residual of zero indicates successful completion no matter what
16331 	 *   the command is.
16332 	 * - If the residual is not zero and the command is not a read or
16333 	 *   write, then it's still defined as successful completion. In other
16334 	 *   words, if the command is a read or write the residual must be
16335 	 *   zero for successful completion.
16336 	 * - If the residual is not zero and the command is a read or
16337 	 *   write, and it's a USCSICMD, then it's still defined as
16338 	 *   successful completion.
16339 	 */
16340 	if ((pktp->pkt_reason == CMD_CMPLT) &&
16341 	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {
16342 
16343 		/*
16344 		 * Since this command is returned with a good status, we
16345 		 * can reset the count for Sonoma failover.
16346 		 */
16347 		un->un_sonoma_failure_count = 0;
16348 
16349 		/*
16350 		 * Return all USCSI commands on good status
16351 		 */
16352 		if (pktp->pkt_resid == 0) {
16353 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16354 			    "sdintr: returning command for resid == 0\n");
16355 		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
16356 		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
16357 			SD_UPDATE_B_RESID(bp, pktp);
16358 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16359 			    "sdintr: returning command for resid != 0\n");
16360 		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
16361 			SD_UPDATE_B_RESID(bp, pktp);
16362 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16363 			    "sdintr: returning uscsi command\n");
16364 		} else {
16365 			goto not_successful;
16366 		}
16367 		sd_return_command(un, bp);
16368 
16369 		/*
16370 		 * Decrement counter to indicate that the callback routine
16371 		 * is done.
16372 		 */
16373 		un->un_in_callback--;
16374 		ASSERT(un->un_in_callback >= 0);
16375 		mutex_exit(SD_MUTEX(un));
16376 
16377 		return;
16378 	}
16379 
16380 not_successful:
16381 
16382 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
16383 	/*
16384 	 * The following is based upon knowledge of the underlying transport
16385 	 * and its use of DMA resources.  This code should be removed when
16386 	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
16387 	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
16388 	 * and sd_start_cmds().
16389 	 *
16390 	 * Free any DMA resources associated with this command if there
16391 	 * is a chance it could be retried or enqueued for later retry.
16392 	 * If we keep the DMA binding then mpxio cannot reissue the
16393 	 * command on another path whenever a path failure occurs.
16394 	 *
16395 	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
16396 	 * causes the *entire* transfer to start over again from the
16397 	 * beginning of the request, even for PARTIAL chunks that
16398 	 * have already transferred successfully.
16399 	 *
16400 	 * This is only done for non-uscsi commands (and also skipped for the
16401 	 * driver's internal RQS command). Also just do this for Fibre Channel
16402 	 * devices as these are the only ones that support mpxio.
16403 	 */
16404 	if ((un->un_f_is_fibre == TRUE) &&
16405 	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
16406 	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
16407 		scsi_dmafree(pktp);
16408 		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
16409 	}
16410 #endif
16411 
16412 	/*
16413 	 * The command did not successfully complete as requested so check
16414 	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
16415 	 * driver command that should not be retried so just return. If
16416 	 * FLAG_DIAGNOSE is not set the error will be processed below.
16417 	 */
16418 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
16419 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16420 		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
16421 		/*
16422 		 * Issue a request sense if a check condition caused the error
16423 		 * (we handle the auto request sense case above), otherwise
16424 		 * just fail the command.
16425 		 */
16426 		if ((pktp->pkt_reason == CMD_CMPLT) &&
16427 		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
16428 			sd_send_request_sense_command(un, bp, pktp);
16429 		} else {
16430 			sd_return_failed_command(un, bp, EIO);
16431 		}
16432 		goto exit;
16433 	}
16434 
16435 	/*
16436 	 * The command did not successfully complete as requested so process
16437 	 * the error, retry, and/or attempt recovery.
16438 	 */
16439 	switch (pktp->pkt_reason) {
16440 	case CMD_CMPLT:
16441 		switch (SD_GET_PKT_STATUS(pktp)) {
16442 		case STATUS_GOOD:
16443 			/*
16444 			 * The command completed successfully with a non-zero
16445 			 * residual
16446 			 */
16447 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16448 			    "sdintr: STATUS_GOOD \n");
16449 			sd_pkt_status_good(un, bp, xp, pktp);
16450 			break;
16451 
16452 		case STATUS_CHECK:
16453 		case STATUS_TERMINATED:
16454 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16455 			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
16456 			sd_pkt_status_check_condition(un, bp, xp, pktp);
16457 			break;
16458 
16459 		case STATUS_BUSY:
16460 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16461 			    "sdintr: STATUS_BUSY\n");
16462 			sd_pkt_status_busy(un, bp, xp, pktp);
16463 			break;
16464 
16465 		case STATUS_RESERVATION_CONFLICT:
16466 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16467 			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
16468 			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
16469 			break;
16470 
16471 		case STATUS_QFULL:
16472 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16473 			    "sdintr: STATUS_QFULL\n");
16474 			sd_pkt_status_qfull(un, bp, xp, pktp);
16475 			break;
16476 
16477 		case STATUS_MET:
16478 		case STATUS_INTERMEDIATE:
16479 		case STATUS_SCSI2:
16480 		case STATUS_INTERMEDIATE_MET:
16481 		case STATUS_ACA_ACTIVE:
16482 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16483 			    "Unexpected SCSI status received: 0x%x\n",
16484 			    SD_GET_PKT_STATUS(pktp));
16485 			/*
16486 			 * Mark the ssc_flags when detected invalid status
16487 			 * code for non-USCSI command.
16488 			 */
16489 			if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
16490 				sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_STATUS,
16491 				    0, "stat-code");
16492 			}
16493 			sd_return_failed_command(un, bp, EIO);
16494 			break;
16495 
16496 		default:
16497 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16498 			    "Invalid SCSI status received: 0x%x\n",
16499 			    SD_GET_PKT_STATUS(pktp));
16500 			if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
16501 				sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_STATUS,
16502 				    0, "stat-code");
16503 			}
16504 			sd_return_failed_command(un, bp, EIO);
16505 			break;
16506 
16507 		}
16508 		break;
16509 
16510 	case CMD_INCOMPLETE:
16511 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16512 		    "sdintr:  CMD_INCOMPLETE\n");
16513 		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
16514 		break;
16515 	case CMD_TRAN_ERR:
16516 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16517 		    "sdintr: CMD_TRAN_ERR\n");
16518 		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
16519 		break;
16520 	case CMD_RESET:
16521 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16522 		    "sdintr: CMD_RESET \n");
16523 		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
16524 		break;
16525 	case CMD_ABORTED:
16526 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16527 		    "sdintr: CMD_ABORTED \n");
16528 		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
16529 		break;
16530 	case CMD_TIMEOUT:
16531 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16532 		    "sdintr: CMD_TIMEOUT\n");
16533 		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
16534 		break;
16535 	case CMD_UNX_BUS_FREE:
16536 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16537 		    "sdintr: CMD_UNX_BUS_FREE \n");
16538 		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
16539 		break;
16540 	case CMD_TAG_REJECT:
16541 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16542 		    "sdintr: CMD_TAG_REJECT\n");
16543 		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
16544 		break;
16545 	default:
16546 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16547 		    "sdintr: default\n");
16548 		/*
16549 		 * Mark the ssc_flags for detecting invliad pkt_reason.
16550 		 */
16551 		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
16552 			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_PKT_REASON,
16553 			    0, "pkt-reason");
16554 		}
16555 		sd_pkt_reason_default(un, bp, xp, pktp);
16556 		break;
16557 	}
16558 
16559 exit:
16560 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");
16561 
16562 	/* Decrement counter to indicate that the callback routine is done. */
16563 	un->un_in_callback--;
16564 	ASSERT(un->un_in_callback >= 0);
16565 
16566 	/*
16567 	 * At this point, the pkt has been dispatched, ie, it is either
16568 	 * being re-tried or has been returned to its caller and should
16569 	 * not be referenced.
16570 	 */
16571 
16572 	mutex_exit(SD_MUTEX(un));
16573 }
16574 
16575 
16576 /*
16577  *    Function: sd_print_incomplete_msg
16578  *
16579  * Description: Prints the error message for a CMD_INCOMPLETE error.
16580  *
16581  *   Arguments: un - ptr to associated softstate for the device.
16582  *		bp - ptr to the buf(9S) for the command.
16583  *		arg - message string ptr
16584  *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
16585  *			or SD_NO_RETRY_ISSUED.
16586  *
16587  *     Context: May be called under interrupt context
16588  */
16589 
16590 static void
16591 sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
16592 {
16593 	struct scsi_pkt	*pktp;
16594 	char	*msgp;
16595 	char	*cmdp = arg;
16596 
16597 	ASSERT(un != NULL);
16598 	ASSERT(mutex_owned(SD_MUTEX(un)));
16599 	ASSERT(bp != NULL);
16600 	ASSERT(arg != NULL);
16601 	pktp = SD_GET_PKTP(bp);
16602 	ASSERT(pktp != NULL);
16603 
16604 	switch (code) {
16605 	case SD_DELAYED_RETRY_ISSUED:
16606 	case SD_IMMEDIATE_RETRY_ISSUED:
16607 		msgp = "retrying";
16608 		break;
16609 	case SD_NO_RETRY_ISSUED:
16610 	default:
16611 		msgp = "giving up";
16612 		break;
16613 	}
16614 
16615 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16616 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16617 		    "incomplete %s- %s\n", cmdp, msgp);
16618 	}
16619 }
16620 
16621 
16622 
16623 /*
16624  *    Function: sd_pkt_status_good
16625  *
16626  * Description: Processing for a STATUS_GOOD code in pkt_status.
16627  *
16628  *     Context: May be called under interrupt context
16629  */
16630 
16631 static void
16632 sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
16633 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16634 {
16635 	char	*cmdp;
16636 
16637 	ASSERT(un != NULL);
16638 	ASSERT(mutex_owned(SD_MUTEX(un)));
16639 	ASSERT(bp != NULL);
16640 	ASSERT(xp != NULL);
16641 	ASSERT(pktp != NULL);
16642 	ASSERT(pktp->pkt_reason == CMD_CMPLT);
16643 	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
16644 	ASSERT(pktp->pkt_resid != 0);
16645 
16646 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
16647 
16648 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16649 	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
16650 	case SCMD_READ:
16651 		cmdp = "read";
16652 		break;
16653 	case SCMD_WRITE:
16654 		cmdp = "write";
16655 		break;
16656 	default:
16657 		SD_UPDATE_B_RESID(bp, pktp);
16658 		sd_return_command(un, bp);
16659 		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16660 		return;
16661 	}
16662 
16663 	/*
16664 	 * See if we can retry the read/write, preferrably immediately.
16665 	 * If retries are exhaused, then sd_retry_command() will update
16666 	 * the b_resid count.
16667 	 */
16668 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
16669 	    cmdp, EIO, (clock_t)0, NULL);
16670 
16671 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16672 }
16673 
16674 
16675 
16676 
16677 
16678 /*
16679  *    Function: sd_handle_request_sense
16680  *
16681  * Description: Processing for non-auto Request Sense command.
16682  *
16683  *   Arguments: un - ptr to associated softstate
16684  *		sense_bp - ptr to buf(9S) for the RQS command
16685  *		sense_xp - ptr to the sd_xbuf for the RQS command
16686  *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
16687  *
16688  *     Context: May be called under interrupt context
16689  */
16690 
static void
sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
	struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
{
	struct buf	*cmd_bp;	/* buf for the original command */
	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */
	size_t		actual_len;	/* actual sense data length */

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(sense_bp != NULL);
	ASSERT(sense_xp != NULL);
	ASSERT(sense_pktp != NULL);

	/*
	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
	 * RQS command and not the original command.
	 */
	ASSERT(sense_pktp == un->un_rqs_pktp);
	ASSERT(sense_bp   == un->un_rqs_bp);
	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
	    (FLAG_SENSING | FLAG_HEAD));
	/* The original command's packet must be flagged as "being sensed". */
	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
	    FLAG_SENSING) == FLAG_SENSING);

	/* These are the bp, xp, and pktp for the original command */
	cmd_bp = sense_xp->xb_sense_bp;
	cmd_xp = SD_GET_XBUF(cmd_bp);
	cmd_pktp = SD_GET_PKTP(cmd_bp);

	if (sense_pktp->pkt_reason != CMD_CMPLT) {
		/*
		 * The REQUEST SENSE command failed.  Release the REQUEST
		 * SENSE command for re-use, get back the bp for the original
		 * command, and attempt to re-try the original command if
		 * FLAG_DIAGNOSE is not set in the original packet.
		 */
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
			    NULL, NULL, EIO, (clock_t)0, NULL);
			return;
		}
		/*
		 * FLAG_DIAGNOSE set: fall through; the command is failed
		 * below after the sense info has been captured.
		 */
	}

	/*
	 * Save the relevant sense info into the xp for the original cmd.
	 *
	 * Note: if the request sense failed the state info will be zero
	 * as set in sd_mark_rqs_busy()
	 */
	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
	/* Number of sense bytes actually transferred by the RQS command. */
	actual_len = MAX_SENSE_LENGTH - sense_pktp->pkt_resid;
	if ((cmd_xp->xb_pkt_flags & SD_XB_USCSICMD) &&
	    (((struct uscsi_cmd *)cmd_xp->xb_pktinfo)->uscsi_rqlen >
	    SENSE_LENGTH)) {
		/*
		 * A USCSI caller asked for more than SENSE_LENGTH bytes of
		 * sense: keep the full MAX_SENSE_LENGTH buffer and report
		 * the RQS residual unchanged.
		 */
		bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data,
		    MAX_SENSE_LENGTH);
		cmd_xp->xb_sense_resid = sense_pktp->pkt_resid;
	} else {
		/*
		 * Normal case: keep only SENSE_LENGTH bytes and recompute
		 * the residual relative to SENSE_LENGTH.
		 */
		bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data,
		    SENSE_LENGTH);
		if (actual_len < SENSE_LENGTH) {
			cmd_xp->xb_sense_resid = SENSE_LENGTH - actual_len;
		} else {
			cmd_xp->xb_sense_resid = 0;
		}
	}

	/*
	 *  Free up the RQS command....
	 *  NOTE:
	 *	Must do this BEFORE calling sd_validate_sense_data!
	 *	sd_validate_sense_data may return the original command in
	 *	which case the pkt will be freed and the flags can no
	 *	longer be touched.
	 *	SD_MUTEX is held through this process until the command
	 *	is dispatched based upon the sense data, so there are
	 *	no race conditions.
	 */
	(void) sd_mark_rqs_idle(un, sense_xp);

	/*
	 * For a retryable command see if we have valid sense data, if so then
	 * turn it over to sd_decode_sense() to figure out the right course of
	 * action. Just fail a non-retryable command.
	 */
	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
		if (sd_validate_sense_data(un, cmd_bp, cmd_xp, actual_len) ==
		    SD_SENSE_DATA_IS_VALID) {
			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
		}
	} else {
		/* FLAG_DIAGNOSE: dump diagnostics and fail with no retry. */
		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
		sd_return_failed_command(un, cmd_bp, EIO);
	}
}
16794 
16795 
16796 
16797 
16798 /*
16799  *    Function: sd_handle_auto_request_sense
16800  *
16801  * Description: Processing for auto-request sense information.
16802  *
16803  *   Arguments: un - ptr to associated softstate
16804  *		bp - ptr to buf(9S) for the command
16805  *		xp - ptr to the sd_xbuf for the command
16806  *		pktp - ptr to the scsi_pkt(9S) for the command
16807  *
16808  *     Context: May be called under interrupt context
16809  */
16810 
static void
sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	struct scsi_arq_status *asp;
	size_t actual_len;	/* number of sense bytes actually received */

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);
	/* This must be the original command, never the manual RQS packet. */
	ASSERT(pktp != un->un_rqs_pktp);
	ASSERT(bp   != un->un_rqs_bp);

	/*
	 * For auto-request sense, we get a scsi_arq_status back from
	 * the HBA, with the sense data in the sts_sensedata member.
	 * The pkt_scbp of the packet points to this scsi_arq_status.
	 */
	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);

	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
		/*
		 * The auto REQUEST SENSE failed; see if we can re-try
		 * the original command.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "auto request sense failed (reason=%s)\n",
		    scsi_rname(asp->sts_rqpkt_reason));

		sd_reset_target(un, pktp);

		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
		    NULL, NULL, EIO, (clock_t)0, NULL);
		return;
	}

	/* Save the relevant sense info into the xp for the original cmd. */
	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
	xp->xb_sense_state  = asp->sts_rqpkt_state;
	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
	if (xp->xb_sense_state & STATE_XARQ_DONE) {
		/*
		 * Extended ARQ: up to MAX_SENSE_LENGTH bytes were
		 * transferred, so the residual is relative to
		 * MAX_SENSE_LENGTH.
		 */
		actual_len = MAX_SENSE_LENGTH - xp->xb_sense_resid;
		bcopy(&asp->sts_sensedata, xp->xb_sense_data,
		    MAX_SENSE_LENGTH);
	} else {
		/*
		 * Non-extended ARQ: a residual larger than SENSE_LENGTH
		 * is taken to have been computed against MAX_SENSE_LENGTH;
		 * otherwise it is relative to SENSE_LENGTH.
		 */
		if (xp->xb_sense_resid > SENSE_LENGTH) {
			actual_len = MAX_SENSE_LENGTH - xp->xb_sense_resid;
		} else {
			actual_len = SENSE_LENGTH - xp->xb_sense_resid;
		}
		if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
			/*
			 * For USCSI, re-express the residual relative to
			 * the caller's requested sense length (uscsi_rqlen).
			 */
			if ((((struct uscsi_cmd *)
			    (xp->xb_pktinfo))->uscsi_rqlen) > actual_len) {
				xp->xb_sense_resid = (((struct uscsi_cmd *)
				    (xp->xb_pktinfo))->uscsi_rqlen) -
				    actual_len;
			} else {
				xp->xb_sense_resid = 0;
			}
		}
		bcopy(&asp->sts_sensedata, xp->xb_sense_data, SENSE_LENGTH);
	}

	/*
	 * See if we have valid sense data, if so then turn it over to
	 * sd_decode_sense() to figure out the right course of action.
	 */
	if (sd_validate_sense_data(un, bp, xp, actual_len) ==
	    SD_SENSE_DATA_IS_VALID) {
		sd_decode_sense(un, bp, xp, pktp);
	}
}
16885 
16886 
16887 /*
16888  *    Function: sd_print_sense_failed_msg
16889  *
16890  * Description: Print log message when RQS has failed.
16891  *
16892  *   Arguments: un - ptr to associated softstate
16893  *		bp - ptr to buf(9S) for the command
16894  *		arg - generic message string ptr
16895  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16896  *			or SD_NO_RETRY_ISSUED
16897  *
16898  *     Context: May be called from interrupt context
16899  */
16900 
16901 static void
16902 sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
16903 	int code)
16904 {
16905 	char	*msgp = arg;
16906 
16907 	ASSERT(un != NULL);
16908 	ASSERT(mutex_owned(SD_MUTEX(un)));
16909 	ASSERT(bp != NULL);
16910 
16911 	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
16912 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, msgp);
16913 	}
16914 }
16915 
16916 
16917 /*
16918  *    Function: sd_validate_sense_data
16919  *
16920  * Description: Check the given sense data for validity.
16921  *		If the sense data is not valid, the command will
16922  *		be either failed or retried!
16923  *
16924  * Return Code: SD_SENSE_DATA_IS_INVALID
16925  *		SD_SENSE_DATA_IS_VALID
16926  *
16927  *     Context: May be called from interrupt context
16928  */
16929 
static int
sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
	size_t actual_len)
{
	struct scsi_extended_sense *esp;
	struct	scsi_pkt *pktp;
	char	*msgp = NULL;	/* failure message for sd_print_sense_failed_msg */
	sd_ssc_t *sscp;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(bp != un->un_rqs_bp);
	ASSERT(xp != NULL);
	ASSERT(un->un_fm_private != NULL);

	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);

	/* FMA (SSC) handle used to flag invalid sense/pkt-reason data. */
	sscp = &((struct sd_fm_internal *)(un->un_fm_private))->fm_ssc;
	ASSERT(sscp != NULL);

	/*
	 * Check the status of the RQS command (auto or manual).
	 */
	switch (xp->xb_sense_status & STATUS_MASK) {
	case STATUS_GOOD:
		break;

	case STATUS_RESERVATION_CONFLICT:
		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
		return (SD_SENSE_DATA_IS_INVALID);

	case STATUS_BUSY:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "Busy Status on REQUEST SENSE\n");
		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
		    NULL, EIO, un->un_busy_timeout / 500, kstat_waitq_enter);
		return (SD_SENSE_DATA_IS_INVALID);

	case STATUS_QFULL:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "QFULL Status on REQUEST SENSE\n");
		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
		    NULL, EIO, un->un_busy_timeout / 500, kstat_waitq_enter);
		return (SD_SENSE_DATA_IS_INVALID);

	case STATUS_CHECK:
	case STATUS_TERMINATED:
		msgp = "Check Condition on REQUEST SENSE\n";
		goto sense_failed;

	default:
		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
		goto sense_failed;
	}

	/*
	 * See if we got the minimum required amount of sense data.
	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
	 * or less.
	 */
	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
	    (actual_len == 0)) {
		msgp = "Request Sense couldn't get sense data\n";
		goto sense_failed;
	}

	if (actual_len < SUN_MIN_SENSE_LENGTH) {
		msgp = "Not enough sense information\n";
		/* Mark the ssc_flags for detecting invalid sense data */
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
			    "sense-data");
		}
		goto sense_failed;
	}

	/*
	 * We require the extended sense data
	 */
	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
	if (esp->es_class != CLASS_EXTENDED_SENSE) {
		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
			/*
			 * Hex-dump the undecodable sense bytes.  The static
			 * tmp/buf scratch buffers are shared by all sd
			 * instances and are protected by sd_sense_mutex.
			 */
			static char tmp[8];
			static char buf[148];
			char *p = (char *)(xp->xb_sense_data);
			int i;

			mutex_enter(&sd_sense_mutex);
			(void) strcpy(buf, "undecodable sense information:");
			for (i = 0; i < actual_len; i++) {
				(void) sprintf(tmp, " 0x%x", *(p++)&0xff);
				(void) strcpy(&buf[strlen(buf)], tmp);
			}
			i = strlen(buf);
			(void) strcpy(&buf[i], "-(assumed fatal)\n");

			/*
			 * Only log to the console when FMA is not taking
			 * care of error reporting for this device.
			 */
			if (SD_FM_LOG(un) == SD_FM_LOG_NSUP) {
				scsi_log(SD_DEVINFO(un), sd_label,
				    CE_WARN, buf);
			}
			mutex_exit(&sd_sense_mutex);
		}

		/* Mark the ssc_flags for detecting invalid sense data */
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
			    "sense-data");
		}

		/* Note: Legacy behavior, fail the command with no retry */
		sd_return_failed_command(un, bp, EIO);
		return (SD_SENSE_DATA_IS_INVALID);
	}

	/*
	 * Check that es_code is valid (es_class concatenated with es_code
	 * make up the "response code" field.  es_class will always be 7, so
	 * make sure es_code is 0, 1, 2, 3 or 0xf.  es_code will indicate the
	 * format.
	 */
	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
		/* Mark the ssc_flags for detecting invalid sense data */
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
			    "sense-data");
		}
		goto sense_failed;
	}

	return (SD_SENSE_DATA_IS_VALID);

sense_failed:
	/*
	 * If the request sense failed (for whatever reason), attempt
	 * to retry the original command.
	 */
#if defined(__i386) || defined(__amd64)
	/*
	 * SD_RETRY_DELAY is conditionally compile (#if fibre) in
	 * sddef.h for Sparc platform, and x86 uses 1 binary
	 * for both SCSI/FC.
	 * The SD_RETRY_DELAY value need to be adjusted here
	 * when SD_RETRY_DELAY change in sddef.h
	 */
	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
	    sd_print_sense_failed_msg, msgp, EIO,
	    un->un_f_is_fibre?drv_usectohz(100000):(clock_t)0, NULL);
#else
	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
#endif

	return (SD_SENSE_DATA_IS_INVALID);
}
17090 
17091 /*
17092  *    Function: sd_decode_sense
17093  *
17094  * Description: Take recovery action(s) when SCSI Sense Data is received.
17095  *
17096  *     Context: Interrupt context.
17097  */
17098 
17099 static void
17100 sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
17101 	struct scsi_pkt *pktp)
17102 {
17103 	uint8_t sense_key;
17104 
17105 	ASSERT(un != NULL);
17106 	ASSERT(mutex_owned(SD_MUTEX(un)));
17107 	ASSERT(bp != NULL);
17108 	ASSERT(bp != un->un_rqs_bp);
17109 	ASSERT(xp != NULL);
17110 	ASSERT(pktp != NULL);
17111 
17112 	sense_key = scsi_sense_key(xp->xb_sense_data);
17113 
17114 	switch (sense_key) {
17115 	case KEY_NO_SENSE:
17116 		sd_sense_key_no_sense(un, bp, xp, pktp);
17117 		break;
17118 	case KEY_RECOVERABLE_ERROR:
17119 		sd_sense_key_recoverable_error(un, xp->xb_sense_data,
17120 		    bp, xp, pktp);
17121 		break;
17122 	case KEY_NOT_READY:
17123 		sd_sense_key_not_ready(un, xp->xb_sense_data,
17124 		    bp, xp, pktp);
17125 		break;
17126 	case KEY_MEDIUM_ERROR:
17127 	case KEY_HARDWARE_ERROR:
17128 		sd_sense_key_medium_or_hardware_error(un,
17129 		    xp->xb_sense_data, bp, xp, pktp);
17130 		break;
17131 	case KEY_ILLEGAL_REQUEST:
17132 		sd_sense_key_illegal_request(un, bp, xp, pktp);
17133 		break;
17134 	case KEY_UNIT_ATTENTION:
17135 		sd_sense_key_unit_attention(un, xp->xb_sense_data,
17136 		    bp, xp, pktp);
17137 		break;
17138 	case KEY_WRITE_PROTECT:
17139 	case KEY_VOLUME_OVERFLOW:
17140 	case KEY_MISCOMPARE:
17141 		sd_sense_key_fail_command(un, bp, xp, pktp);
17142 		break;
17143 	case KEY_BLANK_CHECK:
17144 		sd_sense_key_blank_check(un, bp, xp, pktp);
17145 		break;
17146 	case KEY_ABORTED_COMMAND:
17147 		sd_sense_key_aborted_command(un, bp, xp, pktp);
17148 		break;
17149 	case KEY_VENDOR_UNIQUE:
17150 	case KEY_COPY_ABORTED:
17151 	case KEY_EQUAL:
17152 	case KEY_RESERVED:
17153 	default:
17154 		sd_sense_key_default(un, xp->xb_sense_data,
17155 		    bp, xp, pktp);
17156 		break;
17157 	}
17158 }
17159 
17160 
17161 /*
17162  *    Function: sd_dump_memory
17163  *
17164  * Description: Debug logging routine to print the contents of a user provided
17165  *		buffer. The output of the buffer is broken up into 256 byte
17166  *		segments due to a size constraint of the scsi_log.
17167  *		implementation.
17168  *
17169  *   Arguments: un - ptr to softstate
17170  *		comp - component mask
17171  *		title - "title" string to preceed data when printed
17172  *		data - ptr to data block to be printed
17173  *		len - size of data block to be printed
17174  *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
17175  *
17176  *     Context: May be called from interrupt context
17177  */
17178 
17179 #define	SD_DUMP_MEMORY_BUF_SIZE	256
17180 
17181 static char *sd_dump_format_string[] = {
17182 		" 0x%02x",
17183 		" %c"
17184 };
17185 
static void
sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
    int len, int fmt)
{
	int	i, j;
	int	avail_count;	/* #elements that fit in one output line */
	int	start_offset;
	int	end_offset;
	size_t	entry_len;	/* width of one formatted element */
	char	*bufp;
	char	*local_buf;
	char	*format_string;

	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));

	/*
	 * In the debug version of the driver, this function is called from a
	 * number of places which are NOPs in the release driver.
	 * The debug driver therefore has additional methods of filtering
	 * debug output.
	 */
#ifdef SDDEBUG
	/*
	 * In the debug version of the driver we can reduce the amount of debug
	 * messages by setting sd_error_level to something other than
	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
	 * sd_component_mask.
	 */
	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
	    (sd_error_level != SCSI_ERR_ALL)) {
		return;
	}
	if (((sd_component_mask & comp) == 0) ||
	    (sd_error_level != SCSI_ERR_ALL)) {
		return;
	}
#else
	if (sd_error_level != SCSI_ERR_ALL) {
		return;
	}
#endif

	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
	bufp = local_buf;
	/*
	 * Available length is the length of local_buf[], minus the
	 * length of the title string, minus one for the ":", minus
	 * one for the newline, minus one for the NULL terminator.
	 * This gives the #bytes available for holding the printed
	 * values from the given data buffer.
	 */
	if (fmt == SD_LOG_HEX) {
		format_string = sd_dump_format_string[0];
	} else /* SD_LOG_CHAR */ {
		format_string = sd_dump_format_string[1];
	}
	/*
	 * Available count is the number of elements from the given
	 * data buffer that we can fit into the available length.
	 * This is based upon the size of the format string used.
	 * Make one entry and find it's size.
	 *
	 * Note: this assumes every element formats to the same width
	 * as data[0], which holds for both format strings used here
	 * (" 0x%02x" is always 5 chars, " %c" is always 2 chars).
	 */
	(void) sprintf(bufp, format_string, data[0]);
	entry_len = strlen(bufp);
	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;

	/* Emit the data in chunks of avail_count elements per log line. */
	j = 0;
	while (j < len) {
		bufp = local_buf;
		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
		start_offset = j;

		end_offset = start_offset + avail_count;

		/* Each output line begins with "<title>:". */
		(void) sprintf(bufp, "%s:", title);
		bufp += strlen(bufp);
		for (i = start_offset; ((i < end_offset) && (j < len));
		    i++, j++) {
			(void) sprintf(bufp, format_string, data[i]);
			bufp += entry_len;
		}
		(void) sprintf(bufp, "\n");

		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
	}
	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
}
17273 
17274 /*
17275  *    Function: sd_print_sense_msg
17276  *
17277  * Description: Log a message based upon the given sense data.
17278  *
17279  *   Arguments: un - ptr to associated softstate
17280  *		bp - ptr to buf(9S) for the command
17281  *		arg - ptr to associate sd_sense_info struct
17282  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
17283  *			or SD_NO_RETRY_ISSUED
17284  *
17285  *     Context: May be called from interrupt context
17286  */
17287 
static void
sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
{
	struct sd_xbuf	*xp;
	struct scsi_pkt	*pktp;
	uint8_t *sensep;
	daddr_t request_blkno;	/* block the failed command targeted */
	diskaddr_t err_blkno;	/* block the device reported as failing */
	int severity;
	int pfa_flag;
	extern struct scsi_key_strings scsi_cmds[];

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);
	ASSERT(arg != NULL);

	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;

	/* A command that is being retried is only "retryable", not fatal. */
	if ((code == SD_DELAYED_RETRY_ISSUED) ||
	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
		severity = SCSI_ERR_RETRYABLE;
	}

	/* Use absolute block number for the request block number */
	request_blkno = xp->xb_blkno;

	/*
	 * Now try to get the error block number from the sense data
	 */
	sensep = xp->xb_sense_data;

	if (scsi_sense_info_uint64(sensep, SENSE_LENGTH,
	    (uint64_t *)&err_blkno)) {
		/*
		 * We retrieved the error block number from the information
		 * portion of the sense data.
		 *
		 * For USCSI commands we are better off using the error
		 * block no. as the requested block no. (This is the best
		 * we can estimate.)
		 */
		if ((SD_IS_BUFIO(xp) == FALSE) &&
		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
			request_blkno = err_blkno;
		}
	} else {
		/*
		 * Without the es_valid bit set (for fixed format) or an
		 * information descriptor (for descriptor format) we cannot
		 * be certain of the error blkno, so just use the
		 * request_blkno.
		 */
		err_blkno = (diskaddr_t)request_blkno;
	}

	/*
	 * The following will log the buffer contents for the release driver
	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
	 * level is set to verbose.
	 */
	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);

	if (pfa_flag == FALSE) {
		/* This is normally only set for USCSI */
		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
			return;
		}

		/*
		 * For buf(9S) I/O, suppress the message unless diagnostic
		 * logging is enabled or the severity reaches the configured
		 * error level.
		 */
		if ((SD_IS_BUFIO(xp) == TRUE) &&
		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
		    (severity < sd_error_level))) {
			return;
		}
	}
	/*
	 * Check for Sonoma Failover and keep a count of how many failed I/O's.
	 * Only the first such failure is logged; subsequent ones return early.
	 */
	if ((SD_IS_LSI(un)) &&
	    (scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) &&
	    (scsi_sense_asc(sensep) == 0x94) &&
	    (scsi_sense_ascq(sensep) == 0x01)) {
		un->un_sonoma_failure_count++;
		if (un->un_sonoma_failure_count > 1) {
			return;
		}
	}

	/*
	 * Log via scsi_vu_errmsg() when FMA is not handling this device, or
	 * for a fully-completed recovered error (pkt_resid == 0).
	 */
	if (SD_FM_LOG(un) == SD_FM_LOG_NSUP ||
	    ((scsi_sense_key(sensep) == KEY_RECOVERABLE_ERROR) &&
	    (pktp->pkt_resid == 0))) {
		scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
		    request_blkno, err_blkno, scsi_cmds,
		    (struct scsi_extended_sense *)sensep,
		    un->un_additional_codes, NULL);
	}
}
17393 
17394 /*
17395  *    Function: sd_sense_key_no_sense
17396  *
17397  * Description: Recovery action when sense data was not received.
17398  *
17399  *     Context: May be called from interrupt context
17400  */
17401 
17402 static void
17403 sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
17404 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17405 {
17406 	struct sd_sense_info	si;
17407 
17408 	ASSERT(un != NULL);
17409 	ASSERT(mutex_owned(SD_MUTEX(un)));
17410 	ASSERT(bp != NULL);
17411 	ASSERT(xp != NULL);
17412 	ASSERT(pktp != NULL);
17413 
17414 	si.ssi_severity = SCSI_ERR_FATAL;
17415 	si.ssi_pfa_flag = FALSE;
17416 
17417 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17418 
17419 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17420 	    &si, EIO, (clock_t)0, NULL);
17421 }
17422 
17423 
17424 /*
17425  *    Function: sd_sense_key_recoverable_error
17426  *
17427  * Description: Recovery actions for a SCSI "Recovered Error" sense key.
17428  *
17429  *     Context: May be called from interrupt context
17430  */
17431 
17432 static void
17433 sd_sense_key_recoverable_error(struct sd_lun *un,
17434 	uint8_t *sense_datap,
17435 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17436 {
17437 	struct sd_sense_info	si;
17438 	uint8_t asc = scsi_sense_asc(sense_datap);
17439 
17440 	ASSERT(un != NULL);
17441 	ASSERT(mutex_owned(SD_MUTEX(un)));
17442 	ASSERT(bp != NULL);
17443 	ASSERT(xp != NULL);
17444 	ASSERT(pktp != NULL);
17445 
17446 	/*
17447 	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
17448 	 */
17449 	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
17450 		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17451 		si.ssi_severity = SCSI_ERR_INFO;
17452 		si.ssi_pfa_flag = TRUE;
17453 	} else {
17454 		SD_UPDATE_ERRSTATS(un, sd_softerrs);
17455 		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
17456 		si.ssi_severity = SCSI_ERR_RECOVERED;
17457 		si.ssi_pfa_flag = FALSE;
17458 	}
17459 
17460 	if (pktp->pkt_resid == 0) {
17461 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17462 		sd_return_command(un, bp);
17463 		return;
17464 	}
17465 
17466 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17467 	    &si, EIO, (clock_t)0, NULL);
17468 }
17469 
17470 
17471 
17472 
17473 /*
17474  *    Function: sd_sense_key_not_ready
17475  *
17476  * Description: Recovery actions for a SCSI "Not Ready" sense key.
17477  *
17478  *     Context: May be called from interrupt context
17479  */
17480 
17481 static void
17482 sd_sense_key_not_ready(struct sd_lun *un,
17483 	uint8_t *sense_datap,
17484 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17485 {
17486 	struct sd_sense_info	si;
17487 	uint8_t asc = scsi_sense_asc(sense_datap);
17488 	uint8_t ascq = scsi_sense_ascq(sense_datap);
17489 
17490 	ASSERT(un != NULL);
17491 	ASSERT(mutex_owned(SD_MUTEX(un)));
17492 	ASSERT(bp != NULL);
17493 	ASSERT(xp != NULL);
17494 	ASSERT(pktp != NULL);
17495 
17496 	si.ssi_severity = SCSI_ERR_FATAL;
17497 	si.ssi_pfa_flag = FALSE;
17498 
17499 	/*
17500 	 * Update error stats after first NOT READY error. Disks may have
17501 	 * been powered down and may need to be restarted.  For CDROMs,
17502 	 * report NOT READY errors only if media is present.
17503 	 */
17504 	if ((ISCD(un) && (asc == 0x3A)) ||
17505 	    (xp->xb_nr_retry_count > 0)) {
17506 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17507 		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
17508 	}
17509 
17510 	/*
17511 	 * Just fail if the "not ready" retry limit has been reached.
17512 	 */
17513 	if (xp->xb_nr_retry_count >= un->un_notready_retry_count) {
17514 		/* Special check for error message printing for removables. */
17515 		if (un->un_f_has_removable_media && (asc == 0x04) &&
17516 		    (ascq >= 0x04)) {
17517 			si.ssi_severity = SCSI_ERR_ALL;
17518 		}
17519 		goto fail_command;
17520 	}
17521 
17522 	/*
17523 	 * Check the ASC and ASCQ in the sense data as needed, to determine
17524 	 * what to do.
17525 	 */
17526 	switch (asc) {
17527 	case 0x04:	/* LOGICAL UNIT NOT READY */
17528 		/*
17529 		 * disk drives that don't spin up result in a very long delay
17530 		 * in format without warning messages. We will log a message
17531 		 * if the error level is set to verbose.
17532 		 */
17533 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17534 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17535 			    "logical unit not ready, resetting disk\n");
17536 		}
17537 
17538 		/*
17539 		 * There are different requirements for CDROMs and disks for
17540 		 * the number of retries.  If a CD-ROM is giving this, it is
17541 		 * probably reading TOC and is in the process of getting
17542 		 * ready, so we should keep on trying for a long time to make
17543 		 * sure that all types of media are taken in account (for
17544 		 * some media the drive takes a long time to read TOC).  For
17545 		 * disks we do not want to retry this too many times as this
17546 		 * can cause a long hang in format when the drive refuses to
17547 		 * spin up (a very common failure).
17548 		 */
17549 		switch (ascq) {
17550 		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
17551 			/*
17552 			 * Disk drives frequently refuse to spin up which
17553 			 * results in a very long hang in format without
17554 			 * warning messages.
17555 			 *
17556 			 * Note: This code preserves the legacy behavior of
17557 			 * comparing xb_nr_retry_count against zero for fibre
17558 			 * channel targets instead of comparing against the
17559 			 * un_reset_retry_count value.  The reason for this
17560 			 * discrepancy has been so utterly lost beneath the
17561 			 * Sands of Time that even Indiana Jones could not
17562 			 * find it.
17563 			 */
17564 			if (un->un_f_is_fibre == TRUE) {
17565 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17566 				    (xp->xb_nr_retry_count > 0)) &&
17567 				    (un->un_startstop_timeid == NULL)) {
17568 					scsi_log(SD_DEVINFO(un), sd_label,
17569 					    CE_WARN, "logical unit not ready, "
17570 					    "resetting disk\n");
17571 					sd_reset_target(un, pktp);
17572 				}
17573 			} else {
17574 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17575 				    (xp->xb_nr_retry_count >
17576 				    un->un_reset_retry_count)) &&
17577 				    (un->un_startstop_timeid == NULL)) {
17578 					scsi_log(SD_DEVINFO(un), sd_label,
17579 					    CE_WARN, "logical unit not ready, "
17580 					    "resetting disk\n");
17581 					sd_reset_target(un, pktp);
17582 				}
17583 			}
17584 			break;
17585 
17586 		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
17587 			/*
17588 			 * If the target is in the process of becoming
17589 			 * ready, just proceed with the retry. This can
17590 			 * happen with CD-ROMs that take a long time to
17591 			 * read TOC after a power cycle or reset.
17592 			 */
17593 			goto do_retry;
17594 
17595 		case 0x02:  /* LUN NOT READY, INITITIALIZING CMD REQUIRED */
17596 			break;
17597 
17598 		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
17599 			/*
17600 			 * Retries cannot help here so just fail right away.
17601 			 */
17602 			goto fail_command;
17603 
17604 		case 0x88:
17605 			/*
17606 			 * Vendor-unique code for T3/T4: it indicates a
17607 			 * path problem in a mutipathed config, but as far as
17608 			 * the target driver is concerned it equates to a fatal
17609 			 * error, so we should just fail the command right away
17610 			 * (without printing anything to the console). If this
17611 			 * is not a T3/T4, fall thru to the default recovery
17612 			 * action.
17613 			 * T3/T4 is FC only, don't need to check is_fibre
17614 			 */
17615 			if (SD_IS_T3(un) || SD_IS_T4(un)) {
17616 				sd_return_failed_command(un, bp, EIO);
17617 				return;
17618 			}
17619 			/* FALLTHRU */
17620 
17621 		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
17622 		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
17623 		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
17624 		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
17625 		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
17626 		default:    /* Possible future codes in SCSI spec? */
17627 			/*
17628 			 * For removable-media devices, do not retry if
17629 			 * ASCQ > 2 as these result mostly from USCSI commands
17630 			 * on MMC devices issued to check status of an
17631 			 * operation initiated in immediate mode.  Also for
17632 			 * ASCQ >= 4 do not print console messages as these
17633 			 * mainly represent a user-initiated operation
17634 			 * instead of a system failure.
17635 			 */
17636 			if (un->un_f_has_removable_media) {
17637 				si.ssi_severity = SCSI_ERR_ALL;
17638 				goto fail_command;
17639 			}
17640 			break;
17641 		}
17642 
17643 		/*
17644 		 * As part of our recovery attempt for the NOT READY
17645 		 * condition, we issue a START STOP UNIT command. However
17646 		 * we want to wait for a short delay before attempting this
17647 		 * as there may still be more commands coming back from the
17648 		 * target with the check condition. To do this we use
17649 		 * timeout(9F) to call sd_start_stop_unit_callback() after
17650 		 * the delay interval expires. (sd_start_stop_unit_callback()
17651 		 * dispatches sd_start_stop_unit_task(), which will issue
17652 		 * the actual START STOP UNIT command. The delay interval
17653 		 * is one-half of the delay that we will use to retry the
17654 		 * command that generated the NOT READY condition.
17655 		 *
17656 		 * Note that we could just dispatch sd_start_stop_unit_task()
17657 		 * from here and allow it to sleep for the delay interval,
17658 		 * but then we would be tying up the taskq thread
17659 		 * uncesessarily for the duration of the delay.
17660 		 *
17661 		 * Do not issue the START STOP UNIT if the current command
17662 		 * is already a START STOP UNIT.
17663 		 */
17664 		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
17665 			break;
17666 		}
17667 
17668 		/*
17669 		 * Do not schedule the timeout if one is already pending.
17670 		 */
17671 		if (un->un_startstop_timeid != NULL) {
17672 			SD_INFO(SD_LOG_ERROR, un,
17673 			    "sd_sense_key_not_ready: restart already issued to"
17674 			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
17675 			    ddi_get_instance(SD_DEVINFO(un)));
17676 			break;
17677 		}
17678 
17679 		/*
17680 		 * Schedule the START STOP UNIT command, then queue the command
17681 		 * for a retry.
17682 		 *
17683 		 * Note: A timeout is not scheduled for this retry because we
17684 		 * want the retry to be serial with the START_STOP_UNIT. The
17685 		 * retry will be started when the START_STOP_UNIT is completed
17686 		 * in sd_start_stop_unit_task.
17687 		 */
17688 		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
17689 		    un, un->un_busy_timeout / 2);
17690 		xp->xb_nr_retry_count++;
17691 		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
17692 		return;
17693 
17694 	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
17695 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17696 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17697 			    "unit does not respond to selection\n");
17698 		}
17699 		break;
17700 
17701 	case 0x3A:	/* MEDIUM NOT PRESENT */
17702 		if (sd_error_level >= SCSI_ERR_FATAL) {
17703 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17704 			    "Caddy not inserted in drive\n");
17705 		}
17706 
17707 		sr_ejected(un);
17708 		un->un_mediastate = DKIO_EJECTED;
17709 		/* The state has changed, inform the media watch routines */
17710 		cv_broadcast(&un->un_state_cv);
17711 		/* Just fail if no media is present in the drive. */
17712 		goto fail_command;
17713 
17714 	default:
17715 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17716 			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
17717 			    "Unit not Ready. Additional sense code 0x%x\n",
17718 			    asc);
17719 		}
17720 		break;
17721 	}
17722 
17723 do_retry:
17724 
17725 	/*
17726 	 * Retry the command, as some targets may report NOT READY for
17727 	 * several seconds after being reset.
17728 	 */
17729 	xp->xb_nr_retry_count++;
17730 	si.ssi_severity = SCSI_ERR_RETRYABLE;
17731 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
17732 	    &si, EIO, un->un_busy_timeout, NULL);
17733 
17734 	return;
17735 
17736 fail_command:
17737 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17738 	sd_return_failed_command(un, bp, EIO);
17739 }
17740 
17741 
17742 
17743 /*
17744  *    Function: sd_sense_key_medium_or_hardware_error
17745  *
17746  * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
17747  *		sense key.
17748  *
17749  *     Context: May be called from interrupt context
17750  */
17751 
static void
sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	struct sd_sense_info	si;
	uint8_t sense_key = scsi_sense_key(sense_datap);
	uint8_t asc = scsi_sense_asc(sense_datap);

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	si.ssi_severity = SCSI_ERR_FATAL;
	si.ssi_pfa_flag = FALSE;

	/* Medium errors get a dedicated kstat in addition to sd_harderrs. */
	if (sense_key == KEY_MEDIUM_ERROR) {
		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
	}

	SD_UPDATE_ERRSTATS(un, sd_harderrs);

	/*
	 * When the retry count for this command reaches exactly the unit's
	 * reset retry threshold, attempt recovery with a bus-device reset.
	 * The softstate mutex is dropped for the duration of the reset
	 * calls and reacquired afterwards.
	 */
	if ((un->un_reset_retry_count != 0) &&
	    (xp->xb_retry_count == un->un_reset_retry_count)) {
		mutex_exit(SD_MUTEX(un));
		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
		if (un->un_f_allow_bus_device_reset == TRUE) {

			boolean_t try_resetting_target = B_TRUE;

			/*
			 * We need to be able to handle specific ASC when we are
			 * handling a KEY_HARDWARE_ERROR. In particular
			 * taking the default action of resetting the target may
			 * not be the appropriate way to attempt recovery.
			 * Resetting a target because of a single LUN failure
			 * victimizes all LUNs on that target.
			 *
			 * This is true for the LSI arrays, if an LSI
			 * array controller returns an ASC of 0x84 (LUN Dead) we
			 * should trust it.
			 */

			if (sense_key == KEY_HARDWARE_ERROR) {
				switch (asc) {
				case 0x84:
					if (SD_IS_LSI(un)) {
						try_resetting_target = B_FALSE;
					}
					break;
				default:
					break;
				}
			}

			if (try_resetting_target == B_TRUE) {
				int reset_retval = 0;
				/*
				 * Prefer a LUN reset when enabled; it only
				 * affects this LUN.  Escalate to a target
				 * reset if the LUN reset was not issued or
				 * did not succeed (reset_retval == 0).
				 */
				if (un->un_f_lun_reset_enabled == TRUE) {
					SD_TRACE(SD_LOG_IO_CORE, un,
					    "sd_sense_key_medium_or_hardware_"
					    "error: issuing RESET_LUN\n");
					reset_retval =
					    scsi_reset(SD_ADDRESS(un),
					    RESET_LUN);
				}
				if (reset_retval == 0) {
					SD_TRACE(SD_LOG_IO_CORE, un,
					    "sd_sense_key_medium_or_hardware_"
					    "error: issuing RESET_TARGET\n");
					(void) scsi_reset(SD_ADDRESS(un),
					    RESET_TARGET);
				}
			}
		}
		mutex_enter(SD_MUTEX(un));
	}

	/*
	 * This really ought to be a fatal error, but we will retry anyway
	 * as some drives report this as a spurious error.
	 */
	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
	    &si, EIO, (clock_t)0, NULL);
}
17838 
17839 
17840 
17841 /*
17842  *    Function: sd_sense_key_illegal_request
17843  *
17844  * Description: Recovery actions for a SCSI "Illegal Request" sense key.
17845  *
17846  *     Context: May be called from interrupt context
17847  */
17848 
17849 static void
17850 sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
17851 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17852 {
17853 	struct sd_sense_info	si;
17854 
17855 	ASSERT(un != NULL);
17856 	ASSERT(mutex_owned(SD_MUTEX(un)));
17857 	ASSERT(bp != NULL);
17858 	ASSERT(xp != NULL);
17859 	ASSERT(pktp != NULL);
17860 
17861 	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
17862 
17863 	si.ssi_severity = SCSI_ERR_INFO;
17864 	si.ssi_pfa_flag = FALSE;
17865 
17866 	/* Pointless to retry if the target thinks it's an illegal request */
17867 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17868 	sd_return_failed_command(un, bp, EIO);
17869 }
17870 
17871 
17872 
17873 
17874 /*
17875  *    Function: sd_sense_key_unit_attention
17876  *
17877  * Description: Recovery actions for a SCSI "Unit Attention" sense key.
17878  *
17879  *     Context: May be called from interrupt context
17880  */
17881 
static void
sd_sense_key_unit_attention(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	/*
	 * For UNIT ATTENTION we allow retries for one minute. Devices
	 * like Sonoma can return UNIT ATTENTION close to a minute
	 * under certain conditions.
	 */
	int	retry_check_flag = SD_RETRIES_UA;
	boolean_t	kstat_updated = B_FALSE;
	struct	sd_sense_info		si;
	uint8_t asc = scsi_sense_asc(sense_datap);
	uint8_t	ascq = scsi_sense_ascq(sense_datap);

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	si.ssi_severity = SCSI_ERR_INFO;
	si.ssi_pfa_flag = FALSE;


	/* Dispatch on the additional sense code to pick the recovery. */
	switch (asc) {
	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
		/*
		 * Predictive failure analysis: if PFA reporting is enabled
		 * via the sd_report_pfa tunable, note it in the kstats and
		 * retry with the standard (non-UA) retry policy.
		 */
		if (sd_report_pfa != 0) {
			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
			si.ssi_pfa_flag = TRUE;
			retry_check_flag = SD_RETRIES_STANDARD;
			goto do_retry;
		}

		break;

	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
		/*
		 * A reset may have invalidated our reservation; remember
		 * that it was lost and that we want to reacquire it.
		 */
		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
			un->un_resvd_status |=
			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
		}
#ifdef _LP64
		/*
		 * NOTE(review): presumably the reset also reverted the
		 * device to fixed sense, so descriptor sense is re-enabled
		 * here for devices whose capacity exceeds the group-1
		 * addressing limit — confirm against
		 * sd_reenable_dsense_task.
		 */
		if (un->un_blockcount + 1 > SD_GROUP1_MAX_ADDRESS) {
			if (taskq_dispatch(sd_tq, sd_reenable_dsense_task,
			    un, KM_NOSLEEP) == 0) {
				/*
				 * If we can't dispatch the task we'll just
				 * live without descriptor sense.  We can
				 * try again on the next "unit attention"
				 */
				SD_ERROR(SD_LOG_ERROR, un,
				    "sd_sense_key_unit_attention: "
				    "Could not dispatch "
				    "sd_reenable_dsense_task\n");
			}
		}
#endif /* _LP64 */
		/* FALLTHRU */

	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
		/* Media-change handling only applies to removable media. */
		if (!un->un_f_has_removable_media) {
			break;
		}

		/*
		 * When we get a unit attention from a removable-media device,
		 * it may be in a state that will take a long time to recover
		 * (e.g., from a reset).  Since we are executing in interrupt
		 * context here, we cannot wait around for the device to come
		 * back. So hand this command off to sd_media_change_task()
		 * for deferred processing under taskq thread context. (Note
		 * that the command still may be failed if a problem is
		 * encountered at a later time.)
		 */
		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
		    KM_NOSLEEP) == 0) {
			/*
			 * Cannot dispatch the request so fail the command.
			 */
			SD_UPDATE_ERRSTATS(un, sd_harderrs);
			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
			si.ssi_severity = SCSI_ERR_FATAL;
			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
			sd_return_failed_command(un, bp, EIO);
		}

		/*
		 * If failed to dispatch sd_media_change_task(), we already
		 * updated kstat. If succeed to dispatch sd_media_change_task(),
		 * we should update kstat later if it encounters an error. So,
		 * we update kstat_updated flag here.
		 */
		kstat_updated = B_TRUE;

		/*
		 * Either the command has been successfully dispatched to a
		 * task Q for retrying, or the dispatch failed. In either case
		 * do NOT retry again by calling sd_retry_command. This sets up
		 * two retries of the same command and when one completes and
		 * frees the resources the other will access freed memory,
		 * a bad thing.
		 */
		return;

	default:
		break;
	}

	/*
	 * ASC  ASCQ
	 *  2A   09	Capacity data has changed
	 *  2A   01	Mode parameters changed
	 *  3F   0E	Reported luns data has changed
	 * Arrays that support logical unit expansion should report
	 * capacity changes(2Ah/09). Mode parameters changed and
	 * reported luns data has changed are the approximation.
	 */
	if (((asc == 0x2a) && (ascq == 0x09)) ||
	    ((asc == 0x2a) && (ascq == 0x01)) ||
	    ((asc == 0x3f) && (ascq == 0x0e))) {
		if (taskq_dispatch(sd_tq, sd_target_change_task, un,
		    KM_NOSLEEP) == 0) {
			SD_ERROR(SD_LOG_ERROR, un,
			    "sd_sense_key_unit_attention: "
			    "Could not dispatch sd_target_change_task\n");
		}
	}

	/*
	 * Update kstat if we haven't done that.
	 */
	if (!kstat_updated) {
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
	}

do_retry:
	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
	    EIO, SD_UA_RETRY_DELAY, NULL);
}
18023 
18024 
18025 
18026 /*
18027  *    Function: sd_sense_key_fail_command
18028  *
18029  * Description: Use to fail a command when we don't like the sense key that
18030  *		was returned.
18031  *
18032  *     Context: May be called from interrupt context
18033  */
18034 
18035 static void
18036 sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
18037 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18038 {
18039 	struct sd_sense_info	si;
18040 
18041 	ASSERT(un != NULL);
18042 	ASSERT(mutex_owned(SD_MUTEX(un)));
18043 	ASSERT(bp != NULL);
18044 	ASSERT(xp != NULL);
18045 	ASSERT(pktp != NULL);
18046 
18047 	si.ssi_severity = SCSI_ERR_FATAL;
18048 	si.ssi_pfa_flag = FALSE;
18049 
18050 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18051 	sd_return_failed_command(un, bp, EIO);
18052 }
18053 
18054 
18055 
18056 /*
18057  *    Function: sd_sense_key_blank_check
18058  *
18059  * Description: Recovery actions for a SCSI "Blank Check" sense key.
18060  *		Has no monetary connotation.
18061  *
18062  *     Context: May be called from interrupt context
18063  */
18064 
18065 static void
18066 sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
18067 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18068 {
18069 	struct sd_sense_info	si;
18070 
18071 	ASSERT(un != NULL);
18072 	ASSERT(mutex_owned(SD_MUTEX(un)));
18073 	ASSERT(bp != NULL);
18074 	ASSERT(xp != NULL);
18075 	ASSERT(pktp != NULL);
18076 
18077 	/*
18078 	 * Blank check is not fatal for removable devices, therefore
18079 	 * it does not require a console message.
18080 	 */
18081 	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
18082 	    SCSI_ERR_FATAL;
18083 	si.ssi_pfa_flag = FALSE;
18084 
18085 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18086 	sd_return_failed_command(un, bp, EIO);
18087 }
18088 
18089 
18090 
18091 
18092 /*
18093  *    Function: sd_sense_key_aborted_command
18094  *
18095  * Description: Recovery actions for a SCSI "Aborted Command" sense key.
18096  *
18097  *     Context: May be called from interrupt context
18098  */
18099 
18100 static void
18101 sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
18102 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18103 {
18104 	struct sd_sense_info	si;
18105 
18106 	ASSERT(un != NULL);
18107 	ASSERT(mutex_owned(SD_MUTEX(un)));
18108 	ASSERT(bp != NULL);
18109 	ASSERT(xp != NULL);
18110 	ASSERT(pktp != NULL);
18111 
18112 	si.ssi_severity = SCSI_ERR_FATAL;
18113 	si.ssi_pfa_flag = FALSE;
18114 
18115 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18116 
18117 	/*
18118 	 * This really ought to be a fatal error, but we will retry anyway
18119 	 * as some drives report this as a spurious error.
18120 	 */
18121 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18122 	    &si, EIO, drv_usectohz(100000), NULL);
18123 }
18124 
18125 
18126 
18127 /*
18128  *    Function: sd_sense_key_default
18129  *
18130  * Description: Default recovery action for several SCSI sense keys (basically
18131  *		attempts a retry).
18132  *
18133  *     Context: May be called from interrupt context
18134  */
18135 
18136 static void
18137 sd_sense_key_default(struct sd_lun *un,
18138 	uint8_t *sense_datap,
18139 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
18140 {
18141 	struct sd_sense_info	si;
18142 	uint8_t sense_key = scsi_sense_key(sense_datap);
18143 
18144 	ASSERT(un != NULL);
18145 	ASSERT(mutex_owned(SD_MUTEX(un)));
18146 	ASSERT(bp != NULL);
18147 	ASSERT(xp != NULL);
18148 	ASSERT(pktp != NULL);
18149 
18150 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18151 
18152 	/*
18153 	 * Undecoded sense key.	Attempt retries and hope that will fix
18154 	 * the problem.  Otherwise, we're dead.
18155 	 */
18156 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
18157 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18158 		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
18159 	}
18160 
18161 	si.ssi_severity = SCSI_ERR_FATAL;
18162 	si.ssi_pfa_flag = FALSE;
18163 
18164 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18165 	    &si, EIO, (clock_t)0, NULL);
18166 }
18167 
18168 
18169 
18170 /*
18171  *    Function: sd_print_retry_msg
18172  *
18173  * Description: Print a message indicating the retry action being taken.
18174  *
18175  *   Arguments: un - ptr to associated softstate
18176  *		bp - ptr to buf(9S) for the command
18177  *		arg - not used.
18178  *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18179  *			or SD_NO_RETRY_ISSUED
18180  *
18181  *     Context: May be called from interrupt context
18182  */
18183 /* ARGSUSED */
static void
sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
{
	struct sd_xbuf	*xp;
	struct scsi_pkt *pktp;
	char *reasonp;
	char *msgp;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	/*
	 * Say nothing while the unit is suspended or powered down, or
	 * when the command was marked FLAG_SILENT; still fall through
	 * to record the pkt_reason below.
	 */
	ASSERT(!mutex_owned(&un->un_pm_mutex));
	mutex_enter(&un->un_pm_mutex);
	if ((un->un_state == SD_STATE_SUSPENDED) ||
	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
	    (pktp->pkt_flags & FLAG_SILENT)) {
		mutex_exit(&un->un_pm_mutex);
		goto update_pkt_reason;
	}
	mutex_exit(&un->un_pm_mutex);

	/*
	 * Suppress messages if they are all the same pkt_reason; with
	 * TQ, many (up to 256) are returned with the same pkt_reason.
	 * If we are in panic, then suppress the retry messages.
	 */
	switch (flag) {
	case SD_NO_RETRY_ISSUED:
		msgp = "giving up";
		break;
	case SD_IMMEDIATE_RETRY_ISSUED:
	case SD_DELAYED_RETRY_ISSUED:
		/*
		 * Repeats of the same pkt_reason are suppressed unless
		 * sd_error_level is SCSI_ERR_ALL; messages are always
		 * suppressed in panic or when the unit is offline.
		 */
		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
		    (sd_error_level != SCSI_ERR_ALL))) {
			return;
		}
		msgp = "retrying command";
		break;
	default:
		goto update_pkt_reason;
	}

	/* A parity error (STAT_PERR) overrides the transport's reason. */
	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
	    scsi_rname(pktp->pkt_reason));

	if (SD_FM_LOG(un) == SD_FM_LOG_NSUP) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);
	}

update_pkt_reason:
	/*
	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
	 * This is to prevent multiple console messages for the same failure
	 * condition.  Note that un->un_last_pkt_reason is NOT restored if &
	 * when the command is retried successfully because there still may be
	 * more commands coming back with the same value of pktp->pkt_reason.
	 */
	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
		un->un_last_pkt_reason = pktp->pkt_reason;
	}
}
18252 
18253 
18254 /*
18255  *    Function: sd_print_cmd_incomplete_msg
18256  *
18257  * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
18258  *
18259  *   Arguments: un - ptr to associated softstate
18260  *		bp - ptr to buf(9S) for the command
18261  *		arg - passed to sd_print_retry_msg()
18262  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18263  *			or SD_NO_RETRY_ISSUED
18264  *
18265  *     Context: May be called from interrupt context
18266  */
18267 
18268 static void
18269 sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
18270 	int code)
18271 {
18272 	dev_info_t	*dip;
18273 
18274 	ASSERT(un != NULL);
18275 	ASSERT(mutex_owned(SD_MUTEX(un)));
18276 	ASSERT(bp != NULL);
18277 
18278 	switch (code) {
18279 	case SD_NO_RETRY_ISSUED:
18280 		/* Command was failed. Someone turned off this target? */
18281 		if (un->un_state != SD_STATE_OFFLINE) {
18282 			/*
18283 			 * Suppress message if we are detaching and
18284 			 * device has been disconnected
18285 			 * Note that DEVI_IS_DEVICE_REMOVED is a consolidation
18286 			 * private interface and not part of the DDI
18287 			 */
18288 			dip = un->un_sd->sd_dev;
18289 			if (!(DEVI_IS_DETACHING(dip) &&
18290 			    DEVI_IS_DEVICE_REMOVED(dip))) {
18291 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18292 				"disk not responding to selection\n");
18293 			}
18294 			New_state(un, SD_STATE_OFFLINE);
18295 		}
18296 		break;
18297 
18298 	case SD_DELAYED_RETRY_ISSUED:
18299 	case SD_IMMEDIATE_RETRY_ISSUED:
18300 	default:
18301 		/* Command was successfully queued for retry */
18302 		sd_print_retry_msg(un, bp, arg, code);
18303 		break;
18304 	}
18305 }
18306 
18307 
18308 /*
18309  *    Function: sd_pkt_reason_cmd_incomplete
18310  *
18311  * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
18312  *
18313  *     Context: May be called from interrupt context
18314  */
18315 
18316 static void
18317 sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
18318 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18319 {
18320 	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
18321 
18322 	ASSERT(un != NULL);
18323 	ASSERT(mutex_owned(SD_MUTEX(un)));
18324 	ASSERT(bp != NULL);
18325 	ASSERT(xp != NULL);
18326 	ASSERT(pktp != NULL);
18327 
18328 	/* Do not do a reset if selection did not complete */
18329 	/* Note: Should this not just check the bit? */
18330 	if (pktp->pkt_state != STATE_GOT_BUS) {
18331 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18332 		sd_reset_target(un, pktp);
18333 	}
18334 
18335 	/*
18336 	 * If the target was not successfully selected, then set
18337 	 * SD_RETRIES_FAILFAST to indicate that we lost communication
18338 	 * with the target, and further retries and/or commands are
18339 	 * likely to take a long time.
18340 	 */
18341 	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
18342 		flag |= SD_RETRIES_FAILFAST;
18343 	}
18344 
18345 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18346 
18347 	sd_retry_command(un, bp, flag,
18348 	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18349 }
18350 
18351 
18352 
18353 /*
18354  *    Function: sd_pkt_reason_cmd_tran_err
18355  *
18356  * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
18357  *
18358  *     Context: May be called from interrupt context
18359  */
18360 
18361 static void
18362 sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
18363 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18364 {
18365 	ASSERT(un != NULL);
18366 	ASSERT(mutex_owned(SD_MUTEX(un)));
18367 	ASSERT(bp != NULL);
18368 	ASSERT(xp != NULL);
18369 	ASSERT(pktp != NULL);
18370 
18371 	/*
18372 	 * Do not reset if we got a parity error, or if
18373 	 * selection did not complete.
18374 	 */
18375 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18376 	/* Note: Should this not just check the bit for pkt_state? */
18377 	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
18378 	    (pktp->pkt_state != STATE_GOT_BUS)) {
18379 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18380 		sd_reset_target(un, pktp);
18381 	}
18382 
18383 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18384 
18385 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18386 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18387 }
18388 
18389 
18390 
18391 /*
18392  *    Function: sd_pkt_reason_cmd_reset
18393  *
18394  * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
18395  *
18396  *     Context: May be called from interrupt context
18397  */
18398 
18399 static void
18400 sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
18401 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18402 {
18403 	ASSERT(un != NULL);
18404 	ASSERT(mutex_owned(SD_MUTEX(un)));
18405 	ASSERT(bp != NULL);
18406 	ASSERT(xp != NULL);
18407 	ASSERT(pktp != NULL);
18408 
18409 	/* The target may still be running the command, so try to reset. */
18410 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18411 	sd_reset_target(un, pktp);
18412 
18413 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18414 
18415 	/*
18416 	 * If pkt_reason is CMD_RESET chances are that this pkt got
18417 	 * reset because another target on this bus caused it. The target
18418 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18419 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18420 	 */
18421 
18422 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18423 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18424 }
18425 
18426 
18427 
18428 
18429 /*
18430  *    Function: sd_pkt_reason_cmd_aborted
18431  *
18432  * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
18433  *
18434  *     Context: May be called from interrupt context
18435  */
18436 
18437 static void
18438 sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
18439 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18440 {
18441 	ASSERT(un != NULL);
18442 	ASSERT(mutex_owned(SD_MUTEX(un)));
18443 	ASSERT(bp != NULL);
18444 	ASSERT(xp != NULL);
18445 	ASSERT(pktp != NULL);
18446 
18447 	/* The target may still be running the command, so try to reset. */
18448 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18449 	sd_reset_target(un, pktp);
18450 
18451 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18452 
18453 	/*
18454 	 * If pkt_reason is CMD_ABORTED chances are that this pkt got
18455 	 * aborted because another target on this bus caused it. The target
18456 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18457 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18458 	 */
18459 
18460 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18461 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18462 }
18463 
18464 
18465 
18466 /*
18467  *    Function: sd_pkt_reason_cmd_timeout
18468  *
18469  * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
18470  *
18471  *     Context: May be called from interrupt context
18472  */
18473 
18474 static void
18475 sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
18476 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18477 {
18478 	ASSERT(un != NULL);
18479 	ASSERT(mutex_owned(SD_MUTEX(un)));
18480 	ASSERT(bp != NULL);
18481 	ASSERT(xp != NULL);
18482 	ASSERT(pktp != NULL);
18483 
18484 
18485 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18486 	sd_reset_target(un, pktp);
18487 
18488 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18489 
18490 	/*
18491 	 * A command timeout indicates that we could not establish
18492 	 * communication with the target, so set SD_RETRIES_FAILFAST
18493 	 * as further retries/commands are likely to take a long time.
18494 	 */
18495 	sd_retry_command(un, bp,
18496 	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
18497 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18498 }
18499 
18500 
18501 
18502 /*
18503  *    Function: sd_pkt_reason_cmd_unx_bus_free
18504  *
18505  * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
18506  *
18507  *     Context: May be called from interrupt context
18508  */
18509 
18510 static void
18511 sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
18512 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18513 {
18514 	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
18515 
18516 	ASSERT(un != NULL);
18517 	ASSERT(mutex_owned(SD_MUTEX(un)));
18518 	ASSERT(bp != NULL);
18519 	ASSERT(xp != NULL);
18520 	ASSERT(pktp != NULL);
18521 
18522 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18523 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18524 
18525 	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
18526 	    sd_print_retry_msg : NULL;
18527 
18528 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18529 	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18530 }
18531 
18532 
18533 /*
18534  *    Function: sd_pkt_reason_cmd_tag_reject
18535  *
18536  * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
18537  *
18538  *     Context: May be called from interrupt context
18539  */
18540 
18541 static void
18542 sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
18543 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18544 {
18545 	ASSERT(un != NULL);
18546 	ASSERT(mutex_owned(SD_MUTEX(un)));
18547 	ASSERT(bp != NULL);
18548 	ASSERT(xp != NULL);
18549 	ASSERT(pktp != NULL);
18550 
18551 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18552 	pktp->pkt_flags = 0;
18553 	un->un_tagflags = 0;
18554 	if (un->un_f_opt_queueing == TRUE) {
18555 		un->un_throttle = min(un->un_throttle, 3);
18556 	} else {
18557 		un->un_throttle = 1;
18558 	}
18559 	mutex_exit(SD_MUTEX(un));
18560 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
18561 	mutex_enter(SD_MUTEX(un));
18562 
18563 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18564 
18565 	/* Legacy behavior not to check retry counts here. */
18566 	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
18567 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18568 }
18569 
18570 
18571 /*
18572  *    Function: sd_pkt_reason_default
18573  *
18574  * Description: Default recovery actions for SCSA pkt_reason values that
18575  *		do not have more explicit recovery actions.
18576  *
18577  *     Context: May be called from interrupt context
18578  */
18579 
static void
sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	/*
	 * Treat an unrecognized pkt_reason as a transport error: bump the
	 * transport error stats and reset the target (sd_reset_target
	 * skips the reset if the transport already performed one).
	 */
	SD_UPDATE_ERRSTATS(un, sd_transerrs);
	sd_reset_target(un, pktp);

	SD_UPDATE_RESERVATION_STATUS(un, pktp);

	/* Retry with the standard retry policy after SD_RESTART_TIMEOUT. */
	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
}
18598 
18599 
18600 
18601 /*
18602  *    Function: sd_pkt_status_check_condition
18603  *
18604  * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
18605  *
18606  *     Context: May be called from interrupt context
18607  */
18608 
static void
sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
	    "entry: buf:0x%p xp:0x%p\n", bp, xp);

	/*
	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
	 * command will be retried after the request sense). Otherwise, retry
	 * the command. Note: we are issuing the request sense even though the
	 * retry limit may have been reached for the failed command.
	 */
	if (un->un_f_arq_enabled == FALSE) {
		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
		    "no ARQ, sending request sense command\n");
		sd_send_request_sense_command(un, bp, pktp);
	} else {
		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
		    "ARQ,retrying request sense command\n");
#if defined(__i386) || defined(__amd64)
		/*
		 * The SD_RETRY_DELAY value need to be adjusted here
		 * when SD_RETRY_DELAY change in sddef.h
		 *
		 * On x86: fibre channel targets get a 100ms retry delay;
		 * all other targets retry with no delay.
		 */
		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
		    un->un_f_is_fibre?drv_usectohz(100000):(clock_t)0,
		    NULL);
#else
		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
		    EIO, SD_RETRY_DELAY, NULL);
#endif
	}

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
}
18651 
18652 
18653 /*
18654  *    Function: sd_pkt_status_busy
18655  *
18656  * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
18657  *
18658  *     Context: May be called from interrupt context
18659  */
18660 
static void
sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
	struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_pkt_status_busy: entry\n");

	/* If retries are exhausted, just fail the command. */
	if (xp->xb_retry_count >= un->un_busy_retry_count) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "device busy too long\n");
		sd_return_failed_command(un, bp, EIO);
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_pkt_status_busy: exit\n");
		return;
	}
	xp->xb_retry_count++;

	/*
	 * Try to reset the target. However, we do not want to perform
	 * more than one reset if the device continues to fail. The reset
	 * will be performed when the retry count reaches the reset
	 * threshold.  This threshold should be set such that at least
	 * one retry is issued before the reset is performed.
	 */
	if (xp->xb_retry_count ==
	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
		int rval = 0;
		/* The unit mutex must be dropped across scsi_reset() calls. */
		mutex_exit(SD_MUTEX(un));
		if (un->un_f_allow_bus_device_reset == TRUE) {
			/*
			 * First try to reset the LUN; if we cannot then
			 * try to reset the target.
			 */
			if (un->un_f_lun_reset_enabled == TRUE) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_pkt_status_busy: RESET_LUN\n");
				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
			}
			if (rval == 0) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_pkt_status_busy: RESET_TARGET\n");
				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
			}
		}
		if (rval == 0) {
			/*
			 * If the RESET_LUN and/or RESET_TARGET failed,
			 * try RESET_ALL
			 */
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_pkt_status_busy: RESET_ALL\n");
			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
		}
		mutex_enter(SD_MUTEX(un));
		if (rval == 0) {
			/*
			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
			 * At this point we give up & fail the command.
			 */
			sd_return_failed_command(un, bp, EIO);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_pkt_status_busy: exit (failed cmd)\n");
			return;
		}
	}

	/*
	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
	 * we have already checked the retry counts above.
	 */
	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
	    EIO, un->un_busy_timeout, NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_pkt_status_busy: exit\n");
}
18744 
18745 
18746 /*
18747  *    Function: sd_pkt_status_reservation_conflict
18748  *
18749  * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
18750  *		command status.
18751  *
18752  *     Context: May be called from interrupt context
18753  */
18754 
static void
sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	/*
	 * If the command was PERSISTENT_RESERVATION_[IN|OUT] then reservation
	 * conflict could be due to various reasons like incorrect keys, not
	 * registered or not reserved etc. So, we return EACCES to the caller.
	 */
	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
		int cmd = SD_GET_PKT_OPCODE(pktp);
		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
			sd_return_failed_command(un, bp, EACCES);
			return;
		}
	}

	/* Record that this unit has encountered a reservation conflict. */
	un->un_resvd_status |= SD_RESERVATION_CONFLICT;

	/*
	 * If failfast is armed for this unit and globally enabled, a
	 * reservation conflict forces a panic; otherwise fail with EACCES.
	 */
	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
		if (sd_failfast_enable != 0) {
			/* By definition, we must panic here.... */
			sd_panic_for_res_conflict(un);
			/*NOTREACHED*/
		}
		SD_ERROR(SD_LOG_IO, un,
		    "sd_handle_resv_conflict: Disk Reserved\n");
		sd_return_failed_command(un, bp, EACCES);
		return;
	}

	/*
	 * 1147670: retry only if sd_retry_on_reservation_conflict
	 * property is set (default is 1). Retries will not succeed
	 * on a disk reserved by another initiator. HA systems
	 * may reset this via sd.conf to avoid these retries.
	 *
	 * Note: The legacy return code for this failure is EIO, however EACCES
	 * seems more appropriate for a reservation conflict.
	 */
	if (sd_retry_on_reservation_conflict == 0) {
		SD_ERROR(SD_LOG_IO, un,
		    "sd_handle_resv_conflict: Device Reserved\n");
		sd_return_failed_command(un, bp, EIO);
		return;
	}

	/*
	 * Retry the command if we can.
	 *
	 * Note: The legacy return code for this failure is EIO, however EACCES
	 * seems more appropriate for a reservation conflict.
	 */
	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
	    (clock_t)2, NULL);
}
18818 
18819 
18820 
18821 /*
18822  *    Function: sd_pkt_status_qfull
18823  *
18824  * Description: Handle a QUEUE FULL condition from the target.  This can
18825  *		occur if the HBA does not handle the queue full condition.
18826  *		(Basically this means third-party HBAs as Sun HBAs will
18827  *		handle the queue full condition.)  Note that if there are
18828  *		some commands already in the transport, then the queue full
18829  *		has occurred because the queue for this nexus is actually
18830  *		full. If there are no commands in the transport, then the
18831  *		queue full is resulting from some other initiator or lun
18832  *		consuming all the resources at the target.
18833  *
18834  *     Context: May be called from interrupt context
18835  */
18836 
static void
sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_pkt_status_qfull: entry\n");

	/*
	 * Just lower the QFULL throttle and retry the command.  Note that
	 * we do not limit the number of retries here (SD_RETRIES_NOCHECK),
	 * and errno 0 is passed since QFULL is not treated as an error.
	 */
	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
	    SD_RESTART_TIMEOUT, NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_pkt_status_qfull: exit\n");
}
18861 
18862 
18863 /*
18864  *    Function: sd_reset_target
18865  *
18866  * Description: Issue a scsi_reset(9F), with either RESET_LUN,
18867  *		RESET_TARGET, or RESET_ALL.
18868  *
18869  *     Context: May be called under interrupt context.
18870  */
18871 
static void
sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
{
	int rval = 0;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");

	/*
	 * No need to reset if the transport layer has already done so.
	 */
	if ((pktp->pkt_statistics &
	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_reset_target: no reset\n");
		return;
	}

	/* The mutex must be dropped across the scsi_reset() calls below. */
	mutex_exit(SD_MUTEX(un));

	/*
	 * Escalate: try RESET_LUN first (if enabled), then RESET_TARGET,
	 * and finally fall back to RESET_ALL if the narrower resets failed
	 * or bus/device resets are not allowed for this unit.
	 */
	if (un->un_f_allow_bus_device_reset == TRUE) {
		if (un->un_f_lun_reset_enabled == TRUE) {
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_reset_target: RESET_LUN\n");
			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
		}
		if (rval == 0) {
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_reset_target: RESET_TARGET\n");
			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
		}
	}

	if (rval == 0) {
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_reset_target: RESET_ALL\n");
		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
	}

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
}
18918 
18919 /*
18920  *    Function: sd_target_change_task
18921  *
18922  * Description: Handle dynamic target change
18923  *
18924  *     Context: Executes in a taskq() thread context
18925  */
static void
sd_target_change_task(void *arg)
{
	struct sd_lun		*un = arg;
	uint64_t		capacity;
	diskaddr_t		label_cap;
	uint_t			lbasize;
	sd_ssc_t		*ssc;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	/* Nothing to do until both blockcount and blocksize are valid. */
	if ((un->un_f_blockcount_is_valid == FALSE) ||
	    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
		return;
	}

	ssc = sd_ssc_init(un);

	if (sd_send_scsi_READ_CAPACITY(ssc, &capacity,
	    &lbasize, SD_PATH_DIRECT) != 0) {
		SD_ERROR(SD_LOG_ERROR, un,
		    "sd_target_change_task: fail to read capacity\n");
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		goto task_exit;
	}

	mutex_enter(SD_MUTEX(un));
	/* Only act on a capacity increase; shrink or no change is ignored. */
	if (capacity <= un->un_blockcount) {
		mutex_exit(SD_MUTEX(un));
		goto task_exit;
	}

	sd_update_block_info(un, lbasize, capacity);
	mutex_exit(SD_MUTEX(un));

	/*
	 * If lun is EFI labeled and lun capacity is greater than the
	 * capacity contained in the label, log a sys event.
	 */
	if (cmlb_efi_label_capacity(un->un_cmlbhandle, &label_cap,
	    (void*)SD_PATH_DIRECT) == 0) {
		mutex_enter(SD_MUTEX(un));
		if (un->un_f_blockcount_is_valid &&
		    un->un_blockcount > label_cap) {
			mutex_exit(SD_MUTEX(un));
			sd_log_lun_expansion_event(un, KM_SLEEP);
		} else {
			mutex_exit(SD_MUTEX(un));
		}
	}

task_exit:
	sd_ssc_fini(ssc);
}
18981 
18982 /*
18983  *    Function: sd_log_lun_expansion_event
18984  *
18985  * Description: Log lun expansion sys event
18986  *
18987  *     Context: Never called from interrupt context
18988  */
18989 static void
18990 sd_log_lun_expansion_event(struct sd_lun *un, int km_flag)
18991 {
18992 	int err;
18993 	char			*path;
18994 	nvlist_t		*dle_attr_list;
18995 
18996 	/* Allocate and build sysevent attribute list */
18997 	err = nvlist_alloc(&dle_attr_list, NV_UNIQUE_NAME_TYPE, km_flag);
18998 	if (err != 0) {
18999 		SD_ERROR(SD_LOG_ERROR, un,
19000 		    "sd_log_lun_expansion_event: fail to allocate space\n");
19001 		return;
19002 	}
19003 
19004 	path = kmem_alloc(MAXPATHLEN, km_flag);
19005 	if (path == NULL) {
19006 		nvlist_free(dle_attr_list);
19007 		SD_ERROR(SD_LOG_ERROR, un,
19008 		    "sd_log_lun_expansion_event: fail to allocate space\n");
19009 		return;
19010 	}
19011 	/*
19012 	 * Add path attribute to identify the lun.
19013 	 * We are using minor node 'a' as the sysevent attribute.
19014 	 */
19015 	(void) snprintf(path, MAXPATHLEN, "/devices");
19016 	(void) ddi_pathname(SD_DEVINFO(un), path + strlen(path));
19017 	(void) snprintf(path + strlen(path), MAXPATHLEN - strlen(path),
19018 	    ":a");
19019 
19020 	err = nvlist_add_string(dle_attr_list, DEV_PHYS_PATH, path);
19021 	if (err != 0) {
19022 		nvlist_free(dle_attr_list);
19023 		kmem_free(path, MAXPATHLEN);
19024 		SD_ERROR(SD_LOG_ERROR, un,
19025 		    "sd_log_lun_expansion_event: fail to add attribute\n");
19026 		return;
19027 	}
19028 
19029 	/* Log dynamic lun expansion sysevent */
19030 	err = ddi_log_sysevent(SD_DEVINFO(un), SUNW_VENDOR, EC_DEV_STATUS,
19031 	    ESC_DEV_DLE, dle_attr_list, NULL, km_flag);
19032 	if (err != DDI_SUCCESS) {
19033 		SD_ERROR(SD_LOG_ERROR, un,
19034 		    "sd_log_lun_expansion_event: fail to log sysevent\n");
19035 	}
19036 
19037 	nvlist_free(dle_attr_list);
19038 	kmem_free(path, MAXPATHLEN);
19039 }
19040 
19041 /*
19042  *    Function: sd_media_change_task
19043  *
19044  * Description: Recovery action for CDROM to become available.
19045  *
19046  *     Context: Executes in a taskq() thread context
19047  */
19048 
static void
sd_media_change_task(void *arg)
{
	struct	scsi_pkt	*pktp = arg;
	struct	sd_lun		*un;
	struct	buf		*bp;
	struct	sd_xbuf		*xp;
	int	err		= 0;
	int	retry_count	= 0;
	/* Start with 1/10 of the full unit-attention retry budget. */
	int	retry_limit	= SD_UNIT_ATTENTION_RETRY/10;
	struct	sd_sense_info	si;

	ASSERT(pktp != NULL);
	bp = (struct buf *)pktp->pkt_private;
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_f_monitor_media_state);

	si.ssi_severity = SCSI_ERR_INFO;
	si.ssi_pfa_flag = FALSE;

	/*
	 * When a reset is issued on a CDROM, it takes a long time to
	 * recover. First few attempts to read capacity and other things
	 * related to handling unit attention fail (with a ASC 0x4 and
	 * ASCQ 0x1). In that case we want to do enough retries and we want
	 * to limit the retries in other cases of genuine failures like
	 * no media in drive.
	 */
	while (retry_count++ < retry_limit) {
		if ((err = sd_handle_mchange(un)) == 0) {
			break;
		}
		if (err == EAGAIN) {
			/* Device is becoming ready: extend to the full budget. */
			retry_limit = SD_UNIT_ATTENTION_RETRY;
		}
		/* Sleep for 0.5 sec. & try again */
		delay(drv_usectohz(500000));
	}

	/*
	 * Dispatch (retry or fail) the original command here,
	 * along with appropriate console messages....
	 *
	 * Must grab the mutex before calling sd_retry_command,
	 * sd_print_sense_msg and sd_return_failed_command.
	 */
	mutex_enter(SD_MUTEX(un));
	if (err != SD_CMD_SUCCESS) {
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
		si.ssi_severity = SCSI_ERR_FATAL;
		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
		sd_return_failed_command(un, bp, EIO);
	} else {
		sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
		    &si, EIO, (clock_t)0, NULL);
	}
	mutex_exit(SD_MUTEX(un));
}
19113 
19114 
19115 
19116 /*
19117  *    Function: sd_handle_mchange
19118  *
19119  * Description: Perform geometry validation & other recovery when CDROM
19120  *		has been removed from drive.
19121  *
19122  * Return Code: 0 for success
19123  *		errno-type return code of either sd_send_scsi_DOORLOCK() or
19124  *		sd_send_scsi_READ_CAPACITY()
19125  *
19126  *     Context: Executes in a taskq() thread context
19127  */
19128 
static int
sd_handle_mchange(struct sd_lun *un)
{
	uint64_t	capacity;
	uint32_t	lbasize;
	int		rval;
	sd_ssc_t	*ssc;

	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_f_monitor_media_state);

	/* Re-read the capacity of the (possibly new) media. */
	ssc = sd_ssc_init(un);
	rval = sd_send_scsi_READ_CAPACITY(ssc, &capacity, &lbasize,
	    SD_PATH_DIRECT_PRIORITY);

	if (rval != 0)
		goto failed;

	mutex_enter(SD_MUTEX(un));
	sd_update_block_info(un, lbasize, capacity);

	/* Refresh the capacity kstat to match the new media. */
	if (un->un_errstats != NULL) {
		struct	sd_errstats *stp =
		    (struct sd_errstats *)un->un_errstats->ks_data;
		stp->sd_capacity.value.ui64 = (uint64_t)
		    ((uint64_t)un->un_blockcount *
		    (uint64_t)un->un_tgt_blocksize);
	}

	/*
	 * Check if the media in the device is writable or not
	 */
	if (ISCD(un)) {
		sd_check_for_writable_cd(ssc, SD_PATH_DIRECT_PRIORITY);
	}

	/*
	 * Note: Maybe let the strategy/partitioning chain worry about getting
	 * valid geometry.
	 */
	mutex_exit(SD_MUTEX(un));
	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);


	if (cmlb_validate(un->un_cmlbhandle, 0,
	    (void *)SD_PATH_DIRECT_PRIORITY) != 0) {
		sd_ssc_fini(ssc);
		return (EIO);
	} else {
		if (un->un_f_pkstats_enabled) {
			sd_set_pstats(un);
			SD_TRACE(SD_LOG_IO_PARTITION, un,
			    "sd_handle_mchange: un:0x%p pstats created and "
			    "set\n", un);
		}
	}

	/*
	 * Try to lock the door
	 */
	rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
	    SD_PATH_DIRECT_PRIORITY);
failed:
	/* On any failure, tell FMA to ignore this command's assessment. */
	if (rval != 0)
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	sd_ssc_fini(ssc);
	return (rval);
}
19197 
19198 
19199 /*
19200  *    Function: sd_send_scsi_DOORLOCK
19201  *
19202  * Description: Issue the scsi DOOR LOCK command
19203  *
19204  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
19205  *                      structure for this target.
19206  *		flag  - SD_REMOVAL_ALLOW
19207  *			SD_REMOVAL_PREVENT
19208  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19209  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19210  *			to use the USCSI "direct" chain and bypass the normal
19211  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19212  *			command is issued as part of an error recovery action.
19213  *
19214  * Return Code: 0   - Success
19215  *		errno return code from sd_ssc_send()
19216  *
19217  *     Context: Can sleep.
19218  */
19219 
static int
sd_send_scsi_DOORLOCK(sd_ssc_t *ssc, int flag, int path_flag)
{
	struct scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);

	/* already determined doorlock is not supported, fake success */
	if (un->un_f_doorlock_supported == FALSE) {
		return (0);
	}

	/*
	 * If we are ejecting and see an SD_REMOVAL_PREVENT
	 * ignore the command so we can complete the eject
	 * operation.
	 */
	if (flag == SD_REMOVAL_PREVENT) {
		mutex_enter(SD_MUTEX(un));
		if (un->un_f_ejecting == TRUE) {
			mutex_exit(SD_MUTEX(un));
			return (EAGAIN);
		}
		mutex_exit(SD_MUTEX(un));
	}

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));

	/* CDB byte 4 carries the prevent/allow flag. */
	cdb.scc_cmd = SCMD_DOORLOCK;
	cdb.cdb_opaque[4] = (uchar_t)flag;

	/* No data phase; request sense data on error. */
	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
	ucmd_buf.uscsi_bufaddr	= NULL;
	ucmd_buf.uscsi_buflen	= 0;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 15;

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_DOORLOCK: returning sd_ssc_send\n");

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, path_flag);

	if (status == 0)
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);

	/*
	 * ILLEGAL REQUEST means the device does not implement DOORLOCK;
	 * remember that and fake success now and for future calls.
	 */
	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
	    (scsi_sense_key((uint8_t *)&sense_buf) == KEY_ILLEGAL_REQUEST)) {
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);

		/* fake success and skip subsequent doorlock commands */
		un->un_f_doorlock_supported = FALSE;
		return (0);
	}

	return (status);
}
19291 
19292 /*
19293  *    Function: sd_send_scsi_READ_CAPACITY
19294  *
19295  * Description: This routine uses the scsi READ CAPACITY command to determine
19296  *		the device capacity in number of blocks and the device native
19297  *		block size. If this function returns a failure, then the
19298  *		values in *capp and *lbap are undefined.  If the capacity
19299  *		returned is 0xffffffff then the lun is too large for a
19300  *		normal READ CAPACITY command and the results of a
19301  *		READ CAPACITY 16 will be used instead.
19302  *
19303  *   Arguments: ssc   - ssc contains ptr to soft state struct for the target
19304  *		capp - ptr to unsigned 64-bit variable to receive the
19305  *			capacity value from the command.
 *		lbap - ptr to unsigned 32-bit variable to receive the
19307  *			block size value from the command
19308  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19309  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19310  *			to use the USCSI "direct" chain and bypass the normal
19311  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19312  *			command is issued as part of an error recovery action.
19313  *
19314  * Return Code: 0   - Success
19315  *		EIO - IO error
19316  *		EACCES - Reservation conflict detected
19317  *		EAGAIN - Device is becoming ready
19318  *		errno return code from sd_ssc_send()
19319  *
19320  *     Context: Can sleep.  Blocks until command completes.
19321  */
19322 
19323 #define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)
19324 
static int
sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap,
	int path_flag)
{
	struct	scsi_extended_sense	sense_buf;
	struct	uscsi_cmd	ucmd_buf;
	union	scsi_cdb	cdb;
	uint32_t		*capacity_buf;
	uint64_t		capacity;
	uint32_t		lbasize;
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);

	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(capp != NULL);
	ASSERT(lbap != NULL);

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);

	/*
	 * First send a READ_CAPACITY command to the target.
	 * (This command is mandatory under SCSI-2.)
	 *
	 * Set up the CDB for the READ_CAPACITY command.  The Partial
	 * Medium Indicator bit is cleared.  The address field must be
	 * zero if the PMI bit is zero.
	 */
	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));

	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);

	cdb.scc_cmd = SCMD_READ_CAPACITY;

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, path_flag);

	switch (status) {
	case 0:
		/* Return failure if we did not get valid capacity data. */
		if (ucmd_buf.uscsi_resid != 0) {
			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
			    "sd_send_scsi_READ_CAPACITY received invalid "
			    "capacity data");
			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
			return (EIO);
		}
		/*
		 * Read capacity and block size from the READ CAPACITY 10 data.
		 * This data may be adjusted later due to device specific
		 * issues.
		 *
		 * According to the SCSI spec, the READ CAPACITY 10
		 * command returns the following:
		 *
		 *  bytes 0-3: Maximum logical block address available.
		 *		(MSB in byte:0 & LSB in byte:3)
		 *
		 *  bytes 4-7: Block length in bytes
		 *		(MSB in byte:4 & LSB in byte:7)
		 *
		 */
		capacity = BE_32(capacity_buf[0]);
		lbasize = BE_32(capacity_buf[1]);

		/*
		 * Done with capacity_buf
		 * (freed here so the READ CAPACITY 16 path below and the
		 * early return in it do not need to free it again)
		 */
		kmem_free(capacity_buf, SD_CAPACITY_SIZE);

		/*
		 * if the reported capacity is set to all 0xf's, then
		 * this disk is too large and requires SBC-2 commands.
		 * Reissue the request using READ CAPACITY 16.
		 */
		if (capacity == 0xffffffff) {
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
			status = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity,
			    &lbasize, path_flag);
			if (status != 0) {
				return (status);
			}
		}
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			/*
			 * Check condition; look for ASC/ASCQ of 0x04/0x01
			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
			 */
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
				return (EAGAIN);
			}
			break;
		default:
			break;
		}
		/* FALLTHRU */
	default:
		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
		return (status);
	}

	/*
	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
	 * (2352 and 0 are common) so for these devices always force the value
	 * to 2048 as required by the ATAPI specs.
	 */
	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
		lbasize = 2048;
	}

	/*
	 * Get the maximum LBA value from the READ CAPACITY data.
	 * Here we assume that the Partial Medium Indicator (PMI) bit
	 * was cleared when issuing the command. This means that the LBA
	 * returned from the device is the LBA of the last logical block
	 * on the logical unit.  The actual logical block count will be
	 * this value plus one.
	 *
	 * Currently the capacity is saved in terms of un->un_sys_blocksize,
	 * so scale the capacity value to reflect this.
	 */
	capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize);

	/*
	 * Copy the values from the READ CAPACITY command into the space
	 * provided by the caller.
	 */
	*capp = capacity;
	*lbap = lbasize;

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);

	/*
	 * Both the lbasize and capacity from the device must be nonzero,
	 * otherwise we assume that the values are not valid and return
	 * failure to the caller. (4203735)
	 */
	if ((capacity == 0) || (lbasize == 0)) {
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
		    "sd_send_scsi_READ_CAPACITY received invalid value "
		    "capacity %llu lbasize %d", capacity, lbasize);
		return (EIO);
	}
	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
	return (0);
}
19495 
19496 /*
19497  *    Function: sd_send_scsi_READ_CAPACITY_16
19498  *
19499  * Description: This routine uses the scsi READ CAPACITY 16 command to
19500  *		determine the device capacity in number of blocks and the
19501  *		device native block size.  If this function returns a failure,
19502  *		then the values in *capp and *lbap are undefined.
19503  *		This routine should always be called by
 *		sd_send_scsi_READ_CAPACITY which will apply any device
19505  *		specific adjustments to capacity and lbasize.
19506  *
19507  *   Arguments: ssc   - ssc contains ptr to soft state struct for the target
19508  *		capp - ptr to unsigned 64-bit variable to receive the
19509  *			capacity value from the command.
 *		lbap - ptr to unsigned 32-bit variable to receive the
19511  *			block size value from the command
19512  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19513  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19514  *			to use the USCSI "direct" chain and bypass the normal
19515  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
19516  *			this command is issued as part of an error recovery
19517  *			action.
19518  *
19519  * Return Code: 0   - Success
19520  *		EIO - IO error
19521  *		EACCES - Reservation conflict detected
19522  *		EAGAIN - Device is becoming ready
19523  *		errno return code from sd_ssc_send()
19524  *
19525  *     Context: Can sleep.  Blocks until command completes.
19526  */
19527 
19528 #define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
19529 
static int
sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp,
	uint32_t *lbap, int path_flag)
{
	struct	scsi_extended_sense	sense_buf;
	struct	uscsi_cmd	ucmd_buf;
	union	scsi_cdb	cdb;
	uint64_t		*capacity16_buf;
	uint64_t		capacity;
	uint32_t		lbasize;
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);

	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(capp != NULL);
	ASSERT(lbap != NULL);

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);

	/*
	 * First send a READ_CAPACITY_16 command to the target.
	 *
	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
	 * Medium Indicator bit is cleared.  The address field must be
	 * zero if the PMI bit is zero.
	 */
	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));

	/*
	 * Heap-allocate the data-in buffer for the parameter data; note
	 * that it must be freed on every exit path below.
	 */
	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	/*
	 * Read Capacity (16) is a Service Action In command.  One
	 * command byte (0x9E) is overloaded for multiple operations,
	 * with the second CDB byte specifying the desired operation
	 */
	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;

	/*
	 * Fill in allocation length field
	 */
	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, path_flag);

	switch (status) {
	case 0:
		/*
		 * Return failure if we did not get valid capacity data.
		 * A residual greater than 20 means fewer than 12 bytes of
		 * parameter data arrived, i.e. the 8-byte LBA and 4-byte
		 * block length fields read below would be incomplete.
		 */
		if (ucmd_buf.uscsi_resid > 20) {
			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
			    "sd_send_scsi_READ_CAPACITY_16 received invalid "
			    "capacity data");
			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
			return (EIO);
		}

		/*
		 * Read capacity and block size from the READ CAPACITY 16
		 * data. This data may be adjusted later due to device
		 * specific issues.
		 *
		 * According to the SCSI spec, the READ CAPACITY 16
		 * command returns the following:
		 *
		 *  bytes 0-7: Maximum logical block address available.
		 *		(MSB in byte:0 & LSB in byte:7)
		 *
		 *  bytes 8-11: Block length in bytes
		 *		(MSB in byte:8 & LSB in byte:11)
		 *
		 */
		capacity = BE_64(capacity16_buf[0]);
		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);

		/*
		 * Done with capacity16_buf
		 */
		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);

		/*
		 * if the reported capacity is set to all 0xf's, then
		 * this disk is too large.  This could only happen with
		 * a device that supports LBAs larger than 64 bits which
		 * are not defined by any current T10 standards.
		 */
		if (capacity == 0xffffffffffffffff) {
			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
			    "disk is too large");
			return (EIO);
		}
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			/*
			 * Check condition; look for ASC/ASCQ of 0x04/0x01
			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
			 */
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
				return (EAGAIN);
			}
			break;
		default:
			break;
		}
		/* FALLTHRU */
	default:
		/* Any other failure: free the buffer and pass status up. */
		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
		return (status);
	}

	*capp = capacity;
	*lbap = lbasize;

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);

	return (0);
}
19671 
19672 
19673 /*
19674  *    Function: sd_send_scsi_START_STOP_UNIT
19675  *
19676  * Description: Issue a scsi START STOP UNIT command to the target.
19677  *
 *   Arguments: ssc    - ssc contains pointer to driver soft state (unit)
19679  *                       structure for this target.
19680  *		flag  - SD_TARGET_START
19681  *			SD_TARGET_STOP
19682  *			SD_TARGET_EJECT
19683  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19684  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19685  *			to use the USCSI "direct" chain and bypass the normal
19686  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19687  *			command is issued as part of an error recovery action.
19688  *
19689  * Return Code: 0   - Success
19690  *		EIO - IO error
19691  *		EACCES - Reservation conflict detected
19692  *		ENXIO  - Not Ready, medium not present
19693  *		errno return code from sd_ssc_send()
19694  *
19695  *     Context: Can sleep.
19696  */
19697 
static int
sd_send_scsi_START_STOP_UNIT(sd_ssc_t *ssc, int flag, int path_flag)
{
	struct	scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);

	/*
	 * If the device is known not to support START STOP (per the
	 * un_f_start_stop_supported flag), silently succeed for plain
	 * start/stop requests; eject requests still go to the device.
	 */
	if (un->un_f_check_start_stop &&
	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
	    (un->un_f_start_stop_supported != TRUE)) {
		return (0);
	}

	/*
	 * If an eject operation is already in progress, refuse any
	 * non-eject START STOP request with EAGAIN rather than
	 * interfering with the eject.
	 */
	if (flag != SD_TARGET_EJECT) {
		mutex_enter(SD_MUTEX(un));
		if (un->un_f_ejecting == TRUE) {
			mutex_exit(SD_MUTEX(un));
			return (EAGAIN);
		}
		mutex_exit(SD_MUTEX(un));
	}

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	/*
	 * CDB byte 4 carries the operation bits encoded in "flag"
	 * (SD_TARGET_START/STOP/EJECT).  No data phase is needed.
	 */
	cdb.scc_cmd = SCMD_START_STOP;
	cdb.cdb_opaque[4] = (uchar_t)flag;

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
	ucmd_buf.uscsi_bufaddr	= NULL;
	ucmd_buf.uscsi_buflen	= 0;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 200;

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, path_flag);

	/* Map transport/sense results onto the documented return codes. */
	switch (status) {
	case 0:
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
				switch (scsi_sense_key(
				    (uint8_t *)&sense_buf)) {
				case KEY_ILLEGAL_REQUEST:
					status = ENOTSUP;
					break;
				case KEY_NOT_READY:
					/* ASC 0x3A: medium not present */
					if (scsi_sense_asc(
					    (uint8_t *)&sense_buf)
					    == 0x3A) {
						status = ENXIO;
					}
					break;
				default:
					break;
				}
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");

	return (status);
}
19794 
19795 
19796 /*
19797  *    Function: sd_start_stop_unit_callback
19798  *
19799  * Description: timeout(9F) callback to begin recovery process for a
19800  *		device that has spun down.
19801  *
19802  *   Arguments: arg - pointer to associated softstate struct.
19803  *
19804  *     Context: Executes in a timeout(9F) thread context
19805  */
19806 
19807 static void
19808 sd_start_stop_unit_callback(void *arg)
19809 {
19810 	struct sd_lun	*un = arg;
19811 	ASSERT(un != NULL);
19812 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19813 
19814 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
19815 
19816 	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
19817 }
19818 
19819 
19820 /*
19821  *    Function: sd_start_stop_unit_task
19822  *
19823  * Description: Recovery procedure when a drive is spun down.
19824  *
19825  *   Arguments: arg - pointer to associated softstate struct.
19826  *
19827  *     Context: Executes in a taskq() thread context
19828  */
19829 
static void
sd_start_stop_unit_task(void *arg)
{
	struct sd_lun	*un = arg;
	sd_ssc_t	*ssc;
	int		rval;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");

	/*
	 * Some unformatted drives report not ready error, no need to
	 * restart if format has been initiated.
	 */
	mutex_enter(SD_MUTEX(un));
	if (un->un_f_format_in_progress == TRUE) {
		mutex_exit(SD_MUTEX(un));
		return;
	}
	mutex_exit(SD_MUTEX(un));

	/*
	 * When a START STOP command is issued from here, it is part of a
	 * failure recovery operation and must be issued before any other
	 * commands, including any pending retries. Thus it must be sent
	 * using SD_PATH_DIRECT_PRIORITY. It doesn't matter if the spin up
	 * succeeds or not, we will start I/O after the attempt.
	 *
	 * A failed START is assessed as SD_FMT_IGNORE — presumably to
	 * suppress fault reporting for this best-effort spin-up (NOTE:
	 * confirm against sd_ssc_assessment semantics).
	 */
	ssc = sd_ssc_init(un);
	rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_TARGET_START,
	    SD_PATH_DIRECT_PRIORITY);
	if (rval != 0)
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	sd_ssc_fini(ssc);
	/*
	 * The above call blocks until the START_STOP_UNIT command completes.
	 * Now that it has completed, we must re-try the original IO that
	 * received the NOT READY condition in the first place. There are
	 * three possible conditions here:
	 *
	 *  (1) The original IO is on un_retry_bp.
	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
	 *	is NULL.
	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
	 *	points to some other, unrelated bp.
	 *
	 * For each case, we must call sd_start_cmds() with un_retry_bp
	 * as the argument. If un_retry_bp is NULL, this will initiate
	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
	 * then this will process the bp on un_retry_bp. That may or may not
	 * be the original IO, but that does not matter: the important thing
	 * is to keep the IO processing going at this point.
	 *
	 * Note: This is a very specific error recovery sequence associated
	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
	 * serialize the I/O with completion of the spin-up.
	 */
	mutex_enter(SD_MUTEX(un));
	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
	    un, un->un_retry_bp);
	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
	sd_start_cmds(un, un->un_retry_bp);
	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
}
19899 
19900 
19901 /*
19902  *    Function: sd_send_scsi_INQUIRY
19903  *
19904  * Description: Issue the scsi INQUIRY command.
19905  *
19906  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
19907  *                      structure for this target.
19908  *		bufaddr
19909  *		buflen
19910  *		evpd
19911  *		page_code
19912  *		page_length
19913  *
19914  * Return Code: 0   - Success
19915  *		errno return code from sd_ssc_send()
19916  *
19917  *     Context: Can sleep. Does not return until command is completed.
19918  */
19919 
19920 static int
19921 sd_send_scsi_INQUIRY(sd_ssc_t *ssc, uchar_t *bufaddr, size_t buflen,
19922 	uchar_t evpd, uchar_t page_code, size_t *residp)
19923 {
19924 	union scsi_cdb		cdb;
19925 	struct uscsi_cmd	ucmd_buf;
19926 	int			status;
19927 	struct sd_lun		*un;
19928 
19929 	ASSERT(ssc != NULL);
19930 	un = ssc->ssc_un;
19931 	ASSERT(un != NULL);
19932 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19933 	ASSERT(bufaddr != NULL);
19934 
19935 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
19936 
19937 	bzero(&cdb, sizeof (cdb));
19938 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19939 	bzero(bufaddr, buflen);
19940 
19941 	cdb.scc_cmd = SCMD_INQUIRY;
19942 	cdb.cdb_opaque[1] = evpd;
19943 	cdb.cdb_opaque[2] = page_code;
19944 	FORMG0COUNT(&cdb, buflen);
19945 
19946 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19947 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19948 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
19949 	ucmd_buf.uscsi_buflen	= buflen;
19950 	ucmd_buf.uscsi_rqbuf	= NULL;
19951 	ucmd_buf.uscsi_rqlen	= 0;
19952 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
19953 	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
19954 
19955 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
19956 	    UIO_SYSSPACE, SD_PATH_DIRECT);
19957 
19958 	/*
19959 	 * Only handle status == 0, the upper-level caller
19960 	 * will put different assessment based on the context.
19961 	 */
19962 	if (status == 0)
19963 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
19964 
19965 	if ((status == 0) && (residp != NULL)) {
19966 		*residp = ucmd_buf.uscsi_resid;
19967 	}
19968 
19969 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
19970 
19971 	return (status);
19972 }
19973 
19974 
19975 /*
19976  *    Function: sd_send_scsi_TEST_UNIT_READY
19977  *
19978  * Description: Issue the scsi TEST UNIT READY command.
19979  *		This routine can be told to set the flag USCSI_DIAGNOSE to
19980  *		prevent retrying failed commands. Use this when the intent
19981  *		is either to check for device readiness, to clear a Unit
19982  *		Attention, or to clear any outstanding sense data.
19983  *		However under specific conditions the expected behavior
19984  *		is for retries to bring a device ready, so use the flag
19985  *		with caution.
19986  *
19987  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
19988  *                      structure for this target.
19989  *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
19990  *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
 *			0: don't check for media present, do retries on cmd.
19992  *
19993  * Return Code: 0   - Success
19994  *		EIO - IO error
19995  *		EACCES - Reservation conflict detected
19996  *		ENXIO  - Not Ready, medium not present
19997  *		errno return code from sd_ssc_send()
19998  *
19999  *     Context: Can sleep. Does not return until command is completed.
20000  */
20001 
static int
sd_send_scsi_TEST_UNIT_READY(sd_ssc_t *ssc, int flag)
{
	struct	scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);

	/*
	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
	 * timeouts when they receive a TUR and the queue is not empty. Check
	 * the configuration flag set during attach (indicating the drive has
	 * this firmware bug) and un_ncmds_in_transport before issuing the
	 * TUR. If there are
	 * pending commands return success, this is a bit arbitrary but is ok
	 * for non-removables (i.e. the eliteI disks) and non-clustering
	 * configurations.
	 */
	if (un->un_f_cfg_tur_check == TRUE) {
		mutex_enter(SD_MUTEX(un));
		if (un->un_ncmds_in_transport != 0) {
			mutex_exit(SD_MUTEX(un));
			return (0);
		}
		mutex_exit(SD_MUTEX(un));
	}

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	/* TEST UNIT READY is a 6-byte CDB with no data phase. */
	cdb.scc_cmd = SCMD_TEST_UNIT_READY;

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
	ucmd_buf.uscsi_bufaddr	= NULL;
	ucmd_buf.uscsi_buflen	= 0;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;

	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
	if ((flag & SD_DONT_RETRY_TUR) != 0) {
		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
	}
	ucmd_buf.uscsi_timeout	= 60;

	/* SD_BYPASS_PM selects the direct chain, skipping power management. */
	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT :
	    SD_PATH_STANDARD));

	switch (status) {
	case 0:
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			/*
			 * Only translate NOT READY / ASC 0x3A (medium not
			 * present) to ENXIO when the caller asked for a
			 * media-presence check.
			 */
			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
				break;
			}
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_key((uint8_t *)&sense_buf) ==
			    KEY_NOT_READY) &&
			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x3A)) {
				status = ENXIO;
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");

	return (status);
}
20094 
20095 /*
20096  *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
20097  *
20098  * Description: Issue the scsi PERSISTENT RESERVE IN command.
20099  *
20100  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
20101  *                      structure for this target.
20102  *
20103  * Return Code: 0   - Success
20104  *		EACCES
20105  *		ENOTSUP
20106  *		errno return code from sd_ssc_send()
20107  *
20108  *     Context: Can sleep. Does not return until command is completed.
20109  */
20110 
static int
sd_send_scsi_PERSISTENT_RESERVE_IN(sd_ssc_t *ssc, uchar_t  usr_cmd,
	uint16_t data_len, uchar_t *data_bufp)
{
	struct scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	int			no_caller_buf = FALSE;	/* TRUE: free buf here */
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
	if (data_bufp == NULL) {
		/* Allocate a default buf if the caller did not give one */
		ASSERT(data_len == 0);
		data_len  = MHIOC_RESV_KEY_SIZE;
		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
		no_caller_buf = TRUE;
	}

	/*
	 * The service action (SD_READ_KEYS or SD_READ_RESV) goes into
	 * CDB byte 1; the allocation length into the group-1 count field.
	 */
	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
	cdb.cdb_opaque[1] = usr_cmd;
	FORMG1COUNT(&cdb, data_len);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
	ucmd_buf.uscsi_buflen	= data_len;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, SD_PATH_STANDARD);

	switch (status) {
	case 0:
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);

		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			/* ILLEGAL REQUEST: device has no PRIN support. */
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_key((uint8_t *)&sense_buf) ==
			    KEY_ILLEGAL_REQUEST)) {
				status = ENOTSUP;
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");

	/* Only free the buffer if it was allocated locally above. */
	if (no_caller_buf == TRUE) {
		kmem_free(data_bufp, data_len);
	}

	return (status);
}
20191 
20192 
20193 /*
20194  *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
20195  *
20196  * Description: This routine is the driver entry point for handling CD-ROM
20197  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS,
20198  *		MHIOCGRP_INRESV) by sending the SCSI-3 PROUT commands to the
20199  *		device.
20200  *
20201  *   Arguments: ssc  -  ssc contains un - pointer to soft state struct
20202  *                      for the target.
20203  *		usr_cmd SCSI-3 reservation facility command (one of
20204  *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
20205  *			SD_SCSI3_PREEMPTANDABORT)
20206  *		usr_bufp - user provided pointer register, reserve descriptor or
20207  *			preempt and abort structure (mhioc_register_t,
20208  *                      mhioc_resv_desc_t, mhioc_preemptandabort_t)
20209  *
20210  * Return Code: 0   - Success
20211  *		EACCES
20212  *		ENOTSUP
20213  *		errno return code from sd_ssc_send()
20214  *
20215  *     Context: Can sleep. Does not return until command is completed.
20216  */
20217 
static int
sd_send_scsi_PERSISTENT_RESERVE_OUT(sd_ssc_t *ssc, uchar_t usr_cmd,
	uchar_t	*usr_bufp)
{
	struct scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	uchar_t			data_len = sizeof (sd_prout_t);
	sd_prout_t		*prp;	/* PROUT parameter list (data-out) */
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(data_len == 24);	/* required by scsi spec */

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);

	if (usr_bufp == NULL) {
		return (EINVAL);
	}

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
	prp = kmem_zalloc(data_len, KM_SLEEP);

	/* Service action (usr_cmd) goes in CDB byte 1, length in the count. */
	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
	cdb.cdb_opaque[1] = usr_cmd;
	FORMG1COUNT(&cdb, data_len);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
	ucmd_buf.uscsi_buflen	= data_len;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	/*
	 * Marshal the user-supplied mhioc_* structure into the 24-byte
	 * PROUT parameter list according to the requested service action.
	 */
	switch (usr_cmd) {
	case SD_SCSI3_REGISTER: {
		/* REGISTER: old key, new key, and APTPL flag. */
		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;

		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
		bcopy(ptr->newkey.key, prp->service_key,
		    MHIOC_RESV_KEY_SIZE);
		prp->aptpl = ptr->aptpl;
		break;
	}
	case SD_SCSI3_RESERVE:
	case SD_SCSI3_RELEASE: {
		/* RESERVE/RELEASE: key, scope-specific address, and type. */
		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;

		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
		prp->scope_address = BE_32(ptr->scope_specific_addr);
		cdb.cdb_opaque[2] = ptr->type;
		break;
	}
	case SD_SCSI3_PREEMPTANDABORT: {
		/*
		 * PREEMPT AND ABORT: our key, the victim's key, and the
		 * reservation descriptor; USCSI_HEAD queues this command
		 * ahead of other pending commands.
		 */
		mhioc_preemptandabort_t *ptr =
		    (mhioc_preemptandabort_t *)usr_bufp;

		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
		bcopy(ptr->victim_key.key, prp->service_key,
		    MHIOC_RESV_KEY_SIZE);
		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
		cdb.cdb_opaque[2] = ptr->resvdesc.type;
		ucmd_buf.uscsi_flags |= USCSI_HEAD;
		break;
	}
	case SD_SCSI3_REGISTERANDIGNOREKEY:
	{
		/* REGISTER AND IGNORE EXISTING KEY: new key and APTPL only. */
		mhioc_registerandignorekey_t *ptr;
		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
		bcopy(ptr->newkey.key,
		    prp->service_key, MHIOC_RESV_KEY_SIZE);
		prp->aptpl = ptr->aptpl;
		break;
	}
	default:
		ASSERT(FALSE);
		break;
	}

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, SD_PATH_STANDARD);

	switch (status) {
	case 0:
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			/* ILLEGAL REQUEST: device has no PROUT support. */
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_key((uint8_t *)&sense_buf) ==
			    KEY_ILLEGAL_REQUEST)) {
				status = ENOTSUP;
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	kmem_free(prp, data_len);
	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
	return (status);
}
20337 
20338 
20339 /*
20340  *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
20341  *
20342  * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
20343  *
20344  *   Arguments: un - pointer to the target's soft state struct
20345  *              dkc - pointer to the callback structure
20346  *
20347  * Return Code: 0 - success
20348  *		errno-type error code
20349  *
20350  *     Context: kernel thread context only.
20351  *
20352  *  _______________________________________________________________
20353  * | dkc_flag &   | dkc_callback | DKIOCFLUSHWRITECACHE            |
20354  * |FLUSH_VOLATILE|              | operation                       |
20355  * |______________|______________|_________________________________|
20356  * | 0            | NULL         | Synchronous flush on both       |
20357  * |              |              | volatile and non-volatile cache |
20358  * |______________|______________|_________________________________|
20359  * | 1            | NULL         | Synchronous flush on volatile   |
20360  * |              |              | cache; disk drivers may suppress|
20361  * |              |              | flush if disk table indicates   |
20362  * |              |              | non-volatile cache              |
20363  * |______________|______________|_________________________________|
20364  * | 0            | !NULL        | Asynchronous flush on both      |
20365  * |              |              | volatile and non-volatile cache;|
20366  * |______________|______________|_________________________________|
20367  * | 1            | !NULL        | Asynchronous flush on volatile  |
20368  * |              |              | cache; disk drivers may suppress|
20369  * |              |              | flush if disk table indicates   |
20370  * |              |              | non-volatile cache              |
20371  * |______________|______________|_________________________________|
20372  *
20373  */
20374 
20375 static int
20376 sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
20377 {
20378 	struct sd_uscsi_info	*uip;
20379 	struct uscsi_cmd	*uscmd;
20380 	union scsi_cdb		*cdb;
20381 	struct buf		*bp;
20382 	int			rval = 0;
20383 	int			is_async;
20384 
20385 	SD_TRACE(SD_LOG_IO, un,
20386 	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);
20387 
20388 	ASSERT(un != NULL);
20389 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20390 
20391 	if (dkc == NULL || dkc->dkc_callback == NULL) {
20392 		is_async = FALSE;
20393 	} else {
20394 		is_async = TRUE;
20395 	}
20396 
20397 	mutex_enter(SD_MUTEX(un));
20398 	/* check whether cache flush should be suppressed */
20399 	if (un->un_f_suppress_cache_flush == TRUE) {
20400 		mutex_exit(SD_MUTEX(un));
20401 		/*
20402 		 * suppress the cache flush if the device is told to do
20403 		 * so by sd.conf or disk table
20404 		 */
20405 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_SYNCHRONIZE_CACHE: \
20406 		    skip the cache flush since suppress_cache_flush is %d!\n",
20407 		    un->un_f_suppress_cache_flush);
20408 
20409 		if (is_async == TRUE) {
20410 			/* invoke callback for asynchronous flush */
20411 			(*dkc->dkc_callback)(dkc->dkc_cookie, 0);
20412 		}
20413 		return (rval);
20414 	}
20415 	mutex_exit(SD_MUTEX(un));
20416 
20417 	/*
20418 	 * check dkc_flag & FLUSH_VOLATILE so SYNC_NV bit can be
20419 	 * set properly
20420 	 */
20421 	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
20422 	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;
20423 
20424 	mutex_enter(SD_MUTEX(un));
20425 	if (dkc != NULL && un->un_f_sync_nv_supported &&
20426 	    (dkc->dkc_flag & FLUSH_VOLATILE)) {
20427 		/*
20428 		 * if the device supports SYNC_NV bit, turn on
20429 		 * the SYNC_NV bit to only flush volatile cache
20430 		 */
20431 		cdb->cdb_un.tag |= SD_SYNC_NV_BIT;
20432 	}
20433 	mutex_exit(SD_MUTEX(un));
20434 
20435 	/*
20436 	 * First get some memory for the uscsi_cmd struct and cdb
20437 	 * and initialize for SYNCHRONIZE_CACHE cmd.
20438 	 */
20439 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
20440 	uscmd->uscsi_cdblen = CDB_GROUP1;
20441 	uscmd->uscsi_cdb = (caddr_t)cdb;
20442 	uscmd->uscsi_bufaddr = NULL;
20443 	uscmd->uscsi_buflen = 0;
20444 	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
20445 	uscmd->uscsi_rqlen = SENSE_LENGTH;
20446 	uscmd->uscsi_rqresid = SENSE_LENGTH;
20447 	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
20448 	uscmd->uscsi_timeout = sd_io_time;
20449 
20450 	/*
20451 	 * Allocate an sd_uscsi_info struct and fill it with the info
20452 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
20453 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
20454 	 * since we allocate the buf here in this function, we do not
20455 	 * need to preserve the prior contents of b_private.
20456 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
20457 	 */
20458 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
20459 	uip->ui_flags = SD_PATH_DIRECT;
20460 	uip->ui_cmdp  = uscmd;
20461 
20462 	bp = getrbuf(KM_SLEEP);
20463 	bp->b_private = uip;
20464 
20465 	/*
20466 	 * Setup buffer to carry uscsi request.
20467 	 */
20468 	bp->b_flags  = B_BUSY;
20469 	bp->b_bcount = 0;
20470 	bp->b_blkno  = 0;
20471 
20472 	if (is_async == TRUE) {
20473 		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
20474 		uip->ui_dkc = *dkc;
20475 	}
20476 
20477 	bp->b_edev = SD_GET_DEV(un);
20478 	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */
20479 
20480 	/*
20481 	 * Unset un_f_sync_cache_required flag
20482 	 */
20483 	mutex_enter(SD_MUTEX(un));
20484 	un->un_f_sync_cache_required = FALSE;
20485 	mutex_exit(SD_MUTEX(un));
20486 
20487 	(void) sd_uscsi_strategy(bp);
20488 
20489 	/*
20490 	 * If synchronous request, wait for completion
20491 	 * If async just return and let b_iodone callback
20492 	 * cleanup.
20493 	 * NOTE: On return, u_ncmds_in_driver will be decremented,
20494 	 * but it was also incremented in sd_uscsi_strategy(), so
20495 	 * we should be ok.
20496 	 */
20497 	if (is_async == FALSE) {
20498 		(void) biowait(bp);
20499 		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
20500 	}
20501 
20502 	return (rval);
20503 }
20504 
20505 
20506 static int
20507 sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
20508 {
20509 	struct sd_uscsi_info *uip;
20510 	struct uscsi_cmd *uscmd;
20511 	uint8_t *sense_buf;
20512 	struct sd_lun *un;
20513 	int status;
20514 	union scsi_cdb *cdb;
20515 
20516 	uip = (struct sd_uscsi_info *)(bp->b_private);
20517 	ASSERT(uip != NULL);
20518 
20519 	uscmd = uip->ui_cmdp;
20520 	ASSERT(uscmd != NULL);
20521 
20522 	sense_buf = (uint8_t *)uscmd->uscsi_rqbuf;
20523 	ASSERT(sense_buf != NULL);
20524 
20525 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
20526 	ASSERT(un != NULL);
20527 
20528 	cdb = (union scsi_cdb *)uscmd->uscsi_cdb;
20529 
20530 	status = geterror(bp);
20531 	switch (status) {
20532 	case 0:
20533 		break;	/* Success! */
20534 	case EIO:
20535 		switch (uscmd->uscsi_status) {
20536 		case STATUS_RESERVATION_CONFLICT:
20537 			/* Ignore reservation conflict */
20538 			status = 0;
20539 			goto done;
20540 
20541 		case STATUS_CHECK:
20542 			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
20543 			    (scsi_sense_key(sense_buf) ==
20544 			    KEY_ILLEGAL_REQUEST)) {
20545 				/* Ignore Illegal Request error */
20546 				if (cdb->cdb_un.tag&SD_SYNC_NV_BIT) {
20547 					mutex_enter(SD_MUTEX(un));
20548 					un->un_f_sync_nv_supported = FALSE;
20549 					mutex_exit(SD_MUTEX(un));
20550 					status = 0;
20551 					SD_TRACE(SD_LOG_IO, un,
20552 					    "un_f_sync_nv_supported \
20553 					    is set to false.\n");
20554 					goto done;
20555 				}
20556 
20557 				mutex_enter(SD_MUTEX(un));
20558 				un->un_f_sync_cache_supported = FALSE;
20559 				mutex_exit(SD_MUTEX(un));
20560 				SD_TRACE(SD_LOG_IO, un,
20561 				    "sd_send_scsi_SYNCHRONIZE_CACHE_biodone: \
20562 				    un_f_sync_cache_supported set to false \
20563 				    with asc = %x, ascq = %x\n",
20564 				    scsi_sense_asc(sense_buf),
20565 				    scsi_sense_ascq(sense_buf));
20566 				status = ENOTSUP;
20567 				goto done;
20568 			}
20569 			break;
20570 		default:
20571 			break;
20572 		}
20573 		/* FALLTHRU */
20574 	default:
20575 		/*
20576 		 * Turn on the un_f_sync_cache_required flag
20577 		 * since the SYNC CACHE command failed
20578 		 */
20579 		mutex_enter(SD_MUTEX(un));
20580 		un->un_f_sync_cache_required = TRUE;
20581 		mutex_exit(SD_MUTEX(un));
20582 
20583 		/*
20584 		 * Don't log an error message if this device
20585 		 * has removable media.
20586 		 */
20587 		if (!un->un_f_has_removable_media) {
20588 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
20589 			    "SYNCHRONIZE CACHE command failed (%d)\n", status);
20590 		}
20591 		break;
20592 	}
20593 
20594 done:
20595 	if (uip->ui_dkc.dkc_callback != NULL) {
20596 		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
20597 	}
20598 
20599 	ASSERT((bp->b_flags & B_REMAPPED) == 0);
20600 	freerbuf(bp);
20601 	kmem_free(uip, sizeof (struct sd_uscsi_info));
20602 	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
20603 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
20604 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
20605 
20606 	return (status);
20607 }
20608 
20609 
20610 /*
20611  *    Function: sd_send_scsi_GET_CONFIGURATION
20612  *
20613  * Description: Issues the get configuration command to the device.
 *		Called from sd_check_for_writable_cd & sd_get_media_info;
 *		the caller needs to ensure that buflen = SD_PROFILE_HEADER_LEN
20616  *   Arguments: ssc
20617  *		ucmdbuf
20618  *		rqbuf
20619  *		rqbuflen
20620  *		bufaddr
20621  *		buflen
20622  *		path_flag
20623  *
20624  * Return Code: 0   - Success
20625  *		errno return code from sd_ssc_send()
20626  *
20627  *     Context: Can sleep. Does not return until command is completed.
20628  *
20629  */
20630 
20631 static int
20632 sd_send_scsi_GET_CONFIGURATION(sd_ssc_t *ssc, struct uscsi_cmd *ucmdbuf,
20633 	uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen,
20634 	int path_flag)
20635 {
20636 	char	cdb[CDB_GROUP1];
20637 	int	status;
20638 	struct sd_lun	*un;
20639 
20640 	ASSERT(ssc != NULL);
20641 	un = ssc->ssc_un;
20642 	ASSERT(un != NULL);
20643 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20644 	ASSERT(bufaddr != NULL);
20645 	ASSERT(ucmdbuf != NULL);
20646 	ASSERT(rqbuf != NULL);
20647 
20648 	SD_TRACE(SD_LOG_IO, un,
20649 	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
20650 
20651 	bzero(cdb, sizeof (cdb));
20652 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20653 	bzero(rqbuf, rqbuflen);
20654 	bzero(bufaddr, buflen);
20655 
20656 	/*
20657 	 * Set up cdb field for the get configuration command.
20658 	 */
20659 	cdb[0] = SCMD_GET_CONFIGURATION;
20660 	cdb[1] = 0x02;  /* Requested Type */
20661 	cdb[8] = SD_PROFILE_HEADER_LEN;
20662 	ucmdbuf->uscsi_cdb = cdb;
20663 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20664 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20665 	ucmdbuf->uscsi_buflen = buflen;
20666 	ucmdbuf->uscsi_timeout = sd_io_time;
20667 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20668 	ucmdbuf->uscsi_rqlen = rqbuflen;
20669 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20670 
20671 	status = sd_ssc_send(ssc, ucmdbuf, FKIOCTL,
20672 	    UIO_SYSSPACE, path_flag);
20673 
20674 	switch (status) {
20675 	case 0:
20676 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20677 		break;  /* Success! */
20678 	case EIO:
20679 		switch (ucmdbuf->uscsi_status) {
20680 		case STATUS_RESERVATION_CONFLICT:
20681 			status = EACCES;
20682 			break;
20683 		default:
20684 			break;
20685 		}
20686 		break;
20687 	default:
20688 		break;
20689 	}
20690 
20691 	if (status == 0) {
20692 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20693 		    "sd_send_scsi_GET_CONFIGURATION: data",
20694 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20695 	}
20696 
20697 	SD_TRACE(SD_LOG_IO, un,
20698 	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
20699 
20700 	return (status);
20701 }
20702 
20703 /*
20704  *    Function: sd_send_scsi_feature_GET_CONFIGURATION
20705  *
20706  * Description: Issues the get configuration command to the device to
20707  *              retrieve a specific feature. Called from
20708  *		sd_check_for_writable_cd & sd_set_mmc_caps.
20709  *   Arguments: ssc
20710  *              ucmdbuf
20711  *              rqbuf
20712  *              rqbuflen
20713  *              bufaddr
20714  *              buflen
20715  *		feature
20716  *
20717  * Return Code: 0   - Success
20718  *              errno return code from sd_ssc_send()
20719  *
20720  *     Context: Can sleep. Does not return until command is completed.
20721  *
20722  */
20723 static int
20724 sd_send_scsi_feature_GET_CONFIGURATION(sd_ssc_t *ssc,
20725 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
20726 	uchar_t *bufaddr, uint_t buflen, char feature, int path_flag)
20727 {
20728 	char    cdb[CDB_GROUP1];
20729 	int	status;
20730 	struct sd_lun	*un;
20731 
20732 	ASSERT(ssc != NULL);
20733 	un = ssc->ssc_un;
20734 	ASSERT(un != NULL);
20735 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20736 	ASSERT(bufaddr != NULL);
20737 	ASSERT(ucmdbuf != NULL);
20738 	ASSERT(rqbuf != NULL);
20739 
20740 	SD_TRACE(SD_LOG_IO, un,
20741 	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
20742 
20743 	bzero(cdb, sizeof (cdb));
20744 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20745 	bzero(rqbuf, rqbuflen);
20746 	bzero(bufaddr, buflen);
20747 
20748 	/*
20749 	 * Set up cdb field for the get configuration command.
20750 	 */
20751 	cdb[0] = SCMD_GET_CONFIGURATION;
20752 	cdb[1] = 0x02;  /* Requested Type */
20753 	cdb[3] = feature;
20754 	cdb[8] = buflen;
20755 	ucmdbuf->uscsi_cdb = cdb;
20756 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20757 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20758 	ucmdbuf->uscsi_buflen = buflen;
20759 	ucmdbuf->uscsi_timeout = sd_io_time;
20760 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20761 	ucmdbuf->uscsi_rqlen = rqbuflen;
20762 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20763 
20764 	status = sd_ssc_send(ssc, ucmdbuf, FKIOCTL,
20765 	    UIO_SYSSPACE, path_flag);
20766 
20767 	switch (status) {
20768 	case 0:
20769 
20770 		break;  /* Success! */
20771 	case EIO:
20772 		switch (ucmdbuf->uscsi_status) {
20773 		case STATUS_RESERVATION_CONFLICT:
20774 			status = EACCES;
20775 			break;
20776 		default:
20777 			break;
20778 		}
20779 		break;
20780 	default:
20781 		break;
20782 	}
20783 
20784 	if (status == 0) {
20785 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20786 		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
20787 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20788 	}
20789 
20790 	SD_TRACE(SD_LOG_IO, un,
20791 	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
20792 
20793 	return (status);
20794 }
20795 
20796 
20797 /*
20798  *    Function: sd_send_scsi_MODE_SENSE
20799  *
20800  * Description: Utility function for issuing a scsi MODE SENSE command.
20801  *		Note: This routine uses a consistent implementation for Group0,
20802  *		Group1, and Group2 commands across all platforms. ATAPI devices
20803  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
20804  *
20805  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
20806  *                      structure for this target.
20807  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
20808  *			  CDB_GROUP[1|2] (10 byte).
20809  *		bufaddr - buffer for page data retrieved from the target.
20810  *		buflen - size of page to be retrieved.
20811  *		page_code - page code of data to be retrieved from the target.
20812  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20813  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20814  *			to use the USCSI "direct" chain and bypass the normal
20815  *			command waitq.
20816  *
20817  * Return Code: 0   - Success
20818  *		errno return code from sd_ssc_send()
20819  *
20820  *     Context: Can sleep. Does not return until command is completed.
20821  */
20822 
20823 static int
20824 sd_send_scsi_MODE_SENSE(sd_ssc_t *ssc, int cdbsize, uchar_t *bufaddr,
20825 	size_t buflen,  uchar_t page_code, int path_flag)
20826 {
20827 	struct	scsi_extended_sense	sense_buf;
20828 	union scsi_cdb		cdb;
20829 	struct uscsi_cmd	ucmd_buf;
20830 	int			status;
20831 	int			headlen;
20832 	struct sd_lun		*un;
20833 
20834 	ASSERT(ssc != NULL);
20835 	un = ssc->ssc_un;
20836 	ASSERT(un != NULL);
20837 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20838 	ASSERT(bufaddr != NULL);
20839 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20840 	    (cdbsize == CDB_GROUP2));
20841 
20842 	SD_TRACE(SD_LOG_IO, un,
20843 	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
20844 
20845 	bzero(&cdb, sizeof (cdb));
20846 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20847 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20848 	bzero(bufaddr, buflen);
20849 
20850 	if (cdbsize == CDB_GROUP0) {
20851 		cdb.scc_cmd = SCMD_MODE_SENSE;
20852 		cdb.cdb_opaque[2] = page_code;
20853 		FORMG0COUNT(&cdb, buflen);
20854 		headlen = MODE_HEADER_LENGTH;
20855 	} else {
20856 		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
20857 		cdb.cdb_opaque[2] = page_code;
20858 		FORMG1COUNT(&cdb, buflen);
20859 		headlen = MODE_HEADER_LENGTH_GRP2;
20860 	}
20861 
20862 	ASSERT(headlen <= buflen);
20863 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20864 
20865 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20866 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20867 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20868 	ucmd_buf.uscsi_buflen	= buflen;
20869 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20870 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20871 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20872 	ucmd_buf.uscsi_timeout	= 60;
20873 
20874 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20875 	    UIO_SYSSPACE, path_flag);
20876 
20877 	switch (status) {
20878 	case 0:
20879 		/*
20880 		 * sr_check_wp() uses 0x3f page code and check the header of
20881 		 * mode page to determine if target device is write-protected.
20882 		 * But some USB devices return 0 bytes for 0x3f page code. For
20883 		 * this case, make sure that mode page header is returned at
20884 		 * least.
20885 		 */
20886 		if (buflen - ucmd_buf.uscsi_resid <  headlen) {
20887 			status = EIO;
20888 			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
20889 			    "mode page header is not returned");
20890 		}
20891 		break;	/* Success! */
20892 	case EIO:
20893 		switch (ucmd_buf.uscsi_status) {
20894 		case STATUS_RESERVATION_CONFLICT:
20895 			status = EACCES;
20896 			break;
20897 		default:
20898 			break;
20899 		}
20900 		break;
20901 	default:
20902 		break;
20903 	}
20904 
20905 	if (status == 0) {
20906 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
20907 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20908 	}
20909 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
20910 
20911 	return (status);
20912 }
20913 
20914 
20915 /*
20916  *    Function: sd_send_scsi_MODE_SELECT
20917  *
20918  * Description: Utility function for issuing a scsi MODE SELECT command.
20919  *		Note: This routine uses a consistent implementation for Group0,
20920  *		Group1, and Group2 commands across all platforms. ATAPI devices
20921  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
20922  *
20923  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
20924  *                      structure for this target.
20925  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
20926  *			  CDB_GROUP[1|2] (10 byte).
20927  *		bufaddr - buffer for page data retrieved from the target.
20928  *		buflen - size of page to be retrieved.
 *		save_page - boolean to determine if SP bit should be set.
20930  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20931  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20932  *			to use the USCSI "direct" chain and bypass the normal
20933  *			command waitq.
20934  *
20935  * Return Code: 0   - Success
20936  *		errno return code from sd_ssc_send()
20937  *
20938  *     Context: Can sleep. Does not return until command is completed.
20939  */
20940 
20941 static int
20942 sd_send_scsi_MODE_SELECT(sd_ssc_t *ssc, int cdbsize, uchar_t *bufaddr,
20943 	size_t buflen,  uchar_t save_page, int path_flag)
20944 {
20945 	struct	scsi_extended_sense	sense_buf;
20946 	union scsi_cdb		cdb;
20947 	struct uscsi_cmd	ucmd_buf;
20948 	int			status;
20949 	struct sd_lun		*un;
20950 
20951 	ASSERT(ssc != NULL);
20952 	un = ssc->ssc_un;
20953 	ASSERT(un != NULL);
20954 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20955 	ASSERT(bufaddr != NULL);
20956 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20957 	    (cdbsize == CDB_GROUP2));
20958 
20959 	SD_TRACE(SD_LOG_IO, un,
20960 	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
20961 
20962 	bzero(&cdb, sizeof (cdb));
20963 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20964 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20965 
20966 	/* Set the PF bit for many third party drives */
20967 	cdb.cdb_opaque[1] = 0x10;
20968 
20969 	/* Set the savepage(SP) bit if given */
20970 	if (save_page == SD_SAVE_PAGE) {
20971 		cdb.cdb_opaque[1] |= 0x01;
20972 	}
20973 
20974 	if (cdbsize == CDB_GROUP0) {
20975 		cdb.scc_cmd = SCMD_MODE_SELECT;
20976 		FORMG0COUNT(&cdb, buflen);
20977 	} else {
20978 		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
20979 		FORMG1COUNT(&cdb, buflen);
20980 	}
20981 
20982 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20983 
20984 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20985 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20986 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20987 	ucmd_buf.uscsi_buflen	= buflen;
20988 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20989 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20990 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
20991 	ucmd_buf.uscsi_timeout	= 60;
20992 
20993 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20994 	    UIO_SYSSPACE, path_flag);
20995 
20996 	switch (status) {
20997 	case 0:
20998 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20999 		break;	/* Success! */
21000 	case EIO:
21001 		switch (ucmd_buf.uscsi_status) {
21002 		case STATUS_RESERVATION_CONFLICT:
21003 			status = EACCES;
21004 			break;
21005 		default:
21006 			break;
21007 		}
21008 		break;
21009 	default:
21010 		break;
21011 	}
21012 
21013 	if (status == 0) {
21014 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
21015 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
21016 	}
21017 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
21018 
21019 	return (status);
21020 }
21021 
21022 
21023 /*
21024  *    Function: sd_send_scsi_RDWR
21025  *
21026  * Description: Issue a scsi READ or WRITE command with the given parameters.
21027  *
21028  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
21029  *                      structure for this target.
21030  *		cmd:	 SCMD_READ or SCMD_WRITE
21031  *		bufaddr: Address of caller's buffer to receive the RDWR data
21032  *		buflen:  Length of caller's buffer receive the RDWR data.
21033  *		start_block: Block number for the start of the RDWR operation.
21034  *			 (Assumes target-native block size.)
 *		residp:  Pointer to variable to receive the residual of the
 *			 RDWR operation (may be NULL if no residual requested).
21037  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
21038  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
21039  *			to use the USCSI "direct" chain and bypass the normal
21040  *			command waitq.
21041  *
21042  * Return Code: 0   - Success
21043  *		errno return code from sd_ssc_send()
21044  *
21045  *     Context: Can sleep. Does not return until command is completed.
21046  */
21047 
21048 static int
21049 sd_send_scsi_RDWR(sd_ssc_t *ssc, uchar_t cmd, void *bufaddr,
21050 	size_t buflen, daddr_t start_block, int path_flag)
21051 {
21052 	struct	scsi_extended_sense	sense_buf;
21053 	union scsi_cdb		cdb;
21054 	struct uscsi_cmd	ucmd_buf;
21055 	uint32_t		block_count;
21056 	int			status;
21057 	int			cdbsize;
21058 	uchar_t			flag;
21059 	struct sd_lun		*un;
21060 
21061 	ASSERT(ssc != NULL);
21062 	un = ssc->ssc_un;
21063 	ASSERT(un != NULL);
21064 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21065 	ASSERT(bufaddr != NULL);
21066 	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));
21067 
21068 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);
21069 
21070 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
21071 		return (EINVAL);
21072 	}
21073 
21074 	mutex_enter(SD_MUTEX(un));
21075 	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
21076 	mutex_exit(SD_MUTEX(un));
21077 
21078 	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;
21079 
21080 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
21081 	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
21082 	    bufaddr, buflen, start_block, block_count);
21083 
21084 	bzero(&cdb, sizeof (cdb));
21085 	bzero(&ucmd_buf, sizeof (ucmd_buf));
21086 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21087 
21088 	/* Compute CDB size to use */
21089 	if (start_block > 0xffffffff)
21090 		cdbsize = CDB_GROUP4;
21091 	else if ((start_block & 0xFFE00000) ||
21092 	    (un->un_f_cfg_is_atapi == TRUE))
21093 		cdbsize = CDB_GROUP1;
21094 	else
21095 		cdbsize = CDB_GROUP0;
21096 
21097 	switch (cdbsize) {
21098 	case CDB_GROUP0:	/* 6-byte CDBs */
21099 		cdb.scc_cmd = cmd;
21100 		FORMG0ADDR(&cdb, start_block);
21101 		FORMG0COUNT(&cdb, block_count);
21102 		break;
21103 	case CDB_GROUP1:	/* 10-byte CDBs */
21104 		cdb.scc_cmd = cmd | SCMD_GROUP1;
21105 		FORMG1ADDR(&cdb, start_block);
21106 		FORMG1COUNT(&cdb, block_count);
21107 		break;
21108 	case CDB_GROUP4:	/* 16-byte CDBs */
21109 		cdb.scc_cmd = cmd | SCMD_GROUP4;
21110 		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
21111 		FORMG4COUNT(&cdb, block_count);
21112 		break;
21113 	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
21114 	default:
21115 		/* All others reserved */
21116 		return (EINVAL);
21117 	}
21118 
21119 	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
21120 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
21121 
21122 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21123 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
21124 	ucmd_buf.uscsi_bufaddr	= bufaddr;
21125 	ucmd_buf.uscsi_buflen	= buflen;
21126 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21127 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21128 	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
21129 	ucmd_buf.uscsi_timeout	= 60;
21130 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21131 	    UIO_SYSSPACE, path_flag);
21132 
21133 	switch (status) {
21134 	case 0:
21135 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21136 		break;	/* Success! */
21137 	case EIO:
21138 		switch (ucmd_buf.uscsi_status) {
21139 		case STATUS_RESERVATION_CONFLICT:
21140 			status = EACCES;
21141 			break;
21142 		default:
21143 			break;
21144 		}
21145 		break;
21146 	default:
21147 		break;
21148 	}
21149 
21150 	if (status == 0) {
21151 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
21152 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
21153 	}
21154 
21155 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");
21156 
21157 	return (status);
21158 }
21159 
21160 
21161 /*
21162  *    Function: sd_send_scsi_LOG_SENSE
21163  *
21164  * Description: Issue a scsi LOG_SENSE command with the given parameters.
21165  *
21166  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
21167  *                      structure for this target.
21168  *
21169  * Return Code: 0   - Success
21170  *		errno return code from sd_ssc_send()
21171  *
21172  *     Context: Can sleep. Does not return until command is completed.
21173  */
21174 
static int
sd_send_scsi_LOG_SENSE(sd_ssc_t *ssc, uchar_t *bufaddr, uint16_t buflen,
	uchar_t page_code, uchar_t page_control, uint16_t param_ptr,
	int path_flag)

{
	struct scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	/*
	 * LOG SENSE (10-byte): byte 2 packs PC (page control) in the top
	 * two bits with the page code below; bytes 5-6 carry the parameter
	 * pointer big-endian (MSB then LSB).
	 */
	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
	FORMG1COUNT(&cdb, buflen);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
	ucmd_buf.uscsi_buflen	= buflen;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, path_flag);

	switch (status) {
	case 0:
		break;
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			/* Device reserved by another initiator. */
			status = EACCES;
			break;
		case STATUS_CHECK:
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_key((uint8_t *)&sense_buf) ==
				KEY_ILLEGAL_REQUEST) &&
			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x24)) {
				/*
				 * ASC 0x24: INVALID FIELD IN CDB
				 */
				switch (page_code) {
				case START_STOP_CYCLE_PAGE:
					/*
					 * The start stop cycle counter is
					 * implemented as page 0x31 in earlier
					 * generation disks. In new generation
					 * disks the start stop cycle counter is
					 * implemented as page 0xE. To properly
					 * handle this case if an attempt for
					 * log page 0xE is made and fails we
					 * will try again using page 0x31.
					 *
					 * Network storage BU committed to
					 * maintain the page 0x31 for this
					 * purpose and will not have any other
					 * page implemented with page code 0x31
					 * until all disks transition to the
					 * standard page.
					 */
					/*
					 * Patch the page code into the CDB
					 * already built above and resend the
					 * otherwise-identical command; the
					 * first failure is marked ignorable
					 * for FMA before the retry.
					 */
					mutex_enter(SD_MUTEX(un));
					un->un_start_stop_cycle_page =
					    START_STOP_CYCLE_VU_PAGE;
					cdb.cdb_opaque[2] =
					    (char)(page_control << 6) |
					    un->un_start_stop_cycle_page;
					mutex_exit(SD_MUTEX(un));
					sd_ssc_assessment(ssc, SD_FMT_IGNORE);
					status = sd_ssc_send(
					    ssc, &ucmd_buf, FKIOCTL,
					    UIO_SYSSPACE, path_flag);

					break;
				case TEMPERATURE_PAGE:
					/* No temperature page: not supported */
					status = ENOTTY;
					break;
				default:
					break;
				}
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	/* status may be 0 here via the page 0x31 retry above as well. */
	if (status == 0) {
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");

	return (status);
}
21290 
21291 
21292 /*
21293  *    Function: sdioctl
21294  *
21295  * Description: Driver's ioctl(9e) entry point function.
21296  *
21297  *   Arguments: dev     - device number
21298  *		cmd     - ioctl operation to be performed
21299  *		arg     - user argument, contains data to be set or reference
21300  *			  parameter for get
21301  *		flag    - bit flag, indicating open settings, 32/64 bit type
21302  *		cred_p  - user credential pointer
21303  *		rval_p  - calling process return value (OPT)
21304  *
21305  * Return Code: EINVAL
21306  *		ENOTTY
21307  *		ENXIO
21308  *		EIO
21309  *		EFAULT
21310  *		ENOTSUP
21311  *		EPERM
21312  *
21313  *     Context: Called from the device switch at normal priority.
21314  */
21315 
21316 static int
21317 sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
21318 {
21319 	struct sd_lun	*un = NULL;
21320 	int		err = 0;
21321 	int		i = 0;
21322 	cred_t		*cr;
21323 	int		tmprval = EINVAL;
21324 	boolean_t	is_valid;
21325 	sd_ssc_t	*ssc;
21326 
21327 	/*
21328 	 * All device accesses go thru sdstrategy where we check on suspend
21329 	 * status
21330 	 */
21331 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21332 		return (ENXIO);
21333 	}
21334 
21335 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21336 
21337 	/* Initialize sd_ssc_t for internal uscsi commands */
21338 	ssc = sd_ssc_init(un);
21339 
21340 	is_valid = SD_IS_VALID_LABEL(un);
21341 
21342 	/*
21343 	 * Moved this wait from sd_uscsi_strategy to here for
21344 	 * reasons of deadlock prevention. Internal driver commands,
21345 	 * specifically those to change a devices power level, result
21346 	 * in a call to sd_uscsi_strategy.
21347 	 */
21348 	mutex_enter(SD_MUTEX(un));
21349 	while ((un->un_state == SD_STATE_SUSPENDED) ||
21350 	    (un->un_state == SD_STATE_PM_CHANGING)) {
21351 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
21352 	}
21353 	/*
21354 	 * Twiddling the counter here protects commands from now
21355 	 * through to the top of sd_uscsi_strategy. Without the
21356 	 * counter inc. a power down, for example, could get in
21357 	 * after the above check for state is made and before
21358 	 * execution gets to the top of sd_uscsi_strategy.
21359 	 * That would cause problems.
21360 	 */
21361 	un->un_ncmds_in_driver++;
21362 
21363 	if (!is_valid &&
21364 	    (flag & (FNDELAY | FNONBLOCK))) {
21365 		switch (cmd) {
21366 		case DKIOCGGEOM:	/* SD_PATH_DIRECT */
21367 		case DKIOCGVTOC:
21368 		case DKIOCGEXTVTOC:
21369 		case DKIOCGAPART:
21370 		case DKIOCPARTINFO:
21371 		case DKIOCEXTPARTINFO:
21372 		case DKIOCSGEOM:
21373 		case DKIOCSAPART:
21374 		case DKIOCGETEFI:
21375 		case DKIOCPARTITION:
21376 		case DKIOCSVTOC:
21377 		case DKIOCSEXTVTOC:
21378 		case DKIOCSETEFI:
21379 		case DKIOCGMBOOT:
21380 		case DKIOCSMBOOT:
21381 		case DKIOCG_PHYGEOM:
21382 		case DKIOCG_VIRTGEOM:
21383 			/* let cmlb handle it */
21384 			goto skip_ready_valid;
21385 
21386 		case CDROMPAUSE:
21387 		case CDROMRESUME:
21388 		case CDROMPLAYMSF:
21389 		case CDROMPLAYTRKIND:
21390 		case CDROMREADTOCHDR:
21391 		case CDROMREADTOCENTRY:
21392 		case CDROMSTOP:
21393 		case CDROMSTART:
21394 		case CDROMVOLCTRL:
21395 		case CDROMSUBCHNL:
21396 		case CDROMREADMODE2:
21397 		case CDROMREADMODE1:
21398 		case CDROMREADOFFSET:
21399 		case CDROMSBLKMODE:
21400 		case CDROMGBLKMODE:
21401 		case CDROMGDRVSPEED:
21402 		case CDROMSDRVSPEED:
21403 		case CDROMCDDA:
21404 		case CDROMCDXA:
21405 		case CDROMSUBCODE:
21406 			if (!ISCD(un)) {
21407 				un->un_ncmds_in_driver--;
21408 				ASSERT(un->un_ncmds_in_driver >= 0);
21409 				mutex_exit(SD_MUTEX(un));
21410 				err = ENOTTY;
21411 				goto done_without_assess;
21412 			}
21413 			break;
21414 		case FDEJECT:
21415 		case DKIOCEJECT:
21416 		case CDROMEJECT:
21417 			if (!un->un_f_eject_media_supported) {
21418 				un->un_ncmds_in_driver--;
21419 				ASSERT(un->un_ncmds_in_driver >= 0);
21420 				mutex_exit(SD_MUTEX(un));
21421 				err = ENOTTY;
21422 				goto done_without_assess;
21423 			}
21424 			break;
21425 		case DKIOCFLUSHWRITECACHE:
21426 			mutex_exit(SD_MUTEX(un));
21427 			err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
21428 			if (err != 0) {
21429 				mutex_enter(SD_MUTEX(un));
21430 				un->un_ncmds_in_driver--;
21431 				ASSERT(un->un_ncmds_in_driver >= 0);
21432 				mutex_exit(SD_MUTEX(un));
21433 				err = EIO;
21434 				goto done_quick_assess;
21435 			}
21436 			mutex_enter(SD_MUTEX(un));
21437 			/* FALLTHROUGH */
21438 		case DKIOCREMOVABLE:
21439 		case DKIOCHOTPLUGGABLE:
21440 		case DKIOCINFO:
21441 		case DKIOCGMEDIAINFO:
21442 		case MHIOCENFAILFAST:
21443 		case MHIOCSTATUS:
21444 		case MHIOCTKOWN:
21445 		case MHIOCRELEASE:
21446 		case MHIOCGRP_INKEYS:
21447 		case MHIOCGRP_INRESV:
21448 		case MHIOCGRP_REGISTER:
21449 		case MHIOCGRP_RESERVE:
21450 		case MHIOCGRP_PREEMPTANDABORT:
21451 		case MHIOCGRP_REGISTERANDIGNOREKEY:
21452 		case CDROMCLOSETRAY:
21453 		case USCSICMD:
21454 			goto skip_ready_valid;
21455 		default:
21456 			break;
21457 		}
21458 
21459 		mutex_exit(SD_MUTEX(un));
21460 		err = sd_ready_and_valid(ssc, SDPART(dev));
21461 		mutex_enter(SD_MUTEX(un));
21462 
21463 		if (err != SD_READY_VALID) {
21464 			switch (cmd) {
21465 			case DKIOCSTATE:
21466 			case CDROMGDRVSPEED:
21467 			case CDROMSDRVSPEED:
21468 			case FDEJECT:	/* for eject command */
21469 			case DKIOCEJECT:
21470 			case CDROMEJECT:
21471 			case DKIOCREMOVABLE:
21472 			case DKIOCHOTPLUGGABLE:
21473 				break;
21474 			default:
21475 				if (un->un_f_has_removable_media) {
21476 					err = ENXIO;
21477 				} else {
21478 				/* Do not map SD_RESERVED_BY_OTHERS to EIO */
21479 					if (err == SD_RESERVED_BY_OTHERS) {
21480 						err = EACCES;
21481 					} else {
21482 						err = EIO;
21483 					}
21484 				}
21485 				un->un_ncmds_in_driver--;
21486 				ASSERT(un->un_ncmds_in_driver >= 0);
21487 				mutex_exit(SD_MUTEX(un));
21488 
21489 				goto done_without_assess;
21490 			}
21491 		}
21492 	}
21493 
21494 skip_ready_valid:
21495 	mutex_exit(SD_MUTEX(un));
21496 
21497 	switch (cmd) {
21498 	case DKIOCINFO:
21499 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
21500 		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
21501 		break;
21502 
21503 	case DKIOCGMEDIAINFO:
21504 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
21505 		err = sd_get_media_info(dev, (caddr_t)arg, flag);
21506 		break;
21507 
21508 	case DKIOCGGEOM:
21509 	case DKIOCGVTOC:
21510 	case DKIOCGEXTVTOC:
21511 	case DKIOCGAPART:
21512 	case DKIOCPARTINFO:
21513 	case DKIOCEXTPARTINFO:
21514 	case DKIOCSGEOM:
21515 	case DKIOCSAPART:
21516 	case DKIOCGETEFI:
21517 	case DKIOCPARTITION:
21518 	case DKIOCSVTOC:
21519 	case DKIOCSEXTVTOC:
21520 	case DKIOCSETEFI:
21521 	case DKIOCGMBOOT:
21522 	case DKIOCSMBOOT:
21523 	case DKIOCG_PHYGEOM:
21524 	case DKIOCG_VIRTGEOM:
21525 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOC %d\n", cmd);
21526 
21527 		/* TUR should spin up */
21528 
21529 		if (un->un_f_has_removable_media)
21530 			err = sd_send_scsi_TEST_UNIT_READY(ssc,
21531 			    SD_CHECK_FOR_MEDIA);
21532 
21533 		else
21534 			err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
21535 
21536 		if (err != 0)
21537 			goto done_with_assess;
21538 
21539 		err = cmlb_ioctl(un->un_cmlbhandle, dev,
21540 		    cmd, arg, flag, cred_p, rval_p, (void *)SD_PATH_DIRECT);
21541 
21542 		if ((err == 0) &&
21543 		    ((cmd == DKIOCSETEFI) ||
21544 		    (un->un_f_pkstats_enabled) &&
21545 		    (cmd == DKIOCSAPART || cmd == DKIOCSVTOC ||
21546 		    cmd == DKIOCSEXTVTOC))) {
21547 
21548 			tmprval = cmlb_validate(un->un_cmlbhandle, CMLB_SILENT,
21549 			    (void *)SD_PATH_DIRECT);
21550 			if ((tmprval == 0) && un->un_f_pkstats_enabled) {
21551 				sd_set_pstats(un);
21552 				SD_TRACE(SD_LOG_IO_PARTITION, un,
21553 				    "sd_ioctl: un:0x%p pstats created and "
21554 				    "set\n", un);
21555 			}
21556 		}
21557 
21558 		if ((cmd == DKIOCSVTOC || cmd == DKIOCSEXTVTOC) ||
21559 		    ((cmd == DKIOCSETEFI) && (tmprval == 0))) {
21560 
21561 			mutex_enter(SD_MUTEX(un));
21562 			if (un->un_f_devid_supported &&
21563 			    (un->un_f_opt_fab_devid == TRUE)) {
21564 				if (un->un_devid == NULL) {
21565 					sd_register_devid(ssc, SD_DEVINFO(un),
21566 					    SD_TARGET_IS_UNRESERVED);
21567 				} else {
21568 					/*
21569 					 * The device id for this disk
21570 					 * has been fabricated. The
21571 					 * device id must be preserved
21572 					 * by writing it back out to
21573 					 * disk.
21574 					 */
21575 					if (sd_write_deviceid(ssc) != 0) {
21576 						ddi_devid_free(un->un_devid);
21577 						un->un_devid = NULL;
21578 					}
21579 				}
21580 			}
21581 			mutex_exit(SD_MUTEX(un));
21582 		}
21583 
21584 		break;
21585 
21586 	case DKIOCLOCK:
21587 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
21588 		err = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
21589 		    SD_PATH_STANDARD);
21590 		goto done_with_assess;
21591 
21592 	case DKIOCUNLOCK:
21593 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
21594 		err = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_ALLOW,
21595 		    SD_PATH_STANDARD);
21596 		goto done_with_assess;
21597 
21598 	case DKIOCSTATE: {
21599 		enum dkio_state		state;
21600 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
21601 
21602 		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
21603 			err = EFAULT;
21604 		} else {
21605 			err = sd_check_media(dev, state);
21606 			if (err == 0) {
21607 				if (ddi_copyout(&un->un_mediastate, (void *)arg,
21608 				    sizeof (int), flag) != 0)
21609 					err = EFAULT;
21610 			}
21611 		}
21612 		break;
21613 	}
21614 
21615 	case DKIOCREMOVABLE:
21616 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
21617 		i = un->un_f_has_removable_media ? 1 : 0;
21618 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21619 			err = EFAULT;
21620 		} else {
21621 			err = 0;
21622 		}
21623 		break;
21624 
21625 	case DKIOCHOTPLUGGABLE:
21626 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
21627 		i = un->un_f_is_hotpluggable ? 1 : 0;
21628 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21629 			err = EFAULT;
21630 		} else {
21631 			err = 0;
21632 		}
21633 		break;
21634 
21635 	case DKIOCGTEMPERATURE:
21636 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
21637 		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
21638 		break;
21639 
21640 	case MHIOCENFAILFAST:
21641 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
21642 		if ((err = drv_priv(cred_p)) == 0) {
21643 			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
21644 		}
21645 		break;
21646 
21647 	case MHIOCTKOWN:
21648 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
21649 		if ((err = drv_priv(cred_p)) == 0) {
21650 			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
21651 		}
21652 		break;
21653 
21654 	case MHIOCRELEASE:
21655 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
21656 		if ((err = drv_priv(cred_p)) == 0) {
21657 			err = sd_mhdioc_release(dev);
21658 		}
21659 		break;
21660 
21661 	case MHIOCSTATUS:
21662 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
21663 		if ((err = drv_priv(cred_p)) == 0) {
21664 			switch (sd_send_scsi_TEST_UNIT_READY(ssc, 0)) {
21665 			case 0:
21666 				err = 0;
21667 				break;
21668 			case EACCES:
21669 				*rval_p = 1;
21670 				err = 0;
21671 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
21672 				break;
21673 			default:
21674 				err = EIO;
21675 				goto done_with_assess;
21676 			}
21677 		}
21678 		break;
21679 
21680 	case MHIOCQRESERVE:
21681 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
21682 		if ((err = drv_priv(cred_p)) == 0) {
21683 			err = sd_reserve_release(dev, SD_RESERVE);
21684 		}
21685 		break;
21686 
21687 	case MHIOCREREGISTERDEVID:
21688 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
21689 		if (drv_priv(cred_p) == EPERM) {
21690 			err = EPERM;
21691 		} else if (!un->un_f_devid_supported) {
21692 			err = ENOTTY;
21693 		} else {
21694 			err = sd_mhdioc_register_devid(dev);
21695 		}
21696 		break;
21697 
21698 	case MHIOCGRP_INKEYS:
21699 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
21700 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21701 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21702 				err = ENOTSUP;
21703 			} else {
21704 				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
21705 				    flag);
21706 			}
21707 		}
21708 		break;
21709 
21710 	case MHIOCGRP_INRESV:
21711 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
21712 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21713 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21714 				err = ENOTSUP;
21715 			} else {
21716 				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
21717 			}
21718 		}
21719 		break;
21720 
21721 	case MHIOCGRP_REGISTER:
21722 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
21723 		if ((err = drv_priv(cred_p)) != EPERM) {
21724 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21725 				err = ENOTSUP;
21726 			} else if (arg != NULL) {
21727 				mhioc_register_t reg;
21728 				if (ddi_copyin((void *)arg, &reg,
21729 				    sizeof (mhioc_register_t), flag) != 0) {
21730 					err = EFAULT;
21731 				} else {
21732 					err =
21733 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21734 					    ssc, SD_SCSI3_REGISTER,
21735 					    (uchar_t *)&reg);
21736 					if (err != 0)
21737 						goto done_with_assess;
21738 				}
21739 			}
21740 		}
21741 		break;
21742 
21743 	case MHIOCGRP_RESERVE:
21744 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
21745 		if ((err = drv_priv(cred_p)) != EPERM) {
21746 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21747 				err = ENOTSUP;
21748 			} else if (arg != NULL) {
21749 				mhioc_resv_desc_t resv_desc;
21750 				if (ddi_copyin((void *)arg, &resv_desc,
21751 				    sizeof (mhioc_resv_desc_t), flag) != 0) {
21752 					err = EFAULT;
21753 				} else {
21754 					err =
21755 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21756 					    ssc, SD_SCSI3_RESERVE,
21757 					    (uchar_t *)&resv_desc);
21758 					if (err != 0)
21759 						goto done_with_assess;
21760 				}
21761 			}
21762 		}
21763 		break;
21764 
21765 	case MHIOCGRP_PREEMPTANDABORT:
21766 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
21767 		if ((err = drv_priv(cred_p)) != EPERM) {
21768 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21769 				err = ENOTSUP;
21770 			} else if (arg != NULL) {
21771 				mhioc_preemptandabort_t preempt_abort;
21772 				if (ddi_copyin((void *)arg, &preempt_abort,
21773 				    sizeof (mhioc_preemptandabort_t),
21774 				    flag) != 0) {
21775 					err = EFAULT;
21776 				} else {
21777 					err =
21778 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21779 					    ssc, SD_SCSI3_PREEMPTANDABORT,
21780 					    (uchar_t *)&preempt_abort);
21781 					if (err != 0)
21782 						goto done_with_assess;
21783 				}
21784 			}
21785 		}
21786 		break;
21787 
21788 	case MHIOCGRP_REGISTERANDIGNOREKEY:
21789 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTERANDIGNOREKEY\n");
21790 		if ((err = drv_priv(cred_p)) != EPERM) {
21791 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21792 				err = ENOTSUP;
21793 			} else if (arg != NULL) {
21794 				mhioc_registerandignorekey_t r_and_i;
21795 				if (ddi_copyin((void *)arg, (void *)&r_and_i,
21796 				    sizeof (mhioc_registerandignorekey_t),
21797 				    flag) != 0) {
21798 					err = EFAULT;
21799 				} else {
21800 					err =
21801 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21802 					    ssc, SD_SCSI3_REGISTERANDIGNOREKEY,
21803 					    (uchar_t *)&r_and_i);
21804 					if (err != 0)
21805 						goto done_with_assess;
21806 				}
21807 			}
21808 		}
21809 		break;
21810 
21811 	case USCSICMD:
21812 		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
21813 		cr = ddi_get_cred();
21814 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
21815 			err = EPERM;
21816 		} else {
21817 			enum uio_seg	uioseg;
21818 
21819 			uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE :
21820 			    UIO_USERSPACE;
21821 			if (un->un_f_format_in_progress == TRUE) {
21822 				err = EAGAIN;
21823 				break;
21824 			}
21825 
21826 			err = sd_ssc_send(ssc,
21827 			    (struct uscsi_cmd *)arg,
21828 			    flag, uioseg, SD_PATH_STANDARD);
21829 			if (err != 0)
21830 				goto done_with_assess;
21831 			else
21832 				sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21833 		}
21834 		break;
21835 
21836 	case CDROMPAUSE:
21837 	case CDROMRESUME:
21838 		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
21839 		if (!ISCD(un)) {
21840 			err = ENOTTY;
21841 		} else {
21842 			err = sr_pause_resume(dev, cmd);
21843 		}
21844 		break;
21845 
21846 	case CDROMPLAYMSF:
21847 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
21848 		if (!ISCD(un)) {
21849 			err = ENOTTY;
21850 		} else {
21851 			err = sr_play_msf(dev, (caddr_t)arg, flag);
21852 		}
21853 		break;
21854 
21855 	case CDROMPLAYTRKIND:
21856 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
21857 #if defined(__i386) || defined(__amd64)
21858 		/*
21859 		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
21860 		 */
21861 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21862 #else
21863 		if (!ISCD(un)) {
21864 #endif
21865 			err = ENOTTY;
21866 		} else {
21867 			err = sr_play_trkind(dev, (caddr_t)arg, flag);
21868 		}
21869 		break;
21870 
21871 	case CDROMREADTOCHDR:
21872 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
21873 		if (!ISCD(un)) {
21874 			err = ENOTTY;
21875 		} else {
21876 			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
21877 		}
21878 		break;
21879 
21880 	case CDROMREADTOCENTRY:
21881 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
21882 		if (!ISCD(un)) {
21883 			err = ENOTTY;
21884 		} else {
21885 			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
21886 		}
21887 		break;
21888 
21889 	case CDROMSTOP:
21890 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
21891 		if (!ISCD(un)) {
21892 			err = ENOTTY;
21893 		} else {
21894 			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_TARGET_STOP,
21895 			    SD_PATH_STANDARD);
21896 			goto done_with_assess;
21897 		}
21898 		break;
21899 
21900 	case CDROMSTART:
21901 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
21902 		if (!ISCD(un)) {
21903 			err = ENOTTY;
21904 		} else {
21905 			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_TARGET_START,
21906 			    SD_PATH_STANDARD);
21907 			goto done_with_assess;
21908 		}
21909 		break;
21910 
21911 	case CDROMCLOSETRAY:
21912 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
21913 		if (!ISCD(un)) {
21914 			err = ENOTTY;
21915 		} else {
21916 			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_TARGET_CLOSE,
21917 			    SD_PATH_STANDARD);
21918 			goto done_with_assess;
21919 		}
21920 		break;
21921 
21922 	case FDEJECT:	/* for eject command */
21923 	case DKIOCEJECT:
21924 	case CDROMEJECT:
21925 		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
21926 		if (!un->un_f_eject_media_supported) {
21927 			err = ENOTTY;
21928 		} else {
21929 			err = sr_eject(dev);
21930 		}
21931 		break;
21932 
21933 	case CDROMVOLCTRL:
21934 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
21935 		if (!ISCD(un)) {
21936 			err = ENOTTY;
21937 		} else {
21938 			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
21939 		}
21940 		break;
21941 
21942 	case CDROMSUBCHNL:
21943 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
21944 		if (!ISCD(un)) {
21945 			err = ENOTTY;
21946 		} else {
21947 			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
21948 		}
21949 		break;
21950 
21951 	case CDROMREADMODE2:
21952 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
21953 		if (!ISCD(un)) {
21954 			err = ENOTTY;
21955 		} else if (un->un_f_cfg_is_atapi == TRUE) {
21956 			/*
21957 			 * If the drive supports READ CD, use that instead of
21958 			 * switching the LBA size via a MODE SELECT
21959 			 * Block Descriptor
21960 			 */
21961 			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
21962 		} else {
21963 			err = sr_read_mode2(dev, (caddr_t)arg, flag);
21964 		}
21965 		break;
21966 
21967 	case CDROMREADMODE1:
21968 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
21969 		if (!ISCD(un)) {
21970 			err = ENOTTY;
21971 		} else {
21972 			err = sr_read_mode1(dev, (caddr_t)arg, flag);
21973 		}
21974 		break;
21975 
21976 	case CDROMREADOFFSET:
21977 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
21978 		if (!ISCD(un)) {
21979 			err = ENOTTY;
21980 		} else {
21981 			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
21982 			    flag);
21983 		}
21984 		break;
21985 
21986 	case CDROMSBLKMODE:
21987 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
21988 		/*
21989 		 * There is no means of changing block size in case of atapi
21990 		 * drives, thus return ENOTTY if drive type is atapi
21991 		 */
21992 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21993 			err = ENOTTY;
21994 		} else if (un->un_f_mmc_cap == TRUE) {
21995 
21996 			/*
21997 			 * MMC Devices do not support changing the
21998 			 * logical block size
21999 			 *
22000 			 * Note: EINVAL is being returned instead of ENOTTY to
		 * maintain consistency with the original mmc
22002 			 * driver update.
22003 			 */
22004 			err = EINVAL;
22005 		} else {
22006 			mutex_enter(SD_MUTEX(un));
22007 			if ((!(un->un_exclopen & (1<<SDPART(dev)))) ||
22008 			    (un->un_ncmds_in_transport > 0)) {
22009 				mutex_exit(SD_MUTEX(un));
22010 				err = EINVAL;
22011 			} else {
22012 				mutex_exit(SD_MUTEX(un));
22013 				err = sr_change_blkmode(dev, cmd, arg, flag);
22014 			}
22015 		}
22016 		break;
22017 
22018 	case CDROMGBLKMODE:
22019 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
22020 		if (!ISCD(un)) {
22021 			err = ENOTTY;
22022 		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
22023 		    (un->un_f_blockcount_is_valid != FALSE)) {
22024 			/*
22025 			 * Drive is an ATAPI drive so return target block
22026 			 * size for ATAPI drives since we cannot change the
22027 			 * blocksize on ATAPI drives. Used primarily to detect
22028 			 * if an ATAPI cdrom is present.
22029 			 */
22030 			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
22031 			    sizeof (int), flag) != 0) {
22032 				err = EFAULT;
22033 			} else {
22034 				err = 0;
22035 			}
22036 
22037 		} else {
22038 			/*
22039 			 * Drive supports changing block sizes via a Mode
22040 			 * Select.
22041 			 */
22042 			err = sr_change_blkmode(dev, cmd, arg, flag);
22043 		}
22044 		break;
22045 
22046 	case CDROMGDRVSPEED:
22047 	case CDROMSDRVSPEED:
22048 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
22049 		if (!ISCD(un)) {
22050 			err = ENOTTY;
22051 		} else if (un->un_f_mmc_cap == TRUE) {
22052 			/*
22053 			 * Note: In the future the driver implementation
22054 			 * for getting and
22055 			 * setting cd speed should entail:
22056 			 * 1) If non-mmc try the Toshiba mode page
22057 			 *    (sr_change_speed)
22058 			 * 2) If mmc but no support for Real Time Streaming try
22059 			 *    the SET CD SPEED (0xBB) command
22060 			 *   (sr_atapi_change_speed)
22061 			 * 3) If mmc and support for Real Time Streaming
22062 			 *    try the GET PERFORMANCE and SET STREAMING
22063 			 *    commands (not yet implemented, 4380808)
22064 			 */
22065 			/*
22066 			 * As per recent MMC spec, CD-ROM speed is variable
22067 			 * and changes with LBA. Since there is no such
			 * thing as drive speed now, fail this ioctl.
22069 			 *
			 * Note: EINVAL is returned for consistency of original
22071 			 * implementation which included support for getting
22072 			 * the drive speed of mmc devices but not setting
22073 			 * the drive speed. Thus EINVAL would be returned
22074 			 * if a set request was made for an mmc device.
22075 			 * We no longer support get or set speed for
22076 			 * mmc but need to remain consistent with regard
22077 			 * to the error code returned.
22078 			 */
22079 			err = EINVAL;
22080 		} else if (un->un_f_cfg_is_atapi == TRUE) {
22081 			err = sr_atapi_change_speed(dev, cmd, arg, flag);
22082 		} else {
22083 			err = sr_change_speed(dev, cmd, arg, flag);
22084 		}
22085 		break;
22086 
22087 	case CDROMCDDA:
22088 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
22089 		if (!ISCD(un)) {
22090 			err = ENOTTY;
22091 		} else {
22092 			err = sr_read_cdda(dev, (void *)arg, flag);
22093 		}
22094 		break;
22095 
22096 	case CDROMCDXA:
22097 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
22098 		if (!ISCD(un)) {
22099 			err = ENOTTY;
22100 		} else {
22101 			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
22102 		}
22103 		break;
22104 
22105 	case CDROMSUBCODE:
22106 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
22107 		if (!ISCD(un)) {
22108 			err = ENOTTY;
22109 		} else {
22110 			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
22111 		}
22112 		break;
22113 
22114 
22115 #ifdef SDDEBUG
22116 /* RESET/ABORTS testing ioctls */
22117 	case DKIOCRESET: {
22118 		int	reset_level;
22119 
22120 		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
22121 			err = EFAULT;
22122 		} else {
22123 			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
22124 			    "reset_level = 0x%lx\n", reset_level);
22125 			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
22126 				err = 0;
22127 			} else {
22128 				err = EIO;
22129 			}
22130 		}
22131 		break;
22132 	}
22133 
22134 	case DKIOCABORT:
22135 		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
22136 		if (scsi_abort(SD_ADDRESS(un), NULL)) {
22137 			err = 0;
22138 		} else {
22139 			err = EIO;
22140 		}
22141 		break;
22142 #endif
22143 
22144 #ifdef SD_FAULT_INJECTION
22145 /* SDIOC FaultInjection testing ioctls */
22146 	case SDIOCSTART:
22147 	case SDIOCSTOP:
22148 	case SDIOCINSERTPKT:
22149 	case SDIOCINSERTXB:
22150 	case SDIOCINSERTUN:
22151 	case SDIOCINSERTARQ:
22152 	case SDIOCPUSH:
22153 	case SDIOCRETRIEVE:
22154 	case SDIOCRUN:
22155 		SD_INFO(SD_LOG_SDTEST, un, "sdioctl:"
22156 		    "SDIOC detected cmd:0x%X:\n", cmd);
22157 		/* call error generator */
22158 		sd_faultinjection_ioctl(cmd, arg, un);
22159 		err = 0;
22160 		break;
22161 
22162 #endif /* SD_FAULT_INJECTION */
22163 
22164 	case DKIOCFLUSHWRITECACHE:
22165 		{
22166 			struct dk_callback *dkc = (struct dk_callback *)arg;
22167 
22168 			mutex_enter(SD_MUTEX(un));
22169 			if (!un->un_f_sync_cache_supported ||
22170 			    !un->un_f_write_cache_enabled) {
22171 				err = un->un_f_sync_cache_supported ?
22172 				    0 : ENOTSUP;
22173 				mutex_exit(SD_MUTEX(un));
22174 				if ((flag & FKIOCTL) && dkc != NULL &&
22175 				    dkc->dkc_callback != NULL) {
22176 					(*dkc->dkc_callback)(dkc->dkc_cookie,
22177 					    err);
22178 					/*
22179 					 * Did callback and reported error.
22180 					 * Since we did a callback, ioctl
22181 					 * should return 0.
22182 					 */
22183 					err = 0;
22184 				}
22185 				break;
22186 			}
22187 			mutex_exit(SD_MUTEX(un));
22188 
22189 			if ((flag & FKIOCTL) && dkc != NULL &&
22190 			    dkc->dkc_callback != NULL) {
22191 				/* async SYNC CACHE request */
22192 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
22193 			} else {
22194 				/* synchronous SYNC CACHE request */
22195 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
22196 			}
22197 		}
22198 		break;
22199 
22200 	case DKIOCGETWCE: {
22201 
22202 		int wce;
22203 
22204 		if ((err = sd_get_write_cache_enabled(ssc, &wce)) != 0) {
22205 			break;
22206 		}
22207 
22208 		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
22209 			err = EFAULT;
22210 		}
22211 		break;
22212 	}
22213 
22214 	case DKIOCSETWCE: {
22215 
22216 		int wce, sync_supported;
22217 
22218 		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
22219 			err = EFAULT;
22220 			break;
22221 		}
22222 
22223 		/*
22224 		 * Synchronize multiple threads trying to enable
22225 		 * or disable the cache via the un_f_wcc_cv
22226 		 * condition variable.
22227 		 */
22228 		mutex_enter(SD_MUTEX(un));
22229 
22230 		/*
22231 		 * Don't allow the cache to be enabled if the
22232 		 * config file has it disabled.
22233 		 */
22234 		if (un->un_f_opt_disable_cache && wce) {
22235 			mutex_exit(SD_MUTEX(un));
22236 			err = EINVAL;
22237 			break;
22238 		}
22239 
22240 		/*
22241 		 * Wait for write cache change in progress
22242 		 * bit to be clear before proceeding.
22243 		 */
22244 		while (un->un_f_wcc_inprog)
22245 			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
22246 
22247 		un->un_f_wcc_inprog = 1;
22248 
22249 		if (un->un_f_write_cache_enabled && wce == 0) {
22250 			/*
22251 			 * Disable the write cache.  Don't clear
22252 			 * un_f_write_cache_enabled until after
22253 			 * the mode select and flush are complete.
22254 			 */
22255 			sync_supported = un->un_f_sync_cache_supported;
22256 
22257 			/*
22258 			 * If cache flush is suppressed, we assume that the
22259 			 * controller firmware will take care of managing the
22260 			 * write cache for us: no need to explicitly
22261 			 * disable it.
22262 			 */
22263 			if (!un->un_f_suppress_cache_flush) {
22264 				mutex_exit(SD_MUTEX(un));
22265 				if ((err = sd_cache_control(ssc,
22266 				    SD_CACHE_NOCHANGE,
22267 				    SD_CACHE_DISABLE)) == 0 &&
22268 				    sync_supported) {
22269 					err = sd_send_scsi_SYNCHRONIZE_CACHE(un,
22270 					    NULL);
22271 				}
22272 			} else {
22273 				mutex_exit(SD_MUTEX(un));
22274 			}
22275 
22276 			mutex_enter(SD_MUTEX(un));
22277 			if (err == 0) {
22278 				un->un_f_write_cache_enabled = 0;
22279 			}
22280 
22281 		} else if (!un->un_f_write_cache_enabled && wce != 0) {
22282 			/*
22283 			 * Set un_f_write_cache_enabled first, so there is
22284 			 * no window where the cache is enabled, but the
22285 			 * bit says it isn't.
22286 			 */
22287 			un->un_f_write_cache_enabled = 1;
22288 
22289 			/*
22290 			 * If cache flush is suppressed, we assume that the
22291 			 * controller firmware will take care of managing the
22292 			 * write cache for us: no need to explicitly
22293 			 * enable it.
22294 			 */
22295 			if (!un->un_f_suppress_cache_flush) {
22296 				mutex_exit(SD_MUTEX(un));
22297 				err = sd_cache_control(ssc, SD_CACHE_NOCHANGE,
22298 				    SD_CACHE_ENABLE);
22299 			} else {
22300 				mutex_exit(SD_MUTEX(un));
22301 			}
22302 
22303 			mutex_enter(SD_MUTEX(un));
22304 
22305 			if (err) {
22306 				un->un_f_write_cache_enabled = 0;
22307 			}
22308 		}
22309 
22310 		un->un_f_wcc_inprog = 0;
22311 		cv_broadcast(&un->un_wcc_cv);
22312 		mutex_exit(SD_MUTEX(un));
22313 		break;
22314 	}
22315 
22316 	default:
22317 		err = ENOTTY;
22318 		break;
22319 	}
22320 	mutex_enter(SD_MUTEX(un));
22321 	un->un_ncmds_in_driver--;
22322 	ASSERT(un->un_ncmds_in_driver >= 0);
22323 	mutex_exit(SD_MUTEX(un));
22324 
22325 
22326 done_without_assess:
22327 	sd_ssc_fini(ssc);
22328 
22329 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
22330 	return (err);
22331 
22332 done_with_assess:
22333 	mutex_enter(SD_MUTEX(un));
22334 	un->un_ncmds_in_driver--;
22335 	ASSERT(un->un_ncmds_in_driver >= 0);
22336 	mutex_exit(SD_MUTEX(un));
22337 
22338 done_quick_assess:
22339 	if (err != 0)
22340 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
22341 	/* Uninitialize sd_ssc_t pointer */
22342 	sd_ssc_fini(ssc);
22343 
22344 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
22345 	return (err);
22346 }
22347 
22348 
22349 /*
22350  *    Function: sd_dkio_ctrl_info
22351  *
22352  * Description: This routine is the driver entry point for handling controller
22353  *		information ioctl requests (DKIOCINFO).
22354  *
22355  *   Arguments: dev  - the device number
22356  *		arg  - pointer to user provided dk_cinfo structure
22357  *		       specifying the controller type and attributes.
22358  *		flag - this argument is a pass through to ddi_copyxxx()
22359  *		       directly from the mode argument of ioctl().
22360  *
22361  * Return Code: 0
22362  *		EFAULT
22363  *		ENXIO
22364  */
22365 
22366 static int
22367 sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
22368 {
22369 	struct sd_lun	*un = NULL;
22370 	struct dk_cinfo	*info;
22371 	dev_info_t	*pdip;
22372 	int		lun, tgt;
22373 
22374 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22375 		return (ENXIO);
22376 	}
22377 
22378 	info = (struct dk_cinfo *)
22379 	    kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
22380 
22381 	switch (un->un_ctype) {
22382 	case CTYPE_CDROM:
22383 		info->dki_ctype = DKC_CDROM;
22384 		break;
22385 	default:
22386 		info->dki_ctype = DKC_SCSI_CCS;
22387 		break;
22388 	}
22389 	pdip = ddi_get_parent(SD_DEVINFO(un));
22390 	info->dki_cnum = ddi_get_instance(pdip);
22391 	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
22392 		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
22393 	} else {
22394 		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
22395 		    DK_DEVLEN - 1);
22396 	}
22397 
22398 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22399 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
22400 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22401 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
22402 
22403 	/* Unit Information */
22404 	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
22405 	info->dki_slave = ((tgt << 3) | lun);
22406 	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
22407 	    DK_DEVLEN - 1);
22408 	info->dki_flags = DKI_FMTVOL;
22409 	info->dki_partition = SDPART(dev);
22410 
22411 	/* Max Transfer size of this device in blocks */
22412 	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
22413 	info->dki_addr = 0;
22414 	info->dki_space = 0;
22415 	info->dki_prio = 0;
22416 	info->dki_vec = 0;
22417 
22418 	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
22419 		kmem_free(info, sizeof (struct dk_cinfo));
22420 		return (EFAULT);
22421 	} else {
22422 		kmem_free(info, sizeof (struct dk_cinfo));
22423 		return (0);
22424 	}
22425 }
22426 
22427 
22428 /*
22429  *    Function: sd_get_media_info
22430  *
22431  * Description: This routine is the driver entry point for handling ioctl
22432  *		requests for the media type or command set profile used by the
22433  *		drive to operate on the media (DKIOCGMEDIAINFO).
22434  *
22435  *   Arguments: dev	- the device number
22436  *		arg	- pointer to user provided dk_minfo structure
22437  *			  specifying the media type, logical block size and
22438  *			  drive capacity.
22439  *		flag	- this argument is a pass through to ddi_copyxxx()
22440  *			  directly from the mode argument of ioctl().
22441  *
22442  * Return Code: 0
 *		EACCES
22444  *		EFAULT
22445  *		ENXIO
22446  *		EIO
22447  */
22448 
static int
sd_get_media_info(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un = NULL;
	struct uscsi_cmd	com;
	struct scsi_inquiry	*sinq;
	struct dk_minfo		media_info;
	u_longlong_t		media_capacity;
	uint64_t		capacity;
	uint_t			lbasize;
	uchar_t			*out_data;
	uchar_t			*rqbuf;
	int			rval = 0;
	int			rtn;
	sd_ssc_t		*ssc;
	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info: entry\n");

	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	/* Issue a TUR to determine if the drive is ready with media present */
	ssc = sd_ssc_init(un);
	rval = sd_send_scsi_TEST_UNIT_READY(ssc, SD_CHECK_FOR_MEDIA);
	if (rval == ENXIO) {
		goto done;
	} else if (rval != 0) {
		/*
		 * TUR failures other than ENXIO are ignored here; the media
		 * type/capacity may still be retrievable, so clear the FMA
		 * assessment and continue.
		 */
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	}

	/* Now get configuration data */
	if (ISCD(un)) {
		media_info.dki_media_type = DK_CDROM;

		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
		if (un->un_f_mmc_cap == TRUE) {
			rtn = sd_send_scsi_GET_CONFIGURATION(ssc, &com, rqbuf,
			    SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN,
			    SD_PATH_STANDARD);

			if (rtn) {
				/*
				 * We ignore all failures for CD and need to
				 * put the assessment before processing code
				 * to avoid missing assessment for FMA.
				 */
				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
				/*
				 * Failed for other than an illegal request
				 * or command not supported
				 */
				if ((com.uscsi_status == STATUS_CHECK) &&
				    (com.uscsi_rqstatus == STATUS_GOOD)) {
					/*
					 * Fixed-format sense: byte 2 holds the
					 * sense key, byte 12 the ASC (0x20 =
					 * invalid command operation code, per
					 * SPC — i.e. "command not supported").
					 */
					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
					    (rqbuf[12] != 0x20)) {
						rval = EIO;
						goto no_assessment;
					}
				}
			} else {
				/*
				 * The GET CONFIGURATION command succeeded
				 * so set the media type according to the
				 * returned data
				 */
				media_info.dki_media_type = out_data[6];
				media_info.dki_media_type <<= 8;
				media_info.dki_media_type |= out_data[7];
			}
		}
	} else {
		/*
		 * The profile list is not available, so we attempt to identify
		 * the media type based on the inquiry data
		 */
		sinq = un->un_sd->sd_inq;
		if ((sinq->inq_dtype == DTYPE_DIRECT) ||
		    (sinq->inq_dtype == DTYPE_OPTICAL)) {
			/* This is a direct access device  or optical disk */
			media_info.dki_media_type = DK_FIXED_DISK;

			/*
			 * Special-case Iomega ZIP/JAZ removable drives,
			 * identified by INQUIRY vendor/product strings.
			 */
			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
					media_info.dki_media_type = DK_ZIP;
				} else if (
				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
					media_info.dki_media_type = DK_JAZ;
				}
			}
		} else {
			/*
			 * Not a CD, direct access or optical disk so return
			 * unknown media
			 */
			media_info.dki_media_type = DK_UNKNOWN;
		}
	}

	/* Now read the capacity so we can provide the lbasize and capacity */
	rval = sd_send_scsi_READ_CAPACITY(ssc, &capacity, &lbasize,
	    SD_PATH_DIRECT);
	switch (rval) {
	case 0:
		break;
	case EACCES:
		rval = EACCES;
		goto done;
	default:
		rval = EIO;
		goto done;
	}

	/*
	 * If lun is expanded dynamically, update the un structure.
	 */
	mutex_enter(SD_MUTEX(un));
	if ((un->un_f_blockcount_is_valid == TRUE) &&
	    (un->un_f_tgt_blocksize_is_valid == TRUE) &&
	    (capacity > un->un_blockcount)) {
		sd_update_block_info(un, lbasize, capacity);
	}
	mutex_exit(SD_MUTEX(un));

	media_info.dki_lbsize = lbasize;
	media_capacity = capacity;

	/*
	 * sd_send_scsi_READ_CAPACITY() reports capacity in
	 * un->un_sys_blocksize chunks. So we need to convert it into
	 * cap.lbasize chunks.
	 */
	media_capacity *= un->un_sys_blocksize;
	media_capacity /= lbasize;
	media_info.dki_capacity = media_capacity;

	if (ddi_copyout(&media_info, arg, sizeof (struct dk_minfo), flag)) {
		rval = EFAULT;
		/* Put goto. Anybody might add some code below in future */
		goto no_assessment;
	}
/*
 * Cleanup tiers: "done" performs the FMA assessment for device errors;
 * "no_assessment" skips it (used for EFAULT and for failures already
 * assessed above) and releases the ssc and scratch buffers.
 */
done:
	if (rval != 0) {
		if (rval == EIO)
			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
		else
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	}
no_assessment:
	sd_ssc_fini(ssc);
	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
	kmem_free(rqbuf, SENSE_LENGTH);
	return (rval);
}
22607 
22608 
22609 /*
22610  *    Function: sd_check_media
22611  *
22612  * Description: This utility routine implements the functionality for the
22613  *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
22614  *		driver state changes from that specified by the user
22615  *		(inserted or ejected). For example, if the user specifies
22616  *		DKIO_EJECTED and the current media state is inserted this
22617  *		routine will immediately return DKIO_INSERTED. However, if the
22618  *		current media state is not inserted the user thread will be
22619  *		blocked until the drive state changes. If DKIO_NONE is specified
22620  *		the user thread will block until a drive state change occurs.
22621  *
22622  *   Arguments: dev  - the device number
22623  *		state  - user pointer to a dkio_state, updated with the current
22624  *			drive state at return.
22625  *
22626  * Return Code: ENXIO
22627  *		EIO
22628  *		EAGAIN
22629  *		EINTR
22630  */
22631 
static int
sd_check_media(dev_t dev, enum dkio_state state)
{
	struct sd_lun		*un = NULL;
	enum dkio_state		prev_state;
	opaque_t		token = NULL;
	int			rval = 0;
	sd_ssc_t		*ssc;
	dev_t			sub_dev;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * sub_dev is used when submitting request to scsi watch.
	 * All submissions are unified to use same device number.
	 */
	sub_dev = sd_make_device(SD_DEVINFO(un));

	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");

	ssc = sd_ssc_init(un);

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
	    "state=%x, mediastate=%x\n", state, un->un_mediastate);

	prev_state = un->un_mediastate;

	/* is there anything to do? */
	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
		/*
		 * submit the request to the scsi_watch service;
		 * scsi_media_watch_cb() does the real work
		 */
		mutex_exit(SD_MUTEX(un));

		/*
		 * This change handles the case where a scsi watch request is
		 * added to a device that is powered down. To accomplish this
		 * we power up the device before adding the scsi watch request,
		 * since the scsi watch sends a TUR directly to the device
		 * which the device cannot handle if it is powered down.
		 */
		if (sd_pm_entry(un) != DDI_SUCCESS) {
			/* Could not power up; return rval == 0 (no change). */
			mutex_enter(SD_MUTEX(un));
			goto done;
		}

		token = scsi_watch_request_submit(SD_SCSI_DEVP(un),
		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
		    (caddr_t)sub_dev);

		sd_pm_exit(un);

		mutex_enter(SD_MUTEX(un));
		if (token == NULL) {
			rval = EAGAIN;
			goto done;
		}

		/*
		 * This is a special case IOCTL that doesn't return
		 * until the media state changes. Routine sdpower
		 * knows about and handles this so don't count it
		 * as an active cmd in the driver, which would
		 * keep the device busy to the pm framework.
		 * If the count isn't decremented the device can't
		 * be powered down.
		 */
		un->un_ncmds_in_driver--;
		ASSERT(un->un_ncmds_in_driver >= 0);

		/*
		 * if a prior request had been made, this will be the same
		 * token, as scsi_watch was designed that way.
		 */
		un->un_swr_token = token;
		un->un_specified_mediastate = state;

		/*
		 * now wait for media change
		 * we will not be signalled unless mediastate == state but it is
		 * still better to test for this condition, since there is a
		 * 2 sec cv_broadcast delay when mediastate == DKIO_INSERTED
		 */
		SD_TRACE(SD_LOG_COMMON, un,
		    "sd_check_media: waiting for media state change\n");
		while (un->un_mediastate == state) {
			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
				/*
				 * Interrupted by a signal: restore the
				 * command count before bailing out.
				 */
				SD_TRACE(SD_LOG_COMMON, un,
				    "sd_check_media: waiting for media state "
				    "was interrupted\n");
				un->un_ncmds_in_driver++;
				rval = EINTR;
				goto done;
			}
			SD_TRACE(SD_LOG_COMMON, un,
			    "sd_check_media: received signal, state=%x\n",
			    un->un_mediastate);
		}
		/*
		 * Inc the counter to indicate the device once again
		 * has an active outstanding cmd.
		 */
		un->un_ncmds_in_driver++;
	}

	/* invalidate geometry */
	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
		sr_ejected(un);
	}

	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
		uint64_t	capacity;
		uint_t		lbasize;

		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
		mutex_exit(SD_MUTEX(un));
		/*
		 * Since the following routines use SD_PATH_DIRECT, we must
		 * call PM directly before the upcoming disk accesses. This
		 * may cause the disk to be power/spin up.
		 */

		if (sd_pm_entry(un) == DDI_SUCCESS) {
			rval = sd_send_scsi_READ_CAPACITY(ssc,
			    &capacity, &lbasize, SD_PATH_DIRECT);
			if (rval != 0) {
				sd_pm_exit(un);
				if (rval == EIO)
					sd_ssc_assessment(ssc,
					    SD_FMT_STATUS_CHECK);
				else
					sd_ssc_assessment(ssc, SD_FMT_IGNORE);
				mutex_enter(SD_MUTEX(un));
				goto done;
			}
		} else {
			rval = EIO;
			mutex_enter(SD_MUTEX(un));
			goto done;
		}
		mutex_enter(SD_MUTEX(un));

		sd_update_block_info(un, lbasize, capacity);

		/*
		 *  Check if the media in the device is writable or not
		 */
		if (ISCD(un)) {
			sd_check_for_writable_cd(ssc, SD_PATH_DIRECT);
		}

		mutex_exit(SD_MUTEX(un));
		cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
		if ((cmlb_validate(un->un_cmlbhandle, 0,
		    (void *)SD_PATH_DIRECT) == 0) && un->un_f_pkstats_enabled) {
			sd_set_pstats(un);
			SD_TRACE(SD_LOG_IO_PARTITION, un,
			    "sd_check_media: un:0x%p pstats created and "
			    "set\n", un);
		}

		rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
		    SD_PATH_DIRECT);

		sd_pm_exit(un);

		if (rval != 0) {
			if (rval == EIO)
				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
			else
				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		}

		mutex_enter(SD_MUTEX(un));
	}
done:
	sd_ssc_fini(ssc);
	/*
	 * NOTE(review): flag is cleared unconditionally here; presumably it
	 * is consumed by the watch-stop path elsewhere in the driver.
	 */
	un->un_f_watcht_stopped = FALSE;
	if (token != NULL && un->un_swr_token != NULL) {
		/*
		 * Use of this local token and the mutex ensures that we avoid
		 * some race conditions associated with terminating the
		 * scsi watch.
		 */
		token = un->un_swr_token;
		mutex_exit(SD_MUTEX(un));
		(void) scsi_watch_request_terminate(token,
		    SCSI_WATCH_TERMINATE_WAIT);
		if (scsi_watch_get_ref_count(token) == 0) {
			mutex_enter(SD_MUTEX(un));
			un->un_swr_token = (opaque_t)NULL;
		} else {
			mutex_enter(SD_MUTEX(un));
		}
	}

	/*
	 * Update the capacity kstat value, if no media previously
	 * (capacity kstat is 0) and a media has been inserted
	 * (un_f_blockcount_is_valid == TRUE)
	 */
	if (un->un_errstats) {
		struct sd_errstats	*stp = NULL;

		stp = (struct sd_errstats *)un->un_errstats->ks_data;
		if ((stp->sd_capacity.value.ui64 == 0) &&
		    (un->un_f_blockcount_is_valid == TRUE)) {
			stp->sd_capacity.value.ui64 =
			    (uint64_t)((uint64_t)un->un_blockcount *
			    un->un_sys_blocksize);
		}
	}
	mutex_exit(SD_MUTEX(un));
	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
	return (rval);
}
22853 
22854 
22855 /*
22856  *    Function: sd_delayed_cv_broadcast
22857  *
22858  * Description: Delayed cv_broadcast to allow for target to recover from media
22859  *		insertion.
22860  *
22861  *   Arguments: arg - driver soft state (unit) structure
22862  */
22863 
22864 static void
22865 sd_delayed_cv_broadcast(void *arg)
22866 {
22867 	struct sd_lun *un = arg;
22868 
22869 	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
22870 
22871 	mutex_enter(SD_MUTEX(un));
22872 	un->un_dcvb_timeid = NULL;
22873 	cv_broadcast(&un->un_state_cv);
22874 	mutex_exit(SD_MUTEX(un));
22875 }
22876 
22877 
22878 /*
22879  *    Function: sd_media_watch_cb
22880  *
22881  * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
22882  *		routine processes the TUR sense data and updates the driver
22883  *		state if a transition has occurred. The user thread
22884  *		(sd_check_media) is then signalled.
22885  *
22886  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
22887  *			among multiple watches that share this callback function
22888  *		resultp - scsi watch facility result packet containing scsi
22889  *			  packet, status byte and sense data
22890  *
22891  * Return Code: 0 for success, -1 for failure
22892  */
22893 
static int
sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
{
	struct sd_lun			*un;
	struct scsi_status		*statusp = resultp->statusp;
	uint8_t				*sensep = (uint8_t *)resultp->sensep;
	enum dkio_state			state = DKIO_NONE;
	dev_t				dev = (dev_t)arg;
	uchar_t				actual_sense_length;
	uint8_t				skey, asc, ascq;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (-1);
	}
	actual_sense_length = resultp->actual_sense_length;

	mutex_enter(SD_MUTEX(un));
	SD_TRACE(SD_LOG_COMMON, un,
	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
	    *((char *)statusp), (void *)sensep, actual_sense_length);

	/*
	 * If the transport says the device has disappeared, record that
	 * state and wake the waiter immediately; no sense data to parse.
	 */
	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
		un->un_mediastate = DKIO_DEV_GONE;
		cv_broadcast(&un->un_state_cv);
		mutex_exit(SD_MUTEX(un));

		return (0);
	}

	/*
	 * If there was a check condition then sensep points to valid sense data
	 * If status was not a check condition but a reservation or busy status
	 * then the new state is DKIO_NONE
	 */
	if (sensep != NULL) {
		skey = scsi_sense_key(sensep);
		asc = scsi_sense_asc(sensep);
		ascq = scsi_sense_ascq(sensep);

		SD_INFO(SD_LOG_COMMON, un,
		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
		    skey, asc, ascq);
		/* This routine only uses up to 13 bytes of sense data. */
		if (actual_sense_length >= 13) {
			if (skey == KEY_UNIT_ATTENTION) {
				/* ASC 0x28: not-ready-to-ready transition */
				if (asc == 0x28) {
					state = DKIO_INSERTED;
				}
			} else if (skey == KEY_NOT_READY) {
				/*
				 * Sense data of 02/06/00 means that the
				 * drive could not read the media (No
				 * reference position found). In this case
				 * to prevent a hang on the DKIOCSTATE IOCTL
				 * we set the media state to DKIO_INSERTED.
				 */
				if (asc == 0x06 && ascq == 0x00)
					state = DKIO_INSERTED;

				/*
				 * if 02/04/02  means that the host
				 * should send start command. Explicitly
				 * leave the media state as is
				 * (inserted) as the media is inserted
				 * and host has stopped device for PM
				 * reasons. Upon next true read/write
				 * to this media will bring the
				 * device to the right state good for
				 * media access.
				 */
				if (asc == 0x3a) {
					/* ASC 0x3a: medium not present */
					state = DKIO_EJECTED;
				} else {
					/*
					 * If the drive is busy with an
					 * operation or long write, keep the
					 * media in an inserted state.
					 */

					if ((asc == 0x04) &&
					    ((ascq == 0x02) ||
					    (ascq == 0x07) ||
					    (ascq == 0x08))) {
						state = DKIO_INSERTED;
					}
				}
			} else if (skey == KEY_NO_SENSE) {
				if ((asc == 0x00) && (ascq == 0x00)) {
					/*
					 * Sense Data 00/00/00 does not provide
					 * any information about the state of
					 * the media. Ignore it.
					 */
					mutex_exit(SD_MUTEX(un));
					return (0);
				}
			}
		}
	} else if ((*((char *)statusp) == STATUS_GOOD) &&
	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
		/* TUR completed cleanly: media is present and readable. */
		state = DKIO_INSERTED;
	}

	SD_TRACE(SD_LOG_COMMON, un,
	    "sd_media_watch_cb: state=%x, specified=%x\n",
	    state, un->un_specified_mediastate);

	/*
	 * now signal the waiting thread if this is *not* the specified state;
	 * delay the signal if the state is DKIO_INSERTED to allow the target
	 * to recover
	 */
	if (state != un->un_specified_mediastate) {
		un->un_mediastate = state;
		if (state == DKIO_INSERTED) {
			/*
			 * delay the signal to give the drive a chance
			 * to do what it apparently needs to do
			 */
			SD_TRACE(SD_LOG_COMMON, un,
			    "sd_media_watch_cb: delayed cv_broadcast\n");
			if (un->un_dcvb_timeid == NULL) {
				un->un_dcvb_timeid =
				    timeout(sd_delayed_cv_broadcast, un,
				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
			}
		} else {
			SD_TRACE(SD_LOG_COMMON, un,
			    "sd_media_watch_cb: immediate cv_broadcast\n");
			cv_broadcast(&un->un_state_cv);
		}
	}
	mutex_exit(SD_MUTEX(un));
	return (0);
}
23029 
23030 
23031 /*
23032  *    Function: sd_dkio_get_temp
23033  *
23034  * Description: This routine is the driver entry point for handling ioctl
23035  *		requests to get the disk temperature.
23036  *
23037  *   Arguments: dev  - the device number
23038  *		arg  - pointer to user provided dk_temperature structure.
23039  *		flag - this argument is a pass through to ddi_copyxxx()
23040  *		       directly from the mode argument of ioctl().
23041  *
23042  * Return Code: 0
23043  *		EFAULT
23044  *		ENXIO
23045  *		EAGAIN
23046  */
23047 
static int
sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un = NULL;
	struct dk_temperature	*dktemp = NULL;
	uchar_t			*temperature_page;
	int			rval = 0;
	/* becomes SD_PATH_DIRECT when the caller requests DKT_BYPASS_PM */
	int			path_flag = SD_PATH_STANDARD;
	sd_ssc_t		*ssc;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	ssc = sd_ssc_init(un);
	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);

	/* copyin the disk temp argument to get the user flags */
	if (ddi_copyin((void *)arg, dktemp,
	    sizeof (struct dk_temperature), flag) != 0) {
		rval = EFAULT;
		goto done;
	}

	/* Initialize the temperature to invalid. */
	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;

	/*
	 * Note: Investigate removing the "bypass pm" semantic.
	 * Can we just bypass PM always?
	 */
	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
		path_flag = SD_PATH_DIRECT;
		ASSERT(!mutex_owned(&un->un_pm_mutex));
		mutex_enter(&un->un_pm_mutex);
		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
			/*
			 * If DKT_BYPASS_PM is set, and the drive happens to be
			 * in low power mode, we can not wake it up, Need to
			 * return EAGAIN.
			 */
			mutex_exit(&un->un_pm_mutex);
			rval = EAGAIN;
			goto done;
		} else {
			/*
			 * Indicate to PM the device is busy. This is required
			 * to avoid a race - i.e. the ioctl is issuing a
			 * command and the pm framework brings down the device
			 * to low power mode (possible power cut-off on some
			 * platforms).
			 */
			mutex_exit(&un->un_pm_mutex);
			if (sd_pm_entry(un) != DDI_SUCCESS) {
				rval = EAGAIN;
				goto done;
			}
		}
	}

	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);

	rval = sd_send_scsi_LOG_SENSE(ssc, temperature_page,
	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag);
	if (rval != 0)
		goto done2;

	/*
	 * For the current temperature verify that the parameter length is 0x02
	 * and the parameter code is 0x00
	 */
	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
	    (temperature_page[5] == 0x00)) {
		/* 0xFF in the value byte means "temperature unavailable" */
		if (temperature_page[9] == 0xFF) {
			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
		} else {
			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
		}
	}

	/*
	 * For the reference temperature verify that the parameter
	 * length is 0x02 and the parameter code is 0x01
	 */
	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
	    (temperature_page[11] == 0x01)) {
		if (temperature_page[15] == 0xFF) {
			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
		} else {
			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
		}
	}

	/* Do the copyout regardless of the temperature commands status. */
	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
	    flag) != 0) {
		rval = EFAULT;
		/* EFAULT is a host-side error: skip the FMA assessment. */
		goto done1;
	}

/*
 * Cleanup tiers: done2 performs the FMA assessment for command failures;
 * done1 undoes the PM busy indication (taken only on the DKT_BYPASS_PM
 * path) and frees the log page; done releases the ssc and user buffer.
 */
done2:
	if (rval != 0) {
		if (rval == EIO)
			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
		else
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	}
done1:
	if (path_flag == SD_PATH_DIRECT) {
		sd_pm_exit(un);
	}

	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
done:
	sd_ssc_fini(ssc);
	if (dktemp != NULL) {
		kmem_free(dktemp, sizeof (struct dk_temperature));
	}

	return (rval);
}
23170 
23171 
23172 /*
23173  *    Function: sd_log_page_supported
23174  *
23175  * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
23176  *		supported log pages.
23177  *
23178  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
23179  *                      structure for this target.
23180  *		log_page -
23181  *
23182  * Return Code: -1 - on error (log sense is optional and may not be supported).
23183  *		0  - log page not found.
 *		1  - log page found.
23185  */
23186 
23187 static int
23188 sd_log_page_supported(sd_ssc_t *ssc, int log_page)
23189 {
23190 	uchar_t *log_page_data;
23191 	int	i;
23192 	int	match = 0;
23193 	int	log_size;
23194 	int	status = 0;
23195 	struct sd_lun	*un;
23196 
23197 	ASSERT(ssc != NULL);
23198 	un = ssc->ssc_un;
23199 	ASSERT(un != NULL);
23200 
23201 	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
23202 
23203 	status = sd_send_scsi_LOG_SENSE(ssc, log_page_data, 0xFF, 0, 0x01, 0,
23204 	    SD_PATH_DIRECT);
23205 
23206 	if (status != 0) {
23207 		if (status == EIO) {
23208 			/*
23209 			 * Some disks do not support log sense, we
23210 			 * should ignore this kind of error(sense key is
23211 			 * 0x5 - illegal request).
23212 			 */
23213 			uint8_t *sensep;
23214 			int senlen;
23215 
23216 			sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
23217 			senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
23218 			    ssc->ssc_uscsi_cmd->uscsi_rqresid);
23219 
23220 			if (senlen > 0 &&
23221 			    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
23222 				sd_ssc_assessment(ssc,
23223 				    SD_FMT_IGNORE_COMPROMISE);
23224 			} else {
23225 				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
23226 			}
23227 		} else {
23228 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
23229 		}
23230 
23231 		SD_ERROR(SD_LOG_COMMON, un,
23232 		    "sd_log_page_supported: failed log page retrieval\n");
23233 		kmem_free(log_page_data, 0xFF);
23234 		return (-1);
23235 	}
23236 
23237 	log_size = log_page_data[3];
23238 
23239 	/*
23240 	 * The list of supported log pages start from the fourth byte. Check
23241 	 * until we run out of log pages or a match is found.
23242 	 */
23243 	for (i = 4; (i < (log_size + 4)) && !match; i++) {
23244 		if (log_page_data[i] == log_page) {
23245 			match++;
23246 		}
23247 	}
23248 	kmem_free(log_page_data, 0xFF);
23249 	return (match);
23250 }
23251 
23252 
23253 /*
23254  *    Function: sd_mhdioc_failfast
23255  *
23256  * Description: This routine is the driver entry point for handling ioctl
23257  *		requests to enable/disable the multihost failfast option.
23258  *		(MHIOCENFAILFAST)
23259  *
23260  *   Arguments: dev	- the device number
23261  *		arg	- user specified probing interval.
23262  *		flag	- this argument is a pass through to ddi_copyxxx()
23263  *			  directly from the mode argument of ioctl().
23264  *
23265  * Return Code: 0
23266  *		EFAULT
23267  *		ENXIO
23268  */
23269 
23270 static int
23271 sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
23272 {
23273 	struct sd_lun	*un = NULL;
23274 	int		mh_time;
23275 	int		rval = 0;
23276 
23277 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23278 		return (ENXIO);
23279 	}
23280 
23281 	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
23282 		return (EFAULT);
23283 
23284 	if (mh_time) {
23285 		mutex_enter(SD_MUTEX(un));
23286 		un->un_resvd_status |= SD_FAILFAST;
23287 		mutex_exit(SD_MUTEX(un));
23288 		/*
23289 		 * If mh_time is INT_MAX, then this ioctl is being used for
23290 		 * SCSI-3 PGR purposes, and we don't need to spawn watch thread.
23291 		 */
23292 		if (mh_time != INT_MAX) {
23293 			rval = sd_check_mhd(dev, mh_time);
23294 		}
23295 	} else {
23296 		(void) sd_check_mhd(dev, 0);
23297 		mutex_enter(SD_MUTEX(un));
23298 		un->un_resvd_status &= ~SD_FAILFAST;
23299 		mutex_exit(SD_MUTEX(un));
23300 	}
23301 	return (rval);
23302 }
23303 
23304 
23305 /*
23306  *    Function: sd_mhdioc_takeown
23307  *
23308  * Description: This routine is the driver entry point for handling ioctl
23309  *		requests to forcefully acquire exclusive access rights to the
23310  *		multihost disk (MHIOCTKOWN).
23311  *
23312  *   Arguments: dev	- the device number
23313  *		arg	- user provided structure specifying the delay
23314  *			  parameters in milliseconds
23315  *		flag	- this argument is a pass through to ddi_copyxxx()
23316  *			  directly from the mode argument of ioctl().
23317  *
23318  * Return Code: 0
23319  *		EFAULT
23320  *		ENXIO
23321  */
23322 
static int
sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un = NULL;
	struct mhioctkown	*tkown = NULL;
	int			rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/* arg == NULL means "take ownership with default delay parameters" */
	if (arg != NULL) {
		tkown = (struct mhioctkown *)
		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
		if (rval != 0) {
			rval = EFAULT;
			goto error;
		}
	}

	rval = sd_take_ownership(dev, tkown);
	mutex_enter(SD_MUTEX(un));
	if (rval == 0) {
		un->un_resvd_status |= SD_RESERVE;
		/*
		 * reinstate_resv_delay is supplied in milliseconds; the
		 * stored global is scaled by 1000 (sd_check_mhd below is
		 * handed the value divided back by 1000).
		 */
		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
			sd_reinstate_resv_delay =
			    tkown->reinstate_resv_delay * 1000;
		} else {
			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
		}
		/*
		 * Give the scsi_watch routine interval set by
		 * the MHIOCENFAILFAST ioctl precedence here.
		 */
		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
			mutex_exit(SD_MUTEX(un));
			(void) sd_check_mhd(dev, sd_reinstate_resv_delay/1000);
			SD_TRACE(SD_LOG_IOCTL_MHD, un,
			    "sd_mhdioc_takeown : %d\n",
			    sd_reinstate_resv_delay);
		} else {
			mutex_exit(SD_MUTEX(un));
		}
		/* Arrange to be told about bus resets that may drop resv. */
		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
		    sd_mhd_reset_notify_cb, (caddr_t)un);
	} else {
		un->un_resvd_status &= ~SD_RESERVE;
		mutex_exit(SD_MUTEX(un));
	}

error:
	if (tkown != NULL) {
		kmem_free(tkown, sizeof (struct mhioctkown));
	}
	return (rval);
}
23380 
23381 
23382 /*
23383  *    Function: sd_mhdioc_release
23384  *
23385  * Description: This routine is the driver entry point for handling ioctl
23386  *		requests to release exclusive access rights to the multihost
23387  *		disk (MHIOCRELEASE).
23388  *
23389  *   Arguments: dev	- the device number
23390  *
23391  * Return Code: 0
23392  *		ENXIO
23393  */
23394 
static int
sd_mhdioc_release(dev_t dev)
{
	struct sd_lun		*un = NULL;
	timeout_id_t		resvd_timeid_save;
	int			resvd_status_save;
	int			rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * Save the reservation status (restored below if the release
	 * fails) and cancel any pending reservation-reclaim timeout.
	 * untimeout() is called with the mutex dropped to avoid
	 * deadlocking against a timeout handler that takes the mutex.
	 */
	mutex_enter(SD_MUTEX(un));
	resvd_status_save = un->un_resvd_status;
	un->un_resvd_status &=
	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
	if (un->un_resvd_timeid) {
		resvd_timeid_save = un->un_resvd_timeid;
		un->un_resvd_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(resvd_timeid_save);
	} else {
		mutex_exit(SD_MUTEX(un));
	}

	/*
	 * destroy any pending timeout thread that may be attempting to
	 * reinstate reservation on this device.
	 */
	sd_rmv_resv_reclaim_req(dev);

	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
		/* Released: stop the MHD watch unless failfast still needs it */
		mutex_enter(SD_MUTEX(un));
		if ((un->un_mhd_token) &&
		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
			mutex_exit(SD_MUTEX(un));
			(void) sd_check_mhd(dev, 0);
		} else {
			mutex_exit(SD_MUTEX(un));
		}
		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
		    sd_mhd_reset_notify_cb, (caddr_t)un);
	} else {
		/*
		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
		 */
		mutex_enter(SD_MUTEX(un));
		un->un_resvd_status = resvd_status_save;
		mutex_exit(SD_MUTEX(un));
	}
	return (rval);
}
23447 
23448 
23449 /*
23450  *    Function: sd_mhdioc_register_devid
23451  *
23452  * Description: This routine is the driver entry point for handling ioctl
23453  *		requests to register the device id (MHIOCREREGISTERDEVID).
23454  *
23455  *		Note: The implementation for this ioctl has been updated to
23456  *		be consistent with the original PSARC case (1999/357)
23457  *		(4375899, 4241671, 4220005)
23458  *
23459  *   Arguments: dev	- the device number
23460  *
23461  * Return Code: 0
23462  *		ENXIO
23463  */
23464 
static int
sd_mhdioc_register_devid(dev_t dev)
{
	struct sd_lun	*un = NULL;
	int		rval = 0;
	sd_ssc_t	*ssc;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/* Caller must not hold the per-unit mutex on entry */
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	mutex_enter(SD_MUTEX(un));

	/* If a devid already exists, de-register it */
	if (un->un_devid != NULL) {
		ddi_devid_unregister(SD_DEVINFO(un));
		/*
		 * After unregister devid, needs to free devid memory
		 */
		ddi_devid_free(un->un_devid);
		un->un_devid = NULL;
	}

	/*
	 * Check for reservation conflict: issue a TEST UNIT READY with the
	 * mutex dropped, since the command path may block.
	 */
	mutex_exit(SD_MUTEX(un));
	ssc = sd_ssc_init(un);
	rval = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
	mutex_enter(SD_MUTEX(un));

	switch (rval) {
	case 0:
		/* TUR succeeded: device is unreserved, register a new devid */
		sd_register_devid(ssc, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
		break;
	case EACCES:
		/*
		 * Reservation conflict: the devid stays unregistered and
		 * EACCES is returned to the caller.
		 */
		break;
	default:
		/* Any other TUR failure is reported to the caller as EIO */
		rval = EIO;
	}

	mutex_exit(SD_MUTEX(un));
	/*
	 * Record the FMA assessment for the failed command: EIO means we
	 * saw a real status problem; anything else (EACCES) is expected
	 * and is ignored for fault-management purposes.
	 */
	if (rval != 0) {
		if (rval == EIO)
			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
		else
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	}
	sd_ssc_fini(ssc);
	return (rval);
}
23516 
23517 
23518 /*
23519  *    Function: sd_mhdioc_inkeys
23520  *
23521  * Description: This routine is the driver entry point for handling ioctl
23522  *		requests to issue the SCSI-3 Persistent In Read Keys command
23523  *		to the device (MHIOCGRP_INKEYS).
23524  *
23525  *   Arguments: dev	- the device number
23526  *		arg	- user provided in_keys structure
23527  *		flag	- this argument is a pass through to ddi_copyxxx()
23528  *			  directly from the mode argument of ioctl().
23529  *
23530  * Return Code: code returned by sd_persistent_reservation_in_read_keys()
23531  *		ENXIO
23532  *		EFAULT
23533  */
23534 
static int
sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un;
	mhioc_inkeys_t		inkeys;
	int			rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32: {
		struct mhioc_inkeys32	inkeys32;

		if (ddi_copyin(arg, &inkeys32,
		    sizeof (struct mhioc_inkeys32), flag) != 0) {
			return (EFAULT);
		}
		/*
		 * Widen the 32-bit user key-list pointer to the native
		 * pointer type before issuing the command.
		 */
		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
		if ((rval = sd_persistent_reservation_in_read_keys(un,
		    &inkeys, flag)) != 0) {
			return (rval);
		}
		/* Return the updated generation count to the ILP32 caller */
		inkeys32.generation = inkeys.generation;
		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
		    flag) != 0) {
			return (EFAULT);
		}
		break;
	}
	case DDI_MODEL_NONE:
		/* Native (LP64) caller: no pointer conversion needed */
		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
		    flag) != 0) {
			return (EFAULT);
		}
		if ((rval = sd_persistent_reservation_in_read_keys(un,
		    &inkeys, flag)) != 0) {
			return (rval);
		}
		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
		    flag) != 0) {
			return (EFAULT);
		}
		break;
	}

#else /* ! _MULTI_DATAMODEL */

	/* Single data model: copy in, issue the command, copy back out */
	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
		return (EFAULT);
	}
	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
	if (rval != 0) {
		return (rval);
	}
	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
		return (EFAULT);
	}

#endif /* _MULTI_DATAMODEL */

	return (rval);
}
23600 
23601 
23602 /*
23603  *    Function: sd_mhdioc_inresv
23604  *
23605  * Description: This routine is the driver entry point for handling ioctl
23606  *		requests to issue the SCSI-3 Persistent In Read Reservations
 *		command to the device (MHIOCGRP_INRESV).
23608  *
23609  *   Arguments: dev	- the device number
23610  *		arg	- user provided in_resv structure
23611  *		flag	- this argument is a pass through to ddi_copyxxx()
23612  *			  directly from the mode argument of ioctl().
23613  *
23614  * Return Code: code returned by sd_persistent_reservation_in_read_resv()
23615  *		ENXIO
23616  *		EFAULT
23617  */
23618 
static int
sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un;
	mhioc_inresvs_t		inresvs;
	int			rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

#ifdef _MULTI_DATAMODEL

	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32: {
		struct mhioc_inresvs32	inresvs32;

		if (ddi_copyin(arg, &inresvs32,
		    sizeof (struct mhioc_inresvs32), flag) != 0) {
			return (EFAULT);
		}
		/*
		 * Widen the 32-bit user reservation-descriptor-list pointer
		 * to the native pointer type before issuing the command.
		 */
		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
		if ((rval = sd_persistent_reservation_in_read_resv(un,
		    &inresvs, flag)) != 0) {
			return (rval);
		}
		/* Return the updated generation count to the ILP32 caller */
		inresvs32.generation = inresvs.generation;
		if (ddi_copyout(&inresvs32, arg,
		    sizeof (struct mhioc_inresvs32), flag) != 0) {
			return (EFAULT);
		}
		break;
	}
	case DDI_MODEL_NONE:
		/* Native (LP64) caller: no pointer conversion needed */
		if (ddi_copyin(arg, &inresvs,
		    sizeof (mhioc_inresvs_t), flag) != 0) {
			return (EFAULT);
		}
		if ((rval = sd_persistent_reservation_in_read_resv(un,
		    &inresvs, flag)) != 0) {
			return (rval);
		}
		if (ddi_copyout(&inresvs, arg,
		    sizeof (mhioc_inresvs_t), flag) != 0) {
			return (EFAULT);
		}
		break;
	}

#else /* ! _MULTI_DATAMODEL */

	/* Single data model: copy in, issue the command, copy back out */
	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
		return (EFAULT);
	}
	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
	if (rval != 0) {
		return (rval);
	}
	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag)) {
		return (EFAULT);
	}

#endif /* ! _MULTI_DATAMODEL */

	return (rval);
}
23685 
23686 
23687 /*
23688  * The following routines support the clustering functionality described below
23689  * and implement lost reservation reclaim functionality.
23690  *
23691  * Clustering
23692  * ----------
23693  * The clustering code uses two different, independent forms of SCSI
23694  * reservation. Traditional SCSI-2 Reserve/Release and the newer SCSI-3
23695  * Persistent Group Reservations. For any particular disk, it will use either
23696  * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
23697  *
23698  * SCSI-2
23699  * The cluster software takes ownership of a multi-hosted disk by issuing the
23700  * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
23701  * MHIOCRELEASE ioctl.  Closely related is the MHIOCENFAILFAST ioctl -- a
23702  * cluster, just after taking ownership of the disk with the MHIOCTKOWN ioctl
23703  * then issues the MHIOCENFAILFAST ioctl.  This ioctl "enables failfast" in the
23704  * driver. The meaning of failfast is that if the driver (on this host) ever
23705  * encounters the scsi error return code RESERVATION_CONFLICT from the device,
23706  * it should immediately panic the host. The motivation for this ioctl is that
23707  * if this host does encounter reservation conflict, the underlying cause is
23708  * that some other host of the cluster has decided that this host is no longer
23709  * in the cluster and has seized control of the disks for itself. Since this
23710  * host is no longer in the cluster, it ought to panic itself. The
23711  * MHIOCENFAILFAST ioctl does two things:
23712  *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
23713  *      error to panic the host
23714  *      (b) it sets up a periodic timer to test whether this host still has
23715  *      "access" (in that no other host has reserved the device):  if the
23716  *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
23717  *      purpose of that periodic timer is to handle scenarios where the host is
23718  *      otherwise temporarily quiescent, temporarily doing no real i/o.
23719  * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
23720  * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
23721  * the device itself.
23722  *
23723  * SCSI-3 PGR
23724  * A direct semantic implementation of the SCSI-3 Persistent Reservation
23725  * facility is supported through the shared multihost disk ioctls
23726  * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
23727  * MHIOCGRP_PREEMPTANDABORT)
23728  *
23729  * Reservation Reclaim:
23730  * --------------------
23731  * To support the lost reservation reclaim operations this driver creates a
23732  * single thread to handle reinstating reservations on all devices that have
 * lost reservations. sd_resv_reclaim_requests are logged for all devices that
 * have LOST RESERVATIONS when the scsi watch facility calls back sd_mhd_watch_cb
23735  * and the reservation reclaim thread loops through the requests to regain the
23736  * lost reservations.
23737  */
23738 
23739 /*
23740  *    Function: sd_check_mhd()
23741  *
23742  * Description: This function sets up and submits a scsi watch request or
23743  *		terminates an existing watch request. This routine is used in
23744  *		support of reservation reclaim.
23745  *
23746  *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
23747  *			 among multiple watches that share the callback function
23748  *		interval - the number of microseconds specifying the watch
23749  *			   interval for issuing TEST UNIT READY commands. If
23750  *			   set to 0 the watch should be terminated. If the
23751  *			   interval is set to 0 and if the device is required
23752  *			   to hold reservation while disabling failfast, the
23753  *			   watch is restarted with an interval of
23754  *			   reinstate_resv_delay.
23755  *
23756  * Return Code: 0	   - Successful submit/terminate of scsi watch request
23757  *		ENXIO      - Indicates an invalid device was specified
23758  *		EAGAIN     - Unable to submit the scsi watch request
23759  */
23760 
static int
sd_check_mhd(dev_t dev, int interval)
{
	struct sd_lun	*un;
	opaque_t	token;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/* is this a watch termination request? */
	if (interval == 0) {
		mutex_enter(SD_MUTEX(un));
		/* if there is an existing watch task then terminate it */
		if (un->un_mhd_token) {
			/*
			 * Clear the token under the mutex, then terminate
			 * with the mutex dropped since the terminate call
			 * waits for outstanding watch activity.
			 */
			token = un->un_mhd_token;
			un->un_mhd_token = NULL;
			mutex_exit(SD_MUTEX(un));
			(void) scsi_watch_request_terminate(token,
			    SCSI_WATCH_TERMINATE_ALL_WAIT);
			mutex_enter(SD_MUTEX(un));
		} else {
			mutex_exit(SD_MUTEX(un));
			/*
			 * Note: If we return here we don't check for the
			 * failfast case. This is the original legacy
			 * implementation but perhaps we should be checking
			 * the failfast case.
			 */
			return (0);
		}
		/*
		 * If the device is required to hold reservation while
		 * disabling failfast, we need to restart the scsi_watch
		 * routine with an interval of reinstate_resv_delay.
		 */
		if (un->un_resvd_status & SD_RESERVE) {
			/* sd_reinstate_resv_delay is in usecs; watch wants msecs */
			interval = sd_reinstate_resv_delay/1000;
		} else {
			/* no failfast so bail */
			mutex_exit(SD_MUTEX(un));
			return (0);
		}
		mutex_exit(SD_MUTEX(un));
	}

	/*
	 * adjust minimum time interval to 1 second,
	 * and convert from msecs to usecs
	 */
	if (interval > 0 && interval < 1000) {
		interval = 1000;
	}
	interval *= 1000;

	/*
	 * submit the request to the scsi_watch service;
	 * the dev_t is passed as callback context so sd_mhd_watch_cb can
	 * discriminate among multiple watches.
	 */
	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
	if (token == NULL) {
		return (EAGAIN);
	}

	/*
	 * save token for termination later on
	 */
	mutex_enter(SD_MUTEX(un));
	un->un_mhd_token = token;
	mutex_exit(SD_MUTEX(un));
	return (0);
}
23833 
23834 
23835 /*
23836  *    Function: sd_mhd_watch_cb()
23837  *
23838  * Description: This function is the call back function used by the scsi watch
23839  *		facility. The scsi watch facility sends the "Test Unit Ready"
23840  *		and processes the status. If applicable (i.e. a "Unit Attention"
23841  *		status and automatic "Request Sense" not used) the scsi watch
23842  *		facility will send a "Request Sense" and retrieve the sense data
23843  *		to be passed to this callback function. In either case the
23844  *		automatic "Request Sense" or the facility submitting one, this
23845  *		callback is passed the status and sense data.
23846  *
23847  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
23848  *			among multiple watches that share this callback function
23849  *		resultp - scsi watch facility result packet containing scsi
23850  *			  packet, status byte and sense data
23851  *
23852  * Return Code: 0 - continue the watch task
23853  *		non-zero - terminate the watch task
23854  */
23855 
static int
sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
{
	struct sd_lun			*un;
	struct scsi_status		*statusp;
	uint8_t				*sensep;
	struct scsi_pkt			*pkt;
	uchar_t				actual_sense_length;
	dev_t  				dev = (dev_t)arg;

	ASSERT(resultp != NULL);
	statusp			= resultp->statusp;
	sensep			= (uint8_t *)resultp->sensep;
	pkt			= resultp->pkt;
	actual_sense_length	= resultp->actual_sense_length;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		/* non-zero return terminates the watch task */
		return (ENXIO);
	}

	SD_TRACE(SD_LOG_IOCTL_MHD, un,
	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));

	/* Begin processing of the status and/or sense data */
	if (pkt->pkt_reason != CMD_CMPLT) {
		/* Handle the incomplete packet */
		sd_mhd_watch_incomplete(un, pkt);
		return (0);
	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
		if (*((unsigned char *)statusp)
		    == STATUS_RESERVATION_CONFLICT) {
			/*
			 * Handle a reservation conflict by panicking if
			 * configured for failfast or by logging the conflict
			 * and updating the reservation status
			 */
			mutex_enter(SD_MUTEX(un));
			if ((un->un_resvd_status & SD_FAILFAST) &&
			    (sd_failfast_enable)) {
				sd_panic_for_res_conflict(un);
				/*NOTREACHED*/
			}
			SD_INFO(SD_LOG_IOCTL_MHD, un,
			    "sd_mhd_watch_cb: Reservation Conflict\n");
			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
			mutex_exit(SD_MUTEX(un));
		}
	}

	/*
	 * Every path that falls through to the reservation-recovery check
	 * below must enter SD_MUTEX; the short-sense path returns instead.
	 */
	if (sensep != NULL) {
		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
			mutex_enter(SD_MUTEX(un));
			if ((scsi_sense_asc(sensep) ==
			    SD_SCSI_RESET_SENSE_CODE) &&
			    (un->un_resvd_status & SD_RESERVE)) {
				/*
				 * The additional sense code indicates a power
				 * on or bus device reset has occurred; update
				 * the reservation status.
				 */
				un->un_resvd_status |=
				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
				SD_INFO(SD_LOG_IOCTL_MHD, un,
				    "sd_mhd_watch_cb: Lost Reservation\n");
			}
		} else {
			/* Sense data too short to interpret; keep watching */
			return (0);
		}
	} else {
		mutex_enter(SD_MUTEX(un));
	}

	/* SD_MUTEX is held here */
	if ((un->un_resvd_status & SD_RESERVE) &&
	    (un->un_resvd_status & SD_LOST_RESERVE)) {
		if (un->un_resvd_status & SD_WANT_RESERVE) {
			/*
			 * A reset occurred in between the last probe and this
			 * one so if a timeout is pending cancel it.
			 */
			if (un->un_resvd_timeid) {
				timeout_id_t temp_id = un->un_resvd_timeid;
				un->un_resvd_timeid = NULL;
				mutex_exit(SD_MUTEX(un));
				/* untimeout may block; call without the mutex */
				(void) untimeout(temp_id);
				mutex_enter(SD_MUTEX(un));
			}
			un->un_resvd_status &= ~SD_WANT_RESERVE;
		}
		if (un->un_resvd_timeid == 0) {
			/* Schedule a timeout to handle the lost reservation */
			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
			    (void *)dev,
			    drv_usectohz(sd_reinstate_resv_delay));
		}
	}
	mutex_exit(SD_MUTEX(un));
	return (0);
}
23955 
23956 
23957 /*
23958  *    Function: sd_mhd_watch_incomplete()
23959  *
23960  * Description: This function is used to find out why a scsi pkt sent by the
23961  *		scsi watch facility was not completed. Under some scenarios this
23962  *		routine will return. Otherwise it will send a bus reset to see
23963  *		if the drive is still online.
23964  *
23965  *   Arguments: un  - driver soft state (unit) structure
23966  *		pkt - incomplete scsi pkt
23967  */
23968 
static void
sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
{
	int	be_chatty;	/* log transport failures unless FLAG_SILENT */
	int	perr;		/* pkt saw a parity error */

	ASSERT(pkt != NULL);
	ASSERT(un != NULL);
	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
	perr		= (pkt->pkt_statistics & STAT_PERR);

	mutex_enter(SD_MUTEX(un));
	if (un->un_state == SD_STATE_DUMPING) {
		/* Do nothing while a crash dump is in progress */
		mutex_exit(SD_MUTEX(un));
		return;
	}

	switch (pkt->pkt_reason) {
	case CMD_UNX_BUS_FREE:
		/*
		 * If we had a parity error that caused the target to drop BSY*,
		 * don't be chatty about it.
		 */
		if (perr && be_chatty) {
			be_chatty = 0;
		}
		break;
	case CMD_TAG_REJECT:
		/*
		 * The SCSI-2 spec states that a tag reject will be sent by the
		 * target if tagged queuing is not supported. A tag reject may
		 * also be sent during certain initialization periods or to
		 * control internal resources. For the latter case the target
		 * may also return Queue Full.
		 *
		 * If this driver receives a tag reject from a target that is
		 * going through an init period or controlling internal
		 * resources tagged queuing will be disabled. This is a less
		 * than optimal behavior but the driver is unable to determine
		 * the target state and assumes tagged queueing is not supported
		 */
		pkt->pkt_flags = 0;
		un->un_tagflags = 0;

		/* Reduce the throttle now that tagged queueing is disabled */
		if (un->un_f_opt_queueing == TRUE) {
			un->un_throttle = min(un->un_throttle, 3);
		} else {
			un->un_throttle = 1;
		}
		mutex_exit(SD_MUTEX(un));
		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
		mutex_enter(SD_MUTEX(un));
		break;
	case CMD_INCOMPLETE:
		/*
		 * The transport stopped with an abnormal state, fallthrough and
		 * reset the target and/or bus unless selection did not complete
		 * (indicated by STATE_GOT_BUS) in which case we don't want to
		 * go through a target/bus reset
		 */
		if (pkt->pkt_state == STATE_GOT_BUS) {
			break;
		}
		/*FALLTHROUGH*/

	case CMD_TIMEOUT:
	default:
		/*
		 * The lun may still be running the command, so a lun reset
		 * should be attempted. If the lun reset fails or cannot be
		 * issued, than try a target reset. Lastly try a bus reset.
		 */
		if ((pkt->pkt_statistics &
		    (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
			int reset_retval = 0;
			/*
			 * NOTE(review): un_f_allow_bus_device_reset and
			 * un_f_lun_reset_enabled are read after dropping
			 * SD_MUTEX; presumably these flags are stable after
			 * attach — confirm before relying on this.
			 */
			mutex_exit(SD_MUTEX(un));
			if (un->un_f_allow_bus_device_reset == TRUE) {
				if (un->un_f_lun_reset_enabled == TRUE) {
					reset_retval =
					    scsi_reset(SD_ADDRESS(un),
					    RESET_LUN);
				}
				if (reset_retval == 0) {
					reset_retval =
					    scsi_reset(SD_ADDRESS(un),
					    RESET_TARGET);
				}
			}
			if (reset_retval == 0) {
				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
			}
			mutex_enter(SD_MUTEX(un));
		}
		break;
	}

	/* A device/bus reset has occurred; update the reservation status. */
	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
			un->un_resvd_status |=
			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
			SD_INFO(SD_LOG_IOCTL_MHD, un,
			    "sd_mhd_watch_incomplete: Lost Reservation\n");
		}
	}

	/*
	 * The disk has been turned off; Update the device state.
	 *
	 * Note: Should we be offlining the disk here?
	 */
	if (pkt->pkt_state == STATE_GOT_BUS) {
		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
		    "Disk not responding to selection\n");
		if (un->un_state != SD_STATE_OFFLINE) {
			New_state(un, SD_STATE_OFFLINE);
		}
	} else if (be_chatty) {
		/*
		 * suppress messages if they are all the same pkt reason;
		 * with TQ, many (up to 256) are returned with the same
		 * pkt_reason
		 */
		if (pkt->pkt_reason != un->un_last_pkt_reason) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_mhd_watch_incomplete: "
			    "SCSI transport failed: reason '%s'\n",
			    scsi_rname(pkt->pkt_reason));
		}
	}
	un->un_last_pkt_reason = pkt->pkt_reason;
	mutex_exit(SD_MUTEX(un));
}
24103 
24104 
24105 /*
24106  *    Function: sd_sname()
24107  *
24108  * Description: This is a simple little routine to return a string containing
24109  *		a printable description of command status byte for use in
24110  *		logging.
24111  *
24112  *   Arguments: status - pointer to a status byte
24113  *
24114  * Return Code: char * - string containing status description.
24115  */
24116 
24117 static char *
24118 sd_sname(uchar_t status)
24119 {
24120 	switch (status & STATUS_MASK) {
24121 	case STATUS_GOOD:
24122 		return ("good status");
24123 	case STATUS_CHECK:
24124 		return ("check condition");
24125 	case STATUS_MET:
24126 		return ("condition met");
24127 	case STATUS_BUSY:
24128 		return ("busy");
24129 	case STATUS_INTERMEDIATE:
24130 		return ("intermediate");
24131 	case STATUS_INTERMEDIATE_MET:
24132 		return ("intermediate - condition met");
24133 	case STATUS_RESERVATION_CONFLICT:
24134 		return ("reservation_conflict");
24135 	case STATUS_TERMINATED:
24136 		return ("command terminated");
24137 	case STATUS_QFULL:
24138 		return ("queue full");
24139 	default:
24140 		return ("<unknown status>");
24141 	}
24142 }
24143 
24144 
24145 /*
24146  *    Function: sd_mhd_resvd_recover()
24147  *
24148  * Description: This function adds a reservation entry to the
24149  *		sd_resv_reclaim_request list and signals the reservation
24150  *		reclaim thread that there is work pending. If the reservation
24151  *		reclaim thread has not been previously created this function
24152  *		will kick it off.
24153  *
24154  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24155  *			among multiple watches that share this callback function
24156  *
24157  *     Context: This routine is called by timeout() and is run in interrupt
24158  *		context. It must not sleep or call other functions which may
24159  *		sleep.
24160  */
24161 
static void
sd_mhd_resvd_recover(void *arg)
{
	dev_t			dev = (dev_t)arg;
	struct sd_lun		*un;
	struct sd_thr_request	*sd_treq = NULL;
	struct sd_thr_request	*sd_cur = NULL;
	struct sd_thr_request	*sd_prev = NULL;
	int			already_there = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return;
	}

	mutex_enter(SD_MUTEX(un));
	/* This timeout has fired; clear the pending timeout id */
	un->un_resvd_timeid = NULL;
	if (un->un_resvd_status & SD_WANT_RESERVE) {
		/*
		 * There was a reset so don't issue the reserve, allow the
		 * sd_mhd_watch_cb callback function to notice this and
		 * reschedule the timeout for reservation.
		 */
		mutex_exit(SD_MUTEX(un));
		return;
	}
	mutex_exit(SD_MUTEX(un));

	/*
	 * Add this device to the sd_resv_reclaim_request list and the
	 * sd_resv_reclaim_thread should take care of the rest.
	 *
	 * Note: We can't sleep in this context so if the memory allocation
	 * fails allow the sd_mhd_watch_cb callback function to notice this and
	 * reschedule the timeout for reservation.  (4378460)
	 */
	sd_treq = (struct sd_thr_request *)
	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
	if (sd_treq == NULL) {
		return;
	}

	sd_treq->sd_thr_req_next = NULL;
	sd_treq->dev = dev;
	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
	if (sd_tr.srq_thr_req_head == NULL) {
		/* Empty queue: the new request becomes the head */
		sd_tr.srq_thr_req_head = sd_treq;
	} else {
		/* Walk the queue looking for an existing entry for this dev */
		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
			if (sd_cur->dev == dev) {
				/*
				 * already in Queue so don't log
				 * another request for the device
				 */
				already_there = 1;
				break;
			}
			sd_prev = sd_cur;
		}
		if (!already_there) {
			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
			    "logging request for %lx\n", dev);
			/* Append the new request at the tail of the queue */
			sd_prev->sd_thr_req_next = sd_treq;
		} else {
			/* Duplicate request; discard the allocation */
			kmem_free(sd_treq, sizeof (struct sd_thr_request));
		}
	}

	/*
	 * Create a kernel thread to do the reservation reclaim and free up this
	 * thread. We cannot block this thread while we go away to do the
	 * reservation reclaim
	 */
	if (sd_tr.srq_resv_reclaim_thread == NULL)
		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
		    sd_resv_reclaim_thread, NULL,
		    0, &p0, TS_RUN, v.v_maxsyspri - 2);

	/* Tell the reservation reclaim thread that it has work to do */
	cv_signal(&sd_tr.srq_resv_reclaim_cv);
	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
}
24244 
24245 /*
24246  *    Function: sd_resv_reclaim_thread()
24247  *
24248  * Description: This function implements the reservation reclaim operations
24249  *
24250  *   Arguments: arg - the device 'dev_t' is used for context to discriminate
24251  *		      among multiple watches that share this callback function
24252  */
24253 
static void
sd_resv_reclaim_thread()
{
	struct sd_lun		*un;
	struct sd_thr_request	*sd_mhreq;

	/*
	 * Wait for work.
	 * NOTE(review): cv_wait is in an 'if', not a 'while'; on a wakeup
	 * with an empty queue the loop below is skipped and the thread
	 * exits — presumably acceptable since sd_mhd_resvd_recover will
	 * recreate the thread; confirm before changing.
	 */
	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
	if (sd_tr.srq_thr_req_head == NULL) {
		cv_wait(&sd_tr.srq_resv_reclaim_cv,
		    &sd_tr.srq_resv_reclaim_mutex);
	}

	/* Loop while we have work */
	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
		un = ddi_get_soft_state(sd_state,
		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
		if (un == NULL) {
			/*
			 * softstate structure is NULL so just
			 * dequeue the request and continue
			 */
			sd_tr.srq_thr_req_head =
			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
			kmem_free(sd_tr.srq_thr_cur_req,
			    sizeof (struct sd_thr_request));
			continue;
		}

		/* dequeue the request */
		sd_mhreq = sd_tr.srq_thr_cur_req;
		sd_tr.srq_thr_req_head =
		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
		/* drop the list mutex while issuing the (blocking) reserve */
		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);

		/*
		 * Reclaim reservation only if SD_RESERVE is still set. There
		 * may have been a call to MHIOCRELEASE before we got here.
		 */
		mutex_enter(SD_MUTEX(un));
		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
			/*
			 * Note: The SD_LOST_RESERVE flag is cleared before
			 * reclaiming the reservation. If this is done after the
			 * call to sd_reserve_release a reservation loss in the
			 * window between pkt completion of reserve cmd and
			 * mutex_enter below may not be recognized
			 */
			un->un_resvd_status &= ~SD_LOST_RESERVE;
			mutex_exit(SD_MUTEX(un));

			if (sd_reserve_release(sd_mhreq->dev,
			    SD_RESERVE) == 0) {
				mutex_enter(SD_MUTEX(un));
				un->un_resvd_status |= SD_RESERVE;
				mutex_exit(SD_MUTEX(un));
				SD_INFO(SD_LOG_IOCTL_MHD, un,
				    "sd_resv_reclaim_thread: "
				    "Reservation Recovered\n");
			} else {
				/* Reserve failed: restore SD_LOST_RESERVE */
				mutex_enter(SD_MUTEX(un));
				un->un_resvd_status |= SD_LOST_RESERVE;
				mutex_exit(SD_MUTEX(un));
				SD_INFO(SD_LOG_IOCTL_MHD, un,
				    "sd_resv_reclaim_thread: Failed "
				    "Reservation Recovery\n");
			}
		} else {
			mutex_exit(SD_MUTEX(un));
		}
		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
		/*
		 * wakeup the destroy thread if anyone is waiting on
		 * us to complete.
		 */
		cv_signal(&sd_tr.srq_inprocess_cv);
		SD_TRACE(SD_LOG_IOCTL_MHD, un,
		    "sd_resv_reclaim_thread: cv_signalling current request \n");
	}

	/*
	 * cleanup the sd_tr structure now that this thread will not exist
	 */
	ASSERT(sd_tr.srq_thr_req_head == NULL);
	ASSERT(sd_tr.srq_thr_cur_req == NULL);
	sd_tr.srq_resv_reclaim_thread = NULL;
	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
	thread_exit();
}
24346 
24347 
24348 /*
24349  *    Function: sd_rmv_resv_reclaim_req()
24350  *
24351  * Description: This function removes any pending reservation reclaim requests
24352  *		for the specified device.
24353  *
24354  *   Arguments: dev - the device 'dev_t'
24355  */
24356 
static void
sd_rmv_resv_reclaim_req(dev_t dev)
{
	struct sd_thr_request *sd_mhreq;
	struct sd_thr_request *sd_prev;

	/* Remove a reservation reclaim request from the list */
	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
		/*
		 * We are attempting to reinstate reservation for
		 * this device. We wait for sd_reserve_release()
		 * to return before we return.
		 */
		cv_wait(&sd_tr.srq_inprocess_cv,
		    &sd_tr.srq_resv_reclaim_mutex);
	} else {
		/* Special-case removal of the list head */
		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
		if (sd_mhreq && sd_mhreq->dev == dev) {
			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
			return;
		}
		/*
		 * Walk the rest of the list looking for a matching dev.
		 * (The sd_mhreq non-NULL test inside the loop is redundant
		 * with the loop condition but harmless.)
		 */
		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
			if (sd_mhreq && sd_mhreq->dev == dev) {
				break;
			}
			sd_prev = sd_mhreq;
		}
		/* Unlink and free the matching entry, if one was found */
		if (sd_mhreq != NULL) {
			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
		}
	}
	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
}
24394 
24395 
24396 /*
24397  *    Function: sd_mhd_reset_notify_cb()
24398  *
24399  * Description: This is a call back function for scsi_reset_notify. This
24400  *		function updates the softstate reserved status and logs the
24401  *		reset. The driver scsi watch facility callback function
24402  *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
24403  *		will reclaim the reservation.
24404  *
24405  *   Arguments: arg  - driver soft state (unit) structure
24406  */
24407 
24408 static void
24409 sd_mhd_reset_notify_cb(caddr_t arg)
24410 {
24411 	struct sd_lun *un = (struct sd_lun *)arg;
24412 
24413 	mutex_enter(SD_MUTEX(un));
24414 	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
24415 		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
24416 		SD_INFO(SD_LOG_IOCTL_MHD, un,
24417 		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
24418 	}
24419 	mutex_exit(SD_MUTEX(un));
24420 }
24421 
24422 
24423 /*
24424  *    Function: sd_take_ownership()
24425  *
24426  * Description: This routine implements an algorithm to achieve a stable
24427  *		reservation on disks which don't implement priority reserve,
24428  *		and makes sure that other host lose re-reservation attempts.
24429  *		This algorithm contains of a loop that keeps issuing the RESERVE
24430  *		for some period of time (min_ownership_delay, default 6 seconds)
24431  *		During that loop, it looks to see if there has been a bus device
24432  *		reset or bus reset (both of which cause an existing reservation
24433  *		to be lost). If the reservation is lost issue RESERVE until a
24434  *		period of min_ownership_delay with no resets has gone by, or
24435  *		until max_ownership_delay has expired. This loop ensures that
24436  *		the host really did manage to reserve the device, in spite of
24437  *		resets. The looping for min_ownership_delay (default six
24438  *		seconds) is important to early generation clustering products,
24439  *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
24440  *		MHIOCENFAILFAST periodic timer of two seconds. By having
24441  *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
24442  *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
24443  *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
24444  *		have already noticed, via the MHIOCENFAILFAST polling, that it
24445  *		no longer "owns" the disk and will have panicked itself.  Thus,
24446  *		the host issuing the MHIOCTKOWN is assured (with timing
24447  *		dependencies) that by the time it actually starts to use the
24448  *		disk for real work, the old owner is no longer accessing it.
24449  *
24450  *		min_ownership_delay is the minimum amount of time for which the
24451  *		disk must be reserved continuously devoid of resets before the
24452  *		MHIOCTKOWN ioctl will return success.
24453  *
24454  *		max_ownership_delay indicates the amount of time by which the
24455  *		take ownership should succeed or timeout with an error.
24456  *
24457  *   Arguments: dev - the device 'dev_t'
24458  *		*p  - struct containing timing info.
24459  *
24460  * Return Code: 0 for success or error code
24461  */
24462 
24463 static int
24464 sd_take_ownership(dev_t dev, struct mhioctkown *p)
24465 {
24466 	struct sd_lun	*un;
24467 	int		rval;
24468 	int		err;
24469 	int		reservation_count   = 0;
24470 	int		min_ownership_delay =  6000000; /* in usec */
24471 	int		max_ownership_delay = 30000000; /* in usec */
24472 	clock_t		start_time;	/* starting time of this algorithm */
24473 	clock_t		end_time;	/* time limit for giving up */
24474 	clock_t		ownership_time;	/* time limit for stable ownership */
24475 	clock_t		current_time;
24476 	clock_t		previous_current_time;
24477 
24478 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24479 		return (ENXIO);
24480 	}
24481 
24482 	/*
24483 	 * Attempt a device reservation. A priority reservation is requested.
24484 	 */
24485 	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
24486 	    != SD_SUCCESS) {
24487 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
24488 		    "sd_take_ownership: return(1)=%d\n", rval);
24489 		return (rval);
24490 	}
24491 
24492 	/* Update the softstate reserved status to indicate the reservation */
24493 	mutex_enter(SD_MUTEX(un));
24494 	un->un_resvd_status |= SD_RESERVE;
24495 	un->un_resvd_status &=
24496 	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
24497 	mutex_exit(SD_MUTEX(un));
24498 
24499 	if (p != NULL) {
24500 		if (p->min_ownership_delay != 0) {
24501 			min_ownership_delay = p->min_ownership_delay * 1000;
24502 		}
24503 		if (p->max_ownership_delay != 0) {
24504 			max_ownership_delay = p->max_ownership_delay * 1000;
24505 		}
24506 	}
24507 	SD_INFO(SD_LOG_IOCTL_MHD, un,
24508 	    "sd_take_ownership: min, max delays: %d, %d\n",
24509 	    min_ownership_delay, max_ownership_delay);
24510 
24511 	start_time = ddi_get_lbolt();
24512 	current_time	= start_time;
24513 	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
24514 	end_time	= start_time + drv_usectohz(max_ownership_delay);
24515 
24516 	while (current_time - end_time < 0) {
24517 		delay(drv_usectohz(500000));
24518 
24519 		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
24520 			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
24521 				mutex_enter(SD_MUTEX(un));
24522 				rval = (un->un_resvd_status &
24523 				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
24524 				mutex_exit(SD_MUTEX(un));
24525 				break;
24526 			}
24527 		}
24528 		previous_current_time = current_time;
24529 		current_time = ddi_get_lbolt();
24530 		mutex_enter(SD_MUTEX(un));
24531 		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
24532 			ownership_time = ddi_get_lbolt() +
24533 			    drv_usectohz(min_ownership_delay);
24534 			reservation_count = 0;
24535 		} else {
24536 			reservation_count++;
24537 		}
24538 		un->un_resvd_status |= SD_RESERVE;
24539 		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
24540 		mutex_exit(SD_MUTEX(un));
24541 
24542 		SD_INFO(SD_LOG_IOCTL_MHD, un,
24543 		    "sd_take_ownership: ticks for loop iteration=%ld, "
24544 		    "reservation=%s\n", (current_time - previous_current_time),
24545 		    reservation_count ? "ok" : "reclaimed");
24546 
24547 		if (current_time - ownership_time >= 0 &&
24548 		    reservation_count >= 4) {
24549 			rval = 0; /* Achieved a stable ownership */
24550 			break;
24551 		}
24552 		if (current_time - end_time >= 0) {
24553 			rval = EACCES; /* No ownership in max possible time */
24554 			break;
24555 		}
24556 	}
24557 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
24558 	    "sd_take_ownership: return(2)=%d\n", rval);
24559 	return (rval);
24560 }
24561 
24562 
24563 /*
24564  *    Function: sd_reserve_release()
24565  *
24566  * Description: This function builds and sends scsi RESERVE, RELEASE, and
24567  *		PRIORITY RESERVE commands based on a user specified command type
24568  *
24569  *   Arguments: dev - the device 'dev_t'
24570  *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
24571  *		      SD_RESERVE, SD_RELEASE
24572  *
24573  * Return Code: 0 or Error Code
24574  */
24575 
24576 static int
24577 sd_reserve_release(dev_t dev, int cmd)
24578 {
24579 	struct uscsi_cmd	*com = NULL;
24580 	struct sd_lun		*un = NULL;
24581 	char			cdb[CDB_GROUP0];
24582 	int			rval;
24583 
24584 	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
24585 	    (cmd == SD_PRIORITY_RESERVE));
24586 
24587 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24588 		return (ENXIO);
24589 	}
24590 
24591 	/* instantiate and initialize the command and cdb */
24592 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24593 	bzero(cdb, CDB_GROUP0);
24594 	com->uscsi_flags   = USCSI_SILENT;
24595 	com->uscsi_timeout = un->un_reserve_release_time;
24596 	com->uscsi_cdblen  = CDB_GROUP0;
24597 	com->uscsi_cdb	   = cdb;
24598 	if (cmd == SD_RELEASE) {
24599 		cdb[0] = SCMD_RELEASE;
24600 	} else {
24601 		cdb[0] = SCMD_RESERVE;
24602 	}
24603 
24604 	/* Send the command. */
24605 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24606 	    SD_PATH_STANDARD);
24607 
24608 	/*
24609 	 * "break" a reservation that is held by another host, by issuing a
24610 	 * reset if priority reserve is desired, and we could not get the
24611 	 * device.
24612 	 */
24613 	if ((cmd == SD_PRIORITY_RESERVE) &&
24614 	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
24615 		/*
24616 		 * First try to reset the LUN. If we cannot, then try a target
24617 		 * reset, followed by a bus reset if the target reset fails.
24618 		 */
24619 		int reset_retval = 0;
24620 		if (un->un_f_lun_reset_enabled == TRUE) {
24621 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
24622 		}
24623 		if (reset_retval == 0) {
24624 			/* The LUN reset either failed or was not issued */
24625 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
24626 		}
24627 		if ((reset_retval == 0) &&
24628 		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
24629 			rval = EIO;
24630 			kmem_free(com, sizeof (*com));
24631 			return (rval);
24632 		}
24633 
24634 		bzero(com, sizeof (struct uscsi_cmd));
24635 		com->uscsi_flags   = USCSI_SILENT;
24636 		com->uscsi_cdb	   = cdb;
24637 		com->uscsi_cdblen  = CDB_GROUP0;
24638 		com->uscsi_timeout = 5;
24639 
24640 		/*
24641 		 * Reissue the last reserve command, this time without request
24642 		 * sense.  Assume that it is just a regular reserve command.
24643 		 */
24644 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24645 		    SD_PATH_STANDARD);
24646 	}
24647 
24648 	/* Return an error if still getting a reservation conflict. */
24649 	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
24650 		rval = EACCES;
24651 	}
24652 
24653 	kmem_free(com, sizeof (*com));
24654 	return (rval);
24655 }
24656 
24657 
#define	SD_NDUMP_RETRIES	12
/*
 *	System Crash Dump routine
 *
 *	Writes 'nblk' system blocks from 'addr' starting at partition-relative
 *	block 'blkno' of the device 'dev'.  All I/O is issued in polled
 *	(FLAG_NOINTR) mode since this runs in panic/dump context.  Handles
 *	powered-down devices, non-512-byte-block targets (via a
 *	read-modify-write), and HBAs that only support partial DMA transfers.
 *
 *	Returns 0 on success or an errno value (ENXIO, EINVAL, EIO).
 */

static int
sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	int		instance;
	int		partition;
	int		i;
	int		err;
	struct sd_lun	*un;
	struct scsi_pkt *wr_pktp;
	struct buf	*wr_bp;
	struct buf	wr_buf;
	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
	daddr_t		tgt_blkno;	/* rmw - blkno for target */
	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
	size_t		io_start_offset;
	int		doing_rmw = FALSE;
	int		rval;
	ssize_t		dma_resid;
	daddr_t		oblkno;
	diskaddr_t	nblks = 0;
	diskaddr_t	start_block;

	instance = SDUNIT(dev);
	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
	    !SD_IS_VALID_LABEL(un) || ISCD(un)) {
		return (ENXIO);
	}

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))

	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");

	partition = SDPART(dev);
	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);

	/* Validate blocks to dump at against partition size. */

	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
	    &nblks, &start_block, NULL, NULL, (void *)SD_PATH_DIRECT);

	if ((blkno + nblk) > nblks) {
		SD_TRACE(SD_LOG_DUMP, un,
		    "sddump: dump range larger than partition: "
		    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
		    blkno, nblk, nblks);
		return (EINVAL);
	}

	/*
	 * If the device is powered down, spin it up in-line (polled mode)
	 * rather than going through sdpower, which cannot be used here.
	 */
	mutex_enter(&un->un_pm_mutex);
	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
		struct scsi_pkt *start_pktp;

		mutex_exit(&un->un_pm_mutex);

		/*
		 * use pm framework to power on HBA 1st
		 */
		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);

		/*
		 * Dump no long uses sdpower to power on a device, it's
		 * in-line here so it can be done in polled mode.
		 */

		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");

		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);

		if (start_pktp == NULL) {
			/* We were not given a SCSI packet, fail. */
			return (EIO);
		}
		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
		start_pktp->pkt_flags = FLAG_NOINTR;

		mutex_enter(SD_MUTEX(un));
		SD_FILL_SCSI1_LUN(un, start_pktp);
		mutex_exit(SD_MUTEX(un));
		/*
		 * Scsi_poll returns 0 (success) if the command completes and
		 * the status block is STATUS_GOOD.
		 */
		if (sd_scsi_poll(un, start_pktp) != 0) {
			scsi_destroy_pkt(start_pktp);
			return (EIO);
		}
		scsi_destroy_pkt(start_pktp);
		(void) sd_ddi_pm_resume(un);
	} else {
		mutex_exit(&un->un_pm_mutex);
	}

	mutex_enter(SD_MUTEX(un));
	/* Suppress normal queued I/O while dumping. */
	un->un_throttle = 0;

	/*
	 * The first time through, reset the specific target device.
	 * However, when cpr calls sddump we know that sd is in a
	 * a good state so no bus reset is required.
	 * Clear sense data via Request Sense cmd.
	 * In sddump we don't care about allow_bus_device_reset anymore
	 */

	if ((un->un_state != SD_STATE_SUSPENDED) &&
	    (un->un_state != SD_STATE_DUMPING)) {

		New_state(un, SD_STATE_DUMPING);

		if (un->un_f_is_fibre == FALSE) {
			mutex_exit(SD_MUTEX(un));
			/*
			 * Attempt a bus reset for parallel scsi.
			 *
			 * Note: A bus reset is required because on some host
			 * systems (i.e. E420R) a bus device reset is
			 * insufficient to reset the state of the target.
			 *
			 * Note: Don't issue the reset for fibre-channel,
			 * because this tends to hang the bus (loop) for
			 * too long while everyone is logging out and in
			 * and the deadman timer for dumping will fire
			 * before the dump is complete.
			 */
			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
				mutex_enter(SD_MUTEX(un));
				Restore_state(un);
				mutex_exit(SD_MUTEX(un));
				return (EIO);
			}

			/* Delay to give the device some recovery time. */
			drv_usecwait(10000);

			if (sd_send_polled_RQS(un) == SD_FAILURE) {
				SD_INFO(SD_LOG_DUMP, un,
				    "sddump: sd_send_polled_RQS failed\n");
			}
			mutex_enter(SD_MUTEX(un));
		}
	}

	/*
	 * Convert the partition-relative block number to a
	 * disk physical block number.
	 */
	blkno += start_block;

	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);


	/*
	 * Check if the device has a non-512 block size.
	 */
	wr_bp = NULL;
	if (NOT_DEVBSIZE(un)) {
		tgt_byte_offset = blkno * un->un_sys_blocksize;
		tgt_byte_count = nblk * un->un_sys_blocksize;
		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
		    (tgt_byte_count % un->un_tgt_blocksize)) {
			/* Dump range is not target-block aligned: do RMW. */
			doing_rmw = TRUE;
			/*
			 * Calculate the block number and number of block
			 * in terms of the media block size.
			 */
			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
			tgt_nblk =
			    ((tgt_byte_offset + tgt_byte_count +
			    (un->un_tgt_blocksize - 1)) /
			    un->un_tgt_blocksize) - tgt_blkno;

			/*
			 * Invoke the routine which is going to do read part
			 * of read-modify-write.
			 * Note that this routine returns a pointer to
			 * a valid bp in wr_bp.
			 */
			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
			    &wr_bp);
			if (err) {
				mutex_exit(SD_MUTEX(un));
				return (err);
			}
			/*
			 * Offset is being calculated as -
			 * (original block # * system block size) -
			 * (new block # * target block size)
			 */
			io_start_offset =
			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));

			ASSERT((io_start_offset >= 0) &&
			    (io_start_offset < un->un_tgt_blocksize));
			/*
			 * Do the modify portion of read modify write.
			 */
			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
			    (size_t)nblk * un->un_sys_blocksize);
		} else {
			doing_rmw = FALSE;
			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
		}

		/* Convert blkno and nblk to target blocks */
		blkno = tgt_blkno;
		nblk = tgt_nblk;
	} else {
		/* 512-byte device: write directly from the caller's buffer. */
		wr_bp = &wr_buf;
		bzero(wr_bp, sizeof (struct buf));
		wr_bp->b_flags		= B_BUSY;
		wr_bp->b_un.b_addr	= addr;
		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
		wr_bp->b_resid		= 0;
	}

	mutex_exit(SD_MUTEX(un));

	/*
	 * Obtain a SCSI packet for the write command.
	 * It should be safe to call the allocator here without
	 * worrying about being locked for DVMA mapping because
	 * the address we're passed is already a DVMA mapping
	 *
	 * We are also not going to worry about semaphore ownership
	 * in the dump buffer. Dumping is single threaded at present.
	 */

	wr_pktp = NULL;

	dma_resid = wr_bp->b_bcount;
	oblkno = blkno;

	/*
	 * Outer loop: for HBAs with partial-DMA support, keep issuing
	 * transfers until the whole buffer has been written (dma_resid == 0).
	 * For other HBAs the loop body runs exactly once.
	 */
	while (dma_resid != 0) {

	/* Packet allocation/setup, retried up to SD_NDUMP_RETRIES times. */
	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
		wr_bp->b_flags &= ~B_ERROR;

		if (un->un_partial_dma_supported == 1) {
			/* Advance blkno/nblk past what has been transferred. */
			blkno = oblkno +
			    ((wr_bp->b_bcount - dma_resid) /
			    un->un_tgt_blocksize);
			nblk = dma_resid / un->un_tgt_blocksize;

			if (wr_pktp) {
				/*
				 * Partial DMA transfers after initial transfer
				 */
				rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
				    blkno, nblk);
			} else {
				/* Initial transfer */
				rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
				    un->un_pkt_flags, NULL_FUNC, NULL,
				    blkno, nblk);
			}
		} else {
			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
			    0, NULL_FUNC, NULL, blkno, nblk);
		}

		if (rval == 0) {
			/* We were given a SCSI packet, continue. */
			break;
		}

		if (i == 0) {
			if (wr_bp->b_flags & B_ERROR) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "no resources for dumping; "
				    "error code: 0x%x, retrying",
				    geterror(wr_bp));
			} else {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "no resources for dumping; retrying");
			}
		} else if (i != (SD_NDUMP_RETRIES - 1)) {
			if (wr_bp->b_flags & B_ERROR) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
				    "no resources for dumping; error code: "
				    "0x%x, retrying\n", geterror(wr_bp));
			}
		} else {
			/* Final retry failed: restore state and give up. */
			if (wr_bp->b_flags & B_ERROR) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
				    "no resources for dumping; "
				    "error code: 0x%x, retries failed, "
				    "giving up.\n", geterror(wr_bp));
			} else {
				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
				    "no resources for dumping; "
				    "retries failed, giving up.\n");
			}
			mutex_enter(SD_MUTEX(un));
			Restore_state(un);
			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
				mutex_exit(SD_MUTEX(un));
				/* Release the RMW buffer we allocated. */
				scsi_free_consistent_buf(wr_bp);
			} else {
				mutex_exit(SD_MUTEX(un));
			}
			return (EIO);
		}
		drv_usecwait(10000);
	}

	if (un->un_partial_dma_supported == 1) {
		/*
		 * save the resid from PARTIAL_DMA
		 */
		dma_resid = wr_pktp->pkt_resid;
		if (dma_resid != 0)
			nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
		wr_pktp->pkt_resid = 0;
	} else {
		dma_resid = 0;
	}

	/* SunBug 1222170 */
	wr_pktp->pkt_flags = FLAG_NOINTR;

	err = EIO;
	for (i = 0; i < SD_NDUMP_RETRIES; i++) {

		/*
		 * Scsi_poll returns 0 (success) if the command completes and
		 * the status block is STATUS_GOOD.  We should only check
		 * errors if this condition is not true.  Even then we should
		 * send our own request sense packet only if we have a check
		 * condition and auto request sense has not been performed by
		 * the hba.
		 */
		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");

		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
		    (wr_pktp->pkt_resid == 0)) {
			err = SD_SUCCESS;
			break;
		}

		/*
		 * Check CMD_DEV_GONE 1st, give up if device is gone.
		 */
		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Error while dumping state...Device is gone\n");
			break;
		}

		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: write failed with CHECK, try # %d\n", i);
			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
				(void) sd_send_polled_RQS(un);
			}

			continue;
		}

		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
			int reset_retval = 0;

			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: write failed with BUSY, try # %d\n", i);

			/* Reset LUN first; fall back to a target reset. */
			if (un->un_f_lun_reset_enabled == TRUE) {
				reset_retval = scsi_reset(SD_ADDRESS(un),
				    RESET_LUN);
			}
			if (reset_retval == 0) {
				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
			}
			(void) sd_send_polled_RQS(un);

		} else {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: write failed with 0x%x, try # %d\n",
			    SD_GET_PKT_STATUS(wr_pktp), i);
			mutex_enter(SD_MUTEX(un));
			sd_reset_target(un, wr_pktp);
			mutex_exit(SD_MUTEX(un));
		}

		/*
		 * If we are not getting anywhere with lun/target resets,
		 * let's reset the bus.
		 */
		if (i == SD_NDUMP_RETRIES/2) {
			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
			(void) sd_send_polled_RQS(un);
		}
	}
	}

	scsi_destroy_pkt(wr_pktp);
	mutex_enter(SD_MUTEX(un));
	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
		mutex_exit(SD_MUTEX(un));
		/* Free the buffer allocated by sddump_do_read_of_rmw(). */
		scsi_free_consistent_buf(wr_bp);
	} else {
		mutex_exit(SD_MUTEX(un));
	}
	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
	return (err);
}
25072 
25073 /*
25074  *    Function: sd_scsi_poll()
25075  *
25076  * Description: This is a wrapper for the scsi_poll call.
25077  *
25078  *   Arguments: sd_lun - The unit structure
25079  *              scsi_pkt - The scsi packet being sent to the device.
25080  *
25081  * Return Code: 0 - Command completed successfully with good status
25082  *             -1 - Command failed.  This could indicate a check condition
25083  *                  or other status value requiring recovery action.
25084  *
25085  * NOTE: This code is only called off sddump().
25086  */
25087 
25088 static int
25089 sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
25090 {
25091 	int status;
25092 
25093 	ASSERT(un != NULL);
25094 	ASSERT(!mutex_owned(SD_MUTEX(un)));
25095 	ASSERT(pktp != NULL);
25096 
25097 	status = SD_SUCCESS;
25098 
25099 	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
25100 		pktp->pkt_flags |= un->un_tagflags;
25101 		pktp->pkt_flags &= ~FLAG_NODISCON;
25102 	}
25103 
25104 	status = sd_ddi_scsi_poll(pktp);
25105 	/*
25106 	 * Scsi_poll returns 0 (success) if the command completes and the
25107 	 * status block is STATUS_GOOD.  We should only check errors if this
25108 	 * condition is not true.  Even then we should send our own request
25109 	 * sense packet only if we have a check condition and auto
25110 	 * request sense has not been performed by the hba.
25111 	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
25112 	 */
25113 	if ((status != SD_SUCCESS) &&
25114 	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
25115 	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
25116 	    (pktp->pkt_reason != CMD_DEV_GONE))
25117 		(void) sd_send_polled_RQS(un);
25118 
25119 	return (status);
25120 }
25121 
25122 /*
25123  *    Function: sd_send_polled_RQS()
25124  *
25125  * Description: This sends the request sense command to a device.
25126  *
25127  *   Arguments: sd_lun - The unit structure
25128  *
25129  * Return Code: 0 - Command completed successfully with good status
25130  *             -1 - Command failed.
25131  *
25132  */
25133 
25134 static int
25135 sd_send_polled_RQS(struct sd_lun *un)
25136 {
25137 	int	ret_val;
25138 	struct	scsi_pkt	*rqs_pktp;
25139 	struct	buf		*rqs_bp;
25140 
25141 	ASSERT(un != NULL);
25142 	ASSERT(!mutex_owned(SD_MUTEX(un)));
25143 
25144 	ret_val = SD_SUCCESS;
25145 
25146 	rqs_pktp = un->un_rqs_pktp;
25147 	rqs_bp	 = un->un_rqs_bp;
25148 
25149 	mutex_enter(SD_MUTEX(un));
25150 
25151 	if (un->un_sense_isbusy) {
25152 		ret_val = SD_FAILURE;
25153 		mutex_exit(SD_MUTEX(un));
25154 		return (ret_val);
25155 	}
25156 
25157 	/*
25158 	 * If the request sense buffer (and packet) is not in use,
25159 	 * let's set the un_sense_isbusy and send our packet
25160 	 */
25161 	un->un_sense_isbusy 	= 1;
25162 	rqs_pktp->pkt_resid  	= 0;
25163 	rqs_pktp->pkt_reason 	= 0;
25164 	rqs_pktp->pkt_flags |= FLAG_NOINTR;
25165 	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
25166 
25167 	mutex_exit(SD_MUTEX(un));
25168 
25169 	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
25170 	    " 0x%p\n", rqs_bp->b_un.b_addr);
25171 
25172 	/*
25173 	 * Can't send this to sd_scsi_poll, we wrap ourselves around the
25174 	 * axle - it has a call into us!
25175 	 */
25176 	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
25177 		SD_INFO(SD_LOG_COMMON, un,
25178 		    "sd_send_polled_RQS: RQS failed\n");
25179 	}
25180 
25181 	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
25182 	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
25183 
25184 	mutex_enter(SD_MUTEX(un));
25185 	un->un_sense_isbusy = 0;
25186 	mutex_exit(SD_MUTEX(un));
25187 
25188 	return (ret_val);
25189 }
25190 
25191 /*
25192  * Defines needed for localized version of the scsi_poll routine.
25193  */
25194 #define	CSEC		10000			/* usecs */
25195 #define	SEC_TO_CSEC	(1000000/CSEC)
25196 
25197 /*
25198  *    Function: sd_ddi_scsi_poll()
25199  *
25200  * Description: Localized version of the scsi_poll routine.  The purpose is to
25201  *		send a scsi_pkt to a device as a polled command.  This version
25202  *		is to ensure more robust handling of transport errors.
25203  *		Specifically this routine cures not ready, coming ready
25204  *		transition for power up and reset of sonoma's.  This can take
25205  *		up to 45 seconds for power-on and 20 seconds for reset of a
25206  * 		sonoma lun.
25207  *
25208  *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
25209  *
25210  * Return Code: 0 - Command completed successfully with good status
25211  *             -1 - Command failed.
25212  *
25213  * NOTE: This code is almost identical to scsi_poll, however before 6668774 can
25214  * be fixed (removing this code), we need to determine how to handle the
25215  * KEY_UNIT_ATTENTION condition below in conditions not as limited as sddump().
25216  *
25217  * NOTE: This code is only called off sddump().
25218  */
25219 static int
25220 sd_ddi_scsi_poll(struct scsi_pkt *pkt)
25221 {
25222 	int			rval = -1;
25223 	int			savef;
25224 	long			savet;
25225 	void			(*savec)();
25226 	int			timeout;
25227 	int			busy_count;
25228 	int			poll_delay;
25229 	int			rc;
25230 	uint8_t			*sensep;
25231 	struct scsi_arq_status	*arqstat;
25232 	extern int		do_polled_io;
25233 
25234 	ASSERT(pkt->pkt_scbp);
25235 
25236 	/*
25237 	 * save old flags..
25238 	 */
25239 	savef = pkt->pkt_flags;
25240 	savec = pkt->pkt_comp;
25241 	savet = pkt->pkt_time;
25242 
25243 	pkt->pkt_flags |= FLAG_NOINTR;
25244 
25245 	/*
25246 	 * XXX there is nothing in the SCSA spec that states that we should not
25247 	 * do a callback for polled cmds; however, removing this will break sd
25248 	 * and probably other target drivers
25249 	 */
25250 	pkt->pkt_comp = NULL;
25251 
25252 	/*
25253 	 * we don't like a polled command without timeout.
25254 	 * 60 seconds seems long enough.
25255 	 */
25256 	if (pkt->pkt_time == 0)
25257 		pkt->pkt_time = SCSI_POLL_TIMEOUT;
25258 
25259 	/*
25260 	 * Send polled cmd.
25261 	 *
25262 	 * We do some error recovery for various errors.  Tran_busy,
25263 	 * queue full, and non-dispatched commands are retried every 10 msec.
25264 	 * as they are typically transient failures.  Busy status and Not
25265 	 * Ready are retried every second as this status takes a while to
25266 	 * change.
25267 	 */
25268 	timeout = pkt->pkt_time * SEC_TO_CSEC;
25269 
25270 	for (busy_count = 0; busy_count < timeout; busy_count++) {
25271 		/*
25272 		 * Initialize pkt status variables.
25273 		 */
25274 		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;
25275 
25276 		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
25277 			if (rc != TRAN_BUSY) {
25278 				/* Transport failed - give up. */
25279 				break;
25280 			} else {
25281 				/* Transport busy - try again. */
25282 				poll_delay = 1 * CSEC;		/* 10 msec. */
25283 			}
25284 		} else {
25285 			/*
25286 			 * Transport accepted - check pkt status.
25287 			 */
25288 			rc = (*pkt->pkt_scbp) & STATUS_MASK;
25289 			if ((pkt->pkt_reason == CMD_CMPLT) &&
25290 			    (rc == STATUS_CHECK) &&
25291 			    (pkt->pkt_state & STATE_ARQ_DONE)) {
25292 				arqstat =
25293 				    (struct scsi_arq_status *)(pkt->pkt_scbp);
25294 				sensep = (uint8_t *)&arqstat->sts_sensedata;
25295 			} else {
25296 				sensep = NULL;
25297 			}
25298 
25299 			if ((pkt->pkt_reason == CMD_CMPLT) &&
25300 			    (rc == STATUS_GOOD)) {
25301 				/* No error - we're done */
25302 				rval = 0;
25303 				break;
25304 
25305 			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
25306 				/* Lost connection - give up */
25307 				break;
25308 
25309 			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
25310 			    (pkt->pkt_state == 0)) {
25311 				/* Pkt not dispatched - try again. */
25312 				poll_delay = 1 * CSEC;		/* 10 msec. */
25313 
25314 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
25315 			    (rc == STATUS_QFULL)) {
25316 				/* Queue full - try again. */
25317 				poll_delay = 1 * CSEC;		/* 10 msec. */
25318 
25319 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
25320 			    (rc == STATUS_BUSY)) {
25321 				/* Busy - try again. */
25322 				poll_delay = 100 * CSEC;	/* 1 sec. */
25323 				busy_count += (SEC_TO_CSEC - 1);
25324 
25325 			} else if ((sensep != NULL) &&
25326 			    (scsi_sense_key(sensep) == KEY_UNIT_ATTENTION)) {
25327 				/*
25328 				 * Unit Attention - try again.
25329 				 * Pretend it took 1 sec.
25330 				 * NOTE: 'continue' avoids poll_delay
25331 				 */
25332 				busy_count += (SEC_TO_CSEC - 1);
25333 				continue;
25334 
25335 			} else if ((sensep != NULL) &&
25336 			    (scsi_sense_key(sensep) == KEY_NOT_READY) &&
25337 			    (scsi_sense_asc(sensep) == 0x04) &&
25338 			    (scsi_sense_ascq(sensep) == 0x01)) {
25339 				/*
25340 				 * Not ready -> ready - try again.
25341 				 * 04h/01h: LUN IS IN PROCESS OF BECOMING READY
25342 				 * ...same as STATUS_BUSY
25343 				 */
25344 				poll_delay = 100 * CSEC;	/* 1 sec. */
25345 				busy_count += (SEC_TO_CSEC - 1);
25346 
25347 			} else {
25348 				/* BAD status - give up. */
25349 				break;
25350 			}
25351 		}
25352 
25353 		if (((curthread->t_flag & T_INTR_THREAD) == 0) &&
25354 		    !do_polled_io) {
25355 			delay(drv_usectohz(poll_delay));
25356 		} else {
25357 			/* we busy wait during cpr_dump or interrupt threads */
25358 			drv_usecwait(poll_delay);
25359 		}
25360 	}
25361 
25362 	pkt->pkt_flags = savef;
25363 	pkt->pkt_comp = savec;
25364 	pkt->pkt_time = savet;
25365 
25366 	/* return on error */
25367 	if (rval)
25368 		return (rval);
25369 
25370 	/*
25371 	 * This is not a performance critical code path.
25372 	 *
25373 	 * As an accommodation for scsi_poll callers, to avoid ddi_dma_sync()
25374 	 * issues associated with looking at DMA memory prior to
25375 	 * scsi_pkt_destroy(), we scsi_sync_pkt() prior to return.
25376 	 */
25377 	scsi_sync_pkt(pkt);
25378 	return (0);
25379 }
25380 
25381 
25382 
25383 /*
25384  *    Function: sd_persistent_reservation_in_read_keys
25385  *
25386  * Description: This routine is the driver entry point for handling CD-ROM
25387  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
25388  *		by sending the SCSI-3 PRIN commands to the device.
25389  *		Processes the read keys command response by copying the
25390  *		reservation key information into the user provided buffer.
25391  *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
25392  *
25393  *   Arguments: un   -  Pointer to soft state struct for the target.
25394  *		usrp -	user provided pointer to multihost Persistent In Read
25395  *			Keys structure (mhioc_inkeys_t)
25396  *		flag -	this argument is a pass through to ddi_copyxxx()
25397  *			directly from the mode argument of ioctl().
25398  *
25399  * Return Code: 0   - Success
25400  *		EACCES
25401  *		ENOTSUP
25402  *		errno return code from sd_send_scsi_cmd()
25403  *
25404  *     Context: Can sleep. Does not return until command is completed.
25405  */
25406 
static int
sd_persistent_reservation_in_read_keys(struct sd_lun *un,
    mhioc_inkeys_t *usrp, int flag)
{
#ifdef _MULTI_DATAMODEL
	struct mhioc_key_list32	li32;
#endif
	sd_prin_readkeys_t	*in;
	mhioc_inkeys_t		*ptr;
	mhioc_key_list_t	li;
	uchar_t			*data_bufp;
	int 			data_len;
	int			rval = 0;
	size_t			copysz;
	sd_ssc_t		*ssc;

	if ((ptr = (mhioc_inkeys_t *)usrp) == NULL) {
		return (EINVAL);
	}
	/* Start from a zeroed native list header; filled in per data model */
	bzero(&li, sizeof (mhioc_key_list_t));

	ssc = sd_ssc_init(un);

	/*
	 * Get the listsize from user
	 */
#ifdef _MULTI_DATAMODEL

	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		copysz = sizeof (struct mhioc_key_list32);
		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_keys: "
			    "failed ddi_copyin: mhioc_key_list32_t\n");
			rval = EFAULT;
			goto done;
		}
		/* Widen the ILP32 header into the native form */
		li.listsize = li32.listsize;
		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
		break;

	case DDI_MODEL_NONE:
		copysz = sizeof (mhioc_key_list_t);
		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_keys: "
			    "failed ddi_copyin: mhioc_key_list_t\n");
			rval = EFAULT;
			goto done;
		}
		break;
	}

#else /* ! _MULTI_DATAMODEL */
	copysz = sizeof (mhioc_key_list_t);
	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
		SD_ERROR(SD_LOG_IOCTL_MHD, un,
		    "sd_persistent_reservation_in_read_keys: "
		    "failed ddi_copyin: mhioc_key_list_t\n");
		rval = EFAULT;
		goto done;
	}
#endif

	/*
	 * Size the PRIN buffer: room for listsize keys plus the readkeys
	 * header (minus the caddr_t placeholder for the inline key data).
	 * NOTE(review): li.listsize comes straight from userland and is used
	 * unbounded in this multiplication — confirm callers cap it, as an
	 * absurd value could overflow data_len / exhaust kernel memory.
	 */
	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
	data_bufp = kmem_zalloc(data_len, KM_SLEEP);

	rval = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_KEYS,
	    data_len, data_bufp);
	if (rval != 0) {
		/* EIO compromises FMA assessment; other errors are ignored */
		if (rval == EIO)
			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
		else
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		goto done;
	}
	/* Device data is big-endian; convert header fields to host order */
	in = (sd_prin_readkeys_t *)data_bufp;
	ptr->generation = BE_32(in->generation);
	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;

	/*
	 * Return the min(listsize, listlen) keys
	 */
#ifdef _MULTI_DATAMODEL

	/* copysz still holds the model-dependent header size from above */
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		li32.listlen = li.listlen;
		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_keys: "
			    "failed ddi_copyout: mhioc_key_list32_t\n");
			rval = EFAULT;
			goto done;
		}
		break;

	case DDI_MODEL_NONE:
		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_keys: "
			    "failed ddi_copyout: mhioc_key_list_t\n");
			rval = EFAULT;
			goto done;
		}
		break;
	}

#else /* ! _MULTI_DATAMODEL */

	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
		SD_ERROR(SD_LOG_IOCTL_MHD, un,
		    "sd_persistent_reservation_in_read_keys: "
		    "failed ddi_copyout: mhioc_key_list_t\n");
		rval = EFAULT;
		goto done;
	}

#endif /* _MULTI_DATAMODEL */

	/* Copy out at most the number of keys the user's buffer can hold */
	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
	    li.listsize * MHIOC_RESV_KEY_SIZE);
	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
		SD_ERROR(SD_LOG_IOCTL_MHD, un,
		    "sd_persistent_reservation_in_read_keys: "
		    "failed ddi_copyout: keylist\n");
		rval = EFAULT;
	}
done:
	sd_ssc_fini(ssc);
	kmem_free(data_bufp, data_len);
	return (rval);
}
25542 
25543 
25544 /*
25545  *    Function: sd_persistent_reservation_in_read_resv
25546  *
25547  * Description: This routine is the driver entry point for handling CD-ROM
25548  *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
25549  *		by sending the SCSI-3 PRIN commands to the device.
25550  *		Process the read persistent reservations command response by
25551  *		copying the reservation information into the user provided
25552  *		buffer. Support for the 32/64 _MULTI_DATAMODEL is implemented.
25553  *
25554  *   Arguments: un   -  Pointer to soft state struct for the target.
25555  *		usrp -	user provided pointer to multihost Persistent In Read
25556  *			Keys structure (mhioc_inkeys_t)
25557  *		flag -	this argument is a pass through to ddi_copyxxx()
25558  *			directly from the mode argument of ioctl().
25559  *
25560  * Return Code: 0   - Success
25561  *		EACCES
25562  *		ENOTSUP
25563  *		errno return code from sd_send_scsi_cmd()
25564  *
25565  *     Context: Can sleep. Does not return until command is completed.
25566  */
25567 
static int
sd_persistent_reservation_in_read_resv(struct sd_lun *un,
    mhioc_inresvs_t *usrp, int flag)
{
#ifdef _MULTI_DATAMODEL
	struct mhioc_resv_desc_list32 resvlist32;
#endif
	sd_prin_readresv_t	*in;
	mhioc_inresvs_t		*ptr;
	sd_readresv_desc_t	*readresv_ptr;
	mhioc_resv_desc_list_t	resvlist;
	mhioc_resv_desc_t 	resvdesc;
	uchar_t			*data_bufp = NULL;
	int 			data_len;
	int			rval = 0;
	int			i;
	size_t			copysz;
	mhioc_resv_desc_t	*bufp;
	sd_ssc_t		*ssc;

	if ((ptr = usrp) == NULL) {
		return (EINVAL);
	}

	ssc = sd_ssc_init(un);

	/*
	 * Get the listsize from user
	 */
#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		copysz = sizeof (struct mhioc_resv_desc_list32);
		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_resv: "
			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
			rval = EFAULT;
			goto done;
		}
		/* Widen the ILP32 header into the native form */
		resvlist.listsize = resvlist32.listsize;
		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
		break;

	case DDI_MODEL_NONE:
		copysz = sizeof (mhioc_resv_desc_list_t);
		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_resv: "
			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
			rval = EFAULT;
			goto done;
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	copysz = sizeof (mhioc_resv_desc_list_t);
	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
		SD_ERROR(SD_LOG_IOCTL_MHD, un,
		    "sd_persistent_reservation_in_read_resv: "
		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
		rval = EFAULT;
		goto done;
	}
#endif /* ! _MULTI_DATAMODEL */

	/*
	 * Size the PRIN buffer: room for listsize descriptors plus the
	 * readresv header (minus the caddr_t placeholder for inline data).
	 * NOTE(review): resvlist.listsize is user-controlled and unbounded
	 * in this multiplication — confirm callers cap it.
	 */
	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
	data_bufp = kmem_zalloc(data_len, KM_SLEEP);

	rval = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_RESV,
	    data_len, data_bufp);
	if (rval != 0) {
		/* EIO compromises FMA assessment; other errors are ignored */
		if (rval == EIO)
			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
		else
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		goto done;
	}
	/* Device data is big-endian; convert header fields to host order */
	in = (sd_prin_readresv_t *)data_bufp;
	ptr->generation = BE_32(in->generation);
	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;

	/*
	 * Return the min(listsize, listlen) keys
	 */
#ifdef _MULTI_DATAMODEL

	/* copysz still holds the model-dependent header size from above */
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		resvlist32.listlen = resvlist.listlen;
		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_resv: "
			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
			rval = EFAULT;
			goto done;
		}
		break;

	case DDI_MODEL_NONE:
		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_resv: "
			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
			rval = EFAULT;
			goto done;
		}
		break;
	}

#else /* ! _MULTI_DATAMODEL */

	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
		SD_ERROR(SD_LOG_IOCTL_MHD, un,
		    "sd_persistent_reservation_in_read_resv: "
		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
		rval = EFAULT;
		goto done;
	}

#endif /* ! _MULTI_DATAMODEL */

	/*
	 * Copy out one descriptor at a time, converting each from the
	 * device's wire format into the user-visible mhioc_resv_desc_t,
	 * clamped to the size of the user's buffer.
	 */
	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
	bufp = resvlist.list;
	copysz = sizeof (mhioc_resv_desc_t);
	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
	    i++, readresv_ptr++, bufp++) {

		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
		    MHIOC_RESV_KEY_SIZE);
		resvdesc.type  = readresv_ptr->type;
		resvdesc.scope = readresv_ptr->scope;
		resvdesc.scope_specific_addr =
		    BE_32(readresv_ptr->scope_specific_addr);

		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_resv: "
			    "failed ddi_copyout: resvlist\n");
			rval = EFAULT;
			goto done;
		}
	}
done:
	sd_ssc_fini(ssc);
	/* only if data_bufp is allocated, we need to free it */
	if (data_bufp) {
		kmem_free(data_bufp, data_len);
	}
	return (rval);
}
25720 
25721 
25722 /*
25723  *    Function: sr_change_blkmode()
25724  *
25725  * Description: This routine is the driver entry point for handling CD-ROM
25726  *		block mode ioctl requests. Support for returning and changing
25727  *		the current block size in use by the device is implemented. The
25728  *		LBA size is changed via a MODE SELECT Block Descriptor.
25729  *
25730  *		This routine issues a mode sense with an allocation length of
25731  *		12 bytes for the mode page header and a single block descriptor.
25732  *
25733  *   Arguments: dev - the device 'dev_t'
25734  *		cmd - the request type; one of CDROMGBLKMODE (get) or
25735  *		      CDROMSBLKMODE (set)
25736  *		data - current block size or requested block size
25737  *		flag - this argument is a pass through to ddi_copyxxx() directly
25738  *		       from the mode argument of ioctl().
25739  *
25740  * Return Code: the code returned by sd_send_scsi_cmd()
25741  *		EINVAL if invalid arguments are provided
25742  *		EFAULT if ddi_copyxxx() fails
25743  *		ENXIO if fail ddi_get_soft_state
25744  *		EIO if invalid mode sense block descriptor length
25745  *
25746  */
25747 
static int
sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
{
	struct sd_lun			*un = NULL;
	struct mode_header		*sense_mhp, *select_mhp;
	struct block_descriptor		*sense_desc, *select_desc;
	int				current_bsize;
	int				rval = EINVAL;
	uchar_t				*sense = NULL;
	uchar_t				*select = NULL;
	sd_ssc_t			*ssc;

	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * The block length is changed via the Mode Select block descriptor, the
	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
	 * required as part of this routine. Therefore the mode sense allocation
	 * length is specified to be the length of a mode page header and a
	 * block descriptor.
	 */
	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);

	ssc = sd_ssc_init(un);
	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD);
	sd_ssc_fini(ssc);
	if (rval != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_change_blkmode: Mode Sense Failed\n");
		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
		return (rval);
	}

	/* Check the block descriptor len to handle only 1 block descriptor */
	sense_mhp = (struct mode_header *)sense;
	if ((sense_mhp->bdesc_length == 0) ||
	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_change_blkmode: Mode Sense returned invalid block"
		    " descriptor length\n");
		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
		return (EIO);
	}
	/* Reassemble the 24-bit block length from the descriptor bytes */
	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
	current_bsize = ((sense_desc->blksize_hi << 16) |
	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);

	/* Process command */
	switch (cmd) {
	case CDROMGBLKMODE:
		/* Return the block size obtained during the mode sense */
		if (ddi_copyout(&current_bsize, (void *)data,
		    sizeof (int), flag) != 0)
			rval = EFAULT;
		break;
	case CDROMSBLKMODE:
		/* Validate the requested block size */
		switch (data) {
		case CDROM_BLK_512:
		case CDROM_BLK_1024:
		case CDROM_BLK_2048:
		case CDROM_BLK_2056:
		case CDROM_BLK_2336:
		case CDROM_BLK_2340:
		case CDROM_BLK_2352:
		case CDROM_BLK_2368:
		case CDROM_BLK_2448:
		case CDROM_BLK_2646:
		case CDROM_BLK_2647:
			break;
		default:
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_change_blkmode: "
			    "Block Size '%ld' Not Supported\n", data);
			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
			return (EINVAL);
		}

		/*
		 * The current block size matches the requested block size so
		 * there is no need to send the mode select to change the size
		 */
		if (current_bsize == data) {
			break;
		}

		/* Build the select data for the requested block size */
		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
		select_mhp = (struct mode_header *)select;
		select_desc =
		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
		/*
		 * The LBA size is changed via the block descriptor, so the
		 * descriptor is built according to the user data
		 */
		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
		select_desc->blksize_lo  = (char)((data) & 0x000000ff);

		/* Send the mode select for the requested block size */
		ssc = sd_ssc_init(un);
		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0,
		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
		    SD_PATH_STANDARD);
		sd_ssc_fini(ssc);
		if (rval != 0) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_change_blkmode: Mode Select Failed\n");
			/*
			 * The mode select failed for the requested block size,
			 * so reset the data for the original block size and
			 * send it to the target. The error is indicated by the
			 * return value for the failed mode select.
			 */
			select_desc->blksize_hi  = sense_desc->blksize_hi;
			select_desc->blksize_mid = sense_desc->blksize_mid;
			select_desc->blksize_lo  = sense_desc->blksize_lo;
			ssc = sd_ssc_init(un);
			(void) sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0,
			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
			    SD_PATH_STANDARD);
			sd_ssc_fini(ssc);
		} else {
			/* Success: record the new block size in soft state */
			ASSERT(!mutex_owned(SD_MUTEX(un)));
			mutex_enter(SD_MUTEX(un));
			sd_update_block_info(un, (uint32_t)data, 0);
			mutex_exit(SD_MUTEX(un));
		}
		break;
	default:
		/* should not reach here, but check anyway */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
		rval = EINVAL;
		break;
	}

	if (select) {
		kmem_free(select, BUFLEN_CHG_BLK_MODE);
	}
	if (sense) {
		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
	}
	return (rval);
}
25899 
25900 
25901 /*
25902  * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
25903  * implement driver support for getting and setting the CD speed. The command
25904  * set used will be based on the device type. If the device has not been
25905  * identified as MMC the Toshiba vendor specific mode page will be used. If
25906  * the device is MMC but does not support the Real Time Streaming feature
25907  * the SET CD SPEED command will be used to set speed and mode page 0x2A will
25908  * be used to read the speed.
25909  */
25910 
25911 /*
25912  *    Function: sr_change_speed()
25913  *
25914  * Description: This routine is the driver entry point for handling CD-ROM
25915  *		drive speed ioctl requests for devices supporting the Toshiba
25916  *		vendor specific drive speed mode page. Support for returning
25917  *		and changing the current drive speed in use by the device is
25918  *		implemented.
25919  *
25920  *   Arguments: dev - the device 'dev_t'
25921  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
25922  *		      CDROMSDRVSPEED (set)
25923  *		data - current drive speed or requested drive speed
25924  *		flag - this argument is a pass through to ddi_copyxxx() directly
25925  *		       from the mode argument of ioctl().
25926  *
25927  * Return Code: the code returned by sd_send_scsi_cmd()
25928  *		EINVAL if invalid arguments are provided
25929  *		EFAULT if ddi_copyxxx() fails
25930  *		ENXIO if fail ddi_get_soft_state
25931  *		EIO if invalid mode sense block descriptor length
25932  */
25933 
25934 static int
25935 sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
25936 {
25937 	struct sd_lun			*un = NULL;
25938 	struct mode_header		*sense_mhp, *select_mhp;
25939 	struct mode_speed		*sense_page, *select_page;
25940 	int				current_speed;
25941 	int				rval = EINVAL;
25942 	int				bd_len;
25943 	uchar_t				*sense = NULL;
25944 	uchar_t				*select = NULL;
25945 	sd_ssc_t			*ssc;
25946 
25947 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
25948 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25949 		return (ENXIO);
25950 	}
25951 
25952 	/*
25953 	 * Note: The drive speed is being modified here according to a Toshiba
25954 	 * vendor specific mode page (0x31).
25955 	 */
25956 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
25957 
25958 	ssc = sd_ssc_init(un);
25959 	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
25960 	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
25961 	    SD_PATH_STANDARD);
25962 	sd_ssc_fini(ssc);
25963 	if (rval != 0) {
25964 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25965 		    "sr_change_speed: Mode Sense Failed\n");
25966 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
25967 		return (rval);
25968 	}
25969 	sense_mhp  = (struct mode_header *)sense;
25970 
25971 	/* Check the block descriptor len to handle only 1 block descriptor */
25972 	bd_len = sense_mhp->bdesc_length;
25973 	if (bd_len > MODE_BLK_DESC_LENGTH) {
25974 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25975 		    "sr_change_speed: Mode Sense returned invalid block "
25976 		    "descriptor length\n");
25977 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
25978 		return (EIO);
25979 	}
25980 
25981 	sense_page = (struct mode_speed *)
25982 	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
25983 	current_speed = sense_page->speed;
25984 
25985 	/* Process command */
25986 	switch (cmd) {
25987 	case CDROMGDRVSPEED:
25988 		/* Return the drive speed obtained during the mode sense */
25989 		if (current_speed == 0x2) {
25990 			current_speed = CDROM_TWELVE_SPEED;
25991 		}
25992 		if (ddi_copyout(&current_speed, (void *)data,
25993 		    sizeof (int), flag) != 0) {
25994 			rval = EFAULT;
25995 		}
25996 		break;
25997 	case CDROMSDRVSPEED:
25998 		/* Validate the requested drive speed */
25999 		switch ((uchar_t)data) {
26000 		case CDROM_TWELVE_SPEED:
26001 			data = 0x2;
26002 			/*FALLTHROUGH*/
26003 		case CDROM_NORMAL_SPEED:
26004 		case CDROM_DOUBLE_SPEED:
26005 		case CDROM_QUAD_SPEED:
26006 		case CDROM_MAXIMUM_SPEED:
26007 			break;
26008 		default:
26009 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26010 			    "sr_change_speed: "
26011 			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
26012 			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
26013 			return (EINVAL);
26014 		}
26015 
26016 		/*
26017 		 * The current drive speed matches the requested drive speed so
26018 		 * there is no need to send the mode select to change the speed
26019 		 */
26020 		if (current_speed == data) {
26021 			break;
26022 		}
26023 
26024 		/* Build the select data for the requested drive speed */
26025 		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
26026 		select_mhp = (struct mode_header *)select;
26027 		select_mhp->bdesc_length = 0;
26028 		select_page =
26029 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
26030 		select_page =
26031 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
26032 		select_page->mode_page.code = CDROM_MODE_SPEED;
26033 		select_page->mode_page.length = 2;
26034 		select_page->speed = (uchar_t)data;
26035 
26036 		/* Send the mode select for the requested block size */
26037 		ssc = sd_ssc_init(un);
26038 		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
26039 		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
26040 		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
26041 		sd_ssc_fini(ssc);
26042 		if (rval != 0) {
26043 			/*
26044 			 * The mode select failed for the requested drive speed,
26045 			 * so reset the data for the original drive speed and
26046 			 * send it to the target. The error is indicated by the
26047 			 * return value for the failed mode select.
26048 			 */
26049 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26050 			    "sr_drive_speed: Mode Select Failed\n");
26051 			select_page->speed = sense_page->speed;
26052 			ssc = sd_ssc_init(un);
26053 			(void) sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
26054 			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
26055 			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
26056 			sd_ssc_fini(ssc);
26057 		}
26058 		break;
26059 	default:
26060 		/* should not reach here, but check anyway */
26061 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26062 		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
26063 		rval = EINVAL;
26064 		break;
26065 	}
26066 
26067 	if (select) {
26068 		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
26069 	}
26070 	if (sense) {
26071 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
26072 	}
26073 
26074 	return (rval);
26075 }
26076 
26077 
26078 /*
26079  *    Function: sr_atapi_change_speed()
26080  *
26081  * Description: This routine is the driver entry point for handling CD-ROM
26082  *		drive speed ioctl requests for MMC devices that do not support
26083  *		the Real Time Streaming feature (0x107).
26084  *
26085  *		Note: This routine will use the SET SPEED command which may not
26086  *		be supported by all devices.
26087  *
26088  *   Arguments: dev- the device 'dev_t'
26089  *		cmd- the request type; one of CDROMGDRVSPEED (get) or
26090  *		     CDROMSDRVSPEED (set)
26091  *		data- current drive speed or requested drive speed
26092  *		flag- this argument is a pass through to ddi_copyxxx() directly
26093  *		      from the mode argument of ioctl().
26094  *
26095  * Return Code: the code returned by sd_send_scsi_cmd()
26096  *		EINVAL if invalid arguments are provided
26097  *		EFAULT if ddi_copyxxx() fails
26098  *		ENXIO if fail ddi_get_soft_state
26099  *		EIO if invalid mode sense block descriptor length
26100  */
26101 
static int
sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
{
	struct sd_lun			*un;
	struct uscsi_cmd		*com = NULL;
	struct mode_header_grp2		*sense_mhp;
	uchar_t				*sense_page;
	uchar_t				*sense = NULL;
	char				cdb[CDB_GROUP5];
	int				bd_len;
	int				current_speed = 0;
	int				max_speed = 0;
	int				rval;
	sd_ssc_t			*ssc;

	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);

	ssc = sd_ssc_init(un);
	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense,
	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
	    SD_PATH_STANDARD);
	sd_ssc_fini(ssc);
	if (rval != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_atapi_change_speed: Mode Sense Failed\n");
		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
		return (rval);
	}

	/* Check the block descriptor len to handle only 1 block descriptor */
	sense_mhp = (struct mode_header_grp2 *)sense;
	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_atapi_change_speed: Mode Sense returned invalid "
		    "block descriptor length\n");
		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
		return (EIO);
	}

	/*
	 * Calculate the current and maximum drive speeds
	 * (16-bit big-endian fields of the CD capabilities page; bytes 8-9
	 * presumably hold the max read speed and 14-15 the current read
	 * speed in KB/s — see the MMC 0x2A page layout to confirm)
	 */
	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
	current_speed = (sense_page[14] << 8) | sense_page[15];
	max_speed = (sense_page[8] << 8) | sense_page[9];

	/* Process the command */
	switch (cmd) {
	case CDROMGDRVSPEED:
		/* Convert KB/s to the CDROM_*_SPEED multiple for the user */
		current_speed /= SD_SPEED_1X;
		if (ddi_copyout(&current_speed, (void *)data,
		    sizeof (int), flag) != 0)
			rval = EFAULT;
		break;
	case CDROMSDRVSPEED:
		/* Convert the speed code to KB/sec */
		switch ((uchar_t)data) {
		case CDROM_NORMAL_SPEED:
			current_speed = SD_SPEED_1X;
			break;
		case CDROM_DOUBLE_SPEED:
			current_speed = 2 * SD_SPEED_1X;
			break;
		case CDROM_QUAD_SPEED:
			current_speed = 4 * SD_SPEED_1X;
			break;
		case CDROM_TWELVE_SPEED:
			current_speed = 12 * SD_SPEED_1X;
			break;
		case CDROM_MAXIMUM_SPEED:
			/* 0xffff requests the drive's optimum speed */
			current_speed = 0xffff;
			break;
		default:
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_atapi_change_speed: invalid drive speed %d\n",
			    (uchar_t)data);
			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
			return (EINVAL);
		}

		/* Check the request against the drive's max speed. */
		if (current_speed != 0xffff) {
			if (current_speed > max_speed) {
				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
				return (EINVAL);
			}
		}

		/*
		 * Build and send the SET SPEED command
		 *
		 * Note: The SET SPEED (0xBB) command used in this routine is
		 * obsolete per the SCSI MMC spec but still supported in the
		 * MT FUJI vendor spec. Most equipment is adhereing to MT FUJI
		 * therefore the command is still implemented in this routine.
		 */
		bzero(cdb, sizeof (cdb));
		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
		/* Requested read speed, big-endian, in CDB bytes 2-3 */
		cdb[2] = (uchar_t)(current_speed >> 8);
		cdb[3] = (uchar_t)current_speed;
		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
		com->uscsi_cdb	   = (caddr_t)cdb;
		com->uscsi_cdblen  = CDB_GROUP5;
		com->uscsi_bufaddr = NULL;
		com->uscsi_buflen  = 0;
		com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT;
		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, 0, SD_PATH_STANDARD);
		break;
	default:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
		rval = EINVAL;
	}

	if (sense) {
		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
	}
	if (com) {
		kmem_free(com, sizeof (*com));
	}
	return (rval);
}
26229 
26230 
26231 /*
26232  *    Function: sr_pause_resume()
26233  *
26234  * Description: This routine is the driver entry point for handling CD-ROM
26235  *		pause/resume ioctl requests. This only affects the audio play
26236  *		operation.
26237  *
26238  *   Arguments: dev - the device 'dev_t'
26239  *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
26240  *		      for setting the resume bit of the cdb.
26241  *
26242  * Return Code: the code returned by sd_send_scsi_cmd()
26243  *		EINVAL if invalid mode specified
26244  *
26245  */
26246 
26247 static int
26248 sr_pause_resume(dev_t dev, int cmd)
26249 {
26250 	struct sd_lun		*un;
26251 	struct uscsi_cmd	*com;
26252 	char			cdb[CDB_GROUP1];
26253 	int			rval;
26254 
26255 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26256 		return (ENXIO);
26257 	}
26258 
26259 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26260 	bzero(cdb, CDB_GROUP1);
26261 	cdb[0] = SCMD_PAUSE_RESUME;
26262 	switch (cmd) {
26263 	case CDROMRESUME:
26264 		cdb[8] = 1;
26265 		break;
26266 	case CDROMPAUSE:
26267 		cdb[8] = 0;
26268 		break;
26269 	default:
26270 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
26271 		    " Command '%x' Not Supported\n", cmd);
26272 		rval = EINVAL;
26273 		goto done;
26274 	}
26275 
26276 	com->uscsi_cdb    = cdb;
26277 	com->uscsi_cdblen = CDB_GROUP1;
26278 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
26279 
26280 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26281 	    SD_PATH_STANDARD);
26282 
26283 done:
26284 	kmem_free(com, sizeof (*com));
26285 	return (rval);
26286 }
26287 
26288 
26289 /*
26290  *    Function: sr_play_msf()
26291  *
26292  * Description: This routine is the driver entry point for handling CD-ROM
26293  *		ioctl requests to output the audio signals at the specified
26294  *		starting address and continue the audio play until the specified
26295  *		ending address (CDROMPLAYMSF) The address is in Minute Second
26296  *		Frame (MSF) format.
26297  *
26298  *   Arguments: dev	- the device 'dev_t'
26299  *		data	- pointer to user provided audio msf structure,
26300  *		          specifying start/end addresses.
26301  *		flag	- this argument is a pass through to ddi_copyxxx()
26302  *		          directly from the mode argument of ioctl().
26303  *
26304  * Return Code: the code returned by sd_send_scsi_cmd()
26305  *		EFAULT if ddi_copyxxx() fails
26306  *		ENXIO if fail ddi_get_soft_state
26307  *		EINVAL if data pointer is NULL
26308  */
26309 
26310 static int
26311 sr_play_msf(dev_t dev, caddr_t data, int flag)
26312 {
26313 	struct sd_lun		*un;
26314 	struct uscsi_cmd	*com;
26315 	struct cdrom_msf	msf_struct;
26316 	struct cdrom_msf	*msf = &msf_struct;
26317 	char			cdb[CDB_GROUP1];
26318 	int			rval;
26319 
26320 	if (data == NULL) {
26321 		return (EINVAL);
26322 	}
26323 
26324 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26325 		return (ENXIO);
26326 	}
26327 
26328 	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
26329 		return (EFAULT);
26330 	}
26331 
26332 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26333 	bzero(cdb, CDB_GROUP1);
26334 	cdb[0] = SCMD_PLAYAUDIO_MSF;
26335 	if (un->un_f_cfg_playmsf_bcd == TRUE) {
26336 		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
26337 		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
26338 		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
26339 		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
26340 		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
26341 		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
26342 	} else {
26343 		cdb[3] = msf->cdmsf_min0;
26344 		cdb[4] = msf->cdmsf_sec0;
26345 		cdb[5] = msf->cdmsf_frame0;
26346 		cdb[6] = msf->cdmsf_min1;
26347 		cdb[7] = msf->cdmsf_sec1;
26348 		cdb[8] = msf->cdmsf_frame1;
26349 	}
26350 	com->uscsi_cdb    = cdb;
26351 	com->uscsi_cdblen = CDB_GROUP1;
26352 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
26353 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26354 	    SD_PATH_STANDARD);
26355 	kmem_free(com, sizeof (*com));
26356 	return (rval);
26357 }
26358 
26359 
26360 /*
26361  *    Function: sr_play_trkind()
26362  *
26363  * Description: This routine is the driver entry point for handling CD-ROM
26364  *		ioctl requests to output the audio signals at the specified
26365  *		starting address and continue the audio play until the specified
26366  *		ending address (CDROMPLAYTRKIND). The address is in Track Index
26367  *		format.
26368  *
26369  *   Arguments: dev	- the device 'dev_t'
26370  *		data	- pointer to user provided audio track/index structure,
26371  *		          specifying start/end addresses.
26372  *		flag	- this argument is a pass through to ddi_copyxxx()
26373  *		          directly from the mode argument of ioctl().
26374  *
26375  * Return Code: the code returned by sd_send_scsi_cmd()
26376  *		EFAULT if ddi_copyxxx() fails
26377  *		ENXIO if fail ddi_get_soft_state
26378  *		EINVAL if data pointer is NULL
26379  */
26380 
26381 static int
26382 sr_play_trkind(dev_t dev, caddr_t data, int flag)
26383 {
26384 	struct cdrom_ti		ti_struct;
26385 	struct cdrom_ti		*ti = &ti_struct;
26386 	struct uscsi_cmd	*com = NULL;
26387 	char			cdb[CDB_GROUP1];
26388 	int			rval;
26389 
26390 	if (data == NULL) {
26391 		return (EINVAL);
26392 	}
26393 
26394 	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
26395 		return (EFAULT);
26396 	}
26397 
26398 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26399 	bzero(cdb, CDB_GROUP1);
26400 	cdb[0] = SCMD_PLAYAUDIO_TI;
26401 	cdb[4] = ti->cdti_trk0;
26402 	cdb[5] = ti->cdti_ind0;
26403 	cdb[7] = ti->cdti_trk1;
26404 	cdb[8] = ti->cdti_ind1;
26405 	com->uscsi_cdb    = cdb;
26406 	com->uscsi_cdblen = CDB_GROUP1;
26407 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
26408 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26409 	    SD_PATH_STANDARD);
26410 	kmem_free(com, sizeof (*com));
26411 	return (rval);
26412 }
26413 
26414 
26415 /*
26416  *    Function: sr_read_all_subcodes()
26417  *
26418  * Description: This routine is the driver entry point for handling CD-ROM
26419  *		ioctl requests to return raw subcode data while the target is
26420  *		playing audio (CDROMSUBCODE).
26421  *
26422  *   Arguments: dev	- the device 'dev_t'
26423  *		data	- pointer to user provided cdrom subcode structure,
26424  *		          specifying the transfer length and address.
26425  *		flag	- this argument is a pass through to ddi_copyxxx()
26426  *		          directly from the mode argument of ioctl().
26427  *
26428  * Return Code: the code returned by sd_send_scsi_cmd()
26429  *		EFAULT if ddi_copyxxx() fails
26430  *		ENXIO if fail ddi_get_soft_state
26431  *		EINVAL if data pointer is NULL
26432  */
26433 
static int
sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un = NULL;
	struct uscsi_cmd	*com = NULL;
	struct cdrom_subcode	*subcode = NULL;
	int			rval;
	size_t			buflen;
	char			cdb[CDB_GROUP5];

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_subcode32		cdrom_subcode32;
	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
#endif
	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);

	/*
	 * Copy in the user request (transfer length in blocks plus the
	 * user-space destination address), converting from the ILP32
	 * layout when the caller is a 32-bit application.
	 */
#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_read_all_subcodes: ddi_copyin Failed\n");
			kmem_free(subcode, sizeof (struct cdrom_subcode));
			return (EFAULT);
		}
		/* Convert the ILP32 uscsi data from the application to LP64 */
		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, subcode,
		    sizeof (struct cdrom_subcode), flag)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_read_all_subcodes: ddi_copyin Failed\n");
			kmem_free(subcode, sizeof (struct cdrom_subcode));
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_all_subcodes: ddi_copyin Failed\n");
		kmem_free(subcode, sizeof (struct cdrom_subcode));
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/*
	 * Since MMC-2 expects max 3 bytes for length, check if the
	 * length input is greater than 3 bytes
	 */
	if ((subcode->cdsc_length & 0xFF000000) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_all_subcodes: "
		    "cdrom transfer length too large: %d (limit %d)\n",
		    subcode->cdsc_length, 0xFFFFFF);
		kmem_free(subcode, sizeof (struct cdrom_subcode));
		return (EINVAL);
	}

	/* Transfer size in bytes: requested block count * subcode block size */
	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	bzero(cdb, CDB_GROUP5);

	if (un->un_f_mmc_cap == TRUE) {
		/*
		 * MMC device: use READ CD (0xBE). Bytes 2-5 (start LBA) are
		 * all 0xff and byte 10 (sub-channel selection) is 1 —
		 * presumably the device-specific encoding for "raw
		 * sub-channel from the current position"; TODO confirm
		 * against the MMC READ CD definition.
		 */
		cdb[0] = (char)SCMD_READ_CD;
		cdb[2] = (char)0xff;
		cdb[3] = (char)0xff;
		cdb[4] = (char)0xff;
		cdb[5] = (char)0xff;
		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
		cdb[10] = 1;
	} else {
		/*
		 * Note: A vendor specific command (0xDF) is being used here to
		 * request a read of all subcodes. It takes a full 4-byte
		 * block count in bytes 6-9.
		 */
		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
	}
	/*
	 * Transfer directly into the caller's buffer (cdsc_addr is a
	 * user-space address, hence UIO_USERSPACE below).
	 */
	com->uscsi_cdb	   = cdb;
	com->uscsi_cdblen  = CDB_GROUP5;
	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
	com->uscsi_buflen  = buflen;
	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
	    SD_PATH_STANDARD);
	kmem_free(subcode, sizeof (struct cdrom_subcode));
	kmem_free(com, sizeof (*com));
	return (rval);
}
26539 
26540 
26541 /*
26542  *    Function: sr_read_subchannel()
26543  *
26544  * Description: This routine is the driver entry point for handling CD-ROM
26545  *		ioctl requests to return the Q sub-channel data of the CD
26546  *		current position block. (CDROMSUBCHNL) The data includes the
26547  *		track number, index number, absolute CD-ROM address (LBA or MSF
26548  *		format per the user) , track relative CD-ROM address (LBA or MSF
26549  *		format per the user), control data and audio status.
26550  *
26551  *   Arguments: dev	- the device 'dev_t'
26552  *		data	- pointer to user provided cdrom sub-channel structure
26553  *		flag	- this argument is a pass through to ddi_copyxxx()
26554  *		          directly from the mode argument of ioctl().
26555  *
26556  * Return Code: the code returned by sd_send_scsi_cmd()
26557  *		EFAULT if ddi_copyxxx() fails
26558  *		ENXIO if fail ddi_get_soft_state
26559  *		EINVAL if data pointer is NULL
26560  */
26561 
static int
sr_read_subchannel(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct uscsi_cmd	*com;
	struct cdrom_subchnl	subchanel;
	struct cdrom_subchnl	*subchnl = &subchanel;
	char			cdb[CDB_GROUP1];
	caddr_t			buffer;
	int			rval;

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	/* Copy in the request; only cdsc_format (LBA vs MSF) is consumed */
	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
		return (EFAULT);
	}

	/* 16 byte response buffer for the Q sub-channel position data */
	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
	bzero(cdb, CDB_GROUP1);
	cdb[0] = SCMD_READ_SUBCHANNEL;
	/* Set the MSF bit based on the user requested address format */
	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
	/*
	 * Set the Q bit in byte 2 to indicate that Q sub-channel data be
	 * returned
	 */
	cdb[2] = 0x40;
	/*
	 * Set byte 3 to specify the return data format. A value of 0x01
	 * indicates that the CD-ROM current position should be returned.
	 */
	cdb[3] = 0x01;
	cdb[8] = 0x10;		/* allocation length: 16 bytes */
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	com->uscsi_cdb	   = cdb;
	com->uscsi_cdblen  = CDB_GROUP1;
	com->uscsi_bufaddr = buffer;
	com->uscsi_buflen  = 16;
	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
	    SD_PATH_STANDARD);
	if (rval != 0) {
		/* Command failed; free resources without touching user data */
		kmem_free(buffer, 16);
		kmem_free(com, sizeof (*com));
		return (rval);
	}

	/* Process the returned Q sub-channel data */
	subchnl->cdsc_audiostatus = buffer[1];
	/*
	 * NOTE(review): unlike sr_read_tocentry(), the ADR nibble is kept
	 * in the high four bits rather than shifted down — confirm that
	 * consumers of cdsc_adr expect this.
	 */
	subchnl->cdsc_adr	= (buffer[5] & 0xF0);
	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
	subchnl->cdsc_trk	= buffer[6];
	subchnl->cdsc_ind	= buffer[7];
	if (subchnl->cdsc_format & CDROM_LBA) {
		/* Bytes 8-11/12-15: big-endian absolute/relative LBA */
		subchnl->cdsc_absaddr.lba =
		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
		subchnl->cdsc_reladdr.lba =
		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
		/* Target reports MSF values in BCD; convert to binary */
		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
	} else {
		subchnl->cdsc_absaddr.msf.minute = buffer[9];
		subchnl->cdsc_absaddr.msf.second = buffer[10];
		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
		subchnl->cdsc_reladdr.msf.minute = buffer[13];
		subchnl->cdsc_reladdr.msf.second = buffer[14];
		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
	}
	kmem_free(buffer, 16);
	kmem_free(com, sizeof (*com));
	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
	    != 0) {
		return (EFAULT);
	}
	return (rval);
}
26652 
26653 
26654 /*
26655  *    Function: sr_read_tocentry()
26656  *
26657  * Description: This routine is the driver entry point for handling CD-ROM
26658  *		ioctl requests to read from the Table of Contents (TOC)
26659  *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
26660  *		fields, the starting address (LBA or MSF format per the user)
26661  *		and the data mode if the user specified track is a data track.
26662  *
26663  *		Note: The READ HEADER (0x44) command used in this routine is
26664  *		obsolete per the SCSI MMC spec but still supported in the
 *		MT FUJI vendor spec. Most equipment is adhering to MT FUJI
26666  *		therefore the command is still implemented in this routine.
26667  *
26668  *   Arguments: dev	- the device 'dev_t'
26669  *		data	- pointer to user provided toc entry structure,
26670  *			  specifying the track # and the address format
26671  *			  (LBA or MSF).
26672  *		flag	- this argument is a pass through to ddi_copyxxx()
26673  *		          directly from the mode argument of ioctl().
26674  *
26675  * Return Code: the code returned by sd_send_scsi_cmd()
26676  *		EFAULT if ddi_copyxxx() fails
26677  *		ENXIO if fail ddi_get_soft_state
26678  *		EINVAL if data pointer is NULL
26679  */
26680 
26681 static int
26682 sr_read_tocentry(dev_t dev, caddr_t data, int flag)
26683 {
26684 	struct sd_lun		*un = NULL;
26685 	struct uscsi_cmd	*com;
26686 	struct cdrom_tocentry	toc_entry;
26687 	struct cdrom_tocentry	*entry = &toc_entry;
26688 	caddr_t			buffer;
26689 	int			rval;
26690 	char			cdb[CDB_GROUP1];
26691 
26692 	if (data == NULL) {
26693 		return (EINVAL);
26694 	}
26695 
26696 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
26697 	    (un->un_state == SD_STATE_OFFLINE)) {
26698 		return (ENXIO);
26699 	}
26700 
26701 	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
26702 		return (EFAULT);
26703 	}
26704 
26705 	/* Validate the requested track and address format */
26706 	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
26707 		return (EINVAL);
26708 	}
26709 
26710 	if (entry->cdte_track == 0) {
26711 		return (EINVAL);
26712 	}
26713 
26714 	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
26715 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26716 	bzero(cdb, CDB_GROUP1);
26717 
26718 	cdb[0] = SCMD_READ_TOC;
26719 	/* Set the MSF bit based on the user requested address format  */
26720 	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
26721 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
26722 		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
26723 	} else {
26724 		cdb[6] = entry->cdte_track;
26725 	}
26726 
26727 	/*
26728 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
26729 	 * (4 byte TOC response header + 8 byte track descriptor)
26730 	 */
26731 	cdb[8] = 12;
26732 	com->uscsi_cdb	   = cdb;
26733 	com->uscsi_cdblen  = CDB_GROUP1;
26734 	com->uscsi_bufaddr = buffer;
26735 	com->uscsi_buflen  = 0x0C;
26736 	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
26737 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26738 	    SD_PATH_STANDARD);
26739 	if (rval != 0) {
26740 		kmem_free(buffer, 12);
26741 		kmem_free(com, sizeof (*com));
26742 		return (rval);
26743 	}
26744 
26745 	/* Process the toc entry */
26746 	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
26747 	entry->cdte_ctrl	= (buffer[5] & 0x0F);
26748 	if (entry->cdte_format & CDROM_LBA) {
26749 		entry->cdte_addr.lba =
26750 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
26751 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
26752 	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
26753 		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
26754 		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
26755 		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
26756 		/*
26757 		 * Send a READ TOC command using the LBA address format to get
26758 		 * the LBA for the track requested so it can be used in the
26759 		 * READ HEADER request
26760 		 *
26761 		 * Note: The MSF bit of the READ HEADER command specifies the
26762 		 * output format. The block address specified in that command
26763 		 * must be in LBA format.
26764 		 */
26765 		cdb[1] = 0;
26766 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26767 		    SD_PATH_STANDARD);
26768 		if (rval != 0) {
26769 			kmem_free(buffer, 12);
26770 			kmem_free(com, sizeof (*com));
26771 			return (rval);
26772 		}
26773 	} else {
26774 		entry->cdte_addr.msf.minute	= buffer[9];
26775 		entry->cdte_addr.msf.second	= buffer[10];
26776 		entry->cdte_addr.msf.frame	= buffer[11];
26777 		/*
26778 		 * Send a READ TOC command using the LBA address format to get
26779 		 * the LBA for the track requested so it can be used in the
26780 		 * READ HEADER request
26781 		 *
26782 		 * Note: The MSF bit of the READ HEADER command specifies the
26783 		 * output format. The block address specified in that command
26784 		 * must be in LBA format.
26785 		 */
26786 		cdb[1] = 0;
26787 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26788 		    SD_PATH_STANDARD);
26789 		if (rval != 0) {
26790 			kmem_free(buffer, 12);
26791 			kmem_free(com, sizeof (*com));
26792 			return (rval);
26793 		}
26794 	}
26795 
26796 	/*
26797 	 * Build and send the READ HEADER command to determine the data mode of
26798 	 * the user specified track.
26799 	 */
26800 	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
26801 	    (entry->cdte_track != CDROM_LEADOUT)) {
26802 		bzero(cdb, CDB_GROUP1);
26803 		cdb[0] = SCMD_READ_HEADER;
26804 		cdb[2] = buffer[8];
26805 		cdb[3] = buffer[9];
26806 		cdb[4] = buffer[10];
26807 		cdb[5] = buffer[11];
26808 		cdb[8] = 0x08;
26809 		com->uscsi_buflen = 0x08;
26810 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26811 		    SD_PATH_STANDARD);
26812 		if (rval == 0) {
26813 			entry->cdte_datamode = buffer[0];
26814 		} else {
26815 			/*
26816 			 * READ HEADER command failed, since this is
26817 			 * obsoleted in one spec, its better to return
26818 			 * -1 for an invlid track so that we can still
26819 			 * receive the rest of the TOC data.
26820 			 */
26821 			entry->cdte_datamode = (uchar_t)-1;
26822 		}
26823 	} else {
26824 		entry->cdte_datamode = (uchar_t)-1;
26825 	}
26826 
26827 	kmem_free(buffer, 12);
26828 	kmem_free(com, sizeof (*com));
26829 	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
26830 		return (EFAULT);
26831 
26832 	return (rval);
26833 }
26834 
26835 
26836 /*
26837  *    Function: sr_read_tochdr()
26838  *
26839  * Description: This routine is the driver entry point for handling CD-ROM
26840  * 		ioctl requests to read the Table of Contents (TOC) header
26841  *		(CDROMREADTOHDR). The TOC header consists of the disk starting
26842  *		and ending track numbers
26843  *
26844  *   Arguments: dev	- the device 'dev_t'
26845  *		data	- pointer to user provided toc header structure,
26846  *			  specifying the starting and ending track numbers.
26847  *		flag	- this argument is a pass through to ddi_copyxxx()
26848  *			  directly from the mode argument of ioctl().
26849  *
26850  * Return Code: the code returned by sd_send_scsi_cmd()
26851  *		EFAULT if ddi_copyxxx() fails
26852  *		ENXIO if fail ddi_get_soft_state
26853  *		EINVAL if data pointer is NULL
26854  */
26855 
26856 static int
26857 sr_read_tochdr(dev_t dev, caddr_t data, int flag)
26858 {
26859 	struct sd_lun		*un;
26860 	struct uscsi_cmd	*com;
26861 	struct cdrom_tochdr	toc_header;
26862 	struct cdrom_tochdr	*hdr = &toc_header;
26863 	char			cdb[CDB_GROUP1];
26864 	int			rval;
26865 	caddr_t			buffer;
26866 
26867 	if (data == NULL) {
26868 		return (EINVAL);
26869 	}
26870 
26871 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
26872 	    (un->un_state == SD_STATE_OFFLINE)) {
26873 		return (ENXIO);
26874 	}
26875 
26876 	buffer = kmem_zalloc(4, KM_SLEEP);
26877 	bzero(cdb, CDB_GROUP1);
26878 	cdb[0] = SCMD_READ_TOC;
26879 	/*
26880 	 * Specifying a track number of 0x00 in the READ TOC command indicates
26881 	 * that the TOC header should be returned
26882 	 */
26883 	cdb[6] = 0x00;
26884 	/*
26885 	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
26886 	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
26887 	 */
26888 	cdb[8] = 0x04;
26889 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26890 	com->uscsi_cdb	   = cdb;
26891 	com->uscsi_cdblen  = CDB_GROUP1;
26892 	com->uscsi_bufaddr = buffer;
26893 	com->uscsi_buflen  = 0x04;
26894 	com->uscsi_timeout = 300;
26895 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
26896 
26897 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26898 	    SD_PATH_STANDARD);
26899 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
26900 		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
26901 		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
26902 	} else {
26903 		hdr->cdth_trk0 = buffer[2];
26904 		hdr->cdth_trk1 = buffer[3];
26905 	}
26906 	kmem_free(buffer, 4);
26907 	kmem_free(com, sizeof (*com));
26908 	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
26909 		return (EFAULT);
26910 	}
26911 	return (rval);
26912 }
26913 
26914 
26915 /*
26916  * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
26917  * sr_read_cdda(), sr_read_cdxa(), routines implement driver support for
26918  * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
26919  * digital audio and extended architecture digital audio. These modes are
26920  * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
26921  * MMC specs.
26922  *
26923  * In addition to support for the various data formats these routines also
26924  * include support for devices that implement only the direct access READ
26925  * commands (0x08, 0x28), devices that implement the READ_CD commands
26926  * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
26927  * READ CDXA commands (0xD8, 0xDB)
26928  */
26929 
26930 /*
26931  *    Function: sr_read_mode1()
26932  *
26933  * Description: This routine is the driver entry point for handling CD-ROM
26934  *		ioctl read mode1 requests (CDROMREADMODE1).
26935  *
26936  *   Arguments: dev	- the device 'dev_t'
26937  *		data	- pointer to user provided cd read structure specifying
26938  *			  the lba buffer address and length.
26939  *		flag	- this argument is a pass through to ddi_copyxxx()
26940  *			  directly from the mode argument of ioctl().
26941  *
26942  * Return Code: the code returned by sd_send_scsi_cmd()
26943  *		EFAULT if ddi_copyxxx() fails
26944  *		ENXIO if fail ddi_get_soft_state
26945  *		EINVAL if data pointer is NULL
26946  */
26947 
26948 static int
26949 sr_read_mode1(dev_t dev, caddr_t data, int flag)
26950 {
26951 	struct sd_lun		*un;
26952 	struct cdrom_read	mode1_struct;
26953 	struct cdrom_read	*mode1 = &mode1_struct;
26954 	int			rval;
26955 	sd_ssc_t		*ssc;
26956 
26957 #ifdef _MULTI_DATAMODEL
26958 	/* To support ILP32 applications in an LP64 world */
26959 	struct cdrom_read32	cdrom_read32;
26960 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
26961 #endif /* _MULTI_DATAMODEL */
26962 
26963 	if (data == NULL) {
26964 		return (EINVAL);
26965 	}
26966 
26967 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
26968 	    (un->un_state == SD_STATE_OFFLINE)) {
26969 		return (ENXIO);
26970 	}
26971 
26972 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
26973 	    "sd_read_mode1: entry: un:0x%p\n", un);
26974 
26975 #ifdef _MULTI_DATAMODEL
26976 	switch (ddi_model_convert_from(flag & FMODELS)) {
26977 	case DDI_MODEL_ILP32:
26978 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
26979 			return (EFAULT);
26980 		}
26981 		/* Convert the ILP32 uscsi data from the application to LP64 */
26982 		cdrom_read32tocdrom_read(cdrd32, mode1);
26983 		break;
26984 	case DDI_MODEL_NONE:
26985 		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
26986 			return (EFAULT);
26987 		}
26988 	}
26989 #else /* ! _MULTI_DATAMODEL */
26990 	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
26991 		return (EFAULT);
26992 	}
26993 #endif /* _MULTI_DATAMODEL */
26994 
26995 	ssc = sd_ssc_init(un);
26996 	rval = sd_send_scsi_READ(ssc, mode1->cdread_bufaddr,
26997 	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
26998 	sd_ssc_fini(ssc);
26999 
27000 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
27001 	    "sd_read_mode1: exit: un:0x%p\n", un);
27002 
27003 	return (rval);
27004 }
27005 
27006 
27007 /*
27008  *    Function: sr_read_cd_mode2()
27009  *
27010  * Description: This routine is the driver entry point for handling CD-ROM
27011  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
27012  *		support the READ CD (0xBE) command or the 1st generation
27013  *		READ CD (0xD4) command.
27014  *
27015  *   Arguments: dev	- the device 'dev_t'
27016  *		data	- pointer to user provided cd read structure specifying
27017  *			  the lba buffer address and length.
27018  *		flag	- this argument is a pass through to ddi_copyxxx()
27019  *			  directly from the mode argument of ioctl().
27020  *
27021  * Return Code: the code returned by sd_send_scsi_cmd()
27022  *		EFAULT if ddi_copyxxx() fails
27023  *		ENXIO if fail ddi_get_soft_state
27024  *		EINVAL if data pointer is NULL
27025  */
27026 
27027 static int
27028 sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
27029 {
27030 	struct sd_lun		*un;
27031 	struct uscsi_cmd	*com;
27032 	struct cdrom_read	mode2_struct;
27033 	struct cdrom_read	*mode2 = &mode2_struct;
27034 	uchar_t			cdb[CDB_GROUP5];
27035 	int			nblocks;
27036 	int			rval;
27037 #ifdef _MULTI_DATAMODEL
27038 	/*  To support ILP32 applications in an LP64 world */
27039 	struct cdrom_read32	cdrom_read32;
27040 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
27041 #endif /* _MULTI_DATAMODEL */
27042 
27043 	if (data == NULL) {
27044 		return (EINVAL);
27045 	}
27046 
27047 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27048 	    (un->un_state == SD_STATE_OFFLINE)) {
27049 		return (ENXIO);
27050 	}
27051 
27052 #ifdef _MULTI_DATAMODEL
27053 	switch (ddi_model_convert_from(flag & FMODELS)) {
27054 	case DDI_MODEL_ILP32:
27055 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
27056 			return (EFAULT);
27057 		}
27058 		/* Convert the ILP32 uscsi data from the application to LP64 */
27059 		cdrom_read32tocdrom_read(cdrd32, mode2);
27060 		break;
27061 	case DDI_MODEL_NONE:
27062 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
27063 			return (EFAULT);
27064 		}
27065 		break;
27066 	}
27067 
27068 #else /* ! _MULTI_DATAMODEL */
27069 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
27070 		return (EFAULT);
27071 	}
27072 #endif /* _MULTI_DATAMODEL */
27073 
27074 	bzero(cdb, sizeof (cdb));
27075 	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
27076 		/* Read command supported by 1st generation atapi drives */
27077 		cdb[0] = SCMD_READ_CDD4;
27078 	} else {
27079 		/* Universal CD Access Command */
27080 		cdb[0] = SCMD_READ_CD;
27081 	}
27082 
27083 	/*
27084 	 * Set expected sector type to: 2336s byte, Mode 2 Yellow Book
27085 	 */
27086 	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
27087 
27088 	/* set the start address */
27089 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0XFF);
27090 	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0XFF);
27091 	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
27092 	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
27093 
27094 	/* set the transfer length */
27095 	nblocks = mode2->cdread_buflen / 2336;
27096 	cdb[6] = (uchar_t)(nblocks >> 16);
27097 	cdb[7] = (uchar_t)(nblocks >> 8);
27098 	cdb[8] = (uchar_t)nblocks;
27099 
27100 	/* set the filter bits */
27101 	cdb[9] = CDROM_READ_CD_USERDATA;
27102 
27103 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27104 	com->uscsi_cdb = (caddr_t)cdb;
27105 	com->uscsi_cdblen = sizeof (cdb);
27106 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
27107 	com->uscsi_buflen = mode2->cdread_buflen;
27108 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
27109 
27110 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
27111 	    SD_PATH_STANDARD);
27112 	kmem_free(com, sizeof (*com));
27113 	return (rval);
27114 }
27115 
27116 
27117 /*
27118  *    Function: sr_read_mode2()
27119  *
27120  * Description: This routine is the driver entry point for handling CD-ROM
27121  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
27122  *		do not support the READ CD (0xBE) command.
27123  *
27124  *   Arguments: dev	- the device 'dev_t'
27125  *		data	- pointer to user provided cd read structure specifying
27126  *			  the lba buffer address and length.
27127  *		flag	- this argument is a pass through to ddi_copyxxx()
27128  *			  directly from the mode argument of ioctl().
27129  *
27130  * Return Code: the code returned by sd_send_scsi_cmd()
27131  *		EFAULT if ddi_copyxxx() fails
27132  *		ENXIO if fail ddi_get_soft_state
27133  *		EINVAL if data pointer is NULL
27134  *		EIO if fail to reset block size
27135  *		EAGAIN if commands are in progress in the driver
27136  */
27137 
static int
sr_read_mode2(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct cdrom_read	mode2_struct;
	struct cdrom_read	*mode2 = &mode2_struct;
	int			rval;
	uint32_t		restore_blksize;	/* block size to restore on exit */
	struct uscsi_cmd	*com;
	uchar_t			cdb[CDB_GROUP0];
	int			nblocks;

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_read32	cdrom_read32;
	struct cdrom_read32	*cdrd32 = &cdrom_read32;
#endif /* _MULTI_DATAMODEL */

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	/*
	 * Because this routine will update the device and driver block size
	 * being used we want to make sure there are no commands in progress.
	 * If commands are in progress the user will have to try again.
	 *
	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
	 * in sdioctl to protect commands from sdioctl through to the top of
	 * sd_uscsi_strategy. See sdioctl for details.
	 */
	mutex_enter(SD_MUTEX(un));
	if (un->un_ncmds_in_driver != 1) {
		mutex_exit(SD_MUTEX(un));
		return (EAGAIN);
	}
	mutex_exit(SD_MUTEX(un));

	/*
	 * NOTE(review): trace mask SD_LOG_ATTACH_DETACH looks like a
	 * historical copy/paste (this is an ioctl path, not attach/detach);
	 * confirm before changing.
	 */
	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
	    "sd_read_mode2: entry: un:0x%p\n", un);

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		/*
		 * Copyin failures return before the block size is changed,
		 * so no restore is needed on these paths.
		 */
		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
			return (EFAULT);
		}
		/* Convert the ILP32 uscsi data from the application to LP64 */
		cdrom_read32tocdrom_read(cdrd32, mode2);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/* Store the current target block size for restoration later */
	restore_blksize = un->un_tgt_blocksize;

	/* Change the device and soft state target block size to 2336 */
	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
		rval = EIO;
		goto done;
	}


	bzero(cdb, sizeof (cdb));

	/* set READ operation */
	cdb[0] = SCMD_READ;

	/* adjust lba for 2kbyte blocks from 512 byte blocks */
	mode2->cdread_lba >>= 2;

	/* set the start address */
	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0X1F);
	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);

	/* set the transfer length (in 2336-byte mode 2 blocks) */
	nblocks = mode2->cdread_buflen / 2336;
	cdb[4] = (uchar_t)nblocks & 0xFF;

	/* build command */
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	com->uscsi_cdb = (caddr_t)cdb;
	com->uscsi_cdblen = sizeof (cdb);
	com->uscsi_bufaddr = mode2->cdread_bufaddr;
	com->uscsi_buflen = mode2->cdread_buflen;
	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;

	/*
	 * Issue SCSI command with user space address for read buffer.
	 *
	 * This sends the command through main channel in the driver.
	 *
	 * Since this is accessed via an IOCTL call, we go through the
	 * standard path, so that if the device was powered down, then
	 * it would be 'awakened' to handle the command.
	 */
	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
	    SD_PATH_STANDARD);

	kmem_free(com, sizeof (*com));

	/* Restore the device and soft state target block size */
	if (sr_sector_mode(dev, restore_blksize) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "can't do switch back to mode 1\n");
		/*
		 * If sd_send_scsi_READ succeeded we still need to report
		 * an error because we failed to reset the block size
		 */
		if (rval == 0) {
			rval = EIO;
		}
	}

done:
	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
	    "sd_read_mode2: exit: un:0x%p\n", un);

	return (rval);
}
27273 
27274 
27275 /*
27276  *    Function: sr_sector_mode()
27277  *
27278  * Description: This utility function is used by sr_read_mode2 to set the target
27279  *		block size based on the user specified size. This is a legacy
27280  *		implementation based upon a vendor specific mode page
27281  *
 *   Arguments: dev	- the device 'dev_t'
 *		blksize	- the target block size to set, either 2336
 *			  (SD_MODE2_BLKSIZE) or 512.
 *
 * Return Code: the code returned by sd_send_scsi_MODE_SENSE() or
 *		sd_send_scsi_MODE_SELECT()
 *		ENXIO if fail ddi_get_soft_state or the device is offline
27290  */
27291 
static int
sr_sector_mode(dev_t dev, uint32_t blksize)
{
	struct sd_lun	*un;
	uchar_t		*sense;		/* mode sense buffer */
	uchar_t		*select;	/* mode select parameter list */
	int		rval;
	sd_ssc_t	*ssc;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	/* 20 bytes: mode parameter header + block descriptor + mode page */
	sense = kmem_zalloc(20, KM_SLEEP);

	/* Note: This is a vendor specific mode page (0x81) */
	ssc = sd_ssc_init(un);
	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense, 20, 0x81,
	    SD_PATH_STANDARD);
	sd_ssc_fini(ssc);
	if (rval != 0) {
		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
		    "sr_sector_mode: Mode Sense failed\n");
		kmem_free(sense, 20);
		return (rval);
	}
	/*
	 * Build the mode select parameter list: an 8-byte block descriptor
	 * (select[3]) carrying the new block length in bytes 10-11, followed
	 * by the vendor page.  Bytes 14-15 are carried over from the mode
	 * sense data.
	 * NOTE(review): the exact meaning of bytes 12-15 is vendor specific
	 * (legacy implementation); presumed from historical use — confirm
	 * against the vendor documentation before modifying.
	 */
	select = kmem_zalloc(20, KM_SLEEP);
	select[3] = 0x08;
	select[10] = ((blksize >> 8) & 0xff);
	select[11] = (blksize & 0xff);
	select[12] = 0x01;
	select[13] = 0x06;
	select[14] = sense[14];
	select[15] = sense[15];
	/* For mode 2 (2336-byte) blocks the low bit of byte 14 is also set */
	if (blksize == SD_MODE2_BLKSIZE) {
		select[14] |= 0x01;
	}

	ssc = sd_ssc_init(un);
	rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select, 20,
	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
	sd_ssc_fini(ssc);
	if (rval != 0) {
		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
		    "sr_sector_mode: Mode Select failed\n");
	} else {
		/*
		 * Only update the softstate block size if we successfully
		 * changed the device block mode.
		 */
		mutex_enter(SD_MUTEX(un));
		sd_update_block_info(un, blksize, 0);
		mutex_exit(SD_MUTEX(un));
	}
	kmem_free(sense, 20);
	kmem_free(select, 20);
	return (rval);
}
27351 
27352 
27353 /*
27354  *    Function: sr_read_cdda()
27355  *
27356  * Description: This routine is the driver entry point for handling CD-ROM
27357  *		ioctl requests to return CD-DA or subcode data. (CDROMCDDA) If
27358  *		the target supports CDDA these requests are handled via a vendor
 *		specific command (0xD8). If the target does not support CDDA
27360  *		these requests are handled via the READ CD command (0xBE).
27361  *
27362  *   Arguments: dev	- the device 'dev_t'
27363  *		data	- pointer to user provided CD-DA structure specifying
27364  *			  the track starting address, transfer length, and
27365  *			  subcode options.
27366  *		flag	- this argument is a pass through to ddi_copyxxx()
27367  *			  directly from the mode argument of ioctl().
27368  *
27369  * Return Code: the code returned by sd_send_scsi_cmd()
27370  *		EFAULT if ddi_copyxxx() fails
27371  *		ENXIO if fail ddi_get_soft_state
27372  *		EINVAL if invalid arguments are provided
27373  *		ENOTTY
27374  */
27375 
static int
sr_read_cdda(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun			*un;
	struct uscsi_cmd		*com;
	struct cdrom_cdda		*cdda;
	int				rval;
	size_t				buflen;		/* total transfer size in bytes */
	char				cdb[CDB_GROUP5];

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_cdda32	cdrom_cdda32;
	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
#endif /* _MULTI_DATAMODEL */

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_read_cdda: ddi_copyin Failed\n");
			kmem_free(cdda, sizeof (struct cdrom_cdda));
			return (EFAULT);
		}
		/* Convert the ILP32 uscsi data from the application to LP64 */
		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_read_cdda: ddi_copyin Failed\n");
			kmem_free(cdda, sizeof (struct cdrom_cdda));
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_cdda: ddi_copyin Failed\n");
		kmem_free(cdda, sizeof (struct cdrom_cdda));
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/*
	 * Since MMC-2 expects max 3 bytes for length, check if the
	 * length input is greater than 3 bytes
	 */
	if ((cdda->cdda_length & 0xFF000000) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
		    "cdrom transfer length too large: %d (limit %d)\n",
		    cdda->cdda_length, 0xFFFFFF);
		kmem_free(cdda, sizeof (struct cdrom_cdda));
		return (EINVAL);
	}

	/*
	 * The per-block transfer size depends on which subcode data the
	 * caller requested alongside the audio data.
	 * NOTE(review): buflen = blocksize * cdda_length can exceed 32 bits
	 * for lengths near the 3-byte maximum, which would overflow size_t
	 * on ILP32 kernels — presumably bounded in practice; confirm.
	 */
	switch (cdda->cdda_subcode) {
	case CDROM_DA_NO_SUBCODE:
		buflen = CDROM_BLK_2352 * cdda->cdda_length;
		break;
	case CDROM_DA_SUBQ:
		buflen = CDROM_BLK_2368 * cdda->cdda_length;
		break;
	case CDROM_DA_ALL_SUBCODE:
		buflen = CDROM_BLK_2448 * cdda->cdda_length;
		break;
	case CDROM_DA_SUBCODE_ONLY:
		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
		break;
	default:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
		    cdda->cdda_subcode);
		kmem_free(cdda, sizeof (struct cdrom_cdda));
		return (EINVAL);
	}

	/* Build and send the command */
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	bzero(cdb, CDB_GROUP5);

	if (un->un_f_cfg_cdda == TRUE) {
		/*
		 * Target supports CDDA via the MMC READ CD (0xBE) command;
		 * length occupies 3 bytes (cdb[6..8]) per MMC-2.
		 */
		cdb[0] = (char)SCMD_READ_CD;
		cdb[1] = 0x04;
		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
		cdb[9] = 0x10;
		/* Map the ioctl subcode selection onto READ CD cdb[10] */
		switch (cdda->cdda_subcode) {
		case CDROM_DA_NO_SUBCODE :
			cdb[10] = 0x0;
			break;
		case CDROM_DA_SUBQ :
			cdb[10] = 0x2;
			break;
		case CDROM_DA_ALL_SUBCODE :
			cdb[10] = 0x1;
			break;
		case CDROM_DA_SUBCODE_ONLY :
			/* FALLTHROUGH */
		default :
			/* READ CD cannot return subcode-only data */
			kmem_free(cdda, sizeof (struct cdrom_cdda));
			kmem_free(com, sizeof (*com));
			return (ENOTTY);
		}
	} else {
		/*
		 * Vendor specific READ CDDA (0xD8) command; length occupies
		 * 4 bytes (cdb[6..9]) and the subcode selector goes in
		 * cdb[10] unchanged.
		 */
		cdb[0] = (char)SCMD_READ_CDDA;
		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
		cdb[10] = cdda->cdda_subcode;
	}

	com->uscsi_cdb = cdb;
	com->uscsi_cdblen = CDB_GROUP5;
	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
	com->uscsi_buflen = buflen;
	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;

	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
	    SD_PATH_STANDARD);

	kmem_free(cdda, sizeof (struct cdrom_cdda));
	kmem_free(com, sizeof (*com));
	return (rval);
}
27523 
27524 
27525 /*
27526  *    Function: sr_read_cdxa()
27527  *
27528  * Description: This routine is the driver entry point for handling CD-ROM
27529  *		ioctl requests to return CD-XA (Extended Architecture) data.
27530  *		(CDROMCDXA).
27531  *
27532  *   Arguments: dev	- the device 'dev_t'
27533  *		data	- pointer to user provided CD-XA structure specifying
27534  *			  the data starting address, transfer length, and format
27535  *		flag	- this argument is a pass through to ddi_copyxxx()
27536  *			  directly from the mode argument of ioctl().
27537  *
27538  * Return Code: the code returned by sd_send_scsi_cmd()
27539  *		EFAULT if ddi_copyxxx() fails
27540  *		ENXIO if fail ddi_get_soft_state
27541  *		EINVAL if data pointer is NULL
27542  */
27543 
static int
sr_read_cdxa(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct uscsi_cmd	*com;
	struct cdrom_cdxa	*cdxa;
	int			rval;
	size_t			buflen;		/* total transfer size in bytes */
	char			cdb[CDB_GROUP5];
	uchar_t			read_flags;	/* READ CD cdb[9] field selection bits */

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_cdxa32		cdrom_cdxa32;
	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
#endif /* _MULTI_DATAMODEL */

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
			return (EFAULT);
		}
		/*
		 * Convert the ILP32 uscsi data from the
		 * application to LP64 for internal use.
		 */
		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/*
	 * Since MMC-2 expects max 3 bytes for length, check if the
	 * length input is greater than 3 bytes
	 */
	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
		    "cdrom transfer length too large: %d (limit %d)\n",
		    cdxa->cdxa_length, 0xFFFFFF);
		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
		return (EINVAL);
	}

	/*
	 * Select the per-block size and the READ CD field selection bits
	 * for the requested format.
	 */
	switch (cdxa->cdxa_format) {
	case CDROM_XA_DATA:
		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
		read_flags = 0x10;
		break;
	case CDROM_XA_SECTOR_DATA:
		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
		read_flags = 0xf8;
		break;
	case CDROM_XA_DATA_W_ERROR:
		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
		read_flags = 0xfc;
		break;
	default:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
		    cdxa->cdxa_format);
		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
		return (EINVAL);
	}

	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	bzero(cdb, CDB_GROUP5);
	if (un->un_f_mmc_cap == TRUE) {
		/* MMC capable: use READ CD (0xBE); 3-byte length in cdb[6..8] */
		cdb[0] = (char)SCMD_READ_CD;
		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
		cdb[9] = (char)read_flags;
	} else {
		/*
		 * Note: A vendor specific command (0xDB) is being used here to
		 * request a read of all subcodes.  Length occupies 4 bytes
		 * (cdb[6..9]) and the format selector goes in cdb[10].
		 */
		cdb[0] = (char)SCMD_READ_CDXA;
		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
		cdb[10] = cdxa->cdxa_format;
	}
	com->uscsi_cdb	   = cdb;
	com->uscsi_cdblen  = CDB_GROUP5;
	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
	com->uscsi_buflen  = buflen;
	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
	    SD_PATH_STANDARD);
	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
	kmem_free(com, sizeof (*com));
	return (rval);
}
27670 
27671 
27672 /*
27673  *    Function: sr_eject()
27674  *
27675  * Description: This routine is the driver entry point for handling CD-ROM
27676  *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
27677  *
27678  *   Arguments: dev	- the device 'dev_t'
27679  *
27680  * Return Code: the code returned by sd_send_scsi_cmd()
27681  */
27682 
27683 static int
27684 sr_eject(dev_t dev)
27685 {
27686 	struct sd_lun	*un;
27687 	int		rval;
27688 	sd_ssc_t	*ssc;
27689 
27690 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27691 	    (un->un_state == SD_STATE_OFFLINE)) {
27692 		return (ENXIO);
27693 	}
27694 
27695 	/*
27696 	 * To prevent race conditions with the eject
27697 	 * command, keep track of an eject command as
27698 	 * it progresses. If we are already handling
27699 	 * an eject command in the driver for the given
27700 	 * unit and another request to eject is received
27701 	 * immediately return EAGAIN so we don't lose
27702 	 * the command if the current eject command fails.
27703 	 */
27704 	mutex_enter(SD_MUTEX(un));
27705 	if (un->un_f_ejecting == TRUE) {
27706 		mutex_exit(SD_MUTEX(un));
27707 		return (EAGAIN);
27708 	}
27709 	un->un_f_ejecting = TRUE;
27710 	mutex_exit(SD_MUTEX(un));
27711 
27712 	ssc = sd_ssc_init(un);
27713 	rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_ALLOW,
27714 	    SD_PATH_STANDARD);
27715 	sd_ssc_fini(ssc);
27716 
27717 	if (rval != 0) {
27718 		mutex_enter(SD_MUTEX(un));
27719 		un->un_f_ejecting = FALSE;
27720 		mutex_exit(SD_MUTEX(un));
27721 		return (rval);
27722 	}
27723 
27724 	ssc = sd_ssc_init(un);
27725 	rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_TARGET_EJECT,
27726 	    SD_PATH_STANDARD);
27727 	sd_ssc_fini(ssc);
27728 
27729 	if (rval == 0) {
27730 		mutex_enter(SD_MUTEX(un));
27731 		sr_ejected(un);
27732 		un->un_mediastate = DKIO_EJECTED;
27733 		un->un_f_ejecting = FALSE;
27734 		cv_broadcast(&un->un_state_cv);
27735 		mutex_exit(SD_MUTEX(un));
27736 	} else {
27737 		mutex_enter(SD_MUTEX(un));
27738 		un->un_f_ejecting = FALSE;
27739 		mutex_exit(SD_MUTEX(un));
27740 	}
27741 	return (rval);
27742 }
27743 
27744 
27745 /*
27746  *    Function: sr_ejected()
27747  *
27748  * Description: This routine updates the soft state structure to invalidate the
27749  *		geometry information after the media has been ejected or a
27750  *		media eject has been detected.
27751  *
27752  *   Arguments: un - driver soft state (unit) structure
27753  */
27754 
static void
sr_ejected(struct sd_lun *un)
{
	struct sd_errstats *stp;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* The cached capacity/blocksize no longer describe any media */
	un->un_f_blockcount_is_valid	= FALSE;
	un->un_f_tgt_blocksize_is_valid	= FALSE;
	/*
	 * Drop SD_MUTEX around cmlb_invalidate() — presumably because cmlb
	 * may block or call back into the driver (TODO confirm).  Callers
	 * must tolerate the mutex being released and reacquired here.
	 */
	mutex_exit(SD_MUTEX(un));
	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
	mutex_enter(SD_MUTEX(un));

	/* Zero the capacity reported in the error kstats, if present */
	if (un->un_errstats != NULL) {
		stp = (struct sd_errstats *)un->un_errstats->ks_data;
		stp->sd_capacity.value.ui64 = 0;
	}
}
27774 
27775 
27776 /*
27777  *    Function: sr_check_wp()
27778  *
27779  * Description: This routine checks the write protection of a removable
27780  *      media disk and hotpluggable devices via the write protect bit of
27781  *      the Mode Page Header device specific field. Some devices choke
27782  *      on unsupported mode page. In order to workaround this issue,
27783  *      this routine has been implemented to use 0x3f mode page(request
27784  *      for all pages) for all device types.
27785  *
27786  *   Arguments: dev             - the device 'dev_t'
27787  *
27788  * Return Code: int indicating if the device is write protected (1) or not (0)
27789  *
27790  *     Context: Kernel thread.
27791  *
27792  */
27793 
static int
sr_check_wp(dev_t dev)
{
	struct sd_lun	*un;
	uchar_t		device_specific;	/* device specific byte of the mode header */
	uchar_t		*sense;
	int		hdrlen;
	int		rval = FALSE;		/* default: not write protected */
	int		status;
	sd_ssc_t	*ssc;

	/*
	 * Note: The return codes for this routine should be reworked to
	 * properly handle the case of a NULL softstate.
	 */
	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (FALSE);
	}

	if (un->un_f_cfg_is_atapi == TRUE) {
		/*
		 * The mode page contents are not required; set the allocation
		 * length for the mode page header only.  ATAPI devices use
		 * the 8-byte group-2 (10-byte CDB) mode parameter header.
		 */
		hdrlen = MODE_HEADER_LENGTH_GRP2;
		sense = kmem_zalloc(hdrlen, KM_SLEEP);
		ssc = sd_ssc_init(un);
		status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense, hdrlen,
		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD);
		sd_ssc_fini(ssc);
		if (status != 0)
			goto err_exit;
		device_specific =
		    ((struct mode_header_grp2 *)sense)->device_specific;
	} else {
		/* Non-ATAPI devices use the 4-byte group-0 mode header */
		hdrlen = MODE_HEADER_LENGTH;
		sense = kmem_zalloc(hdrlen, KM_SLEEP);
		ssc = sd_ssc_init(un);
		status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense, hdrlen,
		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD);
		sd_ssc_fini(ssc);
		if (status != 0)
			goto err_exit;
		device_specific =
		    ((struct mode_header *)sense)->device_specific;
	}


	/*
	 * The write protect state is reported in the device specific
	 * parameter byte of the mode header.  If the mode sense above
	 * failed we jumped straight to err_exit and report FALSE: not
	 * all devices understand this query, and such devices are
	 * assumed to be writable.
	 */
	if (device_specific & WRITE_PROTECT) {
		rval = TRUE;
	}

err_exit:
	kmem_free(sense, hdrlen);
	return (rval);
}
27855 
27856 /*
27857  *    Function: sr_volume_ctrl()
27858  *
27859  * Description: This routine is the driver entry point for handling CD-ROM
27860  *		audio output volume ioctl requests. (CDROMVOLCTRL)
27861  *
27862  *   Arguments: dev	- the device 'dev_t'
27863  *		data	- pointer to user audio volume control structure
27864  *		flag	- this argument is a pass through to ddi_copyxxx()
27865  *			  directly from the mode argument of ioctl().
27866  *
27867  * Return Code: the code returned by sd_send_scsi_cmd()
27868  *		EFAULT if ddi_copyxxx() fails
27869  *		ENXIO if fail ddi_get_soft_state
27870  *		EINVAL if data pointer is NULL
27871  *
27872  */
27873 
static int
sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct cdrom_volctrl    volume;
	struct cdrom_volctrl    *vol = &volume;
	uchar_t			*sense_page;	/* audio ctrl page within sense data */
	uchar_t			*select_page;	/* audio ctrl page within select data */
	uchar_t			*sense;
	uchar_t			*select;
	int			sense_buflen;
	int			select_buflen;
	int			rval;
	sd_ssc_t		*ssc;

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	/* struct cdrom_volctrl has the same layout in all data models */
	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
		return (EFAULT);
	}

	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
		/*
		 * ATAPI/MMC devices: use group-1 (10-byte CDB) mode
		 * sense/select with the 8-byte group-2 mode header.
		 */
		struct mode_header_grp2		*sense_mhp;
		struct mode_header_grp2		*select_mhp;
		int				bd_len;

		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
		select_buflen = MODE_HEADER_LENGTH_GRP2 +
		    MODEPAGE_AUDIO_CTRL_LEN;
		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
		select = kmem_zalloc(select_buflen, KM_SLEEP);
		ssc = sd_ssc_init(un);
		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense,
		    sense_buflen, MODEPAGE_AUDIO_CTRL,
		    SD_PATH_STANDARD);
		sd_ssc_fini(ssc);

		if (rval != 0) {
			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
			    "sr_volume_ctrl: Mode Sense Failed\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (rval);
		}
		sense_mhp = (struct mode_header_grp2 *)sense;
		select_mhp = (struct mode_header_grp2 *)select;
		/* The returned block descriptor length may shift the page */
		bd_len = (sense_mhp->bdesc_length_hi << 8) |
		    sense_mhp->bdesc_length_lo;
		if (bd_len > MODE_BLK_DESC_LENGTH) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_volume_ctrl: Mode Sense returned invalid "
			    "block descriptor length\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (EIO);
		}
		sense_page = (uchar_t *)
		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
		/* Mode select data carries no block descriptor; zero header */
		select_mhp->length_msb = 0;
		select_mhp->length_lsb = 0;
		select_mhp->bdesc_length_hi = 0;
		select_mhp->bdesc_length_lo = 0;
	} else {
		/* SCSI devices: group-0 (6-byte CDB) with 4-byte mode header */
		struct mode_header		*sense_mhp, *select_mhp;

		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
		select = kmem_zalloc(select_buflen, KM_SLEEP);
		ssc = sd_ssc_init(un);
		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
		    sense_buflen, MODEPAGE_AUDIO_CTRL,
		    SD_PATH_STANDARD);
		sd_ssc_fini(ssc);

		if (rval != 0) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_volume_ctrl: Mode Sense Failed\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (rval);
		}
		sense_mhp  = (struct mode_header *)sense;
		select_mhp = (struct mode_header *)select;
		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_volume_ctrl: Mode Sense returned invalid "
			    "block descriptor length\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (EIO);
		}
		sense_page = (uchar_t *)
		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
		/* Mode select data carries no block descriptor; zero header */
		select_mhp->length = 0;
		select_mhp->bdesc_length = 0;
	}
	/*
	 * Note: An audio control data structure could be created and overlayed
	 * on the following in place of the array indexing method implemented.
	 */

	/* Build the select data for the user volume data */
	select_page[0] = MODEPAGE_AUDIO_CTRL;
	select_page[1] = 0xE;
	/* Set the immediate bit */
	select_page[2] = 0x04;
	/* Zero out reserved fields */
	select_page[3] = 0x00;
	select_page[4] = 0x00;
	/* Return sense data for fields not to be modified */
	select_page[5] = sense_page[5];
	select_page[6] = sense_page[6];
	select_page[7] = sense_page[7];
	/* Set the user specified volume levels for channel 0 and 1 */
	select_page[8] = 0x01;
	select_page[9] = vol->channel0;
	select_page[10] = 0x02;
	select_page[11] = vol->channel1;
	/* Channel 2 and 3 are currently unsupported so return the sense data */
	select_page[12] = sense_page[12];
	select_page[13] = sense_page[13];
	select_page[14] = sense_page[14];
	select_page[15] = sense_page[15];

	/* Send the select data using the same CDB group as the sense above */
	ssc = sd_ssc_init(un);
	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP1, select,
		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
	} else {
		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
	}
	sd_ssc_fini(ssc);

	kmem_free(sense, sense_buflen);
	kmem_free(select, select_buflen);
	return (rval);
}
28022 
28023 
28024 /*
28025  *    Function: sr_read_sony_session_offset()
28026  *
28027  * Description: This routine is the driver entry point for handling CD-ROM
28028  *		ioctl requests for session offset information. (CDROMREADOFFSET)
28029  *		The address of the first track in the last session of a
28030  *		multi-session CD-ROM is returned
28031  *
28032  *		Note: This routine uses a vendor specific key value in the
28033  *		command control field without implementing any vendor check here
28034  *		or in the ioctl routine.
28035  *
28036  *   Arguments: dev	- the device 'dev_t'
28037  *		data	- pointer to an int to hold the requested address
28038  *		flag	- this argument is a pass through to ddi_copyxxx()
28039  *			  directly from the mode argument of ioctl().
28040  *
28041  * Return Code: the code returned by sd_send_scsi_cmd()
28042  *		EFAULT if ddi_copyxxx() fails
28043  *		ENXIO if fail ddi_get_soft_state
28044  *		EINVAL if data pointer is NULL
28045  */
28046 
static int
sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct uscsi_cmd	*com;
	caddr_t			buffer;		/* kernel buffer for TOC response */
	char			cdb[CDB_GROUP1];
	int			session_offset = 0;
	int			rval;

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
	bzero(cdb, CDB_GROUP1);
	cdb[0] = SCMD_READ_TOC;
	/*
	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
	 * (4 byte TOC response header + 8 byte response data)
	 */
	cdb[8] = SONY_SESSION_OFFSET_LEN;
	/* Byte 9 is the control byte. A vendor specific value is used */
	cdb[9] = SONY_SESSION_OFFSET_KEY;
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	com->uscsi_cdb = cdb;
	com->uscsi_cdblen = CDB_GROUP1;
	com->uscsi_bufaddr = buffer;
	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;

	/* UIO_SYSSPACE: the response buffer is kernel memory */
	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
	    SD_PATH_STANDARD);
	if (rval != 0) {
		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
		kmem_free(com, sizeof (*com));
		return (rval);
	}
	/* Only report an offset if the response is marked valid */
	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
		session_offset =
		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
		/*
		 * Offset returned offset in current lbasize block's. Convert to
		 * 2k block's to return to the user
		 */
		if (un->un_tgt_blocksize == CDROM_BLK_512) {
			session_offset >>= 2;
		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
			session_offset >>= 1;
		}
	}

	/* A failed copyout overrides the command's success */
	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
		rval = EFAULT;
	}

	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
	kmem_free(com, sizeof (*com));
	return (rval);
}
28113 
28114 
28115 /*
28116  *    Function: sd_wm_cache_constructor()
28117  *
28118  * Description: Cache Constructor for the wmap cache for the read/modify/write
28119  * 		devices.
28120  *
28121  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
28122  *		un	- sd_lun structure for the device.
28123  *		flag	- the km flags passed to constructor
28124  *
28125  * Return Code: 0 on success.
28126  *		-1 on failure.
28127  */
28128 
28129 /*ARGSUSED*/
28130 static int
28131 sd_wm_cache_constructor(void *wm, void *un, int flags)
28132 {
28133 	bzero(wm, sizeof (struct sd_w_map));
28134 	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
28135 	return (0);
28136 }
28137 
28138 
28139 /*
28140  *    Function: sd_wm_cache_destructor()
28141  *
28142  * Description: Cache destructor for the wmap cache for the read/modify/write
28143  * 		devices.
28144  *
 *   Arguments: wm      - A pointer to the sd_w_map to be destroyed.
28146  *		un	- sd_lun structure for the device.
28147  */
28148 /*ARGSUSED*/
28149 static void
28150 sd_wm_cache_destructor(void *wm, void *un)
28151 {
28152 	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
28153 }
28154 
28155 
28156 /*
28157  *    Function: sd_range_lock()
28158  *
28159  * Description: Lock the range of blocks specified as parameter to ensure
28160  *		that read, modify write is atomic and no other i/o writes
28161  *		to the same location. The range is specified in terms
28162  *		of start and end blocks. Block numbers are the actual
28163  *		media block numbers and not system.
28164  *
28165  *   Arguments: un	- sd_lun structure for the device.
28166  *		startb - The starting block number
28167  *		endb - The end block number
28168  *		typ - type of i/o - simple/read_modify_write
28169  *
28170  * Return Code: wm  - pointer to the wmap structure.
28171  *
28172  *     Context: This routine can sleep.
28173  */
28174 
static struct sd_w_map *
sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
{
	struct sd_w_map *wmp = NULL;	/* the map that will hold our lock */
	struct sd_w_map *sl_wmp = NULL;	/* busy map we sleep on, if any */
	struct sd_w_map *tmp_wmp;
	wm_state state = SD_WM_CHK_LIST;


	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	mutex_enter(SD_MUTEX(un));

	/*
	 * Three-state machine, run entirely under SD_MUTEX except where
	 * noted: CHK_LIST looks for a conflicting busy range, LOCK_RANGE
	 * allocates/links our own map, WAIT_MAP sleeps until a conflicting
	 * range is released.  The mutex is dropped inside LOCK_RANGE (for a
	 * KM_SLEEP allocation) and inside WAIT_MAP (cv_wait), so the list
	 * must be rechecked after either.
	 */
	while (state != SD_WM_DONE) {

		switch (state) {
		case SD_WM_CHK_LIST:
			/*
			 * This is the starting state. Check the wmap list
			 * to see if the range is currently available.
			 */
			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
				/*
				 * If this is a simple write and no rmw
				 * i/o is pending then try to lock the
				 * range as the range should be available.
				 */
				state = SD_WM_LOCK_RANGE;
			} else {
				tmp_wmp = sd_get_range(un, startb, endb);
				if (tmp_wmp != NULL) {
					if ((wmp != NULL) && ONLIST(un, wmp)) {
						/*
						 * Should not keep onlist wmps
						 * while waiting this macro
						 * will also do wmp = NULL;
						 */
						FREE_ONLIST_WMAP(un, wmp);
					}
					/*
					 * sl_wmp is the wmap on which wait
					 * is done, since the tmp_wmp points
					 * to the inuse wmap, set sl_wmp to
					 * tmp_wmp and change the state to sleep
					 */
					sl_wmp = tmp_wmp;
					state = SD_WM_WAIT_MAP;
				} else {
					state = SD_WM_LOCK_RANGE;
				}

			}
			break;

		case SD_WM_LOCK_RANGE:
			ASSERT(un->un_wm_cache);
			/*
			 * The range need to be locked, try to get a wmap.
			 * First attempt it with NO_SLEEP, want to avoid a sleep
			 * if possible as we will have to release the sd mutex
			 * if we have to sleep.
			 */
			if (wmp == NULL)
				wmp = kmem_cache_alloc(un->un_wm_cache,
				    KM_NOSLEEP);
			if (wmp == NULL) {
				mutex_exit(SD_MUTEX(un));
				_NOTE(DATA_READABLE_WITHOUT_LOCK
				    (sd_lun::un_wm_cache))
				wmp = kmem_cache_alloc(un->un_wm_cache,
				    KM_SLEEP);
				mutex_enter(SD_MUTEX(un));
				/*
				 * we released the mutex so recheck and go to
				 * check list state.
				 */
				state = SD_WM_CHK_LIST;
			} else {
				/*
				 * We exit out of state machine since we
				 * have the wmap. Do the housekeeping first.
				 * place the wmap on the wmap list if it is not
				 * on it already and then set the state to done.
				 */
				wmp->wm_start = startb;
				wmp->wm_end = endb;
				wmp->wm_flags = typ | SD_WM_BUSY;
				if (typ & SD_WTYPE_RMW) {
					/* Count pending RMW i/o for CHK_LIST */
					un->un_rmw_count++;
				}
				/*
				 * If not already on the list then link
				 * at the head of un_wm (doubly linked).
				 */
				if (!ONLIST(un, wmp)) {
					wmp->wm_next = un->un_wm;
					wmp->wm_prev = NULL;
					if (wmp->wm_next)
						wmp->wm_next->wm_prev = wmp;
					un->un_wm = wmp;
				}
				state = SD_WM_DONE;
			}
			break;

		case SD_WM_WAIT_MAP:
			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
			/*
			 * Wait is done on sl_wmp, which is set in the
			 * check_list state.  wm_wanted_count keeps the map
			 * alive while we sleep (sd_range_unlock will not
			 * free a map with waiters).
			 */
			sl_wmp->wm_wanted_count++;
			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
			sl_wmp->wm_wanted_count--;
			/*
			 * We can reuse the memory from the completed sl_wmp
			 * lock range for our new lock, but only if no one is
			 * waiting for it.
			 */
			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
			if (sl_wmp->wm_wanted_count == 0) {
				if (wmp != NULL)
					CHK_N_FREEWMP(un, wmp);
				wmp = sl_wmp;
			}
			sl_wmp = NULL;
			/*
			 * After waking up, need to recheck for availability of
			 * range.
			 */
			state = SD_WM_CHK_LIST;
			break;

		default:
			panic("sd_range_lock: "
			    "Unknown state %d in sd_range_lock", state);
			/*NOTREACHED*/
		} /* switch(state) */

	} /* while(state != SD_WM_DONE) */

	mutex_exit(SD_MUTEX(un));

	ASSERT(wmp != NULL);

	return (wmp);
}
28322 
28323 
28324 /*
28325  *    Function: sd_get_range()
28326  *
28327  * Description: Find if there any overlapping I/O to this one
28328  *		Returns the write-map of 1st such I/O, NULL otherwise.
28329  *
28330  *   Arguments: un	- sd_lun structure for the device.
28331  *		startb - The starting block number
28332  *		endb - The end block number
28333  *
28334  * Return Code: wm  - pointer to the wmap structure.
28335  */
28336 
28337 static struct sd_w_map *
28338 sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
28339 {
28340 	struct sd_w_map *wmp;
28341 
28342 	ASSERT(un != NULL);
28343 
28344 	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
28345 		if (!(wmp->wm_flags & SD_WM_BUSY)) {
28346 			continue;
28347 		}
28348 		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
28349 			break;
28350 		}
28351 		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
28352 			break;
28353 		}
28354 	}
28355 
28356 	return (wmp);
28357 }
28358 
28359 
28360 /*
28361  *    Function: sd_free_inlist_wmap()
28362  *
28363  * Description: Unlink and free a write map struct.
28364  *
28365  *   Arguments: un      - sd_lun structure for the device.
28366  *		wmp	- sd_w_map which needs to be unlinked.
28367  */
28368 
28369 static void
28370 sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
28371 {
28372 	ASSERT(un != NULL);
28373 
28374 	if (un->un_wm == wmp) {
28375 		un->un_wm = wmp->wm_next;
28376 	} else {
28377 		wmp->wm_prev->wm_next = wmp->wm_next;
28378 	}
28379 
28380 	if (wmp->wm_next) {
28381 		wmp->wm_next->wm_prev = wmp->wm_prev;
28382 	}
28383 
28384 	wmp->wm_next = wmp->wm_prev = NULL;
28385 
28386 	kmem_cache_free(un->un_wm_cache, wmp);
28387 }
28388 
28389 
28390 /*
28391  *    Function: sd_range_unlock()
28392  *
28393  * Description: Unlock the range locked by wm.
28394  *		Free write map if nobody else is waiting on it.
28395  *
28396  *   Arguments: un      - sd_lun structure for the device.
 *              wm      - sd_w_map whose locked range is to be released.
28398  */
28399 
28400 static void
28401 sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
28402 {
28403 	ASSERT(un != NULL);
28404 	ASSERT(wm != NULL);
28405 	ASSERT(!mutex_owned(SD_MUTEX(un)));
28406 
28407 	mutex_enter(SD_MUTEX(un));
28408 
28409 	if (wm->wm_flags & SD_WTYPE_RMW) {
28410 		un->un_rmw_count--;
28411 	}
28412 
28413 	if (wm->wm_wanted_count) {
28414 		wm->wm_flags = 0;
28415 		/*
28416 		 * Broadcast that the wmap is available now.
28417 		 */
28418 		cv_broadcast(&wm->wm_avail);
28419 	} else {
28420 		/*
28421 		 * If no one is waiting on the map, it should be free'ed.
28422 		 */
28423 		sd_free_inlist_wmap(un, wm);
28424 	}
28425 
28426 	mutex_exit(SD_MUTEX(un));
28427 }
28428 
28429 
28430 /*
28431  *    Function: sd_read_modify_write_task
28432  *
28433  * Description: Called from a taskq thread to initiate the write phase of
28434  *		a read-modify-write request.  This is used for targets where
28435  *		un->un_sys_blocksize != un->un_tgt_blocksize.
28436  *
28437  *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
28438  *
28439  *     Context: Called under taskq thread context.
28440  */
28441 
28442 static void
28443 sd_read_modify_write_task(void *arg)
28444 {
28445 	struct sd_mapblocksize_info	*bsp;
28446 	struct buf	*bp;
28447 	struct sd_xbuf	*xp;
28448 	struct sd_lun	*un;
28449 
28450 	bp = arg;	/* The bp is given in arg */
28451 	ASSERT(bp != NULL);
28452 
28453 	/* Get the pointer to the layer-private data struct */
28454 	xp = SD_GET_XBUF(bp);
28455 	ASSERT(xp != NULL);
28456 	bsp = xp->xb_private;
28457 	ASSERT(bsp != NULL);
28458 
28459 	un = SD_GET_UN(bp);
28460 	ASSERT(un != NULL);
28461 	ASSERT(!mutex_owned(SD_MUTEX(un)));
28462 
28463 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
28464 	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
28465 
28466 	/*
28467 	 * This is the write phase of a read-modify-write request, called
28468 	 * under the context of a taskq thread in response to the completion
28469 	 * of the read portion of the rmw request completing under interrupt
28470 	 * context. The write request must be sent from here down the iostart
28471 	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
28472 	 * we use the layer index saved in the layer-private data area.
28473 	 */
28474 	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
28475 
28476 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
28477 	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
28478 }
28479 
28480 
28481 /*
28482  *    Function: sddump_do_read_of_rmw()
28483  *
28484  * Description: This routine will be called from sddump, If sddump is called
28485  *		with an I/O which not aligned on device blocksize boundary
28486  *		then the write has to be converted to read-modify-write.
28487  *		Do the read part here in order to keep sddump simple.
28488  *		Note - That the sd_mutex is held across the call to this
28489  *		routine.
28490  *
28491  *   Arguments: un	- sd_lun
28492  *		blkno	- block number in terms of media block size.
28493  *		nblk	- number of blocks.
28494  *		bpp	- pointer to pointer to the buf structure. On return
28495  *			from this function, *bpp points to the valid buffer
28496  *			to which the write has to be done.
28497  *
28498  * Return Code: 0 for success or errno-type return code
28499  */
28500 
static int
sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
	struct buf **bpp)
{
	int err;
	int i;
	int rval;
	struct buf *bp;
	struct scsi_pkt *pkt = NULL;
	uint32_t target_blocksize;

	/* Entered with SD_MUTEX held; it is dropped across the polled I/O. */
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	target_blocksize = un->un_tgt_blocksize;

	mutex_exit(SD_MUTEX(un));

	/* DMA-safe buffer sized for nblk whole target blocks. */
	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
	if (bp == NULL) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "no resources for dumping; giving up");
		err = ENOMEM;
		goto done;
	}

	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
	    blkno, nblk);
	if (rval != 0) {
		scsi_free_consistent_buf(bp);
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "no resources for dumping; giving up");
		err = ENOMEM;
		goto done;
	}

	/* Dump context: the command must run polled, without interrupts. */
	pkt->pkt_flags |= FLAG_NOINTR;

	err = EIO;
	for (i = 0; i < SD_NDUMP_RETRIES; i++) {

		/*
		 * Scsi_poll returns 0 (success) if the command completes and
		 * the status block is STATUS_GOOD.  We should only check
		 * errors if this condition is not true.  Even then we should
		 * send our own request sense packet only if we have a check
		 * condition and auto request sense has not been performed by
		 * the hba.
		 */
		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");

		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
			/* Full transfer completed with good status. */
			err = 0;
			break;
		}

		/*
		 * Check CMD_DEV_GONE 1st, give up if device is gone,
		 * no need to read RQS data.
		 */
		if (pkt->pkt_reason == CMD_DEV_GONE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Error while dumping state with rmw..."
			    "Device is gone\n");
			break;
		}

		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: read failed with CHECK, try # %d\n", i);
			/* Fetch sense ourselves only if the HBA did not. */
			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
				(void) sd_send_polled_RQS(un);
			}

			continue;
		}

		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
			int reset_retval = 0;

			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: read failed with BUSY, try # %d\n", i);

			/* Prefer a LUN reset; fall back to target reset. */
			if (un->un_f_lun_reset_enabled == TRUE) {
				reset_retval = scsi_reset(SD_ADDRESS(un),
				    RESET_LUN);
			}
			if (reset_retval == 0) {
				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
			}
			(void) sd_send_polled_RQS(un);

		} else {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: read failed with 0x%x, try # %d\n",
			    SD_GET_PKT_STATUS(pkt), i);
			/* sd_reset_target requires SD_MUTEX held. */
			mutex_enter(SD_MUTEX(un));
			sd_reset_target(un, pkt);
			mutex_exit(SD_MUTEX(un));
		}

		/*
		 * If we are not getting anywhere with lun/target resets,
		 * let's reset the bus.
		 */
		if (i > SD_NDUMP_RETRIES/2) {
			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
			(void) sd_send_polled_RQS(un);
		}

	}
	scsi_destroy_pkt(pkt);

	/* On success hand the filled buffer to the caller; else free it. */
	if (err != 0) {
		scsi_free_consistent_buf(bp);
		*bpp = NULL;
	} else {
		*bpp = bp;
	}

done:
	/* Re-acquire SD_MUTEX to restore the caller's locking state. */
	mutex_enter(SD_MUTEX(un));
	return (err);
}
28626 
28627 
28628 /*
28629  *    Function: sd_failfast_flushq
28630  *
28631  * Description: Take all bp's on the wait queue that have B_FAILFAST set
28632  *		in b_flags and move them onto the failfast queue, then kick
28633  *		off a thread to return all bp's on the failfast queue to
28634  *		their owners with an error set.
28635  *
28636  *   Arguments: un - pointer to the soft state struct for the instance.
28637  *
28638  *     Context: may execute in interrupt context.
28639  */
28640 
static void
sd_failfast_flushq(struct sd_lun *un)
{
	struct buf *bp;
	struct buf *next_waitq_bp;
	struct buf *prev_waitq_bp = NULL;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
	ASSERT(un->un_failfast_bp == NULL);

	SD_TRACE(SD_LOG_IO_FAILFAST, un,
	    "sd_failfast_flushq: entry: un:0x%p\n", un);

	/*
	 * Check if we should flush all bufs when entering failfast state, or
	 * just those with B_FAILFAST set.
	 */
	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
		/*
		 * Move *all* bp's on the wait queue to the failfast flush
		 * queue, including those that do NOT have B_FAILFAST set.
		 * This is done by splicing the entire waitq onto the tail
		 * of the failfast queue.
		 */
		if (un->un_failfast_headp == NULL) {
			ASSERT(un->un_failfast_tailp == NULL);
			un->un_failfast_headp = un->un_waitq_headp;
		} else {
			ASSERT(un->un_failfast_tailp != NULL);
			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
		}

		un->un_failfast_tailp = un->un_waitq_tailp;

		/* update kstat for each bp moved out of the waitq */
		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
		}

		/* empty the waitq */
		un->un_waitq_headp = un->un_waitq_tailp = NULL;

	} else {
		/*
		 * Go thru the wait queue, pick off all entries with
		 * B_FAILFAST set, and move these onto the failfast queue.
		 * prev_waitq_bp trails bp so the singly linked waitq can
		 * be repaired around each removed entry.
		 */
		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
			/*
			 * Save the pointer to the next bp on the wait queue,
			 * so we get to it on the next iteration of this loop.
			 */
			next_waitq_bp = bp->av_forw;

			/*
			 * If this bp from the wait queue does NOT have
			 * B_FAILFAST set, just move on to the next element
			 * in the wait queue. Note, this is the only place
			 * where it is correct to set prev_waitq_bp.
			 */
			if ((bp->b_flags & B_FAILFAST) == 0) {
				prev_waitq_bp = bp;
				continue;
			}

			/*
			 * Remove the bp from the wait queue.
			 */
			if (bp == un->un_waitq_headp) {
				/* The bp is the first element of the waitq. */
				un->un_waitq_headp = next_waitq_bp;
				if (un->un_waitq_headp == NULL) {
					/* The wait queue is now empty */
					un->un_waitq_tailp = NULL;
				}
			} else {
				/*
				 * The bp is either somewhere in the middle
				 * or at the end of the wait queue.
				 */
				ASSERT(un->un_waitq_headp != NULL);
				ASSERT(prev_waitq_bp != NULL);
				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
				    == 0);
				if (bp == un->un_waitq_tailp) {
					/* bp is the last entry on the waitq. */
					ASSERT(next_waitq_bp == NULL);
					un->un_waitq_tailp = prev_waitq_bp;
				}
				prev_waitq_bp->av_forw = next_waitq_bp;
			}
			bp->av_forw = NULL;

			/*
			 * update kstat since the bp is moved out of
			 * the waitq
			 */
			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);

			/*
			 * Now put the bp onto the failfast queue.
			 */
			if (un->un_failfast_headp == NULL) {
				/* failfast queue is currently empty */
				ASSERT(un->un_failfast_tailp == NULL);
				un->un_failfast_headp =
				    un->un_failfast_tailp = bp;
			} else {
				/* Add the bp to the end of the failfast q */
				ASSERT(un->un_failfast_tailp != NULL);
				ASSERT(un->un_failfast_tailp->b_flags &
				    B_FAILFAST);
				un->un_failfast_tailp->av_forw = bp;
				un->un_failfast_tailp = bp;
			}
		}
	}

	/*
	 * Now return all bp's on the failfast queue to their owners.
	 */
	while ((bp = un->un_failfast_headp) != NULL) {

		un->un_failfast_headp = bp->av_forw;
		if (un->un_failfast_headp == NULL) {
			un->un_failfast_tailp = NULL;
		}

		/*
		 * We want to return the bp with a failure error code, but
		 * we do not want a call to sd_start_cmds() to occur here,
		 * so use sd_return_failed_command_no_restart() instead of
		 * sd_return_failed_command().
		 */
		sd_return_failed_command_no_restart(un, bp, EIO);
	}

	/* Flush the xbuf queues if required. */
	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
	}

	SD_TRACE(SD_LOG_IO_FAILFAST, un,
	    "sd_failfast_flushq: exit: un:0x%p\n", un);
}
28786 
28787 
28788 /*
28789  *    Function: sd_failfast_flushq_callback
28790  *
28791  * Description: Return TRUE if the given bp meets the criteria for failfast
28792  *		flushing. Used with ddi_xbuf_flushq(9F).
28793  *
28794  *   Arguments: bp - ptr to buf struct to be examined.
28795  *
28796  *     Context: Any
28797  */
28798 
28799 static int
28800 sd_failfast_flushq_callback(struct buf *bp)
28801 {
28802 	/*
28803 	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
28804 	 * state is entered; OR (2) the given bp has B_FAILFAST set.
28805 	 */
28806 	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
28807 	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
28808 }
28809 
28810 
28811 
28812 /*
28813  * Function: sd_setup_next_xfer
28814  *
28815  * Description: Prepare next I/O operation using DMA_PARTIAL
28816  *
28817  */
28818 
28819 static int
28820 sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
28821     struct scsi_pkt *pkt, struct sd_xbuf *xp)
28822 {
28823 	ssize_t	num_blks_not_xfered;
28824 	daddr_t	strt_blk_num;
28825 	ssize_t	bytes_not_xfered;
28826 	int	rval;
28827 
28828 	ASSERT(pkt->pkt_resid == 0);
28829 
28830 	/*
28831 	 * Calculate next block number and amount to be transferred.
28832 	 *
28833 	 * How much data NOT transfered to the HBA yet.
28834 	 */
28835 	bytes_not_xfered = xp->xb_dma_resid;
28836 
28837 	/*
28838 	 * figure how many blocks NOT transfered to the HBA yet.
28839 	 */
28840 	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
28841 
28842 	/*
28843 	 * set starting block number to the end of what WAS transfered.
28844 	 */
28845 	strt_blk_num = xp->xb_blkno +
28846 	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
28847 
28848 	/*
28849 	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
28850 	 * will call scsi_initpkt with NULL_FUNC so we do not have to release
28851 	 * the disk mutex here.
28852 	 */
28853 	rval = sd_setup_next_rw_pkt(un, pkt, bp,
28854 	    strt_blk_num, num_blks_not_xfered);
28855 
28856 	if (rval == 0) {
28857 
28858 		/*
28859 		 * Success.
28860 		 *
28861 		 * Adjust things if there are still more blocks to be
28862 		 * transfered.
28863 		 */
28864 		xp->xb_dma_resid = pkt->pkt_resid;
28865 		pkt->pkt_resid = 0;
28866 
28867 		return (1);
28868 	}
28869 
28870 	/*
28871 	 * There's really only one possible return value from
28872 	 * sd_setup_next_rw_pkt which occurs when scsi_init_pkt
28873 	 * returns NULL.
28874 	 */
28875 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
28876 
28877 	bp->b_resid = bp->b_bcount;
28878 	bp->b_flags |= B_ERROR;
28879 
28880 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28881 	    "Error setting up next portion of DMA transfer\n");
28882 
28883 	return (0);
28884 }
28885 
28886 /*
28887  *    Function: sd_panic_for_res_conflict
28888  *
28889  * Description: Call panic with a string formatted with "Reservation Conflict"
28890  *		and a human readable identifier indicating the SD instance
28891  *		that experienced the reservation conflict.
28892  *
28893  *   Arguments: un - pointer to the soft state struct for the instance.
28894  *
28895  *     Context: may execute in interrupt context.
28896  */
28897 
28898 #define	SD_RESV_CONFLICT_FMT_LEN 40
28899 void
28900 sd_panic_for_res_conflict(struct sd_lun *un)
28901 {
28902 	char panic_str[SD_RESV_CONFLICT_FMT_LEN+MAXPATHLEN];
28903 	char path_str[MAXPATHLEN];
28904 
28905 	(void) snprintf(panic_str, sizeof (panic_str),
28906 	    "Reservation Conflict\nDisk: %s",
28907 	    ddi_pathname(SD_DEVINFO(un), path_str));
28908 
28909 	panic(panic_str);
28910 }
28911 
28912 /*
28913  * Note: The following sd_faultinjection_ioctl( ) routines implement
28914  * driver support for handling fault injection for error analysis
28915  * causing faults in multiple layers of the driver.
28916  *
28917  */
28918 
28919 #ifdef SD_FAULT_INJECTION
28920 static uint_t   sd_fault_injection_on = 0;
28921 
28922 /*
28923  *    Function: sd_faultinjection_ioctl()
28924  *
28925  * Description: This routine is the driver entry point for handling
28926  *              faultinjection ioctls to inject errors into the
28927  *              layer model
28928  *
28929  *   Arguments: cmd	- the ioctl cmd received
28930  *		arg	- the arguments from user and returns
28931  */
28932 
28933 static void
28934 sd_faultinjection_ioctl(int cmd, intptr_t arg,  struct sd_lun *un) {
28935 
28936 	uint_t i = 0;
28937 	uint_t rval;
28938 
28939 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
28940 
28941 	mutex_enter(SD_MUTEX(un));
28942 
28943 	switch (cmd) {
28944 	case SDIOCRUN:
28945 		/* Allow pushed faults to be injected */
28946 		SD_INFO(SD_LOG_SDTEST, un,
28947 		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
28948 
28949 		sd_fault_injection_on = 1;
28950 
28951 		SD_INFO(SD_LOG_IOERR, un,
28952 		    "sd_faultinjection_ioctl: run finished\n");
28953 		break;
28954 
28955 	case SDIOCSTART:
28956 		/* Start Injection Session */
28957 		SD_INFO(SD_LOG_SDTEST, un,
28958 		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
28959 
28960 		sd_fault_injection_on = 0;
28961 		un->sd_injection_mask = 0xFFFFFFFF;
28962 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
28963 			un->sd_fi_fifo_pkt[i] = NULL;
28964 			un->sd_fi_fifo_xb[i] = NULL;
28965 			un->sd_fi_fifo_un[i] = NULL;
28966 			un->sd_fi_fifo_arq[i] = NULL;
28967 		}
28968 		un->sd_fi_fifo_start = 0;
28969 		un->sd_fi_fifo_end = 0;
28970 
28971 		mutex_enter(&(un->un_fi_mutex));
28972 		un->sd_fi_log[0] = '\0';
28973 		un->sd_fi_buf_len = 0;
28974 		mutex_exit(&(un->un_fi_mutex));
28975 
28976 		SD_INFO(SD_LOG_IOERR, un,
28977 		    "sd_faultinjection_ioctl: start finished\n");
28978 		break;
28979 
28980 	case SDIOCSTOP:
28981 		/* Stop Injection Session */
28982 		SD_INFO(SD_LOG_SDTEST, un,
28983 		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
28984 		sd_fault_injection_on = 0;
28985 		un->sd_injection_mask = 0x0;
28986 
28987 		/* Empty stray or unuseds structs from fifo */
28988 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
28989 			if (un->sd_fi_fifo_pkt[i] != NULL) {
28990 				kmem_free(un->sd_fi_fifo_pkt[i],
28991 				    sizeof (struct sd_fi_pkt));
28992 			}
28993 			if (un->sd_fi_fifo_xb[i] != NULL) {
28994 				kmem_free(un->sd_fi_fifo_xb[i],
28995 				    sizeof (struct sd_fi_xb));
28996 			}
28997 			if (un->sd_fi_fifo_un[i] != NULL) {
28998 				kmem_free(un->sd_fi_fifo_un[i],
28999 				    sizeof (struct sd_fi_un));
29000 			}
29001 			if (un->sd_fi_fifo_arq[i] != NULL) {
29002 				kmem_free(un->sd_fi_fifo_arq[i],
29003 				    sizeof (struct sd_fi_arq));
29004 			}
29005 			un->sd_fi_fifo_pkt[i] = NULL;
29006 			un->sd_fi_fifo_un[i] = NULL;
29007 			un->sd_fi_fifo_xb[i] = NULL;
29008 			un->sd_fi_fifo_arq[i] = NULL;
29009 		}
29010 		un->sd_fi_fifo_start = 0;
29011 		un->sd_fi_fifo_end = 0;
29012 
29013 		SD_INFO(SD_LOG_IOERR, un,
29014 		    "sd_faultinjection_ioctl: stop finished\n");
29015 		break;
29016 
29017 	case SDIOCINSERTPKT:
29018 		/* Store a packet struct to be pushed onto fifo */
29019 		SD_INFO(SD_LOG_SDTEST, un,
29020 		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
29021 
29022 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
29023 
29024 		sd_fault_injection_on = 0;
29025 
29026 		/* No more that SD_FI_MAX_ERROR allowed in Queue */
29027 		if (un->sd_fi_fifo_pkt[i] != NULL) {
29028 			kmem_free(un->sd_fi_fifo_pkt[i],
29029 			    sizeof (struct sd_fi_pkt));
29030 		}
29031 		if (arg != NULL) {
29032 			un->sd_fi_fifo_pkt[i] =
29033 			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
29034 			if (un->sd_fi_fifo_pkt[i] == NULL) {
29035 				/* Alloc failed don't store anything */
29036 				break;
29037 			}
29038 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
29039 			    sizeof (struct sd_fi_pkt), 0);
29040 			if (rval == -1) {
29041 				kmem_free(un->sd_fi_fifo_pkt[i],
29042 				    sizeof (struct sd_fi_pkt));
29043 				un->sd_fi_fifo_pkt[i] = NULL;
29044 			}
29045 		} else {
29046 			SD_INFO(SD_LOG_IOERR, un,
29047 			    "sd_faultinjection_ioctl: pkt null\n");
29048 		}
29049 		break;
29050 
29051 	case SDIOCINSERTXB:
29052 		/* Store a xb struct to be pushed onto fifo */
29053 		SD_INFO(SD_LOG_SDTEST, un,
29054 		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
29055 
29056 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
29057 
29058 		sd_fault_injection_on = 0;
29059 
29060 		if (un->sd_fi_fifo_xb[i] != NULL) {
29061 			kmem_free(un->sd_fi_fifo_xb[i],
29062 			    sizeof (struct sd_fi_xb));
29063 			un->sd_fi_fifo_xb[i] = NULL;
29064 		}
29065 		if (arg != NULL) {
29066 			un->sd_fi_fifo_xb[i] =
29067 			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
29068 			if (un->sd_fi_fifo_xb[i] == NULL) {
29069 				/* Alloc failed don't store anything */
29070 				break;
29071 			}
29072 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
29073 			    sizeof (struct sd_fi_xb), 0);
29074 
29075 			if (rval == -1) {
29076 				kmem_free(un->sd_fi_fifo_xb[i],
29077 				    sizeof (struct sd_fi_xb));
29078 				un->sd_fi_fifo_xb[i] = NULL;
29079 			}
29080 		} else {
29081 			SD_INFO(SD_LOG_IOERR, un,
29082 			    "sd_faultinjection_ioctl: xb null\n");
29083 		}
29084 		break;
29085 
29086 	case SDIOCINSERTUN:
29087 		/* Store a un struct to be pushed onto fifo */
29088 		SD_INFO(SD_LOG_SDTEST, un,
29089 		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
29090 
29091 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
29092 
29093 		sd_fault_injection_on = 0;
29094 
29095 		if (un->sd_fi_fifo_un[i] != NULL) {
29096 			kmem_free(un->sd_fi_fifo_un[i],
29097 			    sizeof (struct sd_fi_un));
29098 			un->sd_fi_fifo_un[i] = NULL;
29099 		}
29100 		if (arg != NULL) {
29101 			un->sd_fi_fifo_un[i] =
29102 			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
29103 			if (un->sd_fi_fifo_un[i] == NULL) {
29104 				/* Alloc failed don't store anything */
29105 				break;
29106 			}
29107 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
29108 			    sizeof (struct sd_fi_un), 0);
29109 			if (rval == -1) {
29110 				kmem_free(un->sd_fi_fifo_un[i],
29111 				    sizeof (struct sd_fi_un));
29112 				un->sd_fi_fifo_un[i] = NULL;
29113 			}
29114 
29115 		} else {
29116 			SD_INFO(SD_LOG_IOERR, un,
29117 			    "sd_faultinjection_ioctl: un null\n");
29118 		}
29119 
29120 		break;
29121 
29122 	case SDIOCINSERTARQ:
29123 		/* Store a arq struct to be pushed onto fifo */
29124 		SD_INFO(SD_LOG_SDTEST, un,
29125 		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
29126 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
29127 
29128 		sd_fault_injection_on = 0;
29129 
29130 		if (un->sd_fi_fifo_arq[i] != NULL) {
29131 			kmem_free(un->sd_fi_fifo_arq[i],
29132 			    sizeof (struct sd_fi_arq));
29133 			un->sd_fi_fifo_arq[i] = NULL;
29134 		}
29135 		if (arg != NULL) {
29136 			un->sd_fi_fifo_arq[i] =
29137 			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
29138 			if (un->sd_fi_fifo_arq[i] == NULL) {
29139 				/* Alloc failed don't store anything */
29140 				break;
29141 			}
29142 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
29143 			    sizeof (struct sd_fi_arq), 0);
29144 			if (rval == -1) {
29145 				kmem_free(un->sd_fi_fifo_arq[i],
29146 				    sizeof (struct sd_fi_arq));
29147 				un->sd_fi_fifo_arq[i] = NULL;
29148 			}
29149 
29150 		} else {
29151 			SD_INFO(SD_LOG_IOERR, un,
29152 			    "sd_faultinjection_ioctl: arq null\n");
29153 		}
29154 
29155 		break;
29156 
29157 	case SDIOCPUSH:
29158 		/* Push stored xb, pkt, un, and arq onto fifo */
29159 		sd_fault_injection_on = 0;
29160 
29161 		if (arg != NULL) {
29162 			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
29163 			if (rval != -1 &&
29164 			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
29165 				un->sd_fi_fifo_end += i;
29166 			}
29167 		} else {
29168 			SD_INFO(SD_LOG_IOERR, un,
29169 			    "sd_faultinjection_ioctl: push arg null\n");
29170 			if (un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
29171 				un->sd_fi_fifo_end++;
29172 			}
29173 		}
29174 		SD_INFO(SD_LOG_IOERR, un,
29175 		    "sd_faultinjection_ioctl: push to end=%d\n",
29176 		    un->sd_fi_fifo_end);
29177 		break;
29178 
29179 	case SDIOCRETRIEVE:
29180 		/* Return buffer of log from Injection session */
29181 		SD_INFO(SD_LOG_SDTEST, un,
29182 		    "sd_faultinjection_ioctl: Injecting Fault Retreive");
29183 
29184 		sd_fault_injection_on = 0;
29185 
29186 		mutex_enter(&(un->un_fi_mutex));
29187 		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
29188 		    un->sd_fi_buf_len+1, 0);
29189 		mutex_exit(&(un->un_fi_mutex));
29190 
29191 		if (rval == -1) {
29192 			/*
29193 			 * arg is possibly invalid setting
29194 			 * it to NULL for return
29195 			 */
29196 			arg = NULL;
29197 		}
29198 		break;
29199 	}
29200 
29201 	mutex_exit(SD_MUTEX(un));
29202 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl:"
29203 			    " exit\n");
29204 }
29205 
29206 
29207 /*
29208  *    Function: sd_injection_log()
29209  *
29210  * Description: This routine adds buff to the already existing injection log
29211  *              for retrieval via faultinjection_ioctl for use in fault
29212  *              detection and recovery
29213  *
29214  *   Arguments: buf - the string to add to the log
29215  */
29216 
29217 static void
29218 sd_injection_log(char *buf, struct sd_lun *un)
29219 {
29220 	uint_t len;
29221 
29222 	ASSERT(un != NULL);
29223 	ASSERT(buf != NULL);
29224 
29225 	mutex_enter(&(un->un_fi_mutex));
29226 
29227 	len = min(strlen(buf), 255);
29228 	/* Add logged value to Injection log to be returned later */
29229 	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
29230 		uint_t	offset = strlen((char *)un->sd_fi_log);
29231 		char *destp = (char *)un->sd_fi_log + offset;
29232 		int i;
29233 		for (i = 0; i < len; i++) {
29234 			*destp++ = *buf++;
29235 		}
29236 		un->sd_fi_buf_len += len;
29237 		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
29238 	}
29239 
29240 	mutex_exit(&(un->un_fi_mutex));
29241 }
29242 
29243 
29244 /*
29245  *    Function: sd_faultinjection()
29246  *
29247  * Description: This routine takes the pkt and changes its
29248  *		content based on error injection scenerio.
29249  *
29250  *   Arguments: pktp	- packet to be changed
29251  */
29252 
29253 static void
29254 sd_faultinjection(struct scsi_pkt *pktp)
29255 {
29256 	uint_t i;
29257 	struct sd_fi_pkt *fi_pkt;
29258 	struct sd_fi_xb *fi_xb;
29259 	struct sd_fi_un *fi_un;
29260 	struct sd_fi_arq *fi_arq;
29261 	struct buf *bp;
29262 	struct sd_xbuf *xb;
29263 	struct sd_lun *un;
29264 
29265 	ASSERT(pktp != NULL);
29266 
29267 	/* pull bp xb and un from pktp */
29268 	bp = (struct buf *)pktp->pkt_private;
29269 	xb = SD_GET_XBUF(bp);
29270 	un = SD_GET_UN(bp);
29271 
29272 	ASSERT(un != NULL);
29273 
29274 	mutex_enter(SD_MUTEX(un));
29275 
29276 	SD_TRACE(SD_LOG_SDTEST, un,
29277 	    "sd_faultinjection: entry Injection from sdintr\n");
29278 
29279 	/* if injection is off return */
29280 	if (sd_fault_injection_on == 0 ||
29281 	    un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
29282 		mutex_exit(SD_MUTEX(un));
29283 		return;
29284 	}
29285 
29286 	SD_INFO(SD_LOG_SDTEST, un,
29287 	    "sd_faultinjection: is working for copying\n");
29288 
29289 	/* take next set off fifo */
29290 	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;
29291 
29292 	fi_pkt = un->sd_fi_fifo_pkt[i];
29293 	fi_xb = un->sd_fi_fifo_xb[i];
29294 	fi_un = un->sd_fi_fifo_un[i];
29295 	fi_arq = un->sd_fi_fifo_arq[i];
29296 
29297 
29298 	/* set variables accordingly */
29299 	/* set pkt if it was on fifo */
29300 	if (fi_pkt != NULL) {
29301 		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
29302 		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
29303 		if (fi_pkt->pkt_cdbp != 0xff)
29304 			SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
29305 		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
29306 		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
29307 		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");
29308 
29309 	}
29310 	/* set xb if it was on fifo */
29311 	if (fi_xb != NULL) {
29312 		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
29313 		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
29314 		if (fi_xb->xb_retry_count != 0)
29315 			SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
29316 		SD_CONDSET(xb, xb, xb_victim_retry_count,
29317 		    "xb_victim_retry_count");
29318 		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
29319 		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
29320 		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");
29321 
29322 		/* copy in block data from sense */
29323 		/*
29324 		 * if (fi_xb->xb_sense_data[0] != -1) {
29325 		 *	bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
29326 		 *	SENSE_LENGTH);
29327 		 * }
29328 		 */
29329 		bcopy(fi_xb->xb_sense_data, xb->xb_sense_data, SENSE_LENGTH);
29330 
29331 		/* copy in extended sense codes */
29332 		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
29333 		    xb, es_code, "es_code");
29334 		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
29335 		    xb, es_key, "es_key");
29336 		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
29337 		    xb, es_add_code, "es_add_code");
29338 		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
29339 		    xb, es_qual_code, "es_qual_code");
29340 		struct scsi_extended_sense *esp;
29341 		esp = (struct scsi_extended_sense *)xb->xb_sense_data;
29342 		esp->es_class = CLASS_EXTENDED_SENSE;
29343 	}
29344 
29345 	/* set un if it was on fifo */
29346 	if (fi_un != NULL) {
29347 		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
29348 		SD_CONDSET(un, un, un_ctype, "un_ctype");
29349 		SD_CONDSET(un, un, un_reset_retry_count,
29350 		    "un_reset_retry_count");
29351 		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
29352 		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
29353 		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
29354 		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
29355 		    "un_f_allow_bus_device_reset");
29356 		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");
29357 
29358 	}
29359 
29360 	/* copy in auto request sense if it was on fifo */
29361 	if (fi_arq != NULL) {
29362 		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
29363 	}
29364 
29365 	/* free structs */
29366 	if (un->sd_fi_fifo_pkt[i] != NULL) {
29367 		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
29368 	}
29369 	if (un->sd_fi_fifo_xb[i] != NULL) {
29370 		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
29371 	}
29372 	if (un->sd_fi_fifo_un[i] != NULL) {
29373 		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
29374 	}
29375 	if (un->sd_fi_fifo_arq[i] != NULL) {
29376 		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
29377 	}
29378 
29379 	/*
29380 	 * kmem_free does not gurantee to set to NULL
29381 	 * since we uses these to determine if we set
29382 	 * values or not lets confirm they are always
29383 	 * NULL after free
29384 	 */
29385 	un->sd_fi_fifo_pkt[i] = NULL;
29386 	un->sd_fi_fifo_un[i] = NULL;
29387 	un->sd_fi_fifo_xb[i] = NULL;
29388 	un->sd_fi_fifo_arq[i] = NULL;
29389 
29390 	un->sd_fi_fifo_start++;
29391 
29392 	mutex_exit(SD_MUTEX(un));
29393 
29394 	SD_INFO(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
29395 }
29396 
29397 #endif /* SD_FAULT_INJECTION */
29398 
29399 /*
29400  * This routine is invoked in sd_unit_attach(). Before calling it, the
29401  * properties in conf file should be processed already, and "hotpluggable"
29402  * property was processed also.
29403  *
29404  * The sd driver distinguishes 3 different type of devices: removable media,
29405  * non-removable media, and hotpluggable. Below the differences are defined:
29406  *
29407  * 1. Device ID
29408  *
29409  *     The device ID of a device is used to identify this device. Refer to
29410  *     ddi_devid_register(9F).
29411  *
29412  *     For a non-removable media disk device which can provide 0x80 or 0x83
29413  *     VPD page (refer to INQUIRY command of SCSI SPC specification), a unique
29414  *     device ID is created to identify this device. For other non-removable
 *     media devices, a default device ID is created only if this device has
 *     at least 2 alternate cylinders. Otherwise, this device has no devid.
29417  *
29418  *     -------------------------------------------------------
29419  *     removable media   hotpluggable  | Can Have Device ID
29420  *     -------------------------------------------------------
29421  *         false             false     |     Yes
29422  *         false             true      |     Yes
29423  *         true                x       |     No
29424  *     ------------------------------------------------------
29425  *
29426  *
29427  * 2. SCSI group 4 commands
29428  *
29429  *     In SCSI specs, only some commands in group 4 command set can use
29430  *     8-byte addresses that can be used to access >2TB storage spaces.
29431  *     Other commands have no such capability. Without supporting group4,
29432  *     it is impossible to make full use of storage spaces of a disk with
29433  *     capacity larger than 2TB.
29434  *
29435  *     -----------------------------------------------
29436  *     removable media   hotpluggable   LP64  |  Group
29437  *     -----------------------------------------------
29438  *           false          false       false |   1
29439  *           false          false       true  |   4
29440  *           false          true        false |   1
29441  *           false          true        true  |   4
29442  *           true             x           x   |   5
29443  *     -----------------------------------------------
29444  *
29445  *
29446  * 3. Check for VTOC Label
29447  *
29448  *     If a direct-access disk has no EFI label, sd will check if it has a
29449  *     valid VTOC label. Now, sd also does that check for removable media
29450  *     and hotpluggable devices.
29451  *
29452  *     --------------------------------------------------------------
29453  *     Direct-Access   removable media    hotpluggable |  Check Label
29454  *     -------------------------------------------------------------
29455  *         false          false           false        |   No
29456  *         false          false           true         |   No
29457  *         false          true            false        |   Yes
29458  *         false          true            true         |   Yes
29459  *         true            x                x          |   Yes
29460  *     --------------------------------------------------------------
29461  *
29462  *
29463  * 4. Building default VTOC label
29464  *
29465  *     As section 3 says, sd checks if some kinds of devices have VTOC label.
29466  *     If those devices have no valid VTOC label, sd(7d) will attempt to
29467  *     create default VTOC for them. Currently sd creates default VTOC label
29468  *     for all devices on x86 platform (VTOC_16), but only for removable
29469  *     media devices on SPARC (VTOC_8).
29470  *
29471  *     -----------------------------------------------------------
29472  *       removable media hotpluggable platform   |   Default Label
29473  *     -----------------------------------------------------------
29474  *             false          false    sparc     |     No
29475  *             false          true      x86      |     Yes
29476  *             false          true     sparc     |     Yes
29477  *             true             x        x       |     Yes
29478  *     ----------------------------------------------------------
29479  *
29480  *
29481  * 5. Supported blocksizes of target devices
29482  *
29483  *     Sd supports non-512-byte blocksize for removable media devices only.
29484  *     For other devices, only 512-byte blocksize is supported. This may be
29485  *     changed in near future because some RAID devices require non-512-byte
29486  *     blocksize
29487  *
29488  *     -----------------------------------------------------------
29489  *     removable media    hotpluggable    | non-512-byte blocksize
29490  *     -----------------------------------------------------------
29491  *           false          false         |   No
29492  *           false          true          |   No
29493  *           true             x           |   Yes
29494  *     -----------------------------------------------------------
29495  *
29496  *
29497  * 6. Automatic mount & unmount
29498  *
 *     Sd(7d) driver provides the DKIOCREMOVABLE ioctl. This ioctl is used to
 *     query whether a device is a removable media device. It returns 1 for
 *     removable media devices, and 0 for others.
29502  *
29503  *     The automatic mounting subsystem should distinguish between the types
29504  *     of devices and apply automounting policies to each.
29505  *
29506  *
29507  * 7. fdisk partition management
29508  *
29509  *     Fdisk is traditional partition method on x86 platform. Sd(7d) driver
29510  *     just supports fdisk partitions on x86 platform. On sparc platform, sd
29511  *     doesn't support fdisk partitions at all. Note: pcfs(7fs) can recognize
29512  *     fdisk partitions on both x86 and SPARC platform.
29513  *
29514  *     -----------------------------------------------------------
29515  *       platform   removable media  USB/1394  |  fdisk supported
29516  *     -----------------------------------------------------------
29517  *        x86         X               X        |       true
29518  *     ------------------------------------------------------------
29519  *        sparc       X               X        |       false
29520  *     ------------------------------------------------------------
29521  *
29522  *
29523  * 8. MBOOT/MBR
29524  *
29525  *     Although sd(7d) doesn't support fdisk on SPARC platform, it does support
29526  *     read/write mboot for removable media devices on sparc platform.
29527  *
29528  *     -----------------------------------------------------------
29529  *       platform   removable media  USB/1394  |  mboot supported
29530  *     -----------------------------------------------------------
29531  *        x86         X               X        |       true
29532  *     ------------------------------------------------------------
29533  *        sparc      false           false     |       false
29534  *        sparc      false           true      |       true
29535  *        sparc      true            false     |       true
29536  *        sparc      true            true      |       true
29537  *     ------------------------------------------------------------
29538  *
29539  *
29540  * 9.  error handling during opening device
29541  *
29542  *     If failed to open a disk device, an errno is returned. For some kinds
29543  *     of errors, different errno is returned depending on if this device is
29544  *     a removable media device. This brings USB/1394 hard disks in line with
29545  *     expected hard disk behavior. It is not expected that this breaks any
29546  *     application.
29547  *
29548  *     ------------------------------------------------------
29549  *       removable media    hotpluggable   |  errno
29550  *     ------------------------------------------------------
29551  *             false          false        |   EIO
29552  *             false          true         |   EIO
29553  *             true             x          |   ENXIO
29554  *     ------------------------------------------------------
29555  *
29556  *
29557  * 11. ioctls: DKIOCEJECT, CDROMEJECT
29558  *
29559  *     These IOCTLs are applicable only to removable media devices.
29560  *
29561  *     -----------------------------------------------------------
29562  *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
29563  *     -----------------------------------------------------------
29564  *             false          false        |     No
29565  *             false          true         |     No
29566  *             true            x           |     Yes
29567  *     -----------------------------------------------------------
29568  *
29569  *
29570  * 12. Kstats for partitions
29571  *
29572  *     sd creates partition kstat for non-removable media devices. USB and
29573  *     Firewire hard disks now have partition kstats
29574  *
29575  *      ------------------------------------------------------
29576  *       removable media    hotpluggable   |   kstat
29577  *      ------------------------------------------------------
29578  *             false          false        |    Yes
29579  *             false          true         |    Yes
29580  *             true             x          |    No
29581  *       ------------------------------------------------------
29582  *
29583  *
29584  * 13. Removable media & hotpluggable properties
29585  *
29586  *     Sd driver creates a "removable-media" property for removable media
29587  *     devices. Parent nexus drivers create a "hotpluggable" property if
29588  *     it supports hotplugging.
29589  *
29590  *     ---------------------------------------------------------------------
29591  *     removable media   hotpluggable |  "removable-media"   " hotpluggable"
29592  *     ---------------------------------------------------------------------
29593  *       false            false       |    No                   No
29594  *       false            true        |    No                   Yes
29595  *       true             false       |    Yes                  No
29596  *       true             true        |    Yes                  Yes
29597  *     ---------------------------------------------------------------------
29598  *
29599  *
29600  * 14. Power Management
29601  *
29602  *     sd only power manages removable media devices or devices that support
29603  *     LOG_SENSE or have a "pm-capable" property  (PSARC/2002/250)
29604  *
29605  *     A parent nexus that supports hotplugging can also set "pm-capable"
29606  *     if the disk can be power managed.
29607  *
29608  *     ------------------------------------------------------------
29609  *       removable media hotpluggable pm-capable  |   power manage
29610  *     ------------------------------------------------------------
29611  *             false          false     false     |     No
29612  *             false          false     true      |     Yes
29613  *             false          true      false     |     No
29614  *             false          true      true      |     Yes
29615  *             true             x        x        |     Yes
29616  *     ------------------------------------------------------------
29617  *
29618  *      USB and firewire hard disks can now be power managed independently
29619  *      of the framebuffer
29620  *
29621  *
29622  * 15. Support for USB disks with capacity larger than 1TB
29623  *
29624  *     Currently, sd doesn't permit a fixed disk device with capacity
29625  *     larger than 1TB to be used in a 32-bit operating system environment.
29626  *     However, sd doesn't do that for removable media devices. Instead, it
29627  *     assumes that removable media devices cannot have a capacity larger
29628  *     than 1TB. Therefore, using those devices on 32-bit system is partially
29629  *     supported, which can cause some unexpected results.
29630  *
29631  *     ---------------------------------------------------------------------
29632  *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
29633  *     ---------------------------------------------------------------------
29634  *             false          false  |   true         |     no
29635  *             false          true   |   true         |     no
29636  *             true           false  |   true         |     Yes
29637  *             true           true   |   true         |     Yes
29638  *     ---------------------------------------------------------------------
29639  *
29640  *
29641  * 16. Check write-protection at open time
29642  *
29643  *     When a removable media device is being opened for writing without NDELAY
29644  *     flag, sd will check if this device is writable. If attempting to open
29645  *     without NDELAY flag a write-protected device, this operation will abort.
29646  *
29647  *     ------------------------------------------------------------
29648  *       removable media    USB/1394   |   WP Check
29649  *     ------------------------------------------------------------
29650  *             false          false    |     No
29651  *             false          true     |     No
29652  *             true           false    |     Yes
29653  *             true           true     |     Yes
29654  *     ------------------------------------------------------------
29655  *
29656  *
29657  * 17. syslog when corrupted VTOC is encountered
29658  *
 *      Currently, if an invalid VTOC is encountered, sd only prints a syslog
 *      message for fixed SCSI disks.
29661  *     ------------------------------------------------------------
29662  *       removable media    USB/1394   |   print syslog
29663  *     ------------------------------------------------------------
29664  *             false          false    |     Yes
29665  *             false          true     |     No
29666  *             true           false    |     No
29667  *             true           true     |     No
29668  *     ------------------------------------------------------------
29669  */
29670 static void
29671 sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
29672 {
29673 	int	pm_capable_prop;
29674 
29675 	ASSERT(un->un_sd);
29676 	ASSERT(un->un_sd->sd_inq);
29677 
29678 	/*
29679 	 * Enable SYNC CACHE support for all devices.
29680 	 */
29681 	un->un_f_sync_cache_supported = TRUE;
29682 
29683 	/*
29684 	 * Set the sync cache required flag to false.
29685 	 * This would ensure that there is no SYNC CACHE
29686 	 * sent when there are no writes
29687 	 */
29688 	un->un_f_sync_cache_required = FALSE;
29689 
29690 	if (un->un_sd->sd_inq->inq_rmb) {
29691 		/*
29692 		 * The media of this device is removable. And for this kind
29693 		 * of devices, it is possible to change medium after opening
29694 		 * devices. Thus we should support this operation.
29695 		 */
29696 		un->un_f_has_removable_media = TRUE;
29697 
29698 		/*
29699 		 * support non-512-byte blocksize of removable media devices
29700 		 */
29701 		un->un_f_non_devbsize_supported = TRUE;
29702 
29703 		/*
29704 		 * Assume that all removable media devices support DOOR_LOCK
29705 		 */
29706 		un->un_f_doorlock_supported = TRUE;
29707 
29708 		/*
29709 		 * For a removable media device, it is possible to be opened
29710 		 * with NDELAY flag when there is no media in drive, in this
29711 		 * case we don't care if device is writable. But if without
29712 		 * NDELAY flag, we need to check if media is write-protected.
29713 		 */
29714 		un->un_f_chk_wp_open = TRUE;
29715 
29716 		/*
29717 		 * need to start a SCSI watch thread to monitor media state,
29718 		 * when media is being inserted or ejected, notify syseventd.
29719 		 */
29720 		un->un_f_monitor_media_state = TRUE;
29721 
29722 		/*
29723 		 * Some devices don't support START_STOP_UNIT command.
29724 		 * Therefore, we'd better check if a device supports it
29725 		 * before sending it.
29726 		 */
29727 		un->un_f_check_start_stop = TRUE;
29728 
29729 		/*
29730 		 * support eject media ioctl:
29731 		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
29732 		 */
29733 		un->un_f_eject_media_supported = TRUE;
29734 
29735 		/*
29736 		 * Because many removable-media devices don't support
29737 		 * LOG_SENSE, we couldn't use this command to check if
29738 		 * a removable media device support power-management.
29739 		 * We assume that they support power-management via
29740 		 * START_STOP_UNIT command and can be spun up and down
29741 		 * without limitations.
29742 		 */
29743 		un->un_f_pm_supported = TRUE;
29744 
29745 		/*
29746 		 * Need to create a zero length (Boolean) property
29747 		 * removable-media for the removable media devices.
29748 		 * Note that the return value of the property is not being
29749 		 * checked, since if unable to create the property
29750 		 * then do not want the attach to fail altogether. Consistent
29751 		 * with other property creation in attach.
29752 		 */
29753 		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
29754 		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
29755 
29756 	} else {
29757 		/*
29758 		 * create device ID for device
29759 		 */
29760 		un->un_f_devid_supported = TRUE;
29761 
29762 		/*
29763 		 * Spin up non-removable-media devices once it is attached
29764 		 */
29765 		un->un_f_attach_spinup = TRUE;
29766 
29767 		/*
29768 		 * According to SCSI specification, Sense data has two kinds of
29769 		 * format: fixed format, and descriptor format. At present, we
29770 		 * don't support descriptor format sense data for removable
29771 		 * media.
29772 		 */
29773 		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
29774 			un->un_f_descr_format_supported = TRUE;
29775 		}
29776 
29777 		/*
29778 		 * kstats are created only for non-removable media devices.
29779 		 *
29780 		 * Set this in sd.conf to 0 in order to disable kstats.  The
29781 		 * default is 1, so they are enabled by default.
29782 		 */
29783 		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
29784 		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
29785 		    "enable-partition-kstats", 1));
29786 
29787 		/*
29788 		 * Check if HBA has set the "pm-capable" property.
29789 		 * If "pm-capable" exists and is non-zero then we can
29790 		 * power manage the device without checking the start/stop
29791 		 * cycle count log sense page.
29792 		 *
29793 		 * If "pm-capable" exists and is SD_PM_CAPABLE_FALSE (0)
29794 		 * then we should not power manage the device.
29795 		 *
29796 		 * If "pm-capable" doesn't exist then pm_capable_prop will
29797 		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
29798 		 * sd will check the start/stop cycle count log sense page
29799 		 * and power manage the device if the cycle count limit has
29800 		 * not been exceeded.
29801 		 */
29802 		pm_capable_prop = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
29803 		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
29804 		if (pm_capable_prop == SD_PM_CAPABLE_UNDEFINED) {
29805 			un->un_f_log_sense_supported = TRUE;
29806 		} else {
29807 			/*
29808 			 * pm-capable property exists.
29809 			 *
29810 			 * Convert "TRUE" values for pm_capable_prop to
29811 			 * SD_PM_CAPABLE_TRUE (1) to make it easier to check
29812 			 * later. "TRUE" values are any values except
29813 			 * SD_PM_CAPABLE_FALSE (0) and
29814 			 * SD_PM_CAPABLE_UNDEFINED (-1)
29815 			 */
29816 			if (pm_capable_prop == SD_PM_CAPABLE_FALSE) {
29817 				un->un_f_log_sense_supported = FALSE;
29818 			} else {
29819 				un->un_f_pm_supported = TRUE;
29820 			}
29821 
29822 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
29823 			    "sd_unit_attach: un:0x%p pm-capable "
29824 			    "property set to %d.\n", un, un->un_f_pm_supported);
29825 		}
29826 	}
29827 
29828 	if (un->un_f_is_hotpluggable) {
29829 
29830 		/*
29831 		 * Have to watch hotpluggable devices as well, since
29832 		 * that's the only way for userland applications to
29833 		 * detect hot removal while device is busy/mounted.
29834 		 */
29835 		un->un_f_monitor_media_state = TRUE;
29836 
29837 		un->un_f_check_start_stop = TRUE;
29838 
29839 	}
29840 }
29841 
29842 /*
29843  * sd_tg_rdwr:
29844  * Provides rdwr access for cmlb via sd_tgops. The start_block is
29845  * in sys block size, req_length in bytes.
29846  *
29847  */
29848 static int
29849 sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
29850     diskaddr_t start_block, size_t reqlength, void *tg_cookie)
29851 {
29852 	struct sd_lun *un;
29853 	int path_flag = (int)(uintptr_t)tg_cookie;
29854 	char *dkl = NULL;
29855 	diskaddr_t real_addr = start_block;
29856 	diskaddr_t first_byte, end_block;
29857 
29858 	size_t	buffer_size = reqlength;
29859 	int rval = 0;
29860 	diskaddr_t	cap;
29861 	uint32_t	lbasize;
29862 	sd_ssc_t	*ssc;
29863 
29864 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
29865 	if (un == NULL)
29866 		return (ENXIO);
29867 
29868 	if (cmd != TG_READ && cmd != TG_WRITE)
29869 		return (EINVAL);
29870 
29871 	ssc = sd_ssc_init(un);
29872 	mutex_enter(SD_MUTEX(un));
29873 	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
29874 		mutex_exit(SD_MUTEX(un));
29875 		rval = sd_send_scsi_READ_CAPACITY(ssc, (uint64_t *)&cap,
29876 		    &lbasize, path_flag);
29877 		if (rval != 0)
29878 			goto done1;
29879 		mutex_enter(SD_MUTEX(un));
29880 		sd_update_block_info(un, lbasize, cap);
29881 		if ((un->un_f_tgt_blocksize_is_valid == FALSE)) {
29882 			mutex_exit(SD_MUTEX(un));
29883 			rval = EIO;
29884 			goto done;
29885 		}
29886 	}
29887 
29888 	if (NOT_DEVBSIZE(un)) {
29889 		/*
29890 		 * sys_blocksize != tgt_blocksize, need to re-adjust
29891 		 * blkno and save the index to beginning of dk_label
29892 		 */
29893 		first_byte  = SD_SYSBLOCKS2BYTES(un, start_block);
29894 		real_addr = first_byte / un->un_tgt_blocksize;
29895 
29896 		end_block = (first_byte + reqlength +
29897 		    un->un_tgt_blocksize - 1) / un->un_tgt_blocksize;
29898 
29899 		/* round up buffer size to multiple of target block size */
29900 		buffer_size = (end_block - real_addr) * un->un_tgt_blocksize;
29901 
29902 		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_tg_rdwr",
29903 		    "label_addr: 0x%x allocation size: 0x%x\n",
29904 		    real_addr, buffer_size);
29905 
29906 		if (((first_byte % un->un_tgt_blocksize) != 0) ||
29907 		    (reqlength % un->un_tgt_blocksize) != 0)
29908 			/* the request is not aligned */
29909 			dkl = kmem_zalloc(buffer_size, KM_SLEEP);
29910 	}
29911 
29912 	/*
29913 	 * The MMC standard allows READ CAPACITY to be
29914 	 * inaccurate by a bounded amount (in the interest of
29915 	 * response latency).  As a result, failed READs are
29916 	 * commonplace (due to the reading of metadata and not
29917 	 * data). Depending on the per-Vendor/drive Sense data,
29918 	 * the failed READ can cause many (unnecessary) retries.
29919 	 */
29920 
29921 	if (ISCD(un) && (cmd == TG_READ) &&
29922 	    (un->un_f_blockcount_is_valid == TRUE) &&
29923 	    ((start_block == (un->un_blockcount - 1))||
29924 	    (start_block == (un->un_blockcount - 2)))) {
29925 			path_flag = SD_PATH_DIRECT_PRIORITY;
29926 	}
29927 
29928 	mutex_exit(SD_MUTEX(un));
29929 	if (cmd == TG_READ) {
29930 		rval = sd_send_scsi_READ(ssc, (dkl != NULL)? dkl: bufaddr,
29931 		    buffer_size, real_addr, path_flag);
29932 		if (dkl != NULL)
29933 			bcopy(dkl + SD_TGTBYTEOFFSET(un, start_block,
29934 			    real_addr), bufaddr, reqlength);
29935 	} else {
29936 		if (dkl) {
29937 			rval = sd_send_scsi_READ(ssc, dkl, buffer_size,
29938 			    real_addr, path_flag);
29939 			if (rval) {
29940 				goto done1;
29941 			}
29942 			bcopy(bufaddr, dkl + SD_TGTBYTEOFFSET(un, start_block,
29943 			    real_addr), reqlength);
29944 		}
29945 		rval = sd_send_scsi_WRITE(ssc, (dkl != NULL)? dkl: bufaddr,
29946 		    buffer_size, real_addr, path_flag);
29947 	}
29948 
29949 done1:
29950 	if (dkl != NULL)
29951 		kmem_free(dkl, buffer_size);
29952 
29953 	if (rval != 0) {
29954 		if (rval == EIO)
29955 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
29956 		else
29957 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
29958 	}
29959 done:
29960 	sd_ssc_fini(ssc);
29961 	return (rval);
29962 }
29963 
29964 
29965 static int
29966 sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie)
29967 {
29968 
29969 	struct sd_lun *un;
29970 	diskaddr_t	cap;
29971 	uint32_t	lbasize;
29972 	int		path_flag = (int)(uintptr_t)tg_cookie;
29973 	int		ret = 0;
29974 
29975 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
29976 	if (un == NULL)
29977 		return (ENXIO);
29978 
29979 	switch (cmd) {
29980 	case TG_GETPHYGEOM:
29981 	case TG_GETVIRTGEOM:
29982 	case TG_GETCAPACITY:
29983 	case TG_GETBLOCKSIZE:
29984 		mutex_enter(SD_MUTEX(un));
29985 
29986 		if ((un->un_f_blockcount_is_valid == TRUE) &&
29987 		    (un->un_f_tgt_blocksize_is_valid == TRUE)) {
29988 			cap = un->un_blockcount;
29989 			lbasize = un->un_tgt_blocksize;
29990 			mutex_exit(SD_MUTEX(un));
29991 		} else {
29992 			sd_ssc_t	*ssc;
29993 			mutex_exit(SD_MUTEX(un));
29994 			ssc = sd_ssc_init(un);
29995 			ret = sd_send_scsi_READ_CAPACITY(ssc, (uint64_t *)&cap,
29996 			    &lbasize, path_flag);
29997 			if (ret != 0) {
29998 				if (ret == EIO)
29999 					sd_ssc_assessment(ssc,
30000 					    SD_FMT_STATUS_CHECK);
30001 				else
30002 					sd_ssc_assessment(ssc,
30003 					    SD_FMT_IGNORE);
30004 				sd_ssc_fini(ssc);
30005 				return (ret);
30006 			}
30007 			sd_ssc_fini(ssc);
30008 			mutex_enter(SD_MUTEX(un));
30009 			sd_update_block_info(un, lbasize, cap);
30010 			if ((un->un_f_blockcount_is_valid == FALSE) ||
30011 			    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
30012 				mutex_exit(SD_MUTEX(un));
30013 				return (EIO);
30014 			}
30015 			mutex_exit(SD_MUTEX(un));
30016 		}
30017 
30018 		if (cmd == TG_GETCAPACITY) {
30019 			*(diskaddr_t *)arg = cap;
30020 			return (0);
30021 		}
30022 
30023 		if (cmd == TG_GETBLOCKSIZE) {
30024 			*(uint32_t *)arg = lbasize;
30025 			return (0);
30026 		}
30027 
30028 		if (cmd == TG_GETPHYGEOM)
30029 			ret = sd_get_physical_geometry(un, (cmlb_geom_t *)arg,
30030 			    cap, lbasize, path_flag);
30031 		else
30032 			/* TG_GETVIRTGEOM */
30033 			ret = sd_get_virtual_geometry(un,
30034 			    (cmlb_geom_t *)arg, cap, lbasize);
30035 
30036 		return (ret);
30037 
30038 	case TG_GETATTR:
30039 		mutex_enter(SD_MUTEX(un));
30040 		((tg_attribute_t *)arg)->media_is_writable =
30041 		    un->un_f_mmc_writable_media;
30042 		mutex_exit(SD_MUTEX(un));
30043 		return (0);
30044 	default:
30045 		return (ENOTTY);
30046 
30047 	}
30048 }
30049 
30050 /*
30051  *    Function: sd_ssc_ereport_post
30052  *
 * Description: Called when the SD driver needs to post an ereport.
30054  *
30055  *    Context: Kernel thread or interrupt context.
30056  */
static void
sd_ssc_ereport_post(sd_ssc_t *ssc, enum sd_driver_assessment drv_assess)
{
	int uscsi_path_instance = 0;
	uchar_t	uscsi_pkt_reason;
	uint32_t uscsi_pkt_state;
	uint32_t uscsi_pkt_statistics;
	uint64_t uscsi_ena;
	uchar_t op_code;
	uint8_t *sensep;
	union scsi_cdb *cdbp;
	uint_t cdblen = 0;
	uint_t senlen = 0;
	struct sd_lun *un;
	dev_info_t *dip;
	char *devid;
	/* All the "un-decodable data" conditions handled by the uderr case */
	int ssc_invalid_flags = SSC_FLAGS_INVALID_PKT_REASON |
	    SSC_FLAGS_INVALID_STATUS |
	    SSC_FLAGS_INVALID_SENSE |
	    SSC_FLAGS_INVALID_DATA;
	char assessment[16];	/* longest value is "recovered" (9 chars) */

	ASSERT(ssc != NULL);
	ASSERT(ssc->ssc_uscsi_cmd != NULL);
	ASSERT(ssc->ssc_uscsi_info != NULL);

	un = ssc->ssc_un;
	ASSERT(un != NULL);

	dip = un->un_sd->sd_dev;

	/*
	 * Get the devid:
	 *	devid will only be passed to non-transport error reports.
	 */
	devid = DEVI(dip)->devi_devid_str;

	/*
	 * If we are syncing or dumping, the command will not be executed
	 * so we bypass this situation.
	 */
	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
	    (un->un_state == SD_STATE_DUMPING))
		return;

	/* Snapshot the command-completion info captured by extract_info */
	uscsi_pkt_reason = ssc->ssc_uscsi_info->ui_pkt_reason;
	uscsi_path_instance = ssc->ssc_uscsi_cmd->uscsi_path_instance;
	uscsi_pkt_state = ssc->ssc_uscsi_info->ui_pkt_state;
	uscsi_pkt_statistics = ssc->ssc_uscsi_info->ui_pkt_statistics;
	uscsi_ena = ssc->ssc_uscsi_info->ui_ena;

	sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
	cdbp = (union scsi_cdb *)ssc->ssc_uscsi_cmd->uscsi_cdb;

	/* In rare cases, EG:DOORLOCK, the cdb could be NULL */
	if (cdbp == NULL) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_ssc_ereport_post meet empty cdb\n");
		return;
	}

	op_code = cdbp->scc_cmd;

	cdblen = (int)ssc->ssc_uscsi_cmd->uscsi_cdblen;
	/* Valid sense bytes = bytes requested minus the residual */
	senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
	    ssc->ssc_uscsi_cmd->uscsi_rqresid);

	if (senlen > 0)
		ASSERT(sensep != NULL);

	/*
	 * Initialize drv_assess to corresponding values.
	 * SD_FM_DRV_FATAL will be mapped to "fail" or "fatal" depending
	 * on the sense-key returned back.
	 */
	switch (drv_assess) {
		case SD_FM_DRV_RECOVERY:
			(void) sprintf(assessment, "%s", "recovered");
			break;
		case SD_FM_DRV_RETRY:
			(void) sprintf(assessment, "%s", "retry");
			break;
		case SD_FM_DRV_NOTICE:
			(void) sprintf(assessment, "%s", "info");
			break;
		case SD_FM_DRV_FATAL:
		default:
			(void) sprintf(assessment, "%s", "unknown");
	}
	/*
	 * If drv_assess == SD_FM_DRV_RECOVERY, this should be a recovered
	 * command, we will post ereport.io.scsi.cmd.disk.recovered.
	 * driver-assessment will always be "recovered" here.
	 */
	if (drv_assess == SD_FM_DRV_RECOVERY) {
		scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
		    "cmd.disk.recovered", uscsi_ena, devid, DDI_NOSLEEP,
		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
		    "driver-assessment", DATA_TYPE_STRING, assessment,
		    "op-code", DATA_TYPE_UINT8, op_code,
		    "cdb", DATA_TYPE_UINT8_ARRAY,
		    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
		    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
		    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
		    "pkt-stats", DATA_TYPE_UINT32, uscsi_pkt_statistics,
		    NULL);
		return;
	}

	/*
	 * If there is un-expected/un-decodable data, we should post
	 * ereport.io.scsi.cmd.disk.dev.uderr.
	 * driver-assessment will be set based on parameter drv_assess.
	 * SSC_FLAGS_INVALID_SENSE - invalid sense data sent back.
	 * SSC_FLAGS_INVALID_PKT_REASON - invalid pkt-reason encountered.
	 * SSC_FLAGS_INVALID_STATUS - invalid stat-code encountered.
	 * SSC_FLAGS_INVALID_DATA - invalid data sent back.
	 */
	if (ssc->ssc_flags & ssc_invalid_flags) {
		if (ssc->ssc_flags & SSC_FLAGS_INVALID_SENSE) {
			/* Include the raw sense bytes as un-decode-value */
			scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
			    "cmd.disk.dev.uderr", uscsi_ena, devid, DDI_NOSLEEP,
			    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
			    "driver-assessment", DATA_TYPE_STRING,
			    drv_assess == SD_FM_DRV_FATAL ?
			    "fail" : assessment,
			    "op-code", DATA_TYPE_UINT8, op_code,
			    "cdb", DATA_TYPE_UINT8_ARRAY,
			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
			    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
			    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
			    "pkt-stats", DATA_TYPE_UINT32,
			    uscsi_pkt_statistics,
			    "stat-code", DATA_TYPE_UINT8,
			    ssc->ssc_uscsi_cmd->uscsi_status,
			    "un-decode-info", DATA_TYPE_STRING,
			    ssc->ssc_info,
			    "un-decode-value", DATA_TYPE_UINT8_ARRAY,
			    senlen, sensep,
			    NULL);
		} else {
			/*
			 * For other type of invalid data, the
			 * un-decode-value field would be empty because the
			 * un-decodable content could be seen from upper
			 * level payload or inside un-decode-info.
			 */
			scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
			    "cmd.disk.dev.uderr", uscsi_ena, devid, DDI_NOSLEEP,
			    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
			    "driver-assessment", DATA_TYPE_STRING,
			    drv_assess == SD_FM_DRV_FATAL ?
			    "fail" : assessment,
			    "op-code", DATA_TYPE_UINT8, op_code,
			    "cdb", DATA_TYPE_UINT8_ARRAY,
			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
			    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
			    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
			    "pkt-stats", DATA_TYPE_UINT32,
			    uscsi_pkt_statistics,
			    "stat-code", DATA_TYPE_UINT8,
			    ssc->ssc_uscsi_cmd->uscsi_status,
			    "un-decode-info", DATA_TYPE_STRING,
			    ssc->ssc_info,
			    "un-decode-value", DATA_TYPE_UINT8_ARRAY,
			    0, NULL,
			    NULL);
		}
		/* Flags are consumed by this ereport; clear them */
		ssc->ssc_flags &= ~ssc_invalid_flags;
		return;
	}

	if (uscsi_pkt_reason != CMD_CMPLT ||
	    (ssc->ssc_flags & SSC_FLAGS_TRAN_ABORT)) {
		/*
		 * pkt-reason != CMD_CMPLT or SSC_FLAGS_TRAN_ABORT was
		 * set inside sd_start_cmds due to errors(bad packet or
		 * fatal transport error), we should take it as a
		 * transport error, so we post ereport.io.scsi.cmd.disk.tran.
		 * driver-assessment will be set based on drv_assess.
		 * We will set devid to NULL because it is a transport
		 * error.
		 */
		if (ssc->ssc_flags & SSC_FLAGS_TRAN_ABORT)
			ssc->ssc_flags &= ~SSC_FLAGS_TRAN_ABORT;

		/*
		 * NOTE(review): pkt-state is posted here (and in the
		 * rqs.merr/rqs.derr/serr cases below) as DATA_TYPE_UINT8
		 * although uscsi_pkt_state is a uint32_t, while the
		 * recovered/uderr cases above post it as DATA_TYPE_UINT32.
		 * Confirm which encoding the ereport schema expects.
		 */
		scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
		    "cmd.disk.tran", uscsi_ena, NULL, DDI_NOSLEEP, FM_VERSION,
		    DATA_TYPE_UINT8, FM_EREPORT_VERS0,
		    "driver-assessment", DATA_TYPE_STRING,
		    drv_assess == SD_FM_DRV_FATAL ? "fail" : assessment,
		    "op-code", DATA_TYPE_UINT8, op_code,
		    "cdb", DATA_TYPE_UINT8_ARRAY,
		    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
		    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
		    "pkt-state", DATA_TYPE_UINT8, uscsi_pkt_state,
		    "pkt-stats", DATA_TYPE_UINT32, uscsi_pkt_statistics,
		    NULL);
	} else {
		/*
		 * If we got here, we have a completed command, and we need
		 * to further investigate the sense data to see what kind
		 * of ereport we should post.
		 * Post ereport.io.scsi.cmd.disk.dev.rqs.merr
		 * if sense-key == 0x3.
		 * Post ereport.io.scsi.cmd.disk.dev.rqs.derr otherwise.
		 * driver-assessment will be set based on the parameter
		 * drv_assess.
		 */
		if (senlen > 0) {
			/*
			 * Here we have sense data available.
			 */
			uint8_t sense_key;
			sense_key = scsi_sense_key(sensep);
			if (sense_key == 0x3) {
				/*
				 * sense-key == 0x3(medium error),
				 * driver-assessment should be "fatal" if
				 * drv_assess is SD_FM_DRV_FATAL.
				 */
				scsi_fm_ereport_post(un->un_sd,
				    uscsi_path_instance,
				    "cmd.disk.dev.rqs.merr",
				    uscsi_ena, devid, DDI_NOSLEEP, FM_VERSION,
				    DATA_TYPE_UINT8, FM_EREPORT_VERS0,
				    "driver-assessment",
				    DATA_TYPE_STRING,
				    drv_assess == SD_FM_DRV_FATAL ?
				    "fatal" : assessment,
				    "op-code",
				    DATA_TYPE_UINT8, op_code,
				    "cdb",
				    DATA_TYPE_UINT8_ARRAY, cdblen,
				    ssc->ssc_uscsi_cmd->uscsi_cdb,
				    "pkt-reason",
				    DATA_TYPE_UINT8, uscsi_pkt_reason,
				    "pkt-state",
				    DATA_TYPE_UINT8, uscsi_pkt_state,
				    "pkt-stats",
				    DATA_TYPE_UINT32,
				    uscsi_pkt_statistics,
				    "stat-code",
				    DATA_TYPE_UINT8,
				    ssc->ssc_uscsi_cmd->uscsi_status,
				    "key",
				    DATA_TYPE_UINT8,
				    scsi_sense_key(sensep),
				    "asc",
				    DATA_TYPE_UINT8,
				    scsi_sense_asc(sensep),
				    "ascq",
				    DATA_TYPE_UINT8,
				    scsi_sense_ascq(sensep),
				    "sense-data",
				    DATA_TYPE_UINT8_ARRAY,
				    senlen, sensep,
				    "lba",
				    DATA_TYPE_UINT64,
				    ssc->ssc_uscsi_info->ui_lba,
				    NULL);
				} else {
					/*
					 * if sense-key == 0x4(hardware
					 * error), driver-assessment should
					 * be "fatal" if drv_assess is
					 * SD_FM_DRV_FATAL.
					 */
					scsi_fm_ereport_post(un->un_sd,
					    uscsi_path_instance,
					    "cmd.disk.dev.rqs.derr",
					    uscsi_ena, devid, DDI_NOSLEEP,
					    FM_VERSION,
					    DATA_TYPE_UINT8, FM_EREPORT_VERS0,
					    "driver-assessment",
					    DATA_TYPE_STRING,
					    drv_assess == SD_FM_DRV_FATAL ?
					    (sense_key == 0x4 ?
					    "fatal" : "fail") : assessment,
					    "op-code",
					    DATA_TYPE_UINT8, op_code,
					    "cdb",
					    DATA_TYPE_UINT8_ARRAY, cdblen,
					    ssc->ssc_uscsi_cmd->uscsi_cdb,
					    "pkt-reason",
					    DATA_TYPE_UINT8, uscsi_pkt_reason,
					    "pkt-state",
					    DATA_TYPE_UINT8, uscsi_pkt_state,
					    "pkt-stats",
					    DATA_TYPE_UINT32,
					    uscsi_pkt_statistics,
					    "stat-code",
					    DATA_TYPE_UINT8,
					    ssc->ssc_uscsi_cmd->uscsi_status,
					    "key",
					    DATA_TYPE_UINT8,
					    scsi_sense_key(sensep),
					    "asc",
					    DATA_TYPE_UINT8,
					    scsi_sense_asc(sensep),
					    "ascq",
					    DATA_TYPE_UINT8,
					    scsi_sense_ascq(sensep),
					    "sense-data",
					    DATA_TYPE_UINT8_ARRAY,
					    senlen, sensep,
					    NULL);
				}
		} else {
			/*
			 * For stat_code == STATUS_GOOD, this is not a
			 * hardware error.
			 */
			if (ssc->ssc_uscsi_cmd->uscsi_status == STATUS_GOOD)
				return;

			/*
			 * Post ereport.io.scsi.cmd.disk.dev.serr if we got the
			 * stat-code but with sense data unavailable.
			 * driver-assessment will be set based on parameter
			 * drv_assess.
			 */
			scsi_fm_ereport_post(un->un_sd,
			    uscsi_path_instance, "cmd.disk.dev.serr", uscsi_ena,
			    devid, DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8,
			    FM_EREPORT_VERS0,
			    "driver-assessment", DATA_TYPE_STRING,
			    drv_assess == SD_FM_DRV_FATAL ? "fail" : assessment,
			    "op-code", DATA_TYPE_UINT8, op_code,
			    "cdb",
			    DATA_TYPE_UINT8_ARRAY,
			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
			    "pkt-reason",
			    DATA_TYPE_UINT8, uscsi_pkt_reason,
			    "pkt-state",
			    DATA_TYPE_UINT8, uscsi_pkt_state,
			    "pkt-stats",
			    DATA_TYPE_UINT32, uscsi_pkt_statistics,
			    "stat-code",
			    DATA_TYPE_UINT8,
			    ssc->ssc_uscsi_cmd->uscsi_status,
			    NULL);
		}
	}
}
30402 
30403 /*
30404  *     Function: sd_ssc_extract_info
30405  *
30406  * Description: Extract information available to help generate ereport.
30407  *
30408  *     Context: Kernel thread or interrupt context.
30409  */
30410 static void
30411 sd_ssc_extract_info(sd_ssc_t *ssc, struct sd_lun *un, struct scsi_pkt *pktp,
30412     struct buf *bp, struct sd_xbuf *xp)
30413 {
30414 	size_t senlen = 0;
30415 	union scsi_cdb *cdbp;
30416 	int path_instance;
30417 	/*
30418 	 * Need scsi_cdb_size array to determine the cdb length.
30419 	 */
30420 	extern uchar_t	scsi_cdb_size[];
30421 
30422 	ASSERT(un != NULL);
30423 	ASSERT(pktp != NULL);
30424 	ASSERT(bp != NULL);
30425 	ASSERT(xp != NULL);
30426 	ASSERT(ssc != NULL);
30427 	ASSERT(mutex_owned(SD_MUTEX(un)));
30428 
30429 	/*
30430 	 * Transfer the cdb buffer pointer here.
30431 	 */
30432 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
30433 
30434 	ssc->ssc_uscsi_cmd->uscsi_cdblen = scsi_cdb_size[GETGROUP(cdbp)];
30435 	ssc->ssc_uscsi_cmd->uscsi_cdb = (caddr_t)cdbp;
30436 
30437 	/*
30438 	 * Transfer the sense data buffer pointer if sense data is available,
30439 	 * calculate the sense data length first.
30440 	 */
30441 	if ((xp->xb_sense_state & STATE_XARQ_DONE) ||
30442 	    (xp->xb_sense_state & STATE_ARQ_DONE)) {
30443 		/*
30444 		 * For arq case, we will enter here.
30445 		 */
30446 		if (xp->xb_sense_state & STATE_XARQ_DONE) {
30447 			senlen = MAX_SENSE_LENGTH - xp->xb_sense_resid;
30448 		} else {
30449 			senlen = SENSE_LENGTH;
30450 		}
30451 	} else {
30452 		/*
30453 		 * For non-arq case, we will enter this branch.
30454 		 */
30455 		if (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK &&
30456 		    (xp->xb_sense_state & STATE_XFERRED_DATA)) {
30457 			senlen = SENSE_LENGTH - xp->xb_sense_resid;
30458 		}
30459 
30460 	}
30461 
30462 	ssc->ssc_uscsi_cmd->uscsi_rqlen = (senlen & 0xff);
30463 	ssc->ssc_uscsi_cmd->uscsi_rqresid = 0;
30464 	ssc->ssc_uscsi_cmd->uscsi_rqbuf = (caddr_t)xp->xb_sense_data;
30465 
30466 	ssc->ssc_uscsi_cmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
30467 
30468 	/*
30469 	 * Only transfer path_instance when scsi_pkt was properly allocated.
30470 	 */
30471 	path_instance = pktp->pkt_path_instance;
30472 	if (scsi_pkt_allocated_correctly(pktp) && path_instance)
30473 		ssc->ssc_uscsi_cmd->uscsi_path_instance = path_instance;
30474 	else
30475 		ssc->ssc_uscsi_cmd->uscsi_path_instance = 0;
30476 
30477 	/*
30478 	 * Copy in the other fields we may need when posting ereport.
30479 	 */
30480 	ssc->ssc_uscsi_info->ui_pkt_reason = pktp->pkt_reason;
30481 	ssc->ssc_uscsi_info->ui_pkt_state = pktp->pkt_state;
30482 	ssc->ssc_uscsi_info->ui_pkt_statistics = pktp->pkt_statistics;
30483 	ssc->ssc_uscsi_info->ui_lba = (uint64_t)SD_GET_BLKNO(bp);
30484 
30485 	/*
30486 	 * For partially read/write command, we will not create ena
30487 	 * in case of a successful command be reconized as recovered.
30488 	 */
30489 	if ((pktp->pkt_reason == CMD_CMPLT) &&
30490 	    (ssc->ssc_uscsi_cmd->uscsi_status == STATUS_GOOD) &&
30491 	    (senlen == 0)) {
30492 		return;
30493 	}
30494 
30495 	/*
30496 	 * To associate ereports of a single command execution flow, we
30497 	 * need a shared ena for a specific command.
30498 	 */
30499 	if (xp->xb_ena == 0)
30500 		xp->xb_ena = fm_ena_generate(0, FM_ENA_FMT1);
30501 	ssc->ssc_uscsi_info->ui_ena = xp->xb_ena;
30502 }
30503