xref: /illumos-gate/usr/src/uts/common/io/scsi/targets/sd.c (revision cb1bb6c3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 /*
26  * Copyright (c) 2011 Bayard G. Bell.  All rights reserved.
27  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
28  * Copyright 2012 DEY Storage Systems, Inc.  All rights reserved.
29  * Copyright 2019 Joyent, Inc.
30  * Copyright 2019 Racktop Systems
31  * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
32  * Copyright 2022 Tintri by DDN, Inc. All rights reserved.
33  * Copyright 2022 Garrett D'Amore
34  */
35 /*
36  * Copyright 2011 cyril.galibern@opensvc.com
37  */
38 
39 /*
40  * SCSI disk target driver.
41  */
42 #include <sys/scsi/scsi.h>
43 #include <sys/dkbad.h>
44 #include <sys/dklabel.h>
45 #include <sys/dkio.h>
46 #include <sys/fdio.h>
47 #include <sys/cdio.h>
48 #include <sys/mhd.h>
49 #include <sys/vtoc.h>
50 #include <sys/dktp/fdisk.h>
51 #include <sys/kstat.h>
52 #include <sys/vtrace.h>
53 #include <sys/note.h>
54 #include <sys/thread.h>
55 #include <sys/proc.h>
56 #include <sys/efi_partition.h>
57 #include <sys/var.h>
58 #include <sys/aio_req.h>
59 #include <sys/dkioc_free_util.h>
60 
61 #include <sys/taskq.h>
62 #include <sys/uuid.h>
63 #include <sys/byteorder.h>
64 #include <sys/sdt.h>
65 
66 #include "sd_xbuf.h"
67 
68 #include <sys/scsi/targets/sddef.h>
69 #include <sys/cmlb.h>
70 #include <sys/sysevent/eventdefs.h>
71 #include <sys/sysevent/dev.h>
72 
73 #include <sys/fm/protocol.h>
74 
75 /*
76  * Loadable module info.
77  */
78 #define	SD_MODULE_NAME	"SCSI Disk Driver"	/* descriptive name string for this loadable module */
79 
80 /*
81  * Define the interconnect type, to allow the driver to distinguish
82  * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
83  *
84  * This is really for backward compatibility. In the future, the driver
85  * should actually check the "interconnect-type" property as reported by
86  * the HBA; however at present this property is not defined by all HBAs,
87  * so we will use this #define (1) to permit the driver to run in
88  * backward-compatibility mode; and (2) to print a notification message
89  * if an FC HBA does not support the "interconnect-type" property.  The
90  * behavior of the driver will be to assume parallel SCSI behaviors unless
91  * the "interconnect-type" property is defined by the HBA **AND** has a
92  * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
93  * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
94  * Channel behaviors (as per the old ssd).  (Note that the
95  * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
96  * will result in the driver assuming parallel SCSI behaviors.)
97  *
98  * (see common/sys/scsi/impl/services.h)
99  */
100 #define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL	/* SD_INTERCONNECT_PARALLEL is #defined further down in this file; macros resolve at point of use */
101 
102 /*
103  * The name of the driver, established from the module name in _init.
104  */
105 static	char *sd_label			= NULL;	/* starts out NULL; no name is assigned at definition time */
106 
107 /*
108  * Driver name is unfortunately prefixed on some driver.conf properties.
109  */
110 static	char *sd_max_xfer_size		= "sd_max_xfer_size";	/* property name carrying the "sd_" prefix */
111 static	char *sd_config_list		= "sd-config-list";	/* property name carrying the "sd-" prefix */
112 
113 /*
114  * Driver global variables
115  */
116 
117 #ifdef	SDDEBUG
118 int	sd_force_pm_supported		= 0;	/* exists only in SDDEBUG builds; defaults to 0 */
119 #endif	/* SDDEBUG */
120 
121 void *sd_state				= NULL;	/* untyped pointer, NULL at load; none of these globals is static */
122 int sd_io_time				= SD_IO_TIME;
123 int sd_failfast_enable			= 1;	/* nonzero by default */
124 int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
125 int sd_report_pfa			= 1;	/* nonzero by default */
126 int sd_max_throttle			= SD_MAX_THROTTLE;
127 int sd_min_throttle			= SD_MIN_THROTTLE;
128 int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
129 int sd_qfull_throttle_enable		= TRUE;	/* enabled by default */
130 
131 int sd_retry_on_reservation_conflict	= 1;	/* nonzero by default */
132 int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
133 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))	/* annotation about sd_reinstate_resv_delay; not a declaration of its own */
134 
135 static int sd_dtype_optical_bind	= -1;	/* starts at -1; presumably "not yet determined" -- confirm at the point of use */
136 
137 /* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
138 static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";	/* same spelling as the sd_retry_on_reservation_conflict variable above */
139 
140 /*
141  * Global data for debug logging. To enable debug printing, sd_component_mask
142  * and sd_level_mask should be set to the desired bit patterns as outlined in
143  * sddef.h.
144  */
145 uint_t	sd_component_mask		= 0x0;	/* no bits set by default */
146 uint_t	sd_level_mask			= 0x0;	/* no bits set by default */
147 struct	sd_lun *sd_debug_un		= NULL;	/* NULL by default */
148 uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;
149 
150 /* Note: these may go away in the future... */
151 static uint32_t	sd_xbuf_active_limit	= 512;	/* default limit value */
152 static uint32_t sd_xbuf_reserve_limit	= 16;	/* default limit value */
153 
154 static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };	/* all six initialized members start out NULL/0; member names are declared elsewhere */
155 
156 /*
157  * Timer value used to reset the throttle after it has been reduced
158  * (typically in response to TRAN_BUSY or STATUS_QFULL)
159  */
160 static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;	/* units are set by the SD_* constant, defined elsewhere */
161 static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;	/* units are set by the SD_* constant, defined elsewhere */
162 
163 /*
164  * Interval value associated with the media change scsi watch.
165  */
166 static int sd_check_media_time		= 3000000;	/* presumably microseconds (i.e. 3 seconds) -- TODO confirm at the point of use */
167 
168 /*
169  * Wait value used for in progress operations during a DDI_SUSPEND
170  */
171 static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;	/* units are set by the SD_* constant, defined elsewhere */
172 
173 /*
174  * Global buffer and mutex for debug logging
175  */
176 static char	sd_log_buf[1024];	/* single shared 1024-byte buffer */
177 static kmutex_t	sd_log_mutex;	/* the mutex named by the comment above for debug logging */
178 
179 /*
180  * Structs and globals for recording attached lun information.
181  * This maintains a chain. Each node in the chain represents a SCSI controller.
182  * The structure records the number of luns attached to each target connected
183  * with the controller.
184  * For parallel scsi device only.
185  */
186 struct sd_scsi_hba_tgt_lun {	/* one chain node per SCSI controller */
187 	struct sd_scsi_hba_tgt_lun	*next;	/* next node in the chain */
188 	dev_info_t			*pdip;	/* presumably the controller's (parent) dev_info node -- confirm */
189 	int				nlun[NTARGETS_WIDE];	/* attached-lun count, one slot per target */
190 };
191 
192 /*
193  * Flag to indicate the lun is attached or detached
194  */
195 #define	SD_SCSI_LUN_ATTACH	0	/* lun is being attached */
196 #define	SD_SCSI_LUN_DETACH	1	/* lun is being detached */
197 
198 static kmutex_t	sd_scsi_target_lun_mutex;
199 static struct sd_scsi_hba_tgt_lun	*sd_scsi_target_lun_head = NULL;	/* chain head; NULL means no nodes yet */
200 
201 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
202     sd_scsi_hba_tgt_lun::next sd_scsi_hba_tgt_lun::pdip))	/* annotation lists next and pdip only; nlun[] is not named */
203 
204 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
205     sd_scsi_target_lun_head))
206 
207 /*
208  * "Smart" Probe Caching structs, globals, #defines, etc.
209  * For parallel scsi and non-self-identify device only.
210  */
211 
212 /*
213  * The following resources and routines are implemented to support
214  * "smart" probing, which caches the scsi_probe() results in an array,
215  * in order to help avoid long probe times.
216  */
217 struct sd_scsi_probe_cache {	/* node of a singly linked list of cached probe results */
218 	struct	sd_scsi_probe_cache	*next;	/* next node in the list */
219 	dev_info_t	*pdip;	/* presumably the parent (HBA) dev_info node this cache belongs to -- confirm */
220 	int		cache[NTARGETS_WIDE];	/* cached scsi_probe() results; presumably indexed by target -- confirm */
221 };
222 
223 static kmutex_t	sd_scsi_probe_cache_mutex;
224 static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;	/* list head; NULL means the list is empty */
225 
226 /*
227  * Really we only need protection on the head of the linked list, but
228  * better safe than sorry.
229  */
230 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
231     sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))	/* annotation lists next and pdip only; cache[] is not named */
232 
233 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
234     sd_scsi_probe_cache_head))
235 
236 /*
237  * Power attribute table
238  */
239 static sd_power_attr_ss sd_pwr_ss = {	/* two-level (off/on) attribute set */
240 	{ "NAME=spindle-motor", "0=off", "1=on", NULL },	/* NULL-terminated component name and level labels */
241 	{0, 100},	/* the three numeric rows hold one value per level; */
242 	{30, 0},	/* what each row means is defined by sd_power_attr_ss, */
243 	{20000, 0}	/* which is declared in a header not shown here */
244 };
245 
246 static sd_power_attr_pc sd_pwr_pc = {	/* four-level (stopped/standby/idle/active) attribute set */
247 	{ "NAME=spindle-motor", "0=stopped", "1=standby", "2=idle",
248 		"3=active", NULL },	/* NULL-terminated component name and level labels */
249 	{0, 0, 0, 100},	/* the three numeric rows hold one value per level; */
250 	{90, 90, 20, 0},	/* what each row means is defined by sd_power_attr_pc, */
251 	{15000, 15000, 1000, 0}	/* which is declared in a header not shown here */
252 };
253 
254 /*
255  * Power level to power condition
256  */
257 static int sd_pl2pc[] = {	/* four entries; presumably indexed by power level 0..3 as labelled in sd_pwr_pc -- confirm */
258 	SD_TARGET_START_VALID,	/* index 0 */
259 	SD_TARGET_STANDBY,	/* index 1 */
260 	SD_TARGET_IDLE,	/* index 2 */
261 	SD_TARGET_ACTIVE	/* index 3 */
262 };
263 
264 /*
265  * Vendor specific data name property declarations
266  */
267 
268 static sd_tunables seagate_properties = {	/* positional initializers; member order comes from sd_tunables (header not shown) */
269 	SEAGATE_THROTTLE_VALUE,	/* the only non-zero slot */
270 	0,
271 	0,
272 	0,
273 	0,
274 	0,
275 	0,
276 	0,
277 	0
278 };
279 
280 
281 static sd_tunables fujitsu_properties = {	/* positional initializers; member order comes from sd_tunables (header not shown) */
282 	FUJITSU_THROTTLE_VALUE,	/* the only non-zero slot */
283 	0,
284 	0,
285 	0,
286 	0,
287 	0,
288 	0,
289 	0,
290 	0
291 };
292 
293 static sd_tunables ibm_properties = {	/* positional initializers; member order comes from sd_tunables (header not shown) */
294 	IBM_THROTTLE_VALUE,	/* the only non-zero slot */
295 	0,
296 	0,
297 	0,
298 	0,
299 	0,
300 	0,
301 	0,
302 	0
303 };
304 
305 static sd_tunables sve_properties = {	/* positional initializers; the constant names indicate what each filled slot holds */
306 	SVE_THROTTLE_VALUE,
307 	0,
308 	0,
309 	SVE_BUSY_RETRIES,
310 	SVE_RESET_RETRY_COUNT,
311 	SVE_RESERVE_RELEASE_TIME,
312 	SVE_MIN_THROTTLE_VALUE,
313 	SVE_DISKSORT_DISABLED_FLAG,
314 	0
315 };
316 
317 static sd_tunables maserati_properties = {	/* positional initializers; only the last two of the nine slots are set */
318 	0,
319 	0,
320 	0,
321 	0,
322 	0,
323 	0,
324 	0,
325 	MASERATI_DISKSORT_DISABLED_FLAG,
326 	MASERATI_LUN_RESET_ENABLED_FLAG
327 };
328 
329 static sd_tunables pirus_properties = {	/* positional initializers; the constant names indicate what each filled slot holds */
330 	PIRUS_THROTTLE_VALUE,
331 	0,
332 	PIRUS_NRR_COUNT,
333 	PIRUS_BUSY_RETRIES,
334 	PIRUS_RESET_RETRY_COUNT,
335 	0,
336 	PIRUS_MIN_THROTTLE_VALUE,
337 	PIRUS_DISKSORT_DISABLED_FLAG,
338 	PIRUS_LUN_RESET_ENABLED_FLAG
339 };
340 
341 static sd_tunables elite_properties = {	/* positional initializers; member order comes from sd_tunables (header not shown) */
342 	ELITE_THROTTLE_VALUE,	/* the only non-zero slot */
343 	0,
344 	0,
345 	0,
346 	0,
347 	0,
348 	0,
349 	0,
350 	0
351 };
352 
353 static sd_tunables st31200n_properties = {	/* positional initializers; member order comes from sd_tunables (header not shown) */
354 	ST31200N_THROTTLE_VALUE,	/* the only non-zero slot */
355 	0,
356 	0,
357 	0,
358 	0,
359 	0,
360 	0,
361 	0,
362 	0
363 };
364 
365 static sd_tunables lsi_properties_scsi = {	/* same as lsi_properties below except that the first slot is also set */
366 	LSI_THROTTLE_VALUE,
367 	0,
368 	LSI_NOTREADY_RETRIES,
369 	0,
370 	0,
371 	0,
372 	0,
373 	0,
374 	0
375 };
376 
377 static sd_tunables symbios_properties = {	/* positional initializers; first and third slots are set */
378 	SYMBIOS_THROTTLE_VALUE,
379 	0,
380 	SYMBIOS_NOTREADY_RETRIES,
381 	0,
382 	0,
383 	0,
384 	0,
385 	0,
386 	0
387 };
388 
389 static sd_tunables lsi_properties = {	/* positional initializers; only the third slot is set */
390 	0,
391 	0,
392 	LSI_NOTREADY_RETRIES,
393 	0,
394 	0,
395 	0,
396 	0,
397 	0,
398 	0
399 };
400 
401 static sd_tunables lsi_oem_properties = {	/* positional initializers; note the first slot is 0 */
402 	0,
403 	0,
404 	LSI_OEM_NOTREADY_RETRIES,
405 	0,
406 	0,
407 	0,
408 	0,
409 	0,
410 	0,
411 	1	/* NOTE(review): tenth initializer; the other tables here stop at nine. Which sd_tunables member this sets is not visible in this file -- confirm against the struct declaration */
412 };
413 
414 
415 
416 #if (defined(SD_PROP_TST))	/* everything down to the matching #endif exists only when SD_PROP_TST is defined */
417 
418 #define	SD_TST_CTYPE_VAL	CTYPE_CDROM
419 #define	SD_TST_THROTTLE_VAL	16
420 #define	SD_TST_NOTREADY_VAL	12
421 #define	SD_TST_BUSY_VAL		60
422 #define	SD_TST_RST_RETRY_VAL	36
423 #define	SD_TST_RSV_REL_TIME	60
424 
425 static sd_tunables tst_properties = {	/* positional initializers; fills the first six slots with the SD_TST_* values above */
426 	SD_TST_THROTTLE_VAL,
427 	SD_TST_CTYPE_VAL,
428 	SD_TST_NOTREADY_VAL,
429 	SD_TST_BUSY_VAL,
430 	SD_TST_RST_RETRY_VAL,
431 	SD_TST_RSV_REL_TIME,
432 	0,
433 	0,
434 	0
435 };
436 #endif	/* SD_PROP_TST */
437 
438 /* This is similar to the ANSI toupper implementation */
439 #define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))	/* maps 'a'..'z' to 'A'..'Z', anything else unchanged; C is evaluated more than once, so pass no expression with side effects */
440 
441 /*
442  * Static Driver Configuration Table
443  *
444  * This is the table of disks which need throttle adjustment (or, perhaps
445  * something else as defined by the flags at a future time.)  device_id
446  * is a string consisting of concatenated vid (vendor), pid (product/model)
447  * and revision strings as defined in the scsi_inquiry structure.  Offsets of
448  * the parts of the string are as defined by the sizes in the scsi_inquiry
449  * structure.  Device type is searched as far as the device_id string is
450  * defined.  Flags defines which values are to be set in the driver from the
451  * properties list.
452  *
453  * Entries below which begin and end with a "*" are a special case.
454  * These do not have a specific vendor, and the string which follows
455  * can appear anywhere in the 16 byte PID portion of the inquiry data.
456  *
457  * Entries below which begin and end with a " " (blank) are a special
458  * case. The comparison function will treat multiple consecutive blanks
459  * as equivalent to a single blank. For example, this causes a
460  * sd_disk_table entry of " NEC CDROM " to match a device's id string
461  * of  "NEC       CDROM".
462  *
463  * Note: The MD21 controller type has been obsoleted.
464  *	 ST318202F is a Legacy device
465  *	 MAM3182FC, MAM3364FC, MAM3738FC do not appear to have ever been
466  *	 made with an FC connection. The entries here are a legacy.
467  */
468 static sd_disk_config_t sd_disk_table[] = {	/* each entry: { id string, SD_CONF_BSET_* flag mask, tunables pointer or NULL } */
469 	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
470 	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
471 	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
472 	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
473 	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
474 	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
475 	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
476 	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
477 	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
478 	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
479 	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
480 	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
481 	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
482 	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
483 	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
484 	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
485 	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
486 	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
487 	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
488 	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
489 	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
490 	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
491 	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
492 	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
493 	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
494 	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
495 	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
496 	{ "IBM     1724-100",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
497 	{ "IBM     1726-2xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
498 	{ "IBM     1726-22x",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
499 	{ "IBM     1726-4xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
500 	{ "IBM     1726-42x",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
501 	{ "IBM     1726-3xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
502 	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
503 	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
504 	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
505 	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
506 	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
507 	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
508 	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
509 	{ "IBM     1814",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
510 	{ "IBM     1814-200",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
511 	{ "IBM     1818",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
512 	{ "DELL    MD3000",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
513 	{ "DELL    MD3000i",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
514 	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
515 	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
516 	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
517 	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
518 	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT |
519 			SD_CONF_BSET_CACHE_IS_NV, &lsi_oem_properties },
520 	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT |
521 			SD_CONF_BSET_CACHE_IS_NV, &lsi_oem_properties },
522 	{ "Fujitsu SX300",	SD_CONF_BSET_THROTTLE,  &lsi_oem_properties },	/* NOTE(review): requests THROTTLE, but lsi_oem_properties' first slot (where the other tables put their throttle value) is 0 -- confirm intended */
523 	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },	/* NOTE(review): "LSI" appears again further down with different flags and tunables */
524 	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
525 		SD_CONF_BSET_BSY_RETRY_COUNT|
526 		SD_CONF_BSET_RST_RETRIES|
527 		SD_CONF_BSET_RSV_REL_TIME|
528 		SD_CONF_BSET_MIN_THROTTLE|
529 		SD_CONF_BSET_DISKSORT_DISABLED,
530 		&sve_properties },
531 	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
532 		SD_CONF_BSET_LUN_RESET_ENABLED,
533 		&maserati_properties },
534 	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
535 		SD_CONF_BSET_NRR_COUNT|
536 		SD_CONF_BSET_BSY_RETRY_COUNT|
537 		SD_CONF_BSET_RST_RETRIES|
538 		SD_CONF_BSET_MIN_THROTTLE|
539 		SD_CONF_BSET_DISKSORT_DISABLED|
540 		SD_CONF_BSET_LUN_RESET_ENABLED,
541 		&pirus_properties },
542 	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
543 		SD_CONF_BSET_NRR_COUNT|
544 		SD_CONF_BSET_BSY_RETRY_COUNT|
545 		SD_CONF_BSET_RST_RETRIES|
546 		SD_CONF_BSET_MIN_THROTTLE|
547 		SD_CONF_BSET_DISKSORT_DISABLED|
548 		SD_CONF_BSET_LUN_RESET_ENABLED,
549 		&pirus_properties },
550 	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
551 		SD_CONF_BSET_NRR_COUNT|
552 		SD_CONF_BSET_BSY_RETRY_COUNT|
553 		SD_CONF_BSET_RST_RETRIES|
554 		SD_CONF_BSET_MIN_THROTTLE|
555 		SD_CONF_BSET_DISKSORT_DISABLED|
556 		SD_CONF_BSET_LUN_RESET_ENABLED,
557 		&pirus_properties },
558 	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
559 		SD_CONF_BSET_NRR_COUNT|
560 		SD_CONF_BSET_BSY_RETRY_COUNT|
561 		SD_CONF_BSET_RST_RETRIES|
562 		SD_CONF_BSET_MIN_THROTTLE|
563 		SD_CONF_BSET_DISKSORT_DISABLED|
564 		SD_CONF_BSET_LUN_RESET_ENABLED,
565 		&pirus_properties },
566 	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
567 		SD_CONF_BSET_NRR_COUNT|
568 		SD_CONF_BSET_BSY_RETRY_COUNT|
569 		SD_CONF_BSET_RST_RETRIES|
570 		SD_CONF_BSET_MIN_THROTTLE|
571 		SD_CONF_BSET_DISKSORT_DISABLED|
572 		SD_CONF_BSET_LUN_RESET_ENABLED,
573 		&pirus_properties },
574 	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
575 		SD_CONF_BSET_NRR_COUNT|
576 		SD_CONF_BSET_BSY_RETRY_COUNT|
577 		SD_CONF_BSET_RST_RETRIES|
578 		SD_CONF_BSET_MIN_THROTTLE|
579 		SD_CONF_BSET_DISKSORT_DISABLED|
580 		SD_CONF_BSET_LUN_RESET_ENABLED,
581 		&pirus_properties },
582 	{ "SUN     STK6580_6780", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
583 	{ "SUN     SUN_6180", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
584 	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
585 	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
586 	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
587 	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
588 	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },	/* NOTE(review): "SYMBIOS" appears again further down with THROTTLE added */
589 	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
590 	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
591 	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },	/* flag-only entries carry a NULL tunables pointer */
592 	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
593 	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
594 	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
595 	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
596 	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
597 	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
598 	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
599 	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
600 	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
601 	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
602 	    &symbios_properties },	/* second "SYMBIOS" entry; how duplicates interact depends on the lookup code, not shown here */
603 	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
604 	    &lsi_properties_scsi },	/* second "LSI" entry; how duplicates interact depends on the lookup code, not shown here */
605 	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
606 				    | SD_CONF_BSET_READSUB_BCD
607 				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
608 				    | SD_CONF_BSET_NO_READ_HEADER
609 				    | SD_CONF_BSET_READ_CD_XD4), NULL },
610 
611 	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
612 				    | SD_CONF_BSET_READSUB_BCD
613 				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
614 				    | SD_CONF_BSET_NO_READ_HEADER
615 				    | SD_CONF_BSET_READ_CD_XD4), NULL },
616 
617 #if (defined(SD_PROP_TST))	/* test-only entry; tst_properties exists only under the same guard */
618 	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
619 				| SD_CONF_BSET_CTYPE
620 				| SD_CONF_BSET_NRR_COUNT
621 				| SD_CONF_BSET_FAB_DEVID
622 				| SD_CONF_BSET_NOCACHE
623 				| SD_CONF_BSET_BSY_RETRY_COUNT
624 				| SD_CONF_BSET_PLAYMSF_BCD
625 				| SD_CONF_BSET_READSUB_BCD
626 				| SD_CONF_BSET_READ_TOC_TRK_BCD
627 				| SD_CONF_BSET_READ_TOC_ADDR_BCD
628 				| SD_CONF_BSET_NO_READ_HEADER
629 				| SD_CONF_BSET_READ_CD_XD4
630 				| SD_CONF_BSET_RST_RETRIES
631 				| SD_CONF_BSET_RSV_REL_TIME
632 				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
633 #endif
634 };
635 
636 static const int sd_disk_table_size =
637 	sizeof (sd_disk_table)/ sizeof (sd_disk_config_t);	/* number of entries in sd_disk_table */
638 
639 /*
640  * Emulation mode disk drive VID/PID table
641  */
642 static char sd_flash_dev_table[][25] = {	/* fixed 25-byte rows: room for a 24-character id plus the terminating NUL */
643 	"ATA     MARVELL SD88SA02",	/* 24 characters: exactly fills a row */
644 	"MARVELL SD88SA02",
645 	"TOSHIBA THNSNV05",
646 };
647 
648 static const int sd_flash_dev_table_size =
649 	sizeof (sd_flash_dev_table) / sizeof (sd_flash_dev_table[0]);	/* number of rows in sd_flash_dev_table */
650 
651 #define	SD_INTERCONNECT_PARALLEL	0	/* values compared against un_interconnect_type below */
652 #define	SD_INTERCONNECT_FABRIC		1
653 #define	SD_INTERCONNECT_FIBRE		2
654 #define	SD_INTERCONNECT_SSA		3
655 #define	SD_INTERCONNECT_SATA		4
656 #define	SD_INTERCONNECT_SAS		5
657 
658 #define	SD_IS_PARALLEL_SCSI(un)		\
659 	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)	/* nonzero only for the PARALLEL type */
660 #define	SD_IS_SERIAL(un)		\
661 	(((un)->un_interconnect_type == SD_INTERCONNECT_SATA) ||\
662 	((un)->un_interconnect_type == SD_INTERCONNECT_SAS))	/* nonzero for SATA or SAS; (un) may be evaluated twice */
663 
664 /*
665  * Definitions used by device id registration routines
666  */
667 #define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
668 #define	VPD_PAGE_LENGTH		3	/* offset for page length data */
669 #define	VPD_MODE_PAGE		1	/* offset into vpd pg for "page code" */
670 
671 static kmutex_t sd_sense_mutex = {0};	/* explicitly zero-initialized at definition */
672 
673 /*
674  * Macros for updates of the driver state
 *
 * New_state(un, s) copies un_state into un_last_state and then stores s in
 * un_state.  It expands to a bare (unparenthesized) comma expression and
 * evaluates (un) three times.
 *
 * Restore_state(un) reads un_last_state into a temporary and passes it to
 * New_state(), so un_state and un_last_state end up swapped.  It expands to
 * a braced block rather than an expression, so a following ';' is an extra
 * empty statement.
675  */
676 #define	New_state(un, s)        \
677 	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
678 #define	Restore_state(un)	\
679 	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }
680 
681 static struct sd_cdbinfo sd_cdbtab[] = {	/* columns presumably: CDB size, group opcode bits, max LBA, max block count -- confirm against struct sd_cdbinfo */
682 	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },	/* 21-bit and 8-bit limits */
683 	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },	/* 32-bit and 16-bit limits */
684 	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },	/* 32-bit and 32-bit limits; GROUP5 is listed before GROUP4 */
685 	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },	/* 64-bit and 32-bit limits */
686 };
687 
688 /*
689  * Specifies the number of seconds that must have elapsed since the last
690  * cmd. has completed for a device to be declared idle to the PM framework.
691  */
692 static int sd_pm_idletime = 1;	/* in seconds, per the comment above */
693 
694 /*
695  * Internal function prototypes
696  */
697 
698 typedef struct unmap_param_hdr_s {	/* header placed ahead of the block descriptors; members total 8 bytes */
699 	uint16_t	uph_data_len;	/* 16-bit length field */
700 	uint16_t	uph_descr_data_len;	/* 16-bit length field for the descriptor data */
701 	uint32_t	uph_reserved;
702 } unmap_param_hdr_t;
703 
704 typedef struct unmap_blk_descr_s {	/* one block range; members total 16 bytes */
705 	uint64_t	ubd_lba;	/* 64-bit starting block address */
706 	uint32_t	ubd_lba_cnt;	/* 32-bit block count */
707 	uint32_t	ubd_reserved;
708 } unmap_blk_descr_t;
709 
710 /* Max number of block descriptors in UNMAP command: how many whole descriptors fit in UINT16_MAX bytes after the header (4095 if the structs above are unpadded) */
711 #define	SD_UNMAP_MAX_DESCR \
712 	((UINT16_MAX - sizeof (unmap_param_hdr_t)) / sizeof (unmap_blk_descr_t))
713 /* Max size of the UNMAP parameter list in bytes: header plus SD_UNMAP_MAX_DESCR descriptors (65528 if the structs above are unpadded) */
714 #define	SD_UNMAP_PARAM_LIST_MAXSZ	(sizeof (unmap_param_hdr_t) + \
715 	SD_UNMAP_MAX_DESCR * sizeof (unmap_blk_descr_t))
716 
717 int _init(void);	/* these three are the only prototypes in this visible section without 'static' */
718 int _fini(void);
719 int _info(struct modinfo *modinfop);
720 
721 /*PRINTFLIKE3*/
722 static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);	/* fmt is parameter 3, matching the PRINTFLIKE3 annotation; variadic */
723 /*PRINTFLIKE3*/
724 static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);	/* same signature as sd_log_trace */
725 /*PRINTFLIKE3*/
726 static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);	/* same signature as sd_log_trace */
727 
728 static int sdprobe(dev_info_t *devi);	/* forward declaration; definition is not in this excerpt */
729 static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
730     void **result);	/* result is an out-parameter (pointer to pointer) */
731 static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
732     int mod_flags, char *name, caddr_t valuep, int *lengthp);	/* lengthp is passed by pointer */
733 
734 /*
735  * Smart probe for parallel scsi
736  */
737 static void sd_scsi_probe_cache_init(void);
738 static void sd_scsi_probe_cache_fini(void);
739 static void sd_scsi_clear_probe_cache(void);
740 static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());	/* fn is declared with an empty (unprototyped) parameter list */
741 
742 /*
743  * Attached luns on target for parallel scsi
744  */
745 static void sd_scsi_target_lun_init(void);
746 static void sd_scsi_target_lun_fini(void);
747 static int  sd_scsi_get_target_lun_count(dev_info_t *dip, int target);
748 static void sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag);	/* flag is presumably SD_SCSI_LUN_ATTACH or SD_SCSI_LUN_DETACH -- confirm at the definition */
749 
750 static int sd_spin_up_unit(sd_ssc_t *ssc);	/* takes the sd_ssc_t handle described in the comment below */
751 
752 /*
753  * Using sd_ssc_init to establish sd_ssc_t struct
754  * Using sd_ssc_send to send uscsi internal command
755  * Using sd_ssc_fini to free sd_ssc_t struct
756  */
757 static sd_ssc_t *sd_ssc_init(struct sd_lun *un);	/* returns the sd_ssc_t that the other two functions take */
758 static int sd_ssc_send(sd_ssc_t *ssc, struct uscsi_cmd *incmd,
759     int flag, enum uio_seg dataspace, int path_flag);	/* last four parameters match sd_send_scsi_cmd's */
760 static void sd_ssc_fini(sd_ssc_t *ssc);
761 
762 /*
763  * Using sd_ssc_assessment to set correct type-of-assessment
764  * Using sd_ssc_post to post ereport & system log
765  *       sd_ssc_post will call sd_ssc_print to print system log
766  *       sd_ssc_post will call sd_ssc_ereport_post to post ereport
767  */
768 static void sd_ssc_assessment(sd_ssc_t *ssc,
769     enum sd_type_assessment tp_assess);	/* takes enum sd_type_assessment ... */
770 
771 static void sd_ssc_post(sd_ssc_t *ssc, enum sd_driver_assessment sd_assess);	/* ... whereas post/ereport_post take enum sd_driver_assessment */
772 static void sd_ssc_print(sd_ssc_t *ssc, int sd_severity);
773 static void sd_ssc_ereport_post(sd_ssc_t *ssc,
774     enum sd_driver_assessment drv_assess);
775 
776 /*
777  * Using sd_ssc_set_info to mark an un-decodable-data error.
778  * Using sd_ssc_extract_info to transfer information from internal
779  *       data structures to sd_ssc_t.
780  */
781 static void sd_ssc_set_info(sd_ssc_t *ssc, int ssc_flags, uint_t comp,
782     const char *fmt, ...);	/* variadic with fmt as parameter 4; unlike the sd_log_* prototypes it carries no PRINTFLIKE annotation */
783 static void sd_ssc_extract_info(sd_ssc_t *ssc, struct sd_lun *un,
784     struct scsi_pkt *pktp, struct buf *bp, struct sd_xbuf *xp);
785 
786 static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
787     enum uio_seg dataspace, int path_flag);	/* the same prototype is repeated verbatim later in this prototype section */
788 
789 static void	sd_enable_descr_sense(sd_ssc_t *ssc);
790 static void	sd_reenable_dsense_task(void *arg);	/* untyped argument; presumably a task callback -- confirm at the definition */
791 
792 static void	sd_set_mmc_caps(sd_ssc_t *ssc);
793 
794 static void sd_read_unit_properties(struct sd_lun *un);	/* every function in this group takes the struct sd_lun being configured */
795 static int  sd_process_sdconf_file(struct sd_lun *un);
796 static void sd_nvpair_str_decode(struct sd_lun *un, char *nvpair_str);
797 static void sd_set_properties(struct sd_lun *un, char *name, char *value);
798 static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
799     int *data_list, sd_tunables *values);
800 static void sd_process_sdconf_table(struct sd_lun *un);
801 static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);	/* id is passed with an explicit length */
802 static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);	/* id is passed with an explicit length */
803 static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
804     int list_len, char *dataname_ptr);
805 static void sd_set_vers1_properties(struct sd_lun *un, int flags,
806     sd_tunables *prop_list);	/* prop_list has the same type as the *_properties tables above */
807 
808 static void sd_register_devid(sd_ssc_t *ssc, dev_info_t *devi,
809     int reservation_flag);
810 static int  sd_get_devid(sd_ssc_t *ssc);
811 static ddi_devid_t sd_create_devid(sd_ssc_t *ssc);	/* the only function in this group returning a ddi_devid_t */
812 static int  sd_write_deviceid(sd_ssc_t *ssc);
813 static int  sd_check_vpd_page_support(sd_ssc_t *ssc);
814 
815 static void sd_setup_pm(sd_ssc_t *ssc, dev_info_t *devi);
816 static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);	/* note argument order: devi first, then un */
817 
818 static int  sd_ddi_suspend(dev_info_t *devi);
819 static int  sd_ddi_resume(dev_info_t *devi);
820 static int  sd_pm_state_change(struct sd_lun *un, int level, int flag);
821 static int  sdpower(dev_info_t *devi, int component, int level);
822 
823 static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);	/* cmd selects the attach operation */
824 static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);	/* cmd selects the detach operation */
825 static int  sd_unit_attach(dev_info_t *devi);
826 static int  sd_unit_detach(dev_info_t *devi);
827 
828 static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
829 static void sd_create_errstats(struct sd_lun *un, int instance);
830 static void sd_set_errstats(struct sd_lun *un);
831 static void sd_set_pstats(struct sd_lun *un);
832 
833 static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);	/* takes a buffer address, a starting block and a block count */
834 static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
835 static int  sd_send_polled_RQS(struct sd_lun *un);
836 static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);
837 
838 /*
839  * Defines for sd_cache_control
840  */
841 
842 #define	SD_CACHE_ENABLE		1	/* presumably the values accepted for sd_cache_control's rcd_flag/wce_flag -- confirm at the definition */
843 #define	SD_CACHE_DISABLE	0
844 #define	SD_CACHE_NOCHANGE	-1	/* unparenthesized negative literal */
845 
846 static int   sd_cache_control(sd_ssc_t *ssc, int rcd_flag, int wce_flag);
847 static int   sd_get_write_cache_enabled(sd_ssc_t *ssc, int *is_enabled);	/* result via is_enabled out-parameter plus an int return */
848 static void  sd_get_write_cache_changeable(sd_ssc_t *ssc, int *is_changeable);	/* result via is_changeable out-parameter only (void return) */
849 static void  sd_get_nv_sup(sd_ssc_t *ssc);
850 static dev_t sd_make_device(dev_info_t *devi);
851 static void  sd_check_bdc_vpd(sd_ssc_t *ssc);
852 static void  sd_check_emulation_mode(sd_ssc_t *ssc);
853 static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
854     uint64_t capacity);	/* lbasize is 32-bit, capacity is 64-bit */
855 
856 /*
857  * Driver entry point functions.
858  */
859 static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);	/* takes dev_t by pointer, unlike sdclose */
860 static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
861 static int  sd_ready_and_valid(sd_ssc_t *ssc, int part);
862 
863 static void sdmin(struct buf *bp);
864 static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
865 static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
866 static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);	/* aio_req variant of sdread's signature */
867 static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);	/* aio_req variant of sdwrite's signature */
868 
869 static int sdstrategy(struct buf *bp);
870 static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);	/* the only prototype in this group without parameter names */
871 
872 /*
873  * Function prototypes for layering functions in the iostart chain.
874  */
875 static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
876     struct buf *bp);	/* every iostart layer shares the (index, un, bp) signature */
877 static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
878     struct buf *bp);
879 static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
880 static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
881     struct buf *bp);
882 static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
883 static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);
884 
885 /*
886  * Function prototypes for layering functions in the iodone chain.
887  */
888 static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);	/* every iodone layer shares the (index, un, bp) signature of the iostart layers */
889 static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
890 static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
891     struct buf *bp);
892 static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
893     struct buf *bp);
894 static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
895 static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
896     struct buf *bp);
897 static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);
898 
899 /*
900  * Prototypes for functions to support buf(9S) based IO.
901  */
902 static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
903 static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
904 static void sd_destroypkt_for_buf(struct buf *);
905 static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
906     struct buf *bp, int flags,
907     int (*callback)(caddr_t), caddr_t callback_arg,
908     diskaddr_t lba, uint32_t blockcount);
909 static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
910     struct buf *bp, diskaddr_t lba, uint32_t blockcount);
911 
912 /*
913  * Prototypes for functions to support USCSI IO.
914  */
915 static int sd_uscsi_strategy(struct buf *bp);
916 static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
917 static void sd_destroypkt_for_uscsi(struct buf *);
918 
919 static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
920     uchar_t chain_type, void *pktinfop);
921 
922 static int  sd_pm_entry(struct sd_lun *un);
923 static void sd_pm_exit(struct sd_lun *un);
924 
925 static void sd_pm_idletimeout_handler(void *arg);
926 
927 /*
928  * sd_core internal functions (used at the sd_core_io layer).
929  */
930 static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
931 static void sdintr(struct scsi_pkt *pktp);
932 static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);
933 
934 static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
935     enum uio_seg dataspace, int path_flag);
936 
937 static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
938     daddr_t blkno, int (*func)(struct buf *));
939 static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
940     uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
941 static void sd_bioclone_free(struct buf *bp);
942 static void sd_shadow_buf_free(struct buf *bp);
943 
944 static void sd_print_transport_rejected_message(struct sd_lun *un,
945     struct sd_xbuf *xp, int code);
946 static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
947     void *arg, int code);
948 static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
949     void *arg, int code);
950 static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
951     void *arg, int code);
952 
953 static void sd_retry_command(struct sd_lun *un, struct buf *bp,
954     int retry_check_flag,
955     void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int c),
956     void *user_arg, int failure_code,  clock_t retry_delay,
957     void (*statp)(kstat_io_t *));
958 
959 static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
960     clock_t retry_delay, void (*statp)(kstat_io_t *));
961 
962 static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
963     struct scsi_pkt *pktp);
964 static void sd_start_retry_command(void *arg);
965 static void sd_start_direct_priority_command(void *arg);
966 static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
967     int errcode);
968 static void sd_return_failed_command_no_restart(struct sd_lun *un,
969     struct buf *bp, int errcode);
970 static void sd_return_command(struct sd_lun *un, struct buf *bp);
971 static void sd_sync_with_callback(struct sd_lun *un);
972 static int sdrunout(caddr_t arg);
973 
974 static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
975 static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);
976 
977 static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
978 static void sd_restore_throttle(void *arg);
979 
980 static void sd_init_cdb_limits(struct sd_lun *un);
981 
982 static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
983     struct sd_xbuf *xp, struct scsi_pkt *pktp);
984 
985 /*
986  * Error handling functions
987  */
988 static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
989     struct sd_xbuf *xp, struct scsi_pkt *pktp);
990 static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
991     struct sd_xbuf *xp, struct scsi_pkt *pktp);
992 static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
993     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
994 static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
995     struct sd_xbuf *xp, struct scsi_pkt *pktp);
996 
997 static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
998     struct sd_xbuf *xp, struct scsi_pkt *pktp);
999 static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
1000     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1001 static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
1002     struct sd_xbuf *xp, size_t actual_len);
1003 static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
1004     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1005 
1006 static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
1007     void *arg, int code);
1008 
1009 static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
1010     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1011 static void sd_sense_key_recoverable_error(struct sd_lun *un,
1012     uint8_t *sense_datap,
1013     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1014 static void sd_sense_key_not_ready(struct sd_lun *un,
1015     uint8_t *sense_datap,
1016     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1017 static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
1018     uint8_t *sense_datap,
1019     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1020 static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
1021     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1022 static void sd_sense_key_unit_attention(struct sd_lun *un,
1023     uint8_t *sense_datap,
1024     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1025 static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
1026     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1027 static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
1028     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1029 static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
1030     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1031 static void sd_sense_key_default(struct sd_lun *un,
1032     uint8_t *sense_datap,
1033     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1034 
1035 static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
1036     void *arg, int flag);
1037 
1038 static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
1039     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1040 static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
1041     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1042 static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
1043     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1044 static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
1045     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1046 static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
1047     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1048 static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
1049     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1050 static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
1051     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1052 static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
1053     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1054 
1055 static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);
1056 
1057 static void sd_start_stop_unit_callback(void *arg);
1058 static void sd_start_stop_unit_task(void *arg);
1059 
1060 static void sd_taskq_create(void);
1061 static void sd_taskq_delete(void);
1062 static void sd_target_change_task(void *arg);
1063 static void sd_log_dev_status_event(struct sd_lun *un, char *esc, int km_flag);
1064 static void sd_log_lun_expansion_event(struct sd_lun *un, int km_flag);
1065 static void sd_log_eject_request_event(struct sd_lun *un, int km_flag);
1066 static void sd_media_change_task(void *arg);
1067 
1068 static int sd_handle_mchange(struct sd_lun *un);
1069 static int sd_send_scsi_DOORLOCK(sd_ssc_t *ssc, int flag, int path_flag);
1070 static int sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp,
1071     uint32_t *lbap, int path_flag);
1072 static int sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp,
1073     uint32_t *lbap, uint32_t *psp, int path_flag);
1074 static int sd_send_scsi_START_STOP_UNIT(sd_ssc_t *ssc, int pc_flag,
1075     int flag, int path_flag);
1076 static int sd_send_scsi_INQUIRY(sd_ssc_t *ssc, uchar_t *bufaddr,
1077     size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
1078 static int sd_send_scsi_TEST_UNIT_READY(sd_ssc_t *ssc, int flag);
1079 static int sd_send_scsi_PERSISTENT_RESERVE_IN(sd_ssc_t *ssc,
1080     uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
1081 static int sd_send_scsi_PERSISTENT_RESERVE_OUT(sd_ssc_t *ssc,
1082     uchar_t usr_cmd, uchar_t *usr_bufp);
1083 static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
1084     struct dk_callback *dkc);
1085 static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
1086 static int sd_send_scsi_UNMAP(dev_t dev, sd_ssc_t *ssc, dkioc_free_list_t *dfl,
1087     int flag);
1088 static int sd_send_scsi_GET_CONFIGURATION(sd_ssc_t *ssc,
1089     struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1090     uchar_t *bufaddr, uint_t buflen, int path_flag);
1091 static int sd_send_scsi_feature_GET_CONFIGURATION(sd_ssc_t *ssc,
1092     struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1093     uchar_t *bufaddr, uint_t buflen, char feature, int path_flag);
1094 static int sd_send_scsi_MODE_SENSE(sd_ssc_t *ssc, int cdbsize,
1095     uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
1096 static int sd_send_scsi_MODE_SELECT(sd_ssc_t *ssc, int cdbsize,
1097     uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
1098 static int sd_send_scsi_RDWR(sd_ssc_t *ssc, uchar_t cmd, void *bufaddr,
1099     size_t buflen, daddr_t start_block, int path_flag);
/*
 * Direction-specific shorthands for sd_send_scsi_RDWR(): both forward every
 * argument untouched and differ only in the opcode they supply (SCMD_READ
 * versus SCMD_WRITE).
 */
#define	sd_send_scsi_READ(ssc, bufaddr, buflen, start_block, path_flag)	\
	sd_send_scsi_RDWR((ssc), SCMD_READ, (bufaddr), (buflen),	\
	    (start_block), (path_flag))
#define	sd_send_scsi_WRITE(ssc, bufaddr, buflen, start_block, path_flag) \
	sd_send_scsi_RDWR((ssc), SCMD_WRITE, (bufaddr), (buflen),	\
	    (start_block), (path_flag))
1106 
1107 static int sd_send_scsi_LOG_SENSE(sd_ssc_t *ssc, uchar_t *bufaddr,
1108     uint16_t buflen, uchar_t page_code, uchar_t page_control,
1109     uint16_t param_ptr, int path_flag);
1110 static int sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION(sd_ssc_t *ssc,
1111     uchar_t *bufaddr, size_t buflen, uchar_t class_req);
1112 static boolean_t sd_gesn_media_data_valid(uchar_t *data);
1113 
1114 static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
1115 static void sd_free_rqs(struct sd_lun *un);
1116 
1117 static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
1118     uchar_t *data, int len, int fmt);
1119 static void sd_panic_for_res_conflict(struct sd_lun *un);
1120 
1121 /*
1122  * Disk Ioctl Function Prototypes
1123  */
1124 static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
1125 static int sd_get_media_info_ext(dev_t dev, caddr_t arg, int flag);
1126 static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
1127 static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
1128 
1129 /*
1130  * Multi-host Ioctl Prototypes
1131  */
1132 static int sd_check_mhd(dev_t dev, int interval);
1133 static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1134 static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
1135 static char *sd_sname(uchar_t status);
1136 static void sd_mhd_resvd_recover(void *arg);
1137 static void sd_resv_reclaim_thread();
1138 static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1139 static int sd_reserve_release(dev_t dev, int cmd);
1140 static void sd_rmv_resv_reclaim_req(dev_t dev);
1141 static void sd_mhd_reset_notify_cb(caddr_t arg);
1142 static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1143     mhioc_inkeys_t *usrp, int flag);
1144 static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1145     mhioc_inresvs_t *usrp, int flag);
1146 static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1147 static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1148 static int sd_mhdioc_release(dev_t dev);
1149 static int sd_mhdioc_register_devid(dev_t dev);
1150 static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1151 static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1152 
1153 /*
1154  * SCSI removable prototypes
1155  */
1156 static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1157 static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1158 static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1159 static int sr_pause_resume(dev_t dev, int mode);
1160 static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1161 static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1162 static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1163 static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1164 static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1165 static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1166 static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1167 static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1168 static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1169 static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1170 static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1171 static int sr_sector_mode(dev_t dev, uint32_t blksize);
1172 static int sr_eject(dev_t dev);
1173 static void sr_ejected(register struct sd_lun *un);
1174 static int sr_check_wp(dev_t dev);
1175 static opaque_t sd_watch_request_submit(struct sd_lun *un);
1176 static int sd_check_media(dev_t dev, enum dkio_state state);
1177 static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1178 static void sd_delayed_cv_broadcast(void *arg);
1179 static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1180 static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1181 
1182 static int sd_log_page_supported(sd_ssc_t *ssc, int log_page);
1183 
1184 /*
1185  * Function prototypes for the non-512 support (DVDRAM, MO, etc.) functions.
1186  */
1187 static void sd_check_for_writable_cd(sd_ssc_t *ssc, int path_flag);
1188 static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1189 static void sd_wm_cache_destructor(void *wm, void *un);
1190 static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1191     daddr_t endb, ushort_t typ);
1192 static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1193     daddr_t endb);
1194 static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1195 static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1196 static void sd_read_modify_write_task(void * arg);
1197 static int
1198 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
1199     struct buf **bpp);
1200 
1201 
1202 /*
1203  * Function prototypes for failfast support.
1204  */
1205 static void sd_failfast_flushq(struct sd_lun *un);
1206 static int sd_failfast_flushq_callback(struct buf *bp);
1207 
1208 /*
1209  * Function prototypes for partial DMA support
1210  */
1211 static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1212 		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1213 
1214 
1215 /* Function prototypes for cmlb */
1216 static int sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
1217     diskaddr_t start_block, size_t reqlength, void *tg_cookie);
1218 
1219 static int sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie);
1220 
1221 /*
1222  * For printing the RMW warning message in a timely manner
1223  */
1224 static void sd_rmw_msg_print_handler(void *arg);
1225 
1226 /*
1227  * Constants for failfast support:
1228  *
1229  * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1230  * failfast processing being performed.
1231  *
1232  * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1233  * failfast processing on all bufs with B_FAILFAST set.
1234  */
1235 
1236 #define	SD_FAILFAST_INACTIVE		0	/* normal state, no failfast */
1237 #define	SD_FAILFAST_ACTIVE		1	/* failfast processing active */
1238 
1239 /*
1240  * Bitmask to control behavior of buf(9S) flushes when a transition to
1241  * the failfast state occurs. Optional bits include:
1242  *
1243  * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1244  * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1245  * be flushed.
1246  *
1247  * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1248  * driver, in addition to the regular wait queue. This includes the xbuf
1249  * queues. When clear, only the driver's wait queue will be flushed.
1250  */
1251 #define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01	/* flush non-B_FAILFAST too */
1252 #define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02	/* flush xbuf queues as well */
1253 
1254 /*
1255  * Default: flush only bufs that have B_FAILFAST set (the ALL_BUFS bit is
1256  * left clear), but flush all queues within the driver (ALL_QUEUES is set).
1257  */
1258 static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
1259 
1260 
1261 /*
1262  * SD Testing Fault Injection
1263  */
1264 #ifdef SD_FAULT_INJECTION
1265 static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1266 static void sd_faultinjection(struct scsi_pkt *pktp);
1267 static void sd_injection_log(char *buf, struct sd_lun *un);
1268 #endif
1269 
1270 /*
1271  * Device driver ops vector
1272  */
1273 static struct cb_ops sd_cb_ops = {
1274 	sdopen,			/* open */
1275 	sdclose,		/* close */
1276 	sdstrategy,		/* strategy */
1277 	nodev,			/* print */
1278 	sddump,			/* dump */
1279 	sdread,			/* read */
1280 	sdwrite,		/* write */
1281 	sdioctl,		/* ioctl */
1282 	nodev,			/* devmap */
1283 	nodev,			/* mmap */
1284 	nodev,			/* segmap */
1285 	nochpoll,		/* poll */
1286 	sd_prop_op,		/* cb_prop_op */
1287 	0,			/* streamtab  */
1288 	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
1289 	CB_REV,			/* cb_rev */
1290 	sdaread,		/* async I/O read entry point */
1291 	sdawrite		/* async I/O write entry point */
1292 };
1293 
1294 struct dev_ops sd_ops = {
1295 	DEVO_REV,		/* devo_rev, */
1296 	0,			/* refcnt  */
1297 	sdinfo,			/* info */
1298 	nulldev,		/* identify */
1299 	sdprobe,		/* probe */
1300 	sdattach,		/* attach */
1301 	sddetach,		/* detach */
1302 	nodev,			/* reset */
1303 	&sd_cb_ops,		/* driver operations */
1304 	NULL,			/* bus operations */
1305 	sdpower,		/* power */
1306 	ddi_quiesce_not_needed,		/* quiesce */
1307 };
1308 
1309 /*
1310  * This is the loadable module wrapper.
1311  */
1312 #include <sys/modctl.h>
1313 
1314 static struct modldrv modldrv = {
1315 	&mod_driverops,		/* Type of module. This one is a driver */
1316 	SD_MODULE_NAME,		/* Module name. */
1317 	&sd_ops			/* driver ops */
1318 };
1319 
1320 static struct modlinkage modlinkage = {
1321 	MODREV_1, &modldrv, NULL
1322 };
1323 
1324 static cmlb_tg_ops_t sd_tgops = {
1325 	TG_DK_OPS_VERSION_1,
1326 	sd_tg_rdwr,
1327 	sd_tg_getinfo
1328 };
1329 
1330 static struct scsi_asq_key_strings sd_additional_codes[] = {
1331 	0x81, 0, "Logical Unit is Reserved",
1332 	0x85, 0, "Audio Address Not Valid",
1333 	0xb6, 0, "Media Load Mechanism Failed",
1334 	0xB9, 0, "Audio Play Operation Aborted",
1335 	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
1336 	0x53, 2, "Medium removal prevented",
1337 	0x6f, 0, "Authentication failed during key exchange",
1338 	0x6f, 1, "Key not present",
1339 	0x6f, 2, "Key not established",
1340 	0x6f, 3, "Read without proper authentication",
1341 	0x6f, 4, "Mismatched region to this logical unit",
1342 	0x6f, 5, "Region reset count error",
1343 	0xffff, 0x0, NULL
1344 };
1345 
1346 
1347 /*
1348  * Struct for passing printing information for sense data messages
1349  */
1350 struct sd_sense_info {
1351 	int	ssi_severity;	/* NOTE(review): presumably message severity */
1352 	int	ssi_pfa_flag;	/* NOTE(review): presumably a PFA marker */
1353 };
1354 
1355 /*
1356  * Table of function pointers for iostart-side routines. Separate "chains"
1357  * of layered function calls are formed by placing the function pointers
1358  * sequentially in the desired order. Functions are called according to an
1359  * incrementing table index ordering. The last function in each chain must
1360  * be sd_core_iostart(). The corresponding iodone-side routines are expected
1361  * in the sd_iodone_chain[] array.
1362  *
1363  * Note: It may seem more natural to organize both the iostart and iodone
1364  * functions together, into an array of structures (or some similar
1365  * organization) with a common index, rather than two separate arrays which
1366  * must be maintained in synchronization. The purpose of this division is
1367  * to achieve improved performance: individual arrays allow for more
1368  * effective cache line utilization on certain platforms.
1369  */
1370 
1371 typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1372 
1373 
1374 static sd_chain_t sd_iostart_chain[] = {
1375 
1376 	/* Chain for buf IO for disk drive targets (PM enabled) */
1377 	sd_mapblockaddr_iostart,	/* Index: 0 */
1378 	sd_pm_iostart,			/* Index: 1 */
1379 	sd_core_iostart,		/* Index: 2 */
1380 
1381 	/* Chain for buf IO for disk drive targets (PM disabled) */
1382 	sd_mapblockaddr_iostart,	/* Index: 3 */
1383 	sd_core_iostart,		/* Index: 4 */
1384 
1385 	/*
1386 	 * Chain for buf IO for removable-media or large sector size
1387 	 * disk drive targets with RMW needed (PM enabled)
1388 	 */
1389 	sd_mapblockaddr_iostart,	/* Index: 5 */
1390 	sd_mapblocksize_iostart,	/* Index: 6 */
1391 	sd_pm_iostart,			/* Index: 7 */
1392 	sd_core_iostart,		/* Index: 8 */
1393 
1394 	/*
1395 	 * Chain for buf IO for removable-media or large sector size
1396 	 * disk drive targets with RMW needed (PM disabled)
1397 	 */
1398 	sd_mapblockaddr_iostart,	/* Index: 9 */
1399 	sd_mapblocksize_iostart,	/* Index: 10 */
1400 	sd_core_iostart,		/* Index: 11 */
1401 
1402 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1403 	sd_mapblockaddr_iostart,	/* Index: 12 */
1404 	sd_checksum_iostart,		/* Index: 13 */
1405 	sd_pm_iostart,			/* Index: 14 */
1406 	sd_core_iostart,		/* Index: 15 */
1407 
1408 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1409 	sd_mapblockaddr_iostart,	/* Index: 16 */
1410 	sd_checksum_iostart,		/* Index: 17 */
1411 	sd_core_iostart,		/* Index: 18 */
1412 
1413 	/* Chain for USCSI commands (all targets) */
1414 	sd_pm_iostart,			/* Index: 19 */
1415 	sd_core_iostart,		/* Index: 20 */
1416 
1417 	/* Chain for checksumming USCSI commands (all targets) */
1418 	sd_checksum_uscsi_iostart,	/* Index: 21 */
1419 	sd_pm_iostart,			/* Index: 22 */
1420 	sd_core_iostart,		/* Index: 23 */
1421 
1422 	/* Chain for "direct" USCSI commands (all targets) */
1423 	sd_core_iostart,		/* Index: 24 */
1424 
1425 	/* Chain for "direct priority" USCSI commands (all targets) */
1426 	sd_core_iostart,		/* Index: 25 */
1427 
1428 	/*
1429 	 * Chain for buf IO for large sector size disk drive targets
1430 	 * with RMW needed with checksumming (PM enabled)
1431 	 */
1432 	sd_mapblockaddr_iostart,	/* Index: 26 */
1433 	sd_mapblocksize_iostart,	/* Index: 27 */
1434 	sd_checksum_iostart,		/* Index: 28 */
1435 	sd_pm_iostart,			/* Index: 29 */
1436 	sd_core_iostart,		/* Index: 30 */
1437 
1438 	/*
1439 	 * Chain for buf IO for large sector size disk drive targets
1440 	 * with RMW needed with checksumming (PM disabled)
1441 	 */
1442 	sd_mapblockaddr_iostart,	/* Index: 31 */
1443 	sd_mapblocksize_iostart,	/* Index: 32 */
1444 	sd_checksum_iostart,		/* Index: 33 */
1445 	sd_core_iostart,		/* Index: 34 */
1446 
1447 };
1448 
1449 /*
1450  * Macros to locate the first function of each iostart chain in the
1451  * sd_iostart_chain[] array. These are located by the index in the array.
1452  */
/* buf IO to disk drive targets */
#define	SD_CHAIN_DISK_IOSTART			0
#define	SD_CHAIN_DISK_IOSTART_NO_PM		3

/* buf IO with RMW needed; removable media enters the same chains */
#define	SD_CHAIN_MSS_DISK_IOSTART		5
#define	SD_CHAIN_RMMEDIA_IOSTART		SD_CHAIN_MSS_DISK_IOSTART
#define	SD_CHAIN_MSS_DISK_IOSTART_NO_PM		9
#define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		SD_CHAIN_MSS_DISK_IOSTART_NO_PM

/* buf IO with checksumming */
#define	SD_CHAIN_CHKSUM_IOSTART			12
#define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16

/* USCSI command chains */
#define	SD_CHAIN_USCSI_CMD_IOSTART		19
#define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
#define	SD_CHAIN_DIRECT_CMD_IOSTART		24
#define	SD_CHAIN_PRIORITY_CMD_IOSTART		25

/* buf IO with both RMW and checksumming */
#define	SD_CHAIN_MSS_CHKSUM_IOSTART		26
#define	SD_CHAIN_MSS_CHKSUM_IOSTART_NO_PM	31
1467 
1468 
1469 /*
1470  * Table of function pointers for the iodone-side routines for the driver-
1471  * internal layering mechanism.  The calling sequence for iodone routines
1472  * uses a decrementing table index, so the last routine called in a chain
1473  * must be at the lowest array index location for that chain.  The last
1474  * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1475  * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1476  * of the functions in an iodone side chain must correspond to the ordering
1477  * of the iostart routines for that chain.  Note that there is no iodone
1478  * side routine that corresponds to sd_core_iostart(), so there is no
1479  * entry in the table for this.
1480  */
1481 
1482 static sd_chain_t sd_iodone_chain[] = {
1483 
1484 	/* Chain for buf IO for disk drive targets (PM enabled) */
1485 	sd_buf_iodone,			/* Index: 0 */
1486 	sd_mapblockaddr_iodone,		/* Index: 1 */
1487 	sd_pm_iodone,			/* Index: 2 */
1488 
1489 	/* Chain for buf IO for disk drive targets (PM disabled) */
1490 	sd_buf_iodone,			/* Index: 3 */
1491 	sd_mapblockaddr_iodone,		/* Index: 4 */
1492 
1493 	/*
1494 	 * Chain for buf IO for removable-media or large sector size
1495 	 * disk drive targets with RMW needed (PM enabled)
1496 	 */
1497 	sd_buf_iodone,			/* Index: 5 */
1498 	sd_mapblockaddr_iodone,		/* Index: 6 */
1499 	sd_mapblocksize_iodone,		/* Index: 7 */
1500 	sd_pm_iodone,			/* Index: 8 */
1501 
1502 	/*
1503 	 * Chain for buf IO for removable-media or large sector size
1504 	 * disk drive targets with RMW needed (PM disabled)
1505 	 */
1506 	sd_buf_iodone,			/* Index: 9 */
1507 	sd_mapblockaddr_iodone,		/* Index: 10 */
1508 	sd_mapblocksize_iodone,		/* Index: 11 */
1509 
1510 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1511 	sd_buf_iodone,			/* Index: 12 */
1512 	sd_mapblockaddr_iodone,		/* Index: 13 */
1513 	sd_checksum_iodone,		/* Index: 14 */
1514 	sd_pm_iodone,			/* Index: 15 */
1515 
1516 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1517 	sd_buf_iodone,			/* Index: 16 */
1518 	sd_mapblockaddr_iodone,		/* Index: 17 */
1519 	sd_checksum_iodone,		/* Index: 18 */
1520 
1521 	/* Chain for USCSI commands (non-checksum targets) */
1522 	sd_uscsi_iodone,		/* Index: 19 */
1523 	sd_pm_iodone,			/* Index: 20 */
1524 
1525 	/* Chain for USCSI commands (checksum targets) */
1526 	sd_uscsi_iodone,		/* Index: 21 */
1527 	sd_checksum_uscsi_iodone,	/* Index: 22 */
1528 	sd_pm_iodone,			/* Index: 22 */
1529 
1530 	/* Chain for "direct" USCSI commands (all targets) */
1531 	sd_uscsi_iodone,		/* Index: 24 */
1532 
1533 	/* Chain for "direct priority" USCSI commands (all targets) */
1534 	sd_uscsi_iodone,		/* Index: 25 */
1535 
1536 	/*
1537 	 * Chain for buf IO for large sector size disk drive targets
1538 	 * with checksumming (PM enabled)
1539 	 */
1540 	sd_buf_iodone,			/* Index: 26 */
1541 	sd_mapblockaddr_iodone,		/* Index: 27 */
1542 	sd_mapblocksize_iodone,		/* Index: 28 */
1543 	sd_checksum_iodone,		/* Index: 29 */
1544 	sd_pm_iodone,			/* Index: 30 */
1545 
1546 	/*
1547 	 * Chain for buf IO for large sector size disk drive targets
1548 	 * with checksumming (PM disabled)
1549 	 */
1550 	sd_buf_iodone,			/* Index: 31 */
1551 	sd_mapblockaddr_iodone,		/* Index: 32 */
1552 	sd_mapblocksize_iodone,		/* Index: 33 */
1553 	sd_checksum_iodone,		/* Index: 34 */
1554 };
1555 
1556 
1557 /*
1558  * Macros to locate the "first" function in the sd_iodone_chain[] array for
1559  * each iodone-side chain. These are located by the array index, but as the
1560  * iodone side functions are called in a decrementing-index order, the
1561  * highest index number in each chain must be specified (as these correspond
1562  * to the first function in the iodone chain that will be called by the core
1563  * at IO completion time).
1564  */
1565 
/*
 * Entry slot of each iodone chain: the highest index the chain occupies in
 * sd_iodone_chain[], since iodone routines run in decrementing-index order.
 */
#define	SD_CHAIN_DISK_IODONE			2
#define	SD_CHAIN_DISK_IODONE_NO_PM		4
#define	SD_CHAIN_RMMEDIA_IODONE			8
#define	SD_CHAIN_MSS_DISK_IODONE		8
#define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
#define	SD_CHAIN_MSS_DISK_IODONE_NO_PM		11
#define	SD_CHAIN_CHKSUM_IODONE			15
#define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
#define	SD_CHAIN_USCSI_CMD_IODONE		20
/*
 * The checksumming USCSI chain occupies slots 21-23 of sd_iodone_chain[],
 * with sd_pm_iodone() in slot 23.  Entering at slot 22 would bypass
 * sd_pm_iodone() even though the matching iostart chain runs
 * sd_pm_iostart(), so the entry point must be the top slot.
 */
#define	SD_CHAIN_USCSI_CHKSUM_IODONE		23
#define	SD_CHAIN_DIRECT_CMD_IODONE		24
#define	SD_CHAIN_PRIORITY_CMD_IODONE		25
#define	SD_CHAIN_MSS_CHKSUM_IODONE		30
#define	SD_CHAIN_MSS_CHKSUM_IODONE_NO_PM	34
1580 
1581 
1582 
/*
 * Array to map a layering chain index to the appropriate initpkt routine.
 * The redundant entries are present so that the index used for accessing
 * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
 * with this table as well.
 *
 * Every buf IO chain index maps to sd_initpkt_for_buf; the USCSI chain
 * indexes (19 through 25) map to sd_initpkt_for_uscsi.
 */
typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);

static sd_initpkt_t	sd_initpkt_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_initpkt_for_buf,		/* Index: 0 */
	sd_initpkt_for_buf,		/* Index: 1 */
	sd_initpkt_for_buf,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_initpkt_for_buf,		/* Index: 3 */
	sd_initpkt_for_buf,		/* Index: 4 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets (PM enabled)
	 */
	sd_initpkt_for_buf,		/* Index: 5 */
	sd_initpkt_for_buf,		/* Index: 6 */
	sd_initpkt_for_buf,		/* Index: 7 */
	sd_initpkt_for_buf,		/* Index: 8 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets (PM disabled)
	 */
	sd_initpkt_for_buf,		/* Index: 9 */
	sd_initpkt_for_buf,		/* Index: 10 */
	sd_initpkt_for_buf,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_initpkt_for_buf,		/* Index: 12 */
	sd_initpkt_for_buf,		/* Index: 13 */
	sd_initpkt_for_buf,		/* Index: 14 */
	sd_initpkt_for_buf,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_initpkt_for_buf,		/* Index: 16 */
	sd_initpkt_for_buf,		/* Index: 17 */
	sd_initpkt_for_buf,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_initpkt_for_uscsi,		/* Index: 19 */
	sd_initpkt_for_uscsi,		/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_initpkt_for_uscsi,		/* Index: 21 */
	sd_initpkt_for_uscsi,		/* Index: 22 */
	sd_initpkt_for_uscsi,		/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_initpkt_for_uscsi,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_initpkt_for_uscsi,		/* Index: 25 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM enabled)
	 */
	sd_initpkt_for_buf,		/* Index: 26 */
	sd_initpkt_for_buf,		/* Index: 27 */
	sd_initpkt_for_buf,		/* Index: 28 */
	sd_initpkt_for_buf,		/* Index: 29 */
	sd_initpkt_for_buf,		/* Index: 30 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM disabled)
	 */
	sd_initpkt_for_buf,		/* Index: 31 */
	sd_initpkt_for_buf,		/* Index: 32 */
	sd_initpkt_for_buf,		/* Index: 33 */
	sd_initpkt_for_buf,		/* Index: 34 */
};
1664 
1665 
/*
 * Array to map a layering chain index to the appropriate destroypkt routine.
 * The redundant entries are present so that the index used for accessing
 * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
 * with this table as well.
 *
 * Every buf IO chain index maps to sd_destroypkt_for_buf; the USCSI chain
 * indexes (19 through 25) map to sd_destroypkt_for_uscsi.
 */
typedef void (*sd_destroypkt_t)(struct buf *);

static sd_destroypkt_t	sd_destroypkt_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_destroypkt_for_buf,		/* Index: 0 */
	sd_destroypkt_for_buf,		/* Index: 1 */
	sd_destroypkt_for_buf,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_destroypkt_for_buf,		/* Index: 3 */
	sd_destroypkt_for_buf,		/* Index: 4 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets (PM enabled)
	 */
	sd_destroypkt_for_buf,		/* Index: 5 */
	sd_destroypkt_for_buf,		/* Index: 6 */
	sd_destroypkt_for_buf,		/* Index: 7 */
	sd_destroypkt_for_buf,		/* Index: 8 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets (PM disabled)
	 */
	sd_destroypkt_for_buf,		/* Index: 9 */
	sd_destroypkt_for_buf,		/* Index: 10 */
	sd_destroypkt_for_buf,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_destroypkt_for_buf,		/* Index: 12 */
	sd_destroypkt_for_buf,		/* Index: 13 */
	sd_destroypkt_for_buf,		/* Index: 14 */
	sd_destroypkt_for_buf,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_destroypkt_for_buf,		/* Index: 16 */
	sd_destroypkt_for_buf,		/* Index: 17 */
	sd_destroypkt_for_buf,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_destroypkt_for_uscsi,	/* Index: 19 */
	sd_destroypkt_for_uscsi,	/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_destroypkt_for_uscsi,	/* Index: 21 */
	sd_destroypkt_for_uscsi,	/* Index: 22 */
	sd_destroypkt_for_uscsi,	/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_destroypkt_for_uscsi,	/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_destroypkt_for_uscsi,	/* Index: 25 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM enabled)
	 */
	sd_destroypkt_for_buf,		/* Index: 26 */
	sd_destroypkt_for_buf,		/* Index: 27 */
	sd_destroypkt_for_buf,		/* Index: 28 */
	sd_destroypkt_for_buf,		/* Index: 29 */
	sd_destroypkt_for_buf,		/* Index: 30 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM disabled)
	 */
	sd_destroypkt_for_buf,		/* Index: 31 */
	sd_destroypkt_for_buf,		/* Index: 32 */
	sd_destroypkt_for_buf,		/* Index: 33 */
	sd_destroypkt_for_buf,		/* Index: 34 */
};
1747 
1748 
1749 
/*
 * Array to map a layering chain index to the appropriate chain "type".
 * The chain type indicates a specific property/usage of the chain.
 * The redundant entries are present so that the index used for accessing
 * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
 * with this table as well.
 *
 * The SD_CHAIN_* values below are the possible chain types; they are the
 * values stored in sd_chain_type_map[] and tested by the SD_IS_BUFIO() and
 * SD_IS_DIRECT_PRIORITY() macros.
 */

#define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
#define	SD_CHAIN_BUFIO			1	/* regular buf IO */
#define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
#define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
#define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
						/* (for error recovery) */

static int sd_chain_type_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	SD_CHAIN_BUFIO,			/* Index: 0 */
	SD_CHAIN_BUFIO,			/* Index: 1 */
	SD_CHAIN_BUFIO,			/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	SD_CHAIN_BUFIO,			/* Index: 3 */
	SD_CHAIN_BUFIO,			/* Index: 4 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets (PM enabled)
	 */
	SD_CHAIN_BUFIO,			/* Index: 5 */
	SD_CHAIN_BUFIO,			/* Index: 6 */
	SD_CHAIN_BUFIO,			/* Index: 7 */
	SD_CHAIN_BUFIO,			/* Index: 8 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets (PM disabled)
	 */
	SD_CHAIN_BUFIO,			/* Index: 9 */
	SD_CHAIN_BUFIO,			/* Index: 10 */
	SD_CHAIN_BUFIO,			/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	SD_CHAIN_BUFIO,			/* Index: 12 */
	SD_CHAIN_BUFIO,			/* Index: 13 */
	SD_CHAIN_BUFIO,			/* Index: 14 */
	SD_CHAIN_BUFIO,			/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	SD_CHAIN_BUFIO,			/* Index: 16 */
	SD_CHAIN_BUFIO,			/* Index: 17 */
	SD_CHAIN_BUFIO,			/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	SD_CHAIN_USCSI,			/* Index: 19 */
	SD_CHAIN_USCSI,			/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	SD_CHAIN_USCSI,			/* Index: 21 */
	SD_CHAIN_USCSI,			/* Index: 22 */
	SD_CHAIN_USCSI,			/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	SD_CHAIN_DIRECT,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM enabled)
	 */
	SD_CHAIN_BUFIO,			/* Index: 26 */
	SD_CHAIN_BUFIO,			/* Index: 27 */
	SD_CHAIN_BUFIO,			/* Index: 28 */
	SD_CHAIN_BUFIO,			/* Index: 29 */
	SD_CHAIN_BUFIO,			/* Index: 30 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM disabled)
	 */
	SD_CHAIN_BUFIO,			/* Index: 31 */
	SD_CHAIN_BUFIO,			/* Index: 32 */
	SD_CHAIN_BUFIO,			/* Index: 33 */
	SD_CHAIN_BUFIO,			/* Index: 34 */
};
1838 
1839 
/*
 * Macro to return TRUE if the IO has come from the sd_buf_iostart() chain.
 * The chain type is looked up in sd_chain_type_map[] using the
 * xb_chain_iostart index stored in the given xbuf (xp).
 */
#define	SD_IS_BUFIO(xp)			\
	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)

/*
 * Macro to return TRUE if the IO has come from the "direct priority" chain.
 * Uses the same sd_chain_type_map[] lookup as SD_IS_BUFIO().
 */
#define	SD_IS_DIRECT_PRIORITY(xp)	\
	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
1847 
1848 
1849 
/*
 * Struct, array, and macros to map a specific chain to the appropriate
 * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
 *
 * The sd_chain_index_map[] array is used at attach time to set the various
 * un_xxx_chain type members of the sd_lun softstate to the specific layering
 * chain to be used with the instance. This allows different instances to use
 * different chains for buf IO, uscsi IO, etc. Also, since the xb_chain_iostart
 * and xb_chain_iodone index values in the sd_xbuf are initialized to these
 * values at sd_xbuf init time, this allows (1) layering chains to be changed
 * dynamically & without the use of locking; and (2) a layer to update the
 * xb_chain_io[start|done] member in a given xbuf with its current index value,
 * to allow for deferred processing of an IO within the same chain from a
 * different execution context.
 */

/* One entry per chain: where the chain starts in each of the two tables. */
struct sd_chain_index {
	int	sci_iostart_index;	/* index into sd_iostart_chain[] */
	int	sci_iodone_index;	/* index into sd_iodone_chain[] */
};

/*
 * The position of each entry in this array must agree with the
 * SD_CHAIN_INFO_* index values defined below.
 */
static struct sd_chain_index	sd_chain_index_map[] = {
	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
	{ SD_CHAIN_MSS_CHKSUM_IOSTART,		SD_CHAIN_MSS_CHKSUM_IODONE },
	{ SD_CHAIN_MSS_CHKSUM_IOSTART_NO_PM, SD_CHAIN_MSS_CHKSUM_IODONE_NO_PM },

};
1886 
1887 
/*
 * The following are indexes into the sd_chain_index_map[] array.
 * Several names intentionally share a value: the MSS ("large sector size")
 * disk names alias the removable-media entries, and USCSI with PM disabled
 * aliases the DIRECT entry.
 */

/* un->un_buf_chain_type must be set to one of these */
#define	SD_CHAIN_INFO_DISK		0
#define	SD_CHAIN_INFO_DISK_NO_PM	1
#define	SD_CHAIN_INFO_RMMEDIA		2
#define	SD_CHAIN_INFO_MSS_DISK		2
#define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
#define	SD_CHAIN_INFO_MSS_DSK_NO_PM	3
#define	SD_CHAIN_INFO_CHKSUM		4
#define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
#define	SD_CHAIN_INFO_MSS_DISK_CHKSUM	10
#define	SD_CHAIN_INFO_MSS_DISK_CHKSUM_NO_PM	11

/* un->un_uscsi_chain_type must be set to one of these */
#define	SD_CHAIN_INFO_USCSI_CMD		6
/* USCSI with PM disabled is the same as DIRECT */
#define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
#define	SD_CHAIN_INFO_USCSI_CHKSUM	7

/* un->un_direct_chain_type must be set to one of these */
#define	SD_CHAIN_INFO_DIRECT_CMD	8

/* un->un_priority_chain_type must be set to one of these */
#define	SD_CHAIN_INFO_PRIORITY_CMD	9
1915 
1916 /* size for devid inquiries */
1917 #define	MAX_INQUIRY_SIZE		0xF0
1918 
/*
 * Macros used by functions to pass a given buf(9S) struct along to the
 * next function in the layering chain for further processing.
 *
 * SD_BEGIN_IOSTART/SD_BEGIN_IODONE call the routine at the given index.
 * SD_NEXT_IOSTART calls the routine at (index + 1) and SD_NEXT_IODONE calls
 * the routine at (index - 1); in every case the index of the routine being
 * called is passed to it as its first argument.
 *
 * In the following macros, passing more than three arguments to the called
 * routines causes the optimizer for the SPARC compiler to stop doing tail
 * call elimination which results in significant performance degradation.
 */
#define	SD_BEGIN_IOSTART(index, un, bp)	\
	((*(sd_iostart_chain[index]))(index, un, bp))

#define	SD_BEGIN_IODONE(index, un, bp)	\
	((*(sd_iodone_chain[index]))(index, un, bp))

#define	SD_NEXT_IOSTART(index, un, bp)				\
	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))

#define	SD_NEXT_IODONE(index, un, bp)				\
	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
1938 
1939 /*
1940  *    Function: _init
1941  *
1942  * Description: This is the driver _init(9E) entry point.
1943  *
1944  * Return Code: Returns the value from mod_install(9F) or
1945  *		ddi_soft_state_init(9F) as appropriate.
1946  *
1947  *     Context: Called when driver module loaded.
1948  */
1949 
1950 int
_init(void)1951 _init(void)
1952 {
1953 	int	err;
1954 
1955 	/* establish driver name from module name */
1956 	sd_label = (char *)mod_modname(&modlinkage);
1957 
1958 	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
1959 	    SD_MAXUNIT);
1960 	if (err != 0) {
1961 		return (err);
1962 	}
1963 
1964 	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
1965 
1966 	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
1967 	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
1968 	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
1969 
1970 	/*
1971 	 * it's ok to init here even for fibre device
1972 	 */
1973 	sd_scsi_probe_cache_init();
1974 
1975 	sd_scsi_target_lun_init();
1976 
1977 	/*
1978 	 * Creating taskq before mod_install ensures that all callers (threads)
1979 	 * that enter the module after a successful mod_install encounter
1980 	 * a valid taskq.
1981 	 */
1982 	sd_taskq_create();
1983 
1984 	err = mod_install(&modlinkage);
1985 	if (err != 0) {
1986 		/* delete taskq if install fails */
1987 		sd_taskq_delete();
1988 
1989 		mutex_destroy(&sd_log_mutex);
1990 
1991 		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
1992 		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
1993 		cv_destroy(&sd_tr.srq_inprocess_cv);
1994 
1995 		sd_scsi_probe_cache_fini();
1996 
1997 		sd_scsi_target_lun_fini();
1998 
1999 		ddi_soft_state_fini(&sd_state);
2000 
2001 		return (err);
2002 	}
2003 
2004 	return (err);
2005 }
2006 
2007 
2008 /*
2009  *    Function: _fini
2010  *
2011  * Description: This is the driver _fini(9E) entry point.
2012  *
2013  * Return Code: Returns the value from mod_remove(9F)
2014  *
2015  *     Context: Called when driver module is unloaded.
2016  */
2017 
2018 int
_fini(void)2019 _fini(void)
2020 {
2021 	int err;
2022 
2023 	if ((err = mod_remove(&modlinkage)) != 0) {
2024 		return (err);
2025 	}
2026 
2027 	sd_taskq_delete();
2028 
2029 	mutex_destroy(&sd_log_mutex);
2030 	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2031 
2032 	sd_scsi_probe_cache_fini();
2033 
2034 	sd_scsi_target_lun_fini();
2035 
2036 	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2037 	cv_destroy(&sd_tr.srq_inprocess_cv);
2038 
2039 	ddi_soft_state_fini(&sd_state);
2040 
2041 	return (err);
2042 }
2043 
2044 
2045 /*
2046  *    Function: _info
2047  *
2048  * Description: This is the driver _info(9E) entry point.
2049  *
2050  *   Arguments: modinfop - pointer to the driver modinfo structure
2051  *
2052  * Return Code: Returns the value from mod_info(9F).
2053  *
2054  *     Context: Kernel thread context
2055  */
2056 
2057 int
_info(struct modinfo * modinfop)2058 _info(struct modinfo *modinfop)
2059 {
2060 	return (mod_info(&modlinkage, modinfop));
2061 }
2062 
2063 
2064 /*
2065  * The following routines implement the driver message logging facility.
2066  * They provide component- and level- based debug output filtering.
2067  * Output may also be restricted to messages for a single instance by
2068  * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2069  * to NULL, then messages for all instances are printed.
2070  *
2071  * These routines have been cloned from each other due to the language
2072  * constraints of macros and variable argument list processing.
2073  */
2074 
2075 
2076 /*
2077  *    Function: sd_log_err
2078  *
2079  * Description: This routine is called by the SD_ERROR macro for debug
2080  *		logging of error conditions.
2081  *
2082  *   Arguments: comp - driver component being logged
2083  *		dev  - pointer to driver info structure
2084  *		fmt  - error string and format to be logged
2085  */
2086 
2087 static void
sd_log_err(uint_t comp,struct sd_lun * un,const char * fmt,...)2088 sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
2089 {
2090 	va_list		ap;
2091 	dev_info_t	*dev;
2092 
2093 	ASSERT(un != NULL);
2094 	dev = SD_DEVINFO(un);
2095 	ASSERT(dev != NULL);
2096 
2097 	/*
2098 	 * Filter messages based on the global component and level masks.
2099 	 * Also print if un matches the value of sd_debug_un, or if
2100 	 * sd_debug_un is set to NULL.
2101 	 */
2102 	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
2103 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2104 		mutex_enter(&sd_log_mutex);
2105 		va_start(ap, fmt);
2106 		(void) vsprintf(sd_log_buf, fmt, ap);
2107 		va_end(ap);
2108 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2109 		mutex_exit(&sd_log_mutex);
2110 	}
2111 #ifdef SD_FAULT_INJECTION
2112 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2113 	if (un->sd_injection_mask & comp) {
2114 		mutex_enter(&sd_log_mutex);
2115 		va_start(ap, fmt);
2116 		(void) vsprintf(sd_log_buf, fmt, ap);
2117 		va_end(ap);
2118 		sd_injection_log(sd_log_buf, un);
2119 		mutex_exit(&sd_log_mutex);
2120 	}
2121 #endif
2122 }
2123 
2124 
2125 /*
2126  *    Function: sd_log_info
2127  *
2128  * Description: This routine is called by the SD_INFO macro for debug
2129  *		logging of general purpose informational conditions.
2130  *
2131  *   Arguments: comp - driver component being logged
2132  *		dev  - pointer to driver info structure
2133  *		fmt  - info string and format to be logged
2134  */
2135 
2136 static void
sd_log_info(uint_t component,struct sd_lun * un,const char * fmt,...)2137 sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
2138 {
2139 	va_list		ap;
2140 	dev_info_t	*dev;
2141 
2142 	ASSERT(un != NULL);
2143 	dev = SD_DEVINFO(un);
2144 	ASSERT(dev != NULL);
2145 
2146 	/*
2147 	 * Filter messages based on the global component and level masks.
2148 	 * Also print if un matches the value of sd_debug_un, or if
2149 	 * sd_debug_un is set to NULL.
2150 	 */
2151 	if ((sd_component_mask & component) &&
2152 	    (sd_level_mask & SD_LOGMASK_INFO) &&
2153 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2154 		mutex_enter(&sd_log_mutex);
2155 		va_start(ap, fmt);
2156 		(void) vsprintf(sd_log_buf, fmt, ap);
2157 		va_end(ap);
2158 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2159 		mutex_exit(&sd_log_mutex);
2160 	}
2161 #ifdef SD_FAULT_INJECTION
2162 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2163 	if (un->sd_injection_mask & component) {
2164 		mutex_enter(&sd_log_mutex);
2165 		va_start(ap, fmt);
2166 		(void) vsprintf(sd_log_buf, fmt, ap);
2167 		va_end(ap);
2168 		sd_injection_log(sd_log_buf, un);
2169 		mutex_exit(&sd_log_mutex);
2170 	}
2171 #endif
2172 }
2173 
2174 
2175 /*
2176  *    Function: sd_log_trace
2177  *
2178  * Description: This routine is called by the SD_TRACE macro for debug
2179  *		logging of trace conditions (i.e. function entry/exit).
2180  *
2181  *   Arguments: comp - driver component being logged
2182  *		dev  - pointer to driver info structure
2183  *		fmt  - trace string and format to be logged
2184  */
2185 
2186 static void
sd_log_trace(uint_t component,struct sd_lun * un,const char * fmt,...)2187 sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
2188 {
2189 	va_list		ap;
2190 	dev_info_t	*dev;
2191 
2192 	ASSERT(un != NULL);
2193 	dev = SD_DEVINFO(un);
2194 	ASSERT(dev != NULL);
2195 
2196 	/*
2197 	 * Filter messages based on the global component and level masks.
2198 	 * Also print if un matches the value of sd_debug_un, or if
2199 	 * sd_debug_un is set to NULL.
2200 	 */
2201 	if ((sd_component_mask & component) &&
2202 	    (sd_level_mask & SD_LOGMASK_TRACE) &&
2203 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2204 		mutex_enter(&sd_log_mutex);
2205 		va_start(ap, fmt);
2206 		(void) vsprintf(sd_log_buf, fmt, ap);
2207 		va_end(ap);
2208 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2209 		mutex_exit(&sd_log_mutex);
2210 	}
2211 #ifdef SD_FAULT_INJECTION
2212 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2213 	if (un->sd_injection_mask & component) {
2214 		mutex_enter(&sd_log_mutex);
2215 		va_start(ap, fmt);
2216 		(void) vsprintf(sd_log_buf, fmt, ap);
2217 		va_end(ap);
2218 		sd_injection_log(sd_log_buf, un);
2219 		mutex_exit(&sd_log_mutex);
2220 	}
2221 #endif
2222 }
2223 
2224 
2225 /*
2226  *    Function: sdprobe
2227  *
2228  * Description: This is the driver probe(9e) entry point function.
2229  *
2230  *   Arguments: devi - opaque device info handle
2231  *
2232  * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2233  *              DDI_PROBE_FAILURE: If the probe failed.
2234  *              DDI_PROBE_PARTIAL: If the instance is not present now,
2235  *				   but may be present in the future.
2236  */
2237 
2238 static int
sdprobe(dev_info_t * devi)2239 sdprobe(dev_info_t *devi)
2240 {
2241 	struct scsi_device	*devp;
2242 	int			rval;
2243 	int			instance = ddi_get_instance(devi);
2244 
2245 	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2246 		return (DDI_PROBE_DONTCARE);
2247 	}
2248 
2249 	devp = ddi_get_driver_private(devi);
2250 
2251 	if (devp == NULL) {
2252 		/* Ooops... nexus driver is mis-configured... */
2253 		return (DDI_PROBE_FAILURE);
2254 	}
2255 
2256 	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2257 		return (DDI_PROBE_PARTIAL);
2258 	}
2259 
2260 	/*
2261 	 * Call the SCSA utility probe routine to see if we actually
2262 	 * have a target at this SCSI nexus.
2263 	 */
2264 	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2265 	case SCSIPROBE_EXISTS:
2266 		switch (devp->sd_inq->inq_dtype) {
2267 		case DTYPE_DIRECT:
2268 			rval = DDI_PROBE_SUCCESS;
2269 			break;
2270 		case DTYPE_RODIRECT:
2271 			/* CDs etc. Can be removable media */
2272 			rval = DDI_PROBE_SUCCESS;
2273 			break;
2274 		case DTYPE_OPTICAL:
2275 			/*
2276 			 * Rewritable optical driver HP115AA
2277 			 * Can also be removable media
2278 			 */
2279 
2280 			/*
2281 			 * Do not attempt to bind to  DTYPE_OPTICAL if
2282 			 * pre solaris 9 sparc sd behavior is required
2283 			 *
2284 			 * If first time through and sd_dtype_optical_bind
2285 			 * has not been set in /etc/system check properties
2286 			 */
2287 
2288 			if (sd_dtype_optical_bind  < 0) {
2289 				sd_dtype_optical_bind = ddi_prop_get_int
2290 				    (DDI_DEV_T_ANY, devi, 0,
2291 				    "optical-device-bind", 1);
2292 			}
2293 
2294 			if (sd_dtype_optical_bind == 0) {
2295 				rval = DDI_PROBE_FAILURE;
2296 			} else {
2297 				rval = DDI_PROBE_SUCCESS;
2298 			}
2299 			break;
2300 
2301 		case DTYPE_NOTPRESENT:
2302 		default:
2303 			rval = DDI_PROBE_FAILURE;
2304 			break;
2305 		}
2306 		break;
2307 	default:
2308 		rval = DDI_PROBE_PARTIAL;
2309 		break;
2310 	}
2311 
2312 	/*
2313 	 * This routine checks for resource allocation prior to freeing,
2314 	 * so it will take care of the "smart probing" case where a
2315 	 * scsi_probe() may or may not have been issued and will *not*
2316 	 * free previously-freed resources.
2317 	 */
2318 	scsi_unprobe(devp);
2319 	return (rval);
2320 }
2321 
2322 
2323 /*
2324  *    Function: sdinfo
2325  *
2326  * Description: This is the driver getinfo(9e) entry point function.
2327  *		Given the device number, return the devinfo pointer from
2328  *		the scsi_device structure or the instance number
2329  *		associated with the dev_t.
2330  *
2331  *   Arguments: dip     - pointer to device info structure
2332  *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2333  *			  DDI_INFO_DEVT2INSTANCE)
2334  *		arg     - driver dev_t
2335  *		resultp - user buffer for request response
2336  *
2337  * Return Code: DDI_SUCCESS
2338  *              DDI_FAILURE
2339  */
2340 /* ARGSUSED */
2341 static int
sdinfo(dev_info_t * dip,ddi_info_cmd_t infocmd,void * arg,void ** result)2342 sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2343 {
2344 	struct sd_lun	*un;
2345 	dev_t		dev;
2346 	int		instance;
2347 	int		error;
2348 
2349 	switch (infocmd) {
2350 	case DDI_INFO_DEVT2DEVINFO:
2351 		dev = (dev_t)arg;
2352 		instance = SDUNIT(dev);
2353 		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2354 			return (DDI_FAILURE);
2355 		}
2356 		*result = (void *) SD_DEVINFO(un);
2357 		error = DDI_SUCCESS;
2358 		break;
2359 	case DDI_INFO_DEVT2INSTANCE:
2360 		dev = (dev_t)arg;
2361 		instance = SDUNIT(dev);
2362 		*result = (void *)(uintptr_t)instance;
2363 		error = DDI_SUCCESS;
2364 		break;
2365 	default:
2366 		error = DDI_FAILURE;
2367 	}
2368 	return (error);
2369 }
2370 
2371 /*
2372  *    Function: sd_prop_op
2373  *
2374  * Description: This is the driver prop_op(9e) entry point function.
2375  *		Return the number of blocks for the partition in question
2376  *		or forward the request to the property facilities.
2377  *
2378  *   Arguments: dev       - device number
2379  *		dip       - pointer to device info structure
2380  *		prop_op   - property operator
2381  *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2382  *		name      - pointer to property name
2383  *		valuep    - pointer or address of the user buffer
2384  *		lengthp   - property length
2385  *
2386  * Return Code: DDI_PROP_SUCCESS
2387  *              DDI_PROP_NOT_FOUND
2388  *              DDI_PROP_UNDEFINED
2389  *              DDI_PROP_NO_MEMORY
2390  *              DDI_PROP_BUF_TOO_SMALL
2391  */
2392 
2393 static int
sd_prop_op(dev_t dev,dev_info_t * dip,ddi_prop_op_t prop_op,int mod_flags,char * name,caddr_t valuep,int * lengthp)2394 sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2395     char *name, caddr_t valuep, int *lengthp)
2396 {
2397 	struct sd_lun	*un;
2398 
2399 	if ((un = ddi_get_soft_state(sd_state, ddi_get_instance(dip))) == NULL)
2400 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2401 		    name, valuep, lengthp));
2402 
2403 	return (cmlb_prop_op(un->un_cmlbhandle,
2404 	    dev, dip, prop_op, mod_flags, name, valuep, lengthp,
2405 	    SDPART(dev), (void *)SD_PATH_DIRECT));
2406 }
2407 
2408 /*
2409  * The following functions are for smart probing:
2410  * sd_scsi_probe_cache_init()
2411  * sd_scsi_probe_cache_fini()
2412  * sd_scsi_clear_probe_cache()
2413  * sd_scsi_probe_with_cache()
2414  */
2415 
2416 /*
2417  *    Function: sd_scsi_probe_cache_init
2418  *
2419  * Description: Initializes the probe response cache mutex and head pointer.
2420  *
2421  *     Context: Kernel thread context
2422  */
2423 
2424 static void
sd_scsi_probe_cache_init(void)2425 sd_scsi_probe_cache_init(void)
2426 {
2427 	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2428 	sd_scsi_probe_cache_head = NULL;
2429 }
2430 
2431 
2432 /*
2433  *    Function: sd_scsi_probe_cache_fini
2434  *
2435  * Description: Frees all resources associated with the probe response cache.
2436  *
2437  *     Context: Kernel thread context
2438  */
2439 
2440 static void
sd_scsi_probe_cache_fini(void)2441 sd_scsi_probe_cache_fini(void)
2442 {
2443 	struct sd_scsi_probe_cache *cp;
2444 	struct sd_scsi_probe_cache *ncp;
2445 
2446 	/* Clean up our smart probing linked list */
2447 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2448 		ncp = cp->next;
2449 		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2450 	}
2451 	sd_scsi_probe_cache_head = NULL;
2452 	mutex_destroy(&sd_scsi_probe_cache_mutex);
2453 }
2454 
2455 
2456 /*
2457  *    Function: sd_scsi_clear_probe_cache
2458  *
2459  * Description: This routine clears the probe response cache. This is
2460  *		done when open() returns ENXIO so that when deferred
2461  *		attach is attempted (possibly after a device has been
2462  *		turned on) we will retry the probe. Since we don't know
2463  *		which target we failed to open, we just clear the
2464  *		entire cache.
2465  *
2466  *     Context: Kernel thread context
2467  */
2468 
2469 static void
sd_scsi_clear_probe_cache(void)2470 sd_scsi_clear_probe_cache(void)
2471 {
2472 	struct sd_scsi_probe_cache	*cp;
2473 	int				i;
2474 
2475 	mutex_enter(&sd_scsi_probe_cache_mutex);
2476 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2477 		/*
2478 		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2479 		 * force probing to be performed the next time
2480 		 * sd_scsi_probe_with_cache is called.
2481 		 */
2482 		for (i = 0; i < NTARGETS_WIDE; i++) {
2483 			cp->cache[i] = SCSIPROBE_EXISTS;
2484 		}
2485 	}
2486 	mutex_exit(&sd_scsi_probe_cache_mutex);
2487 }
2488 
2489 
2490 /*
2491  *    Function: sd_scsi_probe_with_cache
2492  *
2493  * Description: This routine implements support for a scsi device probe
2494  *		with cache. The driver maintains a cache of the target
2495  *		responses to scsi probes. If we get no response from a
2496  *		target during a probe inquiry, we remember that, and we
2497  *		avoid additional calls to scsi_probe on non-zero LUNs
2498  *		on the same target until the cache is cleared. By doing
2499  *		so we avoid the 1/4 sec selection timeout for nonzero
2500  *		LUNs. lun0 of a target is always probed.
2501  *
2502  *   Arguments: devp     - Pointer to a scsi_device(9S) structure
2503  *              waitfunc - indicates what the allocator routines should
2504  *			   do when resources are not available. This value
2505  *			   is passed on to scsi_probe() when that routine
2506  *			   is called.
2507  *
2508  * Return Code: SCSIPROBE_NORESP if a NORESP in probe response cache;
2509  *		otherwise the value returned by scsi_probe(9F).
2510  *
2511  *     Context: Kernel thread context
2512  */
2513 
2514 static int
sd_scsi_probe_with_cache(struct scsi_device * devp,int (* waitfn)())2515 sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
2516 {
2517 	struct sd_scsi_probe_cache	*cp;
2518 	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
2519 	int		lun, tgt;
2520 
2521 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2522 	    SCSI_ADDR_PROP_LUN, 0);
2523 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2524 	    SCSI_ADDR_PROP_TARGET, -1);
2525 
2526 	/* Make sure caching enabled and target in range */
2527 	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
2528 		/* do it the old way (no cache) */
2529 		return (scsi_probe(devp, waitfn));
2530 	}
2531 
2532 	mutex_enter(&sd_scsi_probe_cache_mutex);
2533 
2534 	/* Find the cache for this scsi bus instance */
2535 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2536 		if (cp->pdip == pdip) {
2537 			break;
2538 		}
2539 	}
2540 
2541 	/* If we can't find a cache for this pdip, create one */
2542 	if (cp == NULL) {
2543 		int i;
2544 
2545 		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
2546 		    KM_SLEEP);
2547 		cp->pdip = pdip;
2548 		cp->next = sd_scsi_probe_cache_head;
2549 		sd_scsi_probe_cache_head = cp;
2550 		for (i = 0; i < NTARGETS_WIDE; i++) {
2551 			cp->cache[i] = SCSIPROBE_EXISTS;
2552 		}
2553 	}
2554 
2555 	mutex_exit(&sd_scsi_probe_cache_mutex);
2556 
2557 	/* Recompute the cache for this target if LUN zero */
2558 	if (lun == 0) {
2559 		cp->cache[tgt] = SCSIPROBE_EXISTS;
2560 	}
2561 
2562 	/* Don't probe if cache remembers a NORESP from a previous LUN. */
2563 	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
2564 		return (SCSIPROBE_NORESP);
2565 	}
2566 
2567 	/* Do the actual probe; save & return the result */
2568 	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
2569 }
2570 
2571 
2572 /*
2573  *    Function: sd_scsi_target_lun_init
2574  *
2575  * Description: Initializes the attached lun chain mutex and head pointer.
2576  *
2577  *     Context: Kernel thread context
2578  */
2579 
2580 static void
sd_scsi_target_lun_init(void)2581 sd_scsi_target_lun_init(void)
2582 {
2583 	mutex_init(&sd_scsi_target_lun_mutex, NULL, MUTEX_DRIVER, NULL);
2584 	sd_scsi_target_lun_head = NULL;
2585 }
2586 
2587 
2588 /*
2589  *    Function: sd_scsi_target_lun_fini
2590  *
2591  * Description: Frees all resources associated with the attached lun
2592  *              chain
2593  *
2594  *     Context: Kernel thread context
2595  */
2596 
2597 static void
sd_scsi_target_lun_fini(void)2598 sd_scsi_target_lun_fini(void)
2599 {
2600 	struct sd_scsi_hba_tgt_lun	*cp;
2601 	struct sd_scsi_hba_tgt_lun	*ncp;
2602 
2603 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = ncp) {
2604 		ncp = cp->next;
2605 		kmem_free(cp, sizeof (struct sd_scsi_hba_tgt_lun));
2606 	}
2607 	sd_scsi_target_lun_head = NULL;
2608 	mutex_destroy(&sd_scsi_target_lun_mutex);
2609 }
2610 
2611 
2612 /*
2613  *    Function: sd_scsi_get_target_lun_count
2614  *
2615  * Description: This routine will check in the attached lun chain to see
2616  *		how many luns are attached on the required SCSI controller
2617  *		and target. Currently, some capabilities like tagged queue
2618  *		are supported per target based by HBA. So all luns in a
2619  *		target have the same capabilities. Based on this assumption,
2620  *		sd should only set these capabilities once per target. This
2621  *		function is called when sd needs to decide how many luns
2622  *		already attached on a target.
2623  *
2624  *   Arguments: dip	- Pointer to the system's dev_info_t for the SCSI
2625  *			  controller device.
2626  *              target	- The target ID on the controller's SCSI bus.
2627  *
2628  * Return Code: The number of luns attached on the required target and
2629  *		controller.
2630  *		-1 if target ID is not in parallel SCSI scope or the given
2631  *		dip is not in the chain.
2632  *
2633  *     Context: Kernel thread context
2634  */
2635 
2636 static int
sd_scsi_get_target_lun_count(dev_info_t * dip,int target)2637 sd_scsi_get_target_lun_count(dev_info_t *dip, int target)
2638 {
2639 	struct sd_scsi_hba_tgt_lun	*cp;
2640 
2641 	if ((target < 0) || (target >= NTARGETS_WIDE)) {
2642 		return (-1);
2643 	}
2644 
2645 	mutex_enter(&sd_scsi_target_lun_mutex);
2646 
2647 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
2648 		if (cp->pdip == dip) {
2649 			break;
2650 		}
2651 	}
2652 
2653 	mutex_exit(&sd_scsi_target_lun_mutex);
2654 
2655 	if (cp == NULL) {
2656 		return (-1);
2657 	}
2658 
2659 	return (cp->nlun[target]);
2660 }
2661 
2662 
2663 /*
2664  *    Function: sd_scsi_update_lun_on_target
2665  *
2666  * Description: This routine is used to update the attached lun chain when a
2667  *		lun is attached or detached on a target.
2668  *
2669  *   Arguments: dip     - Pointer to the system's dev_info_t for the SCSI
2670  *                        controller device.
2671  *              target  - The target ID on the controller's SCSI bus.
2672  *		flag	- Indicate the lun is attached or detached.
2673  *
2674  *     Context: Kernel thread context
2675  */
2676 
2677 static void
sd_scsi_update_lun_on_target(dev_info_t * dip,int target,int flag)2678 sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag)
2679 {
2680 	struct sd_scsi_hba_tgt_lun	*cp;
2681 
2682 	mutex_enter(&sd_scsi_target_lun_mutex);
2683 
2684 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
2685 		if (cp->pdip == dip) {
2686 			break;
2687 		}
2688 	}
2689 
2690 	if ((cp == NULL) && (flag == SD_SCSI_LUN_ATTACH)) {
2691 		cp = kmem_zalloc(sizeof (struct sd_scsi_hba_tgt_lun),
2692 		    KM_SLEEP);
2693 		cp->pdip = dip;
2694 		cp->next = sd_scsi_target_lun_head;
2695 		sd_scsi_target_lun_head = cp;
2696 	}
2697 
2698 	mutex_exit(&sd_scsi_target_lun_mutex);
2699 
2700 	if (cp != NULL) {
2701 		if (flag == SD_SCSI_LUN_ATTACH) {
2702 			cp->nlun[target] ++;
2703 		} else {
2704 			cp->nlun[target] --;
2705 		}
2706 	}
2707 }
2708 
2709 
2710 /*
2711  *    Function: sd_spin_up_unit
2712  *
2713  * Description: Issues the following commands to spin-up the device:
2714  *		START STOP UNIT, and INQUIRY.
2715  *
2716  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
2717  *                      structure for this target.
2718  *
2719  * Return Code: 0 - success
2720  *		EIO - failure
2721  *		EACCES - reservation conflict
2722  *
2723  *     Context: Kernel thread context
2724  */
2725 
2726 static int
sd_spin_up_unit(sd_ssc_t * ssc)2727 sd_spin_up_unit(sd_ssc_t *ssc)
2728 {
2729 	size_t	resid		= 0;
2730 	int	has_conflict	= FALSE;
2731 	uchar_t *bufaddr;
2732 	int	status;
2733 	struct sd_lun	*un;
2734 
2735 	ASSERT(ssc != NULL);
2736 	un = ssc->ssc_un;
2737 	ASSERT(un != NULL);
2738 
2739 	/*
2740 	 * Send a throwaway START UNIT command.
2741 	 *
2742 	 * If we fail on this, we don't care presently what precisely
2743 	 * is wrong.  EMC's arrays will also fail this with a check
2744 	 * condition (0x2/0x4/0x3) if the device is "inactive," but
2745 	 * we don't want to fail the attach because it may become
2746 	 * "active" later.
2747 	 * We don't know if power condition is supported or not at
2748 	 * this stage, use START STOP bit.
2749 	 */
2750 	status = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
2751 	    SD_TARGET_START, SD_PATH_DIRECT);
2752 
2753 	if (status != 0) {
2754 		if (status == EACCES)
2755 			has_conflict = TRUE;
2756 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
2757 	}
2758 
2759 	/*
2760 	 * Send another INQUIRY command to the target. This is necessary for
2761 	 * non-removable media direct access devices because their INQUIRY data
2762 	 * may not be fully qualified until they are spun up (perhaps via the
2763 	 * START command above).  Note: This seems to be needed for some
2764 	 * legacy devices only.) The INQUIRY command should succeed even if a
2765 	 * Reservation Conflict is present.
2766 	 */
2767 	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
2768 
2769 	if (sd_send_scsi_INQUIRY(ssc, bufaddr, SUN_INQSIZE, 0, 0, &resid)
2770 	    != 0) {
2771 		kmem_free(bufaddr, SUN_INQSIZE);
2772 		sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
2773 		return (EIO);
2774 	}
2775 
2776 	/*
2777 	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
2778 	 * Note that this routine does not return a failure here even if the
2779 	 * INQUIRY command did not return any data.  This is a legacy behavior.
2780 	 */
2781 	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
2782 		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
2783 	}
2784 
2785 	kmem_free(bufaddr, SUN_INQSIZE);
2786 
2787 	/* If we hit a reservation conflict above, tell the caller. */
2788 	if (has_conflict == TRUE) {
2789 		return (EACCES);
2790 	}
2791 
2792 	return (0);
2793 }
2794 
2795 /*
2796  *    Function: sd_enable_descr_sense
2797  *
2798  * Description: This routine attempts to select descriptor sense format
2799  *		using the Control mode page.  Devices that support 64 bit
2800  *		LBAs (for >2TB luns) should also implement descriptor
2801  *		sense data so we will call this function whenever we see
2802  *		a lun larger than 2TB.  If for some reason the device
2803  *		supports 64 bit LBAs but doesn't support descriptor sense
2804  *		presumably the mode select will fail.  Everything will
2805  *		continue to work normally except that we will not get
2806  *		complete sense data for commands that fail with an LBA
2807  *		larger than 32 bits.
2808  *
2809  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
2810  *                      structure for this target.
2811  *
2812  *     Context: Kernel thread context only
2813  */
2814 
2815 static void
sd_enable_descr_sense(sd_ssc_t * ssc)2816 sd_enable_descr_sense(sd_ssc_t *ssc)
2817 {
2818 	uchar_t			*header;
2819 	struct mode_control_scsi3 *ctrl_bufp;
2820 	size_t			buflen;
2821 	size_t			bd_len;
2822 	int			status;
2823 	struct sd_lun		*un;
2824 
2825 	ASSERT(ssc != NULL);
2826 	un = ssc->ssc_un;
2827 	ASSERT(un != NULL);
2828 
2829 	/*
2830 	 * Read MODE SENSE page 0xA, Control Mode Page
2831 	 */
2832 	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
2833 	    sizeof (struct mode_control_scsi3);
2834 	header = kmem_zalloc(buflen, KM_SLEEP);
2835 
2836 	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, header, buflen,
2837 	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT);
2838 
2839 	if (status != 0) {
2840 		SD_ERROR(SD_LOG_COMMON, un,
2841 		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
2842 		goto eds_exit;
2843 	}
2844 
2845 	/*
2846 	 * Determine size of Block Descriptors in order to locate
2847 	 * the mode page data. ATAPI devices return 0, SCSI devices
2848 	 * should return MODE_BLK_DESC_LENGTH.
2849 	 */
2850 	bd_len  = ((struct mode_header *)header)->bdesc_length;
2851 
2852 	/* Clear the mode data length field for MODE SELECT */
2853 	((struct mode_header *)header)->length = 0;
2854 
2855 	ctrl_bufp = (struct mode_control_scsi3 *)
2856 	    (header + MODE_HEADER_LENGTH + bd_len);
2857 
2858 	/*
2859 	 * If the page length is smaller than the expected value,
2860 	 * the target device doesn't support D_SENSE. Bail out here.
2861 	 */
2862 	if (ctrl_bufp->mode_page.length <
2863 	    sizeof (struct mode_control_scsi3) - 2) {
2864 		SD_ERROR(SD_LOG_COMMON, un,
2865 		    "sd_enable_descr_sense: enable D_SENSE failed\n");
2866 		goto eds_exit;
2867 	}
2868 
2869 	/*
2870 	 * Clear PS bit for MODE SELECT
2871 	 */
2872 	ctrl_bufp->mode_page.ps = 0;
2873 
2874 	/*
2875 	 * Set D_SENSE to enable descriptor sense format.
2876 	 */
2877 	ctrl_bufp->d_sense = 1;
2878 
2879 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
2880 
2881 	/*
2882 	 * Use MODE SELECT to commit the change to the D_SENSE bit
2883 	 */
2884 	status = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, header,
2885 	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT);
2886 
2887 	if (status != 0) {
2888 		SD_INFO(SD_LOG_COMMON, un,
2889 		    "sd_enable_descr_sense: mode select ctrl page failed\n");
2890 	} else {
2891 		kmem_free(header, buflen);
2892 		return;
2893 	}
2894 
2895 eds_exit:
2896 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
2897 	kmem_free(header, buflen);
2898 }
2899 
2900 /*
2901  *    Function: sd_reenable_dsense_task
2902  *
2903  * Description: Re-enable descriptor sense after device or bus reset
2904  *
2905  *     Context: Executes in a taskq() thread context
2906  */
2907 static void
sd_reenable_dsense_task(void * arg)2908 sd_reenable_dsense_task(void *arg)
2909 {
2910 	struct	sd_lun	*un = arg;
2911 	sd_ssc_t	*ssc;
2912 
2913 	ASSERT(un != NULL);
2914 
2915 	ssc = sd_ssc_init(un);
2916 	sd_enable_descr_sense(ssc);
2917 	sd_ssc_fini(ssc);
2918 }
2919 
2920 /*
2921  *    Function: sd_set_mmc_caps
2922  *
2923  * Description: This routine determines if the device is MMC compliant and if
2924  *		the device supports CDDA via a mode sense of the CDVD
2925  *		capabilities mode page. Also checks if the device is a
2926  *		dvdram writable device.
2927  *
2928  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
2929  *                      structure for this target.
2930  *
2931  *     Context: Kernel thread context only
2932  */
2933 
2934 static void
sd_set_mmc_caps(sd_ssc_t * ssc)2935 sd_set_mmc_caps(sd_ssc_t *ssc)
2936 {
2937 	struct mode_header_grp2		*sense_mhp;
2938 	uchar_t				*sense_page;
2939 	caddr_t				buf;
2940 	int				bd_len;
2941 	int				status;
2942 	struct uscsi_cmd		com;
2943 	int				rtn;
2944 	uchar_t				*out_data_rw, *out_data_hd;
2945 	uchar_t				*rqbuf_rw, *rqbuf_hd;
2946 	uchar_t				*out_data_gesn;
2947 	int				gesn_len;
2948 	struct sd_lun			*un;
2949 
2950 	ASSERT(ssc != NULL);
2951 	un = ssc->ssc_un;
2952 	ASSERT(un != NULL);
2953 
2954 	/*
2955 	 * The flags which will be set in this function are - mmc compliant,
2956 	 * dvdram writable device, cdda support. Initialize them to FALSE
2957 	 * and if a capability is detected - it will be set to TRUE.
2958 	 */
2959 	un->un_f_mmc_cap = FALSE;
2960 	un->un_f_dvdram_writable_device = FALSE;
2961 	un->un_f_cfg_cdda = FALSE;
2962 
2963 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
2964 	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, (uchar_t *)buf,
2965 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
2966 
2967 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
2968 
2969 	if (status != 0) {
2970 		/* command failed; just return */
2971 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
2972 		return;
2973 	}
2974 	/*
2975 	 * If the mode sense request for the CDROM CAPABILITIES
2976 	 * page (0x2A) succeeds the device is assumed to be MMC.
2977 	 */
2978 	un->un_f_mmc_cap = TRUE;
2979 
2980 	/* See if GET STATUS EVENT NOTIFICATION is supported */
2981 	if (un->un_f_mmc_gesn_polling) {
2982 		gesn_len = SD_GESN_HEADER_LEN + SD_GESN_MEDIA_DATA_LEN;
2983 		out_data_gesn = kmem_zalloc(gesn_len, KM_SLEEP);
2984 
2985 		rtn = sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION(ssc,
2986 		    out_data_gesn, gesn_len, 1 << SD_GESN_MEDIA_CLASS);
2987 
2988 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
2989 
2990 		if ((rtn != 0) || !sd_gesn_media_data_valid(out_data_gesn)) {
2991 			un->un_f_mmc_gesn_polling = FALSE;
2992 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
2993 			    "sd_set_mmc_caps: gesn not supported "
2994 			    "%d %x %x %x %x\n", rtn,
2995 			    out_data_gesn[0], out_data_gesn[1],
2996 			    out_data_gesn[2], out_data_gesn[3]);
2997 		}
2998 
2999 		kmem_free(out_data_gesn, gesn_len);
3000 	}
3001 
3002 	/* Get to the page data */
3003 	sense_mhp = (struct mode_header_grp2 *)buf;
3004 	bd_len = (sense_mhp->bdesc_length_hi << 8) |
3005 	    sense_mhp->bdesc_length_lo;
3006 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3007 		/*
3008 		 * We did not get back the expected block descriptor
3009 		 * length so we cannot determine if the device supports
3010 		 * CDDA. However, we still indicate the device is MMC
3011 		 * according to the successful response to the page
3012 		 * 0x2A mode sense request.
3013 		 */
3014 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3015 		    "sd_set_mmc_caps: Mode Sense returned "
3016 		    "invalid block descriptor length\n");
3017 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3018 		return;
3019 	}
3020 
3021 	/* See if read CDDA is supported */
3022 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
3023 	    bd_len);
3024 	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;
3025 
3026 	/* See if writing DVD RAM is supported. */
3027 	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
3028 	if (un->un_f_dvdram_writable_device == TRUE) {
3029 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3030 		return;
3031 	}
3032 
3033 	/*
3034 	 * If the device presents DVD or CD capabilities in the mode
3035 	 * page, we can return here since a RRD will not have
3036 	 * these capabilities.
3037 	 */
3038 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3039 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3040 		return;
3041 	}
3042 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3043 
3044 	/*
3045 	 * If un->un_f_dvdram_writable_device is still FALSE,
3046 	 * check for a Removable Rigid Disk (RRD).  A RRD
3047 	 * device is identified by the features RANDOM_WRITABLE and
3048 	 * HARDWARE_DEFECT_MANAGEMENT.
3049 	 */
3050 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3051 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3052 
3053 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_rw,
3054 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3055 	    RANDOM_WRITABLE, SD_PATH_STANDARD);
3056 
3057 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3058 
3059 	if (rtn != 0) {
3060 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3061 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3062 		return;
3063 	}
3064 
3065 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3066 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3067 
3068 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_hd,
3069 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3070 	    HARDWARE_DEFECT_MANAGEMENT, SD_PATH_STANDARD);
3071 
3072 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3073 
3074 	if (rtn == 0) {
3075 		/*
3076 		 * We have good information, check for random writable
3077 		 * and hardware defect features.
3078 		 */
3079 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3080 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
3081 			un->un_f_dvdram_writable_device = TRUE;
3082 		}
3083 	}
3084 
3085 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3086 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3087 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3088 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3089 }
3090 
3091 /*
3092  *    Function: sd_check_for_writable_cd
3093  *
3094  * Description: This routine determines if the media in the device is
3095  *		writable or not. It uses the get configuration command (0x46)
3096  *		to determine if the media is writable
3097  *
3098  *   Arguments: un - driver soft state (unit) structure
3099  *              path_flag - SD_PATH_DIRECT to use the USCSI "direct"
3100  *                           chain and the normal command waitq, or
3101  *                           SD_PATH_DIRECT_PRIORITY to use the USCSI
3102  *                           "direct" chain and bypass the normal command
3103  *                           waitq.
3104  *
3105  *     Context: Never called at interrupt context.
3106  */
3107 
3108 static void
sd_check_for_writable_cd(sd_ssc_t * ssc,int path_flag)3109 sd_check_for_writable_cd(sd_ssc_t *ssc, int path_flag)
3110 {
3111 	struct uscsi_cmd		com;
3112 	uchar_t				*out_data;
3113 	uchar_t				*rqbuf;
3114 	int				rtn;
3115 	uchar_t				*out_data_rw, *out_data_hd;
3116 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3117 	struct mode_header_grp2		*sense_mhp;
3118 	uchar_t				*sense_page;
3119 	caddr_t				buf;
3120 	int				bd_len;
3121 	int				status;
3122 	struct sd_lun			*un;
3123 
3124 	ASSERT(ssc != NULL);
3125 	un = ssc->ssc_un;
3126 	ASSERT(un != NULL);
3127 	ASSERT(mutex_owned(SD_MUTEX(un)));
3128 
3129 	/*
3130 	 * Initialize the writable media to false, if configuration info.
3131 	 * tells us otherwise then only we will set it.
3132 	 */
3133 	un->un_f_mmc_writable_media = FALSE;
3134 	mutex_exit(SD_MUTEX(un));
3135 
3136 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
3137 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3138 
3139 	rtn = sd_send_scsi_GET_CONFIGURATION(ssc, &com, rqbuf, SENSE_LENGTH,
3140 	    out_data, SD_PROFILE_HEADER_LEN, path_flag);
3141 
3142 	if (rtn != 0)
3143 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3144 
3145 	mutex_enter(SD_MUTEX(un));
3146 	if (rtn == 0) {
3147 		/*
3148 		 * We have good information, check for writable DVD.
3149 		 */
3150 		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
3151 			un->un_f_mmc_writable_media = TRUE;
3152 			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3153 			kmem_free(rqbuf, SENSE_LENGTH);
3154 			return;
3155 		}
3156 	}
3157 
3158 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3159 	kmem_free(rqbuf, SENSE_LENGTH);
3160 
3161 	/*
3162 	 * Determine if this is a RRD type device.
3163 	 */
3164 	mutex_exit(SD_MUTEX(un));
3165 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3166 	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, (uchar_t *)buf,
3167 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, path_flag);
3168 
3169 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3170 
3171 	mutex_enter(SD_MUTEX(un));
3172 	if (status != 0) {
3173 		/* command failed; just return */
3174 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3175 		return;
3176 	}
3177 
3178 	/* Get to the page data */
3179 	sense_mhp = (struct mode_header_grp2 *)buf;
3180 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
3181 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3182 		/*
3183 		 * We did not get back the expected block descriptor length so
3184 		 * we cannot check the mode page.
3185 		 */
3186 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3187 		    "sd_check_for_writable_cd: Mode Sense returned "
3188 		    "invalid block descriptor length\n");
3189 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3190 		return;
3191 	}
3192 
3193 	/*
3194 	 * If the device presents DVD or CD capabilities in the mode
3195 	 * page, we can return here since a RRD device will not have
3196 	 * these capabilities.
3197 	 */
3198 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
3199 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3200 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3201 		return;
3202 	}
3203 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3204 
3205 	/*
3206 	 * If un->un_f_mmc_writable_media is still FALSE,
3207 	 * check for RRD type media.  A RRD device is identified
3208 	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
3209 	 */
3210 	mutex_exit(SD_MUTEX(un));
3211 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3212 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3213 
3214 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_rw,
3215 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3216 	    RANDOM_WRITABLE, path_flag);
3217 
3218 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3219 	if (rtn != 0) {
3220 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3221 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3222 		mutex_enter(SD_MUTEX(un));
3223 		return;
3224 	}
3225 
3226 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3227 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3228 
3229 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_hd,
3230 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3231 	    HARDWARE_DEFECT_MANAGEMENT, path_flag);
3232 
3233 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3234 	mutex_enter(SD_MUTEX(un));
3235 	if (rtn == 0) {
3236 		/*
3237 		 * We have good information, check for random writable
3238 		 * and hardware defect features as current.
3239 		 */
3240 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3241 		    (out_data_rw[10] & 0x1) &&
3242 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
3243 		    (out_data_hd[10] & 0x1)) {
3244 			un->un_f_mmc_writable_media = TRUE;
3245 		}
3246 	}
3247 
3248 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3249 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3250 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3251 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3252 }
3253 
3254 /*
3255  *    Function: sd_read_unit_properties
3256  *
3257  * Description: The following implements a property lookup mechanism.
3258  *		Properties for particular disks (keyed on vendor, model
3259  *		and rev numbers) are sought in the sd.conf file via
3260  *		sd_process_sdconf_file(), and if not found there, are
3261  *		looked for in a list hardcoded in this driver via
3262  *		sd_process_sdconf_table() Once located the properties
3263  *		are used to update the driver unit structure.
3264  *
3265  *   Arguments: un - driver soft state (unit) structure
3266  */
3267 
3268 static void
sd_read_unit_properties(struct sd_lun * un)3269 sd_read_unit_properties(struct sd_lun *un)
3270 {
3271 	/*
3272 	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3273 	 * the "sd-config-list" property (from the sd.conf file) or if
3274 	 * there was not a match for the inquiry vid/pid. If this event
3275 	 * occurs the static driver configuration table is searched for
3276 	 * a match.
3277 	 */
3278 	ASSERT(un != NULL);
3279 	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3280 		sd_process_sdconf_table(un);
3281 	}
3282 }
3283 
3284 
3285 /*
3286  *    Function: sd_process_sdconf_file
3287  *
3288  * Description: Use ddi_prop_lookup(9F) to obtain the properties from the
3289  *		driver's config file (ie, sd.conf) and update the driver
3290  *		soft state structure accordingly.
3291  *
3292  *   Arguments: un - driver soft state (unit) structure
3293  *
3294  * Return Code: SD_SUCCESS - The properties were successfully set according
3295  *			     to the driver configuration file.
3296  *		SD_FAILURE - The driver config list was not obtained or
3297  *			     there was no vid/pid match. This indicates that
3298  *			     the static config table should be used.
3299  *
3300  * The config file has a property, "sd-config-list". Currently we support
3301  * two kinds of formats. For both formats, the value of this property
3302  * is a list of duplets:
3303  *
3304  *  sd-config-list=
3305  *	<duplet>,
3306  *	[,<duplet>]*;
3307  *
3308  * For the improved format, where
3309  *
3310  *     <duplet>:= "<vid+pid>","<tunable-list>"
3311  *
3312  * and
3313  *
3314  *     <tunable-list>:=   <tunable> [, <tunable> ]*;
3315  *     <tunable> =        <name> : <value>
3316  *
3317  * The <vid+pid> is the string that is returned by the target device on a
3318  * SCSI inquiry command, the <tunable-list> contains one or more tunables
3319  * to apply to all target devices with the specified <vid+pid>.
3320  *
3321  * Each <tunable> is a "<name> : <value>" pair.
3322  *
3323  * For the old format, the structure of each duplet is as follows:
3324  *
3325  *  <duplet>:= "<vid+pid>","<data-property-name_list>"
3326  *
3327  * The first entry of the duplet is the device ID string (the concatenated
3328  * vid & pid; not to be confused with a device_id).  This is defined in
3329  * the same way as in the sd_disk_table.
3330  *
3331  * The second part of the duplet is a string that identifies a
3332  * data-property-name-list. The data-property-name-list is defined as
3333  * follows:
3334  *
3335  *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3336  *
3337  * The syntax of <data-property-name> depends on the <version> field.
3338  *
3339  * If version = SD_CONF_VERSION_1 we have the following syntax:
3340  *
3341  *	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3342  *
3343  * where the prop0 value will be used to set prop0 if bit0 set in the
3344  * flags, prop1 if bit1 set, etc. and N = SD_CONF_MAX_ITEMS -1
3345  *
3346  */
3347 
3348 static int
sd_process_sdconf_file(struct sd_lun * un)3349 sd_process_sdconf_file(struct sd_lun *un)
3350 {
3351 	char	**config_list = NULL;
3352 	uint_t	nelements;
3353 	char	*vidptr;
3354 	int	vidlen;
3355 	char	*dnlist_ptr;
3356 	char	*dataname_ptr;
3357 	char	*dataname_lasts;
3358 	int	*data_list = NULL;
3359 	uint_t	data_list_len;
3360 	int	rval = SD_FAILURE;
3361 	int	i;
3362 
3363 	ASSERT(un != NULL);
3364 
3365 	/* Obtain the configuration list associated with the .conf file */
3366 	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, SD_DEVINFO(un),
3367 	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, sd_config_list,
3368 	    &config_list, &nelements) != DDI_PROP_SUCCESS) {
3369 		return (SD_FAILURE);
3370 	}
3371 
3372 	/*
3373 	 * Compare vids in each duplet to the inquiry vid - if a match is
3374 	 * made, get the data value and update the soft state structure
3375 	 * accordingly.
3376 	 *
3377 	 * Each duplet should show as a pair of strings, return SD_FAILURE
3378 	 * otherwise.
3379 	 */
3380 	if (nelements & 1) {
3381 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3382 		    "sd-config-list should show as pairs of strings.\n");
3383 		if (config_list)
3384 			ddi_prop_free(config_list);
3385 		return (SD_FAILURE);
3386 	}
3387 
3388 	for (i = 0; i < nelements; i += 2) {
3389 		/*
3390 		 * Note: The assumption here is that each vid entry is on
3391 		 * a unique line from its associated duplet.
3392 		 */
3393 		vidptr = config_list[i];
3394 		vidlen = (int)strlen(vidptr);
3395 		if (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS) {
3396 			continue;
3397 		}
3398 
3399 		/*
3400 		 * dnlist contains 1 or more blank separated
3401 		 * data-property-name entries
3402 		 */
3403 		dnlist_ptr = config_list[i + 1];
3404 
3405 		if (strchr(dnlist_ptr, ':') != NULL) {
3406 			/*
3407 			 * Decode the improved format sd-config-list.
3408 			 */
3409 			sd_nvpair_str_decode(un, dnlist_ptr);
3410 		} else {
3411 			/*
3412 			 * The old format sd-config-list, loop through all
3413 			 * data-property-name entries in the
3414 			 * data-property-name-list
3415 			 * setting the properties for each.
3416 			 */
3417 			for (dataname_ptr = strtok_r(dnlist_ptr, " \t",
3418 			    &dataname_lasts); dataname_ptr != NULL;
3419 			    dataname_ptr = strtok_r(NULL, " \t",
3420 			    &dataname_lasts)) {
3421 				int version;
3422 
3423 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
3424 				    "sd_process_sdconf_file: disk:%s, "
3425 				    "data:%s\n", vidptr, dataname_ptr);
3426 
3427 				/* Get the data list */
3428 				if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY,
3429 				    SD_DEVINFO(un), 0, dataname_ptr, &data_list,
3430 				    &data_list_len) != DDI_PROP_SUCCESS) {
3431 					SD_INFO(SD_LOG_ATTACH_DETACH, un,
3432 					    "sd_process_sdconf_file: data "
3433 					    "property (%s) has no value\n",
3434 					    dataname_ptr);
3435 					continue;
3436 				}
3437 
3438 				version = data_list[0];
3439 
3440 				if (version == SD_CONF_VERSION_1) {
3441 					sd_tunables values;
3442 
3443 					/* Set the properties */
3444 					if (sd_chk_vers1_data(un, data_list[1],
3445 					    &data_list[2], data_list_len,
3446 					    dataname_ptr) == SD_SUCCESS) {
3447 						sd_get_tunables_from_conf(un,
3448 						    data_list[1], &data_list[2],
3449 						    &values);
3450 						sd_set_vers1_properties(un,
3451 						    data_list[1], &values);
3452 						rval = SD_SUCCESS;
3453 					} else {
3454 						rval = SD_FAILURE;
3455 					}
3456 				} else {
3457 					scsi_log(SD_DEVINFO(un), sd_label,
3458 					    CE_WARN, "data property %s version "
3459 					    "0x%x is invalid.",
3460 					    dataname_ptr, version);
3461 					rval = SD_FAILURE;
3462 				}
3463 				if (data_list)
3464 					ddi_prop_free(data_list);
3465 			}
3466 		}
3467 	}
3468 
3469 	/* free up the memory allocated by ddi_prop_lookup_string_array(). */
3470 	if (config_list) {
3471 		ddi_prop_free(config_list);
3472 	}
3473 
3474 	return (rval);
3475 }
3476 
3477 /*
3478  *    Function: sd_nvpair_str_decode()
3479  *
3480  * Description: Parse the improved format sd-config-list to get
3481  *    each entry of tunable, which includes a name-value pair.
3482  *    Then call sd_set_properties() to set the property.
3483  *
3484  *   Arguments: un - driver soft state (unit) structure
3485  *    nvpair_str - the tunable list
3486  */
3487 static void
sd_nvpair_str_decode(struct sd_lun * un,char * nvpair_str)3488 sd_nvpair_str_decode(struct sd_lun *un, char *nvpair_str)
3489 {
3490 	char	*nv, *name, *value, *token;
3491 	char	*nv_lasts, *v_lasts, *x_lasts;
3492 
3493 	for (nv = strtok_r(nvpair_str, ",", &nv_lasts); nv != NULL;
3494 	    nv = strtok_r(NULL, ",", &nv_lasts)) {
3495 		token = strtok_r(nv, ":", &v_lasts);
3496 		name  = strtok_r(token, " \t", &x_lasts);
3497 		token = strtok_r(NULL, ":", &v_lasts);
3498 		value = strtok_r(token, " \t", &x_lasts);
3499 		if (name == NULL || value == NULL) {
3500 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3501 			    "sd_nvpair_str_decode: "
3502 			    "name or value is not valid!\n");
3503 		} else {
3504 			sd_set_properties(un, name, value);
3505 		}
3506 	}
3507 }
3508 
3509 /*
3510  *    Function: sd_set_properties()
3511  *
3512  * Description: Set device properties based on the improved
3513  *    format sd-config-list.
3514  *
3515  *   Arguments: un - driver soft state (unit) structure
3516  *    name  - supported tunable name
3517  *    value - tunable value
3518  */
3519 static void
sd_set_properties(struct sd_lun * un,char * name,char * value)3520 sd_set_properties(struct sd_lun *un, char *name, char *value)
3521 {
3522 	char	*endptr = NULL;
3523 	long	val = 0;
3524 
3525 	if (strcasecmp(name, "cache-nonvolatile") == 0) {
3526 		if (strcasecmp(value, "true") == 0) {
3527 			un->un_f_suppress_cache_flush = TRUE;
3528 		} else if (strcasecmp(value, "false") == 0) {
3529 			un->un_f_suppress_cache_flush = FALSE;
3530 		} else {
3531 			goto value_invalid;
3532 		}
3533 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3534 		    "suppress_cache_flush flag set to %d\n",
3535 		    un->un_f_suppress_cache_flush);
3536 		return;
3537 	}
3538 
3539 	if (strcasecmp(name, "controller-type") == 0) {
3540 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3541 			un->un_ctype = val;
3542 		} else {
3543 			goto value_invalid;
3544 		}
3545 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3546 		    "ctype set to %d\n", un->un_ctype);
3547 		return;
3548 	}
3549 
3550 	if (strcasecmp(name, "delay-busy") == 0) {
3551 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3552 			un->un_busy_timeout = drv_usectohz(val / 1000);
3553 		} else {
3554 			goto value_invalid;
3555 		}
3556 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3557 		    "busy_timeout set to %d\n", un->un_busy_timeout);
3558 		return;
3559 	}
3560 
3561 	if (strcasecmp(name, "disksort") == 0) {
3562 		if (strcasecmp(value, "true") == 0) {
3563 			un->un_f_disksort_disabled = FALSE;
3564 		} else if (strcasecmp(value, "false") == 0) {
3565 			un->un_f_disksort_disabled = TRUE;
3566 		} else {
3567 			goto value_invalid;
3568 		}
3569 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3570 		    "disksort disabled flag set to %d\n",
3571 		    un->un_f_disksort_disabled);
3572 		return;
3573 	}
3574 
3575 	if (strcasecmp(name, "power-condition") == 0) {
3576 		if (strcasecmp(value, "true") == 0) {
3577 			un->un_f_power_condition_disabled = FALSE;
3578 		} else if (strcasecmp(value, "false") == 0) {
3579 			un->un_f_power_condition_disabled = TRUE;
3580 		} else {
3581 			goto value_invalid;
3582 		}
3583 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3584 		    "power condition disabled flag set to %d\n",
3585 		    un->un_f_power_condition_disabled);
3586 		return;
3587 	}
3588 
3589 	if (strcasecmp(name, "timeout-releasereservation") == 0) {
3590 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3591 			un->un_reserve_release_time = val;
3592 		} else {
3593 			goto value_invalid;
3594 		}
3595 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3596 		    "reservation release timeout set to %d\n",
3597 		    un->un_reserve_release_time);
3598 		return;
3599 	}
3600 
3601 	if (strcasecmp(name, "reset-lun") == 0) {
3602 		if (strcasecmp(value, "true") == 0) {
3603 			un->un_f_lun_reset_enabled = TRUE;
3604 		} else if (strcasecmp(value, "false") == 0) {
3605 			un->un_f_lun_reset_enabled = FALSE;
3606 		} else {
3607 			goto value_invalid;
3608 		}
3609 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3610 		    "lun reset enabled flag set to %d\n",
3611 		    un->un_f_lun_reset_enabled);
3612 		return;
3613 	}
3614 
3615 	if (strcasecmp(name, "retries-busy") == 0) {
3616 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3617 			un->un_busy_retry_count = val;
3618 		} else {
3619 			goto value_invalid;
3620 		}
3621 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3622 		    "busy retry count set to %d\n", un->un_busy_retry_count);
3623 		return;
3624 	}
3625 
3626 	if (strcasecmp(name, "retries-timeout") == 0) {
3627 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3628 			un->un_retry_count = val;
3629 		} else {
3630 			goto value_invalid;
3631 		}
3632 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3633 		    "timeout retry count set to %d\n", un->un_retry_count);
3634 		return;
3635 	}
3636 
3637 	if (strcasecmp(name, "retries-notready") == 0) {
3638 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3639 			un->un_notready_retry_count = val;
3640 		} else {
3641 			goto value_invalid;
3642 		}
3643 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3644 		    "notready retry count set to %d\n",
3645 		    un->un_notready_retry_count);
3646 		return;
3647 	}
3648 
3649 	if (strcasecmp(name, "retries-reset") == 0) {
3650 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3651 			un->un_reset_retry_count = val;
3652 		} else {
3653 			goto value_invalid;
3654 		}
3655 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3656 		    "reset retry count set to %d\n",
3657 		    un->un_reset_retry_count);
3658 		return;
3659 	}
3660 
3661 	if (strcasecmp(name, "throttle-max") == 0) {
3662 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3663 			un->un_saved_throttle = un->un_throttle = val;
3664 		} else {
3665 			goto value_invalid;
3666 		}
3667 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3668 		    "throttle set to %d\n", un->un_throttle);
3669 	}
3670 
3671 	if (strcasecmp(name, "throttle-min") == 0) {
3672 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3673 			un->un_min_throttle = val;
3674 		} else {
3675 			goto value_invalid;
3676 		}
3677 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3678 		    "min throttle set to %d\n", un->un_min_throttle);
3679 	}
3680 
3681 	if (strcasecmp(name, "rmw-type") == 0) {
3682 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3683 			un->un_f_rmw_type = val;
3684 		} else {
3685 			goto value_invalid;
3686 		}
3687 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3688 		    "RMW type set to %d\n", un->un_f_rmw_type);
3689 	}
3690 
3691 	if (strcasecmp(name, "physical-block-size") == 0) {
3692 		if (ddi_strtol(value, &endptr, 0, &val) == 0 &&
3693 		    ISP2(val) && val >= un->un_tgt_blocksize &&
3694 		    val >= un->un_sys_blocksize) {
3695 			un->un_phy_blocksize = val;
3696 		} else {
3697 			goto value_invalid;
3698 		}
3699 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3700 		    "physical block size set to %d\n", un->un_phy_blocksize);
3701 	}
3702 
3703 	if (strcasecmp(name, "retries-victim") == 0) {
3704 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3705 			un->un_victim_retry_count = val;
3706 		} else {
3707 			goto value_invalid;
3708 		}
3709 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3710 		    "victim retry count set to %d\n",
3711 		    un->un_victim_retry_count);
3712 		return;
3713 	}
3714 
3715 	/*
3716 	 * Validate the throttle values.
3717 	 * If any of the numbers are invalid, set everything to defaults.
3718 	 */
3719 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
3720 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
3721 	    (un->un_min_throttle > un->un_throttle)) {
3722 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
3723 		un->un_min_throttle = sd_min_throttle;
3724 	}
3725 
3726 	if (strcasecmp(name, "mmc-gesn-polling") == 0) {
3727 		if (strcasecmp(value, "true") == 0) {
3728 			un->un_f_mmc_gesn_polling = TRUE;
3729 		} else if (strcasecmp(value, "false") == 0) {
3730 			un->un_f_mmc_gesn_polling = FALSE;
3731 		} else {
3732 			goto value_invalid;
3733 		}
3734 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3735 		    "mmc-gesn-polling set to %d\n",
3736 		    un->un_f_mmc_gesn_polling);
3737 	}
3738 
3739 	return;
3740 
3741 value_invalid:
3742 	SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3743 	    "value of prop %s is invalid\n", name);
3744 }
3745 
3746 /*
3747  *    Function: sd_get_tunables_from_conf()
3748  *
3749  *
3750  *    This function reads the data list from the sd.conf file and pulls
3751  *    the values that can have numeric values as arguments and places
3752  *    the values in the appropriate sd_tunables member.
3753  *    Since the order of the data list members varies across platforms
3754  *    This function reads them from the data list in a platform specific
3755  *    order and places them into the correct sd_tunable member that is
3756  *    consistent across all platforms.
3757  */
3758 static void
sd_get_tunables_from_conf(struct sd_lun * un,int flags,int * data_list,sd_tunables * values)3759 sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
3760     sd_tunables *values)
3761 {
3762 	int i;
3763 	int mask;
3764 
3765 	bzero(values, sizeof (sd_tunables));
3766 
3767 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3768 
3769 		mask = 1 << i;
3770 		if (mask > flags) {
3771 			break;
3772 		}
3773 
3774 		switch (mask & flags) {
3775 		case 0:	/* This mask bit not set in flags */
3776 			continue;
3777 		case SD_CONF_BSET_THROTTLE:
3778 			values->sdt_throttle = data_list[i];
3779 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3780 			    "sd_get_tunables_from_conf: throttle = %d\n",
3781 			    values->sdt_throttle);
3782 			break;
3783 		case SD_CONF_BSET_CTYPE:
3784 			values->sdt_ctype = data_list[i];
3785 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3786 			    "sd_get_tunables_from_conf: ctype = %d\n",
3787 			    values->sdt_ctype);
3788 			break;
3789 		case SD_CONF_BSET_NRR_COUNT:
3790 			values->sdt_not_rdy_retries = data_list[i];
3791 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3792 			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
3793 			    values->sdt_not_rdy_retries);
3794 			break;
3795 		case SD_CONF_BSET_BSY_RETRY_COUNT:
3796 			values->sdt_busy_retries = data_list[i];
3797 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3798 			    "sd_get_tunables_from_conf: busy_retries = %d\n",
3799 			    values->sdt_busy_retries);
3800 			break;
3801 		case SD_CONF_BSET_RST_RETRIES:
3802 			values->sdt_reset_retries = data_list[i];
3803 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3804 			    "sd_get_tunables_from_conf: reset_retries = %d\n",
3805 			    values->sdt_reset_retries);
3806 			break;
3807 		case SD_CONF_BSET_RSV_REL_TIME:
3808 			values->sdt_reserv_rel_time = data_list[i];
3809 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3810 			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
3811 			    values->sdt_reserv_rel_time);
3812 			break;
3813 		case SD_CONF_BSET_MIN_THROTTLE:
3814 			values->sdt_min_throttle = data_list[i];
3815 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3816 			    "sd_get_tunables_from_conf: min_throttle = %d\n",
3817 			    values->sdt_min_throttle);
3818 			break;
3819 		case SD_CONF_BSET_DISKSORT_DISABLED:
3820 			values->sdt_disk_sort_dis = data_list[i];
3821 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3822 			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
3823 			    values->sdt_disk_sort_dis);
3824 			break;
3825 		case SD_CONF_BSET_LUN_RESET_ENABLED:
3826 			values->sdt_lun_reset_enable = data_list[i];
3827 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3828 			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
3829 			    "\n", values->sdt_lun_reset_enable);
3830 			break;
3831 		case SD_CONF_BSET_CACHE_IS_NV:
3832 			values->sdt_suppress_cache_flush = data_list[i];
3833 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3834 			    "sd_get_tunables_from_conf: \
3835 			    suppress_cache_flush = %d"
3836 			    "\n", values->sdt_suppress_cache_flush);
3837 			break;
3838 		case SD_CONF_BSET_PC_DISABLED:
3839 			values->sdt_disk_sort_dis = data_list[i];
3840 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3841 			    "sd_get_tunables_from_conf: power_condition_dis = "
3842 			    "%d\n", values->sdt_power_condition_dis);
3843 			break;
3844 		}
3845 	}
3846 }
3847 
3848 /*
3849  *    Function: sd_process_sdconf_table
3850  *
3851  * Description: Search the static configuration table for a match on the
3852  *		inquiry vid/pid and update the driver soft state structure
3853  *		according to the table property values for the device.
3854  *
3855  *		The form of a configuration table entry is:
3856  *		  <vid+pid>,<flags>,<property-data>
3857  *		  "SEAGATE ST42400N",1,0x40000,
3858  *		  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1;
3859  *
3860  *   Arguments: un - driver soft state (unit) structure
3861  */
3862 
3863 static void
sd_process_sdconf_table(struct sd_lun * un)3864 sd_process_sdconf_table(struct sd_lun *un)
3865 {
3866 	char	*id = NULL;
3867 	int	table_index;
3868 	int	idlen;
3869 
3870 	ASSERT(un != NULL);
3871 	for (table_index = 0; table_index < sd_disk_table_size;
3872 	    table_index++) {
3873 		id = sd_disk_table[table_index].device_id;
3874 		idlen = strlen(id);
3875 
3876 		/*
3877 		 * The static configuration table currently does not
3878 		 * implement version 10 properties. Additionally,
3879 		 * multiple data-property-name entries are not
3880 		 * implemented in the static configuration table.
3881 		 */
3882 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
3883 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3884 			    "sd_process_sdconf_table: disk %s\n", id);
3885 			sd_set_vers1_properties(un,
3886 			    sd_disk_table[table_index].flags,
3887 			    sd_disk_table[table_index].properties);
3888 			break;
3889 		}
3890 	}
3891 }
3892 
3893 
3894 /*
3895  *    Function: sd_sdconf_id_match
3896  *
3897  * Description: This local function implements a case sensitive vid/pid
3898  *		comparison as well as the boundary cases of wild card and
3899  *		multiple blanks.
3900  *
3901  *		Note: An implicit assumption made here is that the scsi
3902  *		inquiry structure will always keep the vid, pid and
3903  *		revision strings in consecutive sequence, so they can be
3904  *		read as a single string. If this assumption is not the
3905  *		case, a separate string, to be used for the check, needs
3906  *		to be built with these strings concatenated.
3907  *
3908  *   Arguments: un - driver soft state (unit) structure
3909  *		id - table or config file vid/pid
3910  *		idlen  - length of the vid/pid (bytes)
3911  *
3912  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3913  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3914  */
3915 
3916 static int
sd_sdconf_id_match(struct sd_lun * un,char * id,int idlen)3917 sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
3918 {
3919 	struct scsi_inquiry	*sd_inq;
3920 	int			rval = SD_SUCCESS;
3921 
3922 	ASSERT(un != NULL);
3923 	sd_inq = un->un_sd->sd_inq;
3924 	ASSERT(id != NULL);
3925 
3926 	/*
3927 	 * We use the inq_vid as a pointer to a buffer containing the
3928 	 * vid and pid and use the entire vid/pid length of the table
3929 	 * entry for the comparison. This works because the inq_pid
3930 	 * data member follows inq_vid in the scsi_inquiry structure.
3931 	 */
3932 	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
3933 		/*
3934 		 * The user id string is compared to the inquiry vid/pid
3935 		 * using a case insensitive comparison and ignoring
3936 		 * multiple spaces.
3937 		 */
3938 		rval = sd_blank_cmp(un, id, idlen);
3939 		if (rval != SD_SUCCESS) {
3940 			/*
3941 			 * User id strings that start and end with a "*"
3942 			 * are a special case. These do not have a
3943 			 * specific vendor, and the product string can
3944 			 * appear anywhere in the 16 byte PID portion of
3945 			 * the inquiry data. This is a simple strstr()
3946 			 * type search for the user id in the inquiry data.
3947 			 */
3948 			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
3949 				char	*pidptr = &id[1];
3950 				int	i;
3951 				int	j;
3952 				int	pidstrlen = idlen - 2;
3953 				j = sizeof (SD_INQUIRY(un)->inq_pid) -
3954 				    pidstrlen;
3955 
3956 				if (j < 0) {
3957 					return (SD_FAILURE);
3958 				}
3959 				for (i = 0; i < j; i++) {
3960 					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
3961 					    pidptr, pidstrlen) == 0) {
3962 						rval = SD_SUCCESS;
3963 						break;
3964 					}
3965 				}
3966 			}
3967 		}
3968 	}
3969 	return (rval);
3970 }
3971 
3972 
3973 /*
3974  *    Function: sd_blank_cmp
3975  *
3976  * Description: If the id string starts and ends with a space, treat
3977  *		multiple consecutive spaces as equivalent to a single
3978  *		space. For example, this causes a sd_disk_table entry
3979  *		of " NEC CDROM " to match a device's id string of
3980  *		"NEC       CDROM".
3981  *
3982  *		Note: The success exit condition for this routine is if
3983  *		the pointer to the table entry is '\0' and the cnt of
3984  *		the inquiry length is zero. This will happen if the inquiry
3985  *		string returned by the device is padded with spaces to be
3986  *		exactly 24 bytes in length (8 byte vid + 16 byte pid). The
3987  *		SCSI spec states that the inquiry string is to be padded with
3988  *		spaces.
3989  *
3990  *   Arguments: un - driver soft state (unit) structure
3991  *		id - table or config file vid/pid
3992  *		idlen  - length of the vid/pid (bytes)
3993  *
3994  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3995  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3996  */
3997 
3998 static int
sd_blank_cmp(struct sd_lun * un,char * id,int idlen)3999 sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
4000 {
4001 	char		*p1;
4002 	char		*p2;
4003 	int		cnt;
4004 	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
4005 	    sizeof (SD_INQUIRY(un)->inq_pid);
4006 
4007 	ASSERT(un != NULL);
4008 	p2 = un->un_sd->sd_inq->inq_vid;
4009 	ASSERT(id != NULL);
4010 	p1 = id;
4011 
4012 	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
4013 		/*
4014 		 * Note: string p1 is terminated by a NUL but string p2
4015 		 * isn't.  The end of p2 is determined by cnt.
4016 		 */
4017 		for (;;) {
4018 			/* skip over any extra blanks in both strings */
4019 			while ((*p1 != '\0') && (*p1 == ' ')) {
4020 				p1++;
4021 			}
4022 			while ((cnt != 0) && (*p2 == ' ')) {
4023 				p2++;
4024 				cnt--;
4025 			}
4026 
4027 			/* compare the two strings */
4028 			if ((cnt == 0) ||
4029 			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
4030 				break;
4031 			}
4032 			while ((cnt > 0) &&
4033 			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
4034 				p1++;
4035 				p2++;
4036 				cnt--;
4037 			}
4038 		}
4039 	}
4040 
4041 	/* return SD_SUCCESS if both strings match */
4042 	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
4043 }
4044 
4045 
4046 /*
4047  *    Function: sd_chk_vers1_data
4048  *
4049  * Description: Verify the version 1 device properties provided by the
4050  *		user via the configuration file
4051  *
4052  *   Arguments: un	     - driver soft state (unit) structure
4053  *		flags	     - integer mask indicating properties to be set
4054  *		prop_list    - integer list of property values
4055  *		list_len     - number of the elements
4056  *
4057  * Return Code: SD_SUCCESS - Indicates the user provided data is valid
4058  *		SD_FAILURE - Indicates the user provided data is invalid
4059  */
4060 
4061 static int
sd_chk_vers1_data(struct sd_lun * un,int flags,int * prop_list,int list_len,char * dataname_ptr)4062 sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
4063     int list_len, char *dataname_ptr)
4064 {
4065 	int i;
4066 	int mask = 1;
4067 	int index = 0;
4068 
4069 	ASSERT(un != NULL);
4070 
4071 	/* Check for a NULL property name and list */
4072 	if (dataname_ptr == NULL) {
4073 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4074 		    "sd_chk_vers1_data: NULL data property name.");
4075 		return (SD_FAILURE);
4076 	}
4077 	if (prop_list == NULL) {
4078 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4079 		    "sd_chk_vers1_data: %s NULL data property list.",
4080 		    dataname_ptr);
4081 		return (SD_FAILURE);
4082 	}
4083 
4084 	/* Display a warning if undefined bits are set in the flags */
4085 	if (flags & ~SD_CONF_BIT_MASK) {
4086 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4087 		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
4088 		    "Properties not set.",
4089 		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
4090 		return (SD_FAILURE);
4091 	}
4092 
4093 	/*
4094 	 * Verify the length of the list by identifying the highest bit set
4095 	 * in the flags and validating that the property list has a length
4096 	 * up to the index of this bit.
4097 	 */
4098 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
4099 		if (flags & mask) {
4100 			index++;
4101 		}
4102 		mask = 1 << i;
4103 	}
4104 	if (list_len < (index + 2)) {
4105 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4106 		    "sd_chk_vers1_data: "
4107 		    "Data property list %s size is incorrect. "
4108 		    "Properties not set.", dataname_ptr);
4109 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
4110 		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
4111 		return (SD_FAILURE);
4112 	}
4113 	return (SD_SUCCESS);
4114 }
4115 
4116 
4117 /*
4118  *    Function: sd_set_vers1_properties
4119  *
4120  * Description: Set version 1 device properties based on a property list
4121  *		retrieved from the driver configuration file or static
4122  *		configuration table. Version 1 properties have the format:
4123  *
4124  *	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
4125  *
4126  *		where the prop0 value will be used to set prop0 if bit0
4127  *		is set in the flags
4128  *
4129  *   Arguments: un	     - driver soft state (unit) structure
4130  *		flags	     - integer mask indicating properties to be set
4131  *		prop_list    - integer list of property values
4132  */
4133 
4134 static void
sd_set_vers1_properties(struct sd_lun * un,int flags,sd_tunables * prop_list)4135 sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
4136 {
4137 	ASSERT(un != NULL);
4138 
4139 	/*
4140 	 * Set the flag to indicate cache is to be disabled. An attempt
4141 	 * to disable the cache via sd_cache_control() will be made
4142 	 * later during attach once the basic initialization is complete.
4143 	 */
4144 	if (flags & SD_CONF_BSET_NOCACHE) {
4145 		un->un_f_opt_disable_cache = TRUE;
4146 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4147 		    "sd_set_vers1_properties: caching disabled flag set\n");
4148 	}
4149 
4150 	/* CD-specific configuration parameters */
4151 	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
4152 		un->un_f_cfg_playmsf_bcd = TRUE;
4153 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4154 		    "sd_set_vers1_properties: playmsf_bcd set\n");
4155 	}
4156 	if (flags & SD_CONF_BSET_READSUB_BCD) {
4157 		un->un_f_cfg_readsub_bcd = TRUE;
4158 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4159 		    "sd_set_vers1_properties: readsub_bcd set\n");
4160 	}
4161 	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
4162 		un->un_f_cfg_read_toc_trk_bcd = TRUE;
4163 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4164 		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
4165 	}
4166 	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
4167 		un->un_f_cfg_read_toc_addr_bcd = TRUE;
4168 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4169 		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
4170 	}
4171 	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
4172 		un->un_f_cfg_no_read_header = TRUE;
4173 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4174 		    "sd_set_vers1_properties: no_read_header set\n");
4175 	}
4176 	if (flags & SD_CONF_BSET_READ_CD_XD4) {
4177 		un->un_f_cfg_read_cd_xd4 = TRUE;
4178 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4179 		    "sd_set_vers1_properties: read_cd_xd4 set\n");
4180 	}
4181 
4182 	/* Support for devices which do not have valid/unique serial numbers */
4183 	if (flags & SD_CONF_BSET_FAB_DEVID) {
4184 		un->un_f_opt_fab_devid = TRUE;
4185 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4186 		    "sd_set_vers1_properties: fab_devid bit set\n");
4187 	}
4188 
4189 	/* Support for user throttle configuration */
4190 	if (flags & SD_CONF_BSET_THROTTLE) {
4191 		ASSERT(prop_list != NULL);
4192 		un->un_saved_throttle = un->un_throttle =
4193 		    prop_list->sdt_throttle;
4194 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4195 		    "sd_set_vers1_properties: throttle set to %d\n",
4196 		    prop_list->sdt_throttle);
4197 	}
4198 
4199 	/* Set the per disk retry count according to the conf file or table. */
4200 	if (flags & SD_CONF_BSET_NRR_COUNT) {
4201 		ASSERT(prop_list != NULL);
4202 		if (prop_list->sdt_not_rdy_retries) {
4203 			un->un_notready_retry_count =
4204 			    prop_list->sdt_not_rdy_retries;
4205 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4206 			    "sd_set_vers1_properties: not ready retry count"
4207 			    " set to %d\n", un->un_notready_retry_count);
4208 		}
4209 	}
4210 
4211 	/* The controller type is reported for generic disk driver ioctls */
4212 	if (flags & SD_CONF_BSET_CTYPE) {
4213 		ASSERT(prop_list != NULL);
4214 		switch (prop_list->sdt_ctype) {
4215 		case CTYPE_CDROM:
4216 			un->un_ctype = prop_list->sdt_ctype;
4217 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4218 			    "sd_set_vers1_properties: ctype set to "
4219 			    "CTYPE_CDROM\n");
4220 			break;
4221 		case CTYPE_CCS:
4222 			un->un_ctype = prop_list->sdt_ctype;
4223 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4224 			    "sd_set_vers1_properties: ctype set to "
4225 			    "CTYPE_CCS\n");
4226 			break;
4227 		case CTYPE_ROD:		/* RW optical */
4228 			un->un_ctype = prop_list->sdt_ctype;
4229 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4230 			    "sd_set_vers1_properties: ctype set to "
4231 			    "CTYPE_ROD\n");
4232 			break;
4233 		default:
4234 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4235 			    "sd_set_vers1_properties: Could not set "
4236 			    "invalid ctype value (%d)",
4237 			    prop_list->sdt_ctype);
4238 		}
4239 	}
4240 
4241 	/* Purple failover timeout */
4242 	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
4243 		ASSERT(prop_list != NULL);
4244 		un->un_busy_retry_count =
4245 		    prop_list->sdt_busy_retries;
4246 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4247 		    "sd_set_vers1_properties: "
4248 		    "busy retry count set to %d\n",
4249 		    un->un_busy_retry_count);
4250 	}
4251 
4252 	/* Purple reset retry count */
4253 	if (flags & SD_CONF_BSET_RST_RETRIES) {
4254 		ASSERT(prop_list != NULL);
4255 		un->un_reset_retry_count =
4256 		    prop_list->sdt_reset_retries;
4257 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4258 		    "sd_set_vers1_properties: "
4259 		    "reset retry count set to %d\n",
4260 		    un->un_reset_retry_count);
4261 	}
4262 
4263 	/* Purple reservation release timeout */
4264 	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
4265 		ASSERT(prop_list != NULL);
4266 		un->un_reserve_release_time =
4267 		    prop_list->sdt_reserv_rel_time;
4268 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4269 		    "sd_set_vers1_properties: "
4270 		    "reservation release timeout set to %d\n",
4271 		    un->un_reserve_release_time);
4272 	}
4273 
4274 	/*
4275 	 * Driver flag telling the driver to verify that no commands are pending
4276 	 * for a device before issuing a Test Unit Ready. This is a workaround
4277 	 * for a firmware bug in some Seagate eliteI drives.
4278 	 */
4279 	if (flags & SD_CONF_BSET_TUR_CHECK) {
4280 		un->un_f_cfg_tur_check = TRUE;
4281 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4282 		    "sd_set_vers1_properties: tur queue check set\n");
4283 	}
4284 
4285 	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
4286 		un->un_min_throttle = prop_list->sdt_min_throttle;
4287 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4288 		    "sd_set_vers1_properties: min throttle set to %d\n",
4289 		    un->un_min_throttle);
4290 	}
4291 
4292 	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
4293 		un->un_f_disksort_disabled =
4294 		    (prop_list->sdt_disk_sort_dis != 0) ?
4295 		    TRUE : FALSE;
4296 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4297 		    "sd_set_vers1_properties: disksort disabled "
4298 		    "flag set to %d\n",
4299 		    prop_list->sdt_disk_sort_dis);
4300 	}
4301 
4302 	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
4303 		un->un_f_lun_reset_enabled =
4304 		    (prop_list->sdt_lun_reset_enable != 0) ?
4305 		    TRUE : FALSE;
4306 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4307 		    "sd_set_vers1_properties: lun reset enabled "
4308 		    "flag set to %d\n",
4309 		    prop_list->sdt_lun_reset_enable);
4310 	}
4311 
4312 	if (flags & SD_CONF_BSET_CACHE_IS_NV) {
4313 		un->un_f_suppress_cache_flush =
4314 		    (prop_list->sdt_suppress_cache_flush != 0) ?
4315 		    TRUE : FALSE;
4316 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4317 		    "sd_set_vers1_properties: suppress_cache_flush "
4318 		    "flag set to %d\n",
4319 		    prop_list->sdt_suppress_cache_flush);
4320 	}
4321 
4322 	if (flags & SD_CONF_BSET_PC_DISABLED) {
4323 		un->un_f_power_condition_disabled =
4324 		    (prop_list->sdt_power_condition_dis != 0) ?
4325 		    TRUE : FALSE;
4326 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4327 		    "sd_set_vers1_properties: power_condition_disabled "
4328 		    "flag set to %d\n",
4329 		    prop_list->sdt_power_condition_dis);
4330 	}
4331 
4332 	/*
4333 	 * Validate the throttle values.
4334 	 * If any of the numbers are invalid, set everything to defaults.
4335 	 */
4336 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4337 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4338 	    (un->un_min_throttle > un->un_throttle)) {
4339 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4340 		un->un_min_throttle = sd_min_throttle;
4341 	}
4342 }
4343 
4344 /*
4345  *    Function: sd_get_physical_geometry
4346  *
4347  * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4348  *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4349  *		target, and use this information to initialize the physical
4350  *		geometry cache specified by pgeom_p.
4351  *
4352  *		MODE SENSE is an optional command, so failure in this case
4353  *		does not necessarily denote an error. We want to use the
4354  *		MODE SENSE commands to derive the physical geometry of the
4355  *		device, but if either command fails, the logical geometry is
4356  *		used as the fallback for disk label geometry in cmlb.
4357  *
4358  *		This requires that un->un_blockcount and un->un_tgt_blocksize
4359  *		have already been initialized for the current target and
4360  *		that the current values be passed as args so that we don't
4361  *		end up ever trying to use -1 as a valid value. This could
4362  *		happen if either value is reset while we're not holding
4363  *		the mutex.
4364  *
4365  *   Arguments: un - driver soft state (unit) structure
4366  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4367  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4368  *			to use the USCSI "direct" chain and bypass the normal
4369  *			command waitq.
4370  *
4371  *     Context: Kernel thread only (can sleep).
4372  */
4373 
4374 static int
sd_get_physical_geometry(struct sd_lun * un,cmlb_geom_t * pgeom_p,diskaddr_t capacity,int lbasize,int path_flag)4375 sd_get_physical_geometry(struct sd_lun *un, cmlb_geom_t *pgeom_p,
4376     diskaddr_t capacity, int lbasize, int path_flag)
4377 {
4378 	struct	mode_format	*page3p;
4379 	struct	mode_geometry	*page4p;
4380 	struct	mode_header	*headerp;
4381 	int	sector_size;
4382 	int	nsect;
4383 	int	nhead;
4384 	int	ncyl;
4385 	int	intrlv;
4386 	int	spc;
4387 	diskaddr_t	modesense_capacity;
4388 	int	rpm;
4389 	int	bd_len;
4390 	int	mode_header_length;
4391 	uchar_t	*p3bufp;
4392 	uchar_t	*p4bufp;
4393 	int	cdbsize;
4394 	int	ret = EIO;
4395 	sd_ssc_t *ssc;
4396 	int	status;
4397 
4398 	ASSERT(un != NULL);
4399 
4400 	if (lbasize == 0) {
4401 		if (ISCD(un)) {
4402 			lbasize = 2048;
4403 		} else {
4404 			lbasize = un->un_sys_blocksize;
4405 		}
4406 	}
4407 	pgeom_p->g_secsize = (unsigned short)lbasize;
4408 
4409 	/*
4410 	 * If the unit is a cd/dvd drive MODE SENSE page three
4411 	 * and MODE SENSE page four are reserved (see SBC spec
4412 	 * and MMC spec). To prevent soft errors just return
4413 	 * using the default LBA size.
4414 	 *
4415 	 * Since SATA MODE SENSE function (sata_txlt_mode_sense()) does not
4416 	 * implement support for mode pages 3 and 4 return here to prevent
4417 	 * illegal requests on SATA drives.
4418 	 *
4419 	 * These pages are also reserved in SBC-2 and later.  We assume SBC-2
4420 	 * or later for a direct-attached block device if the SCSI version is
4421 	 * at least SPC-3.
4422 	 */
4423 
4424 	if (ISCD(un) ||
4425 	    un->un_interconnect_type == SD_INTERCONNECT_SATA ||
4426 	    (un->un_ctype == CTYPE_CCS && SD_INQUIRY(un)->inq_ansi >= 5))
4427 		return (ret);
4428 
4429 	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;
4430 
4431 	/*
4432 	 * Retrieve MODE SENSE page 3 - Format Device Page
4433 	 */
4434 	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
4435 	ssc = sd_ssc_init(un);
4436 	status = sd_send_scsi_MODE_SENSE(ssc, cdbsize, p3bufp,
4437 	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag);
4438 	if (status != 0) {
4439 		SD_ERROR(SD_LOG_COMMON, un,
4440 		    "sd_get_physical_geometry: mode sense page 3 failed\n");
4441 		goto page3_exit;
4442 	}
4443 
4444 	/*
4445 	 * Determine size of Block Descriptors in order to locate the mode
4446 	 * page data.  ATAPI devices return 0, SCSI devices should return
4447 	 * MODE_BLK_DESC_LENGTH.
4448 	 */
4449 	headerp = (struct mode_header *)p3bufp;
4450 	if (un->un_f_cfg_is_atapi == TRUE) {
4451 		struct mode_header_grp2 *mhp =
4452 		    (struct mode_header_grp2 *)headerp;
4453 		mode_header_length = MODE_HEADER_LENGTH_GRP2;
4454 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4455 	} else {
4456 		mode_header_length = MODE_HEADER_LENGTH;
4457 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4458 	}
4459 
4460 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4461 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
4462 		    "sd_get_physical_geometry: received unexpected bd_len "
4463 		    "of %d, page3\n", bd_len);
4464 		status = EIO;
4465 		goto page3_exit;
4466 	}
4467 
4468 	page3p = (struct mode_format *)
4469 	    ((caddr_t)headerp + mode_header_length + bd_len);
4470 
4471 	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
4472 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
4473 		    "sd_get_physical_geometry: mode sense pg3 code mismatch "
4474 		    "%d\n", page3p->mode_page.code);
4475 		status = EIO;
4476 		goto page3_exit;
4477 	}
4478 
4479 	/*
4480 	 * Use this physical geometry data only if BOTH MODE SENSE commands
4481 	 * complete successfully; otherwise, revert to the logical geometry.
4482 	 * So, we need to save everything in temporary variables.
4483 	 */
4484 	sector_size = BE_16(page3p->data_bytes_sect);
4485 
4486 	/*
4487 	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
4488 	 */
4489 	if (sector_size == 0) {
4490 		sector_size = un->un_sys_blocksize;
4491 	} else {
4492 		sector_size &= ~(un->un_sys_blocksize - 1);
4493 	}
4494 
4495 	nsect  = BE_16(page3p->sect_track);
4496 	intrlv = BE_16(page3p->interleave);
4497 
4498 	SD_INFO(SD_LOG_COMMON, un,
4499 	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
4500 	SD_INFO(SD_LOG_COMMON, un,
4501 	    "   mode page: %d; nsect: %d; sector size: %d;\n",
4502 	    page3p->mode_page.code, nsect, sector_size);
4503 	SD_INFO(SD_LOG_COMMON, un,
4504 	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
4505 	    BE_16(page3p->track_skew),
4506 	    BE_16(page3p->cylinder_skew));
4507 
4508 	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
4509 
4510 	/*
4511 	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
4512 	 */
4513 	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
4514 	status = sd_send_scsi_MODE_SENSE(ssc, cdbsize, p4bufp,
4515 	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag);
4516 	if (status != 0) {
4517 		SD_ERROR(SD_LOG_COMMON, un,
4518 		    "sd_get_physical_geometry: mode sense page 4 failed\n");
4519 		goto page4_exit;
4520 	}
4521 
4522 	/*
4523 	 * Determine size of Block Descriptors in order to locate the mode
4524 	 * page data.  ATAPI devices return 0, SCSI devices should return
4525 	 * MODE_BLK_DESC_LENGTH.
4526 	 */
4527 	headerp = (struct mode_header *)p4bufp;
4528 	if (un->un_f_cfg_is_atapi == TRUE) {
4529 		struct mode_header_grp2 *mhp =
4530 		    (struct mode_header_grp2 *)headerp;
4531 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4532 	} else {
4533 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4534 	}
4535 
4536 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4537 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
4538 		    "sd_get_physical_geometry: received unexpected bd_len of "
4539 		    "%d, page4\n", bd_len);
4540 		status = EIO;
4541 		goto page4_exit;
4542 	}
4543 
4544 	page4p = (struct mode_geometry *)
4545 	    ((caddr_t)headerp + mode_header_length + bd_len);
4546 
4547 	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
4548 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
4549 		    "sd_get_physical_geometry: mode sense pg4 code mismatch "
4550 		    "%d\n", page4p->mode_page.code);
4551 		status = EIO;
4552 		goto page4_exit;
4553 	}
4554 
4555 	/*
4556 	 * Stash the data now, after we know that both commands completed.
4557 	 */
4558 
4559 
4560 	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
4561 	spc   = nhead * nsect;
4562 	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
4563 	rpm   = BE_16(page4p->rpm);
4564 
4565 	modesense_capacity = spc * ncyl;
4566 
4567 	SD_INFO(SD_LOG_COMMON, un,
4568 	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
4569 	SD_INFO(SD_LOG_COMMON, un,
4570 	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
4571 	SD_INFO(SD_LOG_COMMON, un,
4572 	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
4573 	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
4574 	    (void *)pgeom_p, capacity);
4575 
4576 	/*
4577 	 * Compensate if the drive's geometry is not rectangular, i.e.,
4578 	 * the product of C * H * S returned by MODE SENSE >= that returned
4579 	 * by read capacity. This is an idiosyncrasy of the original x86
4580 	 * disk subsystem.
4581 	 */
4582 	if (modesense_capacity >= capacity) {
4583 		SD_INFO(SD_LOG_COMMON, un,
4584 		    "sd_get_physical_geometry: adjusting acyl; "
4585 		    "old: %d; new: %d\n", pgeom_p->g_acyl,
4586 		    (modesense_capacity - capacity + spc - 1) / spc);
4587 		if (sector_size != 0) {
4588 			/* 1243403: NEC D38x7 drives don't support sec size */
4589 			pgeom_p->g_secsize = (unsigned short)sector_size;
4590 		}
4591 		pgeom_p->g_nsect    = (unsigned short)nsect;
4592 		pgeom_p->g_nhead    = (unsigned short)nhead;
4593 		pgeom_p->g_capacity = capacity;
4594 		pgeom_p->g_acyl	    =
4595 		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
4596 		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
4597 	}
4598 
4599 	pgeom_p->g_rpm    = (unsigned short)rpm;
4600 	pgeom_p->g_intrlv = (unsigned short)intrlv;
4601 	ret = 0;
4602 
4603 	SD_INFO(SD_LOG_COMMON, un,
4604 	    "sd_get_physical_geometry: mode sense geometry:\n");
4605 	SD_INFO(SD_LOG_COMMON, un,
4606 	    "   nsect: %d; sector size: %d; interlv: %d\n",
4607 	    nsect, sector_size, intrlv);
4608 	SD_INFO(SD_LOG_COMMON, un,
4609 	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
4610 	    nhead, ncyl, rpm, modesense_capacity);
4611 	SD_INFO(SD_LOG_COMMON, un,
4612 	    "sd_get_physical_geometry: (cached)\n");
4613 	SD_INFO(SD_LOG_COMMON, un,
4614 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
4615 	    pgeom_p->g_ncyl,  pgeom_p->g_acyl,
4616 	    pgeom_p->g_nhead, pgeom_p->g_nsect);
4617 	SD_INFO(SD_LOG_COMMON, un,
4618 	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
4619 	    pgeom_p->g_secsize, pgeom_p->g_capacity,
4620 	    pgeom_p->g_intrlv, pgeom_p->g_rpm);
4621 	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
4622 
4623 page4_exit:
4624 	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);
4625 
4626 page3_exit:
4627 	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);
4628 
4629 	if (status != 0) {
4630 		if (status == EIO) {
4631 			/*
4632 			 * Some disks do not support mode sense(6), we
4633 			 * should ignore this kind of error(sense key is
4634 			 * 0x5 - illegal request).
4635 			 */
4636 			uint8_t *sensep;
4637 			int senlen;
4638 
4639 			sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
4640 			senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
4641 			    ssc->ssc_uscsi_cmd->uscsi_rqresid);
4642 
4643 			if (senlen > 0 &&
4644 			    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
4645 				sd_ssc_assessment(ssc,
4646 				    SD_FMT_IGNORE_COMPROMISE);
4647 			} else {
4648 				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
4649 			}
4650 		} else {
4651 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
4652 		}
4653 	}
4654 	sd_ssc_fini(ssc);
4655 	return (ret);
4656 }
4657 
4658 /*
4659  *    Function: sd_get_virtual_geometry
4660  *
4661  * Description: Ask the controller to tell us about the target device.
4662  *
4663  *   Arguments: un - pointer to softstate
4664  *		capacity - disk capacity in #blocks
4665  *		lbasize - disk block size in bytes
4666  *
4667  *     Context: Kernel thread only
4668  */
4669 
4670 static int
sd_get_virtual_geometry(struct sd_lun * un,cmlb_geom_t * lgeom_p,diskaddr_t capacity,int lbasize)4671 sd_get_virtual_geometry(struct sd_lun *un, cmlb_geom_t *lgeom_p,
4672     diskaddr_t capacity, int lbasize)
4673 {
4674 	uint_t	geombuf;
4675 	int	spc;
4676 
4677 	ASSERT(un != NULL);
4678 
4679 	/* Set sector size, and total number of sectors */
4680 	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
4681 	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
4682 
4683 	/* Let the HBA tell us its geometry */
4684 	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
4685 
4686 	/* A value of -1 indicates an undefined "geometry" property */
4687 	if (geombuf == (-1)) {
4688 		return (EINVAL);
4689 	}
4690 
4691 	/* Initialize the logical geometry cache. */
4692 	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
4693 	lgeom_p->g_nsect   = geombuf & 0xffff;
4694 	lgeom_p->g_secsize = un->un_sys_blocksize;
4695 
4696 	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
4697 
4698 	/*
4699 	 * Note: The driver originally converted the capacity value from
4700 	 * target blocks to system blocks. However, the capacity value passed
4701 	 * to this routine is already in terms of system blocks (this scaling
4702 	 * is done when the READ CAPACITY command is issued and processed).
4703 	 * This 'error' may have gone undetected because the usage of g_ncyl
4704 	 * (which is based upon g_capacity) is very limited within the driver
4705 	 */
4706 	lgeom_p->g_capacity = capacity;
4707 
4708 	/*
4709 	 * Set ncyl to zero if the hba returned a zero nhead or nsect value. The
4710 	 * hba may return zero values if the device has been removed.
4711 	 */
4712 	if (spc == 0) {
4713 		lgeom_p->g_ncyl = 0;
4714 	} else {
4715 		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
4716 	}
4717 	lgeom_p->g_acyl = 0;
4718 
4719 	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
4720 	return (0);
4721 
4722 }
4723 /*
4724  *    Function: sd_update_block_info
4725  *
4726  * Description: Calculate a byte count to sector count bitshift value
4727  *		from sector size.
4728  *
4729  *   Arguments: un: unit struct.
4730  *		lbasize: new target sector size
4731  *		capacity: new target capacity, ie. block count
4732  *
4733  *     Context: Kernel thread context
4734  */
4735 
4736 static void
sd_update_block_info(struct sd_lun * un,uint32_t lbasize,uint64_t capacity)4737 sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
4738 {
4739 	if (lbasize != 0) {
4740 		un->un_tgt_blocksize = lbasize;
4741 		un->un_f_tgt_blocksize_is_valid = TRUE;
4742 		if (!un->un_f_has_removable_media) {
4743 			un->un_sys_blocksize = lbasize;
4744 		}
4745 	}
4746 
4747 	if (capacity != 0) {
4748 		un->un_blockcount		= capacity;
4749 		un->un_f_blockcount_is_valid	= TRUE;
4750 
4751 		/*
4752 		 * The capacity has changed so update the errstats.
4753 		 */
4754 		if (un->un_errstats != NULL) {
4755 			struct sd_errstats *stp;
4756 
4757 			capacity *= un->un_sys_blocksize;
4758 			stp = (struct sd_errstats *)un->un_errstats->ks_data;
4759 			if (stp->sd_capacity.value.ui64 < capacity)
4760 				stp->sd_capacity.value.ui64 = capacity;
4761 		}
4762 	}
4763 }
4764 
4765 /*
4766  * Parses the SCSI Block Limits VPD page (0xB0). It's legal to pass NULL for
4767  * vpd_pg, in which case all the block limits will be reset to the defaults.
4768  */
4769 static void
sd_parse_blk_limits_vpd(struct sd_lun * un,uchar_t * vpd_pg)4770 sd_parse_blk_limits_vpd(struct sd_lun *un, uchar_t *vpd_pg)
4771 {
4772 	sd_blk_limits_t *lim = &un->un_blk_lim;
4773 	unsigned pg_len;
4774 
4775 	if (vpd_pg != NULL)
4776 		pg_len = BE_IN16(&vpd_pg[2]);
4777 	else
4778 		pg_len = 0;
4779 
4780 	/* Block Limits VPD can be 16 bytes or 64 bytes long - support both */
4781 	if (pg_len >= 0x10) {
4782 		lim->lim_opt_xfer_len_gran = BE_IN16(&vpd_pg[6]);
4783 		lim->lim_max_xfer_len = BE_IN32(&vpd_pg[8]);
4784 		lim->lim_opt_xfer_len = BE_IN32(&vpd_pg[12]);
4785 
4786 		/* Zero means not reported, so use "unlimited" */
4787 		if (lim->lim_max_xfer_len == 0)
4788 			lim->lim_max_xfer_len = UINT32_MAX;
4789 		if (lim->lim_opt_xfer_len == 0)
4790 			lim->lim_opt_xfer_len = UINT32_MAX;
4791 	} else {
4792 		lim->lim_opt_xfer_len_gran = 0;
4793 		lim->lim_max_xfer_len = UINT32_MAX;
4794 		lim->lim_opt_xfer_len = UINT32_MAX;
4795 	}
4796 	if (pg_len >= 0x3c) {
4797 		lim->lim_max_pfetch_len = BE_IN32(&vpd_pg[16]);
4798 		/*
4799 		 * A zero in either of the following two fields indicates lack
4800 		 * of UNMAP support.
4801 		 */
4802 		lim->lim_max_unmap_lba_cnt = BE_IN32(&vpd_pg[20]);
4803 		lim->lim_max_unmap_descr_cnt = BE_IN32(&vpd_pg[24]);
4804 		lim->lim_opt_unmap_gran = BE_IN32(&vpd_pg[28]);
4805 		if ((vpd_pg[32] >> 7) == 1) {
4806 			lim->lim_unmap_gran_align =
4807 			    ((vpd_pg[32] & 0x7f) << 24) | (vpd_pg[33] << 16) |
4808 			    (vpd_pg[34] << 8) | vpd_pg[35];
4809 		} else {
4810 			lim->lim_unmap_gran_align = 0;
4811 		}
4812 		lim->lim_max_write_same_len = BE_IN64(&vpd_pg[36]);
4813 	} else {
4814 		lim->lim_max_pfetch_len = UINT32_MAX;
4815 		lim->lim_max_unmap_lba_cnt = UINT32_MAX;
4816 		lim->lim_max_unmap_descr_cnt = SD_UNMAP_MAX_DESCR;
4817 		lim->lim_opt_unmap_gran = 0;
4818 		lim->lim_unmap_gran_align = 0;
4819 		lim->lim_max_write_same_len = UINT64_MAX;
4820 	}
4821 }
4822 
4823 /*
4824  * Collects VPD page B0 data if available (block limits). If the data is
4825  * not available or querying the device failed, we revert to the defaults.
4826  */
4827 static void
sd_setup_blk_limits(sd_ssc_t * ssc)4828 sd_setup_blk_limits(sd_ssc_t *ssc)
4829 {
4830 	struct sd_lun	*un		= ssc->ssc_un;
4831 	uchar_t		*inqB0		= NULL;
4832 	size_t		inqB0_resid	= 0;
4833 	int		rval;
4834 
4835 	if (un->un_vpd_page_mask & SD_VPD_BLK_LIMITS_PG) {
4836 		inqB0 = kmem_zalloc(MAX_INQUIRY_SIZE, KM_SLEEP);
4837 		rval = sd_send_scsi_INQUIRY(ssc, inqB0, MAX_INQUIRY_SIZE, 0x01,
4838 		    0xB0, &inqB0_resid);
4839 		if (rval != 0) {
4840 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
4841 			kmem_free(inqB0, MAX_INQUIRY_SIZE);
4842 			inqB0 = NULL;
4843 		}
4844 	}
4845 	/* passing NULL inqB0 will reset to defaults */
4846 	sd_parse_blk_limits_vpd(ssc->ssc_un, inqB0);
4847 	if (inqB0)
4848 		kmem_free(inqB0, MAX_INQUIRY_SIZE);
4849 }
4850 
4851 /*
4852  *    Function: sd_register_devid
4853  *
4854  * Description: This routine will obtain the device id information from the
4855  *		target, obtain the serial number, and register the device
4856  *		id with the ddi framework.
4857  *
4858  *   Arguments: devi - the system's dev_info_t for the device.
4859  *		un - driver soft state (unit) structure
4860  *		reservation_flag - indicates if a reservation conflict
4861  *		occurred during attach
4862  *
4863  *     Context: Kernel Thread
4864  */
4865 static void
sd_register_devid(sd_ssc_t * ssc,dev_info_t * devi,int reservation_flag)4866 sd_register_devid(sd_ssc_t *ssc, dev_info_t *devi, int reservation_flag)
4867 {
4868 	int		rval		= 0;
4869 	uchar_t		*inq80		= NULL;
4870 	size_t		inq80_len	= MAX_INQUIRY_SIZE;
4871 	size_t		inq80_resid	= 0;
4872 	uchar_t		*inq83		= NULL;
4873 	size_t		inq83_len	= MAX_INQUIRY_SIZE;
4874 	size_t		inq83_resid	= 0;
4875 	int		dlen, len;
4876 	char		*sn;
4877 	struct sd_lun	*un;
4878 
4879 	ASSERT(ssc != NULL);
4880 	un = ssc->ssc_un;
4881 	ASSERT(un != NULL);
4882 	ASSERT(mutex_owned(SD_MUTEX(un)));
4883 	ASSERT((SD_DEVINFO(un)) == devi);
4884 
4885 
4886 	/*
4887 	 * We check the availability of the World Wide Name (0x83) and Unit
4888 	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), and using
4889 	 * un_vpd_page_mask from them, we decide which way to get the WWN.  If
4890 	 * 0x83 is available, that is the best choice.  Our next choice is
4891 	 * 0x80.  If neither are available, we munge the devid from the device
4892 	 * vid/pid/serial # for Sun qualified disks, or use the ddi framework
4893 	 * to fabricate a devid for non-Sun qualified disks.
4894 	 */
4895 	if (sd_check_vpd_page_support(ssc) == 0) {
4896 		/* collect page 80 data if available */
4897 		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {
4898 
4899 			mutex_exit(SD_MUTEX(un));
4900 			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
4901 
4902 			rval = sd_send_scsi_INQUIRY(ssc, inq80, inq80_len,
4903 			    0x01, 0x80, &inq80_resid);
4904 
4905 			if (rval != 0) {
4906 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
4907 				kmem_free(inq80, inq80_len);
4908 				inq80 = NULL;
4909 				inq80_len = 0;
4910 			} else if (ddi_prop_exists(
4911 			    DDI_DEV_T_NONE, SD_DEVINFO(un),
4912 			    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
4913 			    INQUIRY_SERIAL_NO) == 0) {
4914 				/*
4915 				 * If we don't already have a serial number
4916 				 * property, do quick verify of data returned
4917 				 * and define property.
4918 				 */
4919 				dlen = inq80_len - inq80_resid;
4920 				len = (size_t)inq80[3];
4921 				if ((dlen >= 4) && ((len + 4) <= dlen)) {
4922 					/*
4923 					 * Ensure sn termination, skip leading
4924 					 * blanks, and create property
4925 					 * 'inquiry-serial-no'.
4926 					 */
4927 					sn = (char *)&inq80[4];
4928 					sn[len] = 0;
4929 					while (*sn && (*sn == ' '))
4930 						sn++;
4931 					if (*sn) {
4932 						(void) ddi_prop_update_string(
4933 						    DDI_DEV_T_NONE,
4934 						    SD_DEVINFO(un),
4935 						    INQUIRY_SERIAL_NO, sn);
4936 					}
4937 				}
4938 			}
4939 			mutex_enter(SD_MUTEX(un));
4940 		}
4941 
4942 		/* collect page 83 data if available */
4943 		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
4944 			mutex_exit(SD_MUTEX(un));
4945 			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
4946 
4947 			rval = sd_send_scsi_INQUIRY(ssc, inq83, inq83_len,
4948 			    0x01, 0x83, &inq83_resid);
4949 
4950 			if (rval != 0) {
4951 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
4952 				kmem_free(inq83, inq83_len);
4953 				inq83 = NULL;
4954 				inq83_len = 0;
4955 			}
4956 			mutex_enter(SD_MUTEX(un));
4957 		}
4958 	}
4959 
4960 	/*
4961 	 * If transport has already registered a devid for this target
4962 	 * then that takes precedence over the driver's determination
4963 	 * of the devid.
4964 	 *
4965 	 * NOTE: The reason this check is done here instead of at the beginning
4966 	 * of the function is to allow the code above to create the
4967 	 * 'inquiry-serial-no' property.
4968 	 */
4969 	if (ddi_devid_get(SD_DEVINFO(un), &un->un_devid) == DDI_SUCCESS) {
4970 		ASSERT(un->un_devid);
4971 		un->un_f_devid_transport_defined = TRUE;
4972 		goto cleanup; /* use devid registered by the transport */
4973 	}
4974 
4975 	/*
4976 	 * This is the case of antiquated Sun disk drives that have the
4977 	 * FAB_DEVID property set in the disk_table.  These drives
4978 	 * manage the devid's by storing them in last 2 available sectors
4979 	 * on the drive and have them fabricated by the ddi layer by calling
4980 	 * ddi_devid_init and passing the DEVID_FAB flag.
4981 	 */
4982 	if (un->un_f_opt_fab_devid == TRUE) {
4983 		/*
4984 		 * Depending on EINVAL isn't reliable, since a reserved disk
4985 		 * may result in invalid geometry, so check to make sure a
4986 		 * reservation conflict did not occur during attach.
4987 		 */
4988 		if ((sd_get_devid(ssc) == EINVAL) &&
4989 		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
4990 			/*
4991 			 * The devid is invalid AND there is no reservation
4992 			 * conflict.  Fabricate a new devid.
4993 			 */
4994 			(void) sd_create_devid(ssc);
4995 		}
4996 
4997 		/* Register the devid if it exists */
4998 		if (un->un_devid != NULL) {
4999 			(void) ddi_devid_register(SD_DEVINFO(un),
5000 			    un->un_devid);
5001 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
5002 			    "sd_register_devid: Devid Fabricated\n");
5003 		}
5004 		goto cleanup;
5005 	}
5006 
5007 	/* encode best devid possible based on data available */
5008 	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
5009 	    (char *)ddi_driver_name(SD_DEVINFO(un)),
5010 	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
5011 	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
5012 	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {
5013 
5014 		/* devid successfully encoded, register devid */
5015 		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);
5016 
5017 	} else {
5018 		/*
5019 		 * Unable to encode a devid based on data available.
5020 		 * This is not a Sun qualified disk.  Older Sun disk
5021 		 * drives that have the SD_FAB_DEVID property
5022 		 * set in the disk_table and non Sun qualified
5023 		 * disks are treated in the same manner.  These
5024 		 * drives manage the devid's by storing them in
5025 		 * last 2 available sectors on the drive and
5026 		 * have them fabricated by the ddi layer by
5027 		 * calling ddi_devid_init and passing the
5028 		 * DEVID_FAB flag.
5029 		 * Create a fabricate devid only if there's no
5030 		 * fabricate devid existed.
5031 		 */
5032 		if (sd_get_devid(ssc) == EINVAL) {
5033 			(void) sd_create_devid(ssc);
5034 		}
5035 		un->un_f_opt_fab_devid = TRUE;
5036 
5037 		/* Register the devid if it exists */
5038 		if (un->un_devid != NULL) {
5039 			(void) ddi_devid_register(SD_DEVINFO(un),
5040 			    un->un_devid);
5041 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
5042 			    "sd_register_devid: devid fabricated using "
5043 			    "ddi framework\n");
5044 		}
5045 	}
5046 
5047 cleanup:
5048 	/* clean up resources */
5049 	if (inq80 != NULL) {
5050 		kmem_free(inq80, inq80_len);
5051 	}
5052 	if (inq83 != NULL) {
5053 		kmem_free(inq83, inq83_len);
5054 	}
5055 }
5056 
5057 
5058 
5059 /*
5060  *    Function: sd_get_devid
5061  *
5062  * Description: This routine will return 0 if a valid device id has been
5063  *		obtained from the target and stored in the soft state. If a
5064  *		valid device id has not been previously read and stored, a
5065  *		read attempt will be made.
5066  *
5067  *   Arguments: un - driver soft state (unit) structure
5068  *
5069  * Return Code: 0 if we successfully get the device id
5070  *
5071  *     Context: Kernel Thread
5072  */
5073 
5074 static int
sd_get_devid(sd_ssc_t * ssc)5075 sd_get_devid(sd_ssc_t *ssc)
5076 {
5077 	struct dk_devid		*dkdevid;
5078 	ddi_devid_t		tmpid;
5079 	uint_t			*ip;
5080 	size_t			sz;
5081 	diskaddr_t		blk;
5082 	int			status;
5083 	int			chksum;
5084 	int			i;
5085 	size_t			buffer_size;
5086 	struct sd_lun		*un;
5087 
5088 	ASSERT(ssc != NULL);
5089 	un = ssc->ssc_un;
5090 	ASSERT(un != NULL);
5091 	ASSERT(mutex_owned(SD_MUTEX(un)));
5092 
5093 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
5094 	    un);
5095 
5096 	if (un->un_devid != NULL) {
5097 		return (0);
5098 	}
5099 
5100 	mutex_exit(SD_MUTEX(un));
5101 	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
5102 	    (void *)SD_PATH_DIRECT) != 0) {
5103 		mutex_enter(SD_MUTEX(un));
5104 		return (EINVAL);
5105 	}
5106 
5107 	/*
5108 	 * Read and verify device id, stored in the reserved cylinders at the
5109 	 * end of the disk. Backup label is on the odd sectors of the last
5110 	 * track of the last cylinder. Device id will be on track of the next
5111 	 * to last cylinder.
5112 	 */
5113 	mutex_enter(SD_MUTEX(un));
5114 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
5115 	mutex_exit(SD_MUTEX(un));
5116 	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
5117 	status = sd_send_scsi_READ(ssc, dkdevid, buffer_size, blk,
5118 	    SD_PATH_DIRECT);
5119 
5120 	if (status != 0) {
5121 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5122 		goto error;
5123 	}
5124 
5125 	/* Validate the revision */
5126 	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
5127 	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
5128 		status = EINVAL;
5129 		goto error;
5130 	}
5131 
5132 	/* Calculate the checksum */
5133 	chksum = 0;
5134 	ip = (uint_t *)dkdevid;
5135 	for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int));
5136 	    i++) {
5137 		chksum ^= ip[i];
5138 	}
5139 
5140 	/* Compare the checksums */
5141 	if (DKD_GETCHKSUM(dkdevid) != chksum) {
5142 		status = EINVAL;
5143 		goto error;
5144 	}
5145 
5146 	/* Validate the device id */
5147 	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
5148 		status = EINVAL;
5149 		goto error;
5150 	}
5151 
5152 	/*
5153 	 * Store the device id in the driver soft state
5154 	 */
5155 	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
5156 	tmpid = kmem_alloc(sz, KM_SLEEP);
5157 
5158 	mutex_enter(SD_MUTEX(un));
5159 
5160 	un->un_devid = tmpid;
5161 	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);
5162 
5163 	kmem_free(dkdevid, buffer_size);
5164 
5165 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);
5166 
5167 	return (status);
5168 error:
5169 	mutex_enter(SD_MUTEX(un));
5170 	kmem_free(dkdevid, buffer_size);
5171 	return (status);
5172 }
5173 
5174 
5175 /*
5176  *    Function: sd_create_devid
5177  *
5178  * Description: This routine will fabricate the device id and write it
5179  *		to the disk.
5180  *
5181  *   Arguments: un - driver soft state (unit) structure
5182  *
5183  * Return Code: value of the fabricated device id
5184  *
5185  *     Context: Kernel Thread
5186  */
5187 
5188 static ddi_devid_t
sd_create_devid(sd_ssc_t * ssc)5189 sd_create_devid(sd_ssc_t *ssc)
5190 {
5191 	struct sd_lun	*un;
5192 
5193 	ASSERT(ssc != NULL);
5194 	un = ssc->ssc_un;
5195 	ASSERT(un != NULL);
5196 
5197 	/* Fabricate the devid */
5198 	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
5199 	    == DDI_FAILURE) {
5200 		return (NULL);
5201 	}
5202 
5203 	/* Write the devid to disk */
5204 	if (sd_write_deviceid(ssc) != 0) {
5205 		ddi_devid_free(un->un_devid);
5206 		un->un_devid = NULL;
5207 	}
5208 
5209 	return (un->un_devid);
5210 }
5211 
5212 
5213 /*
5214  *    Function: sd_write_deviceid
5215  *
5216  * Description: This routine will write the device id to the disk
5217  *		reserved sector.
5218  *
5219  *   Arguments: un - driver soft state (unit) structure
5220  *
5221  * Return Code: EINVAL
5222  *		value returned by sd_send_scsi_cmd
5223  *
5224  *     Context: Kernel Thread
5225  */
5226 
5227 static int
sd_write_deviceid(sd_ssc_t * ssc)5228 sd_write_deviceid(sd_ssc_t *ssc)
5229 {
5230 	struct dk_devid		*dkdevid;
5231 	uchar_t			*buf;
5232 	diskaddr_t		blk;
5233 	uint_t			*ip, chksum;
5234 	int			status;
5235 	int			i;
5236 	struct sd_lun		*un;
5237 
5238 	ASSERT(ssc != NULL);
5239 	un = ssc->ssc_un;
5240 	ASSERT(un != NULL);
5241 	ASSERT(mutex_owned(SD_MUTEX(un)));
5242 
5243 	mutex_exit(SD_MUTEX(un));
5244 	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
5245 	    (void *)SD_PATH_DIRECT) != 0) {
5246 		mutex_enter(SD_MUTEX(un));
5247 		return (-1);
5248 	}
5249 
5250 
5251 	/* Allocate the buffer */
5252 	buf = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
5253 	dkdevid = (struct dk_devid *)buf;
5254 
5255 	/* Fill in the revision */
5256 	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
5257 	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
5258 
5259 	/* Copy in the device id */
5260 	mutex_enter(SD_MUTEX(un));
5261 	bcopy(un->un_devid, &dkdevid->dkd_devid,
5262 	    ddi_devid_sizeof(un->un_devid));
5263 	mutex_exit(SD_MUTEX(un));
5264 
5265 	/* Calculate the checksum */
5266 	chksum = 0;
5267 	ip = (uint_t *)dkdevid;
5268 	for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int));
5269 	    i++) {
5270 		chksum ^= ip[i];
5271 	}
5272 
5273 	/* Fill-in checksum */
5274 	DKD_FORMCHKSUM(chksum, dkdevid);
5275 
5276 	/* Write the reserved sector */
5277 	status = sd_send_scsi_WRITE(ssc, buf, un->un_sys_blocksize, blk,
5278 	    SD_PATH_DIRECT);
5279 	if (status != 0)
5280 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5281 
5282 	kmem_free(buf, un->un_sys_blocksize);
5283 
5284 	mutex_enter(SD_MUTEX(un));
5285 	return (status);
5286 }
5287 
5288 
5289 /*
5290  *    Function: sd_check_vpd_page_support
5291  *
5292  * Description: This routine sends an inquiry command with the EVPD bit set and
5293  *		a page code of 0x00 to the device. It is used to determine which
5294  *		vital product pages are available to find the devid. We are
5295  *		looking for pages 0x83 0x80 or 0xB1.  If we return a negative 1,
5296  *		the device does not support that command.
5297  *
5298  *   Arguments: un  - driver soft state (unit) structure
5299  *
5300  * Return Code: 0 - success
5301  *		1 - check condition
5302  *
5303  *     Context: This routine can sleep.
5304  */
5305 
5306 static int
sd_check_vpd_page_support(sd_ssc_t * ssc)5307 sd_check_vpd_page_support(sd_ssc_t *ssc)
5308 {
5309 	uchar_t	*page_list	= NULL;
5310 	uchar_t	page_length	= 0xff;	/* Use max possible length */
5311 	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
5312 	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
5313 	int	rval		= 0;
5314 	int	counter;
5315 	struct sd_lun		*un;
5316 
5317 	ASSERT(ssc != NULL);
5318 	un = ssc->ssc_un;
5319 	ASSERT(un != NULL);
5320 	ASSERT(mutex_owned(SD_MUTEX(un)));
5321 
5322 	mutex_exit(SD_MUTEX(un));
5323 
5324 	/*
5325 	 * We'll set the page length to the maximum to save figuring it out
5326 	 * with an additional call.
5327 	 */
5328 	page_list =  kmem_zalloc(page_length, KM_SLEEP);
5329 
5330 	rval = sd_send_scsi_INQUIRY(ssc, page_list, page_length, evpd,
5331 	    page_code, NULL);
5332 
5333 	if (rval != 0)
5334 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5335 
5336 	mutex_enter(SD_MUTEX(un));
5337 
5338 	/*
5339 	 * Now we must validate that the device accepted the command, as some
5340 	 * drives do not support it.  If the drive does support it, we will
5341 	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
5342 	 * not, we return -1.
5343 	 */
5344 	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
5345 		/* Loop to find one of the 2 pages we need */
5346 		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
5347 
5348 		/*
5349 		 * Pages are returned in ascending order, and 0x83 is what we
5350 		 * are hoping for.
5351 		 */
5352 		while ((page_list[counter] <= 0xB1) &&
5353 		    (counter <= (page_list[VPD_PAGE_LENGTH] +
5354 		    VPD_HEAD_OFFSET))) {
5355 			/*
5356 			 * Add 3 because page_list[3] is the number of
5357 			 * pages minus 3
5358 			 */
5359 
5360 			switch (page_list[counter]) {
5361 			case 0x00:
5362 				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
5363 				break;
5364 			case 0x80:
5365 				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
5366 				break;
5367 			case 0x81:
5368 				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
5369 				break;
5370 			case 0x82:
5371 				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
5372 				break;
5373 			case 0x83:
5374 				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
5375 				break;
5376 			case 0x86:
5377 				un->un_vpd_page_mask |= SD_VPD_EXTENDED_DATA_PG;
5378 				break;
5379 			case 0xB0:
5380 				un->un_vpd_page_mask |= SD_VPD_BLK_LIMITS_PG;
5381 				break;
5382 			case 0xB1:
5383 				un->un_vpd_page_mask |= SD_VPD_DEV_CHARACTER_PG;
5384 				break;
5385 			}
5386 			counter++;
5387 		}
5388 
5389 	} else {
5390 		rval = -1;
5391 
5392 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
5393 		    "sd_check_vpd_page_support: This drive does not implement "
5394 		    "VPD pages.\n");
5395 	}
5396 
5397 	kmem_free(page_list, page_length);
5398 
5399 	return (rval);
5400 }
5401 
5402 
5403 /*
5404  *    Function: sd_setup_pm
5405  *
5406  * Description: Initialize Power Management on the device
5407  *
5408  *     Context: Kernel Thread
5409  */
5410 
5411 static void
sd_setup_pm(sd_ssc_t * ssc,dev_info_t * devi)5412 sd_setup_pm(sd_ssc_t *ssc, dev_info_t *devi)
5413 {
5414 	uint_t		log_page_size;
5415 	uchar_t		*log_page_data;
5416 	int		rval = 0;
5417 	struct sd_lun	*un;
5418 
5419 	ASSERT(ssc != NULL);
5420 	un = ssc->ssc_un;
5421 	ASSERT(un != NULL);
5422 
5423 	/*
5424 	 * Since we are called from attach, holding a mutex for
5425 	 * un is unnecessary. Because some of the routines called
5426 	 * from here require SD_MUTEX to not be held, assert this
5427 	 * right up front.
5428 	 */
5429 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5430 	/*
5431 	 * Since the sd device does not have the 'reg' property,
5432 	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
5433 	 * The following code is to tell cpr that this device
5434 	 * DOES need to be suspended and resumed.
5435 	 */
5436 	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
5437 	    "pm-hardware-state", "needs-suspend-resume");
5438 
5439 	/*
5440 	 * This complies with the new power management framework
5441 	 * for certain desktop machines. Create the pm_components
5442 	 * property as a string array property.
5443 	 * If un_f_pm_supported is TRUE, that means the disk
5444 	 * attached HBA has set the "pm-capable" property and
5445 	 * the value of this property is bigger than 0.
5446 	 */
5447 	if (un->un_f_pm_supported) {
5448 		/*
5449 		 * not all devices have a motor, try it first.
5450 		 * some devices may return ILLEGAL REQUEST, some
5451 		 * will hang
5452 		 * The following START_STOP_UNIT is used to check if target
5453 		 * device has a motor.
5454 		 */
5455 		un->un_f_start_stop_supported = TRUE;
5456 
5457 		if (un->un_f_power_condition_supported) {
5458 			rval = sd_send_scsi_START_STOP_UNIT(ssc,
5459 			    SD_POWER_CONDITION, SD_TARGET_ACTIVE,
5460 			    SD_PATH_DIRECT);
5461 			if (rval != 0) {
5462 				un->un_f_power_condition_supported = FALSE;
5463 			}
5464 		}
5465 		if (!un->un_f_power_condition_supported) {
5466 			rval = sd_send_scsi_START_STOP_UNIT(ssc,
5467 			    SD_START_STOP, SD_TARGET_START, SD_PATH_DIRECT);
5468 		}
5469 		if (rval != 0) {
5470 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5471 			un->un_f_start_stop_supported = FALSE;
5472 		}
5473 
5474 		/*
5475 		 * create pm properties anyways otherwise the parent can't
5476 		 * go to sleep
5477 		 */
5478 		un->un_f_pm_is_enabled = TRUE;
5479 		(void) sd_create_pm_components(devi, un);
5480 
5481 		/*
5482 		 * If it claims that log sense is supported, check it out.
5483 		 */
5484 		if (un->un_f_log_sense_supported) {
5485 			rval = sd_log_page_supported(ssc,
5486 			    START_STOP_CYCLE_PAGE);
5487 			if (rval == 1) {
5488 				/* Page found, use it. */
5489 				un->un_start_stop_cycle_page =
5490 				    START_STOP_CYCLE_PAGE;
5491 			} else {
5492 				/*
5493 				 * Page not found or log sense is not
5494 				 * supported.
5495 				 * Notice we do not check the old style
5496 				 * START_STOP_CYCLE_VU_PAGE because this
5497 				 * code path does not apply to old disks.
5498 				 */
5499 				un->un_f_log_sense_supported = FALSE;
5500 				un->un_f_pm_log_sense_smart = FALSE;
5501 			}
5502 		}
5503 
5504 		return;
5505 	}
5506 
5507 	/*
5508 	 * For the disk whose attached HBA has not set the "pm-capable"
5509 	 * property, check if it supports the power management.
5510 	 */
5511 	if (!un->un_f_log_sense_supported) {
5512 		un->un_power_level = SD_SPINDLE_ON;
5513 		un->un_f_pm_is_enabled = FALSE;
5514 		return;
5515 	}
5516 
5517 	rval = sd_log_page_supported(ssc, START_STOP_CYCLE_PAGE);
5518 
5519 #ifdef	SDDEBUG
5520 	if (sd_force_pm_supported) {
5521 		/* Force a successful result */
5522 		rval = 1;
5523 	}
5524 #endif
5525 
5526 	/*
5527 	 * If the start-stop cycle counter log page is not supported
5528 	 * or if the pm-capable property is set to be false (0),
5529 	 * then we should not create the pm_components property.
5530 	 */
5531 	if (rval == -1) {
5532 		/*
5533 		 * Error.
5534 		 * Reading log sense failed, most likely this is
5535 		 * an older drive that does not support log sense.
5536 		 * If this fails auto-pm is not supported.
5537 		 */
5538 		un->un_power_level = SD_SPINDLE_ON;
5539 		un->un_f_pm_is_enabled = FALSE;
5540 
5541 	} else if (rval == 0) {
5542 		/*
5543 		 * Page not found.
5544 		 * The start stop cycle counter is implemented as page
5545 		 * START_STOP_CYCLE_PAGE_VU_PAGE (0x31) in older disks. For
5546 		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
5547 		 */
5548 		if (sd_log_page_supported(ssc, START_STOP_CYCLE_VU_PAGE) == 1) {
5549 			/*
5550 			 * Page found, use this one.
5551 			 */
5552 			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
5553 			un->un_f_pm_is_enabled = TRUE;
5554 		} else {
5555 			/*
5556 			 * Error or page not found.
5557 			 * auto-pm is not supported for this device.
5558 			 */
5559 			un->un_power_level = SD_SPINDLE_ON;
5560 			un->un_f_pm_is_enabled = FALSE;
5561 		}
5562 	} else {
5563 		/*
5564 		 * Page found, use it.
5565 		 */
5566 		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
5567 		un->un_f_pm_is_enabled = TRUE;
5568 	}
5569 
5570 
5571 	if (un->un_f_pm_is_enabled == TRUE) {
5572 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
5573 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
5574 
5575 		rval = sd_send_scsi_LOG_SENSE(ssc, log_page_data,
5576 		    log_page_size, un->un_start_stop_cycle_page,
5577 		    0x01, 0, SD_PATH_DIRECT);
5578 
5579 		if (rval != 0) {
5580 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5581 		}
5582 
5583 #ifdef	SDDEBUG
5584 		if (sd_force_pm_supported) {
5585 			/* Force a successful result */
5586 			rval = 0;
5587 		}
5588 #endif
5589 
5590 		/*
5591 		 * If the Log sense for Page( Start/stop cycle counter page)
5592 		 * succeeds, then power management is supported and we can
5593 		 * enable auto-pm.
5594 		 */
5595 		if (rval == 0)  {
5596 			(void) sd_create_pm_components(devi, un);
5597 		} else {
5598 			un->un_power_level = SD_SPINDLE_ON;
5599 			un->un_f_pm_is_enabled = FALSE;
5600 		}
5601 
5602 		kmem_free(log_page_data, log_page_size);
5603 	}
5604 }
5605 
5606 
5607 /*
5608  *    Function: sd_create_pm_components
5609  *
5610  * Description: Initialize PM property.
5611  *
5612  *     Context: Kernel thread context
5613  */
5614 
5615 static void
sd_create_pm_components(dev_info_t * devi,struct sd_lun * un)5616 sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
5617 {
5618 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5619 
5620 	if (un->un_f_power_condition_supported) {
5621 		if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
5622 		    "pm-components", sd_pwr_pc.pm_comp, 5)
5623 		    != DDI_PROP_SUCCESS) {
5624 			un->un_power_level = SD_SPINDLE_ACTIVE;
5625 			un->un_f_pm_is_enabled = FALSE;
5626 			return;
5627 		}
5628 	} else {
5629 		if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
5630 		    "pm-components", sd_pwr_ss.pm_comp, 3)
5631 		    != DDI_PROP_SUCCESS) {
5632 			un->un_power_level = SD_SPINDLE_ON;
5633 			un->un_f_pm_is_enabled = FALSE;
5634 			return;
5635 		}
5636 	}
5637 	/*
5638 	 * When components are initially created they are idle,
5639 	 * power up any non-removables.
5640 	 * Note: the return value of pm_raise_power can't be used
5641 	 * for determining if PM should be enabled for this device.
5642 	 * Even if you check the return values and remove this
5643 	 * property created above, the PM framework will not honor the
5644 	 * change after the first call to pm_raise_power. Hence,
5645 	 * removal of that property does not help if pm_raise_power
5646 	 * fails. In the case of removable media, the start/stop
5647 	 * will fail if the media is not present.
5648 	 */
5649 	if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
5650 	    SD_PM_STATE_ACTIVE(un)) == DDI_SUCCESS)) {
5651 		mutex_enter(SD_MUTEX(un));
5652 		un->un_power_level = SD_PM_STATE_ACTIVE(un);
5653 		mutex_enter(&un->un_pm_mutex);
5654 		/* Set to on and not busy. */
5655 		un->un_pm_count = 0;
5656 	} else {
5657 		mutex_enter(SD_MUTEX(un));
5658 		un->un_power_level = SD_PM_STATE_STOPPED(un);
5659 		mutex_enter(&un->un_pm_mutex);
5660 		/* Set to off. */
5661 		un->un_pm_count = -1;
5662 	}
5663 	mutex_exit(&un->un_pm_mutex);
5664 	mutex_exit(SD_MUTEX(un));
5665 }
5666 
5667 
5668 /*
5669  *    Function: sd_ddi_suspend
5670  *
5671  * Description: Performs system power-down operations. This includes
5672  *		setting the drive state to indicate its suspended so
5673  *		that no new commands will be accepted. Also, wait for
5674  *		all commands that are in transport or queued to a timer
5675  *		for retry to complete. All timeout threads are cancelled.
5676  *
5677  * Return Code: DDI_FAILURE or DDI_SUCCESS
5678  *
5679  *     Context: Kernel thread context
5680  */
5681 
5682 static int
sd_ddi_suspend(dev_info_t * devi)5683 sd_ddi_suspend(dev_info_t *devi)
5684 {
5685 	struct	sd_lun	*un;
5686 	clock_t		wait_cmds_complete;
5687 
5688 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
5689 	if (un == NULL) {
5690 		return (DDI_FAILURE);
5691 	}
5692 
5693 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");
5694 
5695 	mutex_enter(SD_MUTEX(un));
5696 
5697 	/* Return success if the device is already suspended. */
5698 	if (un->un_state == SD_STATE_SUSPENDED) {
5699 		mutex_exit(SD_MUTEX(un));
5700 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5701 		    "device already suspended, exiting\n");
5702 		return (DDI_SUCCESS);
5703 	}
5704 
5705 	/* Return failure if the device is being used by HA */
5706 	if (un->un_resvd_status &
5707 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
5708 		mutex_exit(SD_MUTEX(un));
5709 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5710 		    "device in use by HA, exiting\n");
5711 		return (DDI_FAILURE);
5712 	}
5713 
5714 	/*
5715 	 * Return failure if the device is in a resource wait
5716 	 * or power changing state.
5717 	 */
5718 	if ((un->un_state == SD_STATE_RWAIT) ||
5719 	    (un->un_state == SD_STATE_PM_CHANGING)) {
5720 		mutex_exit(SD_MUTEX(un));
5721 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5722 		    "device in resource wait state, exiting\n");
5723 		return (DDI_FAILURE);
5724 	}
5725 
5726 
5727 	un->un_save_state = un->un_last_state;
5728 	New_state(un, SD_STATE_SUSPENDED);
5729 
5730 	/*
5731 	 * Wait for all commands that are in transport or queued to a timer
5732 	 * for retry to complete.
5733 	 *
5734 	 * While waiting, no new commands will be accepted or sent because of
5735 	 * the new state we set above.
5736 	 *
5737 	 * Wait till current operation has completed. If we are in the resource
5738 	 * wait state (with an intr outstanding) then we need to wait till the
5739 	 * intr completes and starts the next cmd. We want to wait for
5740 	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
5741 	 */
5742 	wait_cmds_complete = ddi_get_lbolt() +
5743 	    (sd_wait_cmds_complete * drv_usectohz(1000000));
5744 
5745 	while (un->un_ncmds_in_transport != 0) {
5746 		/*
5747 		 * Fail if commands do not finish in the specified time.
5748 		 */
5749 		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
5750 		    wait_cmds_complete) == -1) {
5751 			/*
5752 			 * Undo the state changes made above. Everything
5753 			 * must go back to it's original value.
5754 			 */
5755 			Restore_state(un);
5756 			un->un_last_state = un->un_save_state;
5757 			/* Wake up any threads that might be waiting. */
5758 			cv_broadcast(&un->un_suspend_cv);
5759 			mutex_exit(SD_MUTEX(un));
5760 			SD_ERROR(SD_LOG_IO_PM, un,
5761 			    "sd_ddi_suspend: failed due to outstanding cmds\n");
5762 			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
5763 			return (DDI_FAILURE);
5764 		}
5765 	}
5766 
5767 	/*
5768 	 * Cancel SCSI watch thread and timeouts, if any are active
5769 	 */
5770 
5771 	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
5772 		opaque_t temp_token = un->un_swr_token;
5773 		mutex_exit(SD_MUTEX(un));
5774 		scsi_watch_suspend(temp_token);
5775 		mutex_enter(SD_MUTEX(un));
5776 	}
5777 
5778 	if (un->un_reset_throttle_timeid != NULL) {
5779 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
5780 		un->un_reset_throttle_timeid = NULL;
5781 		mutex_exit(SD_MUTEX(un));
5782 		(void) untimeout(temp_id);
5783 		mutex_enter(SD_MUTEX(un));
5784 	}
5785 
5786 	if (un->un_dcvb_timeid != NULL) {
5787 		timeout_id_t temp_id = un->un_dcvb_timeid;
5788 		un->un_dcvb_timeid = NULL;
5789 		mutex_exit(SD_MUTEX(un));
5790 		(void) untimeout(temp_id);
5791 		mutex_enter(SD_MUTEX(un));
5792 	}
5793 
5794 	mutex_enter(&un->un_pm_mutex);
5795 	if (un->un_pm_timeid != NULL) {
5796 		timeout_id_t temp_id = un->un_pm_timeid;
5797 		un->un_pm_timeid = NULL;
5798 		mutex_exit(&un->un_pm_mutex);
5799 		mutex_exit(SD_MUTEX(un));
5800 		(void) untimeout(temp_id);
5801 		mutex_enter(SD_MUTEX(un));
5802 	} else {
5803 		mutex_exit(&un->un_pm_mutex);
5804 	}
5805 
5806 	if (un->un_rmw_msg_timeid != NULL) {
5807 		timeout_id_t temp_id = un->un_rmw_msg_timeid;
5808 		un->un_rmw_msg_timeid = NULL;
5809 		mutex_exit(SD_MUTEX(un));
5810 		(void) untimeout(temp_id);
5811 		mutex_enter(SD_MUTEX(un));
5812 	}
5813 
5814 	if (un->un_retry_timeid != NULL) {
5815 		timeout_id_t temp_id = un->un_retry_timeid;
5816 		un->un_retry_timeid = NULL;
5817 		mutex_exit(SD_MUTEX(un));
5818 		(void) untimeout(temp_id);
5819 		mutex_enter(SD_MUTEX(un));
5820 
5821 		if (un->un_retry_bp != NULL) {
5822 			un->un_retry_bp->av_forw = un->un_waitq_headp;
5823 			un->un_waitq_headp = un->un_retry_bp;
5824 			if (un->un_waitq_tailp == NULL) {
5825 				un->un_waitq_tailp = un->un_retry_bp;
5826 			}
5827 			un->un_retry_bp = NULL;
5828 			un->un_retry_statp = NULL;
5829 		}
5830 	}
5831 
5832 	if (un->un_direct_priority_timeid != NULL) {
5833 		timeout_id_t temp_id = un->un_direct_priority_timeid;
5834 		un->un_direct_priority_timeid = NULL;
5835 		mutex_exit(SD_MUTEX(un));
5836 		(void) untimeout(temp_id);
5837 		mutex_enter(SD_MUTEX(un));
5838 	}
5839 
5840 	if (un->un_f_is_fibre == TRUE) {
5841 		/*
5842 		 * Remove callbacks for insert and remove events
5843 		 */
5844 		if (un->un_insert_event != NULL) {
5845 			mutex_exit(SD_MUTEX(un));
5846 			(void) ddi_remove_event_handler(un->un_insert_cb_id);
5847 			mutex_enter(SD_MUTEX(un));
5848 			un->un_insert_event = NULL;
5849 		}
5850 
5851 		if (un->un_remove_event != NULL) {
5852 			mutex_exit(SD_MUTEX(un));
5853 			(void) ddi_remove_event_handler(un->un_remove_cb_id);
5854 			mutex_enter(SD_MUTEX(un));
5855 			un->un_remove_event = NULL;
5856 		}
5857 	}
5858 
5859 	mutex_exit(SD_MUTEX(un));
5860 
5861 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");
5862 
5863 	return (DDI_SUCCESS);
5864 }
5865 
5866 
5867 /*
5868  *    Function: sd_ddi_resume
5869  *
5870  * Description: Performs system power-up operations..
5871  *
5872  * Return Code: DDI_SUCCESS
5873  *		DDI_FAILURE
5874  *
5875  *     Context: Kernel thread context
5876  */
5877 
5878 static int
sd_ddi_resume(dev_info_t * devi)5879 sd_ddi_resume(dev_info_t *devi)
5880 {
5881 	struct	sd_lun	*un;
5882 
5883 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
5884 	if (un == NULL) {
5885 		return (DDI_FAILURE);
5886 	}
5887 
5888 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");
5889 
5890 	mutex_enter(SD_MUTEX(un));
5891 	Restore_state(un);
5892 
5893 	/*
5894 	 * Restore the state which was saved to give the
5895 	 * the right state in un_last_state
5896 	 */
5897 	un->un_last_state = un->un_save_state;
5898 	/*
5899 	 * Note: throttle comes back at full.
5900 	 * Also note: this MUST be done before calling pm_raise_power
5901 	 * otherwise the system can get hung in biowait. The scenario where
5902 	 * this'll happen is under cpr suspend. Writing of the system
5903 	 * state goes through sddump, which writes 0 to un_throttle. If
5904 	 * writing the system state then fails, example if the partition is
5905 	 * too small, then cpr attempts a resume. If throttle isn't restored
5906 	 * from the saved value until after calling pm_raise_power then
5907 	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
5908 	 * in biowait.
5909 	 */
5910 	un->un_throttle = un->un_saved_throttle;
5911 
5912 	/*
5913 	 * The chance of failure is very rare as the only command done in power
5914 	 * entry point is START command when you transition from 0->1 or
5915 	 * unknown->1. Put it to SPINDLE ON state irrespective of the state at
5916 	 * which suspend was done. Ignore the return value as the resume should
5917 	 * not be failed. In the case of removable media the media need not be
5918 	 * inserted and hence there is a chance that raise power will fail with
5919 	 * media not present.
5920 	 */
5921 	if (un->un_f_attach_spinup) {
5922 		mutex_exit(SD_MUTEX(un));
5923 		(void) pm_raise_power(SD_DEVINFO(un), 0,
5924 		    SD_PM_STATE_ACTIVE(un));
5925 		mutex_enter(SD_MUTEX(un));
5926 	}
5927 
5928 	/*
5929 	 * Don't broadcast to the suspend cv and therefore possibly
5930 	 * start I/O until after power has been restored.
5931 	 */
5932 	cv_broadcast(&un->un_suspend_cv);
5933 	cv_broadcast(&un->un_state_cv);
5934 
5935 	/* restart thread */
5936 	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
5937 		scsi_watch_resume(un->un_swr_token);
5938 	}
5939 
5940 	/*
5941 	 * Transport any pending commands to the target.
5942 	 *
5943 	 * If this is a low-activity device commands in queue will have to wait
5944 	 * until new commands come in, which may take awhile. Also, we
5945 	 * specifically don't check un_ncmds_in_transport because we know that
5946 	 * there really are no commands in progress after the unit was
5947 	 * suspended and we could have reached the throttle level, been
5948 	 * suspended, and have no new commands coming in for awhile. Highly
5949 	 * unlikely, but so is the low-activity disk scenario.
5950 	 */
5951 	ddi_xbuf_dispatch(un->un_xbuf_attr);
5952 
5953 	sd_start_cmds(un, NULL);
5954 	mutex_exit(SD_MUTEX(un));
5955 
5956 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");
5957 
5958 	return (DDI_SUCCESS);
5959 }
5960 
5961 
5962 /*
5963  *    Function: sd_pm_state_change
5964  *
5965  * Description: Change the driver power state.
5966  *		Someone else is required to actually change the driver
5967  *		power level.
5968  *
5969  *   Arguments: un - driver soft state (unit) structure
5970  *              level - the power level that is changed to
5971  *              flag - to decide how to change the power state
5972  *
5973  * Return Code: DDI_SUCCESS
5974  *
5975  *     Context: Kernel thread context
5976  */
5977 static int
sd_pm_state_change(struct sd_lun * un,int level,int flag)5978 sd_pm_state_change(struct sd_lun *un, int level, int flag)
5979 {
5980 	ASSERT(un != NULL);
5981 	SD_TRACE(SD_LOG_POWER, un, "sd_pm_state_change: entry\n");
5982 
5983 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5984 	mutex_enter(SD_MUTEX(un));
5985 
5986 	if (flag == SD_PM_STATE_ROLLBACK || SD_PM_IS_IO_CAPABLE(un, level)) {
5987 		un->un_power_level = level;
5988 		ASSERT(!mutex_owned(&un->un_pm_mutex));
5989 		mutex_enter(&un->un_pm_mutex);
5990 		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
5991 			un->un_pm_count++;
5992 			ASSERT(un->un_pm_count == 0);
5993 		}
5994 		mutex_exit(&un->un_pm_mutex);
5995 	} else {
5996 		/*
5997 		 * Exit if power management is not enabled for this device,
5998 		 * or if the device is being used by HA.
5999 		 */
6000 		if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
6001 		    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
6002 			mutex_exit(SD_MUTEX(un));
6003 			SD_TRACE(SD_LOG_POWER, un,
6004 			    "sd_pm_state_change: exiting\n");
6005 			return (DDI_FAILURE);
6006 		}
6007 
6008 		SD_INFO(SD_LOG_POWER, un, "sd_pm_state_change: "
6009 		    "un_ncmds_in_driver=%ld\n", un->un_ncmds_in_driver);
6010 
6011 		/*
6012 		 * See if the device is not busy, ie.:
6013 		 *    - we have no commands in the driver for this device
6014 		 *    - not waiting for resources
6015 		 */
6016 		if ((un->un_ncmds_in_driver == 0) &&
6017 		    (un->un_state != SD_STATE_RWAIT)) {
6018 			/*
6019 			 * The device is not busy, so it is OK to go to low
6020 			 * power state. Indicate low power, but rely on someone
6021 			 * else to actually change it.
6022 			 */
6023 			mutex_enter(&un->un_pm_mutex);
6024 			un->un_pm_count = -1;
6025 			mutex_exit(&un->un_pm_mutex);
6026 			un->un_power_level = level;
6027 		}
6028 	}
6029 
6030 	mutex_exit(SD_MUTEX(un));
6031 
6032 	SD_TRACE(SD_LOG_POWER, un, "sd_pm_state_change: exit\n");
6033 
6034 	return (DDI_SUCCESS);
6035 }
6036 
6037 
6038 /*
6039  *    Function: sd_pm_idletimeout_handler
6040  *
6041  * Description: A timer routine that's active only while a device is busy.
6042  *		The purpose is to extend slightly the pm framework's busy
6043  *		view of the device to prevent busy/idle thrashing for
6044  *		back-to-back commands. Do this by comparing the current time
6045  *		to the time at which the last command completed and when the
6046  *		difference is greater than sd_pm_idletime, call
6047  *		pm_idle_component. In addition to indicating idle to the pm
6048  *		framework, update the chain type to again use the internal pm
6049  *		layers of the driver.
6050  *
6051  *   Arguments: arg - driver soft state (unit) structure
6052  *
6053  *     Context: Executes in a timeout(9F) thread context
6054  */
6055 
6056 static void
sd_pm_idletimeout_handler(void * arg)6057 sd_pm_idletimeout_handler(void *arg)
6058 {
6059 	const hrtime_t idletime = sd_pm_idletime * NANOSEC;
6060 	struct sd_lun *un = arg;
6061 
6062 	/*
6063 	 * Grab both mutexes, in the proper order, since we're accessing
6064 	 * both PM and softstate variables.
6065 	 */
6066 	mutex_enter(SD_MUTEX(un));
6067 	mutex_enter(&un->un_pm_mutex);
6068 	/* if timeout id is NULL, we are being canceled via untimeout */
6069 	if (un->un_pm_idle_timeid == NULL) {
6070 		mutex_exit(&un->un_pm_mutex);
6071 		mutex_exit(SD_MUTEX(un));
6072 		return;
6073 	}
6074 	if (((gethrtime() - un->un_pm_idle_time) > idletime) &&
6075 	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
6076 		/*
6077 		 * Update the chain types.
6078 		 * This takes affect on the next new command received.
6079 		 */
6080 		if (un->un_f_non_devbsize_supported) {
6081 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
6082 		} else {
6083 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
6084 		}
6085 		un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD;
6086 
6087 		SD_TRACE(SD_LOG_IO_PM, un,
6088 		    "sd_pm_idletimeout_handler: idling device\n");
6089 		(void) pm_idle_component(SD_DEVINFO(un), 0);
6090 		un->un_pm_idle_timeid = NULL;
6091 	} else {
6092 		un->un_pm_idle_timeid =
6093 		    timeout(sd_pm_idletimeout_handler, un,
6094 		    (drv_usectohz((clock_t)300000))); /* 300 ms. */
6095 	}
6096 	mutex_exit(&un->un_pm_mutex);
6097 	mutex_exit(SD_MUTEX(un));
6098 }
6099 
6100 
6101 /*
6102  *    Function: sd_pm_timeout_handler
6103  *
6104  * Description: Callback to tell framework we are idle.
6105  *
6106  *     Context: timeout(9f) thread context.
6107  */
6108 
6109 static void
sd_pm_timeout_handler(void * arg)6110 sd_pm_timeout_handler(void *arg)
6111 {
6112 	struct sd_lun *un = arg;
6113 
6114 	(void) pm_idle_component(SD_DEVINFO(un), 0);
6115 	mutex_enter(&un->un_pm_mutex);
6116 	un->un_pm_timeid = NULL;
6117 	mutex_exit(&un->un_pm_mutex);
6118 }
6119 
6120 
6121 /*
6122  *    Function: sdpower
6123  *
6124  * Description: PM entry point.
6125  *
6126  * Return Code: DDI_SUCCESS
6127  *		DDI_FAILURE
6128  *
6129  *     Context: Kernel thread context
6130  */
6131 
6132 static int
sdpower(dev_info_t * devi,int component,int level)6133 sdpower(dev_info_t *devi, int component, int level)
6134 {
6135 	struct sd_lun	*un;
6136 	int		instance;
6137 	int		rval = DDI_SUCCESS;
6138 	uint_t		i, log_page_size, maxcycles, ncycles;
6139 	uchar_t		*log_page_data;
6140 	int		log_sense_page;
6141 	int		medium_present;
6142 	time_t		intvlp;
6143 	struct pm_trans_data	sd_pm_tran_data;
6144 	uchar_t		save_state = SD_STATE_NORMAL;
6145 	int		sval;
6146 	uchar_t		state_before_pm;
6147 	sd_ssc_t	*ssc;
6148 	int	last_power_level = SD_SPINDLE_UNINIT;
6149 
6150 	instance = ddi_get_instance(devi);
6151 
6152 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
6153 	    !SD_PM_IS_LEVEL_VALID(un, level) || component != 0) {
6154 		return (DDI_FAILURE);
6155 	}
6156 
6157 	ssc = sd_ssc_init(un);
6158 
6159 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
6160 
6161 	mutex_enter(SD_MUTEX(un));
6162 
6163 	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
6164 	    un->un_ncmds_in_driver);
6165 
6166 	/*
6167 	 * If un_ncmds_in_driver is non-zero it indicates commands are
6168 	 * already being processed in the driver.
6169 	 * At the same time somebody is requesting to go to a lower power
6170 	 * that can't perform I/O, which can't happen, therefore we need to
6171 	 * return failure.
6172 	 */
6173 	if ((!SD_PM_IS_IO_CAPABLE(un, level)) &&
6174 	    (un->un_ncmds_in_driver != 0)) {
6175 		mutex_exit(SD_MUTEX(un));
6176 
6177 		SD_TRACE(SD_LOG_IO_PM, un,
6178 		    "sdpower: exit, device has queued cmds.\n");
6179 
6180 		goto sdpower_failed;
6181 	}
6182 
6183 	/*
6184 	 * if it is OFFLINE that means the disk is completely dead
6185 	 * in our case we have to put the disk in on or off by sending commands
6186 	 * Of course that will fail anyway so return back here.
6187 	 *
6188 	 * Power changes to a device that's OFFLINE or SUSPENDED
6189 	 * are not allowed.
6190 	 */
6191 	if ((un->un_state == SD_STATE_OFFLINE) ||
6192 	    (un->un_state == SD_STATE_SUSPENDED)) {
6193 		mutex_exit(SD_MUTEX(un));
6194 
6195 		SD_TRACE(SD_LOG_IO_PM, un,
6196 		    "sdpower: exit, device is off-line.\n");
6197 
6198 		goto sdpower_failed;
6199 	}
6200 
6201 	/*
6202 	 * Change the device's state to indicate it's power level
6203 	 * is being changed. Do this to prevent a power off in the
6204 	 * middle of commands, which is especially bad on devices
6205 	 * that are really powered off instead of just spun down.
6206 	 */
6207 	state_before_pm = un->un_state;
6208 	un->un_state = SD_STATE_PM_CHANGING;
6209 
6210 	mutex_exit(SD_MUTEX(un));
6211 
6212 	/*
6213 	 * If log sense command is not supported, bypass the
6214 	 * following checking, otherwise, check the log sense
6215 	 * information for this device.
6216 	 */
6217 	if (SD_PM_STOP_MOTOR_NEEDED(un, level) &&
6218 	    un->un_f_log_sense_supported) {
6219 		/*
6220 		 * Get the log sense information to understand whether the
6221 		 * the powercycle counts have gone beyond the threshhold.
6222 		 */
6223 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
6224 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
6225 
6226 		mutex_enter(SD_MUTEX(un));
6227 		log_sense_page = un->un_start_stop_cycle_page;
6228 		mutex_exit(SD_MUTEX(un));
6229 
6230 		rval = sd_send_scsi_LOG_SENSE(ssc, log_page_data,
6231 		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
6232 
6233 		if (rval != 0) {
6234 			if (rval == EIO)
6235 				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
6236 			else
6237 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6238 		}
6239 
6240 #ifdef	SDDEBUG
6241 		if (sd_force_pm_supported) {
6242 			/* Force a successful result */
6243 			rval = 0;
6244 		}
6245 #endif
6246 		if (rval != 0) {
6247 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
6248 			    "Log Sense Failed\n");
6249 
6250 			kmem_free(log_page_data, log_page_size);
6251 			/* Cannot support power management on those drives */
6252 
6253 			/*
6254 			 * On exit put the state back to it's original value
6255 			 * and broadcast to anyone waiting for the power
6256 			 * change completion.
6257 			 */
6258 			mutex_enter(SD_MUTEX(un));
6259 			un->un_state = state_before_pm;
6260 			cv_broadcast(&un->un_suspend_cv);
6261 			mutex_exit(SD_MUTEX(un));
6262 			SD_TRACE(SD_LOG_IO_PM, un,
6263 			    "sdpower: exit, Log Sense Failed.\n");
6264 
6265 			goto sdpower_failed;
6266 		}
6267 
6268 		/*
6269 		 * From the page data - Convert the essential information to
6270 		 * pm_trans_data
6271 		 */
6272 		maxcycles =
6273 		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
6274 		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
6275 
6276 		ncycles =
6277 		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
6278 		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
6279 
6280 		if (un->un_f_pm_log_sense_smart) {
6281 			sd_pm_tran_data.un.smart_count.allowed = maxcycles;
6282 			sd_pm_tran_data.un.smart_count.consumed = ncycles;
6283 			sd_pm_tran_data.un.smart_count.flag = 0;
6284 			sd_pm_tran_data.format = DC_SMART_FORMAT;
6285 		} else {
6286 			sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
6287 			sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
6288 			for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
6289 				sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
6290 				    log_page_data[8+i];
6291 			}
6292 			sd_pm_tran_data.un.scsi_cycles.flag = 0;
6293 			sd_pm_tran_data.format = DC_SCSI_FORMAT;
6294 		}
6295 
6296 		kmem_free(log_page_data, log_page_size);
6297 
6298 		/*
6299 		 * Call pm_trans_check routine to get the Ok from
6300 		 * the global policy
6301 		 */
6302 		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
6303 #ifdef	SDDEBUG
6304 		if (sd_force_pm_supported) {
6305 			/* Force a successful result */
6306 			rval = 1;
6307 		}
6308 #endif
6309 		switch (rval) {
6310 		case 0:
6311 			/*
6312 			 * Not Ok to Power cycle or error in parameters passed
6313 			 * Would have given the advised time to consider power
6314 			 * cycle. Based on the new intvlp parameter we are
6315 			 * supposed to pretend we are busy so that pm framework
6316 			 * will never call our power entry point. Because of
6317 			 * that install a timeout handler and wait for the
6318 			 * recommended time to elapse so that power management
6319 			 * can be effective again.
6320 			 *
6321 			 * To effect this behavior, call pm_busy_component to
6322 			 * indicate to the framework this device is busy.
6323 			 * By not adjusting un_pm_count the rest of PM in
6324 			 * the driver will function normally, and independent
6325 			 * of this but because the framework is told the device
6326 			 * is busy it won't attempt powering down until it gets
6327 			 * a matching idle. The timeout handler sends this.
6328 			 * Note: sd_pm_entry can't be called here to do this
6329 			 * because sdpower may have been called as a result
6330 			 * of a call to pm_raise_power from within sd_pm_entry.
6331 			 *
6332 			 * If a timeout handler is already active then
6333 			 * don't install another.
6334 			 */
6335 			mutex_enter(&un->un_pm_mutex);
6336 			if (un->un_pm_timeid == NULL) {
6337 				un->un_pm_timeid =
6338 				    timeout(sd_pm_timeout_handler,
6339 				    un, intvlp * drv_usectohz(1000000));
6340 				mutex_exit(&un->un_pm_mutex);
6341 				(void) pm_busy_component(SD_DEVINFO(un), 0);
6342 			} else {
6343 				mutex_exit(&un->un_pm_mutex);
6344 			}
6345 			/*
6346 			 * On exit put the state back to its original value
6347 			 * and broadcast to anyone waiting for the power
6348 			 * change completion.
6349 			 */
6350 			mutex_enter(SD_MUTEX(un));
6351 			un->un_state = state_before_pm;
6352 			cv_broadcast(&un->un_suspend_cv);
6353 			mutex_exit(SD_MUTEX(un));
6354 
6355 			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
6356 			    "trans check Failed, not ok to power cycle.\n");
6357 
6358 			goto sdpower_failed;
6359 		case -1:
6360 			/*
6361 			 * On exit put the state back to its original value
6362 			 * and broadcast to anyone waiting for the power
6363 			 * change completion.
6364 			 */
6365 			mutex_enter(SD_MUTEX(un));
6366 			un->un_state = state_before_pm;
6367 			cv_broadcast(&un->un_suspend_cv);
6368 			mutex_exit(SD_MUTEX(un));
6369 			SD_TRACE(SD_LOG_IO_PM, un,
6370 			    "sdpower: exit, trans check command Failed.\n");
6371 
6372 			goto sdpower_failed;
6373 		}
6374 	}
6375 
6376 	if (!SD_PM_IS_IO_CAPABLE(un, level)) {
6377 		/*
6378 		 * Save the last state... if the STOP FAILS we need it
6379 		 * for restoring
6380 		 */
6381 		mutex_enter(SD_MUTEX(un));
6382 		save_state = un->un_last_state;
6383 		last_power_level = un->un_power_level;
6384 		/*
6385 		 * There must not be any cmds. getting processed
6386 		 * in the driver when we get here. Power to the
6387 		 * device is potentially going off.
6388 		 */
6389 		ASSERT(un->un_ncmds_in_driver == 0);
6390 		mutex_exit(SD_MUTEX(un));
6391 
6392 		/*
6393 		 * For now PM suspend the device completely before spindle is
6394 		 * turned off
6395 		 */
6396 		if ((rval = sd_pm_state_change(un, level, SD_PM_STATE_CHANGE))
6397 		    == DDI_FAILURE) {
6398 			/*
6399 			 * On exit put the state back to its original value
6400 			 * and broadcast to anyone waiting for the power
6401 			 * change completion.
6402 			 */
6403 			mutex_enter(SD_MUTEX(un));
6404 			un->un_state = state_before_pm;
6405 			un->un_power_level = last_power_level;
6406 			cv_broadcast(&un->un_suspend_cv);
6407 			mutex_exit(SD_MUTEX(un));
6408 			SD_TRACE(SD_LOG_IO_PM, un,
6409 			    "sdpower: exit, PM suspend Failed.\n");
6410 
6411 			goto sdpower_failed;
6412 		}
6413 	}
6414 
6415 	/*
6416 	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
6417 	 * close, or strategy. Dump no longer uses this routine; it uses its
6418 	 * own code so it can be done in polled mode.
6419 	 */
6420 
6421 	medium_present = TRUE;
6422 
6423 	/*
6424 	 * When powering up, issue a TUR in case the device is at unit
6425 	 * attention.  Don't do retries. Bypass the PM layer, otherwise
6426 	 * a deadlock on un_pm_busy_cv will occur.
6427 	 */
6428 	if (SD_PM_IS_IO_CAPABLE(un, level)) {
6429 		sval = sd_send_scsi_TEST_UNIT_READY(ssc,
6430 		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
6431 		if (sval != 0)
6432 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6433 	}
6434 
6435 	if (un->un_f_power_condition_supported) {
6436 		char *pm_condition_name[] = {"STOPPED", "STANDBY",
6437 		    "IDLE", "ACTIVE"};
6438 		SD_TRACE(SD_LOG_IO_PM, un,
6439 		    "sdpower: sending \'%s\' power condition",
6440 		    pm_condition_name[level]);
6441 		sval = sd_send_scsi_START_STOP_UNIT(ssc, SD_POWER_CONDITION,
6442 		    sd_pl2pc[level], SD_PATH_DIRECT);
6443 	} else {
6444 		SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
6445 		    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
6446 		sval = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
6447 		    ((level == SD_SPINDLE_ON) ? SD_TARGET_START :
6448 		    SD_TARGET_STOP), SD_PATH_DIRECT);
6449 	}
6450 	if (sval != 0) {
6451 		if (sval == EIO)
6452 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
6453 		else
6454 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6455 	}
6456 
6457 	/* Command failed, check for media present. */
6458 	if ((sval == ENXIO) && un->un_f_has_removable_media) {
6459 		medium_present = FALSE;
6460 	}
6461 
6462 	/*
6463 	 * The conditions of interest here are:
6464 	 *   if a spindle off with media present fails,
6465 	 *	then restore the state and return an error.
6466 	 *   else if a spindle on fails,
6467 	 *	then return an error (there's no state to restore).
6468 	 * In all other cases we setup for the new state
6469 	 * and return success.
6470 	 */
6471 	if (!SD_PM_IS_IO_CAPABLE(un, level)) {
6472 		if ((medium_present == TRUE) && (sval != 0)) {
6473 			/* The stop command from above failed */
6474 			rval = DDI_FAILURE;
6475 			/*
6476 			 * The stop command failed, and we have media
6477 			 * present. Put the level back by calling
6478 			 * sd_pm_state_change() with SD_PM_STATE_ROLLBACK
6479 			 * and set the state back to its previous value.
6480 			 */
6481 			(void) sd_pm_state_change(un, last_power_level,
6482 			    SD_PM_STATE_ROLLBACK);
6483 			mutex_enter(SD_MUTEX(un));
6484 			un->un_last_state = save_state;
6485 			mutex_exit(SD_MUTEX(un));
6486 		} else if (un->un_f_monitor_media_state) {
6487 			/*
6488 			 * The stop command from above succeeded.
6489 			 * Terminate watch thread in case of removable media
6490 			 * devices going into low power state. This is as per
6491 			 * the requirements of pm framework, otherwise commands
6492 			 * will be generated for the device (through watch
6493 			 * thread), even when the device is in low power state.
6494 			 */
6495 			mutex_enter(SD_MUTEX(un));
6496 			un->un_f_watcht_stopped = FALSE;
6497 			if (un->un_swr_token != NULL) {
6498 				opaque_t temp_token = un->un_swr_token;
6499 				un->un_f_watcht_stopped = TRUE;
6500 				un->un_swr_token = NULL;
6501 				mutex_exit(SD_MUTEX(un));
6502 				(void) scsi_watch_request_terminate(temp_token,
6503 				    SCSI_WATCH_TERMINATE_ALL_WAIT);
6504 			} else {
6505 				mutex_exit(SD_MUTEX(un));
6506 			}
6507 		}
6508 	} else {
6509 		/*
6510 		 * The level requested is I/O capable.
6511 		 * Legacy behavior: return success on a failed spinup
6512 		 * if there is no media in the drive.
6513 		 * Do this by looking at medium_present here.
6514 		 */
6515 		if ((sval != 0) && medium_present) {
6516 			/* The start command from above failed */
6517 			rval = DDI_FAILURE;
6518 		} else {
6519 			/*
6520 			 * The start command from above succeeded
6521 			 * PM resume the devices now that we have
6522 			 * started the disks
6523 			 */
6524 			(void) sd_pm_state_change(un, level,
6525 			    SD_PM_STATE_CHANGE);
6526 
6527 			/*
6528 			 * Resume the watch thread since it was suspended
6529 			 * when the device went into low power mode.
6530 			 */
6531 			if (un->un_f_monitor_media_state) {
6532 				mutex_enter(SD_MUTEX(un));
6533 				if (un->un_f_watcht_stopped == TRUE) {
6534 					opaque_t temp_token;
6535 
6536 					un->un_f_watcht_stopped = FALSE;
6537 					mutex_exit(SD_MUTEX(un));
6538 					temp_token =
6539 					    sd_watch_request_submit(un);
6540 					mutex_enter(SD_MUTEX(un));
6541 					un->un_swr_token = temp_token;
6542 				}
6543 				mutex_exit(SD_MUTEX(un));
6544 			}
6545 		}
6546 	}
6547 
6548 	/*
6549 	 * On exit put the state back to its original value
6550 	 * and broadcast to anyone waiting for the power
6551 	 * change completion.
6552 	 */
6553 	mutex_enter(SD_MUTEX(un));
6554 	un->un_state = state_before_pm;
6555 	cv_broadcast(&un->un_suspend_cv);
6556 	mutex_exit(SD_MUTEX(un));
6557 
6558 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
6559 
6560 	sd_ssc_fini(ssc);
6561 	return (rval);
6562 
6563 sdpower_failed:
6564 
6565 	sd_ssc_fini(ssc);
6566 	return (DDI_FAILURE);
6567 }
6568 
6569 
6570 
6571 /*
6572  *    Function: sdattach
6573  *
6574  * Description: Driver's attach(9e) entry point function.
6575  *
6576  *   Arguments: devi - opaque device info handle
6577  *		cmd  - attach  type
6578  *
6579  * Return Code: DDI_SUCCESS
6580  *		DDI_FAILURE
6581  *
6582  *     Context: Kernel thread context
6583  */
6584 
6585 static int
sdattach(dev_info_t * devi,ddi_attach_cmd_t cmd)6586 sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
6587 {
6588 	switch (cmd) {
6589 	case DDI_ATTACH:
6590 		return (sd_unit_attach(devi));
6591 	case DDI_RESUME:
6592 		return (sd_ddi_resume(devi));
6593 	default:
6594 		break;
6595 	}
6596 	return (DDI_FAILURE);
6597 }
6598 
6599 
6600 /*
6601  *    Function: sddetach
6602  *
6603  * Description: Driver's detach(9E) entry point function.
6604  *
6605  *   Arguments: devi - opaque device info handle
6606  *		cmd  - detach  type
6607  *
6608  * Return Code: DDI_SUCCESS
6609  *		DDI_FAILURE
6610  *
6611  *     Context: Kernel thread context
6612  */
6613 
6614 static int
sddetach(dev_info_t * devi,ddi_detach_cmd_t cmd)6615 sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
6616 {
6617 	switch (cmd) {
6618 	case DDI_DETACH:
6619 		return (sd_unit_detach(devi));
6620 	case DDI_SUSPEND:
6621 		return (sd_ddi_suspend(devi));
6622 	default:
6623 		break;
6624 	}
6625 	return (DDI_FAILURE);
6626 }
6627 
6628 
6629 /*
6630  *     Function: sd_sync_with_callback
6631  *
6632  *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
6633  *		 state while the callback routine is active.
6634  *
6635  *    Arguments: un: softstate structure for the instance
6636  *
6637  *	Context: Kernel thread context
6638  */
6639 
6640 static void
sd_sync_with_callback(struct sd_lun * un)6641 sd_sync_with_callback(struct sd_lun *un)
6642 {
6643 	ASSERT(un != NULL);
6644 
6645 	mutex_enter(SD_MUTEX(un));
6646 
6647 	ASSERT(un->un_in_callback >= 0);
6648 
6649 	while (un->un_in_callback > 0) {
6650 		mutex_exit(SD_MUTEX(un));
6651 		delay(2);
6652 		mutex_enter(SD_MUTEX(un));
6653 	}
6654 
6655 	mutex_exit(SD_MUTEX(un));
6656 }
6657 
6658 /*
6659  *    Function: sd_unit_attach
6660  *
6661  * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
6662  *		the soft state structure for the device and performs
6663  *		all necessary structure and device initializations.
6664  *
6665  *   Arguments: devi: the system's dev_info_t for the device.
6666  *
6667  * Return Code: DDI_SUCCESS if attach is successful.
6668  *		DDI_FAILURE if any part of the attach fails.
6669  *
6670  *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
6671  *		Kernel thread context only.  Can sleep.
6672  */
6673 
6674 static int
sd_unit_attach(dev_info_t * devi)6675 sd_unit_attach(dev_info_t *devi)
6676 {
6677 	struct	scsi_device	*devp;
6678 	struct	sd_lun		*un;
6679 	char			*variantp;
6680 	char			name_str[48];
6681 	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
6682 	int	instance;
6683 	int	rval;
6684 	int	wc_enabled;
6685 	int	wc_changeable;
6686 	int	tgt;
6687 	uint64_t	capacity;
6688 	uint_t		lbasize = 0;
6689 	dev_info_t	*pdip = ddi_get_parent(devi);
6690 	int		offbyone = 0;
6691 	int		geom_label_valid = 0;
6692 	sd_ssc_t	*ssc;
6693 	int		status;
6694 	struct sd_fm_internal	*sfip = NULL;
6695 	int		max_xfer_size;
6696 
6697 	/*
6698 	 * Retrieve the target driver's private data area. This was set
6699 	 * up by the HBA.
6700 	 */
6701 	devp = ddi_get_driver_private(devi);
6702 
6703 	/*
6704 	 * Retrieve the target ID of the device.
6705 	 */
6706 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
6707 	    SCSI_ADDR_PROP_TARGET, -1);
6708 
6709 	/*
6710 	 * Since we have no idea what state things were left in by the last
6711 	 * user of the device, set up some 'default' settings, ie. turn 'em
6712 	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
6713 	 * Do this before the scsi_probe, which sends an inquiry.
6714 	 * This is a fix for bug (4430280).
6715 	 * Of special importance is wide-xfer. The drive could have been left
6716 	 * in wide transfer mode by the last driver to communicate with it,
6717 	 * this includes us. If that's the case, and if the following is not
6718 	 * setup properly or we don't re-negotiate with the drive prior to
6719 	 * transferring data to/from the drive, it causes bus parity errors,
6720 	 * data overruns, and unexpected interrupts. This first occurred when
6721 	 * the fix for bug (4378686) was made.
6722 	 */
6723 	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
6724 	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
6725 	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
6726 
6727 	/*
6728 	 * Currently, scsi_ifsetcap sets tagged-qing capability for all LUNs
6729 	 * on a target. Setting it per lun instance actually sets the
6730 	 * capability of this target, which affects those luns already
6731 	 * attached on the same target. So during attach, we can only disable
6732 	 * attached on the same target. So during attach, we can disable
6733 	 * target. By doing this, we assume a target has the same tagged-qing
6734 	 * capability for every lun. The condition can be removed when HBA
6735 	 * is changed to support per lun based tagged-qing capability.
6736 	 */
6737 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
6738 		(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
6739 	}
6740 
6741 	/*
6742 	 * Use scsi_probe() to issue an INQUIRY command to the device.
6743 	 * This call will allocate and fill in the scsi_inquiry structure
6744 	 * and point the sd_inq member of the scsi_device structure to it.
6745 	 * If the attach succeeds, then this memory will not be de-allocated
6746 	 * (via scsi_unprobe()) until the instance is detached.
6747 	 */
6748 	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
6749 		goto probe_failed;
6750 	}
6751 
6752 	/*
6753 	 * Check the device type as specified in the inquiry data and
6754 	 * claim it if it is of a type that we support.
6755 	 */
6756 	switch (devp->sd_inq->inq_dtype) {
6757 	case DTYPE_DIRECT:
6758 		break;
6759 	case DTYPE_RODIRECT:
6760 		break;
6761 	case DTYPE_OPTICAL:
6762 		break;
6763 	case DTYPE_NOTPRESENT:
6764 	default:
6765 		/* Unsupported device type; fail the attach. */
6766 		goto probe_failed;
6767 	}
6768 
6769 	/*
6770 	 * Allocate the soft state structure for this unit.
6771 	 *
6772 	 * We rely upon this memory being set to all zeroes by
6773 	 * ddi_soft_state_zalloc().  We assume that any member of the
6774 	 * soft state structure that is not explicitly initialized by
6775 	 * this routine will have a value of zero.
6776 	 */
6777 	instance = ddi_get_instance(devp->sd_dev);
6778 	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
6779 		goto probe_failed;
6780 	}
6781 
6782 	/*
6783 	 * Retrieve a pointer to the newly-allocated soft state.
6784 	 *
6785 	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
6786 	 * was successful, unless something has gone horribly wrong and the
6787 	 * ddi's soft state internals are corrupt (in which case it is
6788 	 * probably better to halt here than just fail the attach....)
6789 	 */
6790 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
6791 		panic("sd_unit_attach: NULL soft state on instance:0x%x",
6792 		    instance);
6793 		/*NOTREACHED*/
6794 	}
6795 
6796 	/*
6797 	 * Link the back ptr of the driver soft state to the scsi_device
6798 	 * struct for this lun.
6799 	 * Save a pointer to the softstate in the driver-private area of
6800 	 * the scsi_device struct.
6801 	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
6802 	 * we first set un->un_sd below.
6803 	 */
6804 	un->un_sd = devp;
6805 	devp->sd_private = (opaque_t)un;
6806 
6807 	/*
6808 	 * The following must be after devp is stored in the soft state struct.
6809 	 */
6810 #ifdef SDDEBUG
6811 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6812 	    "%s_unit_attach: un:0x%p instance:%d\n",
6813 	    ddi_driver_name(devi), un, instance);
6814 #endif
6815 
6816 	/*
6817 	 * Set up the device type and node type (for the minor nodes).
6818 	 * By default we assume that the device can at least support the
6819 	 * Common Command Set. Call it a CD-ROM if it reports itself
6820 	 * as a RODIRECT device.
6821 	 */
6822 	switch (devp->sd_inq->inq_dtype) {
6823 	case DTYPE_RODIRECT:
6824 		un->un_node_type = DDI_NT_CD_CHAN;
6825 		un->un_ctype	 = CTYPE_CDROM;
6826 		break;
6827 	case DTYPE_OPTICAL:
6828 		un->un_node_type = DDI_NT_BLOCK_CHAN;
6829 		un->un_ctype	 = CTYPE_ROD;
6830 		break;
6831 	default:
6832 		un->un_node_type = DDI_NT_BLOCK_CHAN;
6833 		un->un_ctype	 = CTYPE_CCS;
6834 		break;
6835 	}
6836 
6837 	/*
6838 	 * Try to read the interconnect type from the HBA.
6839 	 *
6840 	 * Note: This driver is currently compiled as two binaries, a parallel
6841 	 * scsi version (sd) and a fibre channel version (ssd). All functional
6842 	 * differences are determined at compile time. In the future a single
6843 	 * binary will be provided and the interconnect type will be used to
6844 	 * differentiate between fibre and parallel scsi behaviors. At that time
6845 	 * it will be necessary for all fibre channel HBAs to support this
6846 	 * property.
6847 	 *
6848 	 * Set un_f_is_fibre to TRUE (fibre is the default).
6849 	 */
6850 	un->un_f_is_fibre = TRUE;
6851 	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
6852 	case INTERCONNECT_SSA:
6853 		un->un_interconnect_type = SD_INTERCONNECT_SSA;
6854 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6855 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
6856 		break;
6857 	case INTERCONNECT_PARALLEL:
6858 		un->un_f_is_fibre = FALSE;
6859 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
6860 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6861 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
6862 		break;
6863 	case INTERCONNECT_SAS:
6864 		un->un_f_is_fibre = FALSE;
6865 		un->un_interconnect_type = SD_INTERCONNECT_SAS;
6866 		un->un_node_type = DDI_NT_BLOCK_SAS;
6867 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6868 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SAS\n", un);
6869 		break;
6870 	case INTERCONNECT_SATA:
6871 		un->un_f_is_fibre = FALSE;
6872 		un->un_interconnect_type = SD_INTERCONNECT_SATA;
6873 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6874 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SATA\n", un);
6875 		break;
6876 	case INTERCONNECT_FIBRE:
6877 		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
6878 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6879 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
6880 		break;
6881 	case INTERCONNECT_FABRIC:
6882 		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
6883 		un->un_node_type = DDI_NT_BLOCK_FABRIC;
6884 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6885 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
6886 		break;
6887 	default:
6888 		/*
6889 		 * The HBA does not support the "interconnect-type" property
6890 		 * (or did not provide a recognized type).
6891 		 *
6892 		 * Note: This will be obsoleted when a single fibre channel
6893 		 * and parallel scsi driver is delivered. In the meantime the
6894 		 * interconnect type will be set to the platform default. If
6895 		 * that type is not parallel SCSI, it means that we should be
6896 		 * assuming "ssd" semantics. However, here this also means that
6897 		 * the FC HBA is not supporting the "interconnect-type" property
6898 		 * like we expect it to, so log this occurrence.
6899 		 */
6900 		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
6901 		if (!SD_IS_PARALLEL_SCSI(un)) {
6902 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6903 			    "sd_unit_attach: un:0x%p Assuming "
6904 			    "INTERCONNECT_FIBRE\n", un);
6905 		} else {
6906 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6907 			    "sd_unit_attach: un:0x%p Assuming "
6908 			    "INTERCONNECT_PARALLEL\n", un);
6909 			un->un_f_is_fibre = FALSE;
6910 		}
6911 		break;
6912 	}
6913 
6914 	if (un->un_f_is_fibre == TRUE) {
6915 		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
6916 		    SCSI_VERSION_3) {
6917 			switch (un->un_interconnect_type) {
6918 			case SD_INTERCONNECT_FIBRE:
6919 			case SD_INTERCONNECT_SSA:
6920 				un->un_node_type = DDI_NT_BLOCK_WWN;
6921 				break;
6922 			default:
6923 				break;
6924 			}
6925 		}
6926 	}
6927 
6928 	/*
6929 	 * Initialize the Request Sense command for the target
6930 	 */
6931 	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
6932 		goto alloc_rqs_failed;
6933 	}
6934 
6935 	/*
6936 	 * Set un_retry_count with SD_RETRY_COUNT, this is ok for Sparc
6937 	 * with separate binary for sd and ssd.
6938 	 *
6939 	 * x86 has 1 binary, un_retry_count is set based on connection type.
6940 	 * The hardcoded values will go away when Sparc uses 1 binary
6941 	 * for sd and ssd.  These hardcoded values need to match
6942 	 * SD_RETRY_COUNT in sddef.h
6943 	 * The value used is based on interconnect type.
6944 	 * fibre = 3, parallel = 5
6945 	 */
6946 	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
6947 
6948 	/*
6949 	 * Set the per disk retry count to the default number of retries
6950 	 * for disks and CDROMs. This value can be overridden by the
6951 	 * disk property list or an entry in sd.conf.
6952 	 */
6953 	un->un_notready_retry_count =
6954 	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un)
6955 	    : DISK_NOT_READY_RETRY_COUNT(un);
6956 
6957 	/*
6958 	 * Set the busy retry count to the default value of un_retry_count.
6959 	 * This can be overridden by entries in sd.conf or the device
6960 	 * config table.
6961 	 */
6962 	un->un_busy_retry_count = un->un_retry_count;
6963 
6964 	/*
6965 	 * Init the reset threshold for retries.  This number determines
6966 	 * how many retries must be performed before a reset can be issued
6967 	 * (for certain error conditions). This can be overridden by entries
6968 	 * in sd.conf or the device config table.
6969 	 */
6970 	un->un_reset_retry_count = (un->un_retry_count / 2);
6971 
6972 	/*
6973 	 * Set the victim_retry_count to twice the default un_retry_count
6974 	 */
6975 	un->un_victim_retry_count = (2 * un->un_retry_count);
6976 
6977 	/*
6978 	 * Set the reservation release timeout to the default value of
6979 	 * 5 seconds. This can be overridden by entries in ssd.conf or the
6980 	 * device config table.
6981 	 */
6982 	un->un_reserve_release_time = 5;
6983 
6984 	/*
6985 	 * Set up the default maximum transfer size. Note that this may
6986 	 * get updated later in the attach, when setting up default wide
6987 	 * operations for disks.
6988 	 */
6989 	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
6990 	un->un_partial_dma_supported = 1;
6991 
6992 	/*
6993 	 * Get "allow bus device reset" property (defaults to "enabled" if
6994 	 * the property was not defined). This is to disable bus resets for
6995 	 * certain kinds of error recovery. Note: In the future when a run-time
6996 	 * fibre check is available the soft state flag should default to
6997 	 * enabled.
6998 	 */
6999 	if (un->un_f_is_fibre == TRUE) {
7000 		un->un_f_allow_bus_device_reset = TRUE;
7001 	} else {
7002 		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7003 		    "allow-bus-device-reset", 1) != 0) {
7004 			un->un_f_allow_bus_device_reset = TRUE;
7005 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7006 			    "sd_unit_attach: un:0x%p Bus device reset "
7007 			    "enabled\n", un);
7008 		} else {
7009 			un->un_f_allow_bus_device_reset = FALSE;
7010 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7011 			    "sd_unit_attach: un:0x%p Bus device reset "
7012 			    "disabled\n", un);
7013 		}
7014 	}
7015 
7016 	/*
7017 	 * Check if this is an ATAPI device. ATAPI devices use Group 1
7018 	 * Read/Write commands and Group 2 Mode Sense/Select commands.
7019 	 *
7020 	 * Note: The "obsolete" way of doing this is to check for the "atapi"
7021 	 * property. The new "variant" property with a value of "atapi" has been
7022 	 * introduced so that future 'variants' of standard SCSI behavior (like
7023 	 * atapi) could be specified by the underlying HBA drivers by supplying
7024 	 * a new value for the "variant" property, instead of having to define a
7025 	 * new property.
7026 	 */
7027 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
7028 		un->un_f_cfg_is_atapi = TRUE;
7029 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7030 		    "sd_unit_attach: un:0x%p Atapi device\n", un);
7031 	}
7032 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
7033 	    &variantp) == DDI_PROP_SUCCESS) {
7034 		if (strcmp(variantp, "atapi") == 0) {
7035 			un->un_f_cfg_is_atapi = TRUE;
7036 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7037 			    "sd_unit_attach: un:0x%p Atapi device\n", un);
7038 		}
7039 		ddi_prop_free(variantp);
7040 	}
7041 
7042 	un->un_cmd_timeout	= SD_IO_TIME;
7043 
7044 	un->un_busy_timeout  = SD_BSY_TIMEOUT;
7045 
7046 	/* Info on current states, statuses, etc. (Updated frequently) */
7047 	un->un_state		= SD_STATE_NORMAL;
7048 	un->un_last_state	= SD_STATE_NORMAL;
7049 
7050 	/* Control & status info for command throttling */
7051 	un->un_throttle		= sd_max_throttle;
7052 	un->un_saved_throttle	= sd_max_throttle;
7053 	un->un_min_throttle	= sd_min_throttle;
7054 
7055 	if (un->un_f_is_fibre == TRUE) {
7056 		un->un_f_use_adaptive_throttle = TRUE;
7057 	} else {
7058 		un->un_f_use_adaptive_throttle = FALSE;
7059 	}
7060 
7061 	/* Removable media support. */
7062 	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
7063 	un->un_mediastate		= DKIO_NONE;
7064 	un->un_specified_mediastate	= DKIO_NONE;
7065 
7066 	/* CVs for suspend/resume (PM or DR) */
7067 	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
7068 	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
7069 
7070 	/* Power management support. */
7071 	un->un_power_level = SD_SPINDLE_UNINIT;
7072 
7073 	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
7074 	un->un_f_wcc_inprog = 0;
7075 
7076 	/*
7077 	 * The conf file entry and softstate variable is a forceful override,
7078 	 * meaning a non-zero value must be entered to change the default.
7079 	 */
7080 	un->un_f_disksort_disabled = FALSE;
7081 	un->un_f_rmw_type = SD_RMW_TYPE_DEFAULT;
7082 	un->un_f_enable_rmw = FALSE;
7083 
7084 	/*
7085 	 * GET EVENT STATUS NOTIFICATION media polling enabled by default, but
7086 	 * can be overridden via [s]sd-config-list "mmc-gesn-polling" property.
7087 	 */
7088 	un->un_f_mmc_gesn_polling = TRUE;
7089 
7090 	/*
7091 	 * physical sector size defaults to DEV_BSIZE currently. We can
7092 	 * override this value via the driver configuration file so we must
7093 	 * set it before calling sd_read_unit_properties().
7094 	 */
7095 	un->un_phy_blocksize = DEV_BSIZE;
7096 
7097 	/*
7098 	 * Retrieve the properties from the static driver table or the driver
7099 	 * configuration file (.conf) for this unit and update the soft state
7100 	 * for the device as needed for the indicated properties.
7101 	 * Note: the property configuration needs to occur here as some of the
7102 	 * following routines may have dependencies on soft state flags set
7103 	 * as part of the driver property configuration.
7104 	 */
7105 	sd_read_unit_properties(un);
7106 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7107 	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
7108 
7109 	/*
7110 	 * A device is treated as hotpluggable only if it has the
7111 	 * "hotpluggable" property. Otherwise, it is regarded as
7112 	 * non-hotpluggable.
7113 	 */
7114 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
7115 	    -1) != -1) {
7116 		un->un_f_is_hotpluggable = TRUE;
7117 	}
7118 
7119 	/*
7120 	 * set unit's attributes(flags) according to "hotpluggable" and
7121 	 * RMB bit in INQUIRY data.
7122 	 */
7123 	sd_set_unit_attributes(un, devi);
7124 
7125 	/*
7126 	 * By default, we mark the capacity, lbasize, and geometry
7127 	 * as invalid. Only if we successfully read a valid capacity
7128 	 * will we update the un_blockcount and un_tgt_blocksize with the
7129 	 * valid values (the geometry will be validated later).
7130 	 */
7131 	un->un_f_blockcount_is_valid	= FALSE;
7132 	un->un_f_tgt_blocksize_is_valid	= FALSE;
7133 
7134 	/*
7135 	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
7136 	 * otherwise.
7137 	 */
7138 	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
7139 	un->un_blockcount = 0;
7140 
7141 	/*
7142 	 * Set up the per-instance info needed to determine the correct
7143 	 * CDBs and other info for issuing commands to the target.
7144 	 */
7145 	sd_init_cdb_limits(un);
7146 
7147 	/*
7148 	 * Set up the IO chains to use, based upon the target type.
7149 	 */
7150 	if (un->un_f_non_devbsize_supported) {
7151 		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
7152 	} else {
7153 		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
7154 	}
7155 	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
7156 	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
7157 	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
7158 
7159 	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
7160 	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
7161 	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
7162 	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
7163 
7164 
7165 	if (ISCD(un)) {
7166 		un->un_additional_codes = sd_additional_codes;
7167 	} else {
7168 		un->un_additional_codes = NULL;
7169 	}
7170 
7171 	/*
7172 	 * Create the kstats here so they can be available for attach-time
7173 	 * routines that send commands to the unit (either polled or via
7174 	 * sd_send_scsi_cmd).
7175 	 *
7176 	 * Note: This is a critical sequence that needs to be maintained:
7177 	 *	1) Instantiate the kstats here, before any routines using the
7178 	 *	   iopath (i.e. sd_send_scsi_cmd).
7179 	 *	2) Instantiate and initialize the partition stats
7180 	 *	   (sd_set_pstats).
7181 	 *	3) Initialize the error stats (sd_set_errstats), following
7182 	 *	   sd_validate_geometry(),sd_register_devid(),
7183 	 *	   and sd_cache_control().
7184 	 */
7185 
7186 	un->un_stats = kstat_create(sd_label, instance,
7187 	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
7188 	if (un->un_stats != NULL) {
7189 		un->un_stats->ks_lock = SD_MUTEX(un);
7190 		kstat_install(un->un_stats);
7191 	}
7192 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7193 	    "sd_unit_attach: un:0x%p un_stats created\n", un);
7194 
7195 	un->un_unmapstats_ks = kstat_create(sd_label, instance, "unmapstats",
7196 	    "misc", KSTAT_TYPE_NAMED, sizeof (*un->un_unmapstats) /
7197 	    sizeof (kstat_named_t), 0);
7198 	if (un->un_unmapstats_ks) {
7199 		un->un_unmapstats = un->un_unmapstats_ks->ks_data;
7200 
7201 		kstat_named_init(&un->un_unmapstats->us_cmds,
7202 		    "commands", KSTAT_DATA_UINT64);
7203 		kstat_named_init(&un->un_unmapstats->us_errs,
7204 		    "errors", KSTAT_DATA_UINT64);
7205 		kstat_named_init(&un->un_unmapstats->us_extents,
7206 		    "extents", KSTAT_DATA_UINT64);
7207 		kstat_named_init(&un->un_unmapstats->us_bytes,
7208 		    "bytes", KSTAT_DATA_UINT64);
7209 
7210 		kstat_install(un->un_unmapstats_ks);
7211 	} else {
7212 		cmn_err(CE_NOTE, "!Cannot create unmap kstats for disk %d",
7213 		    instance);
7214 	}
7215 
7216 	sd_create_errstats(un, instance);
7217 	if (un->un_errstats == NULL) {
7218 		goto create_errstats_failed;
7219 	}
7220 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7221 	    "sd_unit_attach: un:0x%p errstats created\n", un);
7222 
7223 	/*
7224 	 * The following if/else code was relocated here from below as part
7225 	 * of the fix for bug (4430280). However with the default setup added
7226 	 * on entry to this routine, it's no longer absolutely necessary for
7227 	 * this to be before the call to sd_spin_up_unit.
7228 	 */
7229 	if (SD_IS_PARALLEL_SCSI(un) || SD_IS_SERIAL(un)) {
7230 		int tq_trigger_flag = (((devp->sd_inq->inq_ansi == 4) ||
7231 		    (devp->sd_inq->inq_ansi == 5)) &&
7232 		    devp->sd_inq->inq_bque) || devp->sd_inq->inq_cmdque;
7233 
7234 		/*
7235 		 * If tagged queueing is supported by the target
7236 		 * and by the host adapter then we will enable it
7237 		 */
7238 		un->un_tagflags = 0;
7239 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) && tq_trigger_flag &&
7240 		    (un->un_f_arq_enabled == TRUE)) {
7241 			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
7242 			    1, 1) == 1) {
7243 				un->un_tagflags = FLAG_STAG;
7244 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7245 				    "sd_unit_attach: un:0x%p tag queueing "
7246 				    "enabled\n", un);
7247 			} else if (scsi_ifgetcap(SD_ADDRESS(un),
7248 			    "untagged-qing", 0) == 1) {
7249 				un->un_f_opt_queueing = TRUE;
7250 				un->un_saved_throttle = un->un_throttle =
7251 				    min(un->un_throttle, 3);
7252 			} else {
7253 				un->un_f_opt_queueing = FALSE;
7254 				un->un_saved_throttle = un->un_throttle = 1;
7255 			}
7256 		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
7257 		    == 1) && (un->un_f_arq_enabled == TRUE)) {
7258 			/* The Host Adapter supports internal queueing. */
7259 			un->un_f_opt_queueing = TRUE;
7260 			un->un_saved_throttle = un->un_throttle =
7261 			    min(un->un_throttle, 3);
7262 		} else {
7263 			un->un_f_opt_queueing = FALSE;
7264 			un->un_saved_throttle = un->un_throttle = 1;
7265 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7266 			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
7267 		}
7268 
7269 		/*
7270 		 * Enable large transfers for SATA/SAS drives
7271 		 */
7272 		if (SD_IS_SERIAL(un)) {
7273 			un->un_max_xfer_size =
7274 			    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7275 			    sd_max_xfer_size, SD_MAX_XFER_SIZE);
7276 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7277 			    "sd_unit_attach: un:0x%p max transfer "
7278 			    "size=0x%x\n", un, un->un_max_xfer_size);
7279 
7280 		}
7281 
7282 		/* Setup or tear down default wide operations for disks */
7283 
7284 		/*
7285 		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
7286 		 * and "ssd_max_xfer_size" to exist simultaneously on the same
7287 		 * system and be set to different values. In the future this
7288 		 * code may need to be updated when the ssd module is
7289 		 * obsoleted and removed from the system. (4299588)
7290 		 */
7291 		if (SD_IS_PARALLEL_SCSI(un) &&
7292 		    (devp->sd_inq->inq_rdf == RDF_SCSI2) &&
7293 		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
7294 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
7295 			    1, 1) == 1) {
7296 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7297 				    "sd_unit_attach: un:0x%p Wide Transfer "
7298 				    "enabled\n", un);
7299 			}
7300 
7301 			/*
7302 			 * If tagged queuing has also been enabled, then
7303 			 * enable large xfers
7304 			 */
7305 			if (un->un_saved_throttle == sd_max_throttle) {
7306 				un->un_max_xfer_size =
7307 				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7308 				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
7309 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7310 				    "sd_unit_attach: un:0x%p max transfer "
7311 				    "size=0x%x\n", un, un->un_max_xfer_size);
7312 			}
7313 		} else {
7314 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
7315 			    0, 1) == 1) {
7316 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7317 				    "sd_unit_attach: un:0x%p "
7318 				    "Wide Transfer disabled\n", un);
7319 			}
7320 		}
7321 	} else {
7322 		un->un_tagflags = FLAG_STAG;
7323 		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
7324 		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
7325 	}
7326 
7327 	/*
7328 	 * If this target supports LUN reset, try to enable it.
7329 	 */
7330 	if (un->un_f_lun_reset_enabled) {
7331 		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
7332 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7333 			    "un:0x%p lun_reset capability set\n", un);
7334 		} else {
7335 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7336 			    "un:0x%p lun-reset capability not set\n", un);
7337 		}
7338 	}
7339 
7340 	/*
7341 	 * Adjust the maximum transfer size. This is to fix
7342 	 * the problem of partial DMA support on SPARC. Some
7343 	 * HBA drivers, like aac, have a very small dma_attr_maxxfer
7344 	 * size, which requires partial DMA support on SPARC.
7345 	 * In the future the SPARC pci nexus driver may solve
7346 	 * the problem instead of this fix.
7347 	 */
7348 	max_xfer_size = scsi_ifgetcap(SD_ADDRESS(un), "dma-max", 1);
7349 	if ((max_xfer_size > 0) && (max_xfer_size < un->un_max_xfer_size)) {
7350 		/* We need DMA partial even on sparc to ensure sddump() works */
7351 		un->un_max_xfer_size = max_xfer_size;
7352 		if (un->un_partial_dma_supported == 0)
7353 			un->un_partial_dma_supported = 1;
7354 	}
7355 	if (ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
7356 	    DDI_PROP_DONTPASS, "buf_break", 0) == 1) {
7357 		if (ddi_xbuf_attr_setup_brk(un->un_xbuf_attr,
7358 		    un->un_max_xfer_size) == 1) {
7359 			un->un_buf_breakup_supported = 1;
7360 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7361 			    "un:0x%p Buf breakup enabled\n", un);
7362 		}
7363 	}
7364 
7365 	/*
7366 	 * Set PKT_DMA_PARTIAL flag.
7367 	 */
7368 	if (un->un_partial_dma_supported == 1) {
7369 		un->un_pkt_flags = PKT_DMA_PARTIAL;
7370 	} else {
7371 		un->un_pkt_flags = 0;
7372 	}
7373 
7374 	/* Initialize sd_ssc_t for internal uscsi commands */
7375 	ssc = sd_ssc_init(un);
7376 	scsi_fm_init(devp);
7377 
7378 	/*
7379 	 * Allocate memory for the SCSI FMA private data.
7380 	 */
7381 	un->un_fm_private =
7382 	    kmem_zalloc(sizeof (struct sd_fm_internal), KM_SLEEP);
7383 	sfip = (struct sd_fm_internal *)un->un_fm_private;
7384 	sfip->fm_ssc.ssc_uscsi_cmd = &sfip->fm_ucmd;
7385 	sfip->fm_ssc.ssc_uscsi_info = &sfip->fm_uinfo;
7386 	sfip->fm_ssc.ssc_un = un;
7387 
7388 	if (ISCD(un) ||
7389 	    un->un_f_has_removable_media ||
7390 	    devp->sd_fm_capable == DDI_FM_NOT_CAPABLE) {
7391 		/*
7392 		 * We don't touch CDROM or the DDI_FM_NOT_CAPABLE device.
7393 		 * Their logging is unchanged.
7394 		 */
7395 		sfip->fm_log_level = SD_FM_LOG_NSUP;
7396 	} else {
7397 		/*
7398 		 * If we get here, this is a non-CDROM, FM-capable
7399 		 * device, and it will no longer write the old scsi_log
7400 		 * messages to /var/adm/messages. However, the property
7401 		 * "fm-scsi-log" controls whether the FM telemetry is
7402 		 * logged in /var/adm/messages.
7403 		 */
7404 		int fm_scsi_log;
7405 		fm_scsi_log = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
7406 		    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "fm-scsi-log", 0);
7407 
7408 		if (fm_scsi_log)
7409 			sfip->fm_log_level = SD_FM_LOG_EREPORT;
7410 		else
7411 			sfip->fm_log_level = SD_FM_LOG_SILENT;
7412 	}
7413 
7414 	/*
7415 	 * At this point in the attach, we have enough info in the
7416 	 * soft state to be able to issue commands to the target.
7417 	 *
7418 	 * All command paths used below MUST issue their commands as
7419 	 * SD_PATH_DIRECT. This is important as intermediate layers
7420 	 * are not all initialized yet (such as PM).
7421 	 */
7422 
7423 	/*
7424 	 * Send a TEST UNIT READY command to the device. This should clear
7425 	 * any outstanding UNIT ATTENTION that may be present.
7426 	 *
7427 	 * Note: Don't check for success, just track if there is a reservation,
7428 	 * this is a throw away command to clear any unit attentions.
7429 	 *
7430 	 * Note: This MUST be the first command issued to the target during
7431 	 * attach to ensure power on UNIT ATTENTIONS are cleared.
7432 	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
7433 	 * with attempts at spinning up a device with no media.
7434 	 */
7435 	status = sd_send_scsi_TEST_UNIT_READY(ssc, SD_DONT_RETRY_TUR);
7436 	if (status != 0) {
7437 		if (status == EACCES)
7438 			reservation_flag = SD_TARGET_IS_RESERVED;
7439 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7440 	}
7441 
7442 	/*
7443 	 * If the device is NOT a removable media device, attempt to spin
7444 	 * it up (using the START_STOP_UNIT command) and read its capacity
7445 	 * (using the READ CAPACITY command).  Note, however, that either
7446 	 * of these could fail and in some cases we would continue with
7447 	 * the attach despite the failure (see below).
7448 	 */
7449 	if (un->un_f_descr_format_supported) {
7450 
7451 		switch (sd_spin_up_unit(ssc)) {
7452 		case 0:
7453 			/*
7454 			 * Spin-up was successful; now try to read the
7455 			 * capacity.  If successful then save the results
7456 			 * and mark the capacity & lbasize as valid.
7457 			 */
7458 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7459 			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
7460 
7461 			status = sd_send_scsi_READ_CAPACITY(ssc, &capacity,
7462 			    &lbasize, SD_PATH_DIRECT);
7463 
7464 			switch (status) {
7465 			case 0: {
7466 				if (capacity > DK_MAX_BLOCKS) {
7467 					if ((capacity + 1) >
7468 					    SD_GROUP1_MAX_ADDRESS) {
7469 						/*
7470 						 * Enable descriptor format
7471 						 * sense data so that we can
7472 						 * get 64 bit sense data
7473 						 * fields.
7474 						 */
7475 						sd_enable_descr_sense(ssc);
7476 					}
7477 				}
7478 
7479 				/*
7480 				 * Here it's not necessary to check the case:
7481 				 * the capacity of the device is bigger than
7482 				 * what the max hba cdb can support. Because
7483 				 * sd_send_scsi_READ_CAPACITY will retrieve
7484 				 * the capacity by sending USCSI command, which
7485 				 * is constrained by the max hba cdb. Actually,
7486 				 * sd_send_scsi_READ_CAPACITY will return
7487 				 * EINVAL when using bigger cdb than required
7488 				 * cdb length. Will handle this case in
7489 				 * "case EINVAL".
7490 				 */
7491 
7492 				/*
7493 				 * The following relies on
7494 				 * sd_send_scsi_READ_CAPACITY never
7495 				 * returning 0 for capacity and/or lbasize.
7496 				 */
7497 				sd_update_block_info(un, lbasize, capacity);
7498 
7499 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7500 				    "sd_unit_attach: un:0x%p capacity = %ld "
7501 				    "blocks; lbasize= %ld.\n", un,
7502 				    un->un_blockcount, un->un_tgt_blocksize);
7503 
7504 				break;
7505 			}
7506 			case EINVAL:
7507 				/*
7508 				 * In the case where the max-cdb-length property
7509 				 * is smaller than the required CDB length for
7510 				 * a SCSI device, a target driver can fail to
7511 				 * attach to that device.
7512 				 */
7513 				scsi_log(SD_DEVINFO(un),
7514 				    sd_label, CE_WARN,
7515 				    "disk capacity is too large "
7516 				    "for current cdb length");
7517 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7518 
7519 				goto spinup_failed;
7520 			case EACCES:
7521 				/*
7522 				 * Should never get here if the spin-up
7523 				 * succeeded, but code it in anyway.
7524 				 * From here, just continue with the attach...
7525 				 */
7526 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7527 				    "sd_unit_attach: un:0x%p "
7528 				    "sd_send_scsi_READ_CAPACITY "
7529 				    "returned reservation conflict\n", un);
7530 				reservation_flag = SD_TARGET_IS_RESERVED;
7531 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7532 				break;
7533 			default:
7534 				/*
7535 				 * Likewise, should never get here if the
7536 				 * spin-up succeeded. Just continue with
7537 				 * the attach...
7538 				 */
7539 				if (status == EIO)
7540 					sd_ssc_assessment(ssc,
7541 					    SD_FMT_STATUS_CHECK);
7542 				else
7543 					sd_ssc_assessment(ssc,
7544 					    SD_FMT_IGNORE);
7545 				break;
7546 			}
7547 			break;
7548 		case EACCES:
7549 			/*
7550 			 * Device is reserved by another host.  In this case
7551 			 * we could not spin it up or read the capacity, but
7552 			 * we continue with the attach anyway.
7553 			 */
7554 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7555 			    "sd_unit_attach: un:0x%p spin-up reservation "
7556 			    "conflict.\n", un);
7557 			reservation_flag = SD_TARGET_IS_RESERVED;
7558 			break;
7559 		default:
7560 			/* Fail the attach if the spin-up failed. */
7561 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7562 			    "sd_unit_attach: un:0x%p spin-up failed.", un);
7563 			goto spinup_failed;
7564 		}
7565 
7566 	}
7567 
7568 	/*
7569 	 * Check to see if this is a MMC drive
7570 	 */
7571 	if (ISCD(un)) {
7572 		sd_set_mmc_caps(ssc);
7573 	}
7574 
7575 	/*
7576 	 * Add a zero-length attribute to tell the world we support
7577 	 * kernel ioctls (for layered drivers)
7578 	 */
7579 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
7580 	    DDI_KERNEL_IOCTL, NULL, 0);
7581 
7582 	/*
7583 	 * Add a boolean property to tell the world we support
7584 	 * the B_FAILFAST flag (for layered drivers)
7585 	 */
7586 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
7587 	    "ddi-failfast-supported", NULL, 0);
7588 
7589 	/*
7590 	 * Initialize power management
7591 	 */
7592 	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
7593 	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
7594 	sd_setup_pm(ssc, devi);
7595 	if (un->un_f_pm_is_enabled == FALSE) {
7596 		/*
7597 		 * For performance, point to a jump table that does
7598 		 * not include pm.
7599 		 * The direct and priority chains don't change with PM.
7600 		 *
7601 		 * Note: this is currently done based on individual device
7602 		 * capabilities. When an interface for determining system
7603 		 * power enabled state becomes available, or when additional
7604 		 * layers are added to the command chain, these values will
7605 		 * have to be re-evaluated for correctness.
7606 		 */
7607 		if (un->un_f_non_devbsize_supported) {
7608 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
7609 		} else {
7610 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
7611 		}
7612 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
7613 	}
7614 
7615 	/*
7616 	 * This property is set to 0 by HA software to avoid retries
7617 	 * on a reserved disk. (The preferred property name is
7618 	 * "retry-on-reservation-conflict") (1189689)
7619 	 *
7620 	 * Note: The use of a global here can have unintended consequences. A
7621 	 * per instance variable is preferable to match the capabilities of
7622 	 * different underlying hba's (4402600)
7623 	 */
7624 	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
7625 	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
7626 	    sd_retry_on_reservation_conflict);
7627 	if (sd_retry_on_reservation_conflict != 0) {
7628 		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
7629 		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
7630 		    sd_retry_on_reservation_conflict);
7631 	}
7632 
7633 	/* Set up options for QFULL handling. */
7634 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7635 	    "qfull-retries", -1)) != -1) {
7636 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
7637 		    rval, 1);
7638 	}
7639 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7640 	    "qfull-retry-interval", -1)) != -1) {
7641 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
7642 		    rval, 1);
7643 	}
7644 
7645 	/*
7646 	 * This just prints a message that announces the existence of the
7647 	 * device. The message is always printed in the system logfile, but
7648 	 * only appears on the console if the system is booted with the
7649 	 * -v (verbose) argument.
7650 	 */
7651 	ddi_report_dev(devi);
7652 
7653 	un->un_mediastate = DKIO_NONE;
7654 
7655 	/*
7656 	 * Check Block Device Characteristics VPD.
7657 	 */
7658 	sd_check_bdc_vpd(ssc);
7659 
7660 	/*
7661 	 * Check whether the drive is in emulation mode.
7662 	 */
7663 	sd_check_emulation_mode(ssc);
7664 
7665 	cmlb_alloc_handle(&un->un_cmlbhandle);
7666 
7667 #if defined(__x86)
7668 	/*
7669 	 * On x86, compensate for off-by-1 legacy error
7670 	 */
7671 	if (!un->un_f_has_removable_media && !un->un_f_is_hotpluggable &&
7672 	    (lbasize == un->un_sys_blocksize))
7673 		offbyone = CMLB_OFF_BY_ONE;
7674 #endif
7675 
7676 	if (cmlb_attach(devi, &sd_tgops, (int)devp->sd_inq->inq_dtype,
7677 	    VOID2BOOLEAN(un->un_f_has_removable_media != 0),
7678 	    VOID2BOOLEAN(un->un_f_is_hotpluggable != 0),
7679 	    un->un_node_type, offbyone, un->un_cmlbhandle,
7680 	    (void *)SD_PATH_DIRECT) != 0) {
7681 		goto cmlb_attach_failed;
7682 	}
7683 
7684 
7685 	/*
7686 	 * Read and validate the device's geometry (ie, disk label)
7687 	 * A new unformatted drive will not have a valid geometry, but
7688 	 * the driver needs to successfully attach to this device so
7689 	 * the drive can be formatted via ioctls.
7690 	 */
7691 	geom_label_valid = (cmlb_validate(un->un_cmlbhandle, 0,
7692 	    (void *)SD_PATH_DIRECT) == 0) ? 1: 0;
7693 
7694 	mutex_enter(SD_MUTEX(un));
7695 
7696 	/*
7697 	 * Read and initialize the devid for the unit.
7698 	 */
7699 	if (un->un_f_devid_supported) {
7700 		sd_register_devid(ssc, devi, reservation_flag);
7701 	}
7702 	mutex_exit(SD_MUTEX(un));
7703 
7704 
7705 	if (un->un_f_opt_disable_cache == TRUE) {
7706 		/*
7707 		 * Disable both read cache and write cache.  This is
7708 		 * the historic behavior of the keywords in the config file.
7709 		 */
7710 		if (sd_cache_control(ssc, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
7711 		    0) {
7712 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7713 			    "sd_unit_attach: un:0x%p Could not disable "
7714 			    "caching", un);
7715 			goto devid_failed;
7716 		}
7717 	}
7718 
7719 	/*
7720 	 * Check the value of the WCE bit and if it's allowed to be changed,
7721 	 * set un_f_write_cache_enabled and un_f_cache_mode_changeable
7722 	 * accordingly.
7723 	 */
7724 	(void) sd_get_write_cache_enabled(ssc, &wc_enabled);
7725 	sd_get_write_cache_changeable(ssc, &wc_changeable);
7726 	mutex_enter(SD_MUTEX(un));
7727 	un->un_f_write_cache_enabled = (wc_enabled != 0);
7728 	un->un_f_cache_mode_changeable = (wc_changeable != 0);
7729 	mutex_exit(SD_MUTEX(un));
7730 
7731 	if ((un->un_f_rmw_type != SD_RMW_TYPE_RETURN_ERROR &&
7732 	    un->un_tgt_blocksize != DEV_BSIZE) ||
7733 	    un->un_f_enable_rmw) {
7734 		if (!(un->un_wm_cache)) {
7735 			(void) snprintf(name_str, sizeof (name_str),
7736 			    "%s%d_cache",
7737 			    ddi_driver_name(SD_DEVINFO(un)),
7738 			    ddi_get_instance(SD_DEVINFO(un)));
7739 			un->un_wm_cache = kmem_cache_create(
7740 			    name_str, sizeof (struct sd_w_map),
7741 			    8, sd_wm_cache_constructor,
7742 			    sd_wm_cache_destructor, NULL,
7743 			    (void *)un, NULL, 0);
7744 			if (!(un->un_wm_cache)) {
7745 				goto wm_cache_failed;
7746 			}
7747 		}
7748 	}
7749 
7750 	/*
7751 	 * Check the value of the NV_SUP bit and set
7752 	 * un_f_suppress_cache_flush accordingly.
7753 	 */
7754 	sd_get_nv_sup(ssc);
7755 
7756 	/*
7757 	 * Find out what type of reservation this disk supports.
7758 	 */
7759 	status = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_KEYS, 0, NULL);
7760 
7761 	switch (status) {
7762 	case 0:
7763 		/*
7764 		 * SCSI-3 reservations are supported.
7765 		 */
7766 		un->un_reservation_type = SD_SCSI3_RESERVATION;
7767 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7768 		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
7769 		break;
7770 	case ENOTSUP:
7771 		/*
7772 		 * The PERSISTENT RESERVE IN command would not be recognized by
7773 		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
7774 		 */
7775 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7776 		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
7777 		un->un_reservation_type = SD_SCSI2_RESERVATION;
7778 
7779 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7780 		break;
7781 	default:
7782 		/*
7783 		 * default to SCSI-3 reservations
7784 		 */
7785 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7786 		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
7787 		un->un_reservation_type = SD_SCSI3_RESERVATION;
7788 
7789 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7790 		break;
7791 	}
7792 
7793 	/*
7794 	 * Set the pstat and error stat values here, so data obtained during the
7795 	 * previous attach-time routines is available.
7796 	 *
7797 	 * Note: This is a critical sequence that needs to be maintained:
7798 	 *	1) Instantiate the kstats before any routines using the iopath
7799 	 *	   (i.e. sd_send_scsi_cmd).
7800 	 *	2) Initialize the error stats (sd_set_errstats) and partition
7801 	 *	   stats (sd_set_pstats) here, following
7802 	 *	   cmlb_validate_geometry(), sd_register_devid(), and
7803 	 *	   sd_cache_control().
7804 	 */
7805 
7806 	if (un->un_f_pkstats_enabled && geom_label_valid) {
7807 		sd_set_pstats(un);
7808 		SD_TRACE(SD_LOG_IO_PARTITION, un,
7809 		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
7810 	}
7811 
7812 	sd_set_errstats(un);
7813 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7814 	    "sd_unit_attach: un:0x%p errstats set\n", un);
7815 
7816 	sd_setup_blk_limits(ssc);
7817 
7818 	/*
7819 	 * After successfully attaching an instance, we record the information
7820 	 * of how many luns have been attached on the relative target and
7821 	 * controller for parallel SCSI. This information is used when sd tries
7822 	 * to set the tagged queuing capability in HBA.
7823 	 */
7824 	if (SD_IS_PARALLEL_SCSI(un) && (tgt >= 0) && (tgt < NTARGETS_WIDE)) {
7825 		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_ATTACH);
7826 	}
7827 
7828 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7829 	    "sd_unit_attach: un:0x%p exit success\n", un);
7830 
7831 	/* Uninitialize sd_ssc_t pointer */
7832 	sd_ssc_fini(ssc);
7833 
7834 	return (DDI_SUCCESS);
7835 
7836 	/*
7837 	 * An error occurred during the attach; clean up & return failure.
7838 	 */
7839 wm_cache_failed:
7840 devid_failed:
7841 	ddi_remove_minor_node(devi, NULL);
7842 
7843 cmlb_attach_failed:
7844 	/*
7845 	 * Cleanup from the scsi_ifsetcap() calls (437868)
7846 	 */
7847 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
7848 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
7849 
7850 	/*
7851 	 * Refer to the comments of setting tagged-qing in the beginning of
7852 	 * sd_unit_attach. We can only disable tagged queuing when there is
7853 	 * no lun attached on the target.
7854 	 */
7855 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
7856 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
7857 	}
7858 
7859 	if (un->un_f_is_fibre == FALSE) {
7860 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
7861 	}
7862 
7863 spinup_failed:
7864 
7865 	/* Uninitialize sd_ssc_t pointer */
7866 	sd_ssc_fini(ssc);
7867 
7868 	mutex_enter(SD_MUTEX(un));
7869 
7870 	/* Deallocate SCSI FMA memory spaces */
7871 	kmem_free(un->un_fm_private, sizeof (struct sd_fm_internal));
7872 
7873 	/* Cancel callback for SD_PATH_DIRECT_PRIORITY cmd. restart */
7874 	if (un->un_direct_priority_timeid != NULL) {
7875 		timeout_id_t temp_id = un->un_direct_priority_timeid;
7876 		un->un_direct_priority_timeid = NULL;
7877 		mutex_exit(SD_MUTEX(un));
7878 		(void) untimeout(temp_id);
7879 		mutex_enter(SD_MUTEX(un));
7880 	}
7881 
7882 	/* Cancel any pending start/stop timeouts */
7883 	if (un->un_startstop_timeid != NULL) {
7884 		timeout_id_t temp_id = un->un_startstop_timeid;
7885 		un->un_startstop_timeid = NULL;
7886 		mutex_exit(SD_MUTEX(un));
7887 		(void) untimeout(temp_id);
7888 		mutex_enter(SD_MUTEX(un));
7889 	}
7890 
7891 	/* Cancel any pending reset-throttle timeouts */
7892 	if (un->un_reset_throttle_timeid != NULL) {
7893 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
7894 		un->un_reset_throttle_timeid = NULL;
7895 		mutex_exit(SD_MUTEX(un));
7896 		(void) untimeout(temp_id);
7897 		mutex_enter(SD_MUTEX(un));
7898 	}
7899 
7900 	/* Cancel rmw warning message timeouts */
7901 	if (un->un_rmw_msg_timeid != NULL) {
7902 		timeout_id_t temp_id = un->un_rmw_msg_timeid;
7903 		un->un_rmw_msg_timeid = NULL;
7904 		mutex_exit(SD_MUTEX(un));
7905 		(void) untimeout(temp_id);
7906 		mutex_enter(SD_MUTEX(un));
7907 	}
7908 
7909 	/* Cancel any pending retry timeouts */
7910 	if (un->un_retry_timeid != NULL) {
7911 		timeout_id_t temp_id = un->un_retry_timeid;
7912 		un->un_retry_timeid = NULL;
7913 		mutex_exit(SD_MUTEX(un));
7914 		(void) untimeout(temp_id);
7915 		mutex_enter(SD_MUTEX(un));
7916 	}
7917 
7918 	/* Cancel any pending delayed cv broadcast timeouts */
7919 	if (un->un_dcvb_timeid != NULL) {
7920 		timeout_id_t temp_id = un->un_dcvb_timeid;
7921 		un->un_dcvb_timeid = NULL;
7922 		mutex_exit(SD_MUTEX(un));
7923 		(void) untimeout(temp_id);
7924 		mutex_enter(SD_MUTEX(un));
7925 	}
7926 
7927 	mutex_exit(SD_MUTEX(un));
7928 
7929 	/* There should not be any in-progress I/O so ASSERT this check */
7930 	ASSERT(un->un_ncmds_in_transport == 0);
7931 	ASSERT(un->un_ncmds_in_driver == 0);
7932 
7933 	/* Do not free the softstate if the callback routine is active */
7934 	sd_sync_with_callback(un);
7935 
7936 	/*
7937 	 * Partition stats apparently are not used with removables. These would
7938 	 * not have been created during attach, so no need to clean them up...
7939 	 */
7940 	if (un->un_errstats != NULL) {
7941 		kstat_delete(un->un_errstats);
7942 		un->un_errstats = NULL;
7943 	}
7944 
7945 create_errstats_failed:
7946 
7947 	if (un->un_stats != NULL) {
7948 		kstat_delete(un->un_stats);
7949 		un->un_stats = NULL;
7950 	}
7951 
7952 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
7953 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
7954 
7955 	ddi_prop_remove_all(devi);
7956 	cv_destroy(&un->un_state_cv);
7957 
7958 	sd_free_rqs(un);
7959 
7960 alloc_rqs_failed:
7961 
7962 	devp->sd_private = NULL;
7963 	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
7964 
7965 	/*
7966 	 * Note: the man pages are unclear as to whether or not doing a
7967 	 * ddi_soft_state_free(sd_state, instance) is the right way to
7968 	 * clean up after the ddi_soft_state_zalloc() if the subsequent
7969 	 * ddi_get_soft_state() fails.  The implication seems to be
7970 	 * that the get_soft_state cannot fail if the zalloc succeeds.
7971 	 */
7972 #ifndef XPV_HVM_DRIVER
7973 	ddi_soft_state_free(sd_state, instance);
7974 #endif /* !XPV_HVM_DRIVER */
7975 
7976 probe_failed:
7977 	scsi_unprobe(devp);
7978 
7979 	return (DDI_FAILURE);
7980 }
7981 
7982 
7983 /*
7984  *    Function: sd_unit_detach
7985  *
7986  * Description: Performs DDI_DETACH processing for sddetach().
7987  *
7988  * Return Code: DDI_SUCCESS
7989  *		DDI_FAILURE
7990  *
7991  *     Context: Kernel thread context
7992  */
7993 
7994 static int
7995 sd_unit_detach(dev_info_t *devi)
7996 {
7997 	struct scsi_device	*devp;
7998 	struct sd_lun		*un;
7999 	int			i;
8000 	int			tgt;
8001 	dev_t			dev;
8002 	dev_info_t		*pdip = ddi_get_parent(devi);
8003 	int			instance = ddi_get_instance(devi);
8004 
8005 	/*
8006 	 * Fail the detach for any of the following:
8007 	 *  - Unable to get the sd_lun struct for the instance
8008 	 *  - There is pending I/O
8009 	 */
8010 	devp = ddi_get_driver_private(devi);
8011 	if ((devp == NULL) ||
8012 	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
8013 	    (un->un_ncmds_in_driver != 0)) {
8014 		return (DDI_FAILURE);
8015 	}
8016 
8017 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);
8018 
8019 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
8020 	    SCSI_ADDR_PROP_TARGET, -1);
8021 
8022 	dev = sd_make_device(SD_DEVINFO(un));
8023 
8024 #ifndef lint
8025 	_NOTE(COMPETING_THREADS_NOW);
8026 #endif
8027 
8028 	mutex_enter(SD_MUTEX(un));
8029 
8030 	/*
8031 	 * Fail the detach if there are any outstanding layered
8032 	 * opens on this device.
8033 	 */
8034 	for (i = 0; i < NDKMAP; i++) {
8035 		if (un->un_ocmap.lyropen[i] != 0) {
8036 			goto err_notclosed;
8037 		}
8038 	}
8039 
8040 	/*
8041 	 * Verify there are NO outstanding commands issued to this device.
8042 	 * ie, un_ncmds_in_transport == 0.
8043 	 * It's possible to have outstanding commands through the physio
8044 	 * code path, even though everything's closed.
8045 	 */
8046 	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
8047 	    (un->un_direct_priority_timeid != NULL) ||
8048 	    (un->un_state == SD_STATE_RWAIT)) {
8049 		mutex_exit(SD_MUTEX(un));
8050 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8051 		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
8052 		goto err_stillbusy;
8053 	}
8054 
8055 	/*
8056 	 * If we have the device reserved, release the reservation.
8057 	 */
8058 	if ((un->un_resvd_status & SD_RESERVE) &&
8059 	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
8060 		mutex_exit(SD_MUTEX(un));
8061 		/*
8062 		 * Note: sd_reserve_release sends a command to the device
8063 		 * via the sd_ioctlcmd() path, and can sleep.
8064 		 */
8065 		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
8066 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8067 			    "sd_dr_detach: Cannot release reservation \n");
8068 		}
8069 	} else {
8070 		mutex_exit(SD_MUTEX(un));
8071 	}
8072 
8073 	/*
8074 	 * Untimeout any reserve recover, throttle reset, restart unit
8075 	 * and delayed broadcast timeout threads. Protect the timeout pointer
8076 	 * from getting nulled by their callback functions.
8077 	 */
8078 	mutex_enter(SD_MUTEX(un));
8079 	if (un->un_resvd_timeid != NULL) {
8080 		timeout_id_t temp_id = un->un_resvd_timeid;
8081 		un->un_resvd_timeid = NULL;
8082 		mutex_exit(SD_MUTEX(un));
8083 		(void) untimeout(temp_id);
8084 		mutex_enter(SD_MUTEX(un));
8085 	}
8086 
8087 	if (un->un_reset_throttle_timeid != NULL) {
8088 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8089 		un->un_reset_throttle_timeid = NULL;
8090 		mutex_exit(SD_MUTEX(un));
8091 		(void) untimeout(temp_id);
8092 		mutex_enter(SD_MUTEX(un));
8093 	}
8094 
8095 	if (un->un_startstop_timeid != NULL) {
8096 		timeout_id_t temp_id = un->un_startstop_timeid;
8097 		un->un_startstop_timeid = NULL;
8098 		mutex_exit(SD_MUTEX(un));
8099 		(void) untimeout(temp_id);
8100 		mutex_enter(SD_MUTEX(un));
8101 	}
8102 
8103 	if (un->un_rmw_msg_timeid != NULL) {
8104 		timeout_id_t temp_id = un->un_rmw_msg_timeid;
8105 		un->un_rmw_msg_timeid = NULL;
8106 		mutex_exit(SD_MUTEX(un));
8107 		(void) untimeout(temp_id);
8108 		mutex_enter(SD_MUTEX(un));
8109 	}
8110 
8111 	if (un->un_dcvb_timeid != NULL) {
8112 		timeout_id_t temp_id = un->un_dcvb_timeid;
8113 		un->un_dcvb_timeid = NULL;
8114 		mutex_exit(SD_MUTEX(un));
8115 		(void) untimeout(temp_id);
8116 	} else {
8117 		mutex_exit(SD_MUTEX(un));
8118 	}
8119 
8120 	/* Remove any pending reservation reclaim requests for this device */
8121 	sd_rmv_resv_reclaim_req(dev);
8122 
8123 	mutex_enter(SD_MUTEX(un));
8124 
8125 	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
8126 	if (un->un_direct_priority_timeid != NULL) {
8127 		timeout_id_t temp_id = un->un_direct_priority_timeid;
8128 		un->un_direct_priority_timeid = NULL;
8129 		mutex_exit(SD_MUTEX(un));
8130 		(void) untimeout(temp_id);
8131 		mutex_enter(SD_MUTEX(un));
8132 	}
8133 
8134 	/* Cancel any active multi-host disk watch thread requests */
8135 	if (un->un_mhd_token != NULL) {
8136 		mutex_exit(SD_MUTEX(un));
8137 		 _NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
8138 		if (scsi_watch_request_terminate(un->un_mhd_token,
8139 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
8140 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8141 			    "sd_dr_detach: Cannot cancel mhd watch request\n");
8142 			/*
8143 			 * Note: We are returning here after having removed
8144 			 * some driver timeouts above. This is consistent with
8145 			 * the legacy implementation but perhaps the watch
8146 			 * terminate call should be made with the wait flag set.
8147 			 */
8148 			goto err_stillbusy;
8149 		}
8150 		mutex_enter(SD_MUTEX(un));
8151 		un->un_mhd_token = NULL;
8152 	}
8153 
8154 	if (un->un_swr_token != NULL) {
8155 		mutex_exit(SD_MUTEX(un));
8156 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
8157 		if (scsi_watch_request_terminate(un->un_swr_token,
8158 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
8159 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8160 			    "sd_dr_detach: Cannot cancel swr watch request\n");
8161 			/*
8162 			 * Note: We are returning here after having removed
8163 			 * some driver timeouts above. This is consistent with
8164 			 * the legacy implementation but perhaps the watch
8165 			 * terminate call should be made with the wait flag set.
8166 			 */
8167 			goto err_stillbusy;
8168 		}
8169 		mutex_enter(SD_MUTEX(un));
8170 		un->un_swr_token = NULL;
8171 	}
8172 
8173 	mutex_exit(SD_MUTEX(un));
8174 
8175 	/*
8176 	 * Clear any scsi_reset_notifies. We clear the reset notifies
8177 	 * even if we have not registered one.
8178 	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
8179 	 */
8180 	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
8181 	    sd_mhd_reset_notify_cb, (caddr_t)un);
8182 
8183 	/*
8184 	 * protect the timeout pointers from getting nulled by
8185 	 * their callback functions during the cancellation process.
8186 	 * In such a scenario untimeout can be invoked with a null value.
8187 	 */
8188 	_NOTE(NO_COMPETING_THREADS_NOW);
8189 
8190 	mutex_enter(&un->un_pm_mutex);
8191 	if (un->un_pm_idle_timeid != NULL) {
8192 		timeout_id_t temp_id = un->un_pm_idle_timeid;
8193 		un->un_pm_idle_timeid = NULL;
8194 		mutex_exit(&un->un_pm_mutex);
8195 
8196 		/*
8197 		 * Timeout is active; cancel it.
8198 		 * Note that it'll never be active on a device
8199 		 * that does not support PM therefore we don't
8200 		 * have to check before calling pm_idle_component.
8201 		 */
8202 		(void) untimeout(temp_id);
8203 		(void) pm_idle_component(SD_DEVINFO(un), 0);
8204 		mutex_enter(&un->un_pm_mutex);
8205 	}
8206 
8207 	/*
8208 	 * Check whether there is already a timeout scheduled for power
8209 	 * management. If yes then don't lower the power here, that's
8210 	 * the timeout handler's job.
8211 	 */
8212 	if (un->un_pm_timeid != NULL) {
8213 		timeout_id_t temp_id = un->un_pm_timeid;
8214 		un->un_pm_timeid = NULL;
8215 		mutex_exit(&un->un_pm_mutex);
8216 		/*
8217 		 * Timeout is active; cancel it.
8218 		 * Note that it'll never be active on a device
8219 		 * that does not support PM therefore we don't
8220 		 * have to check before calling pm_idle_component.
8221 		 */
8222 		(void) untimeout(temp_id);
8223 		(void) pm_idle_component(SD_DEVINFO(un), 0);
8224 
8225 	} else {
8226 		mutex_exit(&un->un_pm_mutex);
8227 		if ((un->un_f_pm_is_enabled == TRUE) &&
8228 		    (pm_lower_power(SD_DEVINFO(un), 0, SD_PM_STATE_STOPPED(un))
8229 		    != DDI_SUCCESS)) {
8230 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8231 		    "sd_dr_detach: Lower power request failed, ignoring.\n");
8232 			/*
8233 			 * Fix for bug: 4297749, item # 13
8234 			 * The above test now includes a check to see if PM is
8235 			 * supported by this device before call
8236 			 * pm_lower_power().
8237 			 * Note, the following is not dead code. The call to
8238 			 * pm_lower_power above will generate a call back into
8239 			 * our sdpower routine which might result in a timeout
8240 			 * handler getting activated. Therefore the following
8241 			 * code is valid and necessary.
8242 			 */
8243 			mutex_enter(&un->un_pm_mutex);
8244 			if (un->un_pm_timeid != NULL) {
8245 				timeout_id_t temp_id = un->un_pm_timeid;
8246 				un->un_pm_timeid = NULL;
8247 				mutex_exit(&un->un_pm_mutex);
8248 				(void) untimeout(temp_id);
8249 				(void) pm_idle_component(SD_DEVINFO(un), 0);
8250 			} else {
8251 				mutex_exit(&un->un_pm_mutex);
8252 			}
8253 		}
8254 	}
8255 
8256 	/*
8257 	 * Cleanup from the scsi_ifsetcap() calls (437868)
8258 	 * Relocated here from above to be after the call to
8259 	 * pm_lower_power, which was getting errors.
8260 	 */
8261 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8262 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8263 
8264 	/*
8265 	 * Currently, tagged queuing is supported per target based by HBA.
8266 	 * Setting this per lun instance actually sets the capability of this
8267 	 * target in HBA, which affects those luns already attached on the
8268 	 * same target. So during detach, we can only disable this capability
8269 	 * only when this is the only lun left on this target. By doing
8270 	 * this, we assume a target has the same tagged queuing capability
8271 	 * for every lun. The condition can be removed when HBA is changed to
8272 	 * support per lun based tagged queuing capability.
8273 	 */
8274 	if (sd_scsi_get_target_lun_count(pdip, tgt) <= 1) {
8275 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8276 	}
8277 
8278 	if (un->un_f_is_fibre == FALSE) {
8279 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8280 	}
8281 
8282 	/*
8283 	 * Remove any event callbacks, fibre only
8284 	 */
8285 	if (un->un_f_is_fibre == TRUE) {
8286 		if ((un->un_insert_event != NULL) &&
8287 		    (ddi_remove_event_handler(un->un_insert_cb_id) !=
8288 		    DDI_SUCCESS)) {
8289 			/*
8290 			 * Note: We are returning here after having done
8291 			 * substantial cleanup above. This is consistent
8292 			 * with the legacy implementation but this may not
8293 			 * be the right thing to do.
8294 			 */
8295 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8296 			    "sd_dr_detach: Cannot cancel insert event\n");
8297 			goto err_remove_event;
8298 		}
8299 		un->un_insert_event = NULL;
8300 
8301 		if ((un->un_remove_event != NULL) &&
8302 		    (ddi_remove_event_handler(un->un_remove_cb_id) !=
8303 		    DDI_SUCCESS)) {
8304 			/*
8305 			 * Note: We are returning here after having done
8306 			 * substantial cleanup above. This is consistent
8307 			 * with the legacy implementation but this may not
8308 			 * be the right thing to do.
8309 			 */
8310 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8311 			    "sd_dr_detach: Cannot cancel remove event\n");
8312 			goto err_remove_event;
8313 		}
8314 		un->un_remove_event = NULL;
8315 	}
8316 
8317 	/* Do not free the softstate if the callback routine is active */
8318 	sd_sync_with_callback(un);
8319 
8320 	cmlb_detach(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
8321 	cmlb_free_handle(&un->un_cmlbhandle);
8322 
8323 	/*
8324 	 * Clean up the soft state struct.
8325 	 * Cleanup is done in reverse order of allocs/inits.
8326 	 * At this point there should be no competing threads anymore.
8327 	 */
8328 
8329 	scsi_fm_fini(devp);
8330 
8331 	/*
8332 	 * Deallocate memory for SCSI FMA.
8333 	 */
8334 	kmem_free(un->un_fm_private, sizeof (struct sd_fm_internal));
8335 
8336 	/*
8337 	 * Unregister and free device id if it was not registered
8338 	 * by the transport.
8339 	 */
8340 	if (un->un_f_devid_transport_defined == FALSE)
8341 		ddi_devid_unregister(devi);
8342 
8343 	/*
8344 	 * free the devid structure if allocated before (by ddi_devid_init()
8345 	 * or ddi_devid_get()).
8346 	 */
8347 	if (un->un_devid) {
8348 		ddi_devid_free(un->un_devid);
8349 		un->un_devid = NULL;
8350 	}
8351 
8352 	/*
8353 	 * Destroy wmap cache if it exists.
8354 	 */
8355 	if (un->un_wm_cache != NULL) {
8356 		kmem_cache_destroy(un->un_wm_cache);
8357 		un->un_wm_cache = NULL;
8358 	}
8359 
8360 	/*
8361 	 * kstat cleanup is done in detach for all device types (4363169).
8362 	 * We do not want to fail detach if the device kstats are not deleted
8363 	 * since there is a confusion about the devo_refcnt for the device.
8364 	 * We just delete the kstats and let detach complete successfully.
8365 	 */
8366 	if (un->un_stats != NULL) {
8367 		kstat_delete(un->un_stats);
8368 		un->un_stats = NULL;
8369 	}
8370 	if (un->un_unmapstats != NULL) {
8371 		kstat_delete(un->un_unmapstats_ks);
8372 		un->un_unmapstats_ks = NULL;
8373 		un->un_unmapstats = NULL;
8374 	}
8375 	if (un->un_errstats != NULL) {
8376 		kstat_delete(un->un_errstats);
8377 		un->un_errstats = NULL;
8378 	}
8379 
8380 	/* Remove partition stats */
8381 	if (un->un_f_pkstats_enabled) {
8382 		for (i = 0; i < NSDMAP; i++) {
8383 			if (un->un_pstats[i] != NULL) {
8384 				kstat_delete(un->un_pstats[i]);
8385 				un->un_pstats[i] = NULL;
8386 			}
8387 		}
8388 	}
8389 
8390 	/* Remove xbuf registration */
8391 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
8392 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
8393 
8394 	/* Remove driver properties */
8395 	ddi_prop_remove_all(devi);
8396 
8397 	mutex_destroy(&un->un_pm_mutex);
8398 	cv_destroy(&un->un_pm_busy_cv);
8399 
8400 	cv_destroy(&un->un_wcc_cv);
8401 
8402 	/* Removable media condvar. */
8403 	cv_destroy(&un->un_state_cv);
8404 
8405 	/* Suspend/resume condvar. */
8406 	cv_destroy(&un->un_suspend_cv);
8407 	cv_destroy(&un->un_disk_busy_cv);
8408 
8409 	sd_free_rqs(un);
8410 
8411 	/* Free up soft state */
8412 	devp->sd_private = NULL;
8413 
8414 	bzero(un, sizeof (struct sd_lun));
8415 
8416 	ddi_soft_state_free(sd_state, instance);
8417 
8418 	/* This frees up the INQUIRY data associated with the device. */
8419 	scsi_unprobe(devp);
8420 
8421 	/*
8422 	 * After successfully detaching an instance, we update the information
8423 	 * of how many luns have been attached in the relative target and
8424 	 * controller for parallel SCSI. This information is used when sd tries
8425 	 * to set the tagged queuing capability in HBA.
8426 	 * Since un has been released, we can't use SD_IS_PARALLEL_SCSI(un) to
8427 	 * check if the device is parallel SCSI. However, we don't need to
8428 	 * check here because we've already checked during attach. No device
8429 	 * that is not parallel SCSI is in the chain.
8430 	 */
8431 	if ((tgt >= 0) && (tgt < NTARGETS_WIDE)) {
8432 		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_DETACH);
8433 	}
8434 
8435 	return (DDI_SUCCESS);
8436 
8437 err_notclosed:
8438 	mutex_exit(SD_MUTEX(un));
8439 
8440 err_stillbusy:
8441 	_NOTE(NO_COMPETING_THREADS_NOW);
8442 
8443 err_remove_event:
8444 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
8445 	return (DDI_FAILURE);
8446 }
8447 
8448 
8449 /*
8450  *    Function: sd_create_errstats
8451  *
8452  * Description: This routine instantiates the device error stats.
8453  *
8454  *		Note: During attach the stats are instantiated first so they are
8455  *		available for attach-time routines that utilize the driver
8456  *		iopath to send commands to the device. The stats are initialized
8457  *		separately so data obtained during some attach-time routines is
8458  *		available. (4362483)
8459  *
8460  *   Arguments: un - driver soft state (unit) structure
8461  *		instance - driver instance
8462  *
8463  *     Context: Kernel thread context
8464  */
8465 
8466 static void
sd_create_errstats(struct sd_lun * un,int instance)8467 sd_create_errstats(struct sd_lun *un, int instance)
8468 {
8469 	struct	sd_errstats	*stp;
8470 	char	kstatmodule_err[KSTAT_STRLEN];
8471 	char	kstatname[KSTAT_STRLEN];
8472 	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
8473 
8474 	ASSERT(un != NULL);
8475 
8476 	if (un->un_errstats != NULL) {
8477 		return;
8478 	}
8479 
8480 	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
8481 	    "%serr", sd_label);
8482 	(void) snprintf(kstatname, sizeof (kstatname),
8483 	    "%s%d,err", sd_label, instance);
8484 
8485 	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
8486 	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
8487 
8488 	if (un->un_errstats == NULL) {
8489 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8490 		    "sd_create_errstats: Failed kstat_create\n");
8491 		return;
8492 	}
8493 
8494 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
8495 	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
8496 	    KSTAT_DATA_UINT32);
8497 	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
8498 	    KSTAT_DATA_UINT32);
8499 	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
8500 	    KSTAT_DATA_UINT32);
8501 	kstat_named_init(&stp->sd_vid,		"Vendor",
8502 	    KSTAT_DATA_CHAR);
8503 	kstat_named_init(&stp->sd_pid,		"Product",
8504 	    KSTAT_DATA_CHAR);
8505 	kstat_named_init(&stp->sd_revision,	"Revision",
8506 	    KSTAT_DATA_CHAR);
8507 	kstat_named_init(&stp->sd_serial,	"Serial No",
8508 	    KSTAT_DATA_CHAR);
8509 	kstat_named_init(&stp->sd_capacity,	"Size",
8510 	    KSTAT_DATA_ULONGLONG);
8511 	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
8512 	    KSTAT_DATA_UINT32);
8513 	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
8514 	    KSTAT_DATA_UINT32);
8515 	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
8516 	    KSTAT_DATA_UINT32);
8517 	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
8518 	    KSTAT_DATA_UINT32);
8519 	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
8520 	    KSTAT_DATA_UINT32);
8521 	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
8522 	    KSTAT_DATA_UINT32);
8523 
8524 	un->un_errstats->ks_private = un;
8525 	un->un_errstats->ks_update  = nulldev;
8526 
8527 	kstat_install(un->un_errstats);
8528 }
8529 
8530 
8531 /*
8532  *    Function: sd_set_errstats
8533  *
8534  * Description: This routine sets the value of the vendor id, product id,
8535  *		revision, serial number, and capacity device error stats.
8536  *
8537  *		Note: During attach the stats are instantiated first so they are
8538  *		available for attach-time routines that utilize the driver
8539  *		iopath to send commands to the device. The stats are initialized
8540  *		separately so data obtained during some attach-time routines is
8541  *		available. (4362483)
8542  *
8543  *   Arguments: un - driver soft state (unit) structure
8544  *
8545  *     Context: Kernel thread context
8546  */
8547 
8548 static void
sd_set_errstats(struct sd_lun * un)8549 sd_set_errstats(struct sd_lun *un)
8550 {
8551 	struct	sd_errstats	*stp;
8552 	char			*sn;
8553 
8554 	ASSERT(un != NULL);
8555 	ASSERT(un->un_errstats != NULL);
8556 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
8557 	ASSERT(stp != NULL);
8558 	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
8559 	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
8560 	(void) strncpy(stp->sd_revision.value.c,
8561 	    un->un_sd->sd_inq->inq_revision, 4);
8562 
8563 	/*
8564 	 * All the errstats are persistent across detach/attach,
8565 	 * so reset all the errstats here in case of the hot
8566 	 * replacement of disk drives, except for not changed
8567 	 * Sun qualified drives.
8568 	 */
8569 	if ((bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) != 0) ||
8570 	    (bcmp(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
8571 	    sizeof (SD_INQUIRY(un)->inq_serial)) != 0)) {
8572 		stp->sd_softerrs.value.ui32 = 0;
8573 		stp->sd_harderrs.value.ui32 = 0;
8574 		stp->sd_transerrs.value.ui32 = 0;
8575 		stp->sd_rq_media_err.value.ui32 = 0;
8576 		stp->sd_rq_ntrdy_err.value.ui32 = 0;
8577 		stp->sd_rq_nodev_err.value.ui32 = 0;
8578 		stp->sd_rq_recov_err.value.ui32 = 0;
8579 		stp->sd_rq_illrq_err.value.ui32 = 0;
8580 		stp->sd_rq_pfa_err.value.ui32 = 0;
8581 	}
8582 
8583 	/*
8584 	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
8585 	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
8586 	 * (4376302))
8587 	 */
8588 	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
8589 		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
8590 		    sizeof (SD_INQUIRY(un)->inq_serial));
8591 	} else {
8592 		/*
8593 		 * Set the "Serial No" kstat for non-Sun qualified drives
8594 		 */
8595 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, SD_DEVINFO(un),
8596 		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
8597 		    INQUIRY_SERIAL_NO, &sn) == DDI_SUCCESS) {
8598 			(void) strlcpy(stp->sd_serial.value.c, sn,
8599 			    sizeof (stp->sd_serial.value.c));
8600 			ddi_prop_free(sn);
8601 		}
8602 	}
8603 
8604 	if (un->un_f_blockcount_is_valid != TRUE) {
8605 		/*
8606 		 * Set capacity error stat to 0 for no media. This ensures
8607 		 * a valid capacity is displayed in response to 'iostat -E'
8608 		 * when no media is present in the device.
8609 		 */
8610 		stp->sd_capacity.value.ui64 = 0;
8611 	} else {
8612 		/*
8613 		 * Multiply un_blockcount by un->un_sys_blocksize to get
8614 		 * capacity.
8615 		 *
8616 		 * Note: for non-512 blocksize devices "un_blockcount" has been
8617 		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
8618 		 * (un_tgt_blocksize / un->un_sys_blocksize).
8619 		 */
8620 		stp->sd_capacity.value.ui64 = (uint64_t)
8621 		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
8622 	}
8623 }
8624 
8625 
8626 /*
8627  *    Function: sd_set_pstats
8628  *
8629  * Description: This routine instantiates and initializes the partition
8630  *              stats for each partition with more than zero blocks.
8631  *		(4363169)
8632  *
8633  *   Arguments: un - driver soft state (unit) structure
8634  *
8635  *     Context: Kernel thread context
8636  */
8637 
8638 static void
sd_set_pstats(struct sd_lun * un)8639 sd_set_pstats(struct sd_lun *un)
8640 {
8641 	char	kstatname[KSTAT_STRLEN];
8642 	int	instance;
8643 	int	i;
8644 	diskaddr_t	nblks = 0;
8645 	char	*partname = NULL;
8646 
8647 	ASSERT(un != NULL);
8648 
8649 	instance = ddi_get_instance(SD_DEVINFO(un));
8650 
8651 	/* Note:x86: is this a VTOC8/VTOC16 difference? */
8652 	for (i = 0; i < NSDMAP; i++) {
8653 
8654 		if (cmlb_partinfo(un->un_cmlbhandle, i,
8655 		    &nblks, NULL, &partname, NULL, (void *)SD_PATH_DIRECT) != 0)
8656 			continue;
8657 		mutex_enter(SD_MUTEX(un));
8658 
8659 		if ((un->un_pstats[i] == NULL) &&
8660 		    (nblks != 0)) {
8661 
8662 			(void) snprintf(kstatname, sizeof (kstatname),
8663 			    "%s%d,%s", sd_label, instance,
8664 			    partname);
8665 
8666 			un->un_pstats[i] = kstat_create(sd_label,
8667 			    instance, kstatname, "partition", KSTAT_TYPE_IO,
8668 			    1, KSTAT_FLAG_PERSISTENT);
8669 			if (un->un_pstats[i] != NULL) {
8670 				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
8671 				kstat_install(un->un_pstats[i]);
8672 			}
8673 		}
8674 		mutex_exit(SD_MUTEX(un));
8675 	}
8676 }
8677 
/*
 * Values related to caching mode page depending on whether the unit is ATAPI.
 *
 * The macro argument is parenthesized so that any pointer-valued expression
 * (e.g. "&lun" or "ssc->ssc_un") may be passed, not just a plain identifier.
 */
#define	SDC_CDB_GROUP(un) (((un)->un_f_cfg_is_atapi == TRUE) ? \
	CDB_GROUP1 : CDB_GROUP0)
#define	SDC_HDRLEN(un) (((un)->un_f_cfg_is_atapi == TRUE) ? \
	MODE_HEADER_LENGTH_GRP2 : MODE_HEADER_LENGTH)
/*
 * Use mode_cache_scsi3 to ensure we get all of the mode sense data, otherwise
 * the mode select will fail (mode_cache_scsi3 is a superset of mode_caching).
 */
#define	SDC_BUFLEN(un) (SDC_HDRLEN(un) + MODE_BLK_DESC_LENGTH + \
	sizeof (struct mode_cache_scsi3))
8691 
8692 static int
sd_get_caching_mode_page(sd_ssc_t * ssc,uchar_t page_control,uchar_t ** header,int * bdlen)8693 sd_get_caching_mode_page(sd_ssc_t *ssc, uchar_t page_control, uchar_t **header,
8694     int *bdlen)
8695 {
8696 	struct sd_lun	*un = ssc->ssc_un;
8697 	struct mode_caching *mode_caching_page;
8698 	size_t		buflen = SDC_BUFLEN(un);
8699 	int		hdrlen = SDC_HDRLEN(un);
8700 	int		rval;
8701 
8702 	/*
8703 	 * Do a test unit ready, otherwise a mode sense may not work if this
8704 	 * is the first command sent to the device after boot.
8705 	 */
8706 	if (sd_send_scsi_TEST_UNIT_READY(ssc, 0) != 0)
8707 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8708 
8709 	/*
8710 	 * Allocate memory for the retrieved mode page and its headers.  Set
8711 	 * a pointer to the page itself.
8712 	 */
8713 	*header = kmem_zalloc(buflen, KM_SLEEP);
8714 
8715 	/* Get the information from the device */
8716 	rval = sd_send_scsi_MODE_SENSE(ssc, SDC_CDB_GROUP(un), *header, buflen,
8717 	    page_control | MODEPAGE_CACHING, SD_PATH_DIRECT);
8718 	if (rval != 0) {
8719 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un, "%s: Mode Sense Failed\n",
8720 		    __func__);
8721 		goto mode_sense_failed;
8722 	}
8723 
8724 	/*
8725 	 * Determine size of Block Descriptors in order to locate
8726 	 * the mode page data. ATAPI devices return 0, SCSI devices
8727 	 * should return MODE_BLK_DESC_LENGTH.
8728 	 */
8729 	if (un->un_f_cfg_is_atapi == TRUE) {
8730 		struct mode_header_grp2 *mhp =
8731 		    (struct mode_header_grp2 *)(*header);
8732 		*bdlen = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
8733 	} else {
8734 		*bdlen = ((struct mode_header *)(*header))->bdesc_length;
8735 	}
8736 
8737 	if (*bdlen > MODE_BLK_DESC_LENGTH) {
8738 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, 0,
8739 		    "%s: Mode Sense returned invalid block descriptor length\n",
8740 		    __func__);
8741 		rval = EIO;
8742 		goto mode_sense_failed;
8743 	}
8744 
8745 	mode_caching_page = (struct mode_caching *)(*header + hdrlen + *bdlen);
8746 	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
8747 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
8748 		    "%s: Mode Sense caching page code mismatch %d\n",
8749 		    __func__, mode_caching_page->mode_page.code);
8750 		rval = EIO;
8751 	}
8752 
8753 mode_sense_failed:
8754 	if (rval != 0) {
8755 		kmem_free(*header, buflen);
8756 		*header = NULL;
8757 		*bdlen = 0;
8758 	}
8759 	return (rval);
8760 }
8761 
8762 /*
8763  *    Function: sd_cache_control()
8764  *
8765  * Description: This routine is the driver entry point for setting
8766  *		read and write caching by modifying the WCE (write cache
8767  *		enable) and RCD (read cache disable) bits of mode
8768  *		page 8 (MODEPAGE_CACHING).
8769  *
8770  *   Arguments: ssc		- ssc contains pointer to driver soft state
8771  *				  (unit) structure for this target.
8772  *		rcd_flag	- flag for controlling the read cache
8773  *		wce_flag	- flag for controlling the write cache
8774  *
8775  * Return Code: EIO
8776  *		code returned by sd_send_scsi_MODE_SENSE and
8777  *		sd_send_scsi_MODE_SELECT
8778  *
8779  *     Context: Kernel Thread
8780  */
8781 
8782 static int
sd_cache_control(sd_ssc_t * ssc,int rcd_flag,int wce_flag)8783 sd_cache_control(sd_ssc_t *ssc, int rcd_flag, int wce_flag)
8784 {
8785 	struct sd_lun	*un = ssc->ssc_un;
8786 	struct mode_caching *mode_caching_page;
8787 	uchar_t		*header;
8788 	size_t		buflen = SDC_BUFLEN(un);
8789 	int		hdrlen = SDC_HDRLEN(un);
8790 	int		bdlen;
8791 	int		rval;
8792 
8793 	rval = sd_get_caching_mode_page(ssc, MODEPAGE_CURRENT, &header, &bdlen);
8794 	switch (rval) {
8795 	case 0:
8796 		/* Check the relevant bits on successful mode sense */
8797 		mode_caching_page = (struct mode_caching *)(header + hdrlen +
8798 		    bdlen);
8799 		if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
8800 		    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
8801 		    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
8802 		    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {
8803 			size_t sbuflen;
8804 			uchar_t save_pg;
8805 
8806 			/*
8807 			 * Construct select buffer length based on the
8808 			 * length of the sense data returned.
8809 			 */
8810 			sbuflen = hdrlen + bdlen + sizeof (struct mode_page) +
8811 			    (int)mode_caching_page->mode_page.length;
8812 
8813 			/* Set the caching bits as requested */
8814 			if (rcd_flag == SD_CACHE_ENABLE)
8815 				mode_caching_page->rcd = 0;
8816 			else if (rcd_flag == SD_CACHE_DISABLE)
8817 				mode_caching_page->rcd = 1;
8818 
8819 			if (wce_flag == SD_CACHE_ENABLE)
8820 				mode_caching_page->wce = 1;
8821 			else if (wce_flag == SD_CACHE_DISABLE)
8822 				mode_caching_page->wce = 0;
8823 
8824 			/*
8825 			 * Save the page if the mode sense says the
8826 			 * drive supports it.
8827 			 */
8828 			save_pg = mode_caching_page->mode_page.ps ?
8829 			    SD_SAVE_PAGE : SD_DONTSAVE_PAGE;
8830 
8831 			/* Clear reserved bits before mode select */
8832 			mode_caching_page->mode_page.ps = 0;
8833 
8834 			/*
8835 			 * Clear out mode header for mode select.
8836 			 * The rest of the retrieved page will be reused.
8837 			 */
8838 			bzero(header, hdrlen);
8839 
8840 			if (un->un_f_cfg_is_atapi == TRUE) {
8841 				struct mode_header_grp2 *mhp =
8842 				    (struct mode_header_grp2 *)header;
8843 				mhp->bdesc_length_hi = bdlen >> 8;
8844 				mhp->bdesc_length_lo = (uchar_t)bdlen & 0xff;
8845 			} else {
8846 				((struct mode_header *)header)->bdesc_length =
8847 				    bdlen;
8848 			}
8849 
8850 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8851 
8852 			/* Issue mode select to change the cache settings */
8853 			rval = sd_send_scsi_MODE_SELECT(ssc, SDC_CDB_GROUP(un),
8854 			    header, sbuflen, save_pg, SD_PATH_DIRECT);
8855 		}
8856 		kmem_free(header, buflen);
8857 		break;
8858 	case EIO:
8859 		sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
8860 		break;
8861 	default:
8862 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8863 		break;
8864 	}
8865 
8866 	return (rval);
8867 }
8868 
8869 
8870 /*
8871  *    Function: sd_get_write_cache_enabled()
8872  *
8873  * Description: This routine is the driver entry point for determining if write
8874  *		caching is enabled.  It examines the WCE (write cache enable)
8875  *		bits of mode page 8 (MODEPAGE_CACHING) with Page Control field
8876  *		bits set to MODEPAGE_CURRENT.
8877  *
8878  *   Arguments: ssc		- ssc contains pointer to driver soft state
8879  *				  (unit) structure for this target.
8880  *		is_enabled	- pointer to int where write cache enabled state
8881  *				  is returned (non-zero -> write cache enabled)
8882  *
8883  * Return Code: EIO
8884  *		code returned by sd_send_scsi_MODE_SENSE
8885  *
8886  *     Context: Kernel Thread
8887  *
8888  * NOTE: If ioctl is added to disable write cache, this sequence should
8889  * be followed so that no locking is required for accesses to
8890  * un->un_f_write_cache_enabled:
8891  *	do mode select to clear wce
8892  *	do synchronize cache to flush cache
8893  *	set un->un_f_write_cache_enabled = FALSE
8894  *
8895  * Conversely, an ioctl to enable the write cache should be done
8896  * in this order:
8897  *	set un->un_f_write_cache_enabled = TRUE
8898  *	do mode select to set wce
8899  */
8900 
8901 static int
sd_get_write_cache_enabled(sd_ssc_t * ssc,int * is_enabled)8902 sd_get_write_cache_enabled(sd_ssc_t *ssc, int *is_enabled)
8903 {
8904 	struct sd_lun	*un = ssc->ssc_un;
8905 	struct mode_caching *mode_caching_page;
8906 	uchar_t		*header;
8907 	size_t		buflen = SDC_BUFLEN(un);
8908 	int		hdrlen = SDC_HDRLEN(un);
8909 	int		bdlen;
8910 	int		rval;
8911 
8912 	/* In case of error, flag as enabled */
8913 	*is_enabled = TRUE;
8914 
8915 	rval = sd_get_caching_mode_page(ssc, MODEPAGE_CURRENT, &header, &bdlen);
8916 	switch (rval) {
8917 	case 0:
8918 		mode_caching_page = (struct mode_caching *)(header + hdrlen +
8919 		    bdlen);
8920 		*is_enabled = mode_caching_page->wce;
8921 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
8922 		kmem_free(header, buflen);
8923 		break;
8924 	case EIO: {
8925 		/*
8926 		 * Some disks do not support Mode Sense(6), we
8927 		 * should ignore this kind of error (sense key is
8928 		 * 0x5 - illegal request).
8929 		 */
8930 		uint8_t *sensep;
8931 		int senlen;
8932 
8933 		sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
8934 		senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
8935 		    ssc->ssc_uscsi_cmd->uscsi_rqresid);
8936 
8937 		if (senlen > 0 &&
8938 		    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
8939 			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
8940 		} else {
8941 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
8942 		}
8943 		break;
8944 	}
8945 	default:
8946 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8947 		break;
8948 	}
8949 
8950 	return (rval);
8951 }
8952 
8953 /*
8954  *    Function: sd_get_write_cache_changeable()
8955  *
8956  * Description: This routine is the driver entry point for determining if write
8957  *		caching is changeable.  It examines the WCE (write cache enable)
8958  *		bits of mode page 8 (MODEPAGE_CACHING) with Page Control field
8959  *		bits set to MODEPAGE_CHANGEABLE.
8960  *
8961  *   Arguments: ssc		- ssc contains pointer to driver soft state
8962  *				  (unit) structure for this target.
8963  *		is_changeable	- pointer to int where write cache changeable
8964  *				  state is returned (non-zero -> write cache
8965  *				  changeable)
8966  *
8967  *     Context: Kernel Thread
8968  */
8969 
8970 static void
sd_get_write_cache_changeable(sd_ssc_t * ssc,int * is_changeable)8971 sd_get_write_cache_changeable(sd_ssc_t *ssc, int *is_changeable)
8972 {
8973 	struct sd_lun	*un = ssc->ssc_un;
8974 	struct mode_caching *mode_caching_page;
8975 	uchar_t		*header;
8976 	size_t		buflen = SDC_BUFLEN(un);
8977 	int		hdrlen = SDC_HDRLEN(un);
8978 	int		bdlen;
8979 	int		rval;
8980 
8981 	/* In case of error, flag as enabled */
8982 	*is_changeable = TRUE;
8983 
8984 	rval = sd_get_caching_mode_page(ssc, MODEPAGE_CHANGEABLE, &header,
8985 	    &bdlen);
8986 	switch (rval) {
8987 	case 0:
8988 		mode_caching_page = (struct mode_caching *)(header + hdrlen +
8989 		    bdlen);
8990 		*is_changeable = mode_caching_page->wce;
8991 		kmem_free(header, buflen);
8992 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
8993 		break;
8994 	case EIO:
8995 		sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
8996 		break;
8997 	default:
8998 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8999 		break;
9000 	}
9001 }
9002 
9003 /*
9004  *    Function: sd_get_nv_sup()
9005  *
9006  * Description: This routine is the driver entry point for
9007  * determining whether non-volatile cache is supported. This
9008  * determination process works as follows:
9009  *
9010  * 1. sd first queries sd.conf on whether
9011  * suppress_cache_flush bit is set for this device.
9012  *
9013  * 2. if not there, then queries the internal disk table.
9014  *
9015  * 3. if either sd.conf or internal disk table specifies
9016  * cache flush be suppressed, we don't bother checking
9017  * NV_SUP bit.
9018  *
9019  * If SUPPRESS_CACHE_FLUSH bit is not set to 1, sd queries
9020  * the optional INQUIRY VPD page 0x86. If the device
9021  * supports VPD page 0x86, sd examines the NV_SUP
9022  * (non-volatile cache support) bit in the INQUIRY VPD page
9023  * 0x86:
9024  *   o If NV_SUP bit is set, sd assumes the device has a
9025  *   non-volatile cache and set the
9026  *   un_f_sync_nv_supported to TRUE.
9027  *   o Otherwise cache is not non-volatile,
9028  *   un_f_sync_nv_supported is set to FALSE.
9029  *
9030  * Arguments: un - driver soft state (unit) structure
9031  *
9032  * Return Code:
9033  *
9034  *     Context: Kernel Thread
9035  */
9036 
9037 static void
sd_get_nv_sup(sd_ssc_t * ssc)9038 sd_get_nv_sup(sd_ssc_t *ssc)
9039 {
9040 	int		rval		= 0;
9041 	uchar_t		*inq86		= NULL;
9042 	size_t		inq86_len	= MAX_INQUIRY_SIZE;
9043 	size_t		inq86_resid	= 0;
9044 	struct		dk_callback *dkc;
9045 	struct sd_lun	*un;
9046 
9047 	ASSERT(ssc != NULL);
9048 	un = ssc->ssc_un;
9049 	ASSERT(un != NULL);
9050 
9051 	mutex_enter(SD_MUTEX(un));
9052 
9053 	/*
9054 	 * Be conservative on the device's support of
9055 	 * SYNC_NV bit: un_f_sync_nv_supported is
9056 	 * initialized to be false.
9057 	 */
9058 	un->un_f_sync_nv_supported = FALSE;
9059 
9060 	/*
9061 	 * If either sd.conf or internal disk table
9062 	 * specifies cache flush be suppressed, then
9063 	 * we don't bother checking NV_SUP bit.
9064 	 */
9065 	if (un->un_f_suppress_cache_flush == TRUE) {
9066 		mutex_exit(SD_MUTEX(un));
9067 		return;
9068 	}
9069 
9070 	if (sd_check_vpd_page_support(ssc) == 0 &&
9071 	    un->un_vpd_page_mask & SD_VPD_EXTENDED_DATA_PG) {
9072 		mutex_exit(SD_MUTEX(un));
9073 		/* collect page 86 data if available */
9074 		inq86 = kmem_zalloc(inq86_len, KM_SLEEP);
9075 
9076 		rval = sd_send_scsi_INQUIRY(ssc, inq86, inq86_len,
9077 		    0x01, 0x86, &inq86_resid);
9078 
9079 		if (rval == 0 && (inq86_len - inq86_resid > 6)) {
9080 			SD_TRACE(SD_LOG_COMMON, un,
9081 			    "sd_get_nv_sup: \
9082 			    successfully get VPD page: %x \
9083 			    PAGE LENGTH: %x BYTE 6: %x\n",
9084 			    inq86[1], inq86[3], inq86[6]);
9085 
9086 			mutex_enter(SD_MUTEX(un));
9087 			/*
9088 			 * check the value of NV_SUP bit: only if the device
9089 			 * reports NV_SUP bit to be 1, the
9090 			 * un_f_sync_nv_supported bit will be set to true.
9091 			 */
9092 			if (inq86[6] & SD_VPD_NV_SUP) {
9093 				un->un_f_sync_nv_supported = TRUE;
9094 			}
9095 			mutex_exit(SD_MUTEX(un));
9096 		} else if (rval != 0) {
9097 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9098 		}
9099 
9100 		kmem_free(inq86, inq86_len);
9101 	} else {
9102 		mutex_exit(SD_MUTEX(un));
9103 	}
9104 
9105 	/*
9106 	 * Send a SYNC CACHE command to check whether
9107 	 * SYNC_NV bit is supported. This command should have
9108 	 * un_f_sync_nv_supported set to correct value.
9109 	 */
9110 	mutex_enter(SD_MUTEX(un));
9111 	if (un->un_f_sync_nv_supported) {
9112 		mutex_exit(SD_MUTEX(un));
9113 		dkc = kmem_zalloc(sizeof (struct dk_callback), KM_SLEEP);
9114 		dkc->dkc_flag = FLUSH_VOLATILE;
9115 		(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
9116 
9117 		/*
9118 		 * Send a TEST UNIT READY command to the device. This should
9119 		 * clear any outstanding UNIT ATTENTION that may be present.
9120 		 */
9121 		rval = sd_send_scsi_TEST_UNIT_READY(ssc, SD_DONT_RETRY_TUR);
9122 		if (rval != 0)
9123 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9124 
9125 		kmem_free(dkc, sizeof (struct dk_callback));
9126 	} else {
9127 		mutex_exit(SD_MUTEX(un));
9128 	}
9129 
9130 	SD_TRACE(SD_LOG_COMMON, un, "sd_get_nv_sup: \
9131 	    un_f_suppress_cache_flush is set to %d\n",
9132 	    un->un_f_suppress_cache_flush);
9133 }
9134 
9135 /*
9136  *    Function: sd_make_device
9137  *
9138  * Description: Utility routine to return the Solaris device number from
9139  *		the data in the device's dev_info structure.
9140  *
9141  * Return Code: The Solaris device number
9142  *
9143  *     Context: Any
9144  */
9145 
9146 static dev_t
sd_make_device(dev_info_t * devi)9147 sd_make_device(dev_info_t *devi)
9148 {
9149 	return (makedevice(ddi_driver_major(devi),
9150 	    ddi_get_instance(devi) << SDUNIT_SHIFT));
9151 }
9152 
9153 
9154 /*
9155  *    Function: sd_pm_entry
9156  *
9157  * Description: Called at the start of a new command to manage power
9158  *		and busy status of a device. This includes determining whether
9159  *		the current power state of the device is sufficient for
9160  *		performing the command or whether it must be changed.
9161  *		The PM framework is notified appropriately.
9162  *		Only with a return status of DDI_SUCCESS will the
9163  *		component be busy to the framework.
9164  *
9165  *		All callers of sd_pm_entry must check the return status
9166  *		and only call sd_pm_exit it it was DDI_SUCCESS. A status
9167  *		of DDI_FAILURE indicates the device failed to power up.
9168  *		In this case un_pm_count has been adjusted so the result
9169  *		on exit is still powered down, ie. count is less than 0.
9170  *		Calling sd_pm_exit with this count value hits an ASSERT.
9171  *
9172  * Return Code: DDI_SUCCESS or DDI_FAILURE
9173  *
9174  *     Context: Kernel thread context.
9175  */
9176 
9177 static int
sd_pm_entry(struct sd_lun * un)9178 sd_pm_entry(struct sd_lun *un)
9179 {
9180 	int return_status = DDI_SUCCESS;
9181 
9182 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9183 	ASSERT(!mutex_owned(&un->un_pm_mutex));
9184 
9185 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");
9186 
9187 	if (un->un_f_pm_is_enabled == FALSE) {
9188 		SD_TRACE(SD_LOG_IO_PM, un,
9189 		    "sd_pm_entry: exiting, PM not enabled\n");
9190 		return (return_status);
9191 	}
9192 
9193 	/*
9194 	 * Just increment a counter if PM is enabled. On the transition from
9195 	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
9196 	 * the count with each IO and mark the device as idle when the count
9197 	 * hits 0.
9198 	 *
9199 	 * If the count is less than 0 the device is powered down. If a powered
9200 	 * down device is successfully powered up then the count must be
9201 	 * incremented to reflect the power up. Note that it'll get incremented
9202 	 * a second time to become busy.
9203 	 *
9204 	 * Because the following has the potential to change the device state
9205 	 * and must release the un_pm_mutex to do so, only one thread can be
9206 	 * allowed through at a time.
9207 	 */
9208 
9209 	mutex_enter(&un->un_pm_mutex);
9210 	while (un->un_pm_busy == TRUE) {
9211 		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
9212 	}
9213 	un->un_pm_busy = TRUE;
9214 
9215 	if (un->un_pm_count < 1) {
9216 
9217 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");
9218 
9219 		/*
9220 		 * Indicate we are now busy so the framework won't attempt to
9221 		 * power down the device. This call will only fail if either
9222 		 * we passed a bad component number or the device has no
9223 		 * components. Neither of these should ever happen.
9224 		 */
9225 		mutex_exit(&un->un_pm_mutex);
9226 		return_status = pm_busy_component(SD_DEVINFO(un), 0);
9227 		ASSERT(return_status == DDI_SUCCESS);
9228 
9229 		mutex_enter(&un->un_pm_mutex);
9230 
9231 		if (un->un_pm_count < 0) {
9232 			mutex_exit(&un->un_pm_mutex);
9233 
9234 			SD_TRACE(SD_LOG_IO_PM, un,
9235 			    "sd_pm_entry: power up component\n");
9236 
9237 			/*
9238 			 * pm_raise_power will cause sdpower to be called
9239 			 * which brings the device power level to the
9240 			 * desired state, If successful, un_pm_count and
9241 			 * un_power_level will be updated appropriately.
9242 			 */
9243 			return_status = pm_raise_power(SD_DEVINFO(un), 0,
9244 			    SD_PM_STATE_ACTIVE(un));
9245 
9246 			mutex_enter(&un->un_pm_mutex);
9247 
9248 			if (return_status != DDI_SUCCESS) {
9249 				/*
9250 				 * Power up failed.
9251 				 * Idle the device and adjust the count
9252 				 * so the result on exit is that we're
9253 				 * still powered down, ie. count is less than 0.
9254 				 */
9255 				SD_TRACE(SD_LOG_IO_PM, un,
9256 				    "sd_pm_entry: power up failed,"
9257 				    " idle the component\n");
9258 
9259 				(void) pm_idle_component(SD_DEVINFO(un), 0);
9260 				un->un_pm_count--;
9261 			} else {
9262 				/*
9263 				 * Device is powered up, verify the
9264 				 * count is non-negative.
9265 				 * This is debug only.
9266 				 */
9267 				ASSERT(un->un_pm_count == 0);
9268 			}
9269 		}
9270 
9271 		if (return_status == DDI_SUCCESS) {
9272 			/*
9273 			 * For performance, now that the device has been tagged
9274 			 * as busy, and it's known to be powered up, update the
9275 			 * chain types to use jump tables that do not include
9276 			 * pm. This significantly lowers the overhead and
9277 			 * therefore improves performance.
9278 			 */
9279 
9280 			mutex_exit(&un->un_pm_mutex);
9281 			mutex_enter(SD_MUTEX(un));
9282 			SD_TRACE(SD_LOG_IO_PM, un,
9283 			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
9284 			    un->un_uscsi_chain_type);
9285 
9286 			if (un->un_f_non_devbsize_supported) {
9287 				un->un_buf_chain_type =
9288 				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
9289 			} else {
9290 				un->un_buf_chain_type =
9291 				    SD_CHAIN_INFO_DISK_NO_PM;
9292 			}
9293 			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
9294 
9295 			SD_TRACE(SD_LOG_IO_PM, un,
9296 			    "             changed  uscsi_chain_type to   %d\n",
9297 			    un->un_uscsi_chain_type);
9298 			mutex_exit(SD_MUTEX(un));
9299 			mutex_enter(&un->un_pm_mutex);
9300 
9301 			if (un->un_pm_idle_timeid == NULL) {
9302 				/* 300 ms. */
9303 				un->un_pm_idle_timeid =
9304 				    timeout(sd_pm_idletimeout_handler, un,
9305 				    (drv_usectohz((clock_t)300000)));
9306 				/*
9307 				 * Include an extra call to busy which keeps the
9308 				 * device busy with-respect-to the PM layer
9309 				 * until the timer fires, at which time it'll
9310 				 * get the extra idle call.
9311 				 */
9312 				(void) pm_busy_component(SD_DEVINFO(un), 0);
9313 			}
9314 		}
9315 	}
9316 	un->un_pm_busy = FALSE;
9317 	/* Next... */
9318 	cv_signal(&un->un_pm_busy_cv);
9319 
9320 	un->un_pm_count++;
9321 
9322 	SD_TRACE(SD_LOG_IO_PM, un,
9323 	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);
9324 
9325 	mutex_exit(&un->un_pm_mutex);
9326 
9327 	return (return_status);
9328 }
9329 
9330 
9331 /*
9332  *    Function: sd_pm_exit
9333  *
9334  * Description: Called at the completion of a command to manage busy
9335  *		status for the device. If the device becomes idle the
9336  *		PM framework is notified.
9337  *
9338  *     Context: Kernel thread context
9339  */
9340 
9341 static void
sd_pm_exit(struct sd_lun * un)9342 sd_pm_exit(struct sd_lun *un)
9343 {
9344 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9345 	ASSERT(!mutex_owned(&un->un_pm_mutex));
9346 
9347 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
9348 
9349 	/*
9350 	 * After attach the following flag is only read, so don't
9351 	 * take the penalty of acquiring a mutex for it.
9352 	 */
9353 	if (un->un_f_pm_is_enabled == TRUE) {
9354 
9355 		mutex_enter(&un->un_pm_mutex);
9356 		un->un_pm_count--;
9357 
9358 		SD_TRACE(SD_LOG_IO_PM, un,
9359 		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
9360 
9361 		ASSERT(un->un_pm_count >= 0);
9362 		if (un->un_pm_count == 0) {
9363 			mutex_exit(&un->un_pm_mutex);
9364 
9365 			SD_TRACE(SD_LOG_IO_PM, un,
9366 			    "sd_pm_exit: idle component\n");
9367 
9368 			(void) pm_idle_component(SD_DEVINFO(un), 0);
9369 
9370 		} else {
9371 			mutex_exit(&un->un_pm_mutex);
9372 		}
9373 	}
9374 
9375 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
9376 }
9377 
9378 
9379 /*
9380  *    Function: sdopen
9381  *
9382  * Description: Driver's open(9e) entry point function.
9383  *
9384  *   Arguments: dev_i   - pointer to device number
9385  *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
9386  *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
9387  *		cred_p  - user credential pointer
9388  *
9389  * Return Code: EINVAL
9390  *		ENXIO
9391  *		EIO
9392  *		EROFS
9393  *		EBUSY
9394  *
9395  *     Context: Kernel thread context
9396  */
9397 /* ARGSUSED */
9398 static int
sdopen(dev_t * dev_p,int flag,int otyp,cred_t * cred_p)9399 sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
9400 {
9401 	struct sd_lun	*un;
9402 	int		nodelay;
9403 	int		part;
9404 	uint64_t	partmask;
9405 	int		instance;
9406 	dev_t		dev;
9407 	int		rval = EIO;
9408 	diskaddr_t	nblks = 0;
9409 	diskaddr_t	label_cap;
9410 
9411 	/* Validate the open type */
9412 	if (otyp >= OTYPCNT) {
9413 		return (EINVAL);
9414 	}
9415 
9416 	dev = *dev_p;
9417 	instance = SDUNIT(dev);
9418 
9419 	/*
9420 	 * Fail the open if there is no softstate for the instance.
9421 	 */
9422 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
9423 		/*
9424 		 * The probe cache only needs to be cleared when open (9e) fails
9425 		 * with ENXIO (4238046).
9426 		 */
9427 		/*
9428 		 * un-conditionally clearing probe cache is ok with
9429 		 * separate sd/ssd binaries
9430 		 * x86 platform can be an issue with both parallel
9431 		 * and fibre in 1 binary
9432 		 */
9433 		sd_scsi_clear_probe_cache();
9434 		return (ENXIO);
9435 	}
9436 
9437 	nodelay  = (flag & (FNDELAY | FNONBLOCK));
9438 	part	 = SDPART(dev);
9439 	partmask = 1 << part;
9440 
9441 	mutex_enter(SD_MUTEX(un));
9442 
9443 	/*
9444 	 * All device accesses go thru sdstrategy() where we check
9445 	 * on suspend status but there could be a scsi_poll command,
9446 	 * which bypasses sdstrategy(), so we need to check pm
9447 	 * status.
9448 	 */
9449 
9450 	if (!nodelay) {
9451 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9452 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9453 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9454 		}
9455 
9456 		mutex_exit(SD_MUTEX(un));
9457 		if (sd_pm_entry(un) != DDI_SUCCESS) {
9458 			rval = EIO;
9459 			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
9460 			    "sdopen: sd_pm_entry failed\n");
9461 			goto open_failed_with_pm;
9462 		}
9463 		mutex_enter(SD_MUTEX(un));
9464 	}
9465 
9466 	/* check for previous exclusive open */
9467 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
9468 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
9469 	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
9470 	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);
9471 
9472 	if (un->un_exclopen & (partmask)) {
9473 		goto excl_open_fail;
9474 	}
9475 
9476 	if (flag & FEXCL) {
9477 		int i;
9478 		if (un->un_ocmap.lyropen[part]) {
9479 			goto excl_open_fail;
9480 		}
9481 		for (i = 0; i < (OTYPCNT - 1); i++) {
9482 			if (un->un_ocmap.regopen[i] & (partmask)) {
9483 				goto excl_open_fail;
9484 			}
9485 		}
9486 	}
9487 
9488 	/*
9489 	 * Check the write permission if this is a removable media device,
9490 	 * NDELAY has not been set, and writable permission is requested.
9491 	 *
9492 	 * Note: If NDELAY was set and this is write-protected media the WRITE
9493 	 * attempt will fail with EIO as part of the I/O processing. This is a
9494 	 * more permissive implementation that allows the open to succeed and
9495 	 * WRITE attempts to fail when appropriate.
9496 	 */
9497 	if (un->un_f_chk_wp_open) {
9498 		if ((flag & FWRITE) && (!nodelay)) {
9499 			mutex_exit(SD_MUTEX(un));
9500 			/*
9501 			 * Defer the check for write permission on writable
9502 			 * DVD drive till sdstrategy and will not fail open even
9503 			 * if FWRITE is set as the device can be writable
9504 			 * depending upon the media and the media can change
9505 			 * after the call to open().
9506 			 */
9507 			if (un->un_f_dvdram_writable_device == FALSE) {
9508 				if (ISCD(un) || sr_check_wp(dev)) {
9509 				rval = EROFS;
9510 				mutex_enter(SD_MUTEX(un));
9511 				SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
9512 				    "write to cd or write protected media\n");
9513 				goto open_fail;
9514 				}
9515 			}
9516 			mutex_enter(SD_MUTEX(un));
9517 		}
9518 	}
9519 
9520 	/*
9521 	 * If opening in NDELAY/NONBLOCK mode, just return.
9522 	 * Check if disk is ready and has a valid geometry later.
9523 	 */
9524 	if (!nodelay) {
9525 		sd_ssc_t	*ssc;
9526 
9527 		mutex_exit(SD_MUTEX(un));
9528 		ssc = sd_ssc_init(un);
9529 		rval = sd_ready_and_valid(ssc, part);
9530 		sd_ssc_fini(ssc);
9531 		mutex_enter(SD_MUTEX(un));
9532 		/*
9533 		 * Fail if device is not ready or if the number of disk
9534 		 * blocks is zero or negative for non CD devices.
9535 		 */
9536 
9537 		nblks = 0;
9538 
9539 		if (rval == SD_READY_VALID && (!ISCD(un))) {
9540 			/* if cmlb_partinfo fails, nblks remains 0 */
9541 			mutex_exit(SD_MUTEX(un));
9542 			(void) cmlb_partinfo(un->un_cmlbhandle, part, &nblks,
9543 			    NULL, NULL, NULL, (void *)SD_PATH_DIRECT);
9544 			mutex_enter(SD_MUTEX(un));
9545 		}
9546 
9547 		if ((rval != SD_READY_VALID) ||
9548 		    (!ISCD(un) && nblks <= 0)) {
9549 			rval = un->un_f_has_removable_media ? ENXIO : EIO;
9550 			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
9551 			    "device not ready or invalid disk block value\n");
9552 			goto open_fail;
9553 		}
9554 #if defined(__x86)
9555 	} else {
9556 		uchar_t *cp;
9557 		/*
9558 		 * x86 requires special nodelay handling, so that p0 is
9559 		 * always defined and accessible.
9560 		 * Invalidate geometry only if device is not already open.
9561 		 */
9562 		cp = &un->un_ocmap.chkd[0];
9563 		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
9564 			if (*cp != (uchar_t)0) {
9565 				break;
9566 			}
9567 			cp++;
9568 		}
9569 		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
9570 			mutex_exit(SD_MUTEX(un));
9571 			cmlb_invalidate(un->un_cmlbhandle,
9572 			    (void *)SD_PATH_DIRECT);
9573 			mutex_enter(SD_MUTEX(un));
9574 		}
9575 
9576 #endif
9577 	}
9578 
9579 	if (otyp == OTYP_LYR) {
9580 		un->un_ocmap.lyropen[part]++;
9581 	} else {
9582 		un->un_ocmap.regopen[otyp] |= partmask;
9583 	}
9584 
9585 	/* Set up open and exclusive open flags */
9586 	if (flag & FEXCL) {
9587 		un->un_exclopen |= (partmask);
9588 	}
9589 
9590 	/*
9591 	 * If the lun is EFI labeled and lun capacity is greater than the
9592 	 * capacity contained in the label, log a sys-event to notify the
9593 	 * interested module.
9594 	 * To avoid an infinite loop of logging sys-event, we only log the
9595 	 * event when the lun is not opened in NDELAY mode. The event handler
9596 	 * should open the lun in NDELAY mode.
9597 	 */
9598 	if (!nodelay) {
9599 		mutex_exit(SD_MUTEX(un));
9600 		if (cmlb_efi_label_capacity(un->un_cmlbhandle, &label_cap,
9601 		    (void*)SD_PATH_DIRECT) == 0) {
9602 			mutex_enter(SD_MUTEX(un));
9603 			if (un->un_f_blockcount_is_valid &&
9604 			    un->un_blockcount > label_cap &&
9605 			    un->un_f_expnevent == B_FALSE) {
9606 				un->un_f_expnevent = B_TRUE;
9607 				mutex_exit(SD_MUTEX(un));
9608 				sd_log_lun_expansion_event(un,
9609 				    (nodelay ? KM_NOSLEEP : KM_SLEEP));
9610 				mutex_enter(SD_MUTEX(un));
9611 			}
9612 		} else {
9613 			mutex_enter(SD_MUTEX(un));
9614 		}
9615 	}
9616 
9617 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
9618 	    "open of part %d type %d\n", part, otyp);
9619 
9620 	mutex_exit(SD_MUTEX(un));
9621 	if (!nodelay) {
9622 		sd_pm_exit(un);
9623 	}
9624 
9625 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
9626 	return (DDI_SUCCESS);
9627 
9628 excl_open_fail:
9629 	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
9630 	rval = EBUSY;
9631 
9632 open_fail:
9633 	mutex_exit(SD_MUTEX(un));
9634 
9635 	/*
9636 	 * On a failed open we must exit the pm management.
9637 	 */
9638 	if (!nodelay) {
9639 		sd_pm_exit(un);
9640 	}
9641 open_failed_with_pm:
9642 
9643 	return (rval);
9644 }
9645 
9646 
9647 /*
9648  *    Function: sdclose
9649  *
9650  * Description: Driver's close(9e) entry point function.
9651  *
9652  *   Arguments: dev    - device number
9653  *		flag   - file status flag, informational only
9654  *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
9655  *		cred_p - user credential pointer
9656  *
9657  * Return Code: ENXIO
9658  *
9659  *     Context: Kernel thread context
9660  */
9661 /* ARGSUSED */
9662 static int
sdclose(dev_t dev,int flag,int otyp,cred_t * cred_p)9663 sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
9664 {
9665 	struct sd_lun	*un;
9666 	uchar_t		*cp;
9667 	int		part;
9668 	int		nodelay;
9669 	int		rval = 0;
9670 
9671 	/* Validate the open type */
9672 	if (otyp >= OTYPCNT) {
9673 		return (ENXIO);
9674 	}
9675 
9676 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9677 		return (ENXIO);
9678 	}
9679 
9680 	part = SDPART(dev);
9681 	nodelay = flag & (FNDELAY | FNONBLOCK);
9682 
9683 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
9684 	    "sdclose: close of part %d type %d\n", part, otyp);
9685 
9686 	mutex_enter(SD_MUTEX(un));
9687 
9688 	/* Don't proceed if power is being changed. */
9689 	while (un->un_state == SD_STATE_PM_CHANGING) {
9690 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9691 	}
9692 
9693 	if (un->un_exclopen & (1 << part)) {
9694 		un->un_exclopen &= ~(1 << part);
9695 	}
9696 
9697 	/* Update the open partition map */
9698 	if (otyp == OTYP_LYR) {
9699 		un->un_ocmap.lyropen[part] -= 1;
9700 	} else {
9701 		un->un_ocmap.regopen[otyp] &= ~(1 << part);
9702 	}
9703 
9704 	cp = &un->un_ocmap.chkd[0];
9705 	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
9706 		if (*cp != '\0') {
9707 			break;
9708 		}
9709 		cp++;
9710 	}
9711 
9712 	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
9713 		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
9714 
9715 		/*
9716 		 * We avoid persistance upon the last close, and set
9717 		 * the throttle back to the maximum.
9718 		 */
9719 		un->un_throttle = un->un_saved_throttle;
9720 
9721 		if (un->un_state == SD_STATE_OFFLINE) {
9722 			if (un->un_f_is_fibre == FALSE) {
9723 				scsi_log(SD_DEVINFO(un), sd_label,
9724 				    CE_WARN, "offline\n");
9725 			}
9726 			mutex_exit(SD_MUTEX(un));
9727 			cmlb_invalidate(un->un_cmlbhandle,
9728 			    (void *)SD_PATH_DIRECT);
9729 			mutex_enter(SD_MUTEX(un));
9730 
9731 		} else {
9732 			/*
9733 			 * Flush any outstanding writes in NVRAM cache.
9734 			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
9735 			 * cmd, it may not work for non-Pluto devices.
9736 			 * SYNCHRONIZE CACHE is not required for removables,
9737 			 * except DVD-RAM drives.
9738 			 *
9739 			 * Also note: because SYNCHRONIZE CACHE is currently
9740 			 * the only command issued here that requires the
9741 			 * drive be powered up, only do the power up before
9742 			 * sending the Sync Cache command. If additional
9743 			 * commands are added which require a powered up
9744 			 * drive, the following sequence may have to change.
9745 			 *
9746 			 * And finally, note that parallel SCSI on SPARC
9747 			 * only issues a Sync Cache to DVD-RAM, a newly
9748 			 * supported device.
9749 			 */
9750 			if ((un->un_f_sync_cache_supported &&
9751 			    un->un_f_sync_cache_required) ||
9752 			    un->un_f_dvdram_writable_device == TRUE) {
9753 				mutex_exit(SD_MUTEX(un));
9754 				if (sd_pm_entry(un) == DDI_SUCCESS) {
9755 					rval =
9756 					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
9757 					    NULL);
9758 					/* ignore error if not supported */
9759 					if (rval == ENOTSUP) {
9760 						rval = 0;
9761 					} else if (rval != 0) {
9762 						rval = EIO;
9763 					}
9764 					sd_pm_exit(un);
9765 				} else {
9766 					rval = EIO;
9767 				}
9768 				mutex_enter(SD_MUTEX(un));
9769 			}
9770 
9771 			/*
9772 			 * For devices which supports DOOR_LOCK, send an ALLOW
9773 			 * MEDIA REMOVAL command, but don't get upset if it
9774 			 * fails. We need to raise the power of the drive before
9775 			 * we can call sd_send_scsi_DOORLOCK()
9776 			 */
9777 			if (un->un_f_doorlock_supported) {
9778 				mutex_exit(SD_MUTEX(un));
9779 				if (sd_pm_entry(un) == DDI_SUCCESS) {
9780 					sd_ssc_t	*ssc;
9781 
9782 					ssc = sd_ssc_init(un);
9783 					rval = sd_send_scsi_DOORLOCK(ssc,
9784 					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
9785 					if (rval != 0)
9786 						sd_ssc_assessment(ssc,
9787 						    SD_FMT_IGNORE);
9788 					sd_ssc_fini(ssc);
9789 
9790 					sd_pm_exit(un);
9791 					if (ISCD(un) && (rval != 0) &&
9792 					    (nodelay != 0)) {
9793 						rval = ENXIO;
9794 					}
9795 				} else {
9796 					rval = EIO;
9797 				}
9798 				mutex_enter(SD_MUTEX(un));
9799 			}
9800 
9801 			/*
9802 			 * If a device has removable media, invalidate all
9803 			 * parameters related to media, such as geometry,
9804 			 * blocksize, and blockcount.
9805 			 */
9806 			if (un->un_f_has_removable_media) {
9807 				sr_ejected(un);
9808 			}
9809 
9810 			/*
9811 			 * Destroy the cache (if it exists) which was
9812 			 * allocated for the write maps, as long as no
9813 			 * other outstanding commands for the device exist.
9814 			 * (If we don't destroy it here, we will do so later
9815 			 * on detach.  More likely we'll just reuse it on
9816 			 * a future open.)
9817 			 */
9818 			if ((un->un_wm_cache != NULL) &&
9819 			    (un->un_ncmds_in_driver == 0)) {
9820 				kmem_cache_destroy(un->un_wm_cache);
9821 				un->un_wm_cache = NULL;
9822 			}
9823 		}
9824 	}
9825 
9826 	mutex_exit(SD_MUTEX(un));
9827 
9828 	return (rval);
9829 }
9830 
9831 
9832 /*
9833  *    Function: sd_ready_and_valid
9834  *
9835  * Description: Test if device is ready and has a valid geometry.
9836  *
9837  *   Arguments: ssc - sd_ssc_t will contain un
9838  *		un  - driver soft state (unit) structure
9839  *
9840  * Return Code: SD_READY_VALID		ready and valid label
9841  *		SD_NOT_READY_VALID	not ready, no label
9842  *		SD_RESERVED_BY_OTHERS	reservation conflict
9843  *
9844  *     Context: Never called at interrupt context.
9845  */
9846 
9847 static int
sd_ready_and_valid(sd_ssc_t * ssc,int part)9848 sd_ready_and_valid(sd_ssc_t *ssc, int part)
9849 {
9850 	struct sd_errstats	*stp;
9851 	uint64_t		capacity;
9852 	uint_t			lbasize;
9853 	int			rval = SD_READY_VALID;
9854 	char			name_str[48];
9855 	boolean_t		is_valid;
9856 	struct sd_lun		*un;
9857 	int			status;
9858 
9859 	ASSERT(ssc != NULL);
9860 	un = ssc->ssc_un;
9861 	ASSERT(un != NULL);
9862 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9863 
9864 	mutex_enter(SD_MUTEX(un));
9865 	/*
9866 	 * If a device has removable media, we must check if media is
9867 	 * ready when checking if this device is ready and valid.
9868 	 */
9869 	if (un->un_f_has_removable_media) {
9870 		mutex_exit(SD_MUTEX(un));
9871 		status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
9872 
9873 		if (status != 0) {
9874 			rval = SD_NOT_READY_VALID;
9875 			mutex_enter(SD_MUTEX(un));
9876 
9877 			/* Ignore all failed status for removalbe media */
9878 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9879 
9880 			goto done;
9881 		}
9882 
9883 		is_valid = SD_IS_VALID_LABEL(un);
9884 		mutex_enter(SD_MUTEX(un));
9885 		if (!is_valid ||
9886 		    (un->un_f_blockcount_is_valid == FALSE) ||
9887 		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
9888 
9889 			/* capacity has to be read every open. */
9890 			mutex_exit(SD_MUTEX(un));
9891 			status = sd_send_scsi_READ_CAPACITY(ssc, &capacity,
9892 			    &lbasize, SD_PATH_DIRECT);
9893 
9894 			if (status != 0) {
9895 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9896 
9897 				cmlb_invalidate(un->un_cmlbhandle,
9898 				    (void *)SD_PATH_DIRECT);
9899 				mutex_enter(SD_MUTEX(un));
9900 				rval = SD_NOT_READY_VALID;
9901 
9902 				goto done;
9903 			} else {
9904 				mutex_enter(SD_MUTEX(un));
9905 				sd_update_block_info(un, lbasize, capacity);
9906 			}
9907 		}
9908 
9909 		/*
9910 		 * Check if the media in the device is writable or not.
9911 		 */
9912 		if (!is_valid && ISCD(un)) {
9913 			sd_check_for_writable_cd(ssc, SD_PATH_DIRECT);
9914 		}
9915 
9916 	} else {
9917 		/*
9918 		 * Do a test unit ready to clear any unit attention from non-cd
9919 		 * devices.
9920 		 */
9921 		mutex_exit(SD_MUTEX(un));
9922 
9923 		status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
9924 		if (status != 0) {
9925 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9926 		}
9927 
9928 		mutex_enter(SD_MUTEX(un));
9929 	}
9930 
9931 
9932 	/*
9933 	 * If this is a non 512 block device, allocate space for
9934 	 * the wmap cache. This is being done here since every time
9935 	 * a media is changed this routine will be called and the
9936 	 * block size is a function of media rather than device.
9937 	 */
9938 	if (((un->un_f_rmw_type != SD_RMW_TYPE_RETURN_ERROR ||
9939 	    un->un_f_non_devbsize_supported) &&
9940 	    un->un_tgt_blocksize != DEV_BSIZE) ||
9941 	    un->un_f_enable_rmw) {
9942 		if (!(un->un_wm_cache)) {
9943 			(void) snprintf(name_str, sizeof (name_str),
9944 			    "%s%d_cache",
9945 			    ddi_driver_name(SD_DEVINFO(un)),
9946 			    ddi_get_instance(SD_DEVINFO(un)));
9947 			un->un_wm_cache = kmem_cache_create(
9948 			    name_str, sizeof (struct sd_w_map),
9949 			    8, sd_wm_cache_constructor,
9950 			    sd_wm_cache_destructor, NULL,
9951 			    (void *)un, NULL, 0);
9952 			if (!(un->un_wm_cache)) {
9953 				rval = ENOMEM;
9954 				goto done;
9955 			}
9956 		}
9957 	}
9958 
9959 	if (un->un_state == SD_STATE_NORMAL) {
9960 		/*
9961 		 * If the target is not yet ready here (defined by a TUR
9962 		 * failure), invalidate the geometry and print an 'offline'
9963 		 * message. This is a legacy message, as the state of the
9964 		 * target is not actually changed to SD_STATE_OFFLINE.
9965 		 *
9966 		 * If the TUR fails for EACCES (Reservation Conflict),
9967 		 * SD_RESERVED_BY_OTHERS will be returned to indicate
9968 		 * reservation conflict. If the TUR fails for other
9969 		 * reasons, SD_NOT_READY_VALID will be returned.
9970 		 */
9971 		int err;
9972 
9973 		mutex_exit(SD_MUTEX(un));
9974 		err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
9975 		mutex_enter(SD_MUTEX(un));
9976 
9977 		if (err != 0) {
9978 			mutex_exit(SD_MUTEX(un));
9979 			cmlb_invalidate(un->un_cmlbhandle,
9980 			    (void *)SD_PATH_DIRECT);
9981 			mutex_enter(SD_MUTEX(un));
9982 			if (err == EACCES) {
9983 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
9984 				    "reservation conflict\n");
9985 				rval = SD_RESERVED_BY_OTHERS;
9986 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9987 			} else {
9988 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
9989 				    "drive offline\n");
9990 				rval = SD_NOT_READY_VALID;
9991 				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
9992 			}
9993 			goto done;
9994 		}
9995 	}
9996 
9997 	if (un->un_f_format_in_progress == FALSE) {
9998 		mutex_exit(SD_MUTEX(un));
9999 
10000 		(void) cmlb_validate(un->un_cmlbhandle, 0,
10001 		    (void *)SD_PATH_DIRECT);
10002 		if (cmlb_partinfo(un->un_cmlbhandle, part, NULL, NULL, NULL,
10003 		    NULL, (void *) SD_PATH_DIRECT) != 0) {
10004 			rval = SD_NOT_READY_VALID;
10005 			mutex_enter(SD_MUTEX(un));
10006 
10007 			goto done;
10008 		}
10009 		if (un->un_f_pkstats_enabled) {
10010 			sd_set_pstats(un);
10011 			SD_TRACE(SD_LOG_IO_PARTITION, un,
10012 			    "sd_ready_and_valid: un:0x%p pstats created and "
10013 			    "set\n", un);
10014 		}
10015 		mutex_enter(SD_MUTEX(un));
10016 	}
10017 
10018 	/*
10019 	 * If this device supports DOOR_LOCK command, try and send
10020 	 * this command to PREVENT MEDIA REMOVAL, but don't get upset
10021 	 * if it fails. For a CD, however, it is an error
10022 	 */
10023 	if (un->un_f_doorlock_supported) {
10024 		mutex_exit(SD_MUTEX(un));
10025 		status = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
10026 		    SD_PATH_DIRECT);
10027 
10028 		if ((status != 0) && ISCD(un)) {
10029 			rval = SD_NOT_READY_VALID;
10030 			mutex_enter(SD_MUTEX(un));
10031 
10032 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10033 
10034 			goto done;
10035 		} else if (status != 0)
10036 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10037 		mutex_enter(SD_MUTEX(un));
10038 	}
10039 
10040 	/* The state has changed, inform the media watch routines */
10041 	un->un_mediastate = DKIO_INSERTED;
10042 	cv_broadcast(&un->un_state_cv);
10043 	rval = SD_READY_VALID;
10044 
10045 done:
10046 
10047 	/*
10048 	 * Initialize the capacity kstat value, if no media previously
10049 	 * (capacity kstat is 0) and a media has been inserted
10050 	 * (un_blockcount > 0).
10051 	 */
10052 	if (un->un_errstats != NULL) {
10053 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
10054 		if ((stp->sd_capacity.value.ui64 == 0) &&
10055 		    (un->un_f_blockcount_is_valid == TRUE)) {
10056 			stp->sd_capacity.value.ui64 =
10057 			    (uint64_t)((uint64_t)un->un_blockcount *
10058 			    un->un_sys_blocksize);
10059 		}
10060 	}
10061 
10062 	mutex_exit(SD_MUTEX(un));
10063 	return (rval);
10064 }
10065 
10066 
10067 /*
10068  *    Function: sdmin
10069  *
10070  * Description: Routine to limit the size of a data transfer. Used in
10071  *		conjunction with physio(9F).
10072  *
10073  *   Arguments: bp - pointer to the indicated buf(9S) struct.
10074  *
10075  *     Context: Kernel thread context.
10076  */
10077 
10078 static void
sdmin(struct buf * bp)10079 sdmin(struct buf *bp)
10080 {
10081 	struct sd_lun	*un;
10082 	int		instance;
10083 
10084 	instance = SDUNIT(bp->b_edev);
10085 
10086 	un = ddi_get_soft_state(sd_state, instance);
10087 	ASSERT(un != NULL);
10088 
10089 	/*
10090 	 * We depend on buf breakup to restrict
10091 	 * IO size if it is enabled.
10092 	 */
10093 	if (un->un_buf_breakup_supported) {
10094 		return;
10095 	}
10096 
10097 	if (bp->b_bcount > un->un_max_xfer_size) {
10098 		bp->b_bcount = un->un_max_xfer_size;
10099 	}
10100 }
10101 
10102 
10103 /*
10104  *    Function: sdread
10105  *
10106  * Description: Driver's read(9e) entry point function.
10107  *
10108  *   Arguments: dev   - device number
10109  *		uio   - structure pointer describing where data is to be stored
10110  *			in user's space
10111  *		cred_p  - user credential pointer
10112  *
10113  * Return Code: ENXIO
10114  *		EIO
10115  *		EINVAL
10116  *		value returned by physio
10117  *
10118  *     Context: Kernel thread context.
10119  */
10120 /* ARGSUSED */
10121 static int
sdread(dev_t dev,struct uio * uio,cred_t * cred_p)10122 sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
10123 {
10124 	struct sd_lun	*un = NULL;
10125 	int		secmask;
10126 	int		err = 0;
10127 	sd_ssc_t	*ssc;
10128 
10129 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10130 		return (ENXIO);
10131 	}
10132 
10133 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10134 
10135 
10136 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
10137 		mutex_enter(SD_MUTEX(un));
10138 		/*
10139 		 * Because the call to sd_ready_and_valid will issue I/O we
10140 		 * must wait here if either the device is suspended or
10141 		 * if it's power level is changing.
10142 		 */
10143 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10144 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10145 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10146 		}
10147 		un->un_ncmds_in_driver++;
10148 		mutex_exit(SD_MUTEX(un));
10149 
10150 		/* Initialize sd_ssc_t for internal uscsi commands */
10151 		ssc = sd_ssc_init(un);
10152 		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
10153 			err = EIO;
10154 		} else {
10155 			err = 0;
10156 		}
10157 		sd_ssc_fini(ssc);
10158 
10159 		mutex_enter(SD_MUTEX(un));
10160 		un->un_ncmds_in_driver--;
10161 		ASSERT(un->un_ncmds_in_driver >= 0);
10162 		mutex_exit(SD_MUTEX(un));
10163 		if (err != 0)
10164 			return (err);
10165 	}
10166 
10167 	/*
10168 	 * Read requests are restricted to multiples of the system block size.
10169 	 */
10170 	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
10171 	    !un->un_f_enable_rmw)
10172 		secmask = un->un_tgt_blocksize - 1;
10173 	else
10174 		secmask = DEV_BSIZE - 1;
10175 
10176 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10177 		SD_ERROR(SD_LOG_READ_WRITE, un,
10178 		    "sdread: file offset not modulo %d\n",
10179 		    secmask + 1);
10180 		err = EINVAL;
10181 	} else if (uio->uio_iov->iov_len & (secmask)) {
10182 		SD_ERROR(SD_LOG_READ_WRITE, un,
10183 		    "sdread: transfer length not modulo %d\n",
10184 		    secmask + 1);
10185 		err = EINVAL;
10186 	} else {
10187 		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
10188 	}
10189 
10190 	return (err);
10191 }
10192 
10193 
10194 /*
10195  *    Function: sdwrite
10196  *
10197  * Description: Driver's write(9e) entry point function.
10198  *
10199  *   Arguments: dev   - device number
10200  *		uio   - structure pointer describing where data is stored in
10201  *			user's space
10202  *		cred_p  - user credential pointer
10203  *
10204  * Return Code: ENXIO
10205  *		EIO
10206  *		EINVAL
10207  *		value returned by physio
10208  *
10209  *     Context: Kernel thread context.
10210  */
10211 /* ARGSUSED */
10212 static int
sdwrite(dev_t dev,struct uio * uio,cred_t * cred_p)10213 sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
10214 {
10215 	struct sd_lun	*un = NULL;
10216 	int		secmask;
10217 	int		err = 0;
10218 	sd_ssc_t	*ssc;
10219 
10220 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10221 		return (ENXIO);
10222 	}
10223 
10224 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10225 
10226 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
10227 		mutex_enter(SD_MUTEX(un));
10228 		/*
10229 		 * Because the call to sd_ready_and_valid will issue I/O we
10230 		 * must wait here if either the device is suspended or
10231 		 * if it's power level is changing.
10232 		 */
10233 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10234 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10235 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10236 		}
10237 		un->un_ncmds_in_driver++;
10238 		mutex_exit(SD_MUTEX(un));
10239 
10240 		/* Initialize sd_ssc_t for internal uscsi commands */
10241 		ssc = sd_ssc_init(un);
10242 		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
10243 			err = EIO;
10244 		} else {
10245 			err = 0;
10246 		}
10247 		sd_ssc_fini(ssc);
10248 
10249 		mutex_enter(SD_MUTEX(un));
10250 		un->un_ncmds_in_driver--;
10251 		ASSERT(un->un_ncmds_in_driver >= 0);
10252 		mutex_exit(SD_MUTEX(un));
10253 		if (err != 0)
10254 			return (err);
10255 	}
10256 
10257 	/*
10258 	 * Write requests are restricted to multiples of the system block size.
10259 	 */
10260 	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
10261 	    !un->un_f_enable_rmw)
10262 		secmask = un->un_tgt_blocksize - 1;
10263 	else
10264 		secmask = DEV_BSIZE - 1;
10265 
10266 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10267 		SD_ERROR(SD_LOG_READ_WRITE, un,
10268 		    "sdwrite: file offset not modulo %d\n",
10269 		    secmask + 1);
10270 		err = EINVAL;
10271 	} else if (uio->uio_iov->iov_len & (secmask)) {
10272 		SD_ERROR(SD_LOG_READ_WRITE, un,
10273 		    "sdwrite: transfer length not modulo %d\n",
10274 		    secmask + 1);
10275 		err = EINVAL;
10276 	} else {
10277 		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
10278 	}
10279 
10280 	return (err);
10281 }
10282 
10283 
10284 /*
10285  *    Function: sdaread
10286  *
10287  * Description: Driver's aread(9e) entry point function.
10288  *
10289  *   Arguments: dev   - device number
10290  *		aio   - structure pointer describing where data is to be stored
10291  *		cred_p  - user credential pointer
10292  *
10293  * Return Code: ENXIO
10294  *		EIO
10295  *		EINVAL
10296  *		value returned by aphysio
10297  *
10298  *     Context: Kernel thread context.
10299  */
10300 /* ARGSUSED */
10301 static int
sdaread(dev_t dev,struct aio_req * aio,cred_t * cred_p)10302 sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
10303 {
10304 	struct sd_lun	*un = NULL;
10305 	struct uio	*uio = aio->aio_uio;
10306 	int		secmask;
10307 	int		err = 0;
10308 	sd_ssc_t	*ssc;
10309 
10310 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10311 		return (ENXIO);
10312 	}
10313 
10314 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10315 
10316 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
10317 		mutex_enter(SD_MUTEX(un));
10318 		/*
10319 		 * Because the call to sd_ready_and_valid will issue I/O we
10320 		 * must wait here if either the device is suspended or
10321 		 * if it's power level is changing.
10322 		 */
10323 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10324 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10325 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10326 		}
10327 		un->un_ncmds_in_driver++;
10328 		mutex_exit(SD_MUTEX(un));
10329 
10330 		/* Initialize sd_ssc_t for internal uscsi commands */
10331 		ssc = sd_ssc_init(un);
10332 		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
10333 			err = EIO;
10334 		} else {
10335 			err = 0;
10336 		}
10337 		sd_ssc_fini(ssc);
10338 
10339 		mutex_enter(SD_MUTEX(un));
10340 		un->un_ncmds_in_driver--;
10341 		ASSERT(un->un_ncmds_in_driver >= 0);
10342 		mutex_exit(SD_MUTEX(un));
10343 		if (err != 0)
10344 			return (err);
10345 	}
10346 
10347 	/*
10348 	 * Read requests are restricted to multiples of the system block size.
10349 	 */
10350 	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
10351 	    !un->un_f_enable_rmw)
10352 		secmask = un->un_tgt_blocksize - 1;
10353 	else
10354 		secmask = DEV_BSIZE - 1;
10355 
10356 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10357 		SD_ERROR(SD_LOG_READ_WRITE, un,
10358 		    "sdaread: file offset not modulo %d\n",
10359 		    secmask + 1);
10360 		err = EINVAL;
10361 	} else if (uio->uio_iov->iov_len & (secmask)) {
10362 		SD_ERROR(SD_LOG_READ_WRITE, un,
10363 		    "sdaread: transfer length not modulo %d\n",
10364 		    secmask + 1);
10365 		err = EINVAL;
10366 	} else {
10367 		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
10368 	}
10369 
10370 	return (err);
10371 }
10372 
10373 
10374 /*
10375  *    Function: sdawrite
10376  *
10377  * Description: Driver's awrite(9e) entry point function.
10378  *
10379  *   Arguments: dev   - device number
10380  *		aio   - structure pointer describing where data is stored
10381  *		cred_p  - user credential pointer
10382  *
10383  * Return Code: ENXIO
10384  *		EIO
10385  *		EINVAL
10386  *		value returned by aphysio
10387  *
10388  *     Context: Kernel thread context.
10389  */
10390 /* ARGSUSED */
10391 static int
sdawrite(dev_t dev,struct aio_req * aio,cred_t * cred_p)10392 sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
10393 {
10394 	struct sd_lun	*un = NULL;
10395 	struct uio	*uio = aio->aio_uio;
10396 	int		secmask;
10397 	int		err = 0;
10398 	sd_ssc_t	*ssc;
10399 
10400 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10401 		return (ENXIO);
10402 	}
10403 
10404 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10405 
10406 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
10407 		mutex_enter(SD_MUTEX(un));
10408 		/*
10409 		 * Because the call to sd_ready_and_valid will issue I/O we
10410 		 * must wait here if either the device is suspended or
10411 		 * if it's power level is changing.
10412 		 */
10413 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10414 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10415 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10416 		}
10417 		un->un_ncmds_in_driver++;
10418 		mutex_exit(SD_MUTEX(un));
10419 
10420 		/* Initialize sd_ssc_t for internal uscsi commands */
10421 		ssc = sd_ssc_init(un);
10422 		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
10423 			err = EIO;
10424 		} else {
10425 			err = 0;
10426 		}
10427 		sd_ssc_fini(ssc);
10428 
10429 		mutex_enter(SD_MUTEX(un));
10430 		un->un_ncmds_in_driver--;
10431 		ASSERT(un->un_ncmds_in_driver >= 0);
10432 		mutex_exit(SD_MUTEX(un));
10433 		if (err != 0)
10434 			return (err);
10435 	}
10436 
10437 	/*
10438 	 * Write requests are restricted to multiples of the system block size.
10439 	 */
10440 	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
10441 	    !un->un_f_enable_rmw)
10442 		secmask = un->un_tgt_blocksize - 1;
10443 	else
10444 		secmask = DEV_BSIZE - 1;
10445 
10446 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10447 		SD_ERROR(SD_LOG_READ_WRITE, un,
10448 		    "sdawrite: file offset not modulo %d\n",
10449 		    secmask + 1);
10450 		err = EINVAL;
10451 	} else if (uio->uio_iov->iov_len & (secmask)) {
10452 		SD_ERROR(SD_LOG_READ_WRITE, un,
10453 		    "sdawrite: transfer length not modulo %d\n",
10454 		    secmask + 1);
10455 		err = EINVAL;
10456 	} else {
10457 		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
10458 	}
10459 
10460 	return (err);
10461 }
10462 
10463 
10464 
10465 
10466 
10467 /*
10468  * Driver IO processing follows the following sequence:
10469  *
10470  *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
10471  *         |                |                     ^
10472  *         v                v                     |
10473  * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
10474  *         |                |                     |                   |
10475  *         v                |                     |                   |
10476  * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
10477  *         |                |                     ^                   ^
10478  *         v                v                     |                   |
10479  * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
10480  *         |                |                     |                   |
10481  *     +---+                |                     +------------+      +-------+
10482  *     |                    |                                  |              |
10483  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
10484  *     |                    v                                  |              |
10485  *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
10486  *     |                    |                                  ^              |
10487  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
10488  *     |                    v                                  |              |
10489  *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
10490  *     |                    |                                  ^              |
10491  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
10492  *     |                    v                                  |              |
10493  *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
10494  *     |                    |                                  ^              |
10495  *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
10496  *     |                    v                                  |              |
10497  *     |              sd_pm_iostart()                     sd_pm_iodone()      |
10498  *     |                    |                                  ^              |
10499  *     |                    |                                  |              |
10500  *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
10501  *                          |                           ^
10502  *                          v                           |
10503  *                   sd_core_iostart()                  |
10504  *                          |                           |
10505  *                          |                           +------>(*destroypkt)()
10506  *                          +-> sd_start_cmds() <-+     |           |
10507  *                          |                     |     |           v
10508  *                          |                     |     |  scsi_destroy_pkt(9F)
10509  *                          |                     |     |
10510  *                          +->(*initpkt)()       +- sdintr()
10511  *                          |  |                        |  |
10512  *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
10513  *                          |  +-> scsi_setup_cdb(9F)   |
10514  *                          |                           |
10515  *                          +--> scsi_transport(9F)     |
10516  *                                     |                |
10517  *                                     +----> SCSA ---->+
10518  *
10519  *
10520  * This code is based upon the following presumptions:
10521  *
10522  *   - iostart and iodone functions operate on buf(9S) structures. These
10523  *     functions perform the necessary operations on the buf(9S) and pass
10524  *     them along to the next function in the chain by using the macros
10525  *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
10526  *     (for iodone side functions).
10527  *
10528  *   - The iostart side functions may sleep. The iodone side functions
10529  *     are called under interrupt context and may NOT sleep. Therefore
10530  *     iodone side functions also may not call iostart side functions.
10531  *     (NOTE: iostart side functions should NOT sleep for memory, as
10532  *     this could result in deadlock.)
10533  *
10534  *   - An iostart side function may call its corresponding iodone side
10535  *     function directly (if necessary).
10536  *
10537  *   - In the event of an error, an iostart side function can return a buf(9S)
10538  *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
10539  *     b_error in the usual way of course).
10540  *
10541  *   - The taskq mechanism may be used by the iodone side functions to dispatch
10542  *     requests to the iostart side functions.  The iostart side functions in
10543  *     this case would be called under the context of a taskq thread, so it's
10544  *     OK for them to block/sleep/spin in this case.
10545  *
10546  *   - iostart side functions may allocate "shadow" buf(9S) structs and
10547  *     pass them along to the next function in the chain.  The corresponding
10548  *     iodone side functions must coalesce the "shadow" bufs and return
10549  *     the "original" buf to the next higher layer.
10550  *
10551  *   - The b_private field of the buf(9S) struct holds a pointer to
10552  *     an sd_xbuf struct, which contains information needed to
10553  *     construct the scsi_pkt for the command.
10554  *
10555  *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
10556  *     layer must acquire & release the SD_MUTEX(un) as needed.
10557  */
10558 
10559 
10560 /*
10561  * Create taskq for all targets in the system. This is created at
10562  * _init(9E) and destroyed at _fini(9E).
10563  *
10564  * Note: here we set the minalloc to a reasonably high number to ensure that
10565  * we will have an adequate supply of task entries available at interrupt time.
10566  * This is used in conjunction with the TASKQ_PREPOPULATE flag in
10567  * sd_create_taskq().  Since we do not want to sleep for allocations at
10568  * interrupt time, set maxalloc equal to minalloc. That way we will just fail
10569  * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
10570  * requests any one instant in time.
10571  */
10572 #define	SD_TASKQ_NUMTHREADS	8
10573 #define	SD_TASKQ_MINALLOC	256
10574 #define	SD_TASKQ_MAXALLOC	256
10575 
10576 static taskq_t	*sd_tq = NULL;
10577 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))
10578 
10579 static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
10580 static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
10581 
10582 /*
10583  * The following task queue is being created for the write part of
10584  * read-modify-write of non-512 block size devices.
10585  * Limit the number of threads to 1 for now. This number has been chosen
10586  * considering the fact that it applies only to dvd ram drives/MO drives
10587  * currently. Performance for which is not main criteria at this stage.
10588  * Note: It needs to be explored if we can use a single taskq in future
10589  */
10590 #define	SD_WMR_TASKQ_NUMTHREADS	1
10591 static taskq_t	*sd_wmr_tq = NULL;
10592 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
10593 
10594 /*
10595  *    Function: sd_taskq_create
10596  *
10597  * Description: Create taskq thread(s) and preallocate task entries
10598  *
10599  * Return Code: Returns a pointer to the allocated taskq_t.
10600  *
10601  *     Context: Can sleep. Requires blockable context.
10602  *
10603  *       Notes: - The taskq() facility currently is NOT part of the DDI.
10604  *		  (definitely NOT recommeded for 3rd-party drivers!) :-)
10605  *		- taskq_create() will block for memory, also it will panic
10606  *		  if it cannot create the requested number of threads.
10607  *		- Currently taskq_create() creates threads that cannot be
10608  *		  swapped.
10609  *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
10610  *		  supply of taskq entries at interrupt time (ie, so that we
10611  *		  do not have to sleep for memory)
10612  */
10613 
10614 static void
sd_taskq_create(void)10615 sd_taskq_create(void)
10616 {
10617 	char	taskq_name[TASKQ_NAMELEN];
10618 
10619 	ASSERT(sd_tq == NULL);
10620 	ASSERT(sd_wmr_tq == NULL);
10621 
10622 	(void) snprintf(taskq_name, sizeof (taskq_name),
10623 	    "%s_drv_taskq", sd_label);
10624 	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
10625 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
10626 	    TASKQ_PREPOPULATE));
10627 
10628 	(void) snprintf(taskq_name, sizeof (taskq_name),
10629 	    "%s_rmw_taskq", sd_label);
10630 	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
10631 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
10632 	    TASKQ_PREPOPULATE));
10633 }
10634 
10635 
10636 /*
10637  *    Function: sd_taskq_delete
10638  *
10639  * Description: Complementary cleanup routine for sd_taskq_create().
10640  *
10641  *     Context: Kernel thread context.
10642  */
10643 
10644 static void
sd_taskq_delete(void)10645 sd_taskq_delete(void)
10646 {
10647 	ASSERT(sd_tq != NULL);
10648 	ASSERT(sd_wmr_tq != NULL);
10649 	taskq_destroy(sd_tq);
10650 	taskq_destroy(sd_wmr_tq);
10651 	sd_tq = NULL;
10652 	sd_wmr_tq = NULL;
10653 }
10654 
10655 
10656 /*
10657  *    Function: sdstrategy
10658  *
10659  * Description: Driver's strategy (9E) entry point function.
10660  *
10661  *   Arguments: bp - pointer to buf(9S)
10662  *
10663  * Return Code: Always returns zero
10664  *
10665  *     Context: Kernel thread context.
10666  */
10667 
10668 static int
sdstrategy(struct buf * bp)10669 sdstrategy(struct buf *bp)
10670 {
10671 	struct sd_lun *un;
10672 
10673 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
10674 	if (un == NULL) {
10675 		bioerror(bp, EIO);
10676 		bp->b_resid = bp->b_bcount;
10677 		biodone(bp);
10678 		return (0);
10679 	}
10680 
10681 	/* As was done in the past, fail new cmds. if state is dumping. */
10682 	if (un->un_state == SD_STATE_DUMPING) {
10683 		bioerror(bp, ENXIO);
10684 		bp->b_resid = bp->b_bcount;
10685 		biodone(bp);
10686 		return (0);
10687 	}
10688 
10689 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10690 
10691 	/*
10692 	 * Commands may sneak in while we released the mutex in
10693 	 * DDI_SUSPEND, we should block new commands. However, old
10694 	 * commands that are still in the driver at this point should
10695 	 * still be allowed to drain.
10696 	 */
10697 	mutex_enter(SD_MUTEX(un));
10698 	/*
10699 	 * Must wait here if either the device is suspended or
10700 	 * if it's power level is changing.
10701 	 */
10702 	while ((un->un_state == SD_STATE_SUSPENDED) ||
10703 	    (un->un_state == SD_STATE_PM_CHANGING)) {
10704 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10705 	}
10706 
10707 	un->un_ncmds_in_driver++;
10708 
10709 	/*
10710 	 * atapi: Since we are running the CD for now in PIO mode we need to
10711 	 * call bp_mapin here to avoid bp_mapin called interrupt context under
10712 	 * the HBA's init_pkt routine.
10713 	 */
10714 	if (un->un_f_cfg_is_atapi == TRUE) {
10715 		mutex_exit(SD_MUTEX(un));
10716 		bp_mapin(bp);
10717 		mutex_enter(SD_MUTEX(un));
10718 	}
10719 	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
10720 	    un->un_ncmds_in_driver);
10721 
10722 	if (bp->b_flags & B_WRITE)
10723 		un->un_f_sync_cache_required = TRUE;
10724 
10725 	mutex_exit(SD_MUTEX(un));
10726 
10727 	/*
10728 	 * This will (eventually) allocate the sd_xbuf area and
10729 	 * call sd_xbuf_strategy().  We just want to return the
10730 	 * result of ddi_xbuf_qstrategy so that we have an opt-
10731 	 * imized tail call which saves us a stack frame.
10732 	 */
10733 	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
10734 }
10735 
10736 
10737 /*
10738  *    Function: sd_xbuf_strategy
10739  *
10740  * Description: Function for initiating IO operations via the
10741  *		ddi_xbuf_qstrategy() mechanism.
10742  *
10743  *     Context: Kernel thread context.
10744  */
10745 
10746 static void
sd_xbuf_strategy(struct buf * bp,ddi_xbuf_t xp,void * arg)10747 sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
10748 {
10749 	struct sd_lun *un = arg;
10750 
10751 	ASSERT(bp != NULL);
10752 	ASSERT(xp != NULL);
10753 	ASSERT(un != NULL);
10754 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10755 
10756 	/*
10757 	 * Initialize the fields in the xbuf and save a pointer to the
10758 	 * xbuf in bp->b_private.
10759 	 */
10760 	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
10761 
10762 	/* Send the buf down the iostart chain */
10763 	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
10764 }
10765 
10766 
10767 /*
10768  *    Function: sd_xbuf_init
10769  *
10770  * Description: Prepare the given sd_xbuf struct for use.
10771  *
10772  *   Arguments: un - ptr to softstate
10773  *		bp - ptr to associated buf(9S)
10774  *		xp - ptr to associated sd_xbuf
10775  *		chain_type - IO chain type to use:
10776  *			SD_CHAIN_NULL
10777  *			SD_CHAIN_BUFIO
10778  *			SD_CHAIN_USCSI
10779  *			SD_CHAIN_DIRECT
10780  *			SD_CHAIN_DIRECT_PRIORITY
10781  *		pktinfop - ptr to private data struct for scsi_pkt(9S)
10782  *			initialization; may be NULL if none.
10783  *
10784  *     Context: Kernel thread context
10785  */
10786 
10787 static void
sd_xbuf_init(struct sd_lun * un,struct buf * bp,struct sd_xbuf * xp,uchar_t chain_type,void * pktinfop)10788 sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
10789     uchar_t chain_type, void *pktinfop)
10790 {
10791 	int index;
10792 
10793 	ASSERT(un != NULL);
10794 	ASSERT(bp != NULL);
10795 	ASSERT(xp != NULL);
10796 
10797 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
10798 	    bp, chain_type);
10799 
10800 	xp->xb_un	= un;
10801 	xp->xb_pktp	= NULL;
10802 	xp->xb_pktinfo	= pktinfop;
10803 	xp->xb_private	= bp->b_private;
10804 	xp->xb_blkno	= (daddr_t)bp->b_blkno;
10805 
10806 	/*
10807 	 * Set up the iostart and iodone chain indexes in the xbuf, based
10808 	 * upon the specified chain type to use.
10809 	 */
10810 	switch (chain_type) {
10811 	case SD_CHAIN_NULL:
10812 		/*
10813 		 * Fall thru to just use the values for the buf type, even
10814 		 * tho for the NULL chain these values will never be used.
10815 		 */
10816 		/* FALLTHRU */
10817 	case SD_CHAIN_BUFIO:
10818 		index = un->un_buf_chain_type;
10819 		if ((!un->un_f_has_removable_media) &&
10820 		    (un->un_tgt_blocksize != 0) &&
10821 		    (un->un_tgt_blocksize != DEV_BSIZE ||
10822 		    un->un_f_enable_rmw)) {
10823 			int secmask = 0, blknomask = 0;
10824 			if (un->un_f_enable_rmw) {
10825 				blknomask =
10826 				    (un->un_phy_blocksize / DEV_BSIZE) - 1;
10827 				secmask = un->un_phy_blocksize - 1;
10828 			} else {
10829 				blknomask =
10830 				    (un->un_tgt_blocksize / DEV_BSIZE) - 1;
10831 				secmask = un->un_tgt_blocksize - 1;
10832 			}
10833 
10834 			if ((bp->b_lblkno & (blknomask)) ||
10835 			    (bp->b_bcount & (secmask))) {
10836 				if ((un->un_f_rmw_type !=
10837 				    SD_RMW_TYPE_RETURN_ERROR) ||
10838 				    un->un_f_enable_rmw) {
10839 					if (un->un_f_pm_is_enabled == FALSE)
10840 						index =
10841 						    SD_CHAIN_INFO_MSS_DSK_NO_PM;
10842 					else
10843 						index =
10844 						    SD_CHAIN_INFO_MSS_DISK;
10845 				}
10846 			}
10847 		}
10848 		break;
10849 	case SD_CHAIN_USCSI:
10850 		index = un->un_uscsi_chain_type;
10851 		break;
10852 	case SD_CHAIN_DIRECT:
10853 		index = un->un_direct_chain_type;
10854 		break;
10855 	case SD_CHAIN_DIRECT_PRIORITY:
10856 		index = un->un_priority_chain_type;
10857 		break;
10858 	default:
10859 		/* We're really broken if we ever get here... */
10860 		panic("sd_xbuf_init: illegal chain type!");
10861 		/*NOTREACHED*/
10862 	}
10863 
10864 	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
10865 	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;
10866 
10867 	/*
10868 	 * It might be a bit easier to simply bzero the entire xbuf above,
10869 	 * but it turns out that since we init a fair number of members anyway,
10870 	 * we save a fair number cycles by doing explicit assignment of zero.
10871 	 */
10872 	xp->xb_pkt_flags	= 0;
10873 	xp->xb_dma_resid	= 0;
10874 	xp->xb_retry_count	= 0;
10875 	xp->xb_victim_retry_count = 0;
10876 	xp->xb_ua_retry_count	= 0;
10877 	xp->xb_nr_retry_count	= 0;
10878 	xp->xb_sense_bp		= NULL;
10879 	xp->xb_sense_status	= 0;
10880 	xp->xb_sense_state	= 0;
10881 	xp->xb_sense_resid	= 0;
10882 	xp->xb_ena		= 0;
10883 
10884 	bp->b_private	= xp;
10885 	bp->b_flags	&= ~(B_DONE | B_ERROR);
10886 	bp->b_resid	= 0;
10887 	bp->av_forw	= NULL;
10888 	bp->av_back	= NULL;
10889 	bioerror(bp, 0);
10890 
10891 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
10892 }
10893 
10894 
10895 /*
10896  *    Function: sd_uscsi_strategy
10897  *
10898  * Description: Wrapper for calling into the USCSI chain via physio(9F)
10899  *
10900  *   Arguments: bp - buf struct ptr
10901  *
10902  * Return Code: Always returns 0
10903  *
10904  *     Context: Kernel thread context
10905  */
10906 
10907 static int
sd_uscsi_strategy(struct buf * bp)10908 sd_uscsi_strategy(struct buf *bp)
10909 {
10910 	struct sd_lun		*un;
10911 	struct sd_uscsi_info	*uip;
10912 	struct sd_xbuf		*xp;
10913 	uchar_t			chain_type;
10914 	uchar_t			cmd;
10915 
10916 	ASSERT(bp != NULL);
10917 
10918 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
10919 	if (un == NULL) {
10920 		bioerror(bp, EIO);
10921 		bp->b_resid = bp->b_bcount;
10922 		biodone(bp);
10923 		return (0);
10924 	}
10925 
10926 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10927 
10928 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);
10929 
10930 	/*
10931 	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
10932 	 */
10933 	ASSERT(bp->b_private != NULL);
10934 	uip = (struct sd_uscsi_info *)bp->b_private;
10935 	cmd = ((struct uscsi_cmd *)(uip->ui_cmdp))->uscsi_cdb[0];
10936 
10937 	mutex_enter(SD_MUTEX(un));
10938 	/*
10939 	 * atapi: Since we are running the CD for now in PIO mode we need to
10940 	 * call bp_mapin here to avoid bp_mapin called interrupt context under
10941 	 * the HBA's init_pkt routine.
10942 	 */
10943 	if (un->un_f_cfg_is_atapi == TRUE) {
10944 		mutex_exit(SD_MUTEX(un));
10945 		bp_mapin(bp);
10946 		mutex_enter(SD_MUTEX(un));
10947 	}
10948 	un->un_ncmds_in_driver++;
10949 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
10950 	    un->un_ncmds_in_driver);
10951 
10952 	if ((bp->b_flags & B_WRITE) && (bp->b_bcount != 0) &&
10953 	    (cmd != SCMD_MODE_SELECT) && (cmd != SCMD_MODE_SELECT_G1))
10954 		un->un_f_sync_cache_required = TRUE;
10955 
10956 	mutex_exit(SD_MUTEX(un));
10957 
10958 	switch (uip->ui_flags) {
10959 	case SD_PATH_DIRECT:
10960 		chain_type = SD_CHAIN_DIRECT;
10961 		break;
10962 	case SD_PATH_DIRECT_PRIORITY:
10963 		chain_type = SD_CHAIN_DIRECT_PRIORITY;
10964 		break;
10965 	default:
10966 		chain_type = SD_CHAIN_USCSI;
10967 		break;
10968 	}
10969 
10970 	/*
10971 	 * We may allocate extra buf for external USCSI commands. If the
10972 	 * application asks for bigger than 20-byte sense data via USCSI,
10973 	 * SCSA layer will allocate 252 bytes sense buf for that command.
10974 	 */
10975 	if (((struct uscsi_cmd *)(uip->ui_cmdp))->uscsi_rqlen >
10976 	    SENSE_LENGTH) {
10977 		xp = kmem_zalloc(sizeof (struct sd_xbuf) - SENSE_LENGTH +
10978 		    MAX_SENSE_LENGTH, KM_SLEEP);
10979 	} else {
10980 		xp = kmem_zalloc(sizeof (struct sd_xbuf), KM_SLEEP);
10981 	}
10982 
10983 	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);
10984 
10985 	/* Use the index obtained within xbuf_init */
10986 	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);
10987 
10988 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);
10989 
10990 	return (0);
10991 }
10992 
10993 /*
10994  *    Function: sd_send_scsi_cmd
10995  *
10996  * Description: Runs a USCSI command for user (when called thru sdioctl),
10997  *		or for the driver
10998  *
10999  *   Arguments: dev - the dev_t for the device
11000  *		incmd - ptr to a valid uscsi_cmd struct
11001  *		flag - bit flag, indicating open settings, 32/64 bit type
11002  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11003  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11004  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11005  *			to use the USCSI "direct" chain and bypass the normal
11006  *			command waitq.
11007  *
11008  * Return Code: 0 -  successful completion of the given command
11009  *		EIO - scsi_uscsi_handle_command() failed
11010  *		ENXIO  - soft state not found for specified dev
11011  *		EINVAL
11012  *		EFAULT - copyin/copyout error
11013  *		return code of scsi_uscsi_handle_command():
11014  *			EIO
11015  *			ENXIO
11016  *			EACCES
11017  *
11018  *     Context: Waits for command to complete. Can sleep.
11019  */
11020 
11021 static int
sd_send_scsi_cmd(dev_t dev,struct uscsi_cmd * incmd,int flag,enum uio_seg dataspace,int path_flag)11022 sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
11023     enum uio_seg dataspace, int path_flag)
11024 {
11025 	struct sd_lun	*un;
11026 	sd_ssc_t	*ssc;
11027 	int		rval;
11028 
11029 	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
11030 	if (un == NULL) {
11031 		return (ENXIO);
11032 	}
11033 
11034 	/*
11035 	 * Using sd_ssc_send to handle uscsi cmd
11036 	 */
11037 	ssc = sd_ssc_init(un);
11038 	rval = sd_ssc_send(ssc, incmd, flag, dataspace, path_flag);
11039 	sd_ssc_fini(ssc);
11040 
11041 	return (rval);
11042 }
11043 
11044 /*
11045  *    Function: sd_ssc_init
11046  *
11047  * Description: Uscsi end-user call this function to initialize necessary
11048  *              fields, such as uscsi_cmd and sd_uscsi_info struct.
11049  *
 *              The return value of sd_send_scsi_cmd will be treated as a
 *              fault under various conditions. Even if it is non-zero, some
 *              callers may ignore the return value. That is to say, we
 *              cannot make an accurate assessment in sdintr, since a
 *              command failing in sdintr does not mean that the caller of
 *              sd_send_scsi_cmd will treat it as a real failure.
11056  *
11057  *              To avoid printing too many error logs for a failed uscsi
11058  *              packet that the caller may not treat it as a failure, the
11059  *              sd will keep silent for handling all uscsi commands.
11060  *
11061  *              During detach->attach and attach-open, for some types of
11062  *              problems, the driver should be providing information about
11063  *              the problem encountered. Device use USCSI_SILENT, which
11064  *              suppresses all driver information. The result is that no
11065  *              information about the problem is available. Being
11066  *              completely silent during this time is inappropriate. The
11067  *              driver needs a more selective filter than USCSI_SILENT, so
11068  *              that information related to faults is provided.
11069  *
 *              To make an accurate assessment, the caller of
 *              sd_send_scsi_USCSI_CMD should take ownership and
 *              gather the information necessary to print error messages.
 *
11074  *              If we want to print necessary info of uscsi command, we need to
11075  *              keep the uscsi_cmd and sd_uscsi_info till we can make the
11076  *              assessment. We use sd_ssc_init to alloc necessary
11077  *              structs for sending an uscsi command and we are also
11078  *              responsible for free the memory by calling
11079  *              sd_ssc_fini.
11080  *
11081  *              The calling secquences will look like:
11082  *              sd_ssc_init->
11083  *
11084  *                  ...
11085  *
11086  *                  sd_send_scsi_USCSI_CMD->
11087  *                      sd_ssc_send-> - - - sdintr
11088  *                  ...
11089  *
11090  *                  if we think the return value should be treated as a
11091  *                  failure, we make the accessment here and print out
11092  *                  necessary by retrieving uscsi_cmd and sd_uscsi_info'
11093  *
11094  *                  ...
11095  *
11096  *              sd_ssc_fini
11097  *
11098  *
11099  *   Arguments: un - pointer to driver soft state (unit) structure for this
11100  *                   target.
11101  *
11102  * Return code: sd_ssc_t - pointer to allocated sd_ssc_t struct, it contains
11103  *                         uscsi_cmd and sd_uscsi_info.
11104  *                  NULL - if can not alloc memory for sd_ssc_t struct
11105  *
11106  *     Context: Kernel Thread.
11107  */
11108 static sd_ssc_t *
sd_ssc_init(struct sd_lun * un)11109 sd_ssc_init(struct sd_lun *un)
11110 {
11111 	sd_ssc_t		*ssc;
11112 	struct uscsi_cmd	*ucmdp;
11113 	struct sd_uscsi_info	*uip;
11114 
11115 	ASSERT(un != NULL);
11116 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11117 
11118 	/*
11119 	 * Allocate sd_ssc_t structure
11120 	 */
11121 	ssc = kmem_zalloc(sizeof (sd_ssc_t), KM_SLEEP);
11122 
11123 	/*
11124 	 * Allocate uscsi_cmd by calling scsi_uscsi_alloc common routine
11125 	 */
11126 	ucmdp = scsi_uscsi_alloc();
11127 
11128 	/*
11129 	 * Allocate sd_uscsi_info structure
11130 	 */
11131 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
11132 
11133 	ssc->ssc_uscsi_cmd = ucmdp;
11134 	ssc->ssc_uscsi_info = uip;
11135 	ssc->ssc_un = un;
11136 
11137 	return (ssc);
11138 }
11139 
11140 /*
11141  * Function: sd_ssc_fini
11142  *
11143  * Description: To free sd_ssc_t and it's hanging off
11144  *
11145  * Arguments: ssc - struct pointer of sd_ssc_t.
11146  */
11147 static void
sd_ssc_fini(sd_ssc_t * ssc)11148 sd_ssc_fini(sd_ssc_t *ssc)
11149 {
11150 	scsi_uscsi_free(ssc->ssc_uscsi_cmd);
11151 
11152 	if (ssc->ssc_uscsi_info != NULL) {
11153 		kmem_free(ssc->ssc_uscsi_info, sizeof (struct sd_uscsi_info));
11154 		ssc->ssc_uscsi_info = NULL;
11155 	}
11156 
11157 	kmem_free(ssc, sizeof (sd_ssc_t));
11158 	ssc = NULL;
11159 }
11160 
11161 /*
11162  * Function: sd_ssc_send
11163  *
11164  * Description: Runs a USCSI command for user when called through sdioctl,
11165  *              or for the driver.
11166  *
11167  *   Arguments: ssc - the struct of sd_ssc_t will bring uscsi_cmd and
11168  *                    sd_uscsi_info in.
11169  *		incmd - ptr to a valid uscsi_cmd struct
11170  *		flag - bit flag, indicating open settings, 32/64 bit type
11171  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11172  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11173  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11174  *			to use the USCSI "direct" chain and bypass the normal
11175  *			command waitq.
11176  *
11177  * Return Code: 0 -  successful completion of the given command
11178  *		EIO - scsi_uscsi_handle_command() failed
11179  *		ENXIO  - soft state not found for specified dev
11180  *		ECANCELED - command cancelled due to low power
11181  *		EINVAL
11182  *		EFAULT - copyin/copyout error
11183  *		return code of scsi_uscsi_handle_command():
11184  *			EIO
11185  *			ENXIO
11186  *			EACCES
11187  *
11188  *     Context: Kernel Thread;
11189  *              Waits for command to complete. Can sleep.
11190  */
11191 static int
sd_ssc_send(sd_ssc_t * ssc,struct uscsi_cmd * incmd,int flag,enum uio_seg dataspace,int path_flag)11192 sd_ssc_send(sd_ssc_t *ssc, struct uscsi_cmd *incmd, int flag,
11193     enum uio_seg dataspace, int path_flag)
11194 {
11195 	struct sd_uscsi_info	*uip;
11196 	struct uscsi_cmd	*uscmd;
11197 	struct sd_lun		*un;
11198 	dev_t			dev;
11199 
11200 	int	format = 0;
11201 	int	rval;
11202 
11203 	ASSERT(ssc != NULL);
11204 	un = ssc->ssc_un;
11205 	ASSERT(un != NULL);
11206 	uscmd = ssc->ssc_uscsi_cmd;
11207 	ASSERT(uscmd != NULL);
11208 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11209 	if (ssc->ssc_flags & SSC_FLAGS_NEED_ASSESSMENT) {
11210 		/*
11211 		 * If enter here, it indicates that the previous uscsi
11212 		 * command has not been processed by sd_ssc_assessment.
11213 		 * This is violating our rules of FMA telemetry processing.
11214 		 * We should print out this message and the last undisposed
11215 		 * uscsi command.
11216 		 */
11217 		if (uscmd->uscsi_cdb != NULL) {
11218 			SD_INFO(SD_LOG_SDTEST, un,
11219 			    "sd_ssc_send is missing the alternative "
11220 			    "sd_ssc_assessment when running command 0x%x.\n",
11221 			    uscmd->uscsi_cdb[0]);
11222 		}
11223 		/*
11224 		 * Set the ssc_flags to SSC_FLAGS_UNKNOWN, which should be
11225 		 * the initial status.
11226 		 */
11227 		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
11228 	}
11229 
11230 	/*
11231 	 * We need to make sure sd_ssc_send will have sd_ssc_assessment
11232 	 * followed to avoid missing FMA telemetries.
11233 	 */
11234 	ssc->ssc_flags |= SSC_FLAGS_NEED_ASSESSMENT;
11235 
11236 	/*
11237 	 * if USCSI_PMFAILFAST is set and un is in low power, fail the
11238 	 * command immediately.
11239 	 */
11240 	mutex_enter(SD_MUTEX(un));
11241 	mutex_enter(&un->un_pm_mutex);
11242 	if ((uscmd->uscsi_flags & USCSI_PMFAILFAST) &&
11243 	    SD_DEVICE_IS_IN_LOW_POWER(un)) {
11244 		SD_TRACE(SD_LOG_IO, un, "sd_ssc_send:"
11245 		    "un:0x%p is in low power\n", un);
11246 		mutex_exit(&un->un_pm_mutex);
11247 		mutex_exit(SD_MUTEX(un));
11248 		return (ECANCELED);
11249 	}
11250 	mutex_exit(&un->un_pm_mutex);
11251 	mutex_exit(SD_MUTEX(un));
11252 
11253 #ifdef SDDEBUG
11254 	switch (dataspace) {
11255 	case UIO_USERSPACE:
11256 		SD_TRACE(SD_LOG_IO, un,
11257 		    "sd_ssc_send: entry: un:0x%p UIO_USERSPACE\n", un);
11258 		break;
11259 	case UIO_SYSSPACE:
11260 		SD_TRACE(SD_LOG_IO, un,
11261 		    "sd_ssc_send: entry: un:0x%p UIO_SYSSPACE\n", un);
11262 		break;
11263 	default:
11264 		SD_TRACE(SD_LOG_IO, un,
11265 		    "sd_ssc_send: entry: un:0x%p UNEXPECTED SPACE\n", un);
11266 		break;
11267 	}
11268 #endif
11269 
11270 	rval = scsi_uscsi_copyin((intptr_t)incmd, flag,
11271 	    SD_ADDRESS(un), &uscmd);
11272 	if (rval != 0) {
11273 		SD_TRACE(SD_LOG_IO, un, "sd_sense_scsi_cmd: "
11274 		    "scsi_uscsi_alloc_and_copyin failed\n", un);
11275 		return (rval);
11276 	}
11277 
11278 	if ((uscmd->uscsi_cdb != NULL) &&
11279 	    (uscmd->uscsi_cdb[0] == SCMD_FORMAT)) {
11280 		mutex_enter(SD_MUTEX(un));
11281 		un->un_f_format_in_progress = TRUE;
11282 		mutex_exit(SD_MUTEX(un));
11283 		format = 1;
11284 	}
11285 
11286 	/*
11287 	 * Allocate an sd_uscsi_info struct and fill it with the info
11288 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
11289 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
11290 	 * since we allocate the buf here in this function, we do not
11291 	 * need to preserve the prior contents of b_private.
11292 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
11293 	 */
11294 	uip = ssc->ssc_uscsi_info;
11295 	uip->ui_flags = path_flag;
11296 	uip->ui_cmdp = uscmd;
11297 
11298 	/*
11299 	 * Commands sent with priority are intended for error recovery
11300 	 * situations, and do not have retries performed.
11301 	 */
11302 	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
11303 		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
11304 	}
11305 	uscmd->uscsi_flags &= ~USCSI_NOINTR;
11306 
11307 	dev = SD_GET_DEV(un);
11308 	rval = scsi_uscsi_handle_cmd(dev, dataspace, uscmd,
11309 	    sd_uscsi_strategy, NULL, uip);
11310 
11311 	/*
11312 	 * mark ssc_flags right after handle_cmd to make sure
11313 	 * the uscsi has been sent
11314 	 */
11315 	ssc->ssc_flags |= SSC_FLAGS_CMD_ISSUED;
11316 
11317 #ifdef SDDEBUG
11318 	SD_INFO(SD_LOG_IO, un, "sd_ssc_send: "
11319 	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
11320 	    uscmd->uscsi_status, uscmd->uscsi_resid);
11321 	if (uscmd->uscsi_bufaddr != NULL) {
11322 		SD_INFO(SD_LOG_IO, un, "sd_ssc_send: "
11323 		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
11324 		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
11325 		if (dataspace == UIO_SYSSPACE) {
11326 			SD_DUMP_MEMORY(un, SD_LOG_IO,
11327 			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
11328 			    uscmd->uscsi_buflen, SD_LOG_HEX);
11329 		}
11330 	}
11331 #endif
11332 
11333 	if (format == 1) {
11334 		mutex_enter(SD_MUTEX(un));
11335 		un->un_f_format_in_progress = FALSE;
11336 		mutex_exit(SD_MUTEX(un));
11337 	}
11338 
11339 	(void) scsi_uscsi_copyout((intptr_t)incmd, uscmd);
11340 
11341 	return (rval);
11342 }
11343 
11344 /*
11345  *     Function: sd_ssc_print
11346  *
11347  * Description: Print information available to the console.
11348  *
11349  * Arguments: ssc - the struct of sd_ssc_t will bring uscsi_cmd and
11350  *                    sd_uscsi_info in.
11351  *            sd_severity - log level.
11352  *     Context: Kernel thread or interrupt context.
11353  */
11354 static void
sd_ssc_print(sd_ssc_t * ssc,int sd_severity)11355 sd_ssc_print(sd_ssc_t *ssc, int sd_severity)
11356 {
11357 	struct uscsi_cmd	*ucmdp;
11358 	struct scsi_device	*devp;
11359 	dev_info_t		*devinfo;
11360 	uchar_t			*sensep;
11361 	int			senlen;
11362 	union scsi_cdb		*cdbp;
11363 	uchar_t			com;
11364 	extern struct scsi_key_strings scsi_cmds[];
11365 
11366 	ASSERT(ssc != NULL);
11367 	ASSERT(ssc->ssc_un != NULL);
11368 
11369 	if (SD_FM_LOG(ssc->ssc_un) != SD_FM_LOG_EREPORT)
11370 		return;
11371 	ucmdp = ssc->ssc_uscsi_cmd;
11372 	devp = SD_SCSI_DEVP(ssc->ssc_un);
11373 	devinfo = SD_DEVINFO(ssc->ssc_un);
11374 	ASSERT(ucmdp != NULL);
11375 	ASSERT(devp != NULL);
11376 	ASSERT(devinfo != NULL);
11377 	sensep = (uint8_t *)ucmdp->uscsi_rqbuf;
11378 	senlen = ucmdp->uscsi_rqlen - ucmdp->uscsi_rqresid;
11379 	cdbp = (union scsi_cdb *)ucmdp->uscsi_cdb;
11380 
11381 	/* In certain case (like DOORLOCK), the cdb could be NULL. */
11382 	if (cdbp == NULL)
11383 		return;
11384 	/* We don't print log if no sense data available. */
11385 	if (senlen == 0)
11386 		sensep = NULL;
11387 	com = cdbp->scc_cmd;
11388 	scsi_generic_errmsg(devp, sd_label, sd_severity, 0, 0, com,
11389 	    scsi_cmds, sensep, ssc->ssc_un->un_additional_codes, NULL);
11390 }
11391 
11392 /*
11393  *     Function: sd_ssc_assessment
11394  *
11395  * Description: We use this function to make an assessment at the point
11396  *              where SD driver may encounter a potential error.
11397  *
11398  * Arguments: ssc - the struct of sd_ssc_t will bring uscsi_cmd and
11399  *                  sd_uscsi_info in.
11400  *            tp_assess - a hint of strategy for ereport posting.
11401  *            Possible values of tp_assess include:
11402  *                SD_FMT_IGNORE - we don't post any ereport because we're
11403  *                sure that it is ok to ignore the underlying problems.
11404  *                SD_FMT_IGNORE_COMPROMISE - we don't post any ereport for now
11405  *                but it might be not correct to ignore the underlying hardware
11406  *                error.
11407  *                SD_FMT_STATUS_CHECK - we will post an ereport with the
11408  *                payload driver-assessment of value "fail" or
11409  *                "fatal"(depending on what information we have here). This
11410  *                assessment value is usually set when SD driver think there
11411  *                is a potential error occurred(Typically, when return value
11412  *                of the SCSI command is EIO).
11413  *                SD_FMT_STANDARD - we will post an ereport with the payload
11414  *                driver-assessment of value "info". This assessment value is
11415  *                set when the SCSI command returned successfully and with
11416  *                sense data sent back.
11417  *
11418  *     Context: Kernel thread.
11419  */
11420 static void
sd_ssc_assessment(sd_ssc_t * ssc,enum sd_type_assessment tp_assess)11421 sd_ssc_assessment(sd_ssc_t *ssc, enum sd_type_assessment tp_assess)
11422 {
11423 	int senlen = 0;
11424 	struct uscsi_cmd *ucmdp = NULL;
11425 	struct sd_lun *un;
11426 
11427 	ASSERT(ssc != NULL);
11428 	un = ssc->ssc_un;
11429 	ASSERT(un != NULL);
11430 	ucmdp = ssc->ssc_uscsi_cmd;
11431 	ASSERT(ucmdp != NULL);
11432 
11433 	if (ssc->ssc_flags & SSC_FLAGS_NEED_ASSESSMENT) {
11434 		ssc->ssc_flags &= ~SSC_FLAGS_NEED_ASSESSMENT;
11435 	} else {
11436 		/*
11437 		 * If enter here, it indicates that we have a wrong
11438 		 * calling sequence of sd_ssc_send and sd_ssc_assessment,
11439 		 * both of which should be called in a pair in case of
11440 		 * loss of FMA telemetries.
11441 		 */
11442 		if (ucmdp->uscsi_cdb != NULL) {
11443 			SD_INFO(SD_LOG_SDTEST, un,
11444 			    "sd_ssc_assessment is missing the "
11445 			    "alternative sd_ssc_send when running 0x%x, "
11446 			    "or there are superfluous sd_ssc_assessment for "
11447 			    "the same sd_ssc_send.\n",
11448 			    ucmdp->uscsi_cdb[0]);
11449 		}
11450 		/*
11451 		 * Set the ssc_flags to the initial value to avoid passing
11452 		 * down dirty flags to the following sd_ssc_send function.
11453 		 */
11454 		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
11455 		return;
11456 	}
11457 
11458 	/*
11459 	 * Only handle an issued command which is waiting for assessment.
11460 	 * A command which is not issued will not have
11461 	 * SSC_FLAGS_INVALID_DATA set, so it'ok we just return here.
11462 	 */
11463 	if (!(ssc->ssc_flags & SSC_FLAGS_CMD_ISSUED)) {
11464 		sd_ssc_print(ssc, SCSI_ERR_INFO);
11465 		return;
11466 	} else {
11467 		/*
11468 		 * For an issued command, we should clear this flag in
11469 		 * order to make the sd_ssc_t structure be used off
11470 		 * multiple uscsi commands.
11471 		 */
11472 		ssc->ssc_flags &= ~SSC_FLAGS_CMD_ISSUED;
11473 	}
11474 
11475 	/*
11476 	 * We will not deal with non-retryable(flag USCSI_DIAGNOSE set)
11477 	 * commands here. And we should clear the ssc_flags before return.
11478 	 */
11479 	if (ucmdp->uscsi_flags & USCSI_DIAGNOSE) {
11480 		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
11481 		return;
11482 	}
11483 
11484 	switch (tp_assess) {
11485 	case SD_FMT_IGNORE:
11486 	case SD_FMT_IGNORE_COMPROMISE:
11487 		break;
11488 	case SD_FMT_STATUS_CHECK:
11489 		/*
11490 		 * For a failed command(including the succeeded command
11491 		 * with invalid data sent back).
11492 		 */
11493 		sd_ssc_post(ssc, SD_FM_DRV_FATAL);
11494 		break;
11495 	case SD_FMT_STANDARD:
11496 		/*
11497 		 * Always for the succeeded commands probably with sense
11498 		 * data sent back.
11499 		 * Limitation:
11500 		 *	We can only handle a succeeded command with sense
11501 		 *	data sent back when auto-request-sense is enabled.
11502 		 */
11503 		senlen = ssc->ssc_uscsi_cmd->uscsi_rqlen -
11504 		    ssc->ssc_uscsi_cmd->uscsi_rqresid;
11505 		if ((ssc->ssc_uscsi_info->ui_pkt_state & STATE_ARQ_DONE) &&
11506 		    (un->un_f_arq_enabled == TRUE) &&
11507 		    senlen > 0 &&
11508 		    ssc->ssc_uscsi_cmd->uscsi_rqbuf != NULL) {
11509 			sd_ssc_post(ssc, SD_FM_DRV_NOTICE);
11510 		}
11511 		break;
11512 	default:
11513 		/*
11514 		 * Should not have other type of assessment.
11515 		 */
11516 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
11517 		    "sd_ssc_assessment got wrong "
11518 		    "sd_type_assessment %d.\n", tp_assess);
11519 		break;
11520 	}
11521 	/*
11522 	 * Clear up the ssc_flags before return.
11523 	 */
11524 	ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
11525 }
11526 
11527 /*
11528  *    Function: sd_ssc_post
11529  *
11530  * Description: 1. read the driver property to get fm-scsi-log flag.
11531  *              2. print log if fm_log_capable is non-zero.
11532  *              3. call sd_ssc_ereport_post to post ereport if possible.
11533  *
11534  *    Context: May be called from kernel thread or interrupt context.
11535  */
11536 static void
sd_ssc_post(sd_ssc_t * ssc,enum sd_driver_assessment sd_assess)11537 sd_ssc_post(sd_ssc_t *ssc, enum sd_driver_assessment sd_assess)
11538 {
11539 	struct sd_lun	*un;
11540 	int		sd_severity;
11541 
11542 	ASSERT(ssc != NULL);
11543 	un = ssc->ssc_un;
11544 	ASSERT(un != NULL);
11545 
11546 	/*
11547 	 * We may enter here from sd_ssc_assessment(for USCSI command) or
11548 	 * by directly called from sdintr context.
11549 	 * We don't handle a non-disk drive(CD-ROM, removable media).
11550 	 * Clear the ssc_flags before return in case we've set
11551 	 * SSC_FLAGS_INVALID_XXX which should be skipped for a non-disk
11552 	 * driver.
11553 	 */
11554 	if (ISCD(un) || un->un_f_has_removable_media) {
11555 		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
11556 		return;
11557 	}
11558 
11559 	switch (sd_assess) {
11560 		case SD_FM_DRV_FATAL:
11561 			sd_severity = SCSI_ERR_FATAL;
11562 			break;
11563 		case SD_FM_DRV_RECOVERY:
11564 			sd_severity = SCSI_ERR_RECOVERED;
11565 			break;
11566 		case SD_FM_DRV_RETRY:
11567 			sd_severity = SCSI_ERR_RETRYABLE;
11568 			break;
11569 		case SD_FM_DRV_NOTICE:
11570 			sd_severity = SCSI_ERR_INFO;
11571 			break;
11572 		default:
11573 			sd_severity = SCSI_ERR_UNKNOWN;
11574 	}
11575 	/* print log */
11576 	sd_ssc_print(ssc, sd_severity);
11577 
11578 	/* always post ereport */
11579 	sd_ssc_ereport_post(ssc, sd_assess);
11580 }
11581 
11582 /*
11583  *    Function: sd_ssc_set_info
11584  *
11585  * Description: Mark ssc_flags and set ssc_info which would be the
11586  *              payload of uderr ereport. This function will cause
11587  *              sd_ssc_ereport_post to post uderr ereport only.
11588  *              Besides, when ssc_flags == SSC_FLAGS_INVALID_DATA(USCSI),
11589  *              the function will also call SD_ERROR or scsi_log for a
11590  *              CDROM/removable-media/DDI_FM_NOT_CAPABLE device.
11591  *
11592  * Arguments: ssc - the struct of sd_ssc_t will bring uscsi_cmd and
11593  *                  sd_uscsi_info in.
11594  *            ssc_flags - indicate the sub-category of a uderr.
11595  *            comp - this argument is meaningful only when
11596  *                   ssc_flags == SSC_FLAGS_INVALID_DATA, and its possible
11597  *                   values include:
11598  *                   > 0, SD_ERROR is used with comp as the driver logging
11599  *                   component;
11600  *                   = 0, scsi-log is used to log error telemetries;
11601  *                   < 0, no log available for this telemetry.
11602  *
11603  *    Context: Kernel thread or interrupt context
11604  */
11605 static void
sd_ssc_set_info(sd_ssc_t * ssc,int ssc_flags,uint_t comp,const char * fmt,...)11606 sd_ssc_set_info(sd_ssc_t *ssc, int ssc_flags, uint_t comp, const char *fmt, ...)
11607 {
11608 	va_list	ap;
11609 
11610 	ASSERT(ssc != NULL);
11611 	ASSERT(ssc->ssc_un != NULL);
11612 
11613 	ssc->ssc_flags |= ssc_flags;
11614 	va_start(ap, fmt);
11615 	(void) vsnprintf(ssc->ssc_info, sizeof (ssc->ssc_info), fmt, ap);
11616 	va_end(ap);
11617 
11618 	/*
11619 	 * If SSC_FLAGS_INVALID_DATA is set, it should be a uscsi command
11620 	 * with invalid data sent back. For non-uscsi command, the
11621 	 * following code will be bypassed.
11622 	 */
11623 	if (ssc_flags & SSC_FLAGS_INVALID_DATA) {
11624 		if (SD_FM_LOG(ssc->ssc_un) == SD_FM_LOG_NSUP) {
11625 			/*
11626 			 * If the error belong to certain component and we
11627 			 * do not want it to show up on the console, we
11628 			 * will use SD_ERROR, otherwise scsi_log is
11629 			 * preferred.
11630 			 */
11631 			if (comp > 0) {
11632 				SD_ERROR(comp, ssc->ssc_un, ssc->ssc_info);
11633 			} else if (comp == 0) {
11634 				scsi_log(SD_DEVINFO(ssc->ssc_un), sd_label,
11635 				    CE_WARN, ssc->ssc_info);
11636 			}
11637 		}
11638 	}
11639 }
11640 
11641 /*
11642  *    Function: sd_buf_iodone
11643  *
11644  * Description: Frees the sd_xbuf & returns the buf to its originator.
11645  *
11646  *     Context: May be called from interrupt context.
11647  */
11648 /* ARGSUSED */
11649 static void
sd_buf_iodone(int index,struct sd_lun * un,struct buf * bp)11650 sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
11651 {
11652 	struct sd_xbuf *xp;
11653 
11654 	ASSERT(un != NULL);
11655 	ASSERT(bp != NULL);
11656 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11657 
11658 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");
11659 
11660 	xp = SD_GET_XBUF(bp);
11661 	ASSERT(xp != NULL);
11662 
11663 	/* xbuf is gone after this */
11664 	if (ddi_xbuf_done(bp, un->un_xbuf_attr)) {
11665 		mutex_enter(SD_MUTEX(un));
11666 
11667 		/*
11668 		 * Grab time when the cmd completed.
11669 		 * This is used for determining if the system has been
11670 		 * idle long enough to make it idle to the PM framework.
11671 		 * This is for lowering the overhead, and therefore improving
11672 		 * performance per I/O operation.
11673 		 */
11674 		un->un_pm_idle_time = gethrtime();
11675 
11676 		un->un_ncmds_in_driver--;
11677 		ASSERT(un->un_ncmds_in_driver >= 0);
11678 		SD_INFO(SD_LOG_IO, un,
11679 		    "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
11680 		    un->un_ncmds_in_driver);
11681 
11682 		mutex_exit(SD_MUTEX(un));
11683 	}
11684 
11685 	biodone(bp);				/* bp is gone after this */
11686 
11687 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
11688 }
11689 
11690 
11691 /*
11692  *    Function: sd_uscsi_iodone
11693  *
11694  * Description: Frees the sd_xbuf & returns the buf to its originator.
11695  *
11696  *     Context: May be called from interrupt context.
11697  */
11698 /* ARGSUSED */
11699 static void
sd_uscsi_iodone(int index,struct sd_lun * un,struct buf * bp)11700 sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
11701 {
11702 	struct sd_xbuf *xp;
11703 
11704 	ASSERT(un != NULL);
11705 	ASSERT(bp != NULL);
11706 
11707 	xp = SD_GET_XBUF(bp);
11708 	ASSERT(xp != NULL);
11709 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11710 
11711 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
11712 
11713 	bp->b_private = xp->xb_private;
11714 
11715 	mutex_enter(SD_MUTEX(un));
11716 
11717 	/*
11718 	 * Grab time when the cmd completed.
11719 	 * This is used for determining if the system has been
11720 	 * idle long enough to make it idle to the PM framework.
11721 	 * This is for lowering the overhead, and therefore improving
11722 	 * performance per I/O operation.
11723 	 */
11724 	un->un_pm_idle_time = gethrtime();
11725 
11726 	un->un_ncmds_in_driver--;
11727 	ASSERT(un->un_ncmds_in_driver >= 0);
11728 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
11729 	    un->un_ncmds_in_driver);
11730 
11731 	mutex_exit(SD_MUTEX(un));
11732 
11733 	if (((struct uscsi_cmd *)(xp->xb_pktinfo))->uscsi_rqlen >
11734 	    SENSE_LENGTH) {
11735 		kmem_free(xp, sizeof (struct sd_xbuf) - SENSE_LENGTH +
11736 		    MAX_SENSE_LENGTH);
11737 	} else {
11738 		kmem_free(xp, sizeof (struct sd_xbuf));
11739 	}
11740 
11741 	biodone(bp);
11742 
11743 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
11744 }
11745 
11746 
11747 /*
11748  *    Function: sd_mapblockaddr_iostart
11749  *
11750  * Description: Verify request lies within the partition limits for
11751  *		the indicated minor device.  Issue "overrun" buf if
11752  *		request would exceed partition range.  Converts
11753  *		partition-relative block address to absolute.
11754  *
11755  *              Upon exit of this function:
11756  *              1.I/O is aligned
11757  *                 xp->xb_blkno represents the absolute sector address
11758  *              2.I/O is misaligned
11759  *                 xp->xb_blkno represents the absolute logical block address
11760  *                 based on DEV_BSIZE. The logical block address will be
11761  *                 converted to physical sector address in sd_mapblocksize_\
11762  *                 iostart.
11763  *              3.I/O is misaligned but is aligned in "overrun" buf
11764  *                 xp->xb_blkno represents the absolute logical block address
11765  *                 based on DEV_BSIZE. The logical block address will be
11766  *                 converted to physical sector address in sd_mapblocksize_\
11767  *                 iostart. But no RMW will be issued in this case.
11768  *
11769  *     Context: Can sleep
11770  *
11771  *      Issues: This follows what the old code did, in terms of accessing
11772  *		some of the partition info in the unit struct without holding
11773  *		the mutext.  This is a general issue, if the partition info
11774  *		can be altered while IO is in progress... as soon as we send
11775  *		a buf, its partitioning can be invalid before it gets to the
11776  *		device.  Probably the right fix is to move partitioning out
11777  *		of the driver entirely.
11778  */
11779 
11780 static void
sd_mapblockaddr_iostart(int index,struct sd_lun * un,struct buf * bp)11781 sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
11782 {
11783 	diskaddr_t	nblocks;	/* #blocks in the given partition */
11784 	daddr_t	blocknum;	/* Block number specified by the buf */
11785 	size_t	requested_nblocks;
11786 	size_t	available_nblocks;
11787 	int	partition;
11788 	diskaddr_t	partition_offset;
11789 	struct sd_xbuf *xp;
11790 	int secmask = 0, blknomask = 0;
11791 	ushort_t is_aligned = TRUE;
11792 
11793 	ASSERT(un != NULL);
11794 	ASSERT(bp != NULL);
11795 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11796 
11797 	SD_TRACE(SD_LOG_IO_PARTITION, un,
11798 	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);
11799 
11800 	xp = SD_GET_XBUF(bp);
11801 	ASSERT(xp != NULL);
11802 
11803 	/*
11804 	 * If the geometry is not indicated as valid, attempt to access
11805 	 * the unit & verify the geometry/label. This can be the case for
11806 	 * removable-media devices, of if the device was opened in
11807 	 * NDELAY/NONBLOCK mode.
11808 	 */
11809 	partition = SDPART(bp->b_edev);
11810 
11811 	if (!SD_IS_VALID_LABEL(un)) {
11812 		sd_ssc_t *ssc;
11813 		/*
11814 		 * Initialize sd_ssc_t for internal uscsi commands
11815 		 * In case of potential porformance issue, we need
11816 		 * to alloc memory only if there is invalid label
11817 		 */
11818 		ssc = sd_ssc_init(un);
11819 
11820 		if (sd_ready_and_valid(ssc, partition) != SD_READY_VALID) {
11821 			/*
11822 			 * For removable devices it is possible to start an
11823 			 * I/O without a media by opening the device in nodelay
11824 			 * mode. Also for writable CDs there can be many
11825 			 * scenarios where there is no geometry yet but volume
11826 			 * manager is trying to issue a read() just because
11827 			 * it can see TOC on the CD. So do not print a message
11828 			 * for removables.
11829 			 */
11830 			if (!un->un_f_has_removable_media) {
11831 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
11832 				    "i/o to invalid geometry\n");
11833 			}
11834 			bioerror(bp, EIO);
11835 			bp->b_resid = bp->b_bcount;
11836 			SD_BEGIN_IODONE(index, un, bp);
11837 
11838 			sd_ssc_fini(ssc);
11839 			return;
11840 		}
11841 		sd_ssc_fini(ssc);
11842 	}
11843 
11844 	nblocks = 0;
11845 	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
11846 	    &nblocks, &partition_offset, NULL, NULL, (void *)SD_PATH_DIRECT);
11847 
11848 	if (un->un_f_enable_rmw) {
11849 		blknomask = (un->un_phy_blocksize / DEV_BSIZE) - 1;
11850 		secmask = un->un_phy_blocksize - 1;
11851 	} else {
11852 		blknomask = (un->un_tgt_blocksize / DEV_BSIZE) - 1;
11853 		secmask = un->un_tgt_blocksize - 1;
11854 	}
11855 
11856 	if ((bp->b_lblkno & (blknomask)) || (bp->b_bcount & (secmask))) {
11857 		is_aligned = FALSE;
11858 	}
11859 
11860 	if (!(NOT_DEVBSIZE(un)) || un->un_f_enable_rmw) {
11861 		/*
11862 		 * If I/O is aligned, no need to involve RMW(Read Modify Write)
11863 		 * Convert the logical block number to target's physical sector
11864 		 * number.
11865 		 */
11866 		if (is_aligned) {
11867 			xp->xb_blkno = SD_SYS2TGTBLOCK(un, xp->xb_blkno);
11868 		} else {
11869 			/*
11870 			 * There is no RMW if we're just reading, so don't
11871 			 * warn or error out because of it.
11872 			 */
11873 			if (bp->b_flags & B_READ) {
11874 				/*EMPTY*/
11875 			} else if (!un->un_f_enable_rmw &&
11876 			    un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR) {
11877 				bp->b_flags |= B_ERROR;
11878 				goto error_exit;
11879 			} else if (un->un_f_rmw_type == SD_RMW_TYPE_DEFAULT) {
11880 				mutex_enter(SD_MUTEX(un));
11881 				if (!un->un_f_enable_rmw &&
11882 				    un->un_rmw_msg_timeid == NULL) {
11883 					scsi_log(SD_DEVINFO(un), sd_label,
11884 					    CE_WARN, "I/O request is not "
11885 					    "aligned with %d disk sector size. "
11886 					    "It is handled through Read Modify "
11887 					    "Write but the performance is "
11888 					    "very low.\n",
11889 					    un->un_tgt_blocksize);
11890 					un->un_rmw_msg_timeid =
11891 					    timeout(sd_rmw_msg_print_handler,
11892 					    un, SD_RMW_MSG_PRINT_TIMEOUT);
11893 				} else {
11894 					un->un_rmw_incre_count ++;
11895 				}
11896 				mutex_exit(SD_MUTEX(un));
11897 			}
11898 
11899 			nblocks = SD_TGT2SYSBLOCK(un, nblocks);
11900 			partition_offset = SD_TGT2SYSBLOCK(un,
11901 			    partition_offset);
11902 		}
11903 	}
11904 
11905 	/*
11906 	 * blocknum is the starting block number of the request. At this
11907 	 * point it is still relative to the start of the minor device.
11908 	 */
11909 	blocknum = xp->xb_blkno;
11910 
11911 	/*
11912 	 * Legacy: If the starting block number is one past the last block
11913 	 * in the partition, do not set B_ERROR in the buf.
11914 	 */
11915 	if (blocknum == nblocks)  {
11916 		goto error_exit;
11917 	}
11918 
11919 	/*
11920 	 * Confirm that the first block of the request lies within the
11921 	 * partition limits. Also the requested number of bytes must be
11922 	 * a multiple of the system block size.
11923 	 */
11924 	if ((blocknum < 0) || (blocknum >= nblocks) ||
11925 	    ((bp->b_bcount & (DEV_BSIZE - 1)) != 0)) {
11926 		bp->b_flags |= B_ERROR;
11927 		goto error_exit;
11928 	}
11929 
11930 	/*
11931 	 * If the requsted # blocks exceeds the available # blocks, that
11932 	 * is an overrun of the partition.
11933 	 */
11934 	if ((!NOT_DEVBSIZE(un)) && is_aligned) {
11935 		requested_nblocks = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
11936 	} else {
11937 		requested_nblocks = SD_BYTES2SYSBLOCKS(bp->b_bcount);
11938 	}
11939 
11940 	available_nblocks = (size_t)(nblocks - blocknum);
11941 	ASSERT(nblocks >= blocknum);
11942 
11943 	if (requested_nblocks > available_nblocks) {
11944 		size_t resid;
11945 
11946 		/*
11947 		 * Allocate an "overrun" buf to allow the request to proceed
11948 		 * for the amount of space available in the partition. The
11949 		 * amount not transferred will be added into the b_resid
11950 		 * when the operation is complete. The overrun buf
11951 		 * replaces the original buf here, and the original buf
11952 		 * is saved inside the overrun buf, for later use.
11953 		 */
11954 		if ((!NOT_DEVBSIZE(un)) && is_aligned) {
11955 			resid = SD_TGTBLOCKS2BYTES(un,
11956 			    (offset_t)(requested_nblocks - available_nblocks));
11957 		} else {
11958 			resid = SD_SYSBLOCKS2BYTES(
11959 			    (offset_t)(requested_nblocks - available_nblocks));
11960 		}
11961 
11962 		size_t count = bp->b_bcount - resid;
11963 		/*
11964 		 * Note: count is an unsigned entity thus it'll NEVER
11965 		 * be less than 0 so ASSERT the original values are
11966 		 * correct.
11967 		 */
11968 		ASSERT(bp->b_bcount >= resid);
11969 
11970 		bp = sd_bioclone_alloc(bp, count, blocknum,
11971 		    (int (*)(struct buf *))(uintptr_t)sd_mapblockaddr_iodone);
11972 		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
11973 		ASSERT(xp != NULL);
11974 	}
11975 
11976 	/* At this point there should be no residual for this buf. */
11977 	ASSERT(bp->b_resid == 0);
11978 
11979 	/* Convert the block number to an absolute address. */
11980 	xp->xb_blkno += partition_offset;
11981 
11982 	SD_NEXT_IOSTART(index, un, bp);
11983 
11984 	SD_TRACE(SD_LOG_IO_PARTITION, un,
11985 	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);
11986 
11987 	return;
11988 
11989 error_exit:
11990 	bp->b_resid = bp->b_bcount;
11991 	SD_BEGIN_IODONE(index, un, bp);
11992 	SD_TRACE(SD_LOG_IO_PARTITION, un,
11993 	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
11994 }
11995 
11996 
11997 /*
11998  *    Function: sd_mapblockaddr_iodone
11999  *
12000  * Description: Completion-side processing for partition management.
12001  *
12002  *     Context: May be called under interrupt context
12003  */
12004 
12005 static void
sd_mapblockaddr_iodone(int index,struct sd_lun * un,struct buf * bp)12006 sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
12007 {
12008 	/* int	partition; */	/* Not used, see below. */
12009 	ASSERT(un != NULL);
12010 	ASSERT(bp != NULL);
12011 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12012 
12013 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12014 	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);
12015 
12016 	if ((uintptr_t)bp->b_iodone == (uintptr_t)sd_mapblockaddr_iodone) {
12017 		/*
12018 		 * We have an "overrun" buf to deal with...
12019 		 */
12020 		struct sd_xbuf	*xp;
12021 		struct buf	*obp;	/* ptr to the original buf */
12022 
12023 		xp = SD_GET_XBUF(bp);
12024 		ASSERT(xp != NULL);
12025 
12026 		/* Retrieve the pointer to the original buf */
12027 		obp = (struct buf *)xp->xb_private;
12028 		ASSERT(obp != NULL);
12029 
12030 		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
12031 		bioerror(obp, bp->b_error);
12032 
12033 		sd_bioclone_free(bp);
12034 
12035 		/*
12036 		 * Get back the original buf.
12037 		 * Note that since the restoration of xb_blkno below
12038 		 * was removed, the sd_xbuf is not needed.
12039 		 */
12040 		bp = obp;
12041 		/*
12042 		 * xp = SD_GET_XBUF(bp);
12043 		 * ASSERT(xp != NULL);
12044 		 */
12045 	}
12046 
12047 	/*
12048 	 * Convert sd->xb_blkno back to a minor-device relative value.
12049 	 * Note: this has been commented out, as it is not needed in the
12050 	 * current implementation of the driver (ie, since this function
12051 	 * is at the top of the layering chains, so the info will be
12052 	 * discarded) and it is in the "hot" IO path.
12053 	 *
12054 	 * partition = getminor(bp->b_edev) & SDPART_MASK;
12055 	 * xp->xb_blkno -= un->un_offset[partition];
12056 	 */
12057 
12058 	SD_NEXT_IODONE(index, un, bp);
12059 
12060 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12061 	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
12062 }
12063 
12064 
12065 /*
12066  *    Function: sd_mapblocksize_iostart
12067  *
12068  * Description: Convert between system block size (un->un_sys_blocksize)
12069  *		and target block size (un->un_tgt_blocksize).
12070  *
12071  *     Context: Can sleep to allocate resources.
12072  *
12073  * Assumptions: A higher layer has already performed any partition validation,
12074  *		and converted the xp->xb_blkno to an absolute value relative
12075  *		to the start of the device.
12076  *
12077  *		It is also assumed that the higher layer has implemented
12078  *		an "overrun" mechanism for the case where the request would
12079  *		read/write beyond the end of a partition.  In this case we
12080  *		assume (and ASSERT) that bp->b_resid == 0.
12081  *
12082  *		Note: The implementation for this routine assumes the target
12083  *		block size remains constant between allocation and transport.
12084  */
12085 
12086 static void
sd_mapblocksize_iostart(int index,struct sd_lun * un,struct buf * bp)12087 sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
12088 {
12089 	struct sd_mapblocksize_info	*bsp;
12090 	struct sd_xbuf			*xp;
12091 	offset_t first_byte;
12092 	daddr_t	start_block, end_block;
12093 	daddr_t	request_bytes;
12094 	ushort_t is_aligned = FALSE;
12095 
12096 	ASSERT(un != NULL);
12097 	ASSERT(bp != NULL);
12098 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12099 	ASSERT(bp->b_resid == 0);
12100 
12101 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12102 	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);
12103 
12104 	/*
12105 	 * For a non-writable CD, a write request is an error
12106 	 */
12107 	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
12108 	    (un->un_f_mmc_writable_media == FALSE)) {
12109 		bioerror(bp, EIO);
12110 		bp->b_resid = bp->b_bcount;
12111 		SD_BEGIN_IODONE(index, un, bp);
12112 		return;
12113 	}
12114 
12115 	/*
12116 	 * We do not need a shadow buf if the device is using
12117 	 * un->un_sys_blocksize as its block size or if bcount == 0.
12118 	 * In this case there is no layer-private data block allocated.
12119 	 */
12120 	if ((un->un_tgt_blocksize == DEV_BSIZE && !un->un_f_enable_rmw) ||
12121 	    (bp->b_bcount == 0)) {
12122 		goto done;
12123 	}
12124 
12125 #if defined(__x86)
12126 	/* We do not support non-block-aligned transfers for ROD devices */
12127 	ASSERT(!ISROD(un));
12128 #endif
12129 
12130 	xp = SD_GET_XBUF(bp);
12131 	ASSERT(xp != NULL);
12132 
12133 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12134 	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
12135 	    un->un_tgt_blocksize, DEV_BSIZE);
12136 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12137 	    "request start block:0x%x\n", xp->xb_blkno);
12138 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12139 	    "request len:0x%x\n", bp->b_bcount);
12140 
12141 	/*
12142 	 * Allocate the layer-private data area for the mapblocksize layer.
12143 	 * Layers are allowed to use the xp_private member of the sd_xbuf
12144 	 * struct to store the pointer to their layer-private data block, but
12145 	 * each layer also has the responsibility of restoring the prior
12146 	 * contents of xb_private before returning the buf/xbuf to the
12147 	 * higher layer that sent it.
12148 	 *
12149 	 * Here we save the prior contents of xp->xb_private into the
12150 	 * bsp->mbs_oprivate field of our layer-private data area. This value
12151 	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
12152 	 * the layer-private area and returning the buf/xbuf to the layer
12153 	 * that sent it.
12154 	 *
12155 	 * Note that here we use kmem_zalloc for the allocation as there are
12156 	 * parts of the mapblocksize code that expect certain fields to be
12157 	 * zero unless explicitly set to a required value.
12158 	 */
12159 	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12160 	bsp->mbs_oprivate = xp->xb_private;
12161 	xp->xb_private = bsp;
12162 
12163 	/*
12164 	 * This treats the data on the disk (target) as an array of bytes.
12165 	 * first_byte is the byte offset, from the beginning of the device,
12166 	 * to the location of the request. This is converted from a
12167 	 * un->un_sys_blocksize block address to a byte offset, and then back
12168 	 * to a block address based upon a un->un_tgt_blocksize block size.
12169 	 *
12170 	 * xp->xb_blkno should be absolute upon entry into this function,
12171 	 * but, but it is based upon partitions that use the "system"
12172 	 * block size. It must be adjusted to reflect the block size of
12173 	 * the target.
12174 	 *
12175 	 * Note that end_block is actually the block that follows the last
12176 	 * block of the request, but that's what is needed for the computation.
12177 	 */
12178 	first_byte  = SD_SYSBLOCKS2BYTES((offset_t)xp->xb_blkno);
12179 	if (un->un_f_enable_rmw) {
12180 		start_block = xp->xb_blkno =
12181 		    (first_byte / un->un_phy_blocksize) *
12182 		    (un->un_phy_blocksize / DEV_BSIZE);
12183 		end_block   = ((first_byte + bp->b_bcount +
12184 		    un->un_phy_blocksize - 1) / un->un_phy_blocksize) *
12185 		    (un->un_phy_blocksize / DEV_BSIZE);
12186 	} else {
12187 		start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
12188 		end_block   = (first_byte + bp->b_bcount +
12189 		    un->un_tgt_blocksize - 1) / un->un_tgt_blocksize;
12190 	}
12191 
12192 	/* request_bytes is rounded up to a multiple of the target block size */
12193 	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;
12194 
12195 	/*
12196 	 * See if the starting address of the request and the request
12197 	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
12198 	 * then we do not need to allocate a shadow buf to handle the request.
12199 	 */
12200 	if (un->un_f_enable_rmw) {
12201 		if (((first_byte % un->un_phy_blocksize) == 0) &&
12202 		    ((bp->b_bcount % un->un_phy_blocksize) == 0)) {
12203 			is_aligned = TRUE;
12204 		}
12205 	} else {
12206 		if (((first_byte % un->un_tgt_blocksize) == 0) &&
12207 		    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
12208 			is_aligned = TRUE;
12209 		}
12210 	}
12211 
12212 	if ((bp->b_flags & B_READ) == 0) {
12213 		/*
12214 		 * Lock the range for a write operation. An aligned request is
12215 		 * considered a simple write; otherwise the request must be a
12216 		 * read-modify-write.
12217 		 */
12218 		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
12219 		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
12220 	}
12221 
12222 	/*
12223 	 * Alloc a shadow buf if the request is not aligned. Also, this is
12224 	 * where the READ command is generated for a read-modify-write. (The
12225 	 * write phase is deferred until after the read completes.)
12226 	 */
12227 	if (is_aligned == FALSE) {
12228 
12229 		struct sd_mapblocksize_info	*shadow_bsp;
12230 		struct sd_xbuf	*shadow_xp;
12231 		struct buf	*shadow_bp;
12232 
12233 		/*
12234 		 * Allocate the shadow buf and it associated xbuf. Note that
12235 		 * after this call the xb_blkno value in both the original
12236 		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
12237 		 * same: absolute relative to the start of the device, and
12238 		 * adjusted for the target block size. The b_blkno in the
12239 		 * shadow buf will also be set to this value. We should never
12240 		 * change b_blkno in the original bp however.
12241 		 *
12242 		 * Note also that the shadow buf will always need to be a
12243 		 * READ command, regardless of whether the incoming command
12244 		 * is a READ or a WRITE.
12245 		 */
12246 		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
12247 		    xp->xb_blkno,
12248 		    (int (*)(struct buf *))(uintptr_t)sd_mapblocksize_iodone);
12249 
12250 		shadow_xp = SD_GET_XBUF(shadow_bp);
12251 
12252 		/*
12253 		 * Allocate the layer-private data for the shadow buf.
12254 		 * (No need to preserve xb_private in the shadow xbuf.)
12255 		 */
12256 		shadow_xp->xb_private = shadow_bsp =
12257 		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12258 
12259 		/*
12260 		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
12261 		 * to figure out where the start of the user data is (based upon
12262 		 * the system block size) in the data returned by the READ
12263 		 * command (which will be based upon the target blocksize). Note
12264 		 * that this is only really used if the request is unaligned.
12265 		 */
12266 		if (un->un_f_enable_rmw) {
12267 			bsp->mbs_copy_offset = (ssize_t)(first_byte -
12268 			    ((offset_t)xp->xb_blkno * un->un_sys_blocksize));
12269 			ASSERT((bsp->mbs_copy_offset >= 0) &&
12270 			    (bsp->mbs_copy_offset < un->un_phy_blocksize));
12271 		} else {
12272 			bsp->mbs_copy_offset = (ssize_t)(first_byte -
12273 			    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
12274 			ASSERT((bsp->mbs_copy_offset >= 0) &&
12275 			    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
12276 		}
12277 
12278 		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;
12279 
12280 		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;
12281 
12282 		/* Transfer the wmap (if any) to the shadow buf */
12283 		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
12284 		bsp->mbs_wmp = NULL;
12285 
12286 		/*
12287 		 * The shadow buf goes on from here in place of the
12288 		 * original buf.
12289 		 */
12290 		shadow_bsp->mbs_orig_bp = bp;
12291 		bp = shadow_bp;
12292 	}
12293 
12294 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12295 	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
12296 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12297 	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
12298 	    request_bytes);
12299 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12300 	    "sd_mapblocksize_iostart: shadow buf:0x%x\n", bp);
12301 
12302 done:
12303 	SD_NEXT_IOSTART(index, un, bp);
12304 
12305 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12306 	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
12307 }
12308 
12309 
12310 /*
12311  *    Function: sd_mapblocksize_iodone
12312  *
12313  * Description: Completion side processing for block-size mapping.
12314  *
12315  *     Context: May be called under interrupt context
12316  */
12317 
12318 static void
sd_mapblocksize_iodone(int index,struct sd_lun * un,struct buf * bp)12319 sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
12320 {
12321 	struct sd_mapblocksize_info	*bsp;
12322 	struct sd_xbuf	*xp;
12323 	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
12324 	struct buf	*orig_bp;	/* ptr to the original buf */
12325 	offset_t	shadow_end;
12326 	offset_t	request_end;
12327 	offset_t	shadow_start;
12328 	ssize_t		copy_offset;
12329 	size_t		copy_length;
12330 	size_t		shortfall;
12331 	uint_t		is_write;	/* TRUE if this bp is a WRITE */
12332 	uint_t		has_wmap;	/* TRUE is this bp has a wmap */
12333 
12334 	ASSERT(un != NULL);
12335 	ASSERT(bp != NULL);
12336 
12337 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12338 	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);
12339 
12340 	/*
12341 	 * There is no shadow buf or layer-private data if the target is
12342 	 * using un->un_sys_blocksize as its block size or if bcount == 0.
12343 	 */
12344 	if ((un->un_tgt_blocksize == DEV_BSIZE && !un->un_f_enable_rmw) ||
12345 	    (bp->b_bcount == 0)) {
12346 		goto exit;
12347 	}
12348 
12349 	xp = SD_GET_XBUF(bp);
12350 	ASSERT(xp != NULL);
12351 
12352 	/* Retrieve the pointer to the layer-private data area from the xbuf. */
12353 	bsp = xp->xb_private;
12354 
12355 	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
12356 	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;
12357 
12358 	if (is_write) {
12359 		/*
12360 		 * For a WRITE request we must free up the block range that
12361 		 * we have locked up.  This holds regardless of whether this is
12362 		 * an aligned write request or a read-modify-write request.
12363 		 */
12364 		sd_range_unlock(un, bsp->mbs_wmp);
12365 		bsp->mbs_wmp = NULL;
12366 	}
12367 
12368 	if ((uintptr_t)bp->b_iodone != (uintptr_t)sd_mapblocksize_iodone) {
12369 		/*
12370 		 * An aligned read or write command will have no shadow buf;
12371 		 * there is not much else to do with it.
12372 		 */
12373 		goto done;
12374 	}
12375 
12376 	orig_bp = bsp->mbs_orig_bp;
12377 	ASSERT(orig_bp != NULL);
12378 	orig_xp = SD_GET_XBUF(orig_bp);
12379 	ASSERT(orig_xp != NULL);
12380 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12381 
12382 	if (!is_write && has_wmap) {
12383 		/*
12384 		 * A READ with a wmap means this is the READ phase of a
12385 		 * read-modify-write. If an error occurred on the READ then
12386 		 * we do not proceed with the WRITE phase or copy any data.
12387 		 * Just release the write maps and return with an error.
12388 		 */
12389 		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
12390 			orig_bp->b_resid = orig_bp->b_bcount;
12391 			bioerror(orig_bp, bp->b_error);
12392 			sd_range_unlock(un, bsp->mbs_wmp);
12393 			goto freebuf_done;
12394 		}
12395 	}
12396 
12397 	/*
12398 	 * Here is where we set up to copy the data from the shadow buf
12399 	 * into the space associated with the original buf.
12400 	 *
12401 	 * To deal with the conversion between block sizes, these
12402 	 * computations treat the data as an array of bytes, with the
12403 	 * first byte (byte 0) corresponding to the first byte in the
12404 	 * first block on the disk.
12405 	 */
12406 
12407 	/*
12408 	 * shadow_start and shadow_len indicate the location and size of
12409 	 * the data returned with the shadow IO request.
12410 	 */
12411 	if (un->un_f_enable_rmw) {
12412 		shadow_start  = SD_SYSBLOCKS2BYTES((offset_t)xp->xb_blkno);
12413 	} else {
12414 		shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
12415 	}
12416 	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;
12417 
12418 	/*
12419 	 * copy_offset gives the offset (in bytes) from the start of the first
12420 	 * block of the READ request to the beginning of the data.  We retrieve
12421 	 * this value from xb_pktp in the ORIGINAL xbuf, as it has been saved
12422 	 * there by sd_mapblockize_iostart(). copy_length gives the amount of
12423 	 * data to be copied (in bytes).
12424 	 */
12425 	copy_offset  = bsp->mbs_copy_offset;
12426 	if (un->un_f_enable_rmw) {
12427 		ASSERT((copy_offset >= 0) &&
12428 		    (copy_offset < un->un_phy_blocksize));
12429 	} else {
12430 		ASSERT((copy_offset >= 0) &&
12431 		    (copy_offset < un->un_tgt_blocksize));
12432 	}
12433 
12434 	copy_length  = orig_bp->b_bcount;
12435 	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;
12436 
12437 	/*
12438 	 * Set up the resid and error fields of orig_bp as appropriate.
12439 	 */
12440 	if (shadow_end >= request_end) {
12441 		/* We got all the requested data; set resid to zero */
12442 		orig_bp->b_resid = 0;
12443 	} else {
12444 		/*
12445 		 * We failed to get enough data to fully satisfy the original
12446 		 * request. Just copy back whatever data we got and set
12447 		 * up the residual and error code as required.
12448 		 *
12449 		 * 'shortfall' is the amount by which the data received with the
12450 		 * shadow buf has "fallen short" of the requested amount.
12451 		 */
12452 		shortfall = (size_t)(request_end - shadow_end);
12453 
12454 		if (shortfall > orig_bp->b_bcount) {
12455 			/*
12456 			 * We did not get enough data to even partially
12457 			 * fulfill the original request.  The residual is
12458 			 * equal to the amount requested.
12459 			 */
12460 			orig_bp->b_resid = orig_bp->b_bcount;
12461 		} else {
12462 			/*
12463 			 * We did not get all the data that we requested
12464 			 * from the device, but we will try to return what
12465 			 * portion we did get.
12466 			 */
12467 			orig_bp->b_resid = shortfall;
12468 		}
12469 		ASSERT(copy_length >= orig_bp->b_resid);
12470 		copy_length  -= orig_bp->b_resid;
12471 	}
12472 
12473 	/* Propagate the error code from the shadow buf to the original buf */
12474 	bioerror(orig_bp, bp->b_error);
12475 
12476 	if (is_write) {
12477 		goto freebuf_done;	/* No data copying for a WRITE */
12478 	}
12479 
12480 	if (has_wmap) {
12481 		/*
12482 		 * This is a READ command from the READ phase of a
12483 		 * read-modify-write request. We have to copy the data given
12484 		 * by the user OVER the data returned by the READ command,
12485 		 * then convert the command from a READ to a WRITE and send
12486 		 * it back to the target.
12487 		 */
12488 		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
12489 		    copy_length);
12490 
12491 		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */
12492 
12493 		/*
12494 		 * Dispatch the WRITE command to the taskq thread, which
12495 		 * will in turn send the command to the target. When the
12496 		 * WRITE command completes, we (sd_mapblocksize_iodone())
12497 		 * will get called again as part of the iodone chain
12498 		 * processing for it. Note that we will still be dealing
12499 		 * with the shadow buf at that point.
12500 		 */
12501 		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
12502 		    KM_NOSLEEP) != TASKQID_INVALID) {
12503 			/*
12504 			 * Dispatch was successful so we are done. Return
12505 			 * without going any higher up the iodone chain. Do
12506 			 * not free up any layer-private data until after the
12507 			 * WRITE completes.
12508 			 */
12509 			return;
12510 		}
12511 
12512 		/*
12513 		 * Dispatch of the WRITE command failed; set up the error
12514 		 * condition and send this IO back up the iodone chain.
12515 		 */
12516 		bioerror(orig_bp, EIO);
12517 		orig_bp->b_resid = orig_bp->b_bcount;
12518 
12519 	} else {
12520 		/*
12521 		 * This is a regular READ request (ie, not a RMW). Copy the
12522 		 * data from the shadow buf into the original buf. The
12523 		 * copy_offset compensates for any "misalignment" between the
12524 		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
12525 		 * original buf (with its un->un_sys_blocksize blocks).
12526 		 */
12527 		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
12528 		    copy_length);
12529 	}
12530 
12531 freebuf_done:
12532 
12533 	/*
12534 	 * At this point we still have both the shadow buf AND the original
12535 	 * buf to deal with, as well as the layer-private data area in each.
12536 	 * Local variables are as follows:
12537 	 *
12538 	 * bp -- points to shadow buf
12539 	 * xp -- points to xbuf of shadow buf
12540 	 * bsp -- points to layer-private data area of shadow buf
12541 	 * orig_bp -- points to original buf
12542 	 *
12543 	 * First free the shadow buf and its associated xbuf, then free the
12544 	 * layer-private data area from the shadow buf. There is no need to
12545 	 * restore xb_private in the shadow xbuf.
12546 	 */
12547 	sd_shadow_buf_free(bp);
12548 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
12549 
12550 	/*
12551 	 * Now update the local variables to point to the original buf, xbuf,
12552 	 * and layer-private area.
12553 	 */
12554 	bp = orig_bp;
12555 	xp = SD_GET_XBUF(bp);
12556 	ASSERT(xp != NULL);
12557 	ASSERT(xp == orig_xp);
12558 	bsp = xp->xb_private;
12559 	ASSERT(bsp != NULL);
12560 
12561 done:
12562 	/*
12563 	 * Restore xb_private to whatever it was set to by the next higher
12564 	 * layer in the chain, then free the layer-private data area.
12565 	 */
12566 	xp->xb_private = bsp->mbs_oprivate;
12567 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
12568 
12569 exit:
12570 	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
12571 	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);
12572 
12573 	SD_NEXT_IODONE(index, un, bp);
12574 }
12575 
12576 
12577 /*
12578  *    Function: sd_checksum_iostart
12579  *
12580  * Description: A stub function for a layer that's currently not used.
12581  *		For now just a placeholder.
12582  *
12583  *     Context: Kernel thread context
12584  */
12585 
12586 static void
sd_checksum_iostart(int index,struct sd_lun * un,struct buf * bp)12587 sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
12588 {
12589 	ASSERT(un != NULL);
12590 	ASSERT(bp != NULL);
12591 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12592 	SD_NEXT_IOSTART(index, un, bp);
12593 }
12594 
12595 
12596 /*
12597  *    Function: sd_checksum_iodone
12598  *
12599  * Description: A stub function for a layer that's currently not used.
12600  *		For now just a placeholder.
12601  *
12602  *     Context: May be called under interrupt context
12603  */
12604 
12605 static void
sd_checksum_iodone(int index,struct sd_lun * un,struct buf * bp)12606 sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
12607 {
12608 	ASSERT(un != NULL);
12609 	ASSERT(bp != NULL);
12610 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12611 	SD_NEXT_IODONE(index, un, bp);
12612 }
12613 
12614 
12615 /*
12616  *    Function: sd_checksum_uscsi_iostart
12617  *
12618  * Description: A stub function for a layer that's currently not used.
12619  *		For now just a placeholder.
12620  *
12621  *     Context: Kernel thread context
12622  */
12623 
12624 static void
sd_checksum_uscsi_iostart(int index,struct sd_lun * un,struct buf * bp)12625 sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
12626 {
12627 	ASSERT(un != NULL);
12628 	ASSERT(bp != NULL);
12629 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12630 	SD_NEXT_IOSTART(index, un, bp);
12631 }
12632 
12633 
12634 /*
12635  *    Function: sd_checksum_uscsi_iodone
12636  *
12637  * Description: A stub function for a layer that's currently not used.
12638  *		For now just a placeholder.
12639  *
12640  *     Context: May be called under interrupt context
12641  */
12642 
12643 static void
sd_checksum_uscsi_iodone(int index,struct sd_lun * un,struct buf * bp)12644 sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
12645 {
12646 	ASSERT(un != NULL);
12647 	ASSERT(bp != NULL);
12648 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12649 	SD_NEXT_IODONE(index, un, bp);
12650 }
12651 
12652 
12653 /*
12654  *    Function: sd_pm_iostart
12655  *
12656  * Description: iostart-side routine for Power mangement.
12657  *
12658  *     Context: Kernel thread context
12659  */
12660 
12661 static void
sd_pm_iostart(int index,struct sd_lun * un,struct buf * bp)12662 sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
12663 {
12664 	ASSERT(un != NULL);
12665 	ASSERT(bp != NULL);
12666 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12667 	ASSERT(!mutex_owned(&un->un_pm_mutex));
12668 
12669 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
12670 
12671 	if (sd_pm_entry(un) != DDI_SUCCESS) {
12672 		/*
12673 		 * Set up to return the failed buf back up the 'iodone'
12674 		 * side of the calling chain.
12675 		 */
12676 		bioerror(bp, EIO);
12677 		bp->b_resid = bp->b_bcount;
12678 
12679 		SD_BEGIN_IODONE(index, un, bp);
12680 
12681 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
12682 		return;
12683 	}
12684 
12685 	SD_NEXT_IOSTART(index, un, bp);
12686 
12687 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
12688 }
12689 
12690 
12691 /*
12692  *    Function: sd_pm_iodone
12693  *
12694  * Description: iodone-side routine for power mangement.
12695  *
12696  *     Context: may be called from interrupt context
12697  */
12698 
12699 static void
sd_pm_iodone(int index,struct sd_lun * un,struct buf * bp)12700 sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
12701 {
12702 	ASSERT(un != NULL);
12703 	ASSERT(bp != NULL);
12704 	ASSERT(!mutex_owned(&un->un_pm_mutex));
12705 
12706 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
12707 
12708 	/*
12709 	 * After attach the following flag is only read, so don't
12710 	 * take the penalty of acquiring a mutex for it.
12711 	 */
12712 	if (un->un_f_pm_is_enabled == TRUE) {
12713 		sd_pm_exit(un);
12714 	}
12715 
12716 	SD_NEXT_IODONE(index, un, bp);
12717 
12718 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
12719 }
12720 
12721 
12722 /*
12723  *    Function: sd_core_iostart
12724  *
12725  * Description: Primary driver function for enqueuing buf(9S) structs from
12726  *		the system and initiating IO to the target device
12727  *
12728  *     Context: Kernel thread context. Can sleep.
12729  *
12730  * Assumptions:  - The given xp->xb_blkno is absolute
12731  *		   (ie, relative to the start of the device).
12732  *		 - The IO is to be done using the native blocksize of
12733  *		   the device, as specified in un->un_tgt_blocksize.
12734  */
12735 /* ARGSUSED */
12736 static void
sd_core_iostart(int index,struct sd_lun * un,struct buf * bp)12737 sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
12738 {
12739 	struct sd_xbuf *xp;
12740 
12741 	ASSERT(un != NULL);
12742 	ASSERT(bp != NULL);
12743 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12744 	ASSERT(bp->b_resid == 0);
12745 
12746 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);
12747 
12748 	xp = SD_GET_XBUF(bp);
12749 	ASSERT(xp != NULL);
12750 
12751 	mutex_enter(SD_MUTEX(un));
12752 
12753 	/*
12754 	 * If we are currently in the failfast state, fail any new IO
12755 	 * that has B_FAILFAST set, then return.
12756 	 */
12757 	if ((bp->b_flags & B_FAILFAST) &&
12758 	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
12759 		mutex_exit(SD_MUTEX(un));
12760 		bioerror(bp, EIO);
12761 		bp->b_resid = bp->b_bcount;
12762 		SD_BEGIN_IODONE(index, un, bp);
12763 		return;
12764 	}
12765 
12766 	if (SD_IS_DIRECT_PRIORITY(xp)) {
12767 		/*
12768 		 * Priority command -- transport it immediately.
12769 		 *
12770 		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
12771 		 * because all direct priority commands should be associated
12772 		 * with error recovery actions which we don't want to retry.
12773 		 */
12774 		sd_start_cmds(un, bp);
12775 	} else {
12776 		/*
12777 		 * Normal command -- add it to the wait queue, then start
12778 		 * transporting commands from the wait queue.
12779 		 */
12780 		sd_add_buf_to_waitq(un, bp);
12781 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
12782 		sd_start_cmds(un, NULL);
12783 	}
12784 
12785 	mutex_exit(SD_MUTEX(un));
12786 
12787 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
12788 }
12789 
12790 
12791 /*
12792  *    Function: sd_init_cdb_limits
12793  *
12794  * Description: This is to handle scsi_pkt initialization differences
12795  *		between the driver platforms.
12796  *
12797  *		Legacy behaviors:
12798  *
12799  *		If the block number or the sector count exceeds the
12800  *		capabilities of a Group 0 command, shift over to a
12801  *		Group 1 command. We don't blindly use Group 1
12802  *		commands because a) some drives (CDC Wren IVs) get a
12803  *		bit confused, and b) there is probably a fair amount
12804  *		of speed difference for a target to receive and decode
12805  *		a 10 byte command instead of a 6 byte command.
12806  *
12807  *		The xfer time difference of 6 vs 10 byte CDBs is
12808  *		still significant so this code is still worthwhile.
12809  *		10 byte CDBs are very inefficient with the fas HBA driver
12810  *		and older disks. Each CDB byte took 1 usec with some
12811  *		popular disks.
12812  *
12813  *     Context: Must be called at attach time
12814  */
12815 
12816 static void
sd_init_cdb_limits(struct sd_lun * un)12817 sd_init_cdb_limits(struct sd_lun *un)
12818 {
12819 	int hba_cdb_limit;
12820 
12821 	/*
12822 	 * Use CDB_GROUP1 commands for most devices except for
12823 	 * parallel SCSI fixed drives in which case we get better
12824 	 * performance using CDB_GROUP0 commands (where applicable).
12825 	 */
12826 	un->un_mincdb = SD_CDB_GROUP1;
12827 	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
12828 	    !un->un_f_has_removable_media) {
12829 		un->un_mincdb = SD_CDB_GROUP0;
12830 	}
12831 
12832 	/*
12833 	 * Try to read the max-cdb-length supported by HBA.
12834 	 */
12835 	un->un_max_hba_cdb = scsi_ifgetcap(SD_ADDRESS(un), "max-cdb-length", 1);
12836 	if (0 >= un->un_max_hba_cdb) {
12837 		un->un_max_hba_cdb = CDB_GROUP4;
12838 		hba_cdb_limit = SD_CDB_GROUP4;
12839 	} else if (0 < un->un_max_hba_cdb &&
12840 	    un->un_max_hba_cdb < CDB_GROUP1) {
12841 		hba_cdb_limit = SD_CDB_GROUP0;
12842 	} else if (CDB_GROUP1 <= un->un_max_hba_cdb &&
12843 	    un->un_max_hba_cdb < CDB_GROUP5) {
12844 		hba_cdb_limit = SD_CDB_GROUP1;
12845 	} else if (CDB_GROUP5 <= un->un_max_hba_cdb &&
12846 	    un->un_max_hba_cdb < CDB_GROUP4) {
12847 		hba_cdb_limit = SD_CDB_GROUP5;
12848 	} else {
12849 		hba_cdb_limit = SD_CDB_GROUP4;
12850 	}
12851 
12852 	/*
12853 	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
12854 	 * commands for fixed disks.
12855 	 */
12856 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
12857 	    min(hba_cdb_limit, SD_CDB_GROUP4);
12858 
12859 	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
12860 	    ? sizeof (struct scsi_arq_status) : 1);
12861 	if (!ISCD(un))
12862 		un->un_cmd_timeout = (ushort_t)sd_io_time;
12863 	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
12864 }
12865 
12866 
/*
 *    Function: sd_initpkt_for_buf
 *
 * Description: Allocate and initialize for transport a scsi_pkt struct,
 *		based upon the info specified in the given buf struct.
 *
 *		Assumes the xb_blkno in the request is absolute (i.e.,
 *		relative to the start of the device, NOT the partition).
 *		Also assumes that the request is using the native block
 *		size of the device (as returned by the READ CAPACITY
 *		command).
 *
 * Return Code: SD_PKT_ALLOC_SUCCESS
 *		SD_PKT_ALLOC_FAILURE
 *		SD_PKT_ALLOC_FAILURE_NO_DMA
 *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
 *
 *     Context: Kernel thread and may be called from software interrupt context
 *		as part of a sdrunout callback. This function may not block or
 *		call routines that block
 */
12888 
12889 static int
sd_initpkt_for_buf(struct buf * bp,struct scsi_pkt ** pktpp)12890 sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
12891 {
12892 	struct sd_xbuf	*xp;
12893 	struct scsi_pkt *pktp = NULL;
12894 	struct sd_lun	*un;
12895 	size_t		blockcount;
12896 	daddr_t		startblock;
12897 	int		rval;
12898 	int		cmd_flags;
12899 
12900 	ASSERT(bp != NULL);
12901 	ASSERT(pktpp != NULL);
12902 	xp = SD_GET_XBUF(bp);
12903 	ASSERT(xp != NULL);
12904 	un = SD_GET_UN(bp);
12905 	ASSERT(un != NULL);
12906 	ASSERT(mutex_owned(SD_MUTEX(un)));
12907 	ASSERT(bp->b_resid == 0);
12908 
12909 	SD_TRACE(SD_LOG_IO_CORE, un,
12910 	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);
12911 
12912 	mutex_exit(SD_MUTEX(un));
12913 
12914 #if defined(__x86)	/* DMAFREE for x86 only */
12915 	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
12916 		/*
12917 		 * Already have a scsi_pkt -- just need DMA resources.
12918 		 * We must recompute the CDB in case the mapping returns
12919 		 * a nonzero pkt_resid.
12920 		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
12921 		 * that is being retried, the unmap/remap of the DMA resouces
12922 		 * will result in the entire transfer starting over again
12923 		 * from the very first block.
12924 		 */
12925 		ASSERT(xp->xb_pktp != NULL);
12926 		pktp = xp->xb_pktp;
12927 	} else {
12928 		pktp = NULL;
12929 	}
12930 #endif /* __x86 */
12931 
12932 	startblock = xp->xb_blkno;	/* Absolute block num. */
12933 	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
12934 
12935 	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);
12936 
12937 	/*
12938 	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
12939 	 * call scsi_init_pkt, and build the CDB.
12940 	 */
12941 	rval = sd_setup_rw_pkt(un, &pktp, bp,
12942 	    cmd_flags, sdrunout, (caddr_t)un,
12943 	    startblock, blockcount);
12944 
12945 	if (rval == 0) {
12946 		/*
12947 		 * Success.
12948 		 *
12949 		 * If partial DMA is being used and required for this transfer.
12950 		 * set it up here.
12951 		 */
12952 		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
12953 		    (pktp->pkt_resid != 0)) {
12954 
12955 			/*
12956 			 * Save the CDB length and pkt_resid for the
12957 			 * next xfer
12958 			 */
12959 			xp->xb_dma_resid = pktp->pkt_resid;
12960 
12961 			/* rezero resid */
12962 			pktp->pkt_resid = 0;
12963 
12964 		} else {
12965 			xp->xb_dma_resid = 0;
12966 		}
12967 
12968 		pktp->pkt_flags = un->un_tagflags;
12969 		pktp->pkt_time  = un->un_cmd_timeout;
12970 		pktp->pkt_comp  = sdintr;
12971 
12972 		pktp->pkt_private = bp;
12973 		*pktpp = pktp;
12974 
12975 		SD_TRACE(SD_LOG_IO_CORE, un,
12976 		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);
12977 
12978 #if defined(__x86)	/* DMAFREE for x86 only */
12979 		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
12980 #endif
12981 
12982 		mutex_enter(SD_MUTEX(un));
12983 		return (SD_PKT_ALLOC_SUCCESS);
12984 
12985 	}
12986 
12987 	/*
12988 	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
12989 	 * from sd_setup_rw_pkt.
12990 	 */
12991 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
12992 
12993 	if (rval == SD_PKT_ALLOC_FAILURE) {
12994 		*pktpp = NULL;
12995 		/*
12996 		 * Set the driver state to RWAIT to indicate the driver
12997 		 * is waiting on resource allocations. The driver will not
12998 		 * suspend, pm_suspend, or detatch while the state is RWAIT.
12999 		 */
13000 		mutex_enter(SD_MUTEX(un));
13001 		New_state(un, SD_STATE_RWAIT);
13002 
13003 		SD_ERROR(SD_LOG_IO_CORE, un,
13004 		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);
13005 
13006 		if ((bp->b_flags & B_ERROR) != 0) {
13007 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13008 		}
13009 		return (SD_PKT_ALLOC_FAILURE);
13010 	} else {
13011 		/*
13012 		 * PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13013 		 *
13014 		 * This should never happen.  Maybe someone messed with the
13015 		 * kernel's minphys?
13016 		 */
13017 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13018 		    "Request rejected: too large for CDB: "
13019 		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
13020 		SD_ERROR(SD_LOG_IO_CORE, un,
13021 		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
13022 		mutex_enter(SD_MUTEX(un));
13023 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13024 
13025 	}
13026 }
13027 
13028 
13029 /*
13030  *    Function: sd_destroypkt_for_buf
13031  *
13032  * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
13033  *
13034  *     Context: Kernel thread or interrupt context
13035  */
13036 
13037 static void
sd_destroypkt_for_buf(struct buf * bp)13038 sd_destroypkt_for_buf(struct buf *bp)
13039 {
13040 	ASSERT(bp != NULL);
13041 	ASSERT(SD_GET_UN(bp) != NULL);
13042 
13043 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13044 	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
13045 
13046 	ASSERT(SD_GET_PKTP(bp) != NULL);
13047 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13048 
13049 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13050 	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
13051 }
13052 
13053 /*
13054  *    Function: sd_setup_rw_pkt
13055  *
13056  * Description: Determines appropriate CDB group for the requested LBA
13057  *		and transfer length, calls scsi_init_pkt, and builds
13058  *		the CDB.  Do not use for partial DMA transfers except
13059  *		for the initial transfer since the CDB size must
13060  *		remain constant.
13061  *
13062  *     Context: Kernel thread and may be called from software interrupt
13063  *		context as part of a sdrunout callback. This function may not
13064  *		block or call routines that block
13065  */
13066 
13067 
13068 int
sd_setup_rw_pkt(struct sd_lun * un,struct scsi_pkt ** pktpp,struct buf * bp,int flags,int (* callback)(caddr_t),caddr_t callback_arg,diskaddr_t lba,uint32_t blockcount)13069 sd_setup_rw_pkt(struct sd_lun *un,
13070     struct scsi_pkt **pktpp, struct buf *bp, int flags,
13071     int (*callback)(caddr_t), caddr_t callback_arg,
13072     diskaddr_t lba, uint32_t blockcount)
13073 {
13074 	struct scsi_pkt *return_pktp;
13075 	union scsi_cdb *cdbp;
13076 	struct sd_cdbinfo *cp = NULL;
13077 	int i;
13078 
13079 	/*
13080 	 * See which size CDB to use, based upon the request.
13081 	 */
13082 	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {
13083 
13084 		/*
13085 		 * Check lba and block count against sd_cdbtab limits.
13086 		 * In the partial DMA case, we have to use the same size
13087 		 * CDB for all the transfers.  Check lba + blockcount
13088 		 * against the max LBA so we know that segment of the
13089 		 * transfer can use the CDB we select.
13090 		 */
13091 		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
13092 		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {
13093 
13094 			/*
13095 			 * The command will fit into the CDB type
13096 			 * specified by sd_cdbtab[i].
13097 			 */
13098 			cp = sd_cdbtab + i;
13099 
13100 			/*
13101 			 * Call scsi_init_pkt so we can fill in the
13102 			 * CDB.
13103 			 */
13104 			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
13105 			    bp, cp->sc_grpcode, un->un_status_len, 0,
13106 			    flags, callback, callback_arg);
13107 
13108 			if (return_pktp != NULL) {
13109 
13110 				/*
13111 				 * Return new value of pkt
13112 				 */
13113 				*pktpp = return_pktp;
13114 
13115 				/*
13116 				 * To be safe, zero the CDB insuring there is
13117 				 * no leftover data from a previous command.
13118 				 */
13119 				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);
13120 
13121 				/*
13122 				 * Handle partial DMA mapping
13123 				 */
13124 				if (return_pktp->pkt_resid != 0) {
13125 
13126 					/*
13127 					 * Not going to xfer as many blocks as
13128 					 * originally expected
13129 					 */
13130 					blockcount -=
13131 					    SD_BYTES2TGTBLOCKS(un,
13132 					    return_pktp->pkt_resid);
13133 				}
13134 
13135 				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;
13136 
13137 				/*
13138 				 * Set command byte based on the CDB
13139 				 * type we matched.
13140 				 */
13141 				cdbp->scc_cmd = cp->sc_grpmask |
13142 				    ((bp->b_flags & B_READ) ?
13143 				    SCMD_READ : SCMD_WRITE);
13144 
13145 				SD_FILL_SCSI1_LUN(un, return_pktp);
13146 
13147 				/*
13148 				 * Fill in LBA and length
13149 				 */
13150 				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
13151 				    (cp->sc_grpcode == CDB_GROUP4) ||
13152 				    (cp->sc_grpcode == CDB_GROUP0) ||
13153 				    (cp->sc_grpcode == CDB_GROUP5));
13154 
13155 				if (cp->sc_grpcode == CDB_GROUP1) {
13156 					FORMG1ADDR(cdbp, lba);
13157 					FORMG1COUNT(cdbp, blockcount);
13158 					return (0);
13159 				} else if (cp->sc_grpcode == CDB_GROUP4) {
13160 					FORMG4LONGADDR(cdbp, lba);
13161 					FORMG4COUNT(cdbp, blockcount);
13162 					return (0);
13163 				} else if (cp->sc_grpcode == CDB_GROUP0) {
13164 					FORMG0ADDR(cdbp, lba);
13165 					FORMG0COUNT(cdbp, blockcount);
13166 					return (0);
13167 				} else if (cp->sc_grpcode == CDB_GROUP5) {
13168 					FORMG5ADDR(cdbp, lba);
13169 					FORMG5COUNT(cdbp, blockcount);
13170 					return (0);
13171 				}
13172 
13173 				/*
13174 				 * It should be impossible to not match one
13175 				 * of the CDB types above, so we should never
13176 				 * reach this point.  Set the CDB command byte
13177 				 * to test-unit-ready to avoid writing
13178 				 * to somewhere we don't intend.
13179 				 */
13180 				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
13181 				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13182 			} else {
13183 				/*
13184 				 * Couldn't get scsi_pkt
13185 				 */
13186 				return (SD_PKT_ALLOC_FAILURE);
13187 			}
13188 		}
13189 	}
13190 
13191 	/*
13192 	 * None of the available CDB types were suitable.  This really
13193 	 * should never happen:  on a 64 bit system we support
13194 	 * READ16/WRITE16 which will hold an entire 64 bit disk address
13195 	 * and on a 32 bit system we will refuse to bind to a device
13196 	 * larger than 2TB so addresses will never be larger than 32 bits.
13197 	 */
13198 	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13199 }
13200 
13201 /*
13202  *    Function: sd_setup_next_rw_pkt
13203  *
13204  * Description: Setup packet for partial DMA transfers, except for the
13205  *		initial transfer.  sd_setup_rw_pkt should be used for
13206  *		the initial transfer.
13207  *
13208  *     Context: Kernel thread and may be called from interrupt context.
13209  */
13210 
13211 int
sd_setup_next_rw_pkt(struct sd_lun * un,struct scsi_pkt * pktp,struct buf * bp,diskaddr_t lba,uint32_t blockcount)13212 sd_setup_next_rw_pkt(struct sd_lun *un,
13213     struct scsi_pkt *pktp, struct buf *bp,
13214     diskaddr_t lba, uint32_t blockcount)
13215 {
13216 	uchar_t com;
13217 	union scsi_cdb *cdbp;
13218 	uchar_t cdb_group_id;
13219 
13220 	ASSERT(pktp != NULL);
13221 	ASSERT(pktp->pkt_cdbp != NULL);
13222 
13223 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
13224 	com = cdbp->scc_cmd;
13225 	cdb_group_id = CDB_GROUPID(com);
13226 
13227 	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
13228 	    (cdb_group_id == CDB_GROUPID_1) ||
13229 	    (cdb_group_id == CDB_GROUPID_4) ||
13230 	    (cdb_group_id == CDB_GROUPID_5));
13231 
13232 	/*
13233 	 * Move pkt to the next portion of the xfer.
13234 	 * func is NULL_FUNC so we do not have to release
13235 	 * the disk mutex here.
13236 	 */
13237 	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
13238 	    NULL_FUNC, NULL) == pktp) {
13239 		/* Success.  Handle partial DMA */
13240 		if (pktp->pkt_resid != 0) {
13241 			blockcount -=
13242 			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
13243 		}
13244 
13245 		cdbp->scc_cmd = com;
13246 		SD_FILL_SCSI1_LUN(un, pktp);
13247 		if (cdb_group_id == CDB_GROUPID_1) {
13248 			FORMG1ADDR(cdbp, lba);
13249 			FORMG1COUNT(cdbp, blockcount);
13250 			return (0);
13251 		} else if (cdb_group_id == CDB_GROUPID_4) {
13252 			FORMG4LONGADDR(cdbp, lba);
13253 			FORMG4COUNT(cdbp, blockcount);
13254 			return (0);
13255 		} else if (cdb_group_id == CDB_GROUPID_0) {
13256 			FORMG0ADDR(cdbp, lba);
13257 			FORMG0COUNT(cdbp, blockcount);
13258 			return (0);
13259 		} else if (cdb_group_id == CDB_GROUPID_5) {
13260 			FORMG5ADDR(cdbp, lba);
13261 			FORMG5COUNT(cdbp, blockcount);
13262 			return (0);
13263 		}
13264 
13265 		/* Unreachable */
13266 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13267 	}
13268 
13269 	/*
13270 	 * Error setting up next portion of cmd transfer.
13271 	 * Something is definitely very wrong and this
13272 	 * should not happen.
13273 	 */
13274 	return (SD_PKT_ALLOC_FAILURE);
13275 }
13276 
/*
 *    Function: sd_initpkt_for_uscsi
 *
 * Description: Allocate and initialize for transport a scsi_pkt struct,
 *		based upon the info specified in the given uscsi_cmd struct.
 *
 * Return Code: SD_PKT_ALLOC_SUCCESS
 *		SD_PKT_ALLOC_FAILURE
 *		SD_PKT_ALLOC_FAILURE_NO_DMA
 *		SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL
 *
 *     Context: Kernel thread and may be called from software interrupt context
 *		as part of a sdrunout callback. This function may not block or
 *		call routines that block
 */
13292 
13293 static int
sd_initpkt_for_uscsi(struct buf * bp,struct scsi_pkt ** pktpp)13294 sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
13295 {
13296 	struct uscsi_cmd *uscmd;
13297 	struct sd_xbuf	*xp;
13298 	struct scsi_pkt	*pktp;
13299 	struct sd_lun	*un;
13300 	uint32_t	flags = 0;
13301 
13302 	ASSERT(bp != NULL);
13303 	ASSERT(pktpp != NULL);
13304 	xp = SD_GET_XBUF(bp);
13305 	ASSERT(xp != NULL);
13306 	un = SD_GET_UN(bp);
13307 	ASSERT(un != NULL);
13308 	ASSERT(mutex_owned(SD_MUTEX(un)));
13309 
13310 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13311 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13312 	ASSERT(uscmd != NULL);
13313 
13314 	SD_TRACE(SD_LOG_IO_CORE, un,
13315 	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);
13316 
13317 	/*
13318 	 * Allocate the scsi_pkt for the command.
13319 	 *
13320 	 * Note: If PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
13321 	 *	 during scsi_init_pkt time and will continue to use the
13322 	 *	 same path as long as the same scsi_pkt is used without
13323 	 *	 intervening scsi_dmafree(). Since uscsi command does
13324 	 *	 not call scsi_dmafree() before retry failed command, it
13325 	 *	 is necessary to make sure PKT_DMA_PARTIAL flag is NOT
13326 	 *	 set such that scsi_vhci can use other available path for
13327 	 *	 retry. Besides, ucsci command does not allow DMA breakup,
13328 	 *	 so there is no need to set PKT_DMA_PARTIAL flag.
13329 	 *
13330 	 *	 More fundamentally, we can't support breaking up this DMA into
13331 	 *	 multiple windows on x86. There is, in general, no guarantee
13332 	 *	 that arbitrary SCSI commands are idempotent, which is required
13333 	 *	 if we want to use multiple windows for a given command.
13334 	 */
13335 	if (uscmd->uscsi_rqlen > SENSE_LENGTH) {
13336 		pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
13337 		    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
13338 		    ((int)(uscmd->uscsi_rqlen) + sizeof (struct scsi_arq_status)
13339 		    - sizeof (struct scsi_extended_sense)), 0,
13340 		    (un->un_pkt_flags & ~PKT_DMA_PARTIAL) | PKT_XARQ,
13341 		    sdrunout, (caddr_t)un);
13342 	} else {
13343 		pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
13344 		    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
13345 		    sizeof (struct scsi_arq_status), 0,
13346 		    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
13347 		    sdrunout, (caddr_t)un);
13348 	}
13349 
13350 	if (pktp == NULL) {
13351 		*pktpp = NULL;
13352 		/*
13353 		 * Set the driver state to RWAIT to indicate the driver
13354 		 * is waiting on resource allocations. The driver will not
13355 		 * suspend, pm_suspend, or detatch while the state is RWAIT.
13356 		 */
13357 		New_state(un, SD_STATE_RWAIT);
13358 
13359 		SD_ERROR(SD_LOG_IO_CORE, un,
13360 		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);
13361 
13362 		if ((bp->b_flags & B_ERROR) != 0) {
13363 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13364 		}
13365 		return (SD_PKT_ALLOC_FAILURE);
13366 	}
13367 
13368 	/*
13369 	 * We do not do DMA breakup for USCSI commands, so return failure
13370 	 * here if all the needed DMA resources were not allocated.
13371 	 */
13372 	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
13373 	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
13374 		scsi_destroy_pkt(pktp);
13375 		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
13376 		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
13377 		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
13378 	}
13379 
13380 	/* Init the cdb from the given uscsi struct */
13381 	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
13382 	    uscmd->uscsi_cdb[0], 0, 0, 0);
13383 
13384 	SD_FILL_SCSI1_LUN(un, pktp);
13385 
13386 	/*
13387 	 * Set up the optional USCSI flags. See the uscsi(4I) man page
13388 	 * for listing of the supported flags.
13389 	 */
13390 
13391 	if (uscmd->uscsi_flags & USCSI_SILENT) {
13392 		flags |= FLAG_SILENT;
13393 	}
13394 
13395 	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
13396 		flags |= FLAG_DIAGNOSE;
13397 	}
13398 
13399 	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
13400 		flags |= FLAG_ISOLATE;
13401 	}
13402 
13403 	if (un->un_f_is_fibre == FALSE) {
13404 		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
13405 			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
13406 		}
13407 	}
13408 
13409 	/*
13410 	 * Set the pkt flags here so we save time later.
13411 	 * Note: These flags are NOT in the uscsi man page!!!
13412 	 */
13413 	if (uscmd->uscsi_flags & USCSI_HEAD) {
13414 		flags |= FLAG_HEAD;
13415 	}
13416 
13417 	if (uscmd->uscsi_flags & USCSI_NOINTR) {
13418 		flags |= FLAG_NOINTR;
13419 	}
13420 
13421 	/*
13422 	 * For tagged queueing, things get a bit complicated.
13423 	 * Check first for head of queue and last for ordered queue.
13424 	 * If neither head nor order, use the default driver tag flags.
13425 	 */
13426 	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
13427 		if (uscmd->uscsi_flags & USCSI_HTAG) {
13428 			flags |= FLAG_HTAG;
13429 		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
13430 			flags |= FLAG_OTAG;
13431 		} else {
13432 			flags |= un->un_tagflags & FLAG_TAGMASK;
13433 		}
13434 	}
13435 
13436 	if (uscmd->uscsi_flags & USCSI_NODISCON) {
13437 		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
13438 	}
13439 
13440 	pktp->pkt_flags = flags;
13441 
13442 	/* Transfer uscsi information to scsi_pkt */
13443 	(void) scsi_uscsi_pktinit(uscmd, pktp);
13444 
13445 	/* Copy the caller's CDB into the pkt... */
13446 	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);
13447 
13448 	if (uscmd->uscsi_timeout == 0) {
13449 		pktp->pkt_time = un->un_uscsi_timeout;
13450 	} else {
13451 		pktp->pkt_time = uscmd->uscsi_timeout;
13452 	}
13453 
13454 	/* need it later to identify USCSI request in sdintr */
13455 	xp->xb_pkt_flags |= SD_XB_USCSICMD;
13456 
13457 	xp->xb_sense_resid = uscmd->uscsi_rqresid;
13458 
13459 	pktp->pkt_private = bp;
13460 	pktp->pkt_comp = sdintr;
13461 	*pktpp = pktp;
13462 
13463 	SD_TRACE(SD_LOG_IO_CORE, un,
13464 	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);
13465 
13466 	return (SD_PKT_ALLOC_SUCCESS);
13467 }
13468 
13469 
/*
 *    Function: sd_destroypkt_for_uscsi
 *
 * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
 *		IOs. Also saves relevant info into the associated uscsi_cmd
 *		struct.
 *
 *     Context: May be called under interrupt context
 */
13479 
13480 static void
sd_destroypkt_for_uscsi(struct buf * bp)13481 sd_destroypkt_for_uscsi(struct buf *bp)
13482 {
13483 	struct uscsi_cmd *uscmd;
13484 	struct sd_xbuf	*xp;
13485 	struct scsi_pkt	*pktp;
13486 	struct sd_lun	*un;
13487 	struct sd_uscsi_info *suip;
13488 
13489 	ASSERT(bp != NULL);
13490 	xp = SD_GET_XBUF(bp);
13491 	ASSERT(xp != NULL);
13492 	un = SD_GET_UN(bp);
13493 	ASSERT(un != NULL);
13494 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13495 	pktp = SD_GET_PKTP(bp);
13496 	ASSERT(pktp != NULL);
13497 
13498 	SD_TRACE(SD_LOG_IO_CORE, un,
13499 	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);
13500 
13501 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13502 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13503 	ASSERT(uscmd != NULL);
13504 
13505 	/* Save the status and the residual into the uscsi_cmd struct */
13506 	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
13507 	uscmd->uscsi_resid  = bp->b_resid;
13508 
13509 	/* Transfer scsi_pkt information to uscsi */
13510 	(void) scsi_uscsi_pktfini(pktp, uscmd);
13511 
13512 	/*
13513 	 * If enabled, copy any saved sense data into the area specified
13514 	 * by the uscsi command.
13515 	 */
13516 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
13517 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
13518 		/*
13519 		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
13520 		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
13521 		 */
13522 		uscmd->uscsi_rqstatus = xp->xb_sense_status;
13523 		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
13524 		if (uscmd->uscsi_rqlen > SENSE_LENGTH) {
13525 			bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf,
13526 			    MAX_SENSE_LENGTH);
13527 		} else {
13528 			bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf,
13529 			    SENSE_LENGTH);
13530 		}
13531 	}
13532 	/*
13533 	 * The following assignments are for SCSI FMA.
13534 	 */
13535 	ASSERT(xp->xb_private != NULL);
13536 	suip = (struct sd_uscsi_info *)xp->xb_private;
13537 	suip->ui_pkt_reason = pktp->pkt_reason;
13538 	suip->ui_pkt_state = pktp->pkt_state;
13539 	suip->ui_pkt_statistics = pktp->pkt_statistics;
13540 	suip->ui_lba = (uint64_t)SD_GET_BLKNO(bp);
13541 
13542 	/* We are done with the scsi_pkt; free it now */
13543 	ASSERT(SD_GET_PKTP(bp) != NULL);
13544 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13545 
13546 	SD_TRACE(SD_LOG_IO_CORE, un,
13547 	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
13548 }
13549 
13550 
/*
 *    Function: sd_bioclone_alloc
 *
 * Description: Allocate a buf(9S) and init it as per the given buf
 *		and the various arguments.  The associated sd_xbuf
 *		struct is (nearly) duplicated.  The struct buf *bp
 *		argument is saved in new_xp->xb_private.
 *
 *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
 *		datalen - size of data area for the shadow bp
 *		blkno - starting LBA
 *		func - function pointer for b_iodone in the shadow buf. (May
 *			be NULL if none.)
 *
 * Return Code: Pointer to the allocated buf(9S) struct
 *
 *     Context: Can sleep.
 */
13569 
13570 static struct buf *
sd_bioclone_alloc(struct buf * bp,size_t datalen,daddr_t blkno,int (* func)(struct buf *))13571 sd_bioclone_alloc(struct buf *bp, size_t datalen, daddr_t blkno,
13572     int (*func)(struct buf *))
13573 {
13574 	struct	sd_lun	*un;
13575 	struct	sd_xbuf	*xp;
13576 	struct	sd_xbuf	*new_xp;
13577 	struct	buf	*new_bp;
13578 
13579 	ASSERT(bp != NULL);
13580 	xp = SD_GET_XBUF(bp);
13581 	ASSERT(xp != NULL);
13582 	un = SD_GET_UN(bp);
13583 	ASSERT(un != NULL);
13584 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13585 
13586 	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
13587 	    NULL, KM_SLEEP);
13588 
13589 	new_bp->b_lblkno	= blkno;
13590 
13591 	/*
13592 	 * Allocate an xbuf for the shadow bp and copy the contents of the
13593 	 * original xbuf into it.
13594 	 */
13595 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
13596 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
13597 
13598 	/*
13599 	 * The given bp is automatically saved in the xb_private member
13600 	 * of the new xbuf.  Callers are allowed to depend on this.
13601 	 */
13602 	new_xp->xb_private = bp;
13603 
13604 	new_bp->b_private  = new_xp;
13605 
13606 	return (new_bp);
13607 }
13608 
/*
 *    Function: sd_shadow_buf_alloc
 *
 * Description: Allocate a buf(9S) and init it as per the given buf
 *		and the various arguments.  The associated sd_xbuf
 *		struct is (nearly) duplicated.  The struct buf *bp
 *		argument is saved in new_xp->xb_private.
 *
 *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
 *		datalen - size of data area for the shadow bp
 *		bflags - B_READ or B_WRITE (pseudo flag)
 *		blkno - starting LBA
 *		func - function pointer for b_iodone in the shadow buf. (May
 *			be NULL if none.)
 *
 * Return Code: Pointer to the allocated buf(9S) struct
 *
 *     Context: Can sleep.
 */
13628 
13629 static struct buf *
sd_shadow_buf_alloc(struct buf * bp,size_t datalen,uint_t bflags,daddr_t blkno,int (* func)(struct buf *))13630 sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
13631     daddr_t blkno, int (*func)(struct buf *))
13632 {
13633 	struct	sd_lun	*un;
13634 	struct	sd_xbuf	*xp;
13635 	struct	sd_xbuf	*new_xp;
13636 	struct	buf	*new_bp;
13637 
13638 	ASSERT(bp != NULL);
13639 	xp = SD_GET_XBUF(bp);
13640 	ASSERT(xp != NULL);
13641 	un = SD_GET_UN(bp);
13642 	ASSERT(un != NULL);
13643 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13644 
13645 	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
13646 		bp_mapin(bp);
13647 	}
13648 
13649 	bflags &= (B_READ | B_WRITE);
13650 #if defined(__x86)
13651 	new_bp = getrbuf(KM_SLEEP);
13652 	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
13653 	new_bp->b_bcount = datalen;
13654 	new_bp->b_flags = bflags |
13655 	    (bp->b_flags & ~(B_PAGEIO | B_PHYS | B_REMAPPED | B_SHADOW));
13656 #else
13657 	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
13658 	    datalen, bflags, SLEEP_FUNC, NULL);
13659 #endif
13660 	new_bp->av_forw	= NULL;
13661 	new_bp->av_back	= NULL;
13662 	new_bp->b_dev	= bp->b_dev;
13663 	new_bp->b_blkno	= blkno;
13664 	new_bp->b_iodone = func;
13665 	new_bp->b_edev	= bp->b_edev;
13666 	new_bp->b_resid	= 0;
13667 
13668 	/* We need to preserve the B_FAILFAST flag */
13669 	if (bp->b_flags & B_FAILFAST) {
13670 		new_bp->b_flags |= B_FAILFAST;
13671 	}
13672 
13673 	/*
13674 	 * Allocate an xbuf for the shadow bp and copy the contents of the
13675 	 * original xbuf into it.
13676 	 */
13677 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
13678 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
13679 
13680 	/* Need later to copy data between the shadow buf & original buf! */
13681 	new_xp->xb_pkt_flags |= PKT_CONSISTENT;
13682 
13683 	/*
13684 	 * The given bp is automatically saved in the xb_private member
13685 	 * of the new xbuf.  Callers are allowed to depend on this.
13686 	 */
13687 	new_xp->xb_private = bp;
13688 
13689 	new_bp->b_private  = new_xp;
13690 
13691 	return (new_bp);
13692 }
13693 
13694 /*
13695  *    Function: sd_bioclone_free
13696  *
13697  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
13698  *		in the larger than partition operation.
13699  *
13700  *     Context: May be called under interrupt context
13701  */
13702 
13703 static void
sd_bioclone_free(struct buf * bp)13704 sd_bioclone_free(struct buf *bp)
13705 {
13706 	struct sd_xbuf	*xp;
13707 
13708 	ASSERT(bp != NULL);
13709 	xp = SD_GET_XBUF(bp);
13710 	ASSERT(xp != NULL);
13711 
13712 	/*
13713 	 * Call bp_mapout() before freeing the buf,  in case a lower
13714 	 * layer or HBA  had done a bp_mapin().  we must do this here
13715 	 * as we are the "originator" of the shadow buf.
13716 	 */
13717 	bp_mapout(bp);
13718 
13719 	/*
13720 	 * Null out b_iodone before freeing the bp, to ensure that the driver
13721 	 * never gets confused by a stale value in this field. (Just a little
13722 	 * extra defensiveness here.)
13723 	 */
13724 	bp->b_iodone = NULL;
13725 
13726 	freerbuf(bp);
13727 
13728 	kmem_free(xp, sizeof (struct sd_xbuf));
13729 }
13730 
13731 /*
13732  *    Function: sd_shadow_buf_free
13733  *
13734  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
13735  *
13736  *     Context: May be called under interrupt context
13737  */
13738 
13739 static void
sd_shadow_buf_free(struct buf * bp)13740 sd_shadow_buf_free(struct buf *bp)
13741 {
13742 	struct sd_xbuf	*xp;
13743 
13744 	ASSERT(bp != NULL);
13745 	xp = SD_GET_XBUF(bp);
13746 	ASSERT(xp != NULL);
13747 
13748 #if defined(__sparc)
13749 	/*
13750 	 * Call bp_mapout() before freeing the buf,  in case a lower
13751 	 * layer or HBA  had done a bp_mapin().  we must do this here
13752 	 * as we are the "originator" of the shadow buf.
13753 	 */
13754 	bp_mapout(bp);
13755 #endif
13756 
13757 	/*
13758 	 * Null out b_iodone before freeing the bp, to ensure that the driver
13759 	 * never gets confused by a stale value in this field. (Just a little
13760 	 * extra defensiveness here.)
13761 	 */
13762 	bp->b_iodone = NULL;
13763 
13764 #if defined(__x86)
13765 	kmem_free(bp->b_un.b_addr, bp->b_bcount);
13766 	freerbuf(bp);
13767 #else
13768 	scsi_free_consistent_buf(bp);
13769 #endif
13770 
13771 	kmem_free(xp, sizeof (struct sd_xbuf));
13772 }
13773 
13774 
13775 /*
13776  *    Function: sd_print_transport_rejected_message
13777  *
13778  * Description: This implements the ludicrously complex rules for printing
13779  *		a "transport rejected" message.  This is to address the
13780  *		specific problem of having a flood of this error message
13781  *		produced when a failover occurs.
13782  *
13783  *     Context: Any.
13784  */
13785 
13786 static void
sd_print_transport_rejected_message(struct sd_lun * un,struct sd_xbuf * xp,int code)13787 sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
13788     int code)
13789 {
13790 	ASSERT(un != NULL);
13791 	ASSERT(mutex_owned(SD_MUTEX(un)));
13792 	ASSERT(xp != NULL);
13793 
13794 	/*
13795 	 * Print the "transport rejected" message under the following
13796 	 * conditions:
13797 	 *
13798 	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
13799 	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
13800 	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
13801 	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
13802 	 *   scsi_transport(9F) (which indicates that the target might have
13803 	 *   gone off-line).  This uses the un->un_tran_fatal_count
13804 	 *   count, which is incremented whenever a TRAN_FATAL_ERROR is
13805 	 *   received, and reset to zero whenver a TRAN_ACCEPT is returned
13806 	 *   from scsi_transport().
13807 	 *
13808 	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
13809 	 * the preceeding cases in order for the message to be printed.
13810 	 */
13811 	if (((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) &&
13812 	    (SD_FM_LOG(un) == SD_FM_LOG_NSUP)) {
13813 		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
13814 		    (code != TRAN_FATAL_ERROR) ||
13815 		    (un->un_tran_fatal_count == 1)) {
13816 			switch (code) {
13817 			case TRAN_BADPKT:
13818 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13819 				    "transport rejected bad packet\n");
13820 				break;
13821 			case TRAN_FATAL_ERROR:
13822 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13823 				    "transport rejected fatal error\n");
13824 				break;
13825 			default:
13826 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13827 				    "transport rejected (%d)\n", code);
13828 				break;
13829 			}
13830 		}
13831 	}
13832 }
13833 
13834 
13835 /*
13836  *    Function: sd_add_buf_to_waitq
13837  *
13838  * Description: Add the given buf(9S) struct to the wait queue for the
13839  *		instance.  If sorting is enabled, then the buf is added
13840  *		to the queue via an elevator sort algorithm (a la
13841  *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
13842  *		If sorting is not enabled, then the buf is just added
13843  *		to the end of the wait queue.
13844  *
13845  * Return Code: void
13846  *
13847  *     Context: Does not sleep/block, therefore technically can be called
13848  *		from any context.  However if sorting is enabled then the
13849  *		execution time is indeterminate, and may take long if
13850  *		the wait queue grows large.
13851  */
13852 
13853 static void
sd_add_buf_to_waitq(struct sd_lun * un,struct buf * bp)13854 sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
13855 {
13856 	struct buf *ap;
13857 
13858 	ASSERT(bp != NULL);
13859 	ASSERT(un != NULL);
13860 	ASSERT(mutex_owned(SD_MUTEX(un)));
13861 
13862 	/* If the queue is empty, add the buf as the only entry & return. */
13863 	if (un->un_waitq_headp == NULL) {
13864 		ASSERT(un->un_waitq_tailp == NULL);
13865 		un->un_waitq_headp = un->un_waitq_tailp = bp;
13866 		bp->av_forw = NULL;
13867 		return;
13868 	}
13869 
13870 	ASSERT(un->un_waitq_tailp != NULL);
13871 
13872 	/*
13873 	 * If sorting is disabled, just add the buf to the tail end of
13874 	 * the wait queue and return.
13875 	 */
13876 	if (un->un_f_disksort_disabled || un->un_f_enable_rmw) {
13877 		un->un_waitq_tailp->av_forw = bp;
13878 		un->un_waitq_tailp = bp;
13879 		bp->av_forw = NULL;
13880 		return;
13881 	}
13882 
13883 	/*
13884 	 * Sort thru the list of requests currently on the wait queue
13885 	 * and add the new buf request at the appropriate position.
13886 	 *
13887 	 * The un->un_waitq_headp is an activity chain pointer on which
13888 	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
13889 	 * first queue holds those requests which are positioned after
13890 	 * the current SD_GET_BLKNO() (in the first request); the second holds
13891 	 * requests which came in after their SD_GET_BLKNO() number was passed.
13892 	 * Thus we implement a one way scan, retracting after reaching
13893 	 * the end of the drive to the first request on the second
13894 	 * queue, at which time it becomes the first queue.
13895 	 * A one-way scan is natural because of the way UNIX read-ahead
13896 	 * blocks are allocated.
13897 	 *
13898 	 * If we lie after the first request, then we must locate the
13899 	 * second request list and add ourselves to it.
13900 	 */
13901 	ap = un->un_waitq_headp;
13902 	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
13903 		while (ap->av_forw != NULL) {
13904 			/*
13905 			 * Look for an "inversion" in the (normally
13906 			 * ascending) block numbers. This indicates
13907 			 * the start of the second request list.
13908 			 */
13909 			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
13910 				/*
13911 				 * Search the second request list for the
13912 				 * first request at a larger block number.
13913 				 * We go before that; however if there is
13914 				 * no such request, we go at the end.
13915 				 */
13916 				do {
13917 					if (SD_GET_BLKNO(bp) <
13918 					    SD_GET_BLKNO(ap->av_forw)) {
13919 						goto insert;
13920 					}
13921 					ap = ap->av_forw;
13922 				} while (ap->av_forw != NULL);
13923 				goto insert;		/* after last */
13924 			}
13925 			ap = ap->av_forw;
13926 		}
13927 
13928 		/*
13929 		 * No inversions... we will go after the last, and
13930 		 * be the first request in the second request list.
13931 		 */
13932 		goto insert;
13933 	}
13934 
13935 	/*
13936 	 * Request is at/after the current request...
13937 	 * sort in the first request list.
13938 	 */
13939 	while (ap->av_forw != NULL) {
13940 		/*
13941 		 * We want to go after the current request (1) if
13942 		 * there is an inversion after it (i.e. it is the end
13943 		 * of the first request list), or (2) if the next
13944 		 * request is a larger block no. than our request.
13945 		 */
13946 		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
13947 		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
13948 			goto insert;
13949 		}
13950 		ap = ap->av_forw;
13951 	}
13952 
13953 	/*
13954 	 * Neither a second list nor a larger request, therefore
13955 	 * we go at the end of the first list (which is the same
13956 	 * as the end of the whole schebang).
13957 	 */
13958 insert:
13959 	bp->av_forw = ap->av_forw;
13960 	ap->av_forw = bp;
13961 
13962 	/*
13963 	 * If we inserted onto the tail end of the waitq, make sure the
13964 	 * tail pointer is updated.
13965 	 */
13966 	if (ap == un->un_waitq_tailp) {
13967 		un->un_waitq_tailp = bp;
13968 	}
13969 }
13970 
13971 
13972 /*
13973  *    Function: sd_start_cmds
13974  *
13975  * Description: Remove and transport cmds from the driver queues.
13976  *
13977  *   Arguments: un - pointer to the unit (soft state) struct for the target.
13978  *
13979  *		immed_bp - ptr to a buf to be transported immediately. Only
13980  *		the immed_bp is transported; bufs on the waitq are not
13981  *		processed and the un_retry_bp is not checked.  If immed_bp is
13982  *		NULL, then normal queue processing is performed.
13983  *
13984  *     Context: May be called from kernel thread context, interrupt context,
13985  *		or runout callback context. This function may not block or
13986  *		call routines that block.
13987  */
13988 
13989 static void
sd_start_cmds(struct sd_lun * un,struct buf * immed_bp)13990 sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
13991 {
13992 	struct	sd_xbuf	*xp;
13993 	struct	buf	*bp;
13994 	void	(*statp)(kstat_io_t *);
13995 #if defined(__x86)	/* DMAFREE for x86 only */
13996 	void	(*saved_statp)(kstat_io_t *);
13997 #endif
13998 	int	rval;
13999 	struct sd_fm_internal *sfip = NULL;
14000 
14001 	ASSERT(un != NULL);
14002 	ASSERT(mutex_owned(SD_MUTEX(un)));
14003 	ASSERT(un->un_ncmds_in_transport >= 0);
14004 	ASSERT(un->un_throttle >= 0);
14005 
14006 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");
14007 
14008 	do {
14009 #if defined(__x86)	/* DMAFREE for x86 only */
14010 		saved_statp = NULL;
14011 #endif
14012 
14013 		/*
14014 		 * If we are syncing or dumping, fail the command to
14015 		 * avoid recursively calling back into scsi_transport().
14016 		 * The dump I/O itself uses a separate code path so this
14017 		 * only prevents non-dump I/O from being sent while dumping.
14018 		 * File system sync takes place before dumping begins.
14019 		 * During panic, filesystem I/O is allowed provided
14020 		 * un_in_callback is <= 1.  This is to prevent recursion
14021 		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
14022 		 * sd_start_cmds and so on.  See panic.c for more information
14023 		 * about the states the system can be in during panic.
14024 		 */
14025 		if ((un->un_state == SD_STATE_DUMPING) ||
14026 		    (ddi_in_panic() && (un->un_in_callback > 1))) {
14027 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14028 			    "sd_start_cmds: panicking\n");
14029 			goto exit;
14030 		}
14031 
14032 		if ((bp = immed_bp) != NULL) {
14033 			/*
14034 			 * We have a bp that must be transported immediately.
14035 			 * It's OK to transport the immed_bp here without doing
14036 			 * the throttle limit check because the immed_bp is
14037 			 * always used in a retry/recovery case. This means
14038 			 * that we know we are not at the throttle limit by
14039 			 * virtue of the fact that to get here we must have
14040 			 * already gotten a command back via sdintr(). This also
14041 			 * relies on (1) the command on un_retry_bp preventing
14042 			 * further commands from the waitq from being issued;
14043 			 * and (2) the code in sd_retry_command checking the
14044 			 * throttle limit before issuing a delayed or immediate
14045 			 * retry. This holds even if the throttle limit is
14046 			 * currently ratcheted down from its maximum value.
14047 			 */
14048 			statp = kstat_runq_enter;
14049 			if (bp == un->un_retry_bp) {
14050 				ASSERT((un->un_retry_statp == NULL) ||
14051 				    (un->un_retry_statp == kstat_waitq_enter) ||
14052 				    (un->un_retry_statp ==
14053 				    kstat_runq_back_to_waitq));
14054 				/*
14055 				 * If the waitq kstat was incremented when
14056 				 * sd_set_retry_bp() queued this bp for a retry,
14057 				 * then we must set up statp so that the waitq
14058 				 * count will get decremented correctly below.
14059 				 * Also we must clear un->un_retry_statp to
14060 				 * ensure that we do not act on a stale value
14061 				 * in this field.
14062 				 */
14063 				if ((un->un_retry_statp == kstat_waitq_enter) ||
14064 				    (un->un_retry_statp ==
14065 				    kstat_runq_back_to_waitq)) {
14066 					statp = kstat_waitq_to_runq;
14067 				}
14068 #if defined(__x86)	/* DMAFREE for x86 only */
14069 				saved_statp = un->un_retry_statp;
14070 #endif
14071 				un->un_retry_statp = NULL;
14072 
14073 				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14074 				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
14075 				    "un_throttle:%d un_ncmds_in_transport:%d\n",
14076 				    un, un->un_retry_bp, un->un_throttle,
14077 				    un->un_ncmds_in_transport);
14078 			} else {
14079 				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
14080 				    "processing priority bp:0x%p\n", bp);
14081 			}
14082 
14083 		} else if ((bp = un->un_waitq_headp) != NULL) {
14084 			/*
14085 			 * A command on the waitq is ready to go, but do not
14086 			 * send it if:
14087 			 *
14088 			 * (1) the throttle limit has been reached, or
14089 			 * (2) a retry is pending, or
14090 			 * (3) a START_STOP_UNIT callback pending, or
14091 			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
14092 			 *	command is pending.
14093 			 *
14094 			 * For all of these conditions, IO processing will
14095 			 * restart after the condition is cleared.
14096 			 */
14097 			if (un->un_ncmds_in_transport >= un->un_throttle) {
14098 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14099 				    "sd_start_cmds: exiting, "
14100 				    "throttle limit reached!\n");
14101 				goto exit;
14102 			}
14103 			if (un->un_retry_bp != NULL) {
14104 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14105 				    "sd_start_cmds: exiting, retry pending!\n");
14106 				goto exit;
14107 			}
14108 			if (un->un_startstop_timeid != NULL) {
14109 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14110 				    "sd_start_cmds: exiting, "
14111 				    "START_STOP pending!\n");
14112 				goto exit;
14113 			}
14114 			if (un->un_direct_priority_timeid != NULL) {
14115 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14116 				    "sd_start_cmds: exiting, "
14117 				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
14118 				goto exit;
14119 			}
14120 
14121 			/* Dequeue the command */
14122 			un->un_waitq_headp = bp->av_forw;
14123 			if (un->un_waitq_headp == NULL) {
14124 				un->un_waitq_tailp = NULL;
14125 			}
14126 			bp->av_forw = NULL;
14127 			statp = kstat_waitq_to_runq;
14128 			SD_TRACE(SD_LOG_IO_CORE, un,
14129 			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);
14130 
14131 		} else {
14132 			/* No work to do so bail out now */
14133 			SD_TRACE(SD_LOG_IO_CORE, un,
14134 			    "sd_start_cmds: no more work, exiting!\n");
14135 			goto exit;
14136 		}
14137 
14138 		/*
14139 		 * Reset the state to normal. This is the mechanism by which
14140 		 * the state transitions from either SD_STATE_RWAIT or
14141 		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
14142 		 * If state is SD_STATE_PM_CHANGING then this command is
14143 		 * part of the device power control and the state must
14144 		 * not be put back to normal. Doing so would would
14145 		 * allow new commands to proceed when they shouldn't,
14146 		 * the device may be going off.
14147 		 */
14148 		if ((un->un_state != SD_STATE_SUSPENDED) &&
14149 		    (un->un_state != SD_STATE_PM_CHANGING)) {
14150 			New_state(un, SD_STATE_NORMAL);
14151 		}
14152 
14153 		xp = SD_GET_XBUF(bp);
14154 		ASSERT(xp != NULL);
14155 
14156 #if defined(__x86)	/* DMAFREE for x86 only */
14157 		/*
14158 		 * Allocate the scsi_pkt if we need one, or attach DMA
14159 		 * resources if we have a scsi_pkt that needs them. The
14160 		 * latter should only occur for commands that are being
14161 		 * retried.
14162 		 */
14163 		if ((xp->xb_pktp == NULL) ||
14164 		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
14165 #else
14166 		if (xp->xb_pktp == NULL) {
14167 #endif
14168 			/*
14169 			 * There is no scsi_pkt allocated for this buf. Call
14170 			 * the initpkt function to allocate & init one.
14171 			 *
14172 			 * The scsi_init_pkt runout callback functionality is
14173 			 * implemented as follows:
14174 			 *
14175 			 * 1) The initpkt function always calls
14176 			 *    scsi_init_pkt(9F) with sdrunout specified as the
14177 			 *    callback routine.
14178 			 * 2) A successful packet allocation is initialized and
14179 			 *    the I/O is transported.
14180 			 * 3) The I/O associated with an allocation resource
14181 			 *    failure is left on its queue to be retried via
14182 			 *    runout or the next I/O.
14183 			 * 4) The I/O associated with a DMA error is removed
14184 			 *    from the queue and failed with EIO. Processing of
14185 			 *    the transport queues is also halted to be
14186 			 *    restarted via runout or the next I/O.
14187 			 * 5) The I/O associated with a CDB size or packet
14188 			 *    size error is removed from the queue and failed
14189 			 *    with EIO. Processing of the transport queues is
14190 			 *    continued.
14191 			 *
14192 			 * Note: there is no interface for canceling a runout
14193 			 * callback. To prevent the driver from detaching or
14194 			 * suspending while a runout is pending the driver
14195 			 * state is set to SD_STATE_RWAIT
14196 			 *
14197 			 * Note: using the scsi_init_pkt callback facility can
14198 			 * result in an I/O request persisting at the head of
14199 			 * the list which cannot be satisfied even after
14200 			 * multiple retries. In the future the driver may
14201 			 * implement some kind of maximum runout count before
14202 			 * failing an I/O.
14203 			 *
14204 			 * Note: the use of funcp below may seem superfluous,
14205 			 * but it helps warlock figure out the correct
14206 			 * initpkt function calls (see [s]sd.wlcmd).
14207 			 */
14208 			struct scsi_pkt	*pktp;
14209 			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);
14210 
14211 			ASSERT(bp != un->un_rqs_bp);
14212 
14213 			funcp = sd_initpkt_map[xp->xb_chain_iostart];
14214 			switch ((*funcp)(bp, &pktp)) {
14215 			case  SD_PKT_ALLOC_SUCCESS:
14216 				xp->xb_pktp = pktp;
14217 				SD_TRACE(SD_LOG_IO_CORE, un,
14218 				    "sd_start_cmd: SD_PKT_ALLOC_SUCCESS 0x%p\n",
14219 				    pktp);
14220 				goto got_pkt;
14221 
14222 			case SD_PKT_ALLOC_FAILURE:
14223 				/*
14224 				 * Temporary (hopefully) resource depletion.
14225 				 * Since retries and RQS commands always have a
14226 				 * scsi_pkt allocated, these cases should never
14227 				 * get here. So the only cases this needs to
14228 				 * handle is a bp from the waitq (which we put
14229 				 * back onto the waitq for sdrunout), or a bp
14230 				 * sent as an immed_bp (which we just fail).
14231 				 */
14232 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14233 				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");
14234 
14235 #if defined(__x86)	/* DMAFREE for x86 only */
14236 
14237 				if (bp == immed_bp) {
14238 					/*
14239 					 * If SD_XB_DMA_FREED is clear, then
14240 					 * this is a failure to allocate a
14241 					 * scsi_pkt, and we must fail the
14242 					 * command.
14243 					 */
14244 					if ((xp->xb_pkt_flags &
14245 					    SD_XB_DMA_FREED) == 0) {
14246 						break;
14247 					}
14248 
14249 					/*
14250 					 * If this immediate command is NOT our
14251 					 * un_retry_bp, then we must fail it.
14252 					 */
14253 					if (bp != un->un_retry_bp) {
14254 						break;
14255 					}
14256 
14257 					/*
14258 					 * We get here if this cmd is our
14259 					 * un_retry_bp that was DMAFREED, but
14260 					 * scsi_init_pkt() failed to reallocate
14261 					 * DMA resources when we attempted to
14262 					 * retry it. This can happen when an
14263 					 * mpxio failover is in progress, but
14264 					 * we don't want to just fail the
14265 					 * command in this case.
14266 					 *
14267 					 * Use timeout(9F) to restart it after
14268 					 * a 100ms delay.  We don't want to
14269 					 * let sdrunout() restart it, because
14270 					 * sdrunout() is just supposed to start
14271 					 * commands that are sitting on the
14272 					 * wait queue.  The un_retry_bp stays
14273 					 * set until the command completes, but
14274 					 * sdrunout can be called many times
14275 					 * before that happens.  Since sdrunout
14276 					 * cannot tell if the un_retry_bp is
14277 					 * already in the transport, it could
14278 					 * end up calling scsi_transport() for
14279 					 * the un_retry_bp multiple times.
14280 					 *
14281 					 * Also: don't schedule the callback
14282 					 * if some other callback is already
14283 					 * pending.
14284 					 */
14285 					if (un->un_retry_statp == NULL) {
14286 						/*
14287 						 * restore the kstat pointer to
14288 						 * keep kstat counts coherent
14289 						 * when we do retry the command.
14290 						 */
14291 						un->un_retry_statp =
14292 						    saved_statp;
14293 					}
14294 
14295 					if ((un->un_startstop_timeid == NULL) &&
14296 					    (un->un_retry_timeid == NULL) &&
14297 					    (un->un_direct_priority_timeid ==
14298 					    NULL)) {
14299 
14300 						un->un_retry_timeid =
14301 						    timeout(
14302 						    sd_start_retry_command,
14303 						    un, SD_RESTART_TIMEOUT);
14304 					}
14305 					goto exit;
14306 				}
14307 
14308 #else
14309 				if (bp == immed_bp) {
14310 					break;	/* Just fail the command */
14311 				}
14312 #endif
14313 
14314 				/* Add the buf back to the head of the waitq */
14315 				bp->av_forw = un->un_waitq_headp;
14316 				un->un_waitq_headp = bp;
14317 				if (un->un_waitq_tailp == NULL) {
14318 					un->un_waitq_tailp = bp;
14319 				}
14320 				goto exit;
14321 
14322 			case SD_PKT_ALLOC_FAILURE_NO_DMA:
14323 				/*
14324 				 * HBA DMA resource failure. Fail the command
14325 				 * and continue processing of the queues.
14326 				 */
14327 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14328 				    "sd_start_cmds: "
14329 				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
14330 				break;
14331 
14332 			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
14333 				/*
14334 				 * Note:x86: Partial DMA mapping not supported
14335 				 * for USCSI commands, and all the needed DMA
14336 				 * resources were not allocated.
14337 				 */
14338 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14339 				    "sd_start_cmds: "
14340 				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
14341 				break;
14342 
14343 			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
14344 				/*
14345 				 * Note:x86: Request cannot fit into CDB based
14346 				 * on lba and len.
14347 				 */
14348 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14349 				    "sd_start_cmds: "
14350 				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
14351 				break;
14352 
14353 			default:
14354 				/* Should NEVER get here! */
14355 				panic("scsi_initpkt error");
14356 				/*NOTREACHED*/
14357 			}
14358 
14359 			/*
14360 			 * Fatal error in allocating a scsi_pkt for this buf.
14361 			 * Update kstats & return the buf with an error code.
14362 			 * We must use sd_return_failed_command_no_restart() to
14363 			 * avoid a recursive call back into sd_start_cmds().
14364 			 * However this also means that we must keep processing
14365 			 * the waitq here in order to avoid stalling.
14366 			 */
14367 			if (statp == kstat_waitq_to_runq) {
14368 				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
14369 			}
14370 			sd_return_failed_command_no_restart(un, bp, EIO);
14371 			if (bp == immed_bp) {
14372 				/* immed_bp is gone by now, so clear this */
14373 				immed_bp = NULL;
14374 			}
14375 			continue;
14376 		}
14377 got_pkt:
14378 		if (bp == immed_bp) {
14379 			/* goto the head of the class.... */
14380 			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
14381 		}
14382 
14383 		un->un_ncmds_in_transport++;
14384 		SD_UPDATE_KSTATS(un, statp, bp);
14385 
14386 		/*
14387 		 * Call scsi_transport() to send the command to the target.
14388 		 * According to SCSA architecture, we must drop the mutex here
14389 		 * before calling scsi_transport() in order to avoid deadlock.
14390 		 * Note that the scsi_pkt's completion routine can be executed
14391 		 * (from interrupt context) even before the call to
14392 		 * scsi_transport() returns.
14393 		 */
14394 		SD_TRACE(SD_LOG_IO_CORE, un,
14395 		    "sd_start_cmds: calling scsi_transport()\n");
14396 		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);
14397 
14398 		mutex_exit(SD_MUTEX(un));
14399 		rval = scsi_transport(xp->xb_pktp);
14400 		mutex_enter(SD_MUTEX(un));
14401 
14402 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14403 		    "sd_start_cmds: scsi_transport() returned %d\n", rval);
14404 
14405 		switch (rval) {
14406 		case TRAN_ACCEPT:
14407 			/* Clear this with every pkt accepted by the HBA */
14408 			un->un_tran_fatal_count = 0;
14409 			break;	/* Success; try the next cmd (if any) */
14410 
14411 		case TRAN_BUSY:
14412 			un->un_ncmds_in_transport--;
14413 			ASSERT(un->un_ncmds_in_transport >= 0);
14414 
14415 			/*
14416 			 * Don't retry request sense, the sense data
14417 			 * is lost when another request is sent.
14418 			 * Free up the rqs buf and retry
14419 			 * the original failed cmd.  Update kstat.
14420 			 */
14421 			if (bp == un->un_rqs_bp) {
14422 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14423 				bp = sd_mark_rqs_idle(un, xp);
14424 				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
14425 				    NULL, NULL, EIO, un->un_busy_timeout / 500,
14426 				    kstat_waitq_enter);
14427 				goto exit;
14428 			}
14429 
14430 #if defined(__x86)	/* DMAFREE for x86 only */
14431 			/*
14432 			 * Free the DMA resources for the  scsi_pkt. This will
14433 			 * allow mpxio to select another path the next time
14434 			 * we call scsi_transport() with this scsi_pkt.
14435 			 * See sdintr() for the rationalization behind this.
14436 			 */
14437 			if ((un->un_f_is_fibre == TRUE) &&
14438 			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
14439 			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
14440 				scsi_dmafree(xp->xb_pktp);
14441 				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
14442 			}
14443 #endif
14444 
14445 			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
14446 				/*
14447 				 * Commands that are SD_PATH_DIRECT_PRIORITY
14448 				 * are for error recovery situations. These do
14449 				 * not use the normal command waitq, so if they
14450 				 * get a TRAN_BUSY we cannot put them back onto
14451 				 * the waitq for later retry. One possible
14452 				 * problem is that there could already be some
14453 				 * other command on un_retry_bp that is waiting
14454 				 * for this one to complete, so we would be
14455 				 * deadlocked if we put this command back onto
14456 				 * the waitq for later retry (since un_retry_bp
14457 				 * must complete before the driver gets back to
14458 				 * commands on the waitq).
14459 				 *
14460 				 * To avoid deadlock we must schedule a callback
14461 				 * that will restart this command after a set
14462 				 * interval.  This should keep retrying for as
14463 				 * long as the underlying transport keeps
14464 				 * returning TRAN_BUSY (just like for other
14465 				 * commands).  Use the same timeout interval as
14466 				 * for the ordinary TRAN_BUSY retry.
14467 				 */
14468 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14469 				    "sd_start_cmds: scsi_transport() returned "
14470 				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");
14471 
14472 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14473 				un->un_direct_priority_timeid =
14474 				    timeout(sd_start_direct_priority_command,
14475 				    bp, un->un_busy_timeout / 500);
14476 
14477 				goto exit;
14478 			}
14479 
14480 			/*
14481 			 * For TRAN_BUSY, we want to reduce the throttle value,
14482 			 * unless we are retrying a command.
14483 			 */
14484 			if (bp != un->un_retry_bp) {
14485 				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
14486 			}
14487 
14488 			/*
14489 			 * Set up the bp to be tried again 10 ms later.
14490 			 * Note:x86: Is there a timeout value in the sd_lun
14491 			 * for this condition?
14492 			 */
14493 			sd_set_retry_bp(un, bp, un->un_busy_timeout / 500,
14494 			    kstat_runq_back_to_waitq);
14495 			goto exit;
14496 
14497 		case TRAN_FATAL_ERROR:
14498 			un->un_tran_fatal_count++;
14499 			/* FALLTHRU */
14500 
14501 		case TRAN_BADPKT:
14502 		default:
14503 			un->un_ncmds_in_transport--;
14504 			ASSERT(un->un_ncmds_in_transport >= 0);
14505 
14506 			/*
14507 			 * If this is our REQUEST SENSE command with a
14508 			 * transport error, we must get back the pointers
14509 			 * to the original buf, and mark the REQUEST
14510 			 * SENSE command as "available".
14511 			 */
14512 			if (bp == un->un_rqs_bp) {
14513 				bp = sd_mark_rqs_idle(un, xp);
14514 				xp = SD_GET_XBUF(bp);
14515 			} else {
14516 				/*
14517 				 * Legacy behavior: do not update transport
14518 				 * error count for request sense commands.
14519 				 */
14520 				SD_UPDATE_ERRSTATS(un, sd_transerrs);
14521 			}
14522 
14523 			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14524 			sd_print_transport_rejected_message(un, xp, rval);
14525 
14526 			/*
14527 			 * This command will be terminated by SD driver due
14528 			 * to a fatal transport error. We should post
14529 			 * ereport.io.scsi.cmd.disk.tran with driver-assessment
14530 			 * of "fail" for any command to indicate this
14531 			 * situation.
14532 			 */
14533 			if (xp->xb_ena > 0) {
14534 				ASSERT(un->un_fm_private != NULL);
14535 				sfip = un->un_fm_private;
14536 				sfip->fm_ssc.ssc_flags |= SSC_FLAGS_TRAN_ABORT;
14537 				sd_ssc_extract_info(&sfip->fm_ssc, un,
14538 				    xp->xb_pktp, bp, xp);
14539 				sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_FATAL);
14540 			}
14541 
14542 			/*
14543 			 * We must use sd_return_failed_command_no_restart() to
14544 			 * avoid a recursive call back into sd_start_cmds().
14545 			 * However this also means that we must keep processing
14546 			 * the waitq here in order to avoid stalling.
14547 			 */
14548 			sd_return_failed_command_no_restart(un, bp, EIO);
14549 
14550 			/*
14551 			 * Notify any threads waiting in sd_ddi_suspend() that
14552 			 * a command completion has occurred.
14553 			 */
14554 			if (un->un_state == SD_STATE_SUSPENDED) {
14555 				cv_broadcast(&un->un_disk_busy_cv);
14556 			}
14557 
14558 			if (bp == immed_bp) {
14559 				/* immed_bp is gone by now, so clear this */
14560 				immed_bp = NULL;
14561 			}
14562 			break;
14563 		}
14564 
14565 	} while (immed_bp == NULL);
14566 
14567 exit:
14568 	ASSERT(mutex_owned(SD_MUTEX(un)));
14569 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
14570 }
14571 
14572 
14573 /*
14574  *    Function: sd_return_command
14575  *
14576  * Description: Returns a command to its originator (with or without an
14577  *		error).  Also starts commands waiting to be transported
14578  *		to the target.
14579  *
14580  *     Context: May be called from interrupt, kernel, or timeout context
14581  */
14582 
14583 static void
14584 sd_return_command(struct sd_lun *un, struct buf *bp)
14585 {
14586 	struct sd_xbuf *xp;
14587 	struct scsi_pkt *pktp;
14588 	struct sd_fm_internal *sfip;
14589 
14590 	ASSERT(bp != NULL);
14591 	ASSERT(un != NULL);
14592 	ASSERT(mutex_owned(SD_MUTEX(un)));
14593 	ASSERT(bp != un->un_rqs_bp);
14594 	xp = SD_GET_XBUF(bp);
14595 	ASSERT(xp != NULL);
14596 
14597 	pktp = SD_GET_PKTP(bp);
14598 	sfip = (struct sd_fm_internal *)un->un_fm_private;
14599 	ASSERT(sfip != NULL);
14600 
14601 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");
14602 
14603 	/*
14604 	 * Note: check for the "sdrestart failed" case.
14605 	 */
14606 	if ((un->un_partial_dma_supported == 1) &&
14607 	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
14608 	    (geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
14609 	    (xp->xb_pktp->pkt_resid == 0)) {
14610 
14611 		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
14612 			/*
14613 			 * Successfully set up next portion of cmd
14614 			 * transfer, try sending it
14615 			 */
14616 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
14617 			    NULL, NULL, 0, (clock_t)0, NULL);
14618 			sd_start_cmds(un, NULL);
14619 			return;	/* Note:x86: need a return here? */
14620 		}
14621 	}
14622 
14623 	/*
14624 	 * If this is the failfast bp, clear it from un_failfast_bp. This
14625 	 * can happen if upon being re-tried the failfast bp either
14626 	 * succeeded or encountered another error (possibly even a different
14627 	 * error than the one that precipitated the failfast state, but in
14628 	 * that case it would have had to exhaust retries as well). Regardless,
14629 	 * this should not occur whenever the instance is in the active
14630 	 * failfast state.
14631 	 */
14632 	if (bp == un->un_failfast_bp) {
14633 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
14634 		un->un_failfast_bp = NULL;
14635 	}
14636 
14637 	/*
14638 	 * Clear the failfast state upon successful completion of ANY cmd.
14639 	 */
14640 	if (bp->b_error == 0) {
14641 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
14642 		/*
14643 		 * If this is a successful command, but used to be retried,
14644 		 * we will take it as a recovered command and post an
14645 		 * ereport with driver-assessment of "recovered".
14646 		 */
14647 		if (xp->xb_ena > 0) {
14648 			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
14649 			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_RECOVERY);
14650 		}
14651 	} else {
14652 		/*
14653 		 * If this is a failed non-USCSI command we will post an
14654 		 * ereport with driver-assessment set accordingly("fail" or
14655 		 * "fatal").
14656 		 */
14657 		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
14658 			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
14659 			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_FATAL);
14660 		}
14661 	}
14662 
14663 	/*
14664 	 * This is used if the command was retried one or more times. Show that
14665 	 * we are done with it, and allow processing of the waitq to resume.
14666 	 */
14667 	if (bp == un->un_retry_bp) {
14668 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14669 		    "sd_return_command: un:0x%p: "
14670 		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
14671 		un->un_retry_bp = NULL;
14672 		un->un_retry_statp = NULL;
14673 	}
14674 
14675 	SD_UPDATE_RDWR_STATS(un, bp);
14676 	SD_UPDATE_PARTITION_STATS(un, bp);
14677 
14678 	switch (un->un_state) {
14679 	case SD_STATE_SUSPENDED:
14680 		/*
14681 		 * Notify any threads waiting in sd_ddi_suspend() that
14682 		 * a command completion has occurred.
14683 		 */
14684 		cv_broadcast(&un->un_disk_busy_cv);
14685 		break;
14686 	default:
14687 		sd_start_cmds(un, NULL);
14688 		break;
14689 	}
14690 
14691 	/* Return this command up the iodone chain to its originator. */
14692 	mutex_exit(SD_MUTEX(un));
14693 
14694 	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
14695 	xp->xb_pktp = NULL;
14696 
14697 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
14698 
14699 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14700 	mutex_enter(SD_MUTEX(un));
14701 
14702 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
14703 }
14704 
14705 
14706 /*
14707  *    Function: sd_return_failed_command
14708  *
14709  * Description: Command completion when an error occurred.
14710  *
14711  *     Context: May be called from interrupt context
14712  */
14713 
14714 static void
14715 sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
14716 {
14717 	ASSERT(bp != NULL);
14718 	ASSERT(un != NULL);
14719 	ASSERT(mutex_owned(SD_MUTEX(un)));
14720 
14721 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14722 	    "sd_return_failed_command: entry\n");
14723 
14724 	/*
14725 	 * b_resid could already be nonzero due to a partial data
14726 	 * transfer, so do not change it here.
14727 	 */
14728 	SD_BIOERROR(bp, errcode);
14729 
14730 	sd_return_command(un, bp);
14731 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14732 	    "sd_return_failed_command: exit\n");
14733 }
14734 
14735 
14736 /*
14737  *    Function: sd_return_failed_command_no_restart
14738  *
14739  * Description: Same as sd_return_failed_command, but ensures that no
14740  *		call back into sd_start_cmds will be issued.
14741  *
14742  *     Context: May be called from interrupt context
14743  */
14744 
14745 static void
14746 sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
14747     int errcode)
14748 {
14749 	struct sd_xbuf *xp;
14750 
14751 	ASSERT(bp != NULL);
14752 	ASSERT(un != NULL);
14753 	ASSERT(mutex_owned(SD_MUTEX(un)));
14754 	xp = SD_GET_XBUF(bp);
14755 	ASSERT(xp != NULL);
14756 	ASSERT(errcode != 0);
14757 
14758 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14759 	    "sd_return_failed_command_no_restart: entry\n");
14760 
14761 	/*
14762 	 * b_resid could already be nonzero due to a partial data
14763 	 * transfer, so do not change it here.
14764 	 */
14765 	SD_BIOERROR(bp, errcode);
14766 
14767 	/*
14768 	 * If this is the failfast bp, clear it. This can happen if the
14769 	 * failfast bp encounterd a fatal error when we attempted to
14770 	 * re-try it (such as a scsi_transport(9F) failure).  However
14771 	 * we should NOT be in an active failfast state if the failfast
14772 	 * bp is not NULL.
14773 	 */
14774 	if (bp == un->un_failfast_bp) {
14775 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
14776 		un->un_failfast_bp = NULL;
14777 	}
14778 
14779 	if (bp == un->un_retry_bp) {
14780 		/*
14781 		 * This command was retried one or more times. Show that we are
14782 		 * done with it, and allow processing of the waitq to resume.
14783 		 */
14784 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14785 		    "sd_return_failed_command_no_restart: "
14786 		    " un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
14787 		un->un_retry_bp = NULL;
14788 		un->un_retry_statp = NULL;
14789 	}
14790 
14791 	SD_UPDATE_RDWR_STATS(un, bp);
14792 	SD_UPDATE_PARTITION_STATS(un, bp);
14793 
14794 	mutex_exit(SD_MUTEX(un));
14795 
14796 	if (xp->xb_pktp != NULL) {
14797 		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
14798 		xp->xb_pktp = NULL;
14799 	}
14800 
14801 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
14802 
14803 	mutex_enter(SD_MUTEX(un));
14804 
14805 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14806 	    "sd_return_failed_command_no_restart: exit\n");
14807 }
14808 
14809 
/*
 *    Function: sd_retry_command
 *
 * Description: queue up a command for retry, or (optionally) fail it
 *		if retry counts are exhausted.
 *
 *   Arguments: un - Pointer to the sd_lun struct for the target.
 *
 *		bp - Pointer to the buf for the command to be retried.
 *
 *		retry_check_flag - Flag to see which (if any) of the retry
 *		   counts should be decremented/checked. If the indicated
 *		   retry count is exhausted, then the command will not be
 *		   retried; it will be failed instead. After masking with
 *		   SD_RETRIES_MASK this should be a value equal to one of
 *		   the following:
 *
 *			SD_RETRIES_NOCHECK
 *			SD_RETRIES_STANDARD
 *			SD_RETRIES_VICTIM
 *			SD_RETRIES_UA
 *			SD_RETRIES_BUSY
 *
 *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
 *		   if the check should be made to see if FLAG_ISOLATE is set
 *		   in the pkt. If FLAG_ISOLATE is set, then the command is
 *		   not retried, it is simply failed.
 *
 *		   Optionally may also be bitwise-OR'ed with
 *		   SD_RETRIES_FAILFAST to indicate a failfast error
 *		   condition (see the failfast handling in the body).
 *
 *		user_funcp - Ptr to function to call before dispatching the
 *		   command. May be NULL if no action needs to be performed.
 *		   (Primarily intended for printing messages.) Its last
 *		   argument is SD_IMMEDIATE_RETRY_ISSUED,
 *		   SD_DELAYED_RETRY_ISSUED or SD_NO_RETRY_ISSUED, depending
 *		   on the action being taken.
 *
 *		user_arg - Optional argument to be passed along to
 *		   the user_funcp call.
 *
 *		failure_code - errno return code to set in the bp if the
 *		   command is going to be failed. If this is zero, no
 *		   "retry" ereport is posted for the command.
 *
 *		retry_delay - Retry delay interval in (clock_t) units. May
 *		   be zero which indicates that the retry should be retried
 *		   immediately (ie, without an intervening delay). If the
 *		   target is already at its throttle limit, an immediate
 *		   retry falls back to a delayed retry of un_busy_timeout.
 *
 *		statp - Ptr to kstat function to be updated if the command
 *		   is queued for a delayed retry. May be NULL if no kstat
 *		   update is desired.
 *
 *     Context: May be called from interrupt context. SD_MUTEX(un) must be
 *		held by the caller.
 */

static void
sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
    void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int code),
    void *user_arg, int failure_code, clock_t retry_delay,
    void (*statp)(kstat_io_t *))
{
	struct sd_xbuf	*xp;
	struct scsi_pkt	*pktp;
	struct sd_fm_internal *sfip;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);

	sfip = (struct sd_fm_internal *)un->un_fm_private;
	ASSERT(sfip != NULL);

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);

	/*
	 * If we are syncing or dumping, fail the command to avoid
	 * recursively calling back into scsi_transport(). The user_funcp
	 * callback is skipped on this path.
	 */
	if (ddi_in_panic()) {
		goto fail_command_no_log;
	}

	/*
	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
	 * log an error and fail the command.
	 */
	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
		sd_dump_memory(un, SD_LOG_IO, "CDB",
		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
		goto fail_command;
	}

	/*
	 * If we are suspended or dumping, then put the command onto the
	 * head of the wait queue since we don't want to start more
	 * commands, and clear the un_retry_bp. The next time we are
	 * resumed, the command will be handled from the wait queue.
	 */
	switch (un->un_state) {
	case SD_STATE_SUSPENDED:
	case SD_STATE_DUMPING:
		bp->av_forw = un->un_waitq_headp;
		un->un_waitq_headp = bp;
		if (un->un_waitq_tailp == NULL) {
			un->un_waitq_tailp = bp;
		}
		if (bp == un->un_retry_bp) {
			un->un_retry_bp = NULL;
			un->un_retry_statp = NULL;
		}
		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
		return;
	default:
		break;
	}

	/*
	 * If the caller wants us to check FLAG_ISOLATE, then see if that
	 * is set; if it is then we do not want to retry the command.
	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
	 */
	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
			goto fail_command;
		}
	}


	/*
	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
	 * command timeout or a selection timeout has occurred. This means
	 * that we were unable to establish any kind of communication with
	 * the target, and subsequent retries and/or commands are likely
	 * to encounter similar results and take a long time to complete.
	 *
	 * If this is a failfast error condition, we need to update the
	 * failfast state, even if this bp does not have B_FAILFAST set.
	 */
	if (retry_check_flag & SD_RETRIES_FAILFAST) {
		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
			ASSERT(un->un_failfast_bp == NULL);
			/*
			 * If we are already in the active failfast state, and
			 * another failfast error condition has been detected,
			 * then fail this command if it has B_FAILFAST set.
			 * If B_FAILFAST is clear, then maintain the legacy
			 * behavior of retrying heroically, even tho this will
			 * take a lot more time to fail the command.
			 */
			if (bp->b_flags & B_FAILFAST) {
				goto fail_command;
			}
		} else {
			/*
			 * We're not in the active failfast state, but we
			 * have a failfast error condition, so we must begin
			 * transition to the next state. We do this regardless
			 * of whether or not this bp has B_FAILFAST set.
			 */
			if (un->un_failfast_bp == NULL) {
				/*
				 * This is the first bp to meet a failfast
				 * condition so save it on un_failfast_bp &
				 * do normal retry processing. Do not enter
				 * active failfast state yet. This marks
				 * entry into the "failfast pending" state.
				 */
				un->un_failfast_bp = bp;

			} else if (un->un_failfast_bp == bp) {
				/*
				 * This is the second time *this* bp has
				 * encountered a failfast error condition,
				 * so enter active failfast state & flush
				 * queues as appropriate.
				 */
				un->un_failfast_state = SD_FAILFAST_ACTIVE;
				un->un_failfast_bp = NULL;
				sd_failfast_flushq(un);

				/*
				 * Fail this bp now if B_FAILFAST set;
				 * otherwise continue with retries. (It would
				 * be pretty ironic if this bp succeeded on a
				 * subsequent retry after we just flushed all
				 * the queues).
				 */
				if (bp->b_flags & B_FAILFAST) {
					goto fail_command;
				}

#if !defined(lint) && !defined(__lint)
			} else {
				/*
				 * If neither of the preceding conditionals
				 * was true, it means that there is some
				 * *other* bp that has met an initial failfast
				 * condition and is currently either being
				 * retried or is waiting to be retried. In
				 * that case we should perform normal retry
				 * processing on *this* bp, since there is a
				 * chance that the current failfast condition
				 * is transient and recoverable. If that does
				 * not turn out to be the case, then retries
				 * will be cleared when the wait queue is
				 * flushed anyway.
				 */
#endif
			}
		}
	} else {
		/*
		 * SD_RETRIES_FAILFAST is clear, which indicates that we
		 * likely were able to at least establish some level of
		 * communication with the target and subsequent commands
		 * and/or retries are likely to get through to the target,
		 * In this case we want to be aggressive about clearing
		 * the failfast state. Note that this does not affect
		 * the "failfast pending" condition.
		 */
		un->un_failfast_state = SD_FAILFAST_INACTIVE;
	}


	/*
	 * Check the specified retry count to see if we can still do
	 * any retries with this pkt before we should fail it.
	 */
	switch (retry_check_flag & SD_RETRIES_MASK) {
	case SD_RETRIES_VICTIM:
		/*
		 * Check the victim retry count. If exhausted, then fall
		 * thru & check against the standard retry count.
		 */
		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
			/* Increment count & proceed with the retry */
			xp->xb_victim_retry_count++;
			break;
		}
		/* Victim retries exhausted, fall back to std. retries... */
		/* FALLTHRU */

	case SD_RETRIES_STANDARD:
		if (xp->xb_retry_count >= un->un_retry_count) {
			/* Retries exhausted, fail the command */
			SD_TRACE(SD_LOG_IO_CORE, un,
			    "sd_retry_command: retries exhausted!\n");
			/*
			 * update b_resid for failed SCMD_READ & SCMD_WRITE
			 * commands with nonzero pkt_resid.
			 */
			if ((pktp->pkt_reason == CMD_CMPLT) &&
			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
			    (pktp->pkt_resid != 0)) {
				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
					SD_UPDATE_B_RESID(bp, pktp);
				}
			}
			goto fail_command;
		}
		xp->xb_retry_count++;
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
		break;

	case SD_RETRIES_UA:
		/* Unit Attention retries use their own counter and limit. */
		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
			/* Retries exhausted, fail the command */
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Unit Attention retries exhausted. "
			    "Check the target.\n");
			goto fail_command;
		}
		xp->xb_ua_retry_count++;
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_retry_command: retry count:%d\n",
		    xp->xb_ua_retry_count);
		break;

	case SD_RETRIES_BUSY:
		/*
		 * Busy retries share xb_retry_count with the standard
		 * retries but are limited by un_busy_retry_count.
		 */
		if (xp->xb_retry_count >= un->un_busy_retry_count) {
			/* Retries exhausted, fail the command */
			SD_TRACE(SD_LOG_IO_CORE, un,
			    "sd_retry_command: retries exhausted!\n");
			goto fail_command;
		}
		xp->xb_retry_count++;
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
		break;

	case SD_RETRIES_NOCHECK:
	default:
		/* No retry count to check. Just proceed with the retry */
		break;
	}

	/*
	 * The command is going to be retried: set FLAG_HEAD in its pkt
	 * (head-of-queue handling; see scsi_pkt(9S)).
	 */
	xp->xb_pktp->pkt_flags |= FLAG_HEAD;

	/*
	 * If this is a non-USCSI command that is being retried, post an
	 * ereport with a driver-assessment of "retry".
	 * For partial DMA, request sense and STATUS_QFULL there is no
	 * hardware error, so ereport posting is bypassed; this is keyed
	 * off failure_code being zero (presumably what callers pass for
	 * those cases -- verify against the callers).
	 */
	if (failure_code != 0) {
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_RETRY);
		}
	}

	/*
	 * If we were given a zero timeout, we must attempt to retry the
	 * command immediately (ie, without a delay).
	 */
	if (retry_delay == 0) {
		/*
		 * Check some limiting conditions to see if we can actually
		 * do the immediate retry.  If we cannot, then we must
		 * fall back to queueing up a delayed retry.
		 */
		if (un->un_ncmds_in_transport >= un->un_throttle) {
			/*
			 * We are at the throttle limit for the target,
			 * fall back to delayed retry. Note that this
			 * overrides the caller's statp.
			 */
			retry_delay = un->un_busy_timeout;
			statp = kstat_waitq_enter;
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_retry_command: immed. retry hit "
			    "throttle!\n");
		} else {
			/*
			 * We're clear to proceed with the immediate retry.
			 * First call the user-provided function (if any)
			 */
			if (user_funcp != NULL) {
				(*user_funcp)(un, bp, user_arg,
				    SD_IMMEDIATE_RETRY_ISSUED);
#ifdef __lock_lint
				sd_print_incomplete_msg(un, bp, user_arg,
				    SD_IMMEDIATE_RETRY_ISSUED);
				sd_print_cmd_incomplete_msg(un, bp, user_arg,
				    SD_IMMEDIATE_RETRY_ISSUED);
				sd_print_sense_failed_msg(un, bp, user_arg,
				    SD_IMMEDIATE_RETRY_ISSUED);
#endif
			}

			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_retry_command: issuing immediate retry\n");

			/*
			 * Call sd_start_cmds() to transport the command to
			 * the target.
			 */
			sd_start_cmds(un, bp);

			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_retry_command exit\n");
			return;
		}
	}

	/*
	 * Set up to retry the command after a delay.
	 * First call the user-provided function (if any)
	 */
	if (user_funcp != NULL) {
		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
	}

	sd_set_retry_bp(un, bp, retry_delay, statp);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
	return;

fail_command:

	/* Let the caller's callback know that no retry will be issued. */
	if (user_funcp != NULL) {
		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
	}

fail_command_no_log:

	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_retry_command: returning failed command\n");

	sd_return_failed_command(un, bp, failure_code);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
}
15207 
15208 
15209 /*
15210  *    Function: sd_set_retry_bp
15211  *
15212  * Description: Set up the given bp for retry.
15213  *
15214  *   Arguments: un - ptr to associated softstate
15215  *		bp - ptr to buf(9S) for the command
15216  *		retry_delay - time interval before issuing retry (may be 0)
15217  *		statp - optional pointer to kstat function
15218  *
15219  *     Context: May be called under interrupt context
15220  */
15221 
15222 static void
15223 sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
15224     void (*statp)(kstat_io_t *))
15225 {
15226 	ASSERT(un != NULL);
15227 	ASSERT(mutex_owned(SD_MUTEX(un)));
15228 	ASSERT(bp != NULL);
15229 
15230 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15231 	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);
15232 
15233 	/*
15234 	 * Indicate that the command is being retried. This will not allow any
15235 	 * other commands on the wait queue to be transported to the target
15236 	 * until this command has been completed (success or failure). The
15237 	 * "retry command" is not transported to the target until the given
15238 	 * time delay expires, unless the user specified a 0 retry_delay.
15239 	 *
15240 	 * Note: the timeout(9F) callback routine is what actually calls
15241 	 * sd_start_cmds() to transport the command, with the exception of a
15242 	 * zero retry_delay. The only current implementor of a zero retry delay
15243 	 * is the case where a START_STOP_UNIT is sent to spin-up a device.
15244 	 */
15245 	if (un->un_retry_bp == NULL) {
15246 		ASSERT(un->un_retry_statp == NULL);
15247 		un->un_retry_bp = bp;
15248 
15249 		/*
15250 		 * If the user has not specified a delay the command should
15251 		 * be queued and no timeout should be scheduled.
15252 		 */
15253 		if (retry_delay == 0) {
15254 			/*
15255 			 * Save the kstat pointer that will be used in the
15256 			 * call to SD_UPDATE_KSTATS() below, so that
15257 			 * sd_start_cmds() can correctly decrement the waitq
15258 			 * count when it is time to transport this command.
15259 			 */
15260 			un->un_retry_statp = statp;
15261 			goto done;
15262 		}
15263 	}
15264 
15265 	if (un->un_retry_bp == bp) {
15266 		/*
15267 		 * Save the kstat pointer that will be used in the call to
15268 		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
15269 		 * correctly decrement the waitq count when it is time to
15270 		 * transport this command.
15271 		 */
15272 		un->un_retry_statp = statp;
15273 
15274 		/*
15275 		 * Schedule a timeout if:
15276 		 *   1) The user has specified a delay.
15277 		 *   2) There is not a START_STOP_UNIT callback pending.
15278 		 *
15279 		 * If no delay has been specified, then it is up to the caller
15280 		 * to ensure that IO processing continues without stalling.
15281 		 * Effectively, this means that the caller will issue the
15282 		 * required call to sd_start_cmds(). The START_STOP_UNIT
15283 		 * callback does this after the START STOP UNIT command has
15284 		 * completed. In either of these cases we should not schedule
15285 		 * a timeout callback here.  Also don't schedule the timeout if
15286 		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
15287 		 */
15288 		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
15289 		    (un->un_direct_priority_timeid == NULL)) {
15290 			un->un_retry_timeid =
15291 			    timeout(sd_start_retry_command, un, retry_delay);
15292 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15293 			    "sd_set_retry_bp: setting timeout: un: 0x%p"
15294 			    " bp:0x%p un_retry_timeid:0x%p\n",
15295 			    un, bp, un->un_retry_timeid);
15296 		}
15297 	} else {
15298 		/*
15299 		 * We only get in here if there is already another command
15300 		 * waiting to be retried.  In this case, we just put the
15301 		 * given command onto the wait queue, so it can be transported
15302 		 * after the current retry command has completed.
15303 		 *
15304 		 * Also we have to make sure that if the command at the head
15305 		 * of the wait queue is the un_failfast_bp, that we do not
15306 		 * put ahead of it any other commands that are to be retried.
15307 		 */
15308 		if ((un->un_failfast_bp != NULL) &&
15309 		    (un->un_failfast_bp == un->un_waitq_headp)) {
15310 			/*
15311 			 * Enqueue this command AFTER the first command on
15312 			 * the wait queue (which is also un_failfast_bp).
15313 			 */
15314 			bp->av_forw = un->un_waitq_headp->av_forw;
15315 			un->un_waitq_headp->av_forw = bp;
15316 			if (un->un_waitq_headp == un->un_waitq_tailp) {
15317 				un->un_waitq_tailp = bp;
15318 			}
15319 		} else {
15320 			/* Enqueue this command at the head of the waitq. */
15321 			bp->av_forw = un->un_waitq_headp;
15322 			un->un_waitq_headp = bp;
15323 			if (un->un_waitq_tailp == NULL) {
15324 				un->un_waitq_tailp = bp;
15325 			}
15326 		}
15327 
15328 		if (statp == NULL) {
15329 			statp = kstat_waitq_enter;
15330 		}
15331 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15332 		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
15333 	}
15334 
15335 done:
15336 	if (statp != NULL) {
15337 		SD_UPDATE_KSTATS(un, statp, bp);
15338 	}
15339 
15340 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15341 	    "sd_set_retry_bp: exit un:0x%p\n", un);
15342 }
15343 
15344 
15345 /*
15346  *    Function: sd_start_retry_command
15347  *
15348  * Description: Start the command that has been waiting on the target's
15349  *		retry queue.  Called from timeout(9F) context after the
15350  *		retry delay interval has expired.
15351  *
15352  *   Arguments: arg - pointer to associated softstate for the device.
15353  *
15354  *     Context: timeout(9F) thread context.  May not sleep.
15355  */
15356 
15357 static void
15358 sd_start_retry_command(void *arg)
15359 {
15360 	struct sd_lun *un = arg;
15361 
15362 	ASSERT(un != NULL);
15363 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15364 
15365 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15366 	    "sd_start_retry_command: entry\n");
15367 
15368 	mutex_enter(SD_MUTEX(un));
15369 
15370 	un->un_retry_timeid = NULL;
15371 
15372 	if (un->un_retry_bp != NULL) {
15373 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15374 		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
15375 		    un, un->un_retry_bp);
15376 		sd_start_cmds(un, un->un_retry_bp);
15377 	}
15378 
15379 	mutex_exit(SD_MUTEX(un));
15380 
15381 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15382 	    "sd_start_retry_command: exit\n");
15383 }
15384 
15385 /*
15386  *    Function: sd_rmw_msg_print_handler
15387  *
15388  * Description: If RMW mode is enabled and warning message is triggered
15389  *              print I/O count during a fixed interval.
15390  *
15391  *   Arguments: arg - pointer to associated softstate for the device.
15392  *
15393  *     Context: timeout(9F) thread context. May not sleep.
15394  */
15395 static void
15396 sd_rmw_msg_print_handler(void *arg)
15397 {
15398 	struct sd_lun *un = arg;
15399 
15400 	ASSERT(un != NULL);
15401 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15402 
15403 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15404 	    "sd_rmw_msg_print_handler: entry\n");
15405 
15406 	mutex_enter(SD_MUTEX(un));
15407 
15408 	if (un->un_rmw_incre_count > 0) {
15409 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15410 		    "%"PRIu64" I/O requests are not aligned with %d disk "
15411 		    "sector size in %ld seconds. They are handled through "
15412 		    "Read Modify Write but the performance is very low!\n",
15413 		    un->un_rmw_incre_count, un->un_tgt_blocksize,
15414 		    drv_hztousec(SD_RMW_MSG_PRINT_TIMEOUT) / 1000000);
15415 		un->un_rmw_incre_count = 0;
15416 		un->un_rmw_msg_timeid = timeout(sd_rmw_msg_print_handler,
15417 		    un, SD_RMW_MSG_PRINT_TIMEOUT);
15418 	} else {
15419 		un->un_rmw_msg_timeid = NULL;
15420 	}
15421 
15422 	mutex_exit(SD_MUTEX(un));
15423 
15424 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15425 	    "sd_rmw_msg_print_handler: exit\n");
15426 }
15427 
15428 /*
15429  *    Function: sd_start_direct_priority_command
15430  *
15431  * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
15432  *		received TRAN_BUSY when we called scsi_transport() to send it
15433  *		to the underlying HBA. This function is called from timeout(9F)
15434  *		context after the delay interval has expired.
15435  *
15436  *   Arguments: arg - pointer to associated buf(9S) to be restarted.
15437  *
15438  *     Context: timeout(9F) thread context.  May not sleep.
15439  */
15440 
15441 static void
15442 sd_start_direct_priority_command(void *arg)
15443 {
15444 	struct buf	*priority_bp = arg;
15445 	struct sd_lun	*un;
15446 
15447 	ASSERT(priority_bp != NULL);
15448 	un = SD_GET_UN(priority_bp);
15449 	ASSERT(un != NULL);
15450 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15451 
15452 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15453 	    "sd_start_direct_priority_command: entry\n");
15454 
15455 	mutex_enter(SD_MUTEX(un));
15456 	un->un_direct_priority_timeid = NULL;
15457 	sd_start_cmds(un, priority_bp);
15458 	mutex_exit(SD_MUTEX(un));
15459 
15460 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15461 	    "sd_start_direct_priority_command: exit\n");
15462 }
15463 
15464 
15465 /*
15466  *    Function: sd_send_request_sense_command
15467  *
15468  * Description: Sends a REQUEST SENSE command to the target
15469  *
15470  *     Context: May be called from interrupt context.
15471  */
15472 
15473 static void
15474 sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
15475     struct scsi_pkt *pktp)
15476 {
15477 	ASSERT(bp != NULL);
15478 	ASSERT(un != NULL);
15479 	ASSERT(mutex_owned(SD_MUTEX(un)));
15480 
15481 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
15482 	    "entry: buf:0x%p\n", bp);
15483 
15484 	/*
15485 	 * If we are syncing or dumping, then fail the command to avoid a
15486 	 * recursive callback into scsi_transport(). Also fail the command
15487 	 * if we are suspended (legacy behavior).
15488 	 */
15489 	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
15490 	    (un->un_state == SD_STATE_DUMPING)) {
15491 		sd_return_failed_command(un, bp, EIO);
15492 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15493 		    "sd_send_request_sense_command: syncing/dumping, exit\n");
15494 		return;
15495 	}
15496 
15497 	/*
15498 	 * Retry the failed command and don't issue the request sense if:
15499 	 *    1) the sense buf is busy
15500 	 *    2) we have 1 or more outstanding commands on the target
15501 	 *    (the sense data will be cleared or invalidated any way)
15502 	 *
15503 	 * Note: There could be an issue with not checking a retry limit here,
15504 	 * the problem is determining which retry limit to check.
15505 	 */
15506 	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
15507 		/* Don't retry if the command is flagged as non-retryable */
15508 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15509 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
15510 			    NULL, NULL, 0, un->un_busy_timeout,
15511 			    kstat_waitq_enter);
15512 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15513 			    "sd_send_request_sense_command: "
15514 			    "at full throttle, retrying exit\n");
15515 		} else {
15516 			sd_return_failed_command(un, bp, EIO);
15517 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15518 			    "sd_send_request_sense_command: "
15519 			    "at full throttle, non-retryable exit\n");
15520 		}
15521 		return;
15522 	}
15523 
15524 	sd_mark_rqs_busy(un, bp);
15525 	sd_start_cmds(un, un->un_rqs_bp);
15526 
15527 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15528 	    "sd_send_request_sense_command: exit\n");
15529 }
15530 
15531 
15532 /*
15533  *    Function: sd_mark_rqs_busy
15534  *
15535  * Description: Indicate that the request sense bp for this instance is
15536  *		in use.
15537  *
15538  *     Context: May be called under interrupt context
15539  */
15540 
15541 static void
15542 sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
15543 {
15544 	struct sd_xbuf	*sense_xp;
15545 
15546 	ASSERT(un != NULL);
15547 	ASSERT(bp != NULL);
15548 	ASSERT(mutex_owned(SD_MUTEX(un)));
15549 	ASSERT(un->un_sense_isbusy == 0);
15550 
15551 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
15552 	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);
15553 
15554 	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
15555 	ASSERT(sense_xp != NULL);
15556 
15557 	SD_INFO(SD_LOG_IO, un,
15558 	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);
15559 
15560 	ASSERT(sense_xp->xb_pktp != NULL);
15561 	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
15562 	    == (FLAG_SENSING | FLAG_HEAD));
15563 
15564 	un->un_sense_isbusy = 1;
15565 	un->un_rqs_bp->b_resid = 0;
15566 	sense_xp->xb_pktp->pkt_resid  = 0;
15567 	sense_xp->xb_pktp->pkt_reason = 0;
15568 
15569 	/* So we can get back the bp at interrupt time! */
15570 	sense_xp->xb_sense_bp = bp;
15571 
15572 	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);
15573 
15574 	/*
15575 	 * Mark this buf as awaiting sense data. (This is already set in
15576 	 * the pkt_flags for the RQS packet.)
15577 	 */
15578 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;
15579 
15580 	/* Request sense down same path */
15581 	if (scsi_pkt_allocated_correctly((SD_GET_XBUF(bp))->xb_pktp) &&
15582 	    ((SD_GET_XBUF(bp))->xb_pktp)->pkt_path_instance)
15583 		sense_xp->xb_pktp->pkt_path_instance =
15584 		    ((SD_GET_XBUF(bp))->xb_pktp)->pkt_path_instance;
15585 
15586 	sense_xp->xb_retry_count = 0;
15587 	sense_xp->xb_victim_retry_count = 0;
15588 	sense_xp->xb_ua_retry_count = 0;
15589 	sense_xp->xb_nr_retry_count = 0;
15590 	sense_xp->xb_dma_resid  = 0;
15591 
15592 	/* Clean up the fields for auto-request sense */
15593 	sense_xp->xb_sense_status = 0;
15594 	sense_xp->xb_sense_state = 0;
15595 	sense_xp->xb_sense_resid = 0;
15596 	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));
15597 
15598 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
15599 }
15600 
15601 
15602 /*
15603  *    Function: sd_mark_rqs_idle
15604  *
15605  * Description: SD_MUTEX must be held continuously through this routine
15606  *		to prevent reuse of the rqs struct before the caller can
15607  *		complete it's processing.
15608  *
15609  * Return Code: Pointer to the RQS buf
15610  *
15611  *     Context: May be called under interrupt context
15612  */
15613 
15614 static struct buf *
15615 sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
15616 {
15617 	struct buf *bp;
15618 	ASSERT(un != NULL);
15619 	ASSERT(sense_xp != NULL);
15620 	ASSERT(mutex_owned(SD_MUTEX(un)));
15621 	ASSERT(un->un_sense_isbusy != 0);
15622 
15623 	un->un_sense_isbusy = 0;
15624 	bp = sense_xp->xb_sense_bp;
15625 	sense_xp->xb_sense_bp = NULL;
15626 
15627 	/* This pkt is no longer interested in getting sense data */
15628 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
15629 
15630 	return (bp);
15631 }
15632 
15633 
15634 
15635 /*
15636  *    Function: sd_alloc_rqs
15637  *
15638  * Description: Set up the unit to receive auto request sense data
15639  *
15640  * Return Code: DDI_SUCCESS or DDI_FAILURE
15641  *
15642  *     Context: Called under attach(9E) context
15643  */
15644 
15645 static int
15646 sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
15647 {
15648 	struct sd_xbuf *xp;
15649 
15650 	ASSERT(un != NULL);
15651 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15652 	ASSERT(un->un_rqs_bp == NULL);
15653 	ASSERT(un->un_rqs_pktp == NULL);
15654 
15655 	/*
15656 	 * First allocate the required buf and scsi_pkt structs, then set up
15657 	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
15658 	 */
15659 	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
15660 	    MAX_SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
15661 	if (un->un_rqs_bp == NULL) {
15662 		return (DDI_FAILURE);
15663 	}
15664 
15665 	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
15666 	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);
15667 
15668 	if (un->un_rqs_pktp == NULL) {
15669 		sd_free_rqs(un);
15670 		return (DDI_FAILURE);
15671 	}
15672 
15673 	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
15674 	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
15675 	    SCMD_REQUEST_SENSE, 0, MAX_SENSE_LENGTH, 0);
15676 
15677 	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);
15678 
15679 	/* Set up the other needed members in the ARQ scsi_pkt. */
15680 	un->un_rqs_pktp->pkt_comp   = sdintr;
15681 	un->un_rqs_pktp->pkt_time   = sd_io_time;
15682 	un->un_rqs_pktp->pkt_flags |=
15683 	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */
15684 
15685 	/*
15686 	 * Allocate  & init the sd_xbuf struct for the RQS command. Do not
15687 	 * provide any intpkt, destroypkt routines as we take care of
15688 	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
15689 	 */
15690 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
15691 	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
15692 	xp->xb_pktp = un->un_rqs_pktp;
15693 	SD_INFO(SD_LOG_ATTACH_DETACH, un,
15694 	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
15695 	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);
15696 
15697 	/*
15698 	 * Save the pointer to the request sense private bp so it can
15699 	 * be retrieved in sdintr.
15700 	 */
15701 	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
15702 	ASSERT(un->un_rqs_bp->b_private == xp);
15703 
15704 	/*
15705 	 * See if the HBA supports auto-request sense for the specified
15706 	 * target/lun. If it does, then try to enable it (if not already
15707 	 * enabled).
15708 	 *
15709 	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
15710 	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
15711 	 * return success.  However, in both of these cases ARQ is always
15712 	 * enabled and scsi_ifgetcap will always return true. The best approach
15713 	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
15714 	 *
15715 	 * The 3rd case is the HBA (adp) always return enabled on
15716 	 * scsi_ifgetgetcap even when it's not enable, the best approach
15717 	 * is issue a scsi_ifsetcap then a scsi_ifgetcap
15718 	 * Note: this case is to circumvent the Adaptec bug. (x86 only)
15719 	 */
15720 
15721 	if (un->un_f_is_fibre == TRUE) {
15722 		un->un_f_arq_enabled = TRUE;
15723 	} else {
15724 #if defined(__x86)
15725 		/*
15726 		 * Circumvent the Adaptec bug, remove this code when
15727 		 * the bug is fixed
15728 		 */
15729 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
15730 #endif
15731 		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
15732 		case 0:
15733 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
15734 			    "sd_alloc_rqs: HBA supports ARQ\n");
15735 			/*
15736 			 * ARQ is supported by this HBA but currently is not
15737 			 * enabled. Attempt to enable it and if successful then
15738 			 * mark this instance as ARQ enabled.
15739 			 */
15740 			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
15741 			    == 1) {
15742 				/* Successfully enabled ARQ in the HBA */
15743 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
15744 				    "sd_alloc_rqs: ARQ enabled\n");
15745 				un->un_f_arq_enabled = TRUE;
15746 			} else {
15747 				/* Could not enable ARQ in the HBA */
15748 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
15749 				    "sd_alloc_rqs: failed ARQ enable\n");
15750 				un->un_f_arq_enabled = FALSE;
15751 			}
15752 			break;
15753 		case 1:
15754 			/*
15755 			 * ARQ is supported by this HBA and is already enabled.
15756 			 * Just mark ARQ as enabled for this instance.
15757 			 */
15758 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
15759 			    "sd_alloc_rqs: ARQ already enabled\n");
15760 			un->un_f_arq_enabled = TRUE;
15761 			break;
15762 		default:
15763 			/*
15764 			 * ARQ is not supported by this HBA; disable it for this
15765 			 * instance.
15766 			 */
15767 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
15768 			    "sd_alloc_rqs: HBA does not support ARQ\n");
15769 			un->un_f_arq_enabled = FALSE;
15770 			break;
15771 		}
15772 	}
15773 
15774 	return (DDI_SUCCESS);
15775 }
15776 
15777 
15778 /*
15779  *    Function: sd_free_rqs
15780  *
15781  * Description: Cleanup for the pre-instance RQS command.
15782  *
15783  *     Context: Kernel thread context
15784  */
15785 
15786 static void
15787 sd_free_rqs(struct sd_lun *un)
15788 {
15789 	ASSERT(un != NULL);
15790 
15791 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
15792 
15793 	/*
15794 	 * If consistent memory is bound to a scsi_pkt, the pkt
15795 	 * has to be destroyed *before* freeing the consistent memory.
15796 	 * Don't change the sequence of this operations.
15797 	 * scsi_destroy_pkt() might access memory, which isn't allowed,
15798 	 * after it was freed in scsi_free_consistent_buf().
15799 	 */
15800 	if (un->un_rqs_pktp != NULL) {
15801 		scsi_destroy_pkt(un->un_rqs_pktp);
15802 		un->un_rqs_pktp = NULL;
15803 	}
15804 
15805 	if (un->un_rqs_bp != NULL) {
15806 		struct sd_xbuf *xp = SD_GET_XBUF(un->un_rqs_bp);
15807 		if (xp != NULL) {
15808 			kmem_free(xp, sizeof (struct sd_xbuf));
15809 		}
15810 		scsi_free_consistent_buf(un->un_rqs_bp);
15811 		un->un_rqs_bp = NULL;
15812 	}
15813 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
15814 }
15815 
15816 
15817 
15818 /*
15819  *    Function: sd_reduce_throttle
15820  *
15821  * Description: Reduces the maximum # of outstanding commands on a
15822  *		target to the current number of outstanding commands.
15823  *		Queues a tiemout(9F) callback to restore the limit
15824  *		after a specified interval has elapsed.
15825  *		Typically used when we get a TRAN_BUSY return code
15826  *		back from scsi_transport().
15827  *
15828  *   Arguments: un - ptr to the sd_lun softstate struct
15829  *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
15830  *
15831  *     Context: May be called from interrupt context
15832  */
15833 
15834 static void
15835 sd_reduce_throttle(struct sd_lun *un, int throttle_type)
15836 {
15837 	ASSERT(un != NULL);
15838 	ASSERT(mutex_owned(SD_MUTEX(un)));
15839 	ASSERT(un->un_ncmds_in_transport >= 0);
15840 
15841 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
15842 	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
15843 	    un, un->un_throttle, un->un_ncmds_in_transport);
15844 
15845 	if (un->un_throttle > 1) {
15846 		if (un->un_f_use_adaptive_throttle == TRUE) {
15847 			switch (throttle_type) {
15848 			case SD_THROTTLE_TRAN_BUSY:
15849 				if (un->un_busy_throttle == 0) {
15850 					un->un_busy_throttle = un->un_throttle;
15851 				}
15852 				break;
15853 			case SD_THROTTLE_QFULL:
15854 				un->un_busy_throttle = 0;
15855 				break;
15856 			default:
15857 				ASSERT(FALSE);
15858 			}
15859 
15860 			if (un->un_ncmds_in_transport > 0) {
15861 				un->un_throttle = un->un_ncmds_in_transport;
15862 			}
15863 
15864 		} else {
15865 			if (un->un_ncmds_in_transport == 0) {
15866 				un->un_throttle = 1;
15867 			} else {
15868 				un->un_throttle = un->un_ncmds_in_transport;
15869 			}
15870 		}
15871 	}
15872 
15873 	/* Reschedule the timeout if none is currently active */
15874 	if (un->un_reset_throttle_timeid == NULL) {
15875 		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
15876 		    un, SD_THROTTLE_RESET_INTERVAL);
15877 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15878 		    "sd_reduce_throttle: timeout scheduled!\n");
15879 	}
15880 
15881 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
15882 	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
15883 }
15884 
15885 
15886 
15887 /*
15888  *    Function: sd_restore_throttle
15889  *
15890  * Description: Callback function for timeout(9F).  Resets the current
15891  *		value of un->un_throttle to its default.
15892  *
15893  *   Arguments: arg - pointer to associated softstate for the device.
15894  *
15895  *     Context: May be called from interrupt context
15896  */
15897 
15898 static void
15899 sd_restore_throttle(void *arg)
15900 {
15901 	struct sd_lun	*un = arg;
15902 
15903 	ASSERT(un != NULL);
15904 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15905 
15906 	mutex_enter(SD_MUTEX(un));
15907 
15908 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
15909 	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);
15910 
15911 	un->un_reset_throttle_timeid = NULL;
15912 
15913 	if (un->un_f_use_adaptive_throttle == TRUE) {
15914 		/*
15915 		 * If un_busy_throttle is nonzero, then it contains the
15916 		 * value that un_throttle was when we got a TRAN_BUSY back
15917 		 * from scsi_transport(). We want to revert back to this
15918 		 * value.
15919 		 *
15920 		 * In the QFULL case, the throttle limit will incrementally
15921 		 * increase until it reaches max throttle.
15922 		 */
15923 		if (un->un_busy_throttle > 0) {
15924 			un->un_throttle = un->un_busy_throttle;
15925 			un->un_busy_throttle = 0;
15926 		} else {
15927 			/*
15928 			 * increase throttle by 10% open gate slowly, schedule
15929 			 * another restore if saved throttle has not been
15930 			 * reached
15931 			 */
15932 			short throttle;
15933 			if (sd_qfull_throttle_enable) {
15934 				throttle = un->un_throttle +
15935 				    max((un->un_throttle / 10), 1);
15936 				un->un_throttle =
15937 				    (throttle < un->un_saved_throttle) ?
15938 				    throttle : un->un_saved_throttle;
15939 				if (un->un_throttle < un->un_saved_throttle) {
15940 					un->un_reset_throttle_timeid =
15941 					    timeout(sd_restore_throttle,
15942 					    un,
15943 					    SD_QFULL_THROTTLE_RESET_INTERVAL);
15944 				}
15945 			}
15946 		}
15947 
15948 		/*
15949 		 * If un_throttle has fallen below the low-water mark, we
15950 		 * restore the maximum value here (and allow it to ratchet
15951 		 * down again if necessary).
15952 		 */
15953 		if (un->un_throttle < un->un_min_throttle) {
15954 			un->un_throttle = un->un_saved_throttle;
15955 		}
15956 	} else {
15957 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
15958 		    "restoring limit from 0x%x to 0x%x\n",
15959 		    un->un_throttle, un->un_saved_throttle);
15960 		un->un_throttle = un->un_saved_throttle;
15961 	}
15962 
15963 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15964 	    "sd_restore_throttle: calling sd_start_cmds!\n");
15965 
15966 	sd_start_cmds(un, NULL);
15967 
15968 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15969 	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
15970 	    un, un->un_throttle);
15971 
15972 	mutex_exit(SD_MUTEX(un));
15973 
15974 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
15975 }
15976 
15977 /*
15978  *    Function: sdrunout
15979  *
15980  * Description: Callback routine for scsi_init_pkt when a resource allocation
15981  *		fails.
15982  *
15983  *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
15984  *		soft state instance.
15985  *
15986  * Return Code: The scsi_init_pkt routine allows for the callback function to
15987  *		return a 0 indicating the callback should be rescheduled or a 1
15988  *		indicating not to reschedule. This routine always returns 1
15989  *		because the driver always provides a callback function to
15990  *		scsi_init_pkt. This results in a callback always being scheduled
15991  *		(via the scsi_init_pkt callback implementation) if a resource
15992  *		failure occurs.
15993  *
15994  *     Context: This callback function may not block or call routines that block
15995  *
15996  *        Note: Using the scsi_init_pkt callback facility can result in an I/O
15997  *		request persisting at the head of the list which cannot be
15998  *		satisfied even after multiple retries. In the future the driver
15999  *		may implement some time of maximum runout count before failing
16000  *		an I/O.
16001  */
16002 
16003 static int
16004 sdrunout(caddr_t arg)
16005 {
16006 	struct sd_lun	*un = (struct sd_lun *)arg;
16007 
16008 	ASSERT(un != NULL);
16009 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16010 
16011 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
16012 
16013 	mutex_enter(SD_MUTEX(un));
16014 	sd_start_cmds(un, NULL);
16015 	mutex_exit(SD_MUTEX(un));
16016 	/*
16017 	 * This callback routine always returns 1 (i.e. do not reschedule)
16018 	 * because we always specify sdrunout as the callback handler for
16019 	 * scsi_init_pkt inside the call to sd_start_cmds.
16020 	 */
16021 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
16022 	return (1);
16023 }
16024 
16025 
/*
 *    Function: sdintr
 *
 * Description: Completion callback routine for scsi_pkt(9S) structs
 *		sent to the HBA driver via scsi_transport(9F).
 *
 *		The buf, sd_xbuf and softstate for the command are recovered
 *		from pktp->pkt_private.  With SD_MUTEX held the routine then
 *		checks, in this order:
 *		  1. pkt_reason == CMD_DEV_GONE: fail the command.
 *		  2. STATE_ARQ_DONE with ARQ enabled: handle the auto
 *		     request sense data (or fail a FLAG_DIAGNOSE command).
 *		  3. FLAG_SENSING: this is the instance's own REQUEST SENSE
 *		     packet; hand it to sd_handle_request_sense().
 *		  4. CMD_CMPLT with STATUS_GOOD: the normal success path.
 *		  5. FLAG_DIAGNOSE: issue a REQUEST SENSE or fail, no retry.
 *		  6. Otherwise dispatch on pkt_reason / SCSI status to the
 *		     matching sd_pkt_* handler.
 *
 *		un_ncmds_in_transport is decremented on entry, and
 *		un_in_callback is incremented on entry and decremented again
 *		on every return path.
 *
 *   Arguments: pktp - ptr to the completed scsi_pkt(9S)
 *
 *     Context: Interrupt context
 */

static void
sdintr(struct scsi_pkt *pktp)
{
	struct buf	*bp;
	struct sd_xbuf	*xp;
	struct sd_lun	*un;
	size_t		actual_len;
	sd_ssc_t	*sscp;

	/* Walk from the packet back to its buf, xbuf and softstate. */
	ASSERT(pktp != NULL);
	bp = (struct buf *)pktp->pkt_private;
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	ASSERT(xp->xb_pktp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

#ifdef SD_FAULT_INJECTION

	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
	/* SD FaultInjection */
	sd_faultinjection(pktp);

#endif /* SD_FAULT_INJECTION */

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
	    " xp:0x%p, un:0x%p\n", bp, xp, un);

	/* SD_MUTEX is held from here until just before each return. */
	mutex_enter(SD_MUTEX(un));

	ASSERT(un->un_fm_private != NULL);
	sscp = &((struct sd_fm_internal *)(un->un_fm_private))->fm_ssc;
	ASSERT(sscp != NULL);

	/* Reduce the count of the #commands currently in transport */
	un->un_ncmds_in_transport--;
	ASSERT(un->un_ncmds_in_transport >= 0);

	/* Increment counter to indicate that the callback routine is active */
	un->un_in_callback++;

	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);

#ifdef	SDDEBUG
	if (bp == un->un_retry_bp) {
		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
		    un, un->un_retry_bp, un->un_ncmds_in_transport);
	}
#endif

	/*
	 * If pkt_reason is CMD_DEV_GONE, fail the command, and update the media
	 * state if needed.
	 */
	if (pktp->pkt_reason == CMD_DEV_GONE) {
		/* Prevent multiple console messages for the same failure. */
		if (un->un_last_pkt_reason != CMD_DEV_GONE) {
			un->un_last_pkt_reason = CMD_DEV_GONE;
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Command failed to complete...Device is gone\n");
		}
		/* Wake anyone waiting on un_state_cv when the state changes. */
		if (un->un_mediastate != DKIO_DEV_GONE) {
			un->un_mediastate = DKIO_DEV_GONE;
			cv_broadcast(&un->un_state_cv);
		}
		/*
		 * If the command happens to be the REQUEST SENSE command,
		 * free up the rqs buf and fail the original command.
		 */
		if (bp == un->un_rqs_bp) {
			bp = sd_mark_rqs_idle(un, xp);
		}
		sd_return_failed_command(un, bp, EIO);
		goto exit;
	}

	/* Trace-only: note that extended sense data came with the packet. */
	if (pktp->pkt_state & STATE_XARQ_DONE) {
		SD_TRACE(SD_LOG_COMMON, un,
		    "sdintr: extra sense data received. pkt=%p\n", pktp);
	}

	/*
	 * First see if the pkt has auto-request sense data with it....
	 * Look at the packet state first so we don't take a performance
	 * hit looking at the arq enabled flag unless absolutely necessary.
	 */
	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
	    (un->un_f_arq_enabled == TRUE)) {
		/*
		 * The HBA did an auto request sense for this command so check
		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
		 * driver command that should not be retried.
		 */
		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
			/*
			 * Save the relevant sense info into the xp for the
			 * original cmd.
			 */
			struct scsi_arq_status *asp;
			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
			xp->xb_sense_status =
			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
			xp->xb_sense_state  = asp->sts_rqpkt_state;
			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
			if (pktp->pkt_state & STATE_XARQ_DONE) {
				/*
				 * Extended ARQ: copy the full MAX_SENSE_LENGTH
				 * bytes of sense data.
				 * NOTE(review): actual_len is assigned here but
				 * is only read in the non-XARQ USCSI path below.
				 */
				actual_len = MAX_SENSE_LENGTH -
				    xp->xb_sense_resid;
				bcopy(&asp->sts_sensedata, xp->xb_sense_data,
				    MAX_SENSE_LENGTH);
			} else {
				/*
				 * Work out how many sense bytes were actually
				 * returned; which base length applies depends
				 * on the size of the reported residual.
				 */
				if (xp->xb_sense_resid > SENSE_LENGTH) {
					actual_len = MAX_SENSE_LENGTH -
					    xp->xb_sense_resid;
				} else {
					actual_len = SENSE_LENGTH -
					    xp->xb_sense_resid;
				}
				/*
				 * For uscsi commands, re-express the residual
				 * relative to the caller's uscsi_rqlen.
				 */
				if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
					if ((((struct uscsi_cmd *)
					    (xp->xb_pktinfo))->uscsi_rqlen) >
					    actual_len) {
						xp->xb_sense_resid =
						    (((struct uscsi_cmd *)
						    (xp->xb_pktinfo))->
						    uscsi_rqlen) - actual_len;
					} else {
						xp->xb_sense_resid = 0;
					}
				}
				bcopy(&asp->sts_sensedata, xp->xb_sense_data,
				    SENSE_LENGTH);
			}

			/* fail the command */
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
			sd_return_failed_command(un, bp, EIO);
			goto exit;
		}

#if (defined(__x86))	/* DMAFREE for x86 only */
		/*
		 * We want to either retry or fail this command, so free
		 * the DMA resources here.  If we retry the command then
		 * the DMA resources will be reallocated in sd_start_cmds().
		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
		 * causes the *entire* transfer to start over again from the
		 * beginning of the request, even for PARTIAL chunks that
		 * have already transferred successfully.
		 */
		if ((un->un_f_is_fibre == TRUE) &&
		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
			scsi_dmafree(pktp);
			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
		}
#endif

		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: arq done, sd_handle_auto_request_sense\n");

		sd_handle_auto_request_sense(un, bp, xp, pktp);
		goto exit;
	}

	/* Next see if this is the REQUEST SENSE pkt for the instance */
	if (pktp->pkt_flags & FLAG_SENSING)  {
		/* This pktp is from the unit's REQUEST_SENSE command */
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: sd_handle_request_sense\n");
		sd_handle_request_sense(un, bp, xp, pktp);
		goto exit;
	}

	/*
	 * Check to see if the command successfully completed as requested;
	 * this is the most common case (and also the hot performance path).
	 *
	 * Requirements for successful completion are:
	 * pkt_reason is CMD_CMPLT and packet status is status good.
	 * In addition:
	 * - A residual of zero indicates successful completion no matter what
	 *   the command is.
	 * - If the residual is not zero and the command is not a read or
	 *   write, then it's still defined as successful completion. In other
	 *   words, if the command is a read or write the residual must be
	 *   zero for successful completion.
	 * - If the residual is not zero and the command is a read or
	 *   write, and it's a USCSICMD, then it's still defined as
	 *   successful completion.
	 */
	if ((pktp->pkt_reason == CMD_CMPLT) &&
	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {

		/*
		 * Return all USCSI commands on good status
		 */
		if (pktp->pkt_resid == 0) {
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: returning command for resid == 0\n");
		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
			SD_UPDATE_B_RESID(bp, pktp);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: returning command for resid != 0\n");
		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
			SD_UPDATE_B_RESID(bp, pktp);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: returning uscsi command\n");
		} else {
			/* Non-uscsi read/write with a residual: an error. */
			goto not_successful;
		}
		sd_return_command(un, bp);

		/*
		 * Decrement counter to indicate that the callback routine
		 * is done.  (This fast path does the same bookkeeping as the
		 * exit label below, but returns directly without emitting
		 * the exit trace.)
		 */
		un->un_in_callback--;
		ASSERT(un->un_in_callback >= 0);
		mutex_exit(SD_MUTEX(un));

		return;
	}

not_successful:

#if (defined(__x86))	/* DMAFREE for x86 only */
	/*
	 * The following is based upon knowledge of the underlying transport
	 * and its use of DMA resources.  This code should be removed when
	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
	 * and sd_start_cmds().
	 *
	 * Free any DMA resources associated with this command if there
	 * is a chance it could be retried or enqueued for later retry.
	 * If we keep the DMA binding then mpxio cannot reissue the
	 * command on another path whenever a path failure occurs.
	 *
	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
	 * causes the *entire* transfer to start over again from the
	 * beginning of the request, even for PARTIAL chunks that
	 * have already transferred successfully.
	 *
	 * This is only done for non-uscsi commands (and also skipped for the
	 * driver's internal RQS command). Also just do this for Fibre Channel
	 * devices as these are the only ones that support mpxio.
	 */
	if ((un->un_f_is_fibre == TRUE) &&
	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
		scsi_dmafree(pktp);
		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
	}
#endif

	/*
	 * The command did not successfully complete as requested so check
	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
	 * driver command that should not be retried so just return. If
	 * FLAG_DIAGNOSE is not set the error will be processed below.
	 */
	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
		/*
		 * Issue a request sense if a check condition caused the error
		 * (we handle the auto request sense case above), otherwise
		 * just fail the command.
		 */
		if ((pktp->pkt_reason == CMD_CMPLT) &&
		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
			sd_send_request_sense_command(un, bp, pktp);
		} else {
			sd_return_failed_command(un, bp, EIO);
		}
		goto exit;
	}

	/*
	 * The command did not successfully complete as requested so process
	 * the error, retry, and/or attempt recovery.
	 */
	switch (pktp->pkt_reason) {
	case CMD_CMPLT:
		/* Transport completed the command: dispatch on SCSI status. */
		switch (SD_GET_PKT_STATUS(pktp)) {
		case STATUS_GOOD:
			/*
			 * The command completed successfully with a non-zero
			 * residual
			 */
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_GOOD \n");
			sd_pkt_status_good(un, bp, xp, pktp);
			break;

		case STATUS_CHECK:
		case STATUS_TERMINATED:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
			sd_pkt_status_check_condition(un, bp, xp, pktp);
			break;

		case STATUS_BUSY:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_BUSY\n");
			sd_pkt_status_busy(un, bp, xp, pktp);
			break;

		case STATUS_RESERVATION_CONFLICT:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
			break;

		case STATUS_QFULL:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_QFULL\n");
			sd_pkt_status_qfull(un, bp, xp, pktp);
			break;

		/* Recognized status codes that this driver does not expect. */
		case STATUS_MET:
		case STATUS_INTERMEDIATE:
		case STATUS_SCSI2:
		case STATUS_INTERMEDIATE_MET:
		case STATUS_ACA_ACTIVE:
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Unexpected SCSI status received: 0x%x\n",
			    SD_GET_PKT_STATUS(pktp));
			/*
			 * Mark the ssc_flags when detected invalid status
			 * code for non-USCSI command.
			 */
			if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
				sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_STATUS,
				    0, "stat-code");
			}
			sd_return_failed_command(un, bp, EIO);
			break;

		default:
			/* Same handling as above, with a different message. */
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Invalid SCSI status received: 0x%x\n",
			    SD_GET_PKT_STATUS(pktp));
			if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
				sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_STATUS,
				    0, "stat-code");
			}
			sd_return_failed_command(un, bp, EIO);
			break;

		}
		break;

	/* Each remaining pkt_reason has a dedicated handler. */
	case CMD_INCOMPLETE:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr:  CMD_INCOMPLETE\n");
		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
		break;
	case CMD_TRAN_ERR:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_TRAN_ERR\n");
		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
		break;
	case CMD_RESET:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_RESET \n");
		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
		break;
	case CMD_ABORTED:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_ABORTED \n");
		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
		break;
	case CMD_TIMEOUT:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_TIMEOUT\n");
		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
		break;
	case CMD_UNX_BUS_FREE:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_UNX_BUS_FREE \n");
		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
		break;
	case CMD_TAG_REJECT:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_TAG_REJECT\n");
		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
		break;
	default:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: default\n");
		/*
		 * Mark the ssc_flags for detecting invalid pkt_reason.
		 */
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_PKT_REASON,
			    0, "pkt-reason");
		}
		sd_pkt_reason_default(un, bp, xp, pktp);
		break;
	}

exit:
	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");

	/* Decrement counter to indicate that the callback routine is done. */
	un->un_in_callback--;
	ASSERT(un->un_in_callback >= 0);

	/*
	 * At this point, the pkt has been dispatched, ie, it is either
	 * being re-tried or has been returned to its caller and should
	 * not be referenced.
	 */

	mutex_exit(SD_MUTEX(un));
}
16457 
16458 
16459 /*
16460  *    Function: sd_print_incomplete_msg
16461  *
16462  * Description: Prints the error message for a CMD_INCOMPLETE error.
16463  *
16464  *   Arguments: un - ptr to associated softstate for the device.
16465  *		bp - ptr to the buf(9S) for the command.
16466  *		arg - message string ptr
16467  *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
16468  *			or SD_NO_RETRY_ISSUED.
16469  *
16470  *     Context: May be called under interrupt context
16471  */
16472 
16473 static void
16474 sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
16475 {
16476 	struct scsi_pkt	*pktp;
16477 	char	*msgp;
16478 	char	*cmdp = arg;
16479 
16480 	ASSERT(un != NULL);
16481 	ASSERT(mutex_owned(SD_MUTEX(un)));
16482 	ASSERT(bp != NULL);
16483 	ASSERT(arg != NULL);
16484 	pktp = SD_GET_PKTP(bp);
16485 	ASSERT(pktp != NULL);
16486 
16487 	switch (code) {
16488 	case SD_DELAYED_RETRY_ISSUED:
16489 	case SD_IMMEDIATE_RETRY_ISSUED:
16490 		msgp = "retrying";
16491 		break;
16492 	case SD_NO_RETRY_ISSUED:
16493 	default:
16494 		msgp = "giving up";
16495 		break;
16496 	}
16497 
16498 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16499 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16500 		    "incomplete %s- %s\n", cmdp, msgp);
16501 	}
16502 }
16503 
16504 
16505 
16506 /*
16507  *    Function: sd_pkt_status_good
16508  *
16509  * Description: Processing for a STATUS_GOOD code in pkt_status.
16510  *
16511  *     Context: May be called under interrupt context
16512  */
16513 
16514 static void
16515 sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
16516     struct sd_xbuf *xp, struct scsi_pkt *pktp)
16517 {
16518 	char	*cmdp;
16519 
16520 	ASSERT(un != NULL);
16521 	ASSERT(mutex_owned(SD_MUTEX(un)));
16522 	ASSERT(bp != NULL);
16523 	ASSERT(xp != NULL);
16524 	ASSERT(pktp != NULL);
16525 	ASSERT(pktp->pkt_reason == CMD_CMPLT);
16526 	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
16527 	ASSERT(pktp->pkt_resid != 0);
16528 
16529 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
16530 
16531 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16532 	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
16533 	case SCMD_READ:
16534 		cmdp = "read";
16535 		break;
16536 	case SCMD_WRITE:
16537 		cmdp = "write";
16538 		break;
16539 	default:
16540 		SD_UPDATE_B_RESID(bp, pktp);
16541 		sd_return_command(un, bp);
16542 		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16543 		return;
16544 	}
16545 
16546 	/*
16547 	 * See if we can retry the read/write, preferrably immediately.
16548 	 * If retries are exhaused, then sd_retry_command() will update
16549 	 * the b_resid count.
16550 	 */
16551 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
16552 	    cmdp, EIO, (clock_t)0, NULL);
16553 
16554 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16555 }
16556 
16557 
16558 
16559 
16560 
16561 /*
16562  *    Function: sd_handle_request_sense
16563  *
16564  * Description: Processing for non-auto Request Sense command.
16565  *
16566  *   Arguments: un - ptr to associated softstate
16567  *		sense_bp - ptr to buf(9S) for the RQS command
16568  *		sense_xp - ptr to the sd_xbuf for the RQS command
16569  *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
16570  *
16571  *     Context: May be called under interrupt context
16572  */
16573 
16574 static void
16575 sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
16576     struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
16577 {
16578 	struct buf	*cmd_bp;	/* buf for the original command */
16579 	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
16580 	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */
16581 	size_t		actual_len;	/* actual sense data length */
16582 
16583 	ASSERT(un != NULL);
16584 	ASSERT(mutex_owned(SD_MUTEX(un)));
16585 	ASSERT(sense_bp != NULL);
16586 	ASSERT(sense_xp != NULL);
16587 	ASSERT(sense_pktp != NULL);
16588 
16589 	/*
16590 	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
16591 	 * RQS command and not the original command.
16592 	 */
16593 	ASSERT(sense_pktp == un->un_rqs_pktp);
16594 	ASSERT(sense_bp   == un->un_rqs_bp);
16595 	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
16596 	    (FLAG_SENSING | FLAG_HEAD));
16597 	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
16598 	    FLAG_SENSING) == FLAG_SENSING);
16599 
16600 	/* These are the bp, xp, and pktp for the original command */
16601 	cmd_bp = sense_xp->xb_sense_bp;
16602 	cmd_xp = SD_GET_XBUF(cmd_bp);
16603 	cmd_pktp = SD_GET_PKTP(cmd_bp);
16604 
16605 	if (sense_pktp->pkt_reason != CMD_CMPLT) {
16606 		/*
16607 		 * The REQUEST SENSE command failed.  Release the REQUEST
16608 		 * SENSE command for re-use, get back the bp for the original
16609 		 * command, and attempt to re-try the original command if
16610 		 * FLAG_DIAGNOSE is not set in the original packet.
16611 		 */
16612 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
16613 		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
16614 			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
16615 			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
16616 			    NULL, NULL, EIO, (clock_t)0, NULL);
16617 			return;
16618 		}
16619 	}
16620 
16621 	/*
16622 	 * Save the relevant sense info into the xp for the original cmd.
16623 	 *
16624 	 * Note: if the request sense failed the state info will be zero
16625 	 * as set in sd_mark_rqs_busy()
16626 	 */
16627 	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
16628 	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
16629 	actual_len = MAX_SENSE_LENGTH - sense_pktp->pkt_resid;
16630 	if ((cmd_xp->xb_pkt_flags & SD_XB_USCSICMD) &&
16631 	    (((struct uscsi_cmd *)cmd_xp->xb_pktinfo)->uscsi_rqlen >
16632 	    SENSE_LENGTH)) {
16633 		bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data,
16634 		    MAX_SENSE_LENGTH);
16635 		cmd_xp->xb_sense_resid = sense_pktp->pkt_resid;
16636 	} else {
16637 		bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data,
16638 		    SENSE_LENGTH);
16639 		if (actual_len < SENSE_LENGTH) {
16640 			cmd_xp->xb_sense_resid = SENSE_LENGTH - actual_len;
16641 		} else {
16642 			cmd_xp->xb_sense_resid = 0;
16643 		}
16644 	}
16645 
16646 	/*
16647 	 *  Free up the RQS command....
16648 	 *  NOTE:
16649 	 *	Must do this BEFORE calling sd_validate_sense_data!
16650 	 *	sd_validate_sense_data may return the original command in
16651 	 *	which case the pkt will be freed and the flags can no
16652 	 *	longer be touched.
16653 	 *	SD_MUTEX is held through this process until the command
16654 	 *	is dispatched based upon the sense data, so there are
16655 	 *	no race conditions.
16656 	 */
16657 	(void) sd_mark_rqs_idle(un, sense_xp);
16658 
16659 	/*
16660 	 * For a retryable command see if we have valid sense data, if so then
16661 	 * turn it over to sd_decode_sense() to figure out the right course of
16662 	 * action. Just fail a non-retryable command.
16663 	 */
16664 	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
16665 		if (sd_validate_sense_data(un, cmd_bp, cmd_xp, actual_len) ==
16666 		    SD_SENSE_DATA_IS_VALID) {
16667 			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
16668 		}
16669 	} else {
16670 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
16671 		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
16672 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
16673 		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
16674 		sd_return_failed_command(un, cmd_bp, EIO);
16675 	}
16676 }
16677 
16678 
16679 
16680 
16681 /*
16682  *    Function: sd_handle_auto_request_sense
16683  *
16684  * Description: Processing for auto-request sense information.
16685  *
16686  *   Arguments: un - ptr to associated softstate
16687  *		bp - ptr to buf(9S) for the command
16688  *		xp - ptr to the sd_xbuf for the command
16689  *		pktp - ptr to the scsi_pkt(9S) for the command
16690  *
16691  *     Context: May be called under interrupt context
16692  */
16693 
16694 static void
16695 sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
16696     struct sd_xbuf *xp, struct scsi_pkt *pktp)
16697 {
16698 	struct scsi_arq_status *asp;
16699 	size_t actual_len;
16700 
16701 	ASSERT(un != NULL);
16702 	ASSERT(mutex_owned(SD_MUTEX(un)));
16703 	ASSERT(bp != NULL);
16704 	ASSERT(xp != NULL);
16705 	ASSERT(pktp != NULL);
16706 	ASSERT(pktp != un->un_rqs_pktp);
16707 	ASSERT(bp   != un->un_rqs_bp);
16708 
16709 	/*
16710 	 * For auto-request sense, we get a scsi_arq_status back from
16711 	 * the HBA, with the sense data in the sts_sensedata member.
16712 	 * The pkt_scbp of the packet points to this scsi_arq_status.
16713 	 */
16714 	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16715 
16716 	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
16717 		/*
16718 		 * The auto REQUEST SENSE failed; see if we can re-try
16719 		 * the original command.
16720 		 */
16721 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16722 		    "auto request sense failed (reason=%s)\n",
16723 		    scsi_rname(asp->sts_rqpkt_reason));
16724 
16725 		sd_reset_target(un, pktp);
16726 
16727 		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
16728 		    NULL, NULL, EIO, (clock_t)0, NULL);
16729 		return;
16730 	}
16731 
16732 	/* Save the relevant sense info into the xp for the original cmd. */
16733 	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
16734 	xp->xb_sense_state  = asp->sts_rqpkt_state;
16735 	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16736 	if (xp->xb_sense_state & STATE_XARQ_DONE) {
16737 		actual_len = MAX_SENSE_LENGTH - xp->xb_sense_resid;
16738 		bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16739 		    MAX_SENSE_LENGTH);
16740 	} else {
16741 		if (xp->xb_sense_resid > SENSE_LENGTH) {
16742 			actual_len = MAX_SENSE_LENGTH - xp->xb_sense_resid;
16743 		} else {
16744 			actual_len = SENSE_LENGTH - xp->xb_sense_resid;
16745 		}
16746 		if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
16747 			if ((((struct uscsi_cmd *)
16748 			    (xp->xb_pktinfo))->uscsi_rqlen) > actual_len) {
16749 				xp->xb_sense_resid = (((struct uscsi_cmd *)
16750 				    (xp->xb_pktinfo))->uscsi_rqlen) -
16751 				    actual_len;
16752 			} else {
16753 				xp->xb_sense_resid = 0;
16754 			}
16755 		}
16756 		bcopy(&asp->sts_sensedata, xp->xb_sense_data, SENSE_LENGTH);
16757 	}
16758 
16759 	/*
16760 	 * See if we have valid sense data, if so then turn it over to
16761 	 * sd_decode_sense() to figure out the right course of action.
16762 	 */
16763 	if (sd_validate_sense_data(un, bp, xp, actual_len) ==
16764 	    SD_SENSE_DATA_IS_VALID) {
16765 		sd_decode_sense(un, bp, xp, pktp);
16766 	}
16767 }
16768 
16769 
16770 /*
16771  *    Function: sd_print_sense_failed_msg
16772  *
16773  * Description: Print log message when RQS has failed.
16774  *
16775  *   Arguments: un - ptr to associated softstate
16776  *		bp - ptr to buf(9S) for the command
16777  *		arg - generic message string ptr
16778  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16779  *			or SD_NO_RETRY_ISSUED
16780  *
16781  *     Context: May be called from interrupt context
16782  */
16783 
16784 static void
16785 sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
16786     int code)
16787 {
16788 	char	*msgp = arg;
16789 
16790 	ASSERT(un != NULL);
16791 	ASSERT(mutex_owned(SD_MUTEX(un)));
16792 	ASSERT(bp != NULL);
16793 
16794 	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
16795 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, msgp);
16796 	}
16797 }
16798 
16799 
16800 /*
16801  *    Function: sd_validate_sense_data
16802  *
16803  * Description: Check the given sense data for validity.
16804  *		If the sense data is not valid, the command will
16805  *		be either failed or retried!
16806  *
16807  * Return Code: SD_SENSE_DATA_IS_INVALID
16808  *		SD_SENSE_DATA_IS_VALID
16809  *
16810  *     Context: May be called from interrupt context
16811  */
16812 
16813 static int
16814 sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
16815     size_t actual_len)
16816 {
16817 	struct scsi_extended_sense *esp;
16818 	struct	scsi_pkt *pktp;
16819 	char	*msgp = NULL;
16820 	sd_ssc_t *sscp;
16821 
16822 	ASSERT(un != NULL);
16823 	ASSERT(mutex_owned(SD_MUTEX(un)));
16824 	ASSERT(bp != NULL);
16825 	ASSERT(bp != un->un_rqs_bp);
16826 	ASSERT(xp != NULL);
16827 	ASSERT(un->un_fm_private != NULL);
16828 
16829 	pktp = SD_GET_PKTP(bp);
16830 	ASSERT(pktp != NULL);
16831 
16832 	sscp = &((struct sd_fm_internal *)(un->un_fm_private))->fm_ssc;
16833 	ASSERT(sscp != NULL);
16834 
16835 	/*
16836 	 * Check the status of the RQS command (auto or manual).
16837 	 */
16838 	switch (xp->xb_sense_status & STATUS_MASK) {
16839 	case STATUS_GOOD:
16840 		break;
16841 
16842 	case STATUS_RESERVATION_CONFLICT:
16843 		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
16844 		return (SD_SENSE_DATA_IS_INVALID);
16845 
16846 	case STATUS_BUSY:
16847 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16848 		    "Busy Status on REQUEST SENSE\n");
16849 		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
16850 		    NULL, EIO, un->un_busy_timeout / 500, kstat_waitq_enter);
16851 		return (SD_SENSE_DATA_IS_INVALID);
16852 
16853 	case STATUS_QFULL:
16854 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16855 		    "QFULL Status on REQUEST SENSE\n");
16856 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
16857 		    NULL, EIO, un->un_busy_timeout / 500, kstat_waitq_enter);
16858 		return (SD_SENSE_DATA_IS_INVALID);
16859 
16860 	case STATUS_CHECK:
16861 	case STATUS_TERMINATED:
16862 		msgp = "Check Condition on REQUEST SENSE\n";
16863 		goto sense_failed;
16864 
16865 	default:
16866 		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
16867 		goto sense_failed;
16868 	}
16869 
16870 	/*
16871 	 * See if we got the minimum required amount of sense data.
16872 	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
16873 	 * or less.
16874 	 */
16875 	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
16876 	    (actual_len == 0)) {
16877 		msgp = "Request Sense couldn't get sense data\n";
16878 		goto sense_failed;
16879 	}
16880 
16881 	if (actual_len < SUN_MIN_SENSE_LENGTH) {
16882 		msgp = "Not enough sense information\n";
16883 		/* Mark the ssc_flags for detecting invalid sense data */
16884 		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
16885 			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
16886 			    "sense-data");
16887 		}
16888 		goto sense_failed;
16889 	}
16890 
16891 	/*
16892 	 * We require the extended sense data
16893 	 */
16894 	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
16895 	if (esp->es_class != CLASS_EXTENDED_SENSE) {
16896 		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16897 			static char tmp[8];
16898 			static char buf[148];
16899 			char *p = (char *)(xp->xb_sense_data);
16900 			int i;
16901 
16902 			mutex_enter(&sd_sense_mutex);
16903 			(void) strcpy(buf, "undecodable sense information:");
16904 			for (i = 0; i < actual_len; i++) {
16905 				(void) sprintf(tmp, " 0x%x", *(p++) & 0xff);
16906 				(void) strcpy(&buf[strlen(buf)], tmp);
16907 			}
16908 			i = strlen(buf);
16909 			(void) strcpy(&buf[i], "-(assumed fatal)\n");
16910 
16911 			if (SD_FM_LOG(un) == SD_FM_LOG_NSUP) {
16912 				scsi_log(SD_DEVINFO(un), sd_label,
16913 				    CE_WARN, buf);
16914 			}
16915 			mutex_exit(&sd_sense_mutex);
16916 		}
16917 
16918 		/* Mark the ssc_flags for detecting invalid sense data */
16919 		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
16920 			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
16921 			    "sense-data");
16922 		}
16923 
16924 		/* Note: Legacy behavior, fail the command with no retry */
16925 		sd_return_failed_command(un, bp, EIO);
16926 		return (SD_SENSE_DATA_IS_INVALID);
16927 	}
16928 
16929 	/*
16930 	 * Check that es_code is valid (es_class concatenated with es_code
16931 	 * make up the "response code" field.  es_class will always be 7, so
16932 	 * make sure es_code is 0, 1, 2, 3 or 0xf.  es_code will indicate the
16933 	 * format.
16934 	 */
16935 	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
16936 	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
16937 	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
16938 	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
16939 	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
16940 		/* Mark the ssc_flags for detecting invalid sense data */
16941 		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
16942 			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
16943 			    "sense-data");
16944 		}
16945 		goto sense_failed;
16946 	}
16947 
16948 	return (SD_SENSE_DATA_IS_VALID);
16949 
16950 sense_failed:
16951 	/*
16952 	 * If the request sense failed (for whatever reason), attempt
16953 	 * to retry the original command.
16954 	 */
16955 	/*
16956 	 * The SD_RETRY_DELAY value need to be adjusted here
16957 	 * when SD_RETRY_DELAY change in sddef.h
16958 	 */
16959 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
16960 	    sd_print_sense_failed_msg, msgp, EIO,
16961 	    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0, NULL);
16962 
16963 	return (SD_SENSE_DATA_IS_INVALID);
16964 }
16965 
16966 /*
16967  *    Function: sd_decode_sense
16968  *
16969  * Description: Take recovery action(s) when SCSI Sense Data is received.
16970  *
16971  *     Context: Interrupt context.
16972  */
16973 
16974 static void
16975 sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
16976     struct scsi_pkt *pktp)
16977 {
16978 	uint8_t sense_key;
16979 
16980 	ASSERT(un != NULL);
16981 	ASSERT(mutex_owned(SD_MUTEX(un)));
16982 	ASSERT(bp != NULL);
16983 	ASSERT(bp != un->un_rqs_bp);
16984 	ASSERT(xp != NULL);
16985 	ASSERT(pktp != NULL);
16986 
16987 	sense_key = scsi_sense_key(xp->xb_sense_data);
16988 
16989 	switch (sense_key) {
16990 	case KEY_NO_SENSE:
16991 		sd_sense_key_no_sense(un, bp, xp, pktp);
16992 		break;
16993 	case KEY_RECOVERABLE_ERROR:
16994 		sd_sense_key_recoverable_error(un, xp->xb_sense_data,
16995 		    bp, xp, pktp);
16996 		break;
16997 	case KEY_NOT_READY:
16998 		sd_sense_key_not_ready(un, xp->xb_sense_data,
16999 		    bp, xp, pktp);
17000 		break;
17001 	case KEY_MEDIUM_ERROR:
17002 	case KEY_HARDWARE_ERROR:
17003 		sd_sense_key_medium_or_hardware_error(un,
17004 		    xp->xb_sense_data, bp, xp, pktp);
17005 		break;
17006 	case KEY_ILLEGAL_REQUEST:
17007 		sd_sense_key_illegal_request(un, bp, xp, pktp);
17008 		break;
17009 	case KEY_UNIT_ATTENTION:
17010 		sd_sense_key_unit_attention(un, xp->xb_sense_data,
17011 		    bp, xp, pktp);
17012 		break;
17013 	case KEY_WRITE_PROTECT:
17014 	case KEY_VOLUME_OVERFLOW:
17015 	case KEY_MISCOMPARE:
17016 		sd_sense_key_fail_command(un, bp, xp, pktp);
17017 		break;
17018 	case KEY_BLANK_CHECK:
17019 		sd_sense_key_blank_check(un, bp, xp, pktp);
17020 		break;
17021 	case KEY_ABORTED_COMMAND:
17022 		sd_sense_key_aborted_command(un, bp, xp, pktp);
17023 		break;
17024 	case KEY_VENDOR_UNIQUE:
17025 	case KEY_COPY_ABORTED:
17026 	case KEY_EQUAL:
17027 	case KEY_RESERVED:
17028 	default:
17029 		sd_sense_key_default(un, xp->xb_sense_data,
17030 		    bp, xp, pktp);
17031 		break;
17032 	}
17033 }
17034 
17035 
17036 /*
17037  *    Function: sd_dump_memory
17038  *
17039  * Description: Debug logging routine to print the contents of a user provided
17040  *		buffer. The output of the buffer is broken up into 256 byte
17041  *		segments due to a size constraint of the scsi_log.
17042  *		implementation.
17043  *
17044  *   Arguments: un - ptr to softstate
17045  *		comp - component mask
17046  *		title - "title" string to preceed data when printed
17047  *		data - ptr to data block to be printed
17048  *		len - size of data block to be printed
17049  *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
17050  *
17051  *     Context: May be called from interrupt context
17052  */
17053 
17054 #define	SD_DUMP_MEMORY_BUF_SIZE	256
17055 
17056 static char *sd_dump_format_string[] = {
17057 		" 0x%02x",
17058 		" %c"
17059 };
17060 
17061 static void
17062 sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
17063     int len, int fmt)
17064 {
17065 	int	i, j;
17066 	int	avail_count;
17067 	int	start_offset;
17068 	int	end_offset;
17069 	size_t	entry_len;
17070 	char	*bufp;
17071 	char	*local_buf;
17072 	char	*format_string;
17073 
17074 	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));
17075 
17076 	/*
17077 	 * In the debug version of the driver, this function is called from a
17078 	 * number of places which are NOPs in the release driver.
17079 	 * The debug driver therefore has additional methods of filtering
17080 	 * debug output.
17081 	 */
17082 #ifdef SDDEBUG
17083 	/*
17084 	 * In the debug version of the driver we can reduce the amount of debug
17085 	 * messages by setting sd_error_level to something other than
17086 	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
17087 	 * sd_component_mask.
17088 	 */
17089 	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
17090 	    (sd_error_level != SCSI_ERR_ALL)) {
17091 		return;
17092 	}
17093 	if (((sd_component_mask & comp) == 0) ||
17094 	    (sd_error_level != SCSI_ERR_ALL)) {
17095 		return;
17096 	}
17097 #else
17098 	if (sd_error_level != SCSI_ERR_ALL) {
17099 		return;
17100 	}
17101 #endif
17102 
17103 	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
17104 	bufp = local_buf;
17105 	/*
17106 	 * Available length is the length of local_buf[], minus the
17107 	 * length of the title string, minus one for the ":", minus
17108 	 * one for the newline, minus one for the NULL terminator.
17109 	 * This gives the #bytes available for holding the printed
17110 	 * values from the given data buffer.
17111 	 */
17112 	if (fmt == SD_LOG_HEX) {
17113 		format_string = sd_dump_format_string[0];
17114 	} else /* SD_LOG_CHAR */ {
17115 		format_string = sd_dump_format_string[1];
17116 	}
17117 	/*
17118 	 * Available count is the number of elements from the given
17119 	 * data buffer that we can fit into the available length.
17120 	 * This is based upon the size of the format string used.
17121 	 * Make one entry and find it's size.
17122 	 */
17123 	(void) sprintf(bufp, format_string, data[0]);
17124 	entry_len = strlen(bufp);
17125 	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;
17126 
17127 	j = 0;
17128 	while (j < len) {
17129 		bufp = local_buf;
17130 		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
17131 		start_offset = j;
17132 
17133 		end_offset = start_offset + avail_count;
17134 
17135 		(void) sprintf(bufp, "%s:", title);
17136 		bufp += strlen(bufp);
17137 		for (i = start_offset; ((i < end_offset) && (j < len));
17138 		    i++, j++) {
17139 			(void) sprintf(bufp, format_string, data[i]);
17140 			bufp += entry_len;
17141 		}
17142 		(void) sprintf(bufp, "\n");
17143 
17144 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
17145 	}
17146 	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
17147 }
17148 
17149 /*
17150  *    Function: sd_print_sense_msg
17151  *
17152  * Description: Log a message based upon the given sense data.
17153  *
17154  *   Arguments: un - ptr to associated softstate
17155  *		bp - ptr to buf(9S) for the command
17156  *		arg - ptr to associate sd_sense_info struct
17157  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
17158  *			or SD_NO_RETRY_ISSUED
17159  *
17160  *     Context: May be called from interrupt context
17161  */
17162 
17163 static void
17164 sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
17165 {
17166 	struct sd_xbuf	*xp;
17167 	struct scsi_pkt	*pktp;
17168 	uint8_t *sensep;
17169 	daddr_t request_blkno;
17170 	diskaddr_t err_blkno;
17171 	int severity;
17172 	int pfa_flag;
17173 	extern struct scsi_key_strings scsi_cmds[];
17174 
17175 	ASSERT(un != NULL);
17176 	ASSERT(mutex_owned(SD_MUTEX(un)));
17177 	ASSERT(bp != NULL);
17178 	xp = SD_GET_XBUF(bp);
17179 	ASSERT(xp != NULL);
17180 	pktp = SD_GET_PKTP(bp);
17181 	ASSERT(pktp != NULL);
17182 	ASSERT(arg != NULL);
17183 
17184 	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
17185 	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;
17186 
17187 	if ((code == SD_DELAYED_RETRY_ISSUED) ||
17188 	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
17189 		severity = SCSI_ERR_RETRYABLE;
17190 	}
17191 
17192 	/* Use absolute block number for the request block number */
17193 	request_blkno = xp->xb_blkno;
17194 
17195 	/*
17196 	 * Now try to get the error block number from the sense data
17197 	 */
17198 	sensep = xp->xb_sense_data;
17199 
17200 	if (scsi_sense_info_uint64(sensep, SENSE_LENGTH,
17201 	    (uint64_t *)&err_blkno)) {
17202 		/*
17203 		 * We retrieved the error block number from the information
17204 		 * portion of the sense data.
17205 		 *
17206 		 * For USCSI commands we are better off using the error
17207 		 * block no. as the requested block no. (This is the best
17208 		 * we can estimate.)
17209 		 */
17210 		if ((SD_IS_BUFIO(xp) == FALSE) &&
17211 		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
17212 			request_blkno = err_blkno;
17213 		}
17214 	} else {
17215 		/*
17216 		 * Without the es_valid bit set (for fixed format) or an
17217 		 * information descriptor (for descriptor format) we cannot
17218 		 * be certain of the error blkno, so just use the
17219 		 * request_blkno.
17220 		 */
17221 		err_blkno = (diskaddr_t)request_blkno;
17222 	}
17223 
17224 	/*
17225 	 * The following will log the buffer contents for the release driver
17226 	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
17227 	 * level is set to verbose.
17228 	 */
17229 	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
17230 	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
17231 	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
17232 	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);
17233 
17234 	if (pfa_flag == FALSE) {
17235 		/* This is normally only set for USCSI */
17236 		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
17237 			return;
17238 		}
17239 
17240 		if ((SD_IS_BUFIO(xp) == TRUE) &&
17241 		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
17242 		    (severity < sd_error_level))) {
17243 			return;
17244 		}
17245 	}
17246 
17247 	if (SD_FM_LOG(un) == SD_FM_LOG_NSUP ||
17248 	    ((scsi_sense_key(sensep) == KEY_RECOVERABLE_ERROR) &&
17249 	    (pktp->pkt_resid == 0))) {
17250 		scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
17251 		    request_blkno, err_blkno, scsi_cmds,
17252 		    (struct scsi_extended_sense *)sensep,
17253 		    un->un_additional_codes, NULL);
17254 	}
17255 }
17256 
17257 /*
17258  *    Function: sd_sense_key_no_sense
17259  *
17260  * Description: Recovery action when sense data was not received.
17261  *
17262  *     Context: May be called from interrupt context
17263  */
17264 
17265 static void
17266 sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
17267     struct scsi_pkt *pktp)
17268 {
17269 	struct sd_sense_info	si;
17270 
17271 	ASSERT(un != NULL);
17272 	ASSERT(mutex_owned(SD_MUTEX(un)));
17273 	ASSERT(bp != NULL);
17274 	ASSERT(xp != NULL);
17275 	ASSERT(pktp != NULL);
17276 
17277 	si.ssi_severity = SCSI_ERR_FATAL;
17278 	si.ssi_pfa_flag = FALSE;
17279 
17280 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17281 
17282 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17283 	    &si, EIO, (clock_t)0, NULL);
17284 }
17285 
17286 
17287 /*
17288  *    Function: sd_sense_key_recoverable_error
17289  *
17290  * Description: Recovery actions for a SCSI "Recovered Error" sense key.
17291  *
17292  *     Context: May be called from interrupt context
17293  */
17294 
17295 static void
17296 sd_sense_key_recoverable_error(struct sd_lun *un, uint8_t *sense_datap,
17297     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17298 {
17299 	struct sd_sense_info	si;
17300 	uint8_t asc = scsi_sense_asc(sense_datap);
17301 	uint8_t ascq = scsi_sense_ascq(sense_datap);
17302 
17303 	ASSERT(un != NULL);
17304 	ASSERT(mutex_owned(SD_MUTEX(un)));
17305 	ASSERT(bp != NULL);
17306 	ASSERT(xp != NULL);
17307 	ASSERT(pktp != NULL);
17308 
17309 	/*
17310 	 * 0x00, 0x1D: ATA PASSTHROUGH INFORMATION AVAILABLE
17311 	 */
17312 	if (asc == 0x00 && ascq == 0x1D) {
17313 		sd_return_command(un, bp);
17314 		return;
17315 	}
17316 
17317 	/*
17318 	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
17319 	 */
17320 	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
17321 		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17322 		si.ssi_severity = SCSI_ERR_INFO;
17323 		si.ssi_pfa_flag = TRUE;
17324 	} else {
17325 		SD_UPDATE_ERRSTATS(un, sd_softerrs);
17326 		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
17327 		si.ssi_severity = SCSI_ERR_RECOVERED;
17328 		si.ssi_pfa_flag = FALSE;
17329 	}
17330 
17331 	if (pktp->pkt_resid == 0) {
17332 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17333 		sd_return_command(un, bp);
17334 		return;
17335 	}
17336 
17337 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17338 	    &si, EIO, (clock_t)0, NULL);
17339 }
17340 
17341 
17342 
17343 
17344 /*
17345  *    Function: sd_sense_key_not_ready
17346  *
17347  * Description: Recovery actions for a SCSI "Not Ready" sense key.
17348  *
17349  *     Context: May be called from interrupt context
17350  */
17351 
17352 static void
17353 sd_sense_key_not_ready(struct sd_lun *un, uint8_t *sense_datap, struct buf *bp,
17354     struct sd_xbuf *xp, struct scsi_pkt *pktp)
17355 {
17356 	struct sd_sense_info	si;
17357 	uint8_t asc = scsi_sense_asc(sense_datap);
17358 	uint8_t ascq = scsi_sense_ascq(sense_datap);
17359 
17360 	ASSERT(un != NULL);
17361 	ASSERT(mutex_owned(SD_MUTEX(un)));
17362 	ASSERT(bp != NULL);
17363 	ASSERT(xp != NULL);
17364 	ASSERT(pktp != NULL);
17365 
17366 	si.ssi_severity = SCSI_ERR_FATAL;
17367 	si.ssi_pfa_flag = FALSE;
17368 
17369 	/*
17370 	 * Update error stats after first NOT READY error. Disks may have
17371 	 * been powered down and may need to be restarted.  For CDROMs,
17372 	 * report NOT READY errors only if media is present.
17373 	 */
17374 	if ((ISCD(un) && (asc == 0x3A)) ||
17375 	    (xp->xb_nr_retry_count > 0)) {
17376 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17377 		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
17378 	}
17379 
17380 	/*
17381 	 * Just fail if the "not ready" retry limit has been reached.
17382 	 */
17383 	if (xp->xb_nr_retry_count >= un->un_notready_retry_count) {
17384 		/* Special check for error message printing for removables. */
17385 		if (un->un_f_has_removable_media && (asc == 0x04) &&
17386 		    (ascq >= 0x04)) {
17387 			si.ssi_severity = SCSI_ERR_ALL;
17388 		}
17389 		goto fail_command;
17390 	}
17391 
17392 	/*
17393 	 * Check the ASC and ASCQ in the sense data as needed, to determine
17394 	 * what to do.
17395 	 */
17396 	switch (asc) {
17397 	case 0x04:	/* LOGICAL UNIT NOT READY */
17398 		/*
17399 		 * disk drives that don't spin up result in a very long delay
17400 		 * in format without warning messages. We will log a message
17401 		 * if the error level is set to verbose.
17402 		 */
17403 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17404 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17405 			    "logical unit not ready, resetting disk\n");
17406 		}
17407 
17408 		/*
17409 		 * There are different requirements for CDROMs and disks for
17410 		 * the number of retries.  If a CD-ROM is giving this, it is
17411 		 * probably reading TOC and is in the process of getting
17412 		 * ready, so we should keep on trying for a long time to make
17413 		 * sure that all types of media are taken in account (for
17414 		 * some media the drive takes a long time to read TOC).  For
17415 		 * disks we do not want to retry this too many times as this
17416 		 * can cause a long hang in format when the drive refuses to
17417 		 * spin up (a very common failure).
17418 		 */
17419 		switch (ascq) {
17420 		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
17421 			/*
17422 			 * Disk drives frequently refuse to spin up which
17423 			 * results in a very long hang in format without
17424 			 * warning messages.
17425 			 *
17426 			 * Note: This code preserves the legacy behavior of
17427 			 * comparing xb_nr_retry_count against zero for fibre
17428 			 * channel targets instead of comparing against the
17429 			 * un_reset_retry_count value.  The reason for this
17430 			 * discrepancy has been so utterly lost beneath the
17431 			 * Sands of Time that even Indiana Jones could not
17432 			 * find it.
17433 			 */
17434 			if (un->un_f_is_fibre == TRUE) {
17435 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17436 				    (xp->xb_nr_retry_count > 0)) &&
17437 				    (un->un_startstop_timeid == NULL)) {
17438 					scsi_log(SD_DEVINFO(un), sd_label,
17439 					    CE_WARN, "logical unit not ready, "
17440 					    "resetting disk\n");
17441 					sd_reset_target(un, pktp);
17442 				}
17443 			} else {
17444 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17445 				    (xp->xb_nr_retry_count >
17446 				    un->un_reset_retry_count)) &&
17447 				    (un->un_startstop_timeid == NULL)) {
17448 					scsi_log(SD_DEVINFO(un), sd_label,
17449 					    CE_WARN, "logical unit not ready, "
17450 					    "resetting disk\n");
17451 					sd_reset_target(un, pktp);
17452 				}
17453 			}
17454 			break;
17455 
17456 		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
17457 			/*
17458 			 * If the target is in the process of becoming
17459 			 * ready, just proceed with the retry. This can
17460 			 * happen with CD-ROMs that take a long time to
17461 			 * read TOC after a power cycle or reset.
17462 			 */
17463 			goto do_retry;
17464 
17465 		case 0x02:  /* LUN NOT READY, INITITIALIZING CMD REQUIRED */
17466 			break;
17467 
17468 		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
17469 			/*
17470 			 * Retries cannot help here so just fail right away.
17471 			 */
17472 			goto fail_command;
17473 
17474 		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
17475 		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
17476 		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
17477 		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
17478 		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
17479 		default:    /* Possible future codes in SCSI spec? */
17480 			/*
17481 			 * For removable-media devices, do not retry if
17482 			 * ASCQ > 2 as these result mostly from USCSI commands
17483 			 * on MMC devices issued to check status of an
17484 			 * operation initiated in immediate mode.  Also for
17485 			 * ASCQ >= 4 do not print console messages as these
17486 			 * mainly represent a user-initiated operation
17487 			 * instead of a system failure.
17488 			 */
17489 			if (un->un_f_has_removable_media) {
17490 				si.ssi_severity = SCSI_ERR_ALL;
17491 				goto fail_command;
17492 			}
17493 			break;
17494 		}
17495 
17496 		/*
17497 		 * As part of our recovery attempt for the NOT READY
17498 		 * condition, we issue a START STOP UNIT command. However
17499 		 * we want to wait for a short delay before attempting this
17500 		 * as there may still be more commands coming back from the
17501 		 * target with the check condition. To do this we use
17502 		 * timeout(9F) to call sd_start_stop_unit_callback() after
17503 		 * the delay interval expires. (sd_start_stop_unit_callback()
17504 		 * dispatches sd_start_stop_unit_task(), which will issue
17505 		 * the actual START STOP UNIT command. The delay interval
17506 		 * is one-half of the delay that we will use to retry the
17507 		 * command that generated the NOT READY condition.
17508 		 *
17509 		 * Note that we could just dispatch sd_start_stop_unit_task()
17510 		 * from here and allow it to sleep for the delay interval,
17511 		 * but then we would be tying up the taskq thread
17512 		 * uncesessarily for the duration of the delay.
17513 		 *
17514 		 * Do not issue the START STOP UNIT if the current command
17515 		 * is already a START STOP UNIT.
17516 		 */
17517 		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
17518 			break;
17519 		}
17520 
17521 		/*
17522 		 * Do not schedule the timeout if one is already pending.
17523 		 */
17524 		if (un->un_startstop_timeid != NULL) {
17525 			SD_INFO(SD_LOG_ERROR, un,
17526 			    "sd_sense_key_not_ready: restart already issued to"
17527 			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
17528 			    ddi_get_instance(SD_DEVINFO(un)));
17529 			break;
17530 		}
17531 
17532 		/*
17533 		 * Schedule the START STOP UNIT command, then queue the command
17534 		 * for a retry.
17535 		 *
17536 		 * Note: A timeout is not scheduled for this retry because we
17537 		 * want the retry to be serial with the START_STOP_UNIT. The
17538 		 * retry will be started when the START_STOP_UNIT is completed
17539 		 * in sd_start_stop_unit_task.
17540 		 */
17541 		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
17542 		    un, un->un_busy_timeout / 2);
17543 		xp->xb_nr_retry_count++;
17544 		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
17545 		return;
17546 
17547 	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
17548 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17549 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17550 			    "unit does not respond to selection\n");
17551 		}
17552 		break;
17553 
17554 	case 0x3A:	/* MEDIUM NOT PRESENT */
17555 		if (sd_error_level >= SCSI_ERR_FATAL) {
17556 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17557 			    "Caddy not inserted in drive\n");
17558 		}
17559 
17560 		sr_ejected(un);
17561 		un->un_mediastate = DKIO_EJECTED;
17562 		/* The state has changed, inform the media watch routines */
17563 		cv_broadcast(&un->un_state_cv);
17564 		/* Just fail if no media is present in the drive. */
17565 		goto fail_command;
17566 
17567 	default:
17568 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17569 			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
17570 			    "Unit not Ready. Additional sense code 0x%x\n",
17571 			    asc);
17572 		}
17573 		break;
17574 	}
17575 
17576 do_retry:
17577 
17578 	/*
17579 	 * Retry the command, as some targets may report NOT READY for
17580 	 * several seconds after being reset.
17581 	 */
17582 	xp->xb_nr_retry_count++;
17583 	si.ssi_severity = SCSI_ERR_RETRYABLE;
17584 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
17585 	    &si, EIO, un->un_busy_timeout, NULL);
17586 
17587 	return;
17588 
17589 fail_command:
17590 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17591 	sd_return_failed_command(un, bp, EIO);
17592 }
17593 
17594 
17595 
17596 /*
17597  *    Function: sd_sense_key_medium_or_hardware_error
17598  *
17599  * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
17600  *		sense key.
17601  *
17602  *     Context: May be called from interrupt context
17603  */
17604 
17605 static void
17606 sd_sense_key_medium_or_hardware_error(struct sd_lun *un, uint8_t *sense_datap,
17607     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17608 {
17609 	struct sd_sense_info	si;
17610 	uint8_t sense_key = scsi_sense_key(sense_datap);
17611 
17612 	ASSERT(un != NULL);
17613 	ASSERT(mutex_owned(SD_MUTEX(un)));
17614 	ASSERT(bp != NULL);
17615 	ASSERT(xp != NULL);
17616 	ASSERT(pktp != NULL);
17617 
17618 	si.ssi_severity = SCSI_ERR_FATAL;
17619 	si.ssi_pfa_flag = FALSE;
17620 
17621 	if (sense_key == KEY_MEDIUM_ERROR) {
17622 		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
17623 	}
17624 
17625 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17626 
17627 	if ((un->un_reset_retry_count != 0) &&
17628 	    (xp->xb_retry_count == un->un_reset_retry_count)) {
17629 		mutex_exit(SD_MUTEX(un));
17630 		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
17631 		if (un->un_f_allow_bus_device_reset == TRUE) {
17632 
17633 			int reset_retval = 0;
17634 			if (un->un_f_lun_reset_enabled == TRUE) {
17635 				SD_TRACE(SD_LOG_IO_CORE, un,
17636 				    "sd_sense_key_medium_or_hardware_"
17637 				    "error: issuing RESET_LUN\n");
17638 				reset_retval = scsi_reset(SD_ADDRESS(un),
17639 				    RESET_LUN);
17640 			}
17641 			if (reset_retval == 0) {
17642 				SD_TRACE(SD_LOG_IO_CORE, un,
17643 				    "sd_sense_key_medium_or_hardware_"
17644 				    "error: issuing RESET_TARGET\n");
17645 				(void) scsi_reset(SD_ADDRESS(un),
17646 				    RESET_TARGET);
17647 			}
17648 		}
17649 		mutex_enter(SD_MUTEX(un));
17650 	}
17651 
17652 	/*
17653 	 * This really ought to be a fatal error, but we will retry anyway
17654 	 * as some drives report this as a spurious error.
17655 	 */
17656 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17657 	    &si, EIO, (clock_t)0, NULL);
17658 }
17659 
17660 
17661 
17662 /*
17663  *    Function: sd_sense_key_illegal_request
17664  *
17665  * Description: Recovery actions for a SCSI "Illegal Request" sense key.
17666  *
17667  *     Context: May be called from interrupt context
17668  */
17669 
17670 static void
17671 sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
17672     struct sd_xbuf *xp, struct scsi_pkt *pktp)
17673 {
17674 	struct sd_sense_info	si;
17675 
17676 	ASSERT(un != NULL);
17677 	ASSERT(mutex_owned(SD_MUTEX(un)));
17678 	ASSERT(bp != NULL);
17679 	ASSERT(xp != NULL);
17680 	ASSERT(pktp != NULL);
17681 
17682 	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
17683 
17684 	si.ssi_severity = SCSI_ERR_INFO;
17685 	si.ssi_pfa_flag = FALSE;
17686 
17687 	/* Pointless to retry if the target thinks it's an illegal request */
17688 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17689 	sd_return_failed_command(un, bp, EIO);
17690 }
17691 
17692 
17693 
17694 
17695 /*
17696  *    Function: sd_sense_key_unit_attention
17697  *
17698  * Description: Recovery actions for a SCSI "Unit Attention" sense key.
17699  *
17700  *     Context: May be called from interrupt context
17701  */
17702 
17703 static void
17704 sd_sense_key_unit_attention(struct sd_lun *un, uint8_t *sense_datap,
17705     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17706 {
17707 	/*
17708 	 * For UNIT ATTENTION we allow retries for one minute. Devices
17709 	 * like Sonoma can return UNIT ATTENTION close to a minute
17710 	 * under certain conditions.
17711 	 */
17712 	int	retry_check_flag = SD_RETRIES_UA;
17713 	boolean_t	kstat_updated = B_FALSE;
17714 	struct	sd_sense_info		si;
17715 	uint8_t asc = scsi_sense_asc(sense_datap);
17716 	uint8_t	ascq = scsi_sense_ascq(sense_datap);
17717 
17718 	ASSERT(un != NULL);
17719 	ASSERT(mutex_owned(SD_MUTEX(un)));
17720 	ASSERT(bp != NULL);
17721 	ASSERT(xp != NULL);
17722 	ASSERT(pktp != NULL);
17723 
17724 	si.ssi_severity = SCSI_ERR_INFO;
17725 	si.ssi_pfa_flag = FALSE;
17726 
17727 
17728 	switch (asc) {
17729 	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
17730 		if (sd_report_pfa != 0) {
17731 			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17732 			si.ssi_pfa_flag = TRUE;
17733 			retry_check_flag = SD_RETRIES_STANDARD;
17734 			goto do_retry;
17735 		}
17736 
17737 		break;
17738 
17739 	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
17740 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
17741 			un->un_resvd_status |=
17742 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
17743 		}
17744 		if (un->un_blockcount + 1 > SD_GROUP1_MAX_ADDRESS) {
17745 			if (taskq_dispatch(sd_tq, sd_reenable_dsense_task,
17746 			    un, KM_NOSLEEP) == TASKQID_INVALID) {
17747 				/*
17748 				 * If we can't dispatch the task we'll just
17749 				 * live without descriptor sense.  We can
17750 				 * try again on the next "unit attention"
17751 				 */
17752 				SD_ERROR(SD_LOG_ERROR, un,
17753 				    "sd_sense_key_unit_attention: "
17754 				    "Could not dispatch "
17755 				    "sd_reenable_dsense_task\n");
17756 			}
17757 		}
17758 		/* FALLTHRU */
17759 
17760 	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
17761 		if (!un->un_f_has_removable_media) {
17762 			break;
17763 		}
17764 
17765 		/*
17766 		 * When we get a unit attention from a removable-media device,
17767 		 * it may be in a state that will take a long time to recover
17768 		 * (e.g., from a reset).  Since we are executing in interrupt
17769 		 * context here, we cannot wait around for the device to come
17770 		 * back. So hand this command off to sd_media_change_task()
17771 		 * for deferred processing under taskq thread context. (Note
17772 		 * that the command still may be failed if a problem is
17773 		 * encountered at a later time.)
17774 		 */
17775 		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
17776 		    KM_NOSLEEP) == TASKQID_INVALID) {
17777 			/*
17778 			 * Cannot dispatch the request so fail the command.
17779 			 */
17780 			SD_UPDATE_ERRSTATS(un, sd_harderrs);
17781 			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
17782 			si.ssi_severity = SCSI_ERR_FATAL;
17783 			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17784 			sd_return_failed_command(un, bp, EIO);
17785 		}
17786 
17787 		/*
17788 		 * If failed to dispatch sd_media_change_task(), we already
17789 		 * updated kstat. If succeed to dispatch sd_media_change_task(),
17790 		 * we should update kstat later if it encounters an error. So,
17791 		 * we update kstat_updated flag here.
17792 		 */
17793 		kstat_updated = B_TRUE;
17794 
17795 		/*
17796 		 * Either the command has been successfully dispatched to a
17797 		 * task Q for retrying, or the dispatch failed. In either case
17798 		 * do NOT retry again by calling sd_retry_command. This sets up
17799 		 * two retries of the same command and when one completes and
17800 		 * frees the resources the other will access freed memory,
17801 		 * a bad thing.
17802 		 */
17803 		return;
17804 
17805 	default:
17806 		break;
17807 	}
17808 
17809 	/*
17810 	 * ASC  ASCQ
17811 	 *  2A   09	Capacity data has changed
17812 	 *  2A   01	Mode parameters changed
17813 	 *  3F   0E	Reported luns data has changed
17814 	 * Arrays that support logical unit expansion should report
17815 	 * capacity changes(2Ah/09). Mode parameters changed and
17816 	 * reported luns data has changed are the approximation.
17817 	 */
17818 	if (((asc == 0x2a) && (ascq == 0x09)) ||
17819 	    ((asc == 0x2a) && (ascq == 0x01)) ||
17820 	    ((asc == 0x3f) && (ascq == 0x0e))) {
17821 		if (taskq_dispatch(sd_tq, sd_target_change_task, un,
17822 		    KM_NOSLEEP) == TASKQID_INVALID) {
17823 			SD_ERROR(SD_LOG_ERROR, un,
17824 			    "sd_sense_key_unit_attention: "
17825 			    "Could not dispatch sd_target_change_task\n");
17826 		}
17827 	}
17828 
17829 	/*
17830 	 * Update kstat if we haven't done that.
17831 	 */
17832 	if (!kstat_updated) {
17833 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17834 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
17835 	}
17836 
17837 do_retry:
17838 	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
17839 	    EIO, SD_UA_RETRY_DELAY, NULL);
17840 }
17841 
17842 
17843 
17844 /*
17845  *    Function: sd_sense_key_fail_command
17846  *
17847  * Description: Use to fail a command when we don't like the sense key that
17848  *		was returned.
17849  *
17850  *     Context: May be called from interrupt context
17851  */
17852 
17853 static void
17854 sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
17855     struct scsi_pkt *pktp)
17856 {
17857 	struct sd_sense_info	si;
17858 
17859 	ASSERT(un != NULL);
17860 	ASSERT(mutex_owned(SD_MUTEX(un)));
17861 	ASSERT(bp != NULL);
17862 	ASSERT(xp != NULL);
17863 	ASSERT(pktp != NULL);
17864 
17865 	si.ssi_severity = SCSI_ERR_FATAL;
17866 	si.ssi_pfa_flag = FALSE;
17867 
17868 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17869 	sd_return_failed_command(un, bp, EIO);
17870 }
17871 
17872 
17873 
17874 /*
17875  *    Function: sd_sense_key_blank_check
17876  *
17877  * Description: Recovery actions for a SCSI "Blank Check" sense key.
17878  *		Has no monetary connotation.
17879  *
17880  *     Context: May be called from interrupt context
17881  */
17882 
17883 static void
17884 sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
17885     struct scsi_pkt *pktp)
17886 {
17887 	struct sd_sense_info	si;
17888 
17889 	ASSERT(un != NULL);
17890 	ASSERT(mutex_owned(SD_MUTEX(un)));
17891 	ASSERT(bp != NULL);
17892 	ASSERT(xp != NULL);
17893 	ASSERT(pktp != NULL);
17894 
17895 	/*
17896 	 * Blank check is not fatal for removable devices, therefore
17897 	 * it does not require a console message.
17898 	 */
17899 	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
17900 	    SCSI_ERR_FATAL;
17901 	si.ssi_pfa_flag = FALSE;
17902 
17903 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17904 	sd_return_failed_command(un, bp, EIO);
17905 }
17906 
17907 
17908 
17909 
17910 /*
17911  *    Function: sd_sense_key_aborted_command
17912  *
17913  * Description: Recovery actions for a SCSI "Aborted Command" sense key.
17914  *
17915  *     Context: May be called from interrupt context
17916  */
17917 
17918 static void
17919 sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
17920     struct sd_xbuf *xp, struct scsi_pkt *pktp)
17921 {
17922 	struct sd_sense_info	si;
17923 
17924 	ASSERT(un != NULL);
17925 	ASSERT(mutex_owned(SD_MUTEX(un)));
17926 	ASSERT(bp != NULL);
17927 	ASSERT(xp != NULL);
17928 	ASSERT(pktp != NULL);
17929 
17930 	si.ssi_severity = SCSI_ERR_FATAL;
17931 	si.ssi_pfa_flag = FALSE;
17932 
17933 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17934 
17935 	/*
17936 	 * This really ought to be a fatal error, but we will retry anyway
17937 	 * as some drives report this as a spurious error.
17938 	 */
17939 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17940 	    &si, EIO, drv_usectohz(100000), NULL);
17941 }
17942 
17943 
17944 
17945 /*
17946  *    Function: sd_sense_key_default
17947  *
17948  * Description: Default recovery action for several SCSI sense keys (basically
17949  *		attempts a retry).
17950  *
17951  *     Context: May be called from interrupt context
17952  */
17953 
17954 static void
17955 sd_sense_key_default(struct sd_lun *un, uint8_t *sense_datap, struct buf *bp,
17956     struct sd_xbuf *xp, struct scsi_pkt *pktp)
17957 {
17958 	struct sd_sense_info	si;
17959 	uint8_t sense_key = scsi_sense_key(sense_datap);
17960 
17961 	ASSERT(un != NULL);
17962 	ASSERT(mutex_owned(SD_MUTEX(un)));
17963 	ASSERT(bp != NULL);
17964 	ASSERT(xp != NULL);
17965 	ASSERT(pktp != NULL);
17966 
17967 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17968 
17969 	/*
17970 	 * Undecoded sense key.	Attempt retries and hope that will fix
17971 	 * the problem.  Otherwise, we're dead.
17972 	 */
17973 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
17974 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17975 		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
17976 	}
17977 
17978 	si.ssi_severity = SCSI_ERR_FATAL;
17979 	si.ssi_pfa_flag = FALSE;
17980 
17981 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17982 	    &si, EIO, (clock_t)0, NULL);
17983 }
17984 
17985 
17986 
17987 /*
17988  *    Function: sd_print_retry_msg
17989  *
17990  * Description: Print a message indicating the retry action being taken.
17991  *
17992  *   Arguments: un - ptr to associated softstate
17993  *		bp - ptr to buf(9S) for the command
17994  *		arg - not used.
17995  *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
17996  *			or SD_NO_RETRY_ISSUED
17997  *
17998  *     Context: May be called from interrupt context
17999  */
18000 /* ARGSUSED */
18001 static void
18002 sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
18003 {
18004 	struct sd_xbuf	*xp;
18005 	struct scsi_pkt *pktp;
18006 	char *reasonp;
18007 	char *msgp;
18008 
18009 	ASSERT(un != NULL);
18010 	ASSERT(mutex_owned(SD_MUTEX(un)));
18011 	ASSERT(bp != NULL);
18012 	pktp = SD_GET_PKTP(bp);
18013 	ASSERT(pktp != NULL);
18014 	xp = SD_GET_XBUF(bp);
18015 	ASSERT(xp != NULL);
18016 
18017 	ASSERT(!mutex_owned(&un->un_pm_mutex));
18018 	mutex_enter(&un->un_pm_mutex);
18019 	if ((un->un_state == SD_STATE_SUSPENDED) ||
18020 	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
18021 	    (pktp->pkt_flags & FLAG_SILENT)) {
18022 		mutex_exit(&un->un_pm_mutex);
18023 		goto update_pkt_reason;
18024 	}
18025 	mutex_exit(&un->un_pm_mutex);
18026 
18027 	/*
18028 	 * Suppress messages if they are all the same pkt_reason; with
18029 	 * TQ, many (up to 256) are returned with the same pkt_reason.
18030 	 * If we are in panic, then suppress the retry messages.
18031 	 */
18032 	switch (flag) {
18033 	case SD_NO_RETRY_ISSUED:
18034 		msgp = "giving up";
18035 		break;
18036 	case SD_IMMEDIATE_RETRY_ISSUED:
18037 	case SD_DELAYED_RETRY_ISSUED:
18038 		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
18039 		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
18040 		    (sd_error_level != SCSI_ERR_ALL))) {
18041 			return;
18042 		}
18043 		msgp = "retrying command";
18044 		break;
18045 	default:
18046 		goto update_pkt_reason;
18047 	}
18048 
18049 	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
18050 	    scsi_rname(pktp->pkt_reason));
18051 
18052 	if (SD_FM_LOG(un) == SD_FM_LOG_NSUP) {
18053 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18054 		    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);
18055 	}
18056 
18057 update_pkt_reason:
18058 	/*
18059 	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
18060 	 * This is to prevent multiple console messages for the same failure
18061 	 * condition.  Note that un->un_last_pkt_reason is NOT restored if &
18062 	 * when the command is retried successfully because there still may be
18063 	 * more commands coming back with the same value of pktp->pkt_reason.
18064 	 */
18065 	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
18066 		un->un_last_pkt_reason = pktp->pkt_reason;
18067 	}
18068 }
18069 
18070 
18071 /*
18072  *    Function: sd_print_cmd_incomplete_msg
18073  *
18074  * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
18075  *
18076  *   Arguments: un - ptr to associated softstate
18077  *		bp - ptr to buf(9S) for the command
18078  *		arg - passed to sd_print_retry_msg()
18079  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18080  *			or SD_NO_RETRY_ISSUED
18081  *
18082  *     Context: May be called from interrupt context
18083  */
18084 
18085 static void
18086 sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
18087     int code)
18088 {
18089 	dev_info_t	*dip;
18090 
18091 	ASSERT(un != NULL);
18092 	ASSERT(mutex_owned(SD_MUTEX(un)));
18093 	ASSERT(bp != NULL);
18094 
18095 	switch (code) {
18096 	case SD_NO_RETRY_ISSUED:
18097 		/* Command was failed. Someone turned off this target? */
18098 		if (un->un_state != SD_STATE_OFFLINE) {
18099 			/*
18100 			 * Suppress message if we are detaching and
18101 			 * device has been disconnected
18102 			 * Note that DEVI_IS_DEVICE_REMOVED is a consolidation
18103 			 * private interface and not part of the DDI
18104 			 */
18105 			dip = un->un_sd->sd_dev;
18106 			if (!(DEVI_IS_DETACHING(dip) &&
18107 			    DEVI_IS_DEVICE_REMOVED(dip))) {
18108 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18109 				"disk not responding to selection\n");
18110 			}
18111 			New_state(un, SD_STATE_OFFLINE);
18112 		}
18113 		break;
18114 
18115 	case SD_DELAYED_RETRY_ISSUED:
18116 	case SD_IMMEDIATE_RETRY_ISSUED:
18117 	default:
18118 		/* Command was successfully queued for retry */
18119 		sd_print_retry_msg(un, bp, arg, code);
18120 		break;
18121 	}
18122 }
18123 
18124 
18125 /*
18126  *    Function: sd_pkt_reason_cmd_incomplete
18127  *
18128  * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
18129  *
18130  *     Context: May be called from interrupt context
18131  */
18132 
18133 static void
18134 sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
18135     struct sd_xbuf *xp, struct scsi_pkt *pktp)
18136 {
18137 	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
18138 
18139 	ASSERT(un != NULL);
18140 	ASSERT(mutex_owned(SD_MUTEX(un)));
18141 	ASSERT(bp != NULL);
18142 	ASSERT(xp != NULL);
18143 	ASSERT(pktp != NULL);
18144 
18145 	/* Do not do a reset if selection did not complete */
18146 	/* Note: Should this not just check the bit? */
18147 	if (pktp->pkt_state != STATE_GOT_BUS) {
18148 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18149 		sd_reset_target(un, pktp);
18150 	}
18151 
18152 	/*
18153 	 * If the target was not successfully selected, then set
18154 	 * SD_RETRIES_FAILFAST to indicate that we lost communication
18155 	 * with the target, and further retries and/or commands are
18156 	 * likely to take a long time.
18157 	 */
18158 	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
18159 		flag |= SD_RETRIES_FAILFAST;
18160 	}
18161 
18162 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18163 
18164 	sd_retry_command(un, bp, flag,
18165 	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18166 }
18167 
18168 
18169 
18170 /*
18171  *    Function: sd_pkt_reason_cmd_tran_err
18172  *
18173  * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
18174  *
18175  *     Context: May be called from interrupt context
18176  */
18177 
18178 static void
18179 sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
18180     struct sd_xbuf *xp, struct scsi_pkt *pktp)
18181 {
18182 	ASSERT(un != NULL);
18183 	ASSERT(mutex_owned(SD_MUTEX(un)));
18184 	ASSERT(bp != NULL);
18185 	ASSERT(xp != NULL);
18186 	ASSERT(pktp != NULL);
18187 
18188 	/*
18189 	 * Do not reset if we got a parity error, or if
18190 	 * selection did not complete.
18191 	 */
18192 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18193 	/* Note: Should this not just check the bit for pkt_state? */
18194 	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
18195 	    (pktp->pkt_state != STATE_GOT_BUS)) {
18196 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18197 		sd_reset_target(un, pktp);
18198 	}
18199 
18200 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18201 
18202 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18203 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18204 }
18205 
18206 
18207 
18208 /*
18209  *    Function: sd_pkt_reason_cmd_reset
18210  *
18211  * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
18212  *
18213  *     Context: May be called from interrupt context
18214  */
18215 
18216 static void
18217 sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18218     struct scsi_pkt *pktp)
18219 {
18220 	ASSERT(un != NULL);
18221 	ASSERT(mutex_owned(SD_MUTEX(un)));
18222 	ASSERT(bp != NULL);
18223 	ASSERT(xp != NULL);
18224 	ASSERT(pktp != NULL);
18225 
18226 	/* The target may still be running the command, so try to reset. */
18227 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18228 	sd_reset_target(un, pktp);
18229 
18230 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18231 
18232 	/*
18233 	 * If pkt_reason is CMD_RESET chances are that this pkt got
18234 	 * reset because another target on this bus caused it. The target
18235 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18236 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18237 	 */
18238 
18239 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18240 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18241 }
18242 
18243 
18244 
18245 
18246 /*
18247  *    Function: sd_pkt_reason_cmd_aborted
18248  *
18249  * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
18250  *
18251  *     Context: May be called from interrupt context
18252  */
18253 
18254 static void
18255 sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18256     struct scsi_pkt *pktp)
18257 {
18258 	ASSERT(un != NULL);
18259 	ASSERT(mutex_owned(SD_MUTEX(un)));
18260 	ASSERT(bp != NULL);
18261 	ASSERT(xp != NULL);
18262 	ASSERT(pktp != NULL);
18263 
18264 	/* The target may still be running the command, so try to reset. */
18265 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18266 	sd_reset_target(un, pktp);
18267 
18268 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18269 
18270 	/*
18271 	 * If pkt_reason is CMD_ABORTED chances are that this pkt got
18272 	 * aborted because another target on this bus caused it. The target
18273 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18274 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18275 	 */
18276 
18277 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18278 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18279 }
18280 
18281 
18282 
18283 /*
18284  *    Function: sd_pkt_reason_cmd_timeout
18285  *
18286  * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
18287  *
18288  *     Context: May be called from interrupt context
18289  */
18290 
18291 static void
18292 sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18293     struct scsi_pkt *pktp)
18294 {
18295 	ASSERT(un != NULL);
18296 	ASSERT(mutex_owned(SD_MUTEX(un)));
18297 	ASSERT(bp != NULL);
18298 	ASSERT(xp != NULL);
18299 	ASSERT(pktp != NULL);
18300 
18301 
18302 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18303 	sd_reset_target(un, pktp);
18304 
18305 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18306 
18307 	/*
18308 	 * A command timeout indicates that we could not establish
18309 	 * communication with the target, so set SD_RETRIES_FAILFAST
18310 	 * as further retries/commands are likely to take a long time.
18311 	 */
18312 	sd_retry_command(un, bp,
18313 	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
18314 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18315 }
18316 
18317 
18318 
18319 /*
18320  *    Function: sd_pkt_reason_cmd_unx_bus_free
18321  *
18322  * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
18323  *
18324  *     Context: May be called from interrupt context
18325  */
18326 
18327 static void
18328 sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
18329     struct sd_xbuf *xp, struct scsi_pkt *pktp)
18330 {
18331 	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
18332 
18333 	ASSERT(un != NULL);
18334 	ASSERT(mutex_owned(SD_MUTEX(un)));
18335 	ASSERT(bp != NULL);
18336 	ASSERT(xp != NULL);
18337 	ASSERT(pktp != NULL);
18338 
18339 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18340 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18341 
18342 	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
18343 	    sd_print_retry_msg : NULL;
18344 
18345 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18346 	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18347 }
18348 
18349 
18350 /*
18351  *    Function: sd_pkt_reason_cmd_tag_reject
18352  *
18353  * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
18354  *
18355  *     Context: May be called from interrupt context
18356  */
18357 
18358 static void
18359 sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
18360     struct sd_xbuf *xp, struct scsi_pkt *pktp)
18361 {
18362 	ASSERT(un != NULL);
18363 	ASSERT(mutex_owned(SD_MUTEX(un)));
18364 	ASSERT(bp != NULL);
18365 	ASSERT(xp != NULL);
18366 	ASSERT(pktp != NULL);
18367 
18368 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18369 	pktp->pkt_flags = 0;
18370 	un->un_tagflags = 0;
18371 	if (un->un_f_opt_queueing == TRUE) {
18372 		un->un_throttle = min(un->un_throttle, 3);
18373 	} else {
18374 		un->un_throttle = 1;
18375 	}
18376 	mutex_exit(SD_MUTEX(un));
18377 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
18378 	mutex_enter(SD_MUTEX(un));
18379 
18380 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18381 
18382 	/* Legacy behavior not to check retry counts here. */
18383 	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
18384 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18385 }
18386 
18387 
18388 /*
18389  *    Function: sd_pkt_reason_default
18390  *
18391  * Description: Default recovery actions for SCSA pkt_reason values that
18392  *		do not have more explicit recovery actions.
18393  *
18394  *     Context: May be called from interrupt context
18395  */
18396 
18397 static void
18398 sd_pkt_reason_default(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18399     struct scsi_pkt *pktp)
18400 {
18401 	ASSERT(un != NULL);
18402 	ASSERT(mutex_owned(SD_MUTEX(un)));
18403 	ASSERT(bp != NULL);
18404 	ASSERT(xp != NULL);
18405 	ASSERT(pktp != NULL);
18406 
18407 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18408 	sd_reset_target(un, pktp);
18409 
18410 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18411 
18412 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18413 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18414 }
18415 
18416 
18417 
18418 /*
18419  *    Function: sd_pkt_status_check_condition
18420  *
18421  * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
18422  *
18423  *     Context: May be called from interrupt context
18424  */
18425 
18426 static void
18427 sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
18428     struct sd_xbuf *xp, struct scsi_pkt *pktp)
18429 {
18430 	ASSERT(un != NULL);
18431 	ASSERT(mutex_owned(SD_MUTEX(un)));
18432 	ASSERT(bp != NULL);
18433 	ASSERT(xp != NULL);
18434 	ASSERT(pktp != NULL);
18435 
18436 	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
18437 	    "entry: buf:0x%p xp:0x%p\n", bp, xp);
18438 
18439 	/*
18440 	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
18441 	 * command will be retried after the request sense). Otherwise, retry
18442 	 * the command. Note: we are issuing the request sense even though the
18443 	 * retry limit may have been reached for the failed command.
18444 	 */
18445 	if (un->un_f_arq_enabled == FALSE) {
18446 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18447 		    "no ARQ, sending request sense command\n");
18448 		sd_send_request_sense_command(un, bp, pktp);
18449 	} else {
18450 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18451 		    "ARQ,retrying request sense command\n");
18452 		/*
18453 		 * The SD_RETRY_DELAY value need to be adjusted here
18454 		 * when SD_RETRY_DELAY change in sddef.h
18455 		 */
18456 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
18457 		    un->un_f_is_fibre?drv_usectohz(100000):(clock_t)0,
18458 		    NULL);
18459 	}
18460 
18461 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
18462 }
18463 
18464 
18465 /*
18466  *    Function: sd_pkt_status_busy
18467  *
18468  * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
18469  *
18470  *     Context: May be called from interrupt context
18471  */
18472 
18473 static void
18474 sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18475     struct scsi_pkt *pktp)
18476 {
18477 	ASSERT(un != NULL);
18478 	ASSERT(mutex_owned(SD_MUTEX(un)));
18479 	ASSERT(bp != NULL);
18480 	ASSERT(xp != NULL);
18481 	ASSERT(pktp != NULL);
18482 
18483 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18484 	    "sd_pkt_status_busy: entry\n");
18485 
18486 	/* If retries are exhausted, just fail the command. */
18487 	if (xp->xb_retry_count >= un->un_busy_retry_count) {
18488 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18489 		    "device busy too long\n");
18490 		sd_return_failed_command(un, bp, EIO);
18491 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18492 		    "sd_pkt_status_busy: exit\n");
18493 		return;
18494 	}
18495 	xp->xb_retry_count++;
18496 
18497 	/*
18498 	 * Try to reset the target. However, we do not want to perform
18499 	 * more than one reset if the device continues to fail. The reset
18500 	 * will be performed when the retry count reaches the reset
18501 	 * threshold.  This threshold should be set such that at least
18502 	 * one retry is issued before the reset is performed.
18503 	 */
18504 	if (xp->xb_retry_count ==
18505 	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
18506 		int rval = 0;
18507 		mutex_exit(SD_MUTEX(un));
18508 		if (un->un_f_allow_bus_device_reset == TRUE) {
18509 			/*
18510 			 * First try to reset the LUN; if we cannot then
18511 			 * try to reset the target.
18512 			 */
18513 			if (un->un_f_lun_reset_enabled == TRUE) {
18514 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18515 				    "sd_pkt_status_busy: RESET_LUN\n");
18516 				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18517 			}
18518 			if (rval == 0) {
18519 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18520 				    "sd_pkt_status_busy: RESET_TARGET\n");
18521 				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18522 			}
18523 		}
18524 		if (rval == 0) {
18525 			/*
18526 			 * If the RESET_LUN and/or RESET_TARGET failed,
18527 			 * try RESET_ALL
18528 			 */
18529 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18530 			    "sd_pkt_status_busy: RESET_ALL\n");
18531 			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
18532 		}
18533 		mutex_enter(SD_MUTEX(un));
18534 		if (rval == 0) {
18535 			/*
18536 			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
18537 			 * At this point we give up & fail the command.
18538 			 */
18539 			sd_return_failed_command(un, bp, EIO);
18540 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18541 			    "sd_pkt_status_busy: exit (failed cmd)\n");
18542 			return;
18543 		}
18544 	}
18545 
18546 	/*
18547 	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
18548 	 * we have already checked the retry counts above.
18549 	 */
18550 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
18551 	    EIO, un->un_busy_timeout, NULL);
18552 
18553 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18554 	    "sd_pkt_status_busy: exit\n");
18555 }
18556 
18557 
18558 /*
18559  *    Function: sd_pkt_status_reservation_conflict
18560  *
18561  * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
18562  *		command status.
18563  *
18564  *     Context: May be called from interrupt context
18565  */
18566 
18567 static void
18568 sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
18569     struct sd_xbuf *xp, struct scsi_pkt *pktp)
18570 {
18571 	ASSERT(un != NULL);
18572 	ASSERT(mutex_owned(SD_MUTEX(un)));
18573 	ASSERT(bp != NULL);
18574 	ASSERT(xp != NULL);
18575 	ASSERT(pktp != NULL);
18576 
18577 	/*
18578 	 * If the command was PERSISTENT_RESERVATION_[IN|OUT] then reservation
18579 	 * conflict could be due to various reasons like incorrect keys, not
18580 	 * registered or not reserved etc. So, we return EACCES to the caller.
18581 	 */
18582 	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
18583 		int cmd = SD_GET_PKT_OPCODE(pktp);
18584 		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
18585 		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
18586 			sd_return_failed_command(un, bp, EACCES);
18587 			return;
18588 		}
18589 	}
18590 
18591 	un->un_resvd_status |= SD_RESERVATION_CONFLICT;
18592 
18593 	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
18594 		if (sd_failfast_enable != 0) {
18595 			/* By definition, we must panic here.... */
18596 			sd_panic_for_res_conflict(un);
18597 			/*NOTREACHED*/
18598 		}
18599 		SD_ERROR(SD_LOG_IO, un,
18600 		    "sd_handle_resv_conflict: Disk Reserved\n");
18601 		sd_return_failed_command(un, bp, EACCES);
18602 		return;
18603 	}
18604 
18605 	/*
18606 	 * 1147670: retry only if sd_retry_on_reservation_conflict
18607 	 * property is set (default is 1). Retries will not succeed
18608 	 * on a disk reserved by another initiator. HA systems
18609 	 * may reset this via sd.conf to avoid these retries.
18610 	 *
18611 	 * Note: The legacy return code for this failure is EIO, however EACCES
18612 	 * seems more appropriate for a reservation conflict.
18613 	 */
18614 	if (sd_retry_on_reservation_conflict == 0) {
18615 		SD_ERROR(SD_LOG_IO, un,
18616 		    "sd_handle_resv_conflict: Device Reserved\n");
18617 		sd_return_failed_command(un, bp, EIO);
18618 		return;
18619 	}
18620 
18621 	/*
18622 	 * Retry the command if we can.
18623 	 *
18624 	 * Note: The legacy return code for this failure is EIO, however EACCES
18625 	 * seems more appropriate for a reservation conflict.
18626 	 */
18627 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
18628 	    (clock_t)2, NULL);
18629 }
18630 
18631 
18632 
/*
 *    Function: sd_pkt_status_qfull
 *
 * Description: Handle a QUEUE FULL condition from the target.  This can
 *		occur if the HBA does not handle the queue full condition.
 *		(Basically this means third-party HBAs as Sun HBAs will
 *		handle the queue full condition.)  Note that if there are
 *		some commands already in the transport, then the queue full
 *		has occurred because the queue for this nexus is actually
 *		full. If there are no commands in the transport, then the
 *		queue full is resulting from some other initiator or lun
 *		consuming all the resources at the target.
 *
 *   Arguments: un   - driver soft state (unit) structure; the caller must
 *			hold SD_MUTEX(un).
 *		bp   - buf of the command that got QUEUE FULL.
 *		xp   - sd_xbuf of the command (only sanity checked here).
 *		pktp - scsi_pkt of the command (only sanity checked here).
 *
 *     Context: May be called from interrupt context
 */

static void
sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
    struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_pkt_status_qfull: entry\n");

	/*
	 * Just lower the QFULL throttle and retry the command.  Note that
	 * we do not limit the number of retries here: SD_RETRIES_NOCHECK is
	 * passed, and the failure code argument is 0.
	 */
	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
	    SD_RESTART_TIMEOUT, NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_pkt_status_qfull: exit\n");
}
18673 
18674 
18675 /*
18676  *    Function: sd_reset_target
18677  *
18678  * Description: Issue a scsi_reset(9F), with either RESET_LUN,
18679  *		RESET_TARGET, or RESET_ALL.
18680  *
18681  *     Context: May be called under interrupt context.
18682  */
18683 
18684 static void
18685 sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
18686 {
18687 	int rval = 0;
18688 
18689 	ASSERT(un != NULL);
18690 	ASSERT(mutex_owned(SD_MUTEX(un)));
18691 	ASSERT(pktp != NULL);
18692 
18693 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");
18694 
18695 	/*
18696 	 * No need to reset if the transport layer has already done so.
18697 	 */
18698 	if ((pktp->pkt_statistics &
18699 	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
18700 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18701 		    "sd_reset_target: no reset\n");
18702 		return;
18703 	}
18704 
18705 	mutex_exit(SD_MUTEX(un));
18706 
18707 	if (un->un_f_allow_bus_device_reset == TRUE) {
18708 		if (un->un_f_lun_reset_enabled == TRUE) {
18709 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18710 			    "sd_reset_target: RESET_LUN\n");
18711 			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18712 		}
18713 		if (rval == 0) {
18714 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18715 			    "sd_reset_target: RESET_TARGET\n");
18716 			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18717 		}
18718 	}
18719 
18720 	if (rval == 0) {
18721 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18722 		    "sd_reset_target: RESET_ALL\n");
18723 		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
18724 	}
18725 
18726 	mutex_enter(SD_MUTEX(un));
18727 
18728 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
18729 }
18730 
18731 /*
18732  *    Function: sd_target_change_task
18733  *
18734  * Description: Handle dynamic target change
18735  *
18736  *     Context: Executes in a taskq() thread context
18737  */
18738 static void
18739 sd_target_change_task(void *arg)
18740 {
18741 	struct sd_lun		*un = arg;
18742 	uint64_t		capacity;
18743 	diskaddr_t		label_cap;
18744 	uint_t			lbasize;
18745 	sd_ssc_t		*ssc;
18746 
18747 	ASSERT(un != NULL);
18748 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18749 
18750 	if ((un->un_f_blockcount_is_valid == FALSE) ||
18751 	    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
18752 		return;
18753 	}
18754 
18755 	ssc = sd_ssc_init(un);
18756 
18757 	if (sd_send_scsi_READ_CAPACITY(ssc, &capacity,
18758 	    &lbasize, SD_PATH_DIRECT) != 0) {
18759 		SD_ERROR(SD_LOG_ERROR, un,
18760 		    "sd_target_change_task: fail to read capacity\n");
18761 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
18762 		goto task_exit;
18763 	}
18764 
18765 	mutex_enter(SD_MUTEX(un));
18766 	if (capacity <= un->un_blockcount) {
18767 		mutex_exit(SD_MUTEX(un));
18768 		goto task_exit;
18769 	}
18770 
18771 	sd_update_block_info(un, lbasize, capacity);
18772 	mutex_exit(SD_MUTEX(un));
18773 
18774 	/*
18775 	 * If lun is EFI labeled and lun capacity is greater than the
18776 	 * capacity contained in the label, log a sys event.
18777 	 */
18778 	if (cmlb_efi_label_capacity(un->un_cmlbhandle, &label_cap,
18779 	    (void*)SD_PATH_DIRECT) == 0) {
18780 		mutex_enter(SD_MUTEX(un));
18781 		if (un->un_f_blockcount_is_valid &&
18782 		    un->un_blockcount > label_cap) {
18783 			mutex_exit(SD_MUTEX(un));
18784 			sd_log_lun_expansion_event(un, KM_SLEEP);
18785 		} else {
18786 			mutex_exit(SD_MUTEX(un));
18787 		}
18788 	}
18789 
18790 task_exit:
18791 	sd_ssc_fini(ssc);
18792 }
18793 
18794 
18795 /*
18796  *    Function: sd_log_dev_status_event
18797  *
18798  * Description: Log EC_dev_status sysevent
18799  *
18800  *     Context: Never called from interrupt context
18801  */
18802 static void
18803 sd_log_dev_status_event(struct sd_lun *un, char *esc, int km_flag)
18804 {
18805 	int err;
18806 	char			*path;
18807 	nvlist_t		*attr_list;
18808 	size_t			n;
18809 
18810 	/* Allocate and build sysevent attribute list */
18811 	err = nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, km_flag);
18812 	if (err != 0) {
18813 		SD_ERROR(SD_LOG_ERROR, un,
18814 		    "sd_log_dev_status_event: fail to allocate space\n");
18815 		return;
18816 	}
18817 
18818 	path = kmem_alloc(MAXPATHLEN, km_flag);
18819 	if (path == NULL) {
18820 		nvlist_free(attr_list);
18821 		SD_ERROR(SD_LOG_ERROR, un,
18822 		    "sd_log_dev_status_event: fail to allocate space\n");
18823 		return;
18824 	}
18825 
18826 	n = snprintf(path, MAXPATHLEN, "/devices");
18827 	(void) ddi_pathname(SD_DEVINFO(un), path + n);
18828 	n = strlen(path);
18829 	n += snprintf(path + n, MAXPATHLEN - n, ":x");
18830 
18831 	/*
18832 	 * On receipt of this event, the ZFS sysevent module will scan
18833 	 * active zpools for child vdevs matching this physical path.
18834 	 * In order to catch both whole disk pools and those with an
18835 	 * EFI boot partition, generate separate sysevents for minor
18836 	 * node 'a' and 'b'.
18837 	 */
18838 	for (char c = 'a'; c < 'c'; c++) {
18839 		path[n - 1] = c;
18840 
18841 		err = nvlist_add_string(attr_list, DEV_PHYS_PATH, path);
18842 		if (err != 0) {
18843 			SD_ERROR(SD_LOG_ERROR, un,
18844 			    "sd_log_dev_status_event: fail to add attribute\n");
18845 			break;
18846 		}
18847 
18848 		err = ddi_log_sysevent(SD_DEVINFO(un), SUNW_VENDOR,
18849 		    EC_DEV_STATUS, esc, attr_list, NULL, km_flag);
18850 		if (err != DDI_SUCCESS) {
18851 			SD_ERROR(SD_LOG_ERROR, un,
18852 			    "sd_log_dev_status_event: fail to log sysevent\n");
18853 			break;
18854 		}
18855 	}
18856 
18857 	nvlist_free(attr_list);
18858 	kmem_free(path, MAXPATHLEN);
18859 }
18860 
18861 
/*
 *    Function: sd_log_lun_expansion_event
 *
 * Description: Log lun expansion sys event.  This is a thin wrapper that
 *		calls sd_log_dev_status_event() with subclass ESC_DEV_DLE.
 *
 *   Arguments: un      - driver soft state (unit) structure.
 *		km_flag - allocation flag, passed through unchanged to
 *			sd_log_dev_status_event().
 *
 *     Context: Never called from interrupt context
 */
static void
sd_log_lun_expansion_event(struct sd_lun *un, int km_flag)
{
	sd_log_dev_status_event(un, ESC_DEV_DLE, km_flag);
}
18874 
18875 
/*
 *    Function: sd_log_eject_request_event
 *
 * Description: Log eject request sysevent.  This is a thin wrapper that
 *		calls sd_log_dev_status_event() with subclass
 *		ESC_DEV_EJECT_REQUEST.
 *
 *   Arguments: un      - driver soft state (unit) structure.
 *		km_flag - allocation flag, passed through unchanged to
 *			sd_log_dev_status_event().
 *
 *     Context: Never called from interrupt context
 */
static void
sd_log_eject_request_event(struct sd_lun *un, int km_flag)
{
	sd_log_dev_status_event(un, ESC_DEV_EJECT_REQUEST, km_flag);
}
18888 
18889 
18890 /*
18891  *    Function: sd_media_change_task
18892  *
18893  * Description: Recovery action for CDROM to become available.
18894  *
18895  *     Context: Executes in a taskq() thread context
18896  */
18897 
18898 static void
18899 sd_media_change_task(void *arg)
18900 {
18901 	struct	scsi_pkt	*pktp = arg;
18902 	struct	sd_lun		*un;
18903 	struct	buf		*bp;
18904 	struct	sd_xbuf		*xp;
18905 	int	err		= 0;
18906 	int	retry_count	= 0;
18907 	int	retry_limit	= SD_UNIT_ATTENTION_RETRY/10;
18908 	struct	sd_sense_info	si;
18909 
18910 	ASSERT(pktp != NULL);
18911 	bp = (struct buf *)pktp->pkt_private;
18912 	ASSERT(bp != NULL);
18913 	xp = SD_GET_XBUF(bp);
18914 	ASSERT(xp != NULL);
18915 	un = SD_GET_UN(bp);
18916 	ASSERT(un != NULL);
18917 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18918 	ASSERT(un->un_f_monitor_media_state);
18919 
18920 	si.ssi_severity = SCSI_ERR_INFO;
18921 	si.ssi_pfa_flag = FALSE;
18922 
18923 	/*
18924 	 * When a reset is issued on a CDROM, it takes a long time to
18925 	 * recover. First few attempts to read capacity and other things
18926 	 * related to handling unit attention fail (with a ASC 0x4 and
18927 	 * ASCQ 0x1). In that case we want to do enough retries and we want
18928 	 * to limit the retries in other cases of genuine failures like
18929 	 * no media in drive.
18930 	 */
18931 	while (retry_count++ < retry_limit) {
18932 		if ((err = sd_handle_mchange(un)) == 0) {
18933 			break;
18934 		}
18935 		if (err == EAGAIN) {
18936 			retry_limit = SD_UNIT_ATTENTION_RETRY;
18937 		}
18938 		/* Sleep for 0.5 sec. & try again */
18939 		delay(drv_usectohz(500000));
18940 	}
18941 
18942 	/*
18943 	 * Dispatch (retry or fail) the original command here,
18944 	 * along with appropriate console messages....
18945 	 *
18946 	 * Must grab the mutex before calling sd_retry_command,
18947 	 * sd_print_sense_msg and sd_return_failed_command.
18948 	 */
18949 	mutex_enter(SD_MUTEX(un));
18950 	if (err != SD_CMD_SUCCESS) {
18951 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
18952 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
18953 		si.ssi_severity = SCSI_ERR_FATAL;
18954 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18955 		sd_return_failed_command(un, bp, EIO);
18956 	} else {
18957 		sd_retry_command(un, bp, SD_RETRIES_UA, sd_print_sense_msg,
18958 		    &si, EIO, (clock_t)0, NULL);
18959 	}
18960 	mutex_exit(SD_MUTEX(un));
18961 }
18962 
18963 
18964 
18965 /*
18966  *    Function: sd_handle_mchange
18967  *
18968  * Description: Perform geometry validation & other recovery when CDROM
18969  *		has been removed from drive.
18970  *
18971  * Return Code: 0 for success
18972  *		errno-type return code of either sd_send_scsi_DOORLOCK() or
18973  *		sd_send_scsi_READ_CAPACITY()
18974  *
18975  *     Context: Executes in a taskq() thread context
18976  */
18977 
18978 static int
18979 sd_handle_mchange(struct sd_lun *un)
18980 {
18981 	uint64_t	capacity;
18982 	uint32_t	lbasize;
18983 	int		rval;
18984 	sd_ssc_t	*ssc;
18985 
18986 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18987 	ASSERT(un->un_f_monitor_media_state);
18988 
18989 	ssc = sd_ssc_init(un);
18990 	rval = sd_send_scsi_READ_CAPACITY(ssc, &capacity, &lbasize,
18991 	    SD_PATH_DIRECT_PRIORITY);
18992 
18993 	if (rval != 0)
18994 		goto failed;
18995 
18996 	mutex_enter(SD_MUTEX(un));
18997 	sd_update_block_info(un, lbasize, capacity);
18998 
18999 	if (un->un_errstats != NULL) {
19000 		struct	sd_errstats *stp =
19001 		    (struct sd_errstats *)un->un_errstats->ks_data;
19002 		stp->sd_capacity.value.ui64 = (uint64_t)
19003 		    ((uint64_t)un->un_blockcount *
19004 		    (uint64_t)un->un_tgt_blocksize);
19005 	}
19006 
19007 	/*
19008 	 * Check if the media in the device is writable or not
19009 	 */
19010 	if (ISCD(un)) {
19011 		sd_check_for_writable_cd(ssc, SD_PATH_DIRECT_PRIORITY);
19012 	}
19013 
19014 	/*
19015 	 * Note: Maybe let the strategy/partitioning chain worry about getting
19016 	 * valid geometry.
19017 	 */
19018 	mutex_exit(SD_MUTEX(un));
19019 	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
19020 
19021 
19022 	if (cmlb_validate(un->un_cmlbhandle, 0,
19023 	    (void *)SD_PATH_DIRECT_PRIORITY) != 0) {
19024 		sd_ssc_fini(ssc);
19025 		return (EIO);
19026 	} else {
19027 		if (un->un_f_pkstats_enabled) {
19028 			sd_set_pstats(un);
19029 			SD_TRACE(SD_LOG_IO_PARTITION, un,
19030 			    "sd_handle_mchange: un:0x%p pstats created and "
19031 			    "set\n", un);
19032 		}
19033 	}
19034 
19035 	/*
19036 	 * Try to lock the door
19037 	 */
19038 	rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
19039 	    SD_PATH_DIRECT_PRIORITY);
19040 failed:
19041 	if (rval != 0)
19042 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
19043 	sd_ssc_fini(ssc);
19044 	return (rval);
19045 }
19046 
19047 
19048 /*
19049  *    Function: sd_send_scsi_DOORLOCK
19050  *
19051  * Description: Issue the scsi DOOR LOCK command
19052  *
19053  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
19054  *                      structure for this target.
19055  *		flag  - SD_REMOVAL_ALLOW
19056  *			SD_REMOVAL_PREVENT
19057  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19058  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19059  *			to use the USCSI "direct" chain and bypass the normal
19060  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19061  *			command is issued as part of an error recovery action.
19062  *
19063  * Return Code: 0   - Success
19064  *		errno return code from sd_ssc_send()
19065  *
19066  *     Context: Can sleep.
19067  */
19068 
19069 static int
19070 sd_send_scsi_DOORLOCK(sd_ssc_t *ssc, int flag, int path_flag)
19071 {
19072 	struct scsi_extended_sense	sense_buf;
19073 	union scsi_cdb		cdb;
19074 	struct uscsi_cmd	ucmd_buf;
19075 	int			status;
19076 	struct sd_lun		*un;
19077 
19078 	ASSERT(ssc != NULL);
19079 	un = ssc->ssc_un;
19080 	ASSERT(un != NULL);
19081 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19082 
19083 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);
19084 
19085 	/* already determined doorlock is not supported, fake success */
19086 	if (un->un_f_doorlock_supported == FALSE) {
19087 		return (0);
19088 	}
19089 
19090 	/*
19091 	 * If we are ejecting and see an SD_REMOVAL_PREVENT
19092 	 * ignore the command so we can complete the eject
19093 	 * operation.
19094 	 */
19095 	if (flag == SD_REMOVAL_PREVENT) {
19096 		mutex_enter(SD_MUTEX(un));
19097 		if (un->un_f_ejecting == TRUE) {
19098 			mutex_exit(SD_MUTEX(un));
19099 			return (EAGAIN);
19100 		}
19101 		mutex_exit(SD_MUTEX(un));
19102 	}
19103 
19104 	bzero(&cdb, sizeof (cdb));
19105 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19106 
19107 	cdb.scc_cmd = SCMD_DOORLOCK;
19108 	cdb.cdb_opaque[4] = (uchar_t)flag;
19109 
19110 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19111 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19112 	ucmd_buf.uscsi_bufaddr	= NULL;
19113 	ucmd_buf.uscsi_buflen	= 0;
19114 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19115 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19116 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19117 	ucmd_buf.uscsi_timeout	= 15;
19118 
19119 	SD_TRACE(SD_LOG_IO, un,
19120 	    "sd_send_scsi_DOORLOCK: returning sd_ssc_send\n");
19121 
19122 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
19123 	    UIO_SYSSPACE, path_flag);
19124 
19125 	if (status == 0)
19126 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
19127 
19128 	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
19129 	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19130 	    (scsi_sense_key((uint8_t *)&sense_buf) == KEY_ILLEGAL_REQUEST)) {
19131 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
19132 
19133 		/* fake success and skip subsequent doorlock commands */
19134 		un->un_f_doorlock_supported = FALSE;
19135 		return (0);
19136 	}
19137 
19138 	return (status);
19139 }
19140 
19141 /*
19142  *    Function: sd_send_scsi_READ_CAPACITY
19143  *
19144  * Description: This routine uses the scsi READ CAPACITY command to determine
19145  *		the device capacity in number of blocks and the device native
19146  *		block size. If this function returns a failure, then the
19147  *		values in *capp and *lbap are undefined.  If the capacity
19148  *		returned is 0xffffffff then the lun is too large for a
19149  *		normal READ CAPACITY command and the results of a
19150  *		READ CAPACITY 16 will be used instead.
19151  *
19152  *   Arguments: ssc   - ssc contains ptr to soft state struct for the target
19153  *		capp - ptr to unsigned 64-bit variable to receive the
19154  *			capacity value from the command.
19155  *		lbap - ptr to unsigned 32-bit varaible to receive the
19156  *			block size value from the command
19157  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19158  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19159  *			to use the USCSI "direct" chain and bypass the normal
19160  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19161  *			command is issued as part of an error recovery action.
19162  *
19163  * Return Code: 0   - Success
19164  *		EIO - IO error
19165  *		EACCES - Reservation conflict detected
19166  *		EAGAIN - Device is becoming ready
19167  *		errno return code from sd_ssc_send()
19168  *
19169  *     Context: Can sleep.  Blocks until command completes.
19170  */
19171 
19172 #define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)
19173 
19174 static int
19175 sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap,
19176     int path_flag)
19177 {
19178 	struct	scsi_extended_sense	sense_buf;
19179 	struct	uscsi_cmd	ucmd_buf;
19180 	union	scsi_cdb	cdb;
19181 	uint32_t		*capacity_buf;
19182 	uint64_t		capacity;
19183 	uint32_t		lbasize;
19184 	uint32_t		pbsize;
19185 	int			status;
19186 	struct sd_lun		*un;
19187 
19188 	ASSERT(ssc != NULL);
19189 
19190 	un = ssc->ssc_un;
19191 	ASSERT(un != NULL);
19192 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19193 	ASSERT(capp != NULL);
19194 	ASSERT(lbap != NULL);
19195 
19196 	SD_TRACE(SD_LOG_IO, un,
19197 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19198 
19199 	/*
19200 	 * First send a READ_CAPACITY command to the target.
19201 	 * (This command is mandatory under SCSI-2.)
19202 	 *
19203 	 * Set up the CDB for the READ_CAPACITY command.  The Partial
19204 	 * Medium Indicator bit is cleared.  The address field must be
19205 	 * zero if the PMI bit is zero.
19206 	 */
19207 	bzero(&cdb, sizeof (cdb));
19208 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19209 
19210 	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);
19211 
19212 	cdb.scc_cmd = SCMD_READ_CAPACITY;
19213 
19214 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19215 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19216 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
19217 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
19218 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19219 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19220 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19221 	ucmd_buf.uscsi_timeout	= 60;
19222 
19223 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
19224 	    UIO_SYSSPACE, path_flag);
19225 
19226 	switch (status) {
19227 	case 0:
19228 		/* Return failure if we did not get valid capacity data. */
19229 		if (ucmd_buf.uscsi_resid != 0) {
19230 			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
19231 			    "sd_send_scsi_READ_CAPACITY received invalid "
19232 			    "capacity data");
19233 			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19234 			return (EIO);
19235 		}
19236 		/*
19237 		 * Read capacity and block size from the READ CAPACITY 10 data.
19238 		 * This data may be adjusted later due to device specific
19239 		 * issues.
19240 		 *
19241 		 * According to the SCSI spec, the READ CAPACITY 10
19242 		 * command returns the following:
19243 		 *
19244 		 *  bytes 0-3: Maximum logical block address available.
19245 		 *		(MSB in byte:0 & LSB in byte:3)
19246 		 *
19247 		 *  bytes 4-7: Block length in bytes
19248 		 *		(MSB in byte:4 & LSB in byte:7)
19249 		 *
19250 		 */
19251 		capacity = BE_32(capacity_buf[0]);
19252 		lbasize = BE_32(capacity_buf[1]);
19253 
19254 		/*
19255 		 * Done with capacity_buf
19256 		 */
19257 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19258 
19259 		/*
19260 		 * if the reported capacity is set to all 0xf's, then
19261 		 * this disk is too large and requires SBC-2 commands.
19262 		 * Reissue the request using READ CAPACITY 16.
19263 		 */
19264 		if (capacity == 0xffffffff) {
19265 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
19266 			status = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity,
19267 			    &lbasize, &pbsize, path_flag);
19268 			if (status != 0) {
19269 				return (status);
19270 			} else {
19271 				goto rc16_done;
19272 			}
19273 		}
19274 		break;	/* Success! */
19275 	case EIO:
19276 		switch (ucmd_buf.uscsi_status) {
19277 		case STATUS_RESERVATION_CONFLICT:
19278 			status = EACCES;
19279 			break;
19280 		case STATUS_CHECK:
19281 			/*
19282 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19283 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19284 			 */
19285 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19286 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
19287 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
19288 				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19289 				return (EAGAIN);
19290 			}
19291 			break;
19292 		default:
19293 			break;
19294 		}
19295 		/* FALLTHRU */
19296 	default:
19297 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19298 		return (status);
19299 	}
19300 
19301 	/*
19302 	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
19303 	 * (2352 and 0 are common) so for these devices always force the value
19304 	 * to 2048 as required by the ATAPI specs.
19305 	 */
19306 	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
19307 		lbasize = 2048;
19308 	}
19309 
19310 	/*
19311 	 * Get the maximum LBA value from the READ CAPACITY data.
19312 	 * Here we assume that the Partial Medium Indicator (PMI) bit
19313 	 * was cleared when issuing the command. This means that the LBA
19314 	 * returned from the device is the LBA of the last logical block
19315 	 * on the logical unit.  The actual logical block count will be
19316 	 * this value plus one.
19317 	 */
19318 	capacity += 1;
19319 
19320 	/*
19321 	 * Currently, for removable media, the capacity is saved in terms
19322 	 * of un->un_sys_blocksize, so scale the capacity value to reflect this.
19323 	 */
19324 	if (un->un_f_has_removable_media)
19325 		capacity *= (lbasize / un->un_sys_blocksize);
19326 
19327 rc16_done:
19328 
19329 	/*
19330 	 * Copy the values from the READ CAPACITY command into the space
19331 	 * provided by the caller.
19332 	 */
19333 	*capp = capacity;
19334 	*lbap = lbasize;
19335 
19336 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
19337 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
19338 
19339 	/*
19340 	 * Both the lbasize and capacity from the device must be nonzero,
19341 	 * otherwise we assume that the values are not valid and return
19342 	 * failure to the caller. (4203735)
19343 	 */
19344 	if ((capacity == 0) || (lbasize == 0)) {
19345 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
19346 		    "sd_send_scsi_READ_CAPACITY received invalid value "
19347 		    "capacity %llu lbasize %d", capacity, lbasize);
19348 		return (EIO);
19349 	}
19350 	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
19351 	return (0);
19352 }
19353 
19354 /*
19355  *    Function: sd_send_scsi_READ_CAPACITY_16
19356  *
19357  * Description: This routine uses the scsi READ CAPACITY 16 command to
19358  *		determine the device capacity in number of blocks and the
19359  *		device native block size.  If this function returns a failure,
19360  *		then the values in *capp and *lbap are undefined.
19361  *		This routine should be called by sd_send_scsi_READ_CAPACITY
19362  *              which will apply any device specific adjustments to capacity
19363  *              and lbasize. One exception is it is also called by
19364  *              sd_get_media_info_ext. In that function, there is no need to
19365  *              adjust the capacity and lbasize.
19366  *
19367  *   Arguments: ssc   - ssc contains ptr to soft state struct for the target
19368  *		capp - ptr to unsigned 64-bit variable to receive the
19369  *			capacity value from the command.
19370  *		lbap - ptr to unsigned 32-bit variable to receive the
19371  *			block size value from the command
19372  *              psp  - ptr to unsigned 32-bit variable to receive the
19373  *                      physical block size value from the command
19374  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19375  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19376  *			to use the USCSI "direct" chain and bypass the normal
19377  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
19378  *			this command is issued as part of an error recovery
19379  *			action.
19380  *
19381  * Return Code: 0   - Success
19382  *		EIO - IO error
19383  *		EACCES - Reservation conflict detected
19384  *		EAGAIN - Device is becoming ready
19385  *		errno return code from sd_ssc_send()
19386  *
19387  *     Context: Can sleep.  Blocks until command completes.
19388  */
19389 
19390 #define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
19391 
19392 static int
19393 sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap,
19394     uint32_t *psp, int path_flag)
19395 {
19396 	struct	scsi_extended_sense	sense_buf;
19397 	struct	uscsi_cmd	ucmd_buf;
19398 	union	scsi_cdb	cdb;
19399 	uint64_t		*capacity16_buf;
19400 	uint64_t		capacity;
19401 	uint32_t		lbasize;
19402 	uint32_t		pbsize;
19403 	uint32_t		lbpb_exp;
19404 	int			status;
19405 	struct sd_lun		*un;
19406 
19407 	ASSERT(ssc != NULL);
19408 
19409 	un = ssc->ssc_un;
19410 	ASSERT(un != NULL);
19411 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19412 	ASSERT(capp != NULL);
19413 	ASSERT(lbap != NULL);
19414 
19415 	SD_TRACE(SD_LOG_IO, un,
19416 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19417 
19418 	/*
19419 	 * First send a READ_CAPACITY_16 command to the target.
19420 	 *
19421 	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
19422 	 * Medium Indicator bit is cleared.  The address field must be
19423 	 * zero if the PMI bit is zero.
19424 	 */
19425 	bzero(&cdb, sizeof (cdb));
19426 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19427 
19428 	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);
19429 
19430 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19431 	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
19432 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
19433 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
19434 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19435 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19436 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19437 	ucmd_buf.uscsi_timeout	= 60;
19438 
19439 	/*
19440 	 * Read Capacity (16) is a Service Action In command.  One
19441 	 * command byte (0x9E) is overloaded for multiple operations,
19442 	 * with the second CDB byte specifying the desired operation
19443 	 */
19444 	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
19445 	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;
19446 
19447 	/*
19448 	 * Fill in allocation length field
19449 	 */
19450 	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);
19451 
19452 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
19453 	    UIO_SYSSPACE, path_flag);
19454 
19455 	switch (status) {
19456 	case 0:
19457 		/* Return failure if we did not get valid capacity data. */
19458 		if (ucmd_buf.uscsi_resid > 20) {
19459 			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
19460 			    "sd_send_scsi_READ_CAPACITY_16 received invalid "
19461 			    "capacity data");
19462 			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19463 			return (EIO);
19464 		}
19465 
19466 		/*
19467 		 * Read capacity and block size from the READ CAPACITY 16 data.
19468 		 * This data may be adjusted later due to device specific
19469 		 * issues.
19470 		 *
19471 		 * According to the SCSI spec, the READ CAPACITY 16
19472 		 * command returns the following:
19473 		 *
19474 		 *  bytes 0-7: Maximum logical block address available.
19475 		 *		(MSB in byte:0 & LSB in byte:7)
19476 		 *
19477 		 *  bytes 8-11: Block length in bytes
19478 		 *		(MSB in byte:8 & LSB in byte:11)
19479 		 *
19480 		 *  byte 13: LOGICAL BLOCKS PER PHYSICAL BLOCK EXPONENT
19481 		 *
19482 		 *  byte 14:
19483 		 *	bit 7: Thin-Provisioning Enabled
19484 		 *	bit 6: Thin-Provisioning Read Zeros
19485 		 */
19486 		capacity = BE_64(capacity16_buf[0]);
19487 		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
19488 		lbpb_exp = (BE_64(capacity16_buf[1]) >> 16) & 0x0f;
19489 
19490 		un->un_thin_flags = 0;
19491 		if (((uint8_t *)capacity16_buf)[14] & (1 << 7))
19492 			un->un_thin_flags |= SD_THIN_PROV_ENABLED;
19493 		if (((uint8_t *)capacity16_buf)[14] & (1 << 6))
19494 			un->un_thin_flags |= SD_THIN_PROV_READ_ZEROS;
19495 
19496 		pbsize = lbasize << lbpb_exp;
19497 
19498 		/*
19499 		 * Done with capacity16_buf
19500 		 */
19501 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19502 
19503 		/*
19504 		 * if the reported capacity is set to all 0xf's, then
19505 		 * this disk is too large.  This could only happen with
19506 		 * a device that supports LBAs larger than 64 bits which
19507 		 * are not defined by any current T10 standards.
19508 		 */
19509 		if (capacity == 0xffffffffffffffff) {
19510 			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
19511 			    "disk is too large");
19512 			return (EIO);
19513 		}
19514 		break;	/* Success! */
19515 	case EIO:
19516 		switch (ucmd_buf.uscsi_status) {
19517 		case STATUS_RESERVATION_CONFLICT:
19518 			status = EACCES;
19519 			break;
19520 		case STATUS_CHECK:
19521 			/*
19522 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19523 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19524 			 */
19525 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19526 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
19527 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
19528 				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19529 				return (EAGAIN);
19530 			}
19531 			break;
19532 		default:
19533 			break;
19534 		}
19535 		/* FALLTHRU */
19536 	default:
19537 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19538 		return (status);
19539 	}
19540 
19541 	/*
19542 	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
19543 	 * (2352 and 0 are common) so for these devices always force the value
19544 	 * to 2048 as required by the ATAPI specs.
19545 	 */
19546 	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
19547 		lbasize = 2048;
19548 	}
19549 
19550 	/*
19551 	 * Get the maximum LBA value from the READ CAPACITY 16 data.
19552 	 * Here we assume that the Partial Medium Indicator (PMI) bit
19553 	 * was cleared when issuing the command. This means that the LBA
19554 	 * returned from the device is the LBA of the last logical block
19555 	 * on the logical unit.  The actual logical block count will be
19556 	 * this value plus one.
19557 	 */
19558 	capacity += 1;
19559 
19560 	/*
19561 	 * Currently, for removable media, the capacity is saved in terms
19562 	 * of un->un_sys_blocksize, so scale the capacity value to reflect this.
19563 	 */
19564 	if (un->un_f_has_removable_media)
19565 		capacity *= (lbasize / un->un_sys_blocksize);
19566 
19567 	*capp = capacity;
19568 	*lbap = lbasize;
19569 	*psp = pbsize;
19570 
19571 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
19572 	    "capacity:0x%llx  lbasize:0x%x, pbsize: 0x%x\n",
19573 	    capacity, lbasize, pbsize);
19574 
19575 	if ((capacity == 0) || (lbasize == 0) || (pbsize == 0)) {
19576 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
19577 		    "sd_send_scsi_READ_CAPACITY_16 received invalid value "
19578 		    "capacity %llu lbasize %d pbsize %d", capacity, lbasize);
19579 		return (EIO);
19580 	}
19581 
19582 	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
19583 	return (0);
19584 }
19585 
19586 
19587 /*
19588  *    Function: sd_send_scsi_START_STOP_UNIT
19589  *
19590  * Description: Issue a scsi START STOP UNIT command to the target.
19591  *
 *   Arguments: ssc    - ssc contains pointer to driver soft state (unit)
 *                       structure for this target.
 *		pc_flag - SD_POWER_CONDITION
 *			  SD_START_STOP
19596  *		flag  - SD_TARGET_START
19597  *			SD_TARGET_STOP
19598  *			SD_TARGET_EJECT
19599  *			SD_TARGET_CLOSE
19600  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19601  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19602  *			to use the USCSI "direct" chain and bypass the normal
19603  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19604  *			command is issued as part of an error recovery action.
19605  *
 * Return Code: 0   - Success
 *		EIO - IO error
 *		EACCES - Reservation conflict detected
 *		EAGAIN - An eject is in progress and flag is not
 *			SD_TARGET_EJECT
 *		ENOTSUP - Check condition with sense key ILLEGAL REQUEST
 *		ENXIO  - Not Ready, medium not present
 *		errno return code from sd_ssc_send()
19611  *
19612  *     Context: Can sleep.
19613  */
19614 
19615 static int
19616 sd_send_scsi_START_STOP_UNIT(sd_ssc_t *ssc, int pc_flag, int flag,
19617     int path_flag)
19618 {
19619 	struct	scsi_extended_sense	sense_buf;
19620 	union scsi_cdb		cdb;
19621 	struct uscsi_cmd	ucmd_buf;
19622 	int			status;
19623 	struct sd_lun		*un;
19624 
19625 	ASSERT(ssc != NULL);
19626 	un = ssc->ssc_un;
19627 	ASSERT(un != NULL);
19628 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19629 
19630 	SD_TRACE(SD_LOG_IO, un,
19631 	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);
19632 
19633 	if (un->un_f_check_start_stop &&
19634 	    (pc_flag == SD_START_STOP) &&
19635 	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
19636 	    (un->un_f_start_stop_supported != TRUE)) {
19637 		return (0);
19638 	}
19639 
19640 	/*
19641 	 * If we are performing an eject operation and
19642 	 * we receive any command other than SD_TARGET_EJECT
19643 	 * we should immediately return.
19644 	 */
19645 	if (flag != SD_TARGET_EJECT) {
19646 		mutex_enter(SD_MUTEX(un));
19647 		if (un->un_f_ejecting == TRUE) {
19648 			mutex_exit(SD_MUTEX(un));
19649 			return (EAGAIN);
19650 		}
19651 		mutex_exit(SD_MUTEX(un));
19652 	}
19653 
19654 	bzero(&cdb, sizeof (cdb));
19655 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19656 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19657 
19658 	cdb.scc_cmd = SCMD_START_STOP;
19659 	cdb.cdb_opaque[4] = (pc_flag == SD_POWER_CONDITION) ?
19660 	    (uchar_t)(flag << 4) : (uchar_t)flag;
19661 
19662 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19663 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19664 	ucmd_buf.uscsi_bufaddr	= NULL;
19665 	ucmd_buf.uscsi_buflen	= 0;
19666 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19667 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19668 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19669 	ucmd_buf.uscsi_timeout	= 200;
19670 
19671 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
19672 	    UIO_SYSSPACE, path_flag);
19673 
19674 	switch (status) {
19675 	case 0:
19676 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
19677 		break;	/* Success! */
19678 	case EIO:
19679 		switch (ucmd_buf.uscsi_status) {
19680 		case STATUS_RESERVATION_CONFLICT:
19681 			status = EACCES;
19682 			break;
19683 		case STATUS_CHECK:
19684 			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
19685 				switch (scsi_sense_key(
19686 				    (uint8_t *)&sense_buf)) {
19687 				case KEY_ILLEGAL_REQUEST:
19688 					status = ENOTSUP;
19689 					break;
19690 				case KEY_NOT_READY:
19691 					if (scsi_sense_asc(
19692 					    (uint8_t *)&sense_buf)
19693 					    == 0x3A) {
19694 						status = ENXIO;
19695 					}
19696 					break;
19697 				default:
19698 					break;
19699 				}
19700 			}
19701 			break;
19702 		default:
19703 			break;
19704 		}
19705 		break;
19706 	default:
19707 		break;
19708 	}
19709 
19710 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");
19711 
19712 	return (status);
19713 }
19714 
19715 
19716 /*
19717  *    Function: sd_start_stop_unit_callback
19718  *
19719  * Description: timeout(9F) callback to begin recovery process for a
19720  *		device that has spun down.
19721  *
19722  *   Arguments: arg - pointer to associated softstate struct.
19723  *
19724  *     Context: Executes in a timeout(9F) thread context
19725  */
19726 
19727 static void
19728 sd_start_stop_unit_callback(void *arg)
19729 {
19730 	struct sd_lun	*un = arg;
19731 	ASSERT(un != NULL);
19732 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19733 
19734 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
19735 
19736 	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
19737 }
19738 
19739 
19740 /*
19741  *    Function: sd_start_stop_unit_task
19742  *
19743  * Description: Recovery procedure when a drive is spun down.
19744  *
19745  *   Arguments: arg - pointer to associated softstate struct.
19746  *
19747  *     Context: Executes in a taskq() thread context
19748  */
19749 
19750 static void
19751 sd_start_stop_unit_task(void *arg)
19752 {
19753 	struct sd_lun	*un = arg;
19754 	sd_ssc_t	*ssc;
19755 	int		power_level;
19756 	int		rval;
19757 
19758 	ASSERT(un != NULL);
19759 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19760 
19761 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");
19762 
19763 	/*
19764 	 * Some unformatted drives report not ready error, no need to
19765 	 * restart if format has been initiated.
19766 	 */
19767 	mutex_enter(SD_MUTEX(un));
19768 	if (un->un_f_format_in_progress == TRUE) {
19769 		mutex_exit(SD_MUTEX(un));
19770 		return;
19771 	}
19772 	mutex_exit(SD_MUTEX(un));
19773 
19774 	ssc = sd_ssc_init(un);
19775 	/*
19776 	 * When a START STOP command is issued from here, it is part of a
19777 	 * failure recovery operation and must be issued before any other
19778 	 * commands, including any pending retries. Thus it must be sent
19779 	 * using SD_PATH_DIRECT_PRIORITY. It doesn't matter if the spin up
19780 	 * succeeds or not, we will start I/O after the attempt.
19781 	 * If power condition is supported and the current power level
19782 	 * is capable of performing I/O, we should set the power condition
19783 	 * to that level. Otherwise, set the power condition to ACTIVE.
19784 	 */
19785 	if (un->un_f_power_condition_supported) {
19786 		mutex_enter(SD_MUTEX(un));
19787 		ASSERT(SD_PM_IS_LEVEL_VALID(un, un->un_power_level));
19788 		power_level = sd_pwr_pc.ran_perf[un->un_power_level]
19789 		    > 0 ? un->un_power_level : SD_SPINDLE_ACTIVE;
19790 		mutex_exit(SD_MUTEX(un));
19791 		rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_POWER_CONDITION,
19792 		    sd_pl2pc[power_level], SD_PATH_DIRECT_PRIORITY);
19793 	} else {
19794 		rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
19795 		    SD_TARGET_START, SD_PATH_DIRECT_PRIORITY);
19796 	}
19797 
19798 	if (rval != 0)
19799 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
19800 	sd_ssc_fini(ssc);
19801 	/*
19802 	 * The above call blocks until the START_STOP_UNIT command completes.
19803 	 * Now that it has completed, we must re-try the original IO that
19804 	 * received the NOT READY condition in the first place. There are
19805 	 * three possible conditions here:
19806 	 *
19807 	 *  (1) The original IO is on un_retry_bp.
19808 	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
19809 	 *	is NULL.
19810 	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
19811 	 *	points to some other, unrelated bp.
19812 	 *
19813 	 * For each case, we must call sd_start_cmds() with un_retry_bp
19814 	 * as the argument. If un_retry_bp is NULL, this will initiate
19815 	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
19816 	 * then this will process the bp on un_retry_bp. That may or may not
19817 	 * be the original IO, but that does not matter: the important thing
19818 	 * is to keep the IO processing going at this point.
19819 	 *
19820 	 * Note: This is a very specific error recovery sequence associated
19821 	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
19822 	 * serialize the I/O with completion of the spin-up.
19823 	 */
19824 	mutex_enter(SD_MUTEX(un));
19825 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19826 	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
19827 	    un, un->un_retry_bp);
19828 	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
19829 	sd_start_cmds(un, un->un_retry_bp);
19830 	mutex_exit(SD_MUTEX(un));
19831 
19832 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
19833 }
19834 
19835 
19836 /*
19837  *    Function: sd_send_scsi_INQUIRY
19838  *
19839  * Description: Issue the scsi INQUIRY command.
19840  *
19841  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
19842  *                      structure for this target.
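 *		bufaddr - buffer that receives the INQUIRY data; it is
 *			zeroed before the command is issued
 *		buflen - size of bufaddr in bytes; also used as the
 *			allocation length in the CDB
 *		evpd - value placed in byte 1 of the CDB
 *		page_code - value placed in byte 2 of the CDB
 *		residp - if not NULL, receives the uscsi residual count
 *			when the command succeeds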
19848  *
19849  * Return Code: 0   - Success
19850  *		errno return code from sd_ssc_send()
19851  *
19852  *     Context: Can sleep. Does not return until command is completed.
19853  */
19854 
19855 static int
19856 sd_send_scsi_INQUIRY(sd_ssc_t *ssc, uchar_t *bufaddr, size_t buflen,
19857     uchar_t evpd, uchar_t page_code, size_t *residp)
19858 {
19859 	union scsi_cdb		cdb;
19860 	struct uscsi_cmd	ucmd_buf;
19861 	int			status;
19862 	struct sd_lun		*un;
19863 
19864 	ASSERT(ssc != NULL);
19865 	un = ssc->ssc_un;
19866 	ASSERT(un != NULL);
19867 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19868 	ASSERT(bufaddr != NULL);
19869 
19870 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
19871 
19872 	bzero(&cdb, sizeof (cdb));
19873 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19874 	bzero(bufaddr, buflen);
19875 
19876 	cdb.scc_cmd = SCMD_INQUIRY;
19877 	cdb.cdb_opaque[1] = evpd;
19878 	cdb.cdb_opaque[2] = page_code;
19879 	FORMG0COUNT(&cdb, buflen);
19880 
19881 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19882 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19883 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
19884 	ucmd_buf.uscsi_buflen	= buflen;
19885 	ucmd_buf.uscsi_rqbuf	= NULL;
19886 	ucmd_buf.uscsi_rqlen	= 0;
19887 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
19888 	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
19889 
19890 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
19891 	    UIO_SYSSPACE, SD_PATH_DIRECT);
19892 
19893 	/*
19894 	 * Only handle status == 0, the upper-level caller
19895 	 * will put different assessment based on the context.
19896 	 */
19897 	if (status == 0)
19898 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
19899 
19900 	if ((status == 0) && (residp != NULL)) {
19901 		*residp = ucmd_buf.uscsi_resid;
19902 	}
19903 
19904 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
19905 
19906 	return (status);
19907 }
19908 
19909 
19910 /*
19911  *    Function: sd_send_scsi_TEST_UNIT_READY
19912  *
19913  * Description: Issue the scsi TEST UNIT READY command.
19914  *		This routine can be told to set the flag USCSI_DIAGNOSE to
19915  *		prevent retrying failed commands. Use this when the intent
19916  *		is either to check for device readiness, to clear a Unit
19917  *		Attention, or to clear any outstanding sense data.
19918  *		However under specific conditions the expected behavior
19919  *		is for retries to bring a device ready, so use the flag
19920  *		with caution.
19921  *
19922  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
19923  *                      structure for this target.
 *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
 *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
 *			SD_BYPASS_PM: send the command via SD_PATH_DIRECT
 *			instead of SD_PATH_STANDARD.
 *			0: don't check for media present, do retries on cmd.
19927  *
19928  * Return Code: 0   - Success
19929  *		EIO - IO error
19930  *		EACCES - Reservation conflict detected
19931  *		ENXIO  - Not Ready, medium not present
19932  *		errno return code from sd_ssc_send()
19933  *
19934  *     Context: Can sleep. Does not return until command is completed.
19935  */
19936 
19937 static int
19938 sd_send_scsi_TEST_UNIT_READY(sd_ssc_t *ssc, int flag)
19939 {
19940 	struct	scsi_extended_sense	sense_buf;
19941 	union scsi_cdb		cdb;
19942 	struct uscsi_cmd	ucmd_buf;
19943 	int			status;
19944 	struct sd_lun		*un;
19945 
19946 	ASSERT(ssc != NULL);
19947 	un = ssc->ssc_un;
19948 	ASSERT(un != NULL);
19949 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19950 
19951 	SD_TRACE(SD_LOG_IO, un,
19952 	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);
19953 
19954 	/*
19955 	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
19956 	 * timeouts when they receive a TUR and the queue is not empty. Check
19957 	 * the configuration flag set during attach (indicating the drive has
19958 	 * this firmware bug) and un_ncmds_in_transport before issuing the
19959 	 * TUR. If there are
19960 	 * pending commands return success, this is a bit arbitrary but is ok
19961 	 * for non-removables (i.e. the eliteI disks) and non-clustering
19962 	 * configurations.
19963 	 */
19964 	if (un->un_f_cfg_tur_check == TRUE) {
19965 		mutex_enter(SD_MUTEX(un));
19966 		if (un->un_ncmds_in_transport != 0) {
19967 			mutex_exit(SD_MUTEX(un));
19968 			return (0);
19969 		}
19970 		mutex_exit(SD_MUTEX(un));
19971 	}
19972 
19973 	bzero(&cdb, sizeof (cdb));
19974 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19975 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19976 
19977 	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
19978 
19979 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19980 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19981 	ucmd_buf.uscsi_bufaddr	= NULL;
19982 	ucmd_buf.uscsi_buflen	= 0;
19983 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19984 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19985 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19986 
19987 	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
19988 	if ((flag & SD_DONT_RETRY_TUR) != 0) {
19989 		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
19990 	}
19991 	ucmd_buf.uscsi_timeout	= 60;
19992 
19993 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
19994 	    UIO_SYSSPACE, ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT :
19995 	    SD_PATH_STANDARD));
19996 
19997 	switch (status) {
19998 	case 0:
19999 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20000 		break;	/* Success! */
20001 	case EIO:
20002 		switch (ucmd_buf.uscsi_status) {
20003 		case STATUS_RESERVATION_CONFLICT:
20004 			status = EACCES;
20005 			break;
20006 		case STATUS_CHECK:
20007 			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
20008 				break;
20009 			}
20010 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20011 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20012 			    KEY_NOT_READY) &&
20013 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x3A)) {
20014 				status = ENXIO;
20015 			}
20016 			break;
20017 		default:
20018 			break;
20019 		}
20020 		break;
20021 	default:
20022 		break;
20023 	}
20024 
20025 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");
20026 
20027 	return (status);
20028 }
20029 
20030 /*
20031  *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
20032  *
20033  * Description: Issue the scsi PERSISTENT RESERVE IN command.
20034  *
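 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
 *                      structure for this target.
 *		usr_cmd - SD_READ_KEYS or SD_READ_RESV
 *		data_len - size in bytes of data_bufp; expected to be 0
 *			when data_bufp is NULL
 *		data_bufp - buffer that receives the returned data, or NULL
 *			to have the routine use a temporary buffer of
 *			MHIOC_RESV_KEY_SIZE bytes that is freed on return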
20037  *
20038  * Return Code: 0   - Success
20039  *		EACCES
20040  *		ENOTSUP
20041  *		errno return code from sd_ssc_send()
20042  *
20043  *     Context: Can sleep. Does not return until command is completed.
20044  */
20045 
20046 static int
20047 sd_send_scsi_PERSISTENT_RESERVE_IN(sd_ssc_t *ssc, uchar_t usr_cmd,
20048     uint16_t data_len, uchar_t *data_bufp)
20049 {
20050 	struct scsi_extended_sense	sense_buf;
20051 	union scsi_cdb		cdb;
20052 	struct uscsi_cmd	ucmd_buf;
20053 	int			status;
20054 	int			no_caller_buf = FALSE;
20055 	struct sd_lun		*un;
20056 
20057 	ASSERT(ssc != NULL);
20058 	un = ssc->ssc_un;
20059 	ASSERT(un != NULL);
20060 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20061 	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));
20062 
20063 	SD_TRACE(SD_LOG_IO, un,
20064 	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);
20065 
20066 	bzero(&cdb, sizeof (cdb));
20067 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20068 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20069 	if (data_bufp == NULL) {
20070 		/* Allocate a default buf if the caller did not give one */
20071 		ASSERT(data_len == 0);
20072 		data_len  = MHIOC_RESV_KEY_SIZE;
20073 		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
20074 		no_caller_buf = TRUE;
20075 	}
20076 
20077 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
20078 	cdb.cdb_opaque[1] = usr_cmd;
20079 	FORMG1COUNT(&cdb, data_len);
20080 
20081 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20082 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
20083 	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
20084 	ucmd_buf.uscsi_buflen	= data_len;
20085 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20086 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20087 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20088 	ucmd_buf.uscsi_timeout	= 60;
20089 
20090 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20091 	    UIO_SYSSPACE, SD_PATH_STANDARD);
20092 
20093 	switch (status) {
20094 	case 0:
20095 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20096 
20097 		break;	/* Success! */
20098 	case EIO:
20099 		switch (ucmd_buf.uscsi_status) {
20100 		case STATUS_RESERVATION_CONFLICT:
20101 			status = EACCES;
20102 			break;
20103 		case STATUS_CHECK:
20104 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20105 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20106 			    KEY_ILLEGAL_REQUEST)) {
20107 				status = ENOTSUP;
20108 			}
20109 			break;
20110 		default:
20111 			break;
20112 		}
20113 		break;
20114 	default:
20115 		break;
20116 	}
20117 
20118 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");
20119 
20120 	if (no_caller_buf == TRUE) {
20121 		kmem_free(data_bufp, data_len);
20122 	}
20123 
20124 	return (status);
20125 }
20126 
20127 
20128 /*
20129  *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
20130  *
20131  * Description: This routine is the driver entry point for handling CD-ROM
20132  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS,
20133  *		MHIOCGRP_INRESV) by sending the SCSI-3 PROUT commands to the
20134  *		device.
20135  *
20136  *   Arguments: ssc  -  ssc contains un - pointer to soft state struct
20137  *                      for the target.
 *		usr_cmd SCSI-3 reservation facility command (one of
 *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
 *			SD_SCSI3_PREEMPTANDABORT, SD_SCSI3_CLEAR,
 *			SD_SCSI3_REGISTERANDIGNOREKEY)
 *		usr_bufp - user provided pointer register, reserve descriptor,
 *			preempt and abort, or register and ignore key
 *			structure (mhioc_register_t, mhioc_resv_desc_t,
 *			mhioc_preemptandabort_t, mhioc_registerandignorekey_t)
 *
 * Return Code: 0   - Success
 *		EINVAL - usr_bufp is NULL
 *		EACCES
 *		ENOTSUP
 *		errno return code from sd_ssc_send()
20149  *
20150  *     Context: Can sleep. Does not return until command is completed.
20151  */
20152 
20153 static int
20154 sd_send_scsi_PERSISTENT_RESERVE_OUT(sd_ssc_t *ssc, uchar_t usr_cmd,
20155     uchar_t *usr_bufp)
20156 {
20157 	struct scsi_extended_sense	sense_buf;
20158 	union scsi_cdb		cdb;
20159 	struct uscsi_cmd	ucmd_buf;
20160 	int			status;
20161 	uchar_t			data_len = sizeof (sd_prout_t);
20162 	sd_prout_t		*prp;
20163 	struct sd_lun		*un;
20164 
20165 	ASSERT(ssc != NULL);
20166 	un = ssc->ssc_un;
20167 	ASSERT(un != NULL);
20168 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20169 	ASSERT(data_len == 24);	/* required by scsi spec */
20170 
20171 	SD_TRACE(SD_LOG_IO, un,
20172 	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);
20173 
20174 	if (usr_bufp == NULL) {
20175 		return (EINVAL);
20176 	}
20177 
20178 	bzero(&cdb, sizeof (cdb));
20179 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20180 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20181 	prp = kmem_zalloc(data_len, KM_SLEEP);
20182 
20183 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
20184 	cdb.cdb_opaque[1] = usr_cmd;
20185 	FORMG1COUNT(&cdb, data_len);
20186 
20187 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20188 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
20189 	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
20190 	ucmd_buf.uscsi_buflen	= data_len;
20191 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20192 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20193 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
20194 	ucmd_buf.uscsi_timeout	= 60;
20195 
20196 	switch (usr_cmd) {
20197 	case SD_SCSI3_REGISTER: {
20198 		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;
20199 
20200 		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20201 		bcopy(ptr->newkey.key, prp->service_key,
20202 		    MHIOC_RESV_KEY_SIZE);
20203 		prp->aptpl = ptr->aptpl;
20204 		break;
20205 	}
20206 	case SD_SCSI3_CLEAR: {
20207 		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;
20208 
20209 		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20210 		break;
20211 	}
20212 	case SD_SCSI3_RESERVE:
20213 	case SD_SCSI3_RELEASE: {
20214 		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;
20215 
20216 		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20217 		prp->scope_address = BE_32(ptr->scope_specific_addr);
20218 		cdb.cdb_opaque[2] = ptr->type;
20219 		break;
20220 	}
20221 	case SD_SCSI3_PREEMPTANDABORT: {
20222 		mhioc_preemptandabort_t *ptr =
20223 		    (mhioc_preemptandabort_t *)usr_bufp;
20224 
20225 		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20226 		bcopy(ptr->victim_key.key, prp->service_key,
20227 		    MHIOC_RESV_KEY_SIZE);
20228 		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
20229 		cdb.cdb_opaque[2] = ptr->resvdesc.type;
20230 		ucmd_buf.uscsi_flags |= USCSI_HEAD;
20231 		break;
20232 	}
20233 	case SD_SCSI3_REGISTERANDIGNOREKEY:
20234 	{
20235 		mhioc_registerandignorekey_t *ptr;
20236 		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
20237 		bcopy(ptr->newkey.key,
20238 		    prp->service_key, MHIOC_RESV_KEY_SIZE);
20239 		prp->aptpl = ptr->aptpl;
20240 		break;
20241 	}
20242 	default:
20243 		ASSERT(FALSE);
20244 		break;
20245 	}
20246 
20247 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20248 	    UIO_SYSSPACE, SD_PATH_STANDARD);
20249 
20250 	switch (status) {
20251 	case 0:
20252 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20253 		break;	/* Success! */
20254 	case EIO:
20255 		switch (ucmd_buf.uscsi_status) {
20256 		case STATUS_RESERVATION_CONFLICT:
20257 			status = EACCES;
20258 			break;
20259 		case STATUS_CHECK:
20260 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20261 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20262 			    KEY_ILLEGAL_REQUEST)) {
20263 				status = ENOTSUP;
20264 			}
20265 			break;
20266 		default:
20267 			break;
20268 		}
20269 		break;
20270 	default:
20271 		break;
20272 	}
20273 
20274 	kmem_free(prp, data_len);
20275 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
20276 	return (status);
20277 }
20278 
20279 
20280 /*
20281  *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
20282  *
20283  * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
20284  *
20285  *   Arguments: un - pointer to the target's soft state struct
20286  *              dkc - pointer to the callback structure
20287  *
20288  * Return Code: 0 - success
20289  *		errno-type error code
20290  *
20291  *     Context: kernel thread context only.
20292  *
20293  *  _______________________________________________________________
20294  * | dkc_flag &   | dkc_callback | DKIOCFLUSHWRITECACHE            |
20295  * |FLUSH_VOLATILE|              | operation                       |
20296  * |______________|______________|_________________________________|
20297  * | 0            | NULL         | Synchronous flush on both       |
20298  * |              |              | volatile and non-volatile cache |
20299  * |______________|______________|_________________________________|
20300  * | 1            | NULL         | Synchronous flush on volatile   |
20301  * |              |              | cache; disk drivers may suppress|
20302  * |              |              | flush if disk table indicates   |
20303  * |              |              | non-volatile cache              |
20304  * |______________|______________|_________________________________|
20305  * | 0            | !NULL        | Asynchronous flush on both      |
20306  * |              |              | volatile and non-volatile cache;|
20307  * |______________|______________|_________________________________|
20308  * | 1            | !NULL        | Asynchronous flush on volatile  |
20309  * |              |              | cache; disk drivers may suppress|
20310  * |              |              | flush if disk table indicates   |
20311  * |              |              | non-volatile cache              |
20312  * |______________|______________|_________________________________|
20313  *
20314  */
20315 
20316 static int
20317 sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
20318 {
20319 	struct sd_uscsi_info	*uip;
20320 	struct uscsi_cmd	*uscmd;
20321 	union scsi_cdb		*cdb;
20322 	struct buf		*bp;
20323 	int			rval = 0;
20324 	int			is_async;
20325 
20326 	SD_TRACE(SD_LOG_IO, un,
20327 	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);
20328 
20329 	ASSERT(un != NULL);
20330 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20331 
20332 	if (dkc == NULL || dkc->dkc_callback == NULL) {
20333 		is_async = FALSE;
20334 	} else {
20335 		is_async = TRUE;
20336 	}
20337 
20338 	mutex_enter(SD_MUTEX(un));
20339 	/* check whether cache flush should be suppressed */
20340 	if (un->un_f_suppress_cache_flush == TRUE) {
20341 		mutex_exit(SD_MUTEX(un));
20342 		/*
20343 		 * suppress the cache flush if the device is told to do
20344 		 * so by sd.conf or disk table
20345 		 */
20346 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_SYNCHRONIZE_CACHE: \
20347 		    skip the cache flush since suppress_cache_flush is %d!\n",
20348 		    un->un_f_suppress_cache_flush);
20349 
20350 		if (is_async == TRUE) {
20351 			/* invoke callback for asynchronous flush */
20352 			(*dkc->dkc_callback)(dkc->dkc_cookie, 0);
20353 		}
20354 		return (rval);
20355 	}
20356 	mutex_exit(SD_MUTEX(un));
20357 
20358 	/*
20359 	 * check dkc_flag & FLUSH_VOLATILE so SYNC_NV bit can be
20360 	 * set properly
20361 	 */
20362 	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
20363 	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;
20364 
20365 	mutex_enter(SD_MUTEX(un));
20366 	if (dkc != NULL && un->un_f_sync_nv_supported &&
20367 	    (dkc->dkc_flag & FLUSH_VOLATILE)) {
20368 		/*
20369 		 * if the device supports SYNC_NV bit, turn on
20370 		 * the SYNC_NV bit to only flush volatile cache
20371 		 */
20372 		cdb->cdb_un.tag |= SD_SYNC_NV_BIT;
20373 	}
20374 	mutex_exit(SD_MUTEX(un));
20375 
20376 	/*
20377 	 * First get some memory for the uscsi_cmd struct and cdb
20378 	 * and initialize for SYNCHRONIZE_CACHE cmd.
20379 	 */
20380 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
20381 	uscmd->uscsi_cdblen = CDB_GROUP1;
20382 	uscmd->uscsi_cdb = (caddr_t)cdb;
20383 	uscmd->uscsi_bufaddr = NULL;
20384 	uscmd->uscsi_buflen = 0;
20385 	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
20386 	uscmd->uscsi_rqlen = SENSE_LENGTH;
20387 	uscmd->uscsi_rqresid = SENSE_LENGTH;
20388 	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
20389 	uscmd->uscsi_timeout = sd_io_time;
20390 
20391 	/*
20392 	 * Allocate an sd_uscsi_info struct and fill it with the info
20393 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
20394 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
20395 	 * since we allocate the buf here in this function, we do not
20396 	 * need to preserve the prior contents of b_private.
20397 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
20398 	 */
20399 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
20400 	uip->ui_flags = SD_PATH_DIRECT;
20401 	uip->ui_cmdp  = uscmd;
20402 
20403 	bp = getrbuf(KM_SLEEP);
20404 	bp->b_private = uip;
20405 
20406 	/*
20407 	 * Setup buffer to carry uscsi request.
20408 	 */
20409 	bp->b_flags  = B_BUSY;
20410 	bp->b_bcount = 0;
20411 	bp->b_blkno  = 0;
20412 
20413 	if (is_async == TRUE) {
20414 		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
20415 		uip->ui_dkc = *dkc;
20416 	}
20417 
20418 	bp->b_edev = SD_GET_DEV(un);
20419 	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */
20420 
20421 	/*
20422 	 * Unset un_f_sync_cache_required flag
20423 	 */
20424 	mutex_enter(SD_MUTEX(un));
20425 	un->un_f_sync_cache_required = FALSE;
20426 	mutex_exit(SD_MUTEX(un));
20427 
20428 	(void) sd_uscsi_strategy(bp);
20429 
20430 	/*
20431 	 * If synchronous request, wait for completion
20432 	 * If async just return and let b_iodone callback
20433 	 * cleanup.
20434 	 * NOTE: On return, u_ncmds_in_driver will be decremented,
20435 	 * but it was also incremented in sd_uscsi_strategy(), so
20436 	 * we should be ok.
20437 	 */
20438 	if (is_async == FALSE) {
20439 		(void) biowait(bp);
20440 		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
20441 	}
20442 
20443 	return (rval);
20444 }
20445 
20446 
20447 static int
20448 sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
20449 {
20450 	struct sd_uscsi_info *uip;
20451 	struct uscsi_cmd *uscmd;
20452 	uint8_t *sense_buf;
20453 	struct sd_lun *un;
20454 	int status;
20455 	union scsi_cdb *cdb;
20456 
20457 	uip = (struct sd_uscsi_info *)(bp->b_private);
20458 	ASSERT(uip != NULL);
20459 
20460 	uscmd = uip->ui_cmdp;
20461 	ASSERT(uscmd != NULL);
20462 
20463 	sense_buf = (uint8_t *)uscmd->uscsi_rqbuf;
20464 	ASSERT(sense_buf != NULL);
20465 
20466 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
20467 	ASSERT(un != NULL);
20468 
20469 	cdb = (union scsi_cdb *)uscmd->uscsi_cdb;
20470 
20471 	status = geterror(bp);
20472 	switch (status) {
20473 	case 0:
20474 		break;	/* Success! */
20475 	case EIO:
20476 		switch (uscmd->uscsi_status) {
20477 		case STATUS_RESERVATION_CONFLICT:
20478 			/* Ignore reservation conflict */
20479 			status = 0;
20480 			goto done;
20481 
20482 		case STATUS_CHECK:
20483 			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
20484 			    (scsi_sense_key(sense_buf) ==
20485 			    KEY_ILLEGAL_REQUEST)) {
20486 				/* Ignore Illegal Request error */
20487 				if (cdb->cdb_un.tag&SD_SYNC_NV_BIT) {
20488 					mutex_enter(SD_MUTEX(un));
20489 					un->un_f_sync_nv_supported = FALSE;
20490 					mutex_exit(SD_MUTEX(un));
20491 					status = 0;
20492 					SD_TRACE(SD_LOG_IO, un,
20493 					    "un_f_sync_nv_supported \
20494 					    is set to false.\n");
20495 					goto done;
20496 				}
20497 
20498 				mutex_enter(SD_MUTEX(un));
20499 				un->un_f_sync_cache_supported = FALSE;
20500 				mutex_exit(SD_MUTEX(un));
20501 				SD_TRACE(SD_LOG_IO, un,
20502 				    "sd_send_scsi_SYNCHRONIZE_CACHE_biodone: \
20503 				    un_f_sync_cache_supported set to false \
20504 				    with asc = %x, ascq = %x\n",
20505 				    scsi_sense_asc(sense_buf),
20506 				    scsi_sense_ascq(sense_buf));
20507 				status = ENOTSUP;
20508 				goto done;
20509 			}
20510 			break;
20511 		default:
20512 			break;
20513 		}
20514 		/* FALLTHRU */
20515 	default:
20516 		/*
20517 		 * Turn on the un_f_sync_cache_required flag
20518 		 * since the SYNC CACHE command failed
20519 		 */
20520 		mutex_enter(SD_MUTEX(un));
20521 		un->un_f_sync_cache_required = TRUE;
20522 		mutex_exit(SD_MUTEX(un));
20523 
20524 		/*
20525 		 * Don't log an error message if this device
20526 		 * has removable media.
20527 		 */
20528 		if (!un->un_f_has_removable_media) {
20529 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
20530 			    "SYNCHRONIZE CACHE command failed (%d)\n", status);
20531 		}
20532 		break;
20533 	}
20534 
20535 done:
20536 	if (uip->ui_dkc.dkc_callback != NULL) {
20537 		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
20538 	}
20539 
20540 	ASSERT((bp->b_flags & B_REMAPPED) == 0);
20541 	freerbuf(bp);
20542 	kmem_free(uip, sizeof (struct sd_uscsi_info));
20543 	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
20544 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
20545 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
20546 
20547 	return (status);
20548 }
20549 
20550 /*
20551  * Issues a single SCSI UNMAP command with a prepared UNMAP parameter list.
20552  * Returns zero on success, or the non-zero command error code on failure.
20553  */
20554 static int
20555 sd_send_scsi_UNMAP_issue_one(sd_ssc_t *ssc, unmap_param_hdr_t *uph,
20556     uint64_t num_descr, uint64_t bytes)
20557 {
20558 	struct sd_lun		*un = ssc->ssc_un;
20559 	struct scsi_extended_sense	sense_buf;
20560 	union scsi_cdb		cdb;
20561 	struct uscsi_cmd	ucmd_buf;
20562 	int			status;
20563 	const uint64_t		param_size = sizeof (unmap_param_hdr_t) +
20564 	    num_descr * sizeof (unmap_blk_descr_t);
20565 
20566 	ASSERT3U(param_size - 2, <=, UINT16_MAX);
20567 	uph->uph_data_len = BE_16(param_size - 2);
20568 	uph->uph_descr_data_len = BE_16(param_size - 8);
20569 
20570 	bzero(&cdb, sizeof (cdb));
20571 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20572 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20573 
20574 	cdb.scc_cmd = SCMD_UNMAP;
20575 	FORMG1COUNT(&cdb, param_size);
20576 
20577 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20578 	ucmd_buf.uscsi_cdblen	= (uchar_t)CDB_GROUP1;
20579 	ucmd_buf.uscsi_bufaddr	= (caddr_t)uph;
20580 	ucmd_buf.uscsi_buflen	= param_size;
20581 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20582 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20583 	ucmd_buf.uscsi_flags	= USCSI_WRITE | USCSI_RQENABLE | USCSI_SILENT;
20584 	ucmd_buf.uscsi_timeout	= un->un_cmd_timeout;
20585 
20586 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL, UIO_SYSSPACE,
20587 	    SD_PATH_STANDARD);
20588 
20589 	switch (status) {
20590 	case 0:
20591 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20592 
20593 		if (un->un_unmapstats) {
20594 			atomic_inc_64(&un->un_unmapstats->us_cmds.value.ui64);
20595 			atomic_add_64(&un->un_unmapstats->us_extents.value.ui64,
20596 			    num_descr);
20597 			atomic_add_64(&un->un_unmapstats->us_bytes.value.ui64,
20598 			    bytes);
20599 		}
20600 		break;	/* Success! */
20601 	case EIO:
20602 		if (un->un_unmapstats)
20603 			atomic_inc_64(&un->un_unmapstats->us_errs.value.ui64);
20604 		switch (ucmd_buf.uscsi_status) {
20605 		case STATUS_RESERVATION_CONFLICT:
20606 			status = EACCES;
20607 			break;
20608 		default:
20609 			break;
20610 		}
20611 		break;
20612 	default:
20613 		if (un->un_unmapstats)
20614 			atomic_inc_64(&un->un_unmapstats->us_errs.value.ui64);
20615 		break;
20616 	}
20617 
20618 	return (status);
20619 }
20620 
20621 /*
20622  * Returns a pointer to the i'th block descriptor inside an UNMAP param list.
20623  */
20624 static inline unmap_blk_descr_t *
20625 UNMAP_blk_descr_i(void *buf, size_t i)
20626 {
20627 	return ((unmap_blk_descr_t *)((uintptr_t)buf +
20628 	    sizeof (unmap_param_hdr_t) + (i * sizeof (unmap_blk_descr_t))));
20629 }
20630 
20631 /*
20632  * Takes the list of extents from sd_send_scsi_UNMAP, chops it up, prepares
20633  * UNMAP block descriptors and issues individual SCSI UNMAP commands. While
20634  * doing so we consult the block limits to determine at most how many
20635  * extents and LBAs we can UNMAP in one command.
20636  * If a command fails for whatever, reason, extent list processing is aborted
20637  * and the failed command's status is returned. Otherwise returns 0 on
20638  * success.
20639  */
20640 static int
20641 sd_send_scsi_UNMAP_issue(dev_t dev, sd_ssc_t *ssc, const dkioc_free_list_t *dfl)
20642 {
20643 	struct sd_lun		*un = ssc->ssc_un;
20644 	unmap_param_hdr_t	*uph;
20645 	sd_blk_limits_t		*lim = &un->un_blk_lim;
20646 	int			rval = 0;
20647 	int			partition;
20648 	/* partition offset & length in system blocks */
20649 	diskaddr_t		part_off_sysblks = 0, part_len_sysblks = 0;
20650 	uint64_t		part_off, part_len;
20651 	uint64_t		descr_cnt_lim, byte_cnt_lim;
20652 	uint64_t		descr_issued = 0, bytes_issued = 0;
20653 
20654 	uph = kmem_zalloc(SD_UNMAP_PARAM_LIST_MAXSZ, KM_SLEEP);
20655 
20656 	partition = SDPART(dev);
20657 	rval = cmlb_partinfo(un->un_cmlbhandle, partition, &part_len_sysblks,
20658 	    &part_off_sysblks, NULL, NULL, (void *)SD_PATH_DIRECT);
20659 	if (rval != 0)
20660 		goto out;
20661 	part_off = SD_SYSBLOCKS2BYTES(part_off_sysblks);
20662 	part_len = SD_SYSBLOCKS2BYTES(part_len_sysblks);
20663 
20664 	ASSERT(un->un_blk_lim.lim_max_unmap_lba_cnt != 0);
20665 	ASSERT(un->un_blk_lim.lim_max_unmap_descr_cnt != 0);
20666 	/* Spec says 0xffffffff are special values, so compute maximums. */
20667 	byte_cnt_lim = lim->lim_max_unmap_lba_cnt < UINT32_MAX ?
20668 	    (uint64_t)lim->lim_max_unmap_lba_cnt * un->un_tgt_blocksize :
20669 	    UINT64_MAX;
20670 	descr_cnt_lim = MIN(lim->lim_max_unmap_descr_cnt, SD_UNMAP_MAX_DESCR);
20671 
20672 	if (dfl->dfl_offset >= part_len) {
20673 		rval = SET_ERROR(EINVAL);
20674 		goto out;
20675 	}
20676 
20677 	for (size_t i = 0; i < dfl->dfl_num_exts; i++) {
20678 		const dkioc_free_list_ext_t *ext = &dfl->dfl_exts[i];
20679 		uint64_t ext_start = ext->dfle_start;
20680 		uint64_t ext_length = ext->dfle_length;
20681 
20682 		while (ext_length > 0) {
20683 			unmap_blk_descr_t *ubd;
20684 			/* Respect device limit on LBA count per command */
20685 			uint64_t len = MIN(MIN(ext_length, byte_cnt_lim -
20686 			    bytes_issued), SD_TGTBLOCKS2BYTES(un, UINT32_MAX));
20687 
20688 			/* check partition limits */
20689 			if (ext_start >= part_len ||
20690 			    ext_start + len < ext_start ||
20691 			    dfl->dfl_offset + ext_start + len <
20692 			    dfl->dfl_offset ||
20693 			    dfl->dfl_offset + ext_start + len > part_len) {
20694 				rval = SET_ERROR(EINVAL);
20695 				goto out;
20696 			}
20697 
20698 			ASSERT3U(descr_issued, <, descr_cnt_lim);
20699 			ASSERT3U(bytes_issued, <, byte_cnt_lim);
20700 			ubd = UNMAP_blk_descr_i(uph, descr_issued);
20701 
20702 			/* adjust in-partition addresses to be device-global */
20703 			ubd->ubd_lba = BE_64(SD_BYTES2TGTBLOCKS(un,
20704 			    dfl->dfl_offset + ext_start + part_off));
20705 			ubd->ubd_lba_cnt = BE_32(SD_BYTES2TGTBLOCKS(un, len));
20706 
20707 			descr_issued++;
20708 			bytes_issued += len;
20709 
20710 			/* Issue command when device limits reached */
20711 			if (descr_issued == descr_cnt_lim ||
20712 			    bytes_issued == byte_cnt_lim) {
20713 				rval = sd_send_scsi_UNMAP_issue_one(ssc, uph,
20714 				    descr_issued, bytes_issued);
20715 				if (rval != 0)
20716 					goto out;
20717 				descr_issued = 0;
20718 				bytes_issued = 0;
20719 			}
20720 
20721 			ext_start += len;
20722 			ext_length -= len;
20723 		}
20724 	}
20725 
20726 	if (descr_issued > 0) {
20727 		/* issue last command */
20728 		rval = sd_send_scsi_UNMAP_issue_one(ssc, uph, descr_issued,
20729 		    bytes_issued);
20730 	}
20731 
20732 out:
20733 	kmem_free(uph, SD_UNMAP_PARAM_LIST_MAXSZ);
20734 	return (rval);
20735 }
20736 
20737 /*
20738  * Issues one or several UNMAP commands based on a list of extents to be
20739  * unmapped. The internal multi-command processing is hidden, as the exact
20740  * number of commands and extents per command is limited by both SCSI
20741  * command syntax and device limits (as expressed in the SCSI Block Limits
20742  * VPD page and un_blk_lim in struct sd_lun).
20743  * Returns zero on success, or the error code of the first failed SCSI UNMAP
20744  * command.
20745  */
20746 static int
20747 sd_send_scsi_UNMAP(dev_t dev, sd_ssc_t *ssc, dkioc_free_list_t *dfl, int flag)
20748 {
20749 	struct sd_lun		*un = ssc->ssc_un;
20750 	int			rval = 0;
20751 
20752 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20753 	ASSERT(dfl != NULL);
20754 
20755 	/* Per spec, any of these conditions signals lack of UNMAP support. */
20756 	if (!(un->un_thin_flags & SD_THIN_PROV_ENABLED) ||
20757 	    un->un_blk_lim.lim_max_unmap_descr_cnt == 0 ||
20758 	    un->un_blk_lim.lim_max_unmap_lba_cnt == 0) {
20759 		return (SET_ERROR(ENOTSUP));
20760 	}
20761 
20762 	/* For userspace calls we must copy in. */
20763 	if (!(flag & FKIOCTL)) {
20764 		int err = dfl_copyin(dfl, &dfl, flag, KM_SLEEP);
20765 		if (err != 0)
20766 			return (err);
20767 	} else if (dfl->dfl_num_exts > DFL_COPYIN_MAX_EXTS) {
20768 		ASSERT3U(dfl->dfl_num_exts, <=, DFL_COPYIN_MAX_EXTS);
20769 		return (SET_ERROR(EINVAL));
20770 	}
20771 
20772 	rval = sd_send_scsi_UNMAP_issue(dev, ssc, dfl);
20773 
20774 	if (!(flag & FKIOCTL)) {
20775 		dfl_free(dfl);
20776 		dfl = NULL;
20777 	}
20778 
20779 	return (rval);
20780 }
20781 
20782 /*
20783  *    Function: sd_send_scsi_GET_CONFIGURATION
20784  *
20785  * Description: Issues the get configuration command to the device.
20786  *		Called from sd_check_for_writable_cd & sd_get_media_info
20787  *		caller needs to ensure that buflen = SD_PROFILE_HEADER_LEN
20788  *   Arguments: ssc
20789  *		ucmdbuf
20790  *		rqbuf
20791  *		rqbuflen
20792  *		bufaddr
20793  *		buflen
20794  *		path_flag
20795  *
20796  * Return Code: 0   - Success
20797  *		errno return code from sd_ssc_send()
20798  *
20799  *     Context: Can sleep. Does not return until command is completed.
20800  *
20801  */
20802 
20803 static int
20804 sd_send_scsi_GET_CONFIGURATION(sd_ssc_t *ssc, struct uscsi_cmd *ucmdbuf,
20805     uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen,
20806     int path_flag)
20807 {
20808 	char	cdb[CDB_GROUP1];
20809 	int	status;
20810 	struct sd_lun	*un;
20811 
20812 	ASSERT(ssc != NULL);
20813 	un = ssc->ssc_un;
20814 	ASSERT(un != NULL);
20815 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20816 	ASSERT(bufaddr != NULL);
20817 	ASSERT(ucmdbuf != NULL);
20818 	ASSERT(rqbuf != NULL);
20819 
20820 	SD_TRACE(SD_LOG_IO, un,
20821 	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
20822 
20823 	bzero(cdb, sizeof (cdb));
20824 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20825 	bzero(rqbuf, rqbuflen);
20826 	bzero(bufaddr, buflen);
20827 
20828 	/*
20829 	 * Set up cdb field for the get configuration command.
20830 	 */
20831 	cdb[0] = SCMD_GET_CONFIGURATION;
20832 	cdb[1] = 0x02;  /* Requested Type */
20833 	cdb[8] = SD_PROFILE_HEADER_LEN;
20834 	ucmdbuf->uscsi_cdb = cdb;
20835 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20836 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20837 	ucmdbuf->uscsi_buflen = buflen;
20838 	ucmdbuf->uscsi_timeout = sd_io_time;
20839 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20840 	ucmdbuf->uscsi_rqlen = rqbuflen;
20841 	ucmdbuf->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT | USCSI_READ;
20842 
20843 	status = sd_ssc_send(ssc, ucmdbuf, FKIOCTL,
20844 	    UIO_SYSSPACE, path_flag);
20845 
20846 	switch (status) {
20847 	case 0:
20848 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20849 		break;  /* Success! */
20850 	case EIO:
20851 		switch (ucmdbuf->uscsi_status) {
20852 		case STATUS_RESERVATION_CONFLICT:
20853 			status = EACCES;
20854 			break;
20855 		default:
20856 			break;
20857 		}
20858 		break;
20859 	default:
20860 		break;
20861 	}
20862 
20863 	if (status == 0) {
20864 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20865 		    "sd_send_scsi_GET_CONFIGURATION: data",
20866 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20867 	}
20868 
20869 	SD_TRACE(SD_LOG_IO, un,
20870 	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
20871 
20872 	return (status);
20873 }
20874 
20875 /*
20876  *    Function: sd_send_scsi_feature_GET_CONFIGURATION
20877  *
20878  * Description: Issues the get configuration command to the device to
20879  *              retrieve a specific feature. Called from
20880  *		sd_check_for_writable_cd & sd_set_mmc_caps.
20881  *   Arguments: ssc
20882  *              ucmdbuf
20883  *              rqbuf
20884  *              rqbuflen
20885  *              bufaddr
20886  *              buflen
20887  *		feature
20888  *
20889  * Return Code: 0   - Success
20890  *              errno return code from sd_ssc_send()
20891  *
20892  *     Context: Can sleep. Does not return until command is completed.
20893  *
20894  */
20895 static int
20896 sd_send_scsi_feature_GET_CONFIGURATION(sd_ssc_t *ssc, struct uscsi_cmd *ucmdbuf,
20897     uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen,
20898     char feature, int path_flag)
20899 {
20900 	char    cdb[CDB_GROUP1];
20901 	int	status;
20902 	struct sd_lun	*un;
20903 
20904 	ASSERT(ssc != NULL);
20905 	un = ssc->ssc_un;
20906 	ASSERT(un != NULL);
20907 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20908 	ASSERT(bufaddr != NULL);
20909 	ASSERT(ucmdbuf != NULL);
20910 	ASSERT(rqbuf != NULL);
20911 
20912 	SD_TRACE(SD_LOG_IO, un,
20913 	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
20914 
20915 	bzero(cdb, sizeof (cdb));
20916 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20917 	bzero(rqbuf, rqbuflen);
20918 	bzero(bufaddr, buflen);
20919 
20920 	/*
20921 	 * Set up cdb field for the get configuration command.
20922 	 */
20923 	cdb[0] = SCMD_GET_CONFIGURATION;
20924 	cdb[1] = 0x02;  /* Requested Type */
20925 	cdb[3] = feature;
20926 	cdb[8] = buflen;
20927 	ucmdbuf->uscsi_cdb = cdb;
20928 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20929 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20930 	ucmdbuf->uscsi_buflen = buflen;
20931 	ucmdbuf->uscsi_timeout = sd_io_time;
20932 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20933 	ucmdbuf->uscsi_rqlen = rqbuflen;
20934 	ucmdbuf->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT | USCSI_READ;
20935 
20936 	status = sd_ssc_send(ssc, ucmdbuf, FKIOCTL,
20937 	    UIO_SYSSPACE, path_flag);
20938 
20939 	switch (status) {
20940 	case 0:
20941 
20942 		break;  /* Success! */
20943 	case EIO:
20944 		switch (ucmdbuf->uscsi_status) {
20945 		case STATUS_RESERVATION_CONFLICT:
20946 			status = EACCES;
20947 			break;
20948 		default:
20949 			break;
20950 		}
20951 		break;
20952 	default:
20953 		break;
20954 	}
20955 
20956 	if (status == 0) {
20957 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20958 		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
20959 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20960 	}
20961 
20962 	SD_TRACE(SD_LOG_IO, un,
20963 	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
20964 
20965 	return (status);
20966 }
20967 
20968 
20969 /*
20970  *    Function: sd_send_scsi_MODE_SENSE
20971  *
20972  * Description: Utility function for issuing a scsi MODE SENSE command.
20973  *		Note: This routine uses a consistent implementation for Group0,
20974  *		Group1, and Group2 commands across all platforms. ATAPI devices
20975  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
20976  *
20977  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
20978  *                      structure for this target.
20979  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
20980  *			  CDB_GROUP[1|2] (10 byte).
20981  *		bufaddr - buffer for page data retrieved from the target.
20982  *		buflen - size of page to be retrieved.
20983  *		page_code - page code of data to be retrieved from the target.
20984  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20985  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20986  *			to use the USCSI "direct" chain and bypass the normal
20987  *			command waitq.
20988  *
20989  * Return Code: 0   - Success
20990  *		errno return code from sd_ssc_send()
20991  *
20992  *     Context: Can sleep. Does not return until command is completed.
20993  */
20994 
20995 static int
20996 sd_send_scsi_MODE_SENSE(sd_ssc_t *ssc, int cdbsize, uchar_t *bufaddr,
20997     size_t buflen,  uchar_t page_code, int path_flag)
20998 {
20999 	struct	scsi_extended_sense	sense_buf;
21000 	union scsi_cdb		cdb;
21001 	struct uscsi_cmd	ucmd_buf;
21002 	int			status;
21003 	int			headlen;
21004 	struct sd_lun		*un;
21005 
21006 	ASSERT(ssc != NULL);
21007 	un = ssc->ssc_un;
21008 	ASSERT(un != NULL);
21009 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21010 	ASSERT(bufaddr != NULL);
21011 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
21012 	    (cdbsize == CDB_GROUP2));
21013 
21014 	SD_TRACE(SD_LOG_IO, un,
21015 	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
21016 
21017 	bzero(&cdb, sizeof (cdb));
21018 	bzero(&ucmd_buf, sizeof (ucmd_buf));
21019 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21020 	bzero(bufaddr, buflen);
21021 
21022 	if (cdbsize == CDB_GROUP0) {
21023 		cdb.scc_cmd = SCMD_MODE_SENSE;
21024 		cdb.cdb_opaque[2] = page_code;
21025 		FORMG0COUNT(&cdb, buflen);
21026 		headlen = MODE_HEADER_LENGTH;
21027 	} else {
21028 		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
21029 		cdb.cdb_opaque[2] = page_code;
21030 		FORMG1COUNT(&cdb, buflen);
21031 		headlen = MODE_HEADER_LENGTH_GRP2;
21032 	}
21033 
21034 	ASSERT(headlen <= buflen);
21035 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
21036 
21037 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21038 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
21039 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
21040 	ucmd_buf.uscsi_buflen	= buflen;
21041 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21042 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21043 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
21044 	ucmd_buf.uscsi_timeout	= 60;
21045 
21046 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21047 	    UIO_SYSSPACE, path_flag);
21048 
21049 	switch (status) {
21050 	case 0:
21051 		/*
21052 		 * sr_check_wp() uses 0x3f page code and check the header of
21053 		 * mode page to determine if target device is write-protected.
21054 		 * But some USB devices return 0 bytes for 0x3f page code. For
21055 		 * this case, make sure that mode page header is returned at
21056 		 * least.
21057 		 */
21058 		if (buflen - ucmd_buf.uscsi_resid <  headlen) {
21059 			status = EIO;
21060 			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
21061 			    "mode page header is not returned");
21062 		}
21063 		break;	/* Success! */
21064 	case EIO:
21065 		switch (ucmd_buf.uscsi_status) {
21066 		case STATUS_RESERVATION_CONFLICT:
21067 			status = EACCES;
21068 			break;
21069 		default:
21070 			break;
21071 		}
21072 		break;
21073 	default:
21074 		break;
21075 	}
21076 
21077 	if (status == 0) {
21078 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
21079 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
21080 	}
21081 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
21082 
21083 	return (status);
21084 }
21085 
21086 
21087 /*
21088  *    Function: sd_send_scsi_MODE_SELECT
21089  *
21090  * Description: Utility function for issuing a scsi MODE SELECT command.
21091  *		Note: This routine uses a consistent implementation for Group0,
21092  *		Group1, and Group2 commands across all platforms. ATAPI devices
21093  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
21094  *
21095  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
21096  *                      structure for this target.
21097  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
21098  *			  CDB_GROUP[1|2] (10 byte).
21099  *		bufaddr - buffer for page data retrieved from the target.
21100  *		buflen - size of page to be retrieved.
21101  *		save_page - boolean to determin if SP bit should be set.
21102  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
21103  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
21104  *			to use the USCSI "direct" chain and bypass the normal
21105  *			command waitq.
21106  *
21107  * Return Code: 0   - Success
21108  *		errno return code from sd_ssc_send()
21109  *
21110  *     Context: Can sleep. Does not return until command is completed.
21111  */
21112 
21113 static int
21114 sd_send_scsi_MODE_SELECT(sd_ssc_t *ssc, int cdbsize, uchar_t *bufaddr,
21115     size_t buflen,  uchar_t save_page, int path_flag)
21116 {
21117 	struct	scsi_extended_sense	sense_buf;
21118 	union scsi_cdb		cdb;
21119 	struct uscsi_cmd	ucmd_buf;
21120 	int			status;
21121 	struct sd_lun		*un;
21122 
21123 	ASSERT(ssc != NULL);
21124 	un = ssc->ssc_un;
21125 	ASSERT(un != NULL);
21126 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21127 	ASSERT(bufaddr != NULL);
21128 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
21129 	    (cdbsize == CDB_GROUP2));
21130 
21131 	SD_TRACE(SD_LOG_IO, un,
21132 	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
21133 
21134 	bzero(&cdb, sizeof (cdb));
21135 	bzero(&ucmd_buf, sizeof (ucmd_buf));
21136 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21137 
21138 	/* Set the PF bit for many third party drives */
21139 	cdb.cdb_opaque[1] = 0x10;
21140 
21141 	/* Set the savepage(SP) bit if given */
21142 	if (save_page == SD_SAVE_PAGE) {
21143 		cdb.cdb_opaque[1] |= 0x01;
21144 	}
21145 
21146 	if (cdbsize == CDB_GROUP0) {
21147 		cdb.scc_cmd = SCMD_MODE_SELECT;
21148 		FORMG0COUNT(&cdb, buflen);
21149 	} else {
21150 		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
21151 		FORMG1COUNT(&cdb, buflen);
21152 	}
21153 
21154 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
21155 
21156 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21157 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
21158 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
21159 	ucmd_buf.uscsi_buflen	= buflen;
21160 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21161 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21162 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
21163 	ucmd_buf.uscsi_timeout	= 60;
21164 
21165 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21166 	    UIO_SYSSPACE, path_flag);
21167 
21168 	switch (status) {
21169 	case 0:
21170 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21171 		break;	/* Success! */
21172 	case EIO:
21173 		switch (ucmd_buf.uscsi_status) {
21174 		case STATUS_RESERVATION_CONFLICT:
21175 			status = EACCES;
21176 			break;
21177 		default:
21178 			break;
21179 		}
21180 		break;
21181 	default:
21182 		break;
21183 	}
21184 
21185 	if (status == 0) {
21186 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
21187 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
21188 	}
21189 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
21190 
21191 	return (status);
21192 }
21193 
21194 
21195 /*
21196  *    Function: sd_send_scsi_RDWR
21197  *
21198  * Description: Issue a scsi READ or WRITE command with the given parameters.
21199  *
21200  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
21201  *                      structure for this target.
21202  *		cmd:	 SCMD_READ or SCMD_WRITE
21203  *		bufaddr: Address of caller's buffer to receive the RDWR data
21204  *		buflen:  Length of caller's buffer receive the RDWR data.
21205  *		start_block: Block number for the start of the RDWR operation.
21206  *			 (Assumes target-native block size.)
21207  *		residp:  Pointer to variable to receive the redisual of the
21208  *			 RDWR operation (may be NULL of no residual requested).
21209  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
21210  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
21211  *			to use the USCSI "direct" chain and bypass the normal
21212  *			command waitq.
21213  *
21214  * Return Code: 0   - Success
21215  *		errno return code from sd_ssc_send()
21216  *
21217  *     Context: Can sleep. Does not return until command is completed.
21218  */
21219 
21220 static int
21221 sd_send_scsi_RDWR(sd_ssc_t *ssc, uchar_t cmd, void *bufaddr,
21222     size_t buflen, daddr_t start_block, int path_flag)
21223 {
21224 	struct	scsi_extended_sense	sense_buf;
21225 	union scsi_cdb		cdb;
21226 	struct uscsi_cmd	ucmd_buf;
21227 	uint32_t		block_count;
21228 	int			status;
21229 	int			cdbsize;
21230 	uchar_t			flag;
21231 	struct sd_lun		*un;
21232 
21233 	ASSERT(ssc != NULL);
21234 	un = ssc->ssc_un;
21235 	ASSERT(un != NULL);
21236 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21237 	ASSERT(bufaddr != NULL);
21238 	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));
21239 
21240 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);
21241 
21242 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
21243 		return (EINVAL);
21244 	}
21245 
21246 	mutex_enter(SD_MUTEX(un));
21247 	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
21248 	mutex_exit(SD_MUTEX(un));
21249 
21250 	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;
21251 
21252 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
21253 	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
21254 	    bufaddr, buflen, start_block, block_count);
21255 
21256 	bzero(&cdb, sizeof (cdb));
21257 	bzero(&ucmd_buf, sizeof (ucmd_buf));
21258 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21259 
21260 	/* Compute CDB size to use */
21261 	if (start_block > 0xffffffff)
21262 		cdbsize = CDB_GROUP4;
21263 	else if ((start_block & 0xFFE00000) ||
21264 	    (un->un_f_cfg_is_atapi == TRUE))
21265 		cdbsize = CDB_GROUP1;
21266 	else
21267 		cdbsize = CDB_GROUP0;
21268 
21269 	switch (cdbsize) {
21270 	case CDB_GROUP0:	/* 6-byte CDBs */
21271 		cdb.scc_cmd = cmd;
21272 		FORMG0ADDR(&cdb, start_block);
21273 		FORMG0COUNT(&cdb, block_count);
21274 		break;
21275 	case CDB_GROUP1:	/* 10-byte CDBs */
21276 		cdb.scc_cmd = cmd | SCMD_GROUP1;
21277 		FORMG1ADDR(&cdb, start_block);
21278 		FORMG1COUNT(&cdb, block_count);
21279 		break;
21280 	case CDB_GROUP4:	/* 16-byte CDBs */
21281 		cdb.scc_cmd = cmd | SCMD_GROUP4;
21282 		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
21283 		FORMG4COUNT(&cdb, block_count);
21284 		break;
21285 	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
21286 	default:
21287 		/* All others reserved */
21288 		return (EINVAL);
21289 	}
21290 
21291 	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
21292 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
21293 
21294 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21295 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
21296 	ucmd_buf.uscsi_bufaddr	= bufaddr;
21297 	ucmd_buf.uscsi_buflen	= buflen;
21298 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21299 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21300 	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
21301 	ucmd_buf.uscsi_timeout	= 60;
21302 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21303 	    UIO_SYSSPACE, path_flag);
21304 
21305 	switch (status) {
21306 	case 0:
21307 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21308 		break;	/* Success! */
21309 	case EIO:
21310 		switch (ucmd_buf.uscsi_status) {
21311 		case STATUS_RESERVATION_CONFLICT:
21312 			status = EACCES;
21313 			break;
21314 		default:
21315 			break;
21316 		}
21317 		break;
21318 	default:
21319 		break;
21320 	}
21321 
21322 	if (status == 0) {
21323 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
21324 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
21325 	}
21326 
21327 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");
21328 
21329 	return (status);
21330 }
21331 
21332 
21333 /*
21334  *    Function: sd_send_scsi_LOG_SENSE
21335  *
21336  * Description: Issue a scsi LOG_SENSE command with the given parameters.
21337  *
21338  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
21339  *                      structure for this target.
21340  *
21341  * Return Code: 0   - Success
21342  *		errno return code from sd_ssc_send()
21343  *
21344  *     Context: Can sleep. Does not return until command is completed.
21345  */
21346 
21347 static int
21348 sd_send_scsi_LOG_SENSE(sd_ssc_t *ssc, uchar_t *bufaddr, uint16_t buflen,
21349     uchar_t page_code, uchar_t page_control, uint16_t param_ptr, int path_flag)
21350 {
21351 	struct scsi_extended_sense	sense_buf;
21352 	union scsi_cdb		cdb;
21353 	struct uscsi_cmd	ucmd_buf;
21354 	int			status;
21355 	struct sd_lun		*un;
21356 
21357 	ASSERT(ssc != NULL);
21358 	un = ssc->ssc_un;
21359 	ASSERT(un != NULL);
21360 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21361 
21362 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);
21363 
21364 	bzero(&cdb, sizeof (cdb));
21365 	bzero(&ucmd_buf, sizeof (ucmd_buf));
21366 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21367 
21368 	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
21369 	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
21370 	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
21371 	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
21372 	FORMG1COUNT(&cdb, buflen);
21373 
21374 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21375 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
21376 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
21377 	ucmd_buf.uscsi_buflen	= buflen;
21378 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21379 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21380 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
21381 	ucmd_buf.uscsi_timeout	= 60;
21382 
21383 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21384 	    UIO_SYSSPACE, path_flag);
21385 
21386 	switch (status) {
21387 	case 0:
21388 		break;
21389 	case EIO:
21390 		switch (ucmd_buf.uscsi_status) {
21391 		case STATUS_RESERVATION_CONFLICT:
21392 			status = EACCES;
21393 			break;
21394 		case STATUS_CHECK:
21395 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
21396 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
21397 			    KEY_ILLEGAL_REQUEST) &&
21398 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x24)) {
21399 				/*
21400 				 * ASC 0x24: INVALID FIELD IN CDB
21401 				 */
21402 				switch (page_code) {
21403 				case START_STOP_CYCLE_PAGE:
21404 					/*
21405 					 * The start stop cycle counter is
21406 					 * implemented as page 0x31 in earlier
21407 					 * generation disks. In new generation
21408 					 * disks the start stop cycle counter is
21409 					 * implemented as page 0xE. To properly
21410 					 * handle this case if an attempt for
21411 					 * log page 0xE is made and fails we
21412 					 * will try again using page 0x31.
21413 					 *
21414 					 * Network storage BU committed to
21415 					 * maintain the page 0x31 for this
21416 					 * purpose and will not have any other
21417 					 * page implemented with page code 0x31
21418 					 * until all disks transition to the
21419 					 * standard page.
21420 					 */
21421 					mutex_enter(SD_MUTEX(un));
21422 					un->un_start_stop_cycle_page =
21423 					    START_STOP_CYCLE_VU_PAGE;
21424 					cdb.cdb_opaque[2] =
21425 					    (char)(page_control << 6) |
21426 					    un->un_start_stop_cycle_page;
21427 					mutex_exit(SD_MUTEX(un));
21428 					sd_ssc_assessment(ssc, SD_FMT_IGNORE);
21429 					status = sd_ssc_send(
21430 					    ssc, &ucmd_buf, FKIOCTL,
21431 					    UIO_SYSSPACE, path_flag);
21432 
21433 					break;
21434 				case TEMPERATURE_PAGE:
21435 					status = ENOTTY;
21436 					break;
21437 				default:
21438 					break;
21439 				}
21440 			}
21441 			break;
21442 		default:
21443 			break;
21444 		}
21445 		break;
21446 	default:
21447 		break;
21448 	}
21449 
21450 	if (status == 0) {
21451 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21452 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
21453 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
21454 	}
21455 
21456 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");
21457 
21458 	return (status);
21459 }
21460 
21461 
21462 /*
21463  *    Function: sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION
21464  *
21465  * Description: Issue the scsi GET EVENT STATUS NOTIFICATION command.
21466  *
21467  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
21468  *                      structure for this target.
21469  *		bufaddr
21470  *		buflen
21471  *		class_req
21472  *
21473  * Return Code: 0   - Success
21474  *		errno return code from sd_ssc_send()
21475  *
21476  *     Context: Can sleep. Does not return until command is completed.
21477  */
21478 
21479 static int
21480 sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION(sd_ssc_t *ssc, uchar_t *bufaddr,
21481     size_t buflen, uchar_t class_req)
21482 {
21483 	union scsi_cdb		cdb;
21484 	struct uscsi_cmd	ucmd_buf;
21485 	int			status;
21486 	struct sd_lun		*un;
21487 
21488 	ASSERT(ssc != NULL);
21489 	un = ssc->ssc_un;
21490 	ASSERT(un != NULL);
21491 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21492 	ASSERT(bufaddr != NULL);
21493 
21494 	SD_TRACE(SD_LOG_IO, un,
21495 	    "sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION: entry: un:0x%p\n", un);
21496 
21497 	bzero(&cdb, sizeof (cdb));
21498 	bzero(&ucmd_buf, sizeof (ucmd_buf));
21499 	bzero(bufaddr, buflen);
21500 
21501 	cdb.scc_cmd = SCMD_GET_EVENT_STATUS_NOTIFICATION;
21502 	cdb.cdb_opaque[1] = 1; /* polled */
21503 	cdb.cdb_opaque[4] = class_req;
21504 	FORMG1COUNT(&cdb, buflen);
21505 
21506 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21507 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
21508 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
21509 	ucmd_buf.uscsi_buflen	= buflen;
21510 	ucmd_buf.uscsi_rqbuf	= NULL;
21511 	ucmd_buf.uscsi_rqlen	= 0;
21512 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
21513 	ucmd_buf.uscsi_timeout	= 60;
21514 
21515 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21516 	    UIO_SYSSPACE, SD_PATH_DIRECT);
21517 
21518 	/*
21519 	 * Only handle status == 0, the upper-level caller
21520 	 * will put different assessment based on the context.
21521 	 */
21522 	if (status == 0) {
21523 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21524 
21525 		if (ucmd_buf.uscsi_resid != 0) {
21526 			status = EIO;
21527 		}
21528 	}
21529 
21530 	SD_TRACE(SD_LOG_IO, un,
21531 	    "sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION: exit\n");
21532 
21533 	return (status);
21534 }
21535 
21536 
21537 static boolean_t
21538 sd_gesn_media_data_valid(uchar_t *data)
21539 {
21540 	uint16_t			len;
21541 
21542 	len = (data[1] << 8) | data[0];
21543 	return ((len >= 6) &&
21544 	    ((data[2] & SD_GESN_HEADER_NEA) == 0) &&
21545 	    ((data[2] & SD_GESN_HEADER_CLASS) == SD_GESN_MEDIA_CLASS) &&
21546 	    ((data[3] & (1 << SD_GESN_MEDIA_CLASS)) != 0));
21547 }
21548 
21549 
21550 /*
21551  *    Function: sdioctl
21552  *
21553  * Description: Driver's ioctl(9e) entry point function.
21554  *
21555  *   Arguments: dev     - device number
21556  *		cmd     - ioctl operation to be performed
21557  *		arg     - user argument, contains data to be set or reference
21558  *			  parameter for get
21559  *		flag    - bit flag, indicating open settings, 32/64 bit type
21560  *		cred_p  - user credential pointer
21561  *		rval_p  - calling process return value (OPT)
21562  *
21563  * Return Code: EINVAL
21564  *		ENOTTY
21565  *		ENXIO
21566  *		EIO
21567  *		EFAULT
21568  *		ENOTSUP
21569  *		EPERM
21570  *
21571  *     Context: Called from the device switch at normal priority.
21572  */
21573 
21574 static int
21575 sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
21576 {
21577 	struct sd_lun	*un = NULL;
21578 	int		err = 0;
21579 	int		i = 0;
21580 	cred_t		*cr;
21581 	int		tmprval = EINVAL;
21582 	boolean_t	is_valid;
21583 	sd_ssc_t	*ssc;
21584 
21585 	/*
21586 	 * All device accesses go thru sdstrategy where we check on suspend
21587 	 * status
21588 	 */
21589 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21590 		return (ENXIO);
21591 	}
21592 
21593 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21594 
21595 	/* Initialize sd_ssc_t for internal uscsi commands */
21596 	ssc = sd_ssc_init(un);
21597 
21598 	is_valid = SD_IS_VALID_LABEL(un);
21599 
21600 	/*
21601 	 * Moved this wait from sd_uscsi_strategy to here for
21602 	 * reasons of deadlock prevention. Internal driver commands,
21603 	 * specifically those to change a devices power level, result
21604 	 * in a call to sd_uscsi_strategy.
21605 	 */
21606 	mutex_enter(SD_MUTEX(un));
21607 	while ((un->un_state == SD_STATE_SUSPENDED) ||
21608 	    (un->un_state == SD_STATE_PM_CHANGING)) {
21609 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
21610 	}
21611 	/*
21612 	 * Twiddling the counter here protects commands from now
21613 	 * through to the top of sd_uscsi_strategy. Without the
21614 	 * counter inc. a power down, for example, could get in
21615 	 * after the above check for state is made and before
21616 	 * execution gets to the top of sd_uscsi_strategy.
21617 	 * That would cause problems.
21618 	 */
21619 	un->un_ncmds_in_driver++;
21620 
21621 	if (!is_valid &&
21622 	    (flag & (FNDELAY | FNONBLOCK))) {
21623 		switch (cmd) {
21624 		case DKIOCGGEOM:	/* SD_PATH_DIRECT */
21625 		case DKIOCGVTOC:
21626 		case DKIOCGEXTVTOC:
21627 		case DKIOCGAPART:
21628 		case DKIOCPARTINFO:
21629 		case DKIOCEXTPARTINFO:
21630 		case DKIOCSGEOM:
21631 		case DKIOCSAPART:
21632 		case DKIOCGETEFI:
21633 		case DKIOCPARTITION:
21634 		case DKIOCSVTOC:
21635 		case DKIOCSEXTVTOC:
21636 		case DKIOCSETEFI:
21637 		case DKIOCGMBOOT:
21638 		case DKIOCSMBOOT:
21639 		case DKIOCG_PHYGEOM:
21640 		case DKIOCG_VIRTGEOM:
21641 #if defined(__x86)
21642 		case DKIOCSETEXTPART:
21643 #endif
21644 			/* let cmlb handle it */
21645 			goto skip_ready_valid;
21646 
21647 		case CDROMPAUSE:
21648 		case CDROMRESUME:
21649 		case CDROMPLAYMSF:
21650 		case CDROMPLAYTRKIND:
21651 		case CDROMREADTOCHDR:
21652 		case CDROMREADTOCENTRY:
21653 		case CDROMSTOP:
21654 		case CDROMSTART:
21655 		case CDROMVOLCTRL:
21656 		case CDROMSUBCHNL:
21657 		case CDROMREADMODE2:
21658 		case CDROMREADMODE1:
21659 		case CDROMREADOFFSET:
21660 		case CDROMSBLKMODE:
21661 		case CDROMGBLKMODE:
21662 		case CDROMGDRVSPEED:
21663 		case CDROMSDRVSPEED:
21664 		case CDROMCDDA:
21665 		case CDROMCDXA:
21666 		case CDROMSUBCODE:
21667 			if (!ISCD(un)) {
21668 				un->un_ncmds_in_driver--;
21669 				ASSERT(un->un_ncmds_in_driver >= 0);
21670 				mutex_exit(SD_MUTEX(un));
21671 				err = ENOTTY;
21672 				goto done_without_assess;
21673 			}
21674 			break;
21675 		case FDEJECT:
21676 		case DKIOCEJECT:
21677 		case CDROMEJECT:
21678 			if (!un->un_f_eject_media_supported) {
21679 				un->un_ncmds_in_driver--;
21680 				ASSERT(un->un_ncmds_in_driver >= 0);
21681 				mutex_exit(SD_MUTEX(un));
21682 				err = ENOTTY;
21683 				goto done_without_assess;
21684 			}
21685 			break;
21686 		case DKIOCFLUSHWRITECACHE:
21687 			mutex_exit(SD_MUTEX(un));
21688 			err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
21689 			if (err != 0) {
21690 				mutex_enter(SD_MUTEX(un));
21691 				un->un_ncmds_in_driver--;
21692 				ASSERT(un->un_ncmds_in_driver >= 0);
21693 				mutex_exit(SD_MUTEX(un));
21694 				err = EIO;
21695 				goto done_quick_assess;
21696 			}
21697 			mutex_enter(SD_MUTEX(un));
21698 			/* FALLTHROUGH */
21699 		case DKIOCREMOVABLE:
21700 		case DKIOCHOTPLUGGABLE:
21701 		case DKIOCINFO:
21702 		case DKIOCGMEDIAINFO:
21703 		case DKIOCGMEDIAINFOEXT:
21704 		case DKIOCSOLIDSTATE:
21705 		case DKIOC_CANFREE:
21706 		case MHIOCENFAILFAST:
21707 		case MHIOCSTATUS:
21708 		case MHIOCTKOWN:
21709 		case MHIOCRELEASE:
21710 		case MHIOCGRP_INKEYS:
21711 		case MHIOCGRP_INRESV:
21712 		case MHIOCGRP_REGISTER:
21713 		case MHIOCGRP_CLEAR:
21714 		case MHIOCGRP_RESERVE:
21715 		case MHIOCGRP_PREEMPTANDABORT:
21716 		case MHIOCGRP_REGISTERANDIGNOREKEY:
21717 		case CDROMCLOSETRAY:
21718 		case USCSICMD:
21719 		case USCSIMAXXFER:
21720 			goto skip_ready_valid;
21721 		default:
21722 			break;
21723 		}
21724 
21725 		mutex_exit(SD_MUTEX(un));
21726 		err = sd_ready_and_valid(ssc, SDPART(dev));
21727 		mutex_enter(SD_MUTEX(un));
21728 
21729 		if (err != SD_READY_VALID) {
21730 			switch (cmd) {
21731 			case DKIOCSTATE:
21732 			case CDROMGDRVSPEED:
21733 			case CDROMSDRVSPEED:
21734 			case FDEJECT:	/* for eject command */
21735 			case DKIOCEJECT:
21736 			case CDROMEJECT:
21737 			case DKIOCREMOVABLE:
21738 			case DKIOCHOTPLUGGABLE:
21739 				break;
21740 			default:
21741 				if (un->un_f_has_removable_media) {
21742 					err = ENXIO;
21743 				} else {
21744 				/* Do not map SD_RESERVED_BY_OTHERS to EIO */
21745 					if (err == SD_RESERVED_BY_OTHERS) {
21746 						err = EACCES;
21747 					} else {
21748 						err = EIO;
21749 					}
21750 				}
21751 				un->un_ncmds_in_driver--;
21752 				ASSERT(un->un_ncmds_in_driver >= 0);
21753 				mutex_exit(SD_MUTEX(un));
21754 
21755 				goto done_without_assess;
21756 			}
21757 		}
21758 	}
21759 
21760 skip_ready_valid:
21761 	mutex_exit(SD_MUTEX(un));
21762 
21763 	switch (cmd) {
21764 	case DKIOCINFO:
21765 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
21766 		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
21767 		break;
21768 
21769 	case DKIOCGMEDIAINFO:
21770 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
21771 		err = sd_get_media_info(dev, (caddr_t)arg, flag);
21772 		break;
21773 
21774 	case DKIOCGMEDIAINFOEXT:
21775 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFOEXT\n");
21776 		err = sd_get_media_info_ext(dev, (caddr_t)arg, flag);
21777 		break;
21778 
21779 	case DKIOCGGEOM:
21780 	case DKIOCGVTOC:
21781 	case DKIOCGEXTVTOC:
21782 	case DKIOCGAPART:
21783 	case DKIOCPARTINFO:
21784 	case DKIOCEXTPARTINFO:
21785 	case DKIOCSGEOM:
21786 	case DKIOCSAPART:
21787 	case DKIOCGETEFI:
21788 	case DKIOCPARTITION:
21789 	case DKIOCSVTOC:
21790 	case DKIOCSEXTVTOC:
21791 	case DKIOCSETEFI:
21792 	case DKIOCGMBOOT:
21793 	case DKIOCSMBOOT:
21794 	case DKIOCG_PHYGEOM:
21795 	case DKIOCG_VIRTGEOM:
21796 #if defined(__x86)
21797 	case DKIOCSETEXTPART:
21798 #endif
21799 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOC %d\n", cmd);
21800 
21801 		/* TUR should spin up */
21802 
21803 		if (un->un_f_has_removable_media)
21804 			err = sd_send_scsi_TEST_UNIT_READY(ssc,
21805 			    SD_CHECK_FOR_MEDIA);
21806 
21807 		else
21808 			err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
21809 
21810 		if (err != 0)
21811 			goto done_with_assess;
21812 
21813 		err = cmlb_ioctl(un->un_cmlbhandle, dev,
21814 		    cmd, arg, flag, cred_p, rval_p, (void *)SD_PATH_DIRECT);
21815 
21816 		if ((err == 0) &&
21817 		    ((cmd == DKIOCSETEFI) ||
21818 		    ((un->un_f_pkstats_enabled) &&
21819 		    (cmd == DKIOCSAPART || cmd == DKIOCSVTOC ||
21820 		    cmd == DKIOCSEXTVTOC)))) {
21821 
21822 			tmprval = cmlb_validate(un->un_cmlbhandle, CMLB_SILENT,
21823 			    (void *)SD_PATH_DIRECT);
21824 			if ((tmprval == 0) && un->un_f_pkstats_enabled) {
21825 				sd_set_pstats(un);
21826 				SD_TRACE(SD_LOG_IO_PARTITION, un,
21827 				    "sd_ioctl: un:0x%p pstats created and "
21828 				    "set\n", un);
21829 			}
21830 		}
21831 
21832 		if ((cmd == DKIOCSVTOC || cmd == DKIOCSEXTVTOC) ||
21833 		    ((cmd == DKIOCSETEFI) && (tmprval == 0))) {
21834 
21835 			mutex_enter(SD_MUTEX(un));
21836 			if (un->un_f_devid_supported &&
21837 			    (un->un_f_opt_fab_devid == TRUE)) {
21838 				if (un->un_devid == NULL) {
21839 					sd_register_devid(ssc, SD_DEVINFO(un),
21840 					    SD_TARGET_IS_UNRESERVED);
21841 				} else {
21842 					/*
21843 					 * The device id for this disk
21844 					 * has been fabricated. The
21845 					 * device id must be preserved
21846 					 * by writing it back out to
21847 					 * disk.
21848 					 */
21849 					if (sd_write_deviceid(ssc) != 0) {
21850 						ddi_devid_free(un->un_devid);
21851 						un->un_devid = NULL;
21852 					}
21853 				}
21854 			}
21855 			mutex_exit(SD_MUTEX(un));
21856 		}
21857 
21858 		break;
21859 
21860 	case DKIOCLOCK:
21861 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
21862 		err = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
21863 		    SD_PATH_STANDARD);
21864 		goto done_with_assess;
21865 
21866 	case DKIOCUNLOCK:
21867 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
21868 		err = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_ALLOW,
21869 		    SD_PATH_STANDARD);
21870 		goto done_with_assess;
21871 
21872 	case DKIOCSTATE: {
21873 		enum dkio_state		state;
21874 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
21875 
21876 		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
21877 			err = EFAULT;
21878 		} else {
21879 			err = sd_check_media(dev, state);
21880 			if (err == 0) {
21881 				if (ddi_copyout(&un->un_mediastate, (void *)arg,
21882 				    sizeof (int), flag) != 0)
21883 					err = EFAULT;
21884 			}
21885 		}
21886 		break;
21887 	}
21888 
21889 	case DKIOCREMOVABLE:
21890 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
21891 		i = un->un_f_has_removable_media ? 1 : 0;
21892 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21893 			err = EFAULT;
21894 		} else {
21895 			err = 0;
21896 		}
21897 		break;
21898 
21899 	case DKIOCSOLIDSTATE:
21900 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSOLIDSTATE\n");
21901 		i = un->un_f_is_solid_state ? 1 : 0;
21902 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21903 			err = EFAULT;
21904 		} else {
21905 			err = 0;
21906 		}
21907 		break;
21908 
21909 	case DKIOCHOTPLUGGABLE:
21910 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
21911 		i = un->un_f_is_hotpluggable ? 1 : 0;
21912 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21913 			err = EFAULT;
21914 		} else {
21915 			err = 0;
21916 		}
21917 		break;
21918 
21919 	case DKIOCREADONLY:
21920 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREADONLY\n");
21921 		i = 0;
21922 		if ((ISCD(un) && !un->un_f_mmc_writable_media) ||
21923 		    (sr_check_wp(dev) != 0)) {
21924 			i = 1;
21925 		}
21926 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21927 			err = EFAULT;
21928 		} else {
21929 			err = 0;
21930 		}
21931 		break;
21932 
21933 	case DKIOCGTEMPERATURE:
21934 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
21935 		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
21936 		break;
21937 
21938 	case MHIOCENFAILFAST:
21939 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
21940 		if ((err = drv_priv(cred_p)) == 0) {
21941 			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
21942 		}
21943 		break;
21944 
21945 	case MHIOCTKOWN:
21946 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
21947 		if ((err = drv_priv(cred_p)) == 0) {
21948 			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
21949 		}
21950 		break;
21951 
21952 	case MHIOCRELEASE:
21953 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
21954 		if ((err = drv_priv(cred_p)) == 0) {
21955 			err = sd_mhdioc_release(dev);
21956 		}
21957 		break;
21958 
21959 	case MHIOCSTATUS:
21960 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
21961 		if ((err = drv_priv(cred_p)) == 0) {
21962 			switch (sd_send_scsi_TEST_UNIT_READY(ssc, 0)) {
21963 			case 0:
21964 				err = 0;
21965 				break;
21966 			case EACCES:
21967 				*rval_p = 1;
21968 				err = 0;
21969 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
21970 				break;
21971 			default:
21972 				err = EIO;
21973 				goto done_with_assess;
21974 			}
21975 		}
21976 		break;
21977 
21978 	case MHIOCQRESERVE:
21979 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
21980 		if ((err = drv_priv(cred_p)) == 0) {
21981 			err = sd_reserve_release(dev, SD_RESERVE);
21982 		}
21983 		break;
21984 
21985 	case MHIOCREREGISTERDEVID:
21986 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
21987 		if (drv_priv(cred_p) == EPERM) {
21988 			err = EPERM;
21989 		} else if (!un->un_f_devid_supported) {
21990 			err = ENOTTY;
21991 		} else {
21992 			err = sd_mhdioc_register_devid(dev);
21993 		}
21994 		break;
21995 
21996 	case MHIOCGRP_INKEYS:
21997 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
21998 		if (((err = drv_priv(cred_p)) != EPERM) &&
21999 		    arg != (intptr_t)NULL) {
22000 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22001 				err = ENOTSUP;
22002 			} else {
22003 				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
22004 				    flag);
22005 			}
22006 		}
22007 		break;
22008 
22009 	case MHIOCGRP_INRESV:
22010 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
22011 		if (((err = drv_priv(cred_p)) != EPERM) &&
22012 		    arg != (intptr_t)NULL) {
22013 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22014 				err = ENOTSUP;
22015 			} else {
22016 				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
22017 			}
22018 		}
22019 		break;
22020 
22021 	case MHIOCGRP_REGISTER:
22022 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
22023 		if ((err = drv_priv(cred_p)) != EPERM) {
22024 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22025 				err = ENOTSUP;
22026 			} else if (arg != (intptr_t)NULL) {
22027 				mhioc_register_t reg;
22028 				if (ddi_copyin((void *)arg, &reg,
22029 				    sizeof (mhioc_register_t), flag) != 0) {
22030 					err = EFAULT;
22031 				} else {
22032 					err =
22033 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
22034 					    ssc, SD_SCSI3_REGISTER,
22035 					    (uchar_t *)&reg);
22036 					if (err != 0)
22037 						goto done_with_assess;
22038 				}
22039 			}
22040 		}
22041 		break;
22042 
22043 	case MHIOCGRP_CLEAR:
22044 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_CLEAR\n");
22045 		if ((err = drv_priv(cred_p)) != EPERM) {
22046 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22047 				err = ENOTSUP;
22048 			} else if (arg != (intptr_t)NULL) {
22049 				mhioc_register_t reg;
22050 				if (ddi_copyin((void *)arg, &reg,
22051 				    sizeof (mhioc_register_t), flag) != 0) {
22052 					err = EFAULT;
22053 				} else {
22054 					err =
22055 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
22056 					    ssc, SD_SCSI3_CLEAR,
22057 					    (uchar_t *)&reg);
22058 					if (err != 0)
22059 						goto done_with_assess;
22060 				}
22061 			}
22062 		}
22063 		break;
22064 
22065 	case MHIOCGRP_RESERVE:
22066 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
22067 		if ((err = drv_priv(cred_p)) != EPERM) {
22068 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22069 				err = ENOTSUP;
22070 			} else if (arg != (intptr_t)NULL) {
22071 				mhioc_resv_desc_t resv_desc;
22072 				if (ddi_copyin((void *)arg, &resv_desc,
22073 				    sizeof (mhioc_resv_desc_t), flag) != 0) {
22074 					err = EFAULT;
22075 				} else {
22076 					err =
22077 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
22078 					    ssc, SD_SCSI3_RESERVE,
22079 					    (uchar_t *)&resv_desc);
22080 					if (err != 0)
22081 						goto done_with_assess;
22082 				}
22083 			}
22084 		}
22085 		break;
22086 
22087 	case MHIOCGRP_PREEMPTANDABORT:
22088 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
22089 		if ((err = drv_priv(cred_p)) != EPERM) {
22090 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22091 				err = ENOTSUP;
22092 			} else if (arg != (intptr_t)NULL) {
22093 				mhioc_preemptandabort_t preempt_abort;
22094 				if (ddi_copyin((void *)arg, &preempt_abort,
22095 				    sizeof (mhioc_preemptandabort_t),
22096 				    flag) != 0) {
22097 					err = EFAULT;
22098 				} else {
22099 					err =
22100 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
22101 					    ssc, SD_SCSI3_PREEMPTANDABORT,
22102 					    (uchar_t *)&preempt_abort);
22103 					if (err != 0)
22104 						goto done_with_assess;
22105 				}
22106 			}
22107 		}
22108 		break;
22109 
22110 	case MHIOCGRP_REGISTERANDIGNOREKEY:
22111 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTERANDIGNOREKEY\n");
22112 		if ((err = drv_priv(cred_p)) != EPERM) {
22113 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22114 				err = ENOTSUP;
22115 			} else if (arg != (intptr_t)NULL) {
22116 				mhioc_registerandignorekey_t r_and_i;
22117 				if (ddi_copyin((void *)arg, (void *)&r_and_i,
22118 				    sizeof (mhioc_registerandignorekey_t),
22119 				    flag) != 0) {
22120 					err = EFAULT;
22121 				} else {
22122 					err =
22123 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
22124 					    ssc, SD_SCSI3_REGISTERANDIGNOREKEY,
22125 					    (uchar_t *)&r_and_i);
22126 					if (err != 0)
22127 						goto done_with_assess;
22128 				}
22129 			}
22130 		}
22131 		break;
22132 
22133 	case USCSICMD:
22134 		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
22135 		cr = ddi_get_cred();
22136 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
22137 			err = EPERM;
22138 		} else {
22139 			enum uio_seg	uioseg;
22140 
22141 			uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE :
22142 			    UIO_USERSPACE;
22143 			if (un->un_f_format_in_progress == TRUE) {
22144 				err = EAGAIN;
22145 				break;
22146 			}
22147 
22148 			err = sd_ssc_send(ssc,
22149 			    (struct uscsi_cmd *)arg,
22150 			    flag, uioseg, SD_PATH_STANDARD);
22151 			if (err != 0)
22152 				goto done_with_assess;
22153 			else
22154 				sd_ssc_assessment(ssc, SD_FMT_STANDARD);
22155 		}
22156 		break;
22157 
22158 	case USCSIMAXXFER:
22159 		SD_TRACE(SD_LOG_IOCTL, un, "USCSIMAXXFER\n");
22160 		cr = ddi_get_cred();
22161 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
22162 			err = EPERM;
22163 		} else {
22164 			const uscsi_xfer_t xfer = un->un_max_xfer_size;
22165 
22166 			if (ddi_copyout(&xfer, (void *)arg, sizeof (xfer),
22167 			    flag) != 0) {
22168 				err = EFAULT;
22169 			} else {
22170 				err = 0;
22171 			}
22172 		}
22173 		break;
22174 
22175 	case CDROMPAUSE:
22176 	case CDROMRESUME:
22177 		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
22178 		if (!ISCD(un)) {
22179 			err = ENOTTY;
22180 		} else {
22181 			err = sr_pause_resume(dev, cmd);
22182 		}
22183 		break;
22184 
22185 	case CDROMPLAYMSF:
22186 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
22187 		if (!ISCD(un)) {
22188 			err = ENOTTY;
22189 		} else {
22190 			err = sr_play_msf(dev, (caddr_t)arg, flag);
22191 		}
22192 		break;
22193 
22194 	case CDROMPLAYTRKIND:
22195 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
22196 #if defined(__x86)
22197 		/*
22198 		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
22199 		 */
22200 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
22201 #else
22202 		if (!ISCD(un)) {
22203 #endif
22204 			err = ENOTTY;
22205 		} else {
22206 			err = sr_play_trkind(dev, (caddr_t)arg, flag);
22207 		}
22208 		break;
22209 
22210 	case CDROMREADTOCHDR:
22211 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
22212 		if (!ISCD(un)) {
22213 			err = ENOTTY;
22214 		} else {
22215 			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
22216 		}
22217 		break;
22218 
22219 	case CDROMREADTOCENTRY:
22220 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
22221 		if (!ISCD(un)) {
22222 			err = ENOTTY;
22223 		} else {
22224 			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
22225 		}
22226 		break;
22227 
22228 	case CDROMSTOP:
22229 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
22230 		if (!ISCD(un)) {
22231 			err = ENOTTY;
22232 		} else {
22233 			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
22234 			    SD_TARGET_STOP, SD_PATH_STANDARD);
22235 			goto done_with_assess;
22236 		}
22237 		break;
22238 
22239 	case CDROMSTART:
22240 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
22241 		if (!ISCD(un)) {
22242 			err = ENOTTY;
22243 		} else {
22244 			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
22245 			    SD_TARGET_START, SD_PATH_STANDARD);
22246 			goto done_with_assess;
22247 		}
22248 		break;
22249 
22250 	case CDROMCLOSETRAY:
22251 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
22252 		if (!ISCD(un)) {
22253 			err = ENOTTY;
22254 		} else {
22255 			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
22256 			    SD_TARGET_CLOSE, SD_PATH_STANDARD);
22257 			goto done_with_assess;
22258 		}
22259 		break;
22260 
22261 	case FDEJECT:	/* for eject command */
22262 	case DKIOCEJECT:
22263 	case CDROMEJECT:
22264 		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
22265 		if (!un->un_f_eject_media_supported) {
22266 			err = ENOTTY;
22267 		} else {
22268 			err = sr_eject(dev);
22269 		}
22270 		break;
22271 
22272 	case CDROMVOLCTRL:
22273 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
22274 		if (!ISCD(un)) {
22275 			err = ENOTTY;
22276 		} else {
22277 			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
22278 		}
22279 		break;
22280 
22281 	case CDROMSUBCHNL:
22282 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
22283 		if (!ISCD(un)) {
22284 			err = ENOTTY;
22285 		} else {
22286 			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
22287 		}
22288 		break;
22289 
22290 	case CDROMREADMODE2:
22291 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
22292 		if (!ISCD(un)) {
22293 			err = ENOTTY;
22294 		} else if (un->un_f_cfg_is_atapi == TRUE) {
22295 			/*
22296 			 * If the drive supports READ CD, use that instead of
22297 			 * switching the LBA size via a MODE SELECT
22298 			 * Block Descriptor
22299 			 */
22300 			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
22301 		} else {
22302 			err = sr_read_mode2(dev, (caddr_t)arg, flag);
22303 		}
22304 		break;
22305 
22306 	case CDROMREADMODE1:
22307 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
22308 		if (!ISCD(un)) {
22309 			err = ENOTTY;
22310 		} else {
22311 			err = sr_read_mode1(dev, (caddr_t)arg, flag);
22312 		}
22313 		break;
22314 
22315 	case CDROMREADOFFSET:
22316 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
22317 		if (!ISCD(un)) {
22318 			err = ENOTTY;
22319 		} else {
22320 			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
22321 			    flag);
22322 		}
22323 		break;
22324 
22325 	case CDROMSBLKMODE:
22326 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
22327 		/*
22328 		 * There is no means of changing block size in case of atapi
22329 		 * drives, thus return ENOTTY if drive type is atapi
22330 		 */
22331 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
22332 			err = ENOTTY;
22333 		} else if (un->un_f_mmc_cap == TRUE) {
22334 
22335 			/*
22336 			 * MMC Devices do not support changing the
22337 			 * logical block size
22338 			 *
22339 			 * Note: EINVAL is being returned instead of ENOTTY to
22340 			 * maintain consistency with the original mmc
22341 			 * driver update.
22342 			 */
22343 			err = EINVAL;
22344 		} else {
22345 			mutex_enter(SD_MUTEX(un));
22346 			if ((!(un->un_exclopen & (1<<SDPART(dev)))) ||
22347 			    (un->un_ncmds_in_transport > 0)) {
22348 				mutex_exit(SD_MUTEX(un));
22349 				err = EINVAL;
22350 			} else {
22351 				mutex_exit(SD_MUTEX(un));
22352 				err = sr_change_blkmode(dev, cmd, arg, flag);
22353 			}
22354 		}
22355 		break;
22356 
22357 	case CDROMGBLKMODE:
22358 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
22359 		if (!ISCD(un)) {
22360 			err = ENOTTY;
22361 		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
22362 		    (un->un_f_blockcount_is_valid != FALSE)) {
22363 			/*
22364 			 * Drive is an ATAPI drive so return target block
22365 			 * size for ATAPI drives since we cannot change the
22366 			 * blocksize on ATAPI drives. Used primarily to detect
22367 			 * if an ATAPI cdrom is present.
22368 			 */
22369 			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
22370 			    sizeof (int), flag) != 0) {
22371 				err = EFAULT;
22372 			} else {
22373 				err = 0;
22374 			}
22375 
22376 		} else {
22377 			/*
22378 			 * Drive supports changing block sizes via a Mode
22379 			 * Select.
22380 			 */
22381 			err = sr_change_blkmode(dev, cmd, arg, flag);
22382 		}
22383 		break;
22384 
22385 	case CDROMGDRVSPEED:
22386 	case CDROMSDRVSPEED:
22387 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
22388 		if (!ISCD(un)) {
22389 			err = ENOTTY;
22390 		} else if (un->un_f_mmc_cap == TRUE) {
22391 			/*
22392 			 * Note: In the future the driver implementation
22393 			 * for getting and
22394 			 * setting cd speed should entail:
22395 			 * 1) If non-mmc try the Toshiba mode page
22396 			 *    (sr_change_speed)
22397 			 * 2) If mmc but no support for Real Time Streaming try
22398 			 *    the SET CD SPEED (0xBB) command
22399 			 *   (sr_atapi_change_speed)
22400 			 * 3) If mmc and support for Real Time Streaming
22401 			 *    try the GET PERFORMANCE and SET STREAMING
22402 			 *    commands (not yet implemented, 4380808)
22403 			 */
22404 			/*
22405 			 * As per recent MMC spec, CD-ROM speed is variable
22406 			 * and changes with LBA. Since there is no such
22407 			 * things as drive speed now, fail this ioctl.
22408 			 *
22409 			 * Note: EINVAL is returned for consistency of original
22410 			 * implementation which included support for getting
22411 			 * the drive speed of mmc devices but not setting
22412 			 * the drive speed. Thus EINVAL would be returned
22413 			 * if a set request was made for an mmc device.
22414 			 * We no longer support get or set speed for
22415 			 * mmc but need to remain consistent with regard
22416 			 * to the error code returned.
22417 			 */
22418 			err = EINVAL;
22419 		} else if (un->un_f_cfg_is_atapi == TRUE) {
22420 			err = sr_atapi_change_speed(dev, cmd, arg, flag);
22421 		} else {
22422 			err = sr_change_speed(dev, cmd, arg, flag);
22423 		}
22424 		break;
22425 
22426 	case CDROMCDDA:
22427 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
22428 		if (!ISCD(un)) {
22429 			err = ENOTTY;
22430 		} else {
22431 			err = sr_read_cdda(dev, (void *)arg, flag);
22432 		}
22433 		break;
22434 
22435 	case CDROMCDXA:
22436 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
22437 		if (!ISCD(un)) {
22438 			err = ENOTTY;
22439 		} else {
22440 			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
22441 		}
22442 		break;
22443 
22444 	case CDROMSUBCODE:
22445 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
22446 		if (!ISCD(un)) {
22447 			err = ENOTTY;
22448 		} else {
22449 			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
22450 		}
22451 		break;
22452 
22453 
22454 #ifdef SDDEBUG
22455 /* RESET/ABORTS testing ioctls */
22456 	case DKIOCRESET: {
22457 		int	reset_level;
22458 
22459 		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
22460 			err = EFAULT;
22461 		} else {
22462 			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
22463 			    "reset_level = 0x%lx\n", reset_level);
22464 			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
22465 				err = 0;
22466 			} else {
22467 				err = EIO;
22468 			}
22469 		}
22470 		break;
22471 	}
22472 
22473 	case DKIOCABORT:
22474 		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
22475 		if (scsi_abort(SD_ADDRESS(un), NULL)) {
22476 			err = 0;
22477 		} else {
22478 			err = EIO;
22479 		}
22480 		break;
22481 #endif
22482 
22483 #ifdef SD_FAULT_INJECTION
22484 /* SDIOC FaultInjection testing ioctls */
22485 	case SDIOCSTART:
22486 	case SDIOCSTOP:
22487 	case SDIOCINSERTPKT:
22488 	case SDIOCINSERTXB:
22489 	case SDIOCINSERTUN:
22490 	case SDIOCINSERTARQ:
22491 	case SDIOCPUSH:
22492 	case SDIOCRETRIEVE:
22493 	case SDIOCRUN:
22494 		SD_INFO(SD_LOG_SDTEST, un, "sdioctl:"
22495 		    "SDIOC detected cmd:0x%X:\n", cmd);
22496 		/* call error generator */
22497 		sd_faultinjection_ioctl(cmd, arg, un);
22498 		err = 0;
22499 		break;
22500 
22501 #endif /* SD_FAULT_INJECTION */
22502 
22503 	case DKIOCFLUSHWRITECACHE:
22504 		{
22505 			struct dk_callback *dkc = (struct dk_callback *)arg;
22506 
22507 			mutex_enter(SD_MUTEX(un));
22508 			if (!un->un_f_sync_cache_supported ||
22509 			    !un->un_f_write_cache_enabled) {
22510 				err = un->un_f_sync_cache_supported ?
22511 				    0 : ENOTSUP;
22512 				mutex_exit(SD_MUTEX(un));
22513 				if ((flag & FKIOCTL) && dkc != NULL &&
22514 				    dkc->dkc_callback != NULL) {
22515 					(*dkc->dkc_callback)(dkc->dkc_cookie,
22516 					    err);
22517 					/*
22518 					 * Did callback and reported error.
22519 					 * Since we did a callback, ioctl
22520 					 * should return 0.
22521 					 */
22522 					err = 0;
22523 				}
22524 				break;
22525 			}
22526 			mutex_exit(SD_MUTEX(un));
22527 
22528 			if ((flag & FKIOCTL) && dkc != NULL &&
22529 			    dkc->dkc_callback != NULL) {
22530 				/* async SYNC CACHE request */
22531 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
22532 			} else {
22533 				/* synchronous SYNC CACHE request */
22534 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
22535 			}
22536 		}
22537 		break;
22538 
22539 	case DKIOCFREE:
22540 		{
22541 			dkioc_free_list_t *dfl = (dkioc_free_list_t *)arg;
22542 
22543 			/* bad ioctls shouldn't panic */
22544 			if (dfl == NULL) {
22545 				/* check kernel callers strictly in debug */
22546 				ASSERT0(flag & FKIOCTL);
22547 				err = SET_ERROR(EINVAL);
22548 				break;
22549 			}
22550 			/* synchronous UNMAP request */
22551 			err = sd_send_scsi_UNMAP(dev, ssc, dfl, flag);
22552 		}
22553 		break;
22554 
22555 	case DKIOC_CANFREE:
22556 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOC_CANFREE\n");
22557 		i = (un->un_thin_flags & SD_THIN_PROV_ENABLED) ? 1 : 0;
22558 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
22559 			err = EFAULT;
22560 		} else {
22561 			err = 0;
22562 		}
22563 		break;
22564 
22565 	case DKIOCGETWCE: {
22566 
22567 		int wce;
22568 
22569 		if ((err = sd_get_write_cache_enabled(ssc, &wce)) != 0) {
22570 			break;
22571 		}
22572 
22573 		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
22574 			err = EFAULT;
22575 		}
22576 		break;
22577 	}
22578 
22579 	case DKIOCSETWCE: {
22580 
22581 		int wce, sync_supported;
22582 		int cur_wce = 0;
22583 
22584 		if (!un->un_f_cache_mode_changeable) {
22585 			err = EINVAL;
22586 			break;
22587 		}
22588 
22589 		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
22590 			err = EFAULT;
22591 			break;
22592 		}
22593 
22594 		/*
22595 		 * Synchronize multiple threads trying to enable
22596 		 * or disable the cache via the un_f_wcc_cv
22597 		 * condition variable.
22598 		 */
22599 		mutex_enter(SD_MUTEX(un));
22600 
22601 		/*
22602 		 * Don't allow the cache to be enabled if the
22603 		 * config file has it disabled.
22604 		 */
22605 		if (un->un_f_opt_disable_cache && wce) {
22606 			mutex_exit(SD_MUTEX(un));
22607 			err = EINVAL;
22608 			break;
22609 		}
22610 
22611 		/*
22612 		 * Wait for write cache change in progress
22613 		 * bit to be clear before proceeding.
22614 		 */
22615 		while (un->un_f_wcc_inprog)
22616 			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
22617 
22618 		un->un_f_wcc_inprog = 1;
22619 
22620 		mutex_exit(SD_MUTEX(un));
22621 
22622 		/*
22623 		 * Get the current write cache state
22624 		 */
22625 		if ((err = sd_get_write_cache_enabled(ssc, &cur_wce)) != 0) {
22626 			mutex_enter(SD_MUTEX(un));
22627 			un->un_f_wcc_inprog = 0;
22628 			cv_broadcast(&un->un_wcc_cv);
22629 			mutex_exit(SD_MUTEX(un));
22630 			break;
22631 		}
22632 
22633 		mutex_enter(SD_MUTEX(un));
22634 		un->un_f_write_cache_enabled = (cur_wce != 0);
22635 
22636 		if (un->un_f_write_cache_enabled && wce == 0) {
22637 			/*
22638 			 * Disable the write cache.  Don't clear
22639 			 * un_f_write_cache_enabled until after
22640 			 * the mode select and flush are complete.
22641 			 */
22642 			sync_supported = un->un_f_sync_cache_supported;
22643 
22644 			/*
22645 			 * If cache flush is suppressed, we assume that the
22646 			 * controller firmware will take care of managing the
22647 			 * write cache for us: no need to explicitly
22648 			 * disable it.
22649 			 */
22650 			if (!un->un_f_suppress_cache_flush) {
22651 				mutex_exit(SD_MUTEX(un));
22652 				if ((err = sd_cache_control(ssc,
22653 				    SD_CACHE_NOCHANGE,
22654 				    SD_CACHE_DISABLE)) == 0 &&
22655 				    sync_supported) {
22656 					err = sd_send_scsi_SYNCHRONIZE_CACHE(un,
22657 					    NULL);
22658 				}
22659 			} else {
22660 				mutex_exit(SD_MUTEX(un));
22661 			}
22662 
22663 			mutex_enter(SD_MUTEX(un));
22664 			if (err == 0) {
22665 				un->un_f_write_cache_enabled = 0;
22666 			}
22667 
22668 		} else if (!un->un_f_write_cache_enabled && wce != 0) {
22669 			/*
22670 			 * Set un_f_write_cache_enabled first, so there is
22671 			 * no window where the cache is enabled, but the
22672 			 * bit says it isn't.
22673 			 */
22674 			un->un_f_write_cache_enabled = 1;
22675 
22676 			/*
22677 			 * If cache flush is suppressed, we assume that the
22678 			 * controller firmware will take care of managing the
22679 			 * write cache for us: no need to explicitly
22680 			 * enable it.
22681 			 */
22682 			if (!un->un_f_suppress_cache_flush) {
22683 				mutex_exit(SD_MUTEX(un));
22684 				err = sd_cache_control(ssc, SD_CACHE_NOCHANGE,
22685 				    SD_CACHE_ENABLE);
22686 			} else {
22687 				mutex_exit(SD_MUTEX(un));
22688 			}
22689 
22690 			mutex_enter(SD_MUTEX(un));
22691 
22692 			if (err) {
22693 				un->un_f_write_cache_enabled = 0;
22694 			}
22695 		}
22696 
22697 		un->un_f_wcc_inprog = 0;
22698 		cv_broadcast(&un->un_wcc_cv);
22699 		mutex_exit(SD_MUTEX(un));
22700 		break;
22701 	}
22702 
22703 	default:
22704 		err = ENOTTY;
22705 		break;
22706 	}
22707 	mutex_enter(SD_MUTEX(un));
22708 	un->un_ncmds_in_driver--;
22709 	ASSERT(un->un_ncmds_in_driver >= 0);
22710 	mutex_exit(SD_MUTEX(un));
22711 
22712 
22713 done_without_assess:
22714 	sd_ssc_fini(ssc);
22715 
22716 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
22717 	return (err);
22718 
22719 done_with_assess:
22720 	mutex_enter(SD_MUTEX(un));
22721 	un->un_ncmds_in_driver--;
22722 	ASSERT(un->un_ncmds_in_driver >= 0);
22723 	mutex_exit(SD_MUTEX(un));
22724 
22725 done_quick_assess:
22726 	if (err != 0)
22727 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
22728 	/* Uninitialize sd_ssc_t pointer */
22729 	sd_ssc_fini(ssc);
22730 
22731 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
22732 	return (err);
22733 }
22734 
22735 
22736 /*
22737  *    Function: sd_dkio_ctrl_info
22738  *
22739  * Description: This routine is the driver entry point for handling controller
22740  *		information ioctl requests (DKIOCINFO).
22741  *
22742  *   Arguments: dev  - the device number
22743  *		arg  - pointer to user provided dk_cinfo structure
22744  *		       specifying the controller type and attributes.
22745  *		flag - this argument is a pass through to ddi_copyxxx()
22746  *		       directly from the mode argument of ioctl().
22747  *
22748  * Return Code: 0
22749  *		EFAULT
22750  *		ENXIO
22751  */
22752 
22753 static int
22754 sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
22755 {
22756 	struct sd_lun	*un = NULL;
22757 	struct dk_cinfo	*info;
22758 	dev_info_t	*pdip;
22759 	int		lun, tgt;
22760 
22761 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22762 		return (ENXIO);
22763 	}
22764 
22765 	info = (struct dk_cinfo *)
22766 	    kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
22767 
22768 	switch (un->un_ctype) {
22769 	case CTYPE_CDROM:
22770 		info->dki_ctype = DKC_CDROM;
22771 		break;
22772 	default:
22773 		info->dki_ctype = DKC_SCSI_CCS;
22774 		break;
22775 	}
22776 	pdip = ddi_get_parent(SD_DEVINFO(un));
22777 	info->dki_cnum = ddi_get_instance(pdip);
22778 	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
22779 		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
22780 	} else {
22781 		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
22782 		    DK_DEVLEN - 1);
22783 	}
22784 
22785 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22786 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
22787 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22788 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
22789 
22790 	/* Unit Information */
22791 	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
22792 	info->dki_slave = ((tgt << 3) | lun);
22793 	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
22794 	    DK_DEVLEN - 1);
22795 	info->dki_flags = DKI_FMTVOL;
22796 	info->dki_partition = SDPART(dev);
22797 
22798 	/* Max Transfer size of this device in blocks */
22799 	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
22800 	info->dki_addr = 0;
22801 	info->dki_space = 0;
22802 	info->dki_prio = 0;
22803 	info->dki_vec = 0;
22804 
22805 	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
22806 		kmem_free(info, sizeof (struct dk_cinfo));
22807 		return (EFAULT);
22808 	} else {
22809 		kmem_free(info, sizeof (struct dk_cinfo));
22810 		return (0);
22811 	}
22812 }
22813 
22814 /*
22815  *    Function: sd_get_media_info_com
22816  *
22817  * Description: This routine returns the information required to populate
22818  *		the fields for the dk_minfo/dk_minfo_ext structures.
22819  *
22820  *   Arguments: dev		- the device number
22821  *		dki_media_type	- media_type
22822  *		dki_lbsize	- logical block size
22823  *		dki_capacity	- capacity in blocks
22824  *		dki_pbsize	- physical block size (if requested)
22825  *
22826  * Return Code: 0
22827  *		EACCESS
22828  *		EFAULT
22829  *		ENXIO
22830  *		EIO
22831  */
22832 static int
22833 sd_get_media_info_com(dev_t dev, uint_t *dki_media_type, uint_t *dki_lbsize,
22834     diskaddr_t *dki_capacity, uint_t *dki_pbsize)
22835 {
22836 	struct sd_lun		*un = NULL;
22837 	struct uscsi_cmd	com;
22838 	struct scsi_inquiry	*sinq;
22839 	u_longlong_t		media_capacity;
22840 	uint64_t		capacity;
22841 	uint_t			lbasize;
22842 	uint_t			pbsize;
22843 	uchar_t			*out_data;
22844 	uchar_t			*rqbuf;
22845 	int			rval = 0;
22846 	int			rtn;
22847 	sd_ssc_t		*ssc;
22848 
22849 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
22850 	    (un->un_state == SD_STATE_OFFLINE)) {
22851 		return (ENXIO);
22852 	}
22853 
22854 	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info_com: entry\n");
22855 
22856 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
22857 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
22858 	ssc = sd_ssc_init(un);
22859 
22860 	/* Issue a TUR to determine if the drive is ready with media present */
22861 	rval = sd_send_scsi_TEST_UNIT_READY(ssc, SD_CHECK_FOR_MEDIA);
22862 	if (rval == ENXIO) {
22863 		goto done;
22864 	} else if (rval != 0) {
22865 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
22866 	}
22867 
22868 	/* Now get configuration data */
22869 	if (ISCD(un)) {
22870 		*dki_media_type = DK_CDROM;
22871 
22872 		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
22873 		if (un->un_f_mmc_cap == TRUE) {
22874 			rtn = sd_send_scsi_GET_CONFIGURATION(ssc, &com, rqbuf,
22875 			    SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN,
22876 			    SD_PATH_STANDARD);
22877 
22878 			if (rtn) {
22879 				/*
22880 				 * We ignore all failures for CD and need to
22881 				 * put the assessment before processing code
22882 				 * to avoid missing assessment for FMA.
22883 				 */
22884 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
22885 				/*
22886 				 * Failed for other than an illegal request
22887 				 * or command not supported
22888 				 */
22889 				if ((com.uscsi_status == STATUS_CHECK) &&
22890 				    (com.uscsi_rqstatus == STATUS_GOOD)) {
22891 					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
22892 					    (rqbuf[12] != 0x20)) {
22893 						rval = EIO;
22894 						goto no_assessment;
22895 					}
22896 				}
22897 			} else {
22898 				/*
22899 				 * The GET CONFIGURATION command succeeded
22900 				 * so set the media type according to the
22901 				 * returned data
22902 				 */
22903 				*dki_media_type = out_data[6];
22904 				*dki_media_type <<= 8;
22905 				*dki_media_type |= out_data[7];
22906 			}
22907 		}
22908 	} else {
22909 		/*
22910 		 * The profile list is not available, so we attempt to identify
22911 		 * the media type based on the inquiry data
22912 		 */
22913 		sinq = un->un_sd->sd_inq;
22914 		if ((sinq->inq_dtype == DTYPE_DIRECT) ||
22915 		    (sinq->inq_dtype == DTYPE_OPTICAL)) {
22916 			/* This is a direct access device  or optical disk */
22917 			*dki_media_type = DK_FIXED_DISK;
22918 
22919 			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
22920 			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
22921 				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
22922 					*dki_media_type = DK_ZIP;
22923 				} else if (
22924 				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
22925 					*dki_media_type = DK_JAZ;
22926 				}
22927 			}
22928 		} else {
22929 			/*
22930 			 * Not a CD, direct access or optical disk so return
22931 			 * unknown media
22932 			 */
22933 			*dki_media_type = DK_UNKNOWN;
22934 		}
22935 	}
22936 
22937 	/*
22938 	 * Now read the capacity so we can provide the lbasize,
22939 	 * pbsize and capacity.
22940 	 */
22941 	if (dki_pbsize && un->un_f_descr_format_supported) {
22942 		rval = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity, &lbasize,
22943 		    &pbsize, SD_PATH_DIRECT);
22944 
22945 		/*
22946 		 * Override the physical blocksize if the instance already
22947 		 * has a larger value.
22948 		 */
22949 		pbsize = MAX(pbsize, un->un_phy_blocksize);
22950 	}
22951 
22952 	if (dki_pbsize == NULL || rval != 0 ||
22953 	    !un->un_f_descr_format_supported) {
22954 		rval = sd_send_scsi_READ_CAPACITY(ssc, &capacity, &lbasize,
22955 		    SD_PATH_DIRECT);
22956 
22957 		switch (rval) {
22958 		case 0:
22959 			if (un->un_f_enable_rmw &&
22960 			    un->un_phy_blocksize != 0) {
22961 				pbsize = un->un_phy_blocksize;
22962 			} else {
22963 				pbsize = lbasize;
22964 			}
22965 			media_capacity = capacity;
22966 
22967 			/*
22968 			 * sd_send_scsi_READ_CAPACITY() reports capacity in
22969 			 * un->un_sys_blocksize chunks. So we need to convert
22970 			 * it into cap.lbsize chunks.
22971 			 */
22972 			if (un->un_f_has_removable_media) {
22973 				media_capacity *= un->un_sys_blocksize;
22974 				media_capacity /= lbasize;
22975 			}
22976 			break;
22977 		case EACCES:
22978 			rval = EACCES;
22979 			goto done;
22980 		default:
22981 			rval = EIO;
22982 			goto done;
22983 		}
22984 	} else {
22985 		if (un->un_f_enable_rmw &&
22986 		    !ISP2(pbsize % DEV_BSIZE)) {
22987 			pbsize = SSD_SECSIZE;
22988 		} else if (!ISP2(lbasize % DEV_BSIZE) ||
22989 		    !ISP2(pbsize % DEV_BSIZE)) {
22990 			pbsize = lbasize = DEV_BSIZE;
22991 		}
22992 		media_capacity = capacity;
22993 	}
22994 
22995 	/*
22996 	 * If lun is expanded dynamically, update the un structure.
22997 	 */
22998 	mutex_enter(SD_MUTEX(un));
22999 	if ((un->un_f_blockcount_is_valid == TRUE) &&
23000 	    (un->un_f_tgt_blocksize_is_valid == TRUE) &&
23001 	    (capacity > un->un_blockcount)) {
23002 		un->un_f_expnevent = B_FALSE;
23003 		sd_update_block_info(un, lbasize, capacity);
23004 	}
23005 	mutex_exit(SD_MUTEX(un));
23006 
23007 	*dki_lbsize = lbasize;
23008 	*dki_capacity = media_capacity;
23009 	if (dki_pbsize)
23010 		*dki_pbsize = pbsize;
23011 
23012 done:
23013 	if (rval != 0) {
23014 		if (rval == EIO)
23015 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
23016 		else
23017 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
23018 	}
23019 no_assessment:
23020 	sd_ssc_fini(ssc);
23021 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
23022 	kmem_free(rqbuf, SENSE_LENGTH);
23023 	return (rval);
23024 }
23025 
23026 /*
23027  *    Function: sd_get_media_info
23028  *
23029  * Description: This routine is the driver entry point for handling ioctl
23030  *		requests for the media type or command set profile used by the
23031  *		drive to operate on the media (DKIOCGMEDIAINFO).
23032  *
23033  *   Arguments: dev	- the device number
23034  *		arg	- pointer to user provided dk_minfo structure
23035  *			  specifying the media type, logical block size and
23036  *			  drive capacity.
23037  *		flag	- this argument is a pass through to ddi_copyxxx()
23038  *			  directly from the mode argument of ioctl().
23039  *
23040  * Return Code: returns the value from sd_get_media_info_com
23041  */
23042 static int
23043 sd_get_media_info(dev_t dev, caddr_t arg, int flag)
23044 {
23045 	struct dk_minfo		mi;
23046 	int			rval;
23047 
23048 	rval = sd_get_media_info_com(dev, &mi.dki_media_type,
23049 	    &mi.dki_lbsize, &mi.dki_capacity, NULL);
23050 
23051 	if (rval)
23052 		return (rval);
23053 	if (ddi_copyout(&mi, arg, sizeof (struct dk_minfo), flag))
23054 		rval = EFAULT;
23055 	return (rval);
23056 }
23057 
23058 /*
23059  *    Function: sd_get_media_info_ext
23060  *
23061  * Description: This routine is the driver entry point for handling ioctl
23062  *		requests for the media type or command set profile used by the
23063  *		drive to operate on the media (DKIOCGMEDIAINFOEXT). The
23064  *		difference this ioctl and DKIOCGMEDIAINFO is the return value
23065  *		of this ioctl contains both logical block size and physical
23066  *		block size.
23067  *
23068  *
23069  *   Arguments: dev	- the device number
23070  *		arg	- pointer to user provided dk_minfo_ext structure
23071  *			  specifying the media type, logical block size,
23072  *			  physical block size and disk capacity.
23073  *		flag	- this argument is a pass through to ddi_copyxxx()
23074  *			  directly from the mode argument of ioctl().
23075  *
23076  * Return Code: returns the value from sd_get_media_info_com
23077  */
23078 static int
23079 sd_get_media_info_ext(dev_t dev, caddr_t arg, int flag)
23080 {
23081 	struct dk_minfo_ext	mie;
23082 	int			rval = 0;
23083 	size_t			len;
23084 
23085 	rval = sd_get_media_info_com(dev, &mie.dki_media_type,
23086 	    &mie.dki_lbsize, &mie.dki_capacity, &mie.dki_pbsize);
23087 
23088 	if (rval)
23089 		return (rval);
23090 
23091 	switch (ddi_model_convert_from(flag & FMODELS)) {
23092 	case DDI_MODEL_ILP32:
23093 		len = sizeof (struct dk_minfo_ext32);
23094 		break;
23095 	default:
23096 		len = sizeof (struct dk_minfo_ext);
23097 		break;
23098 	}
23099 
23100 	if (ddi_copyout(&mie, arg, len, flag))
23101 		rval = EFAULT;
23102 	return (rval);
23103 
23104 }
23105 
23106 /*
23107  *    Function: sd_watch_request_submit
23108  *
23109  * Description: Call scsi_watch_request_submit or scsi_mmc_watch_request_submit
23110  *		depending on which is supported by device.
23111  */
23112 static opaque_t
23113 sd_watch_request_submit(struct sd_lun *un)
23114 {
23115 	dev_t			dev;
23116 
23117 	/* All submissions are unified to use same device number */
23118 	dev = sd_make_device(SD_DEVINFO(un));
23119 
23120 	if (un->un_f_mmc_cap && un->un_f_mmc_gesn_polling) {
23121 		return (scsi_mmc_watch_request_submit(SD_SCSI_DEVP(un),
23122 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
23123 		    (caddr_t)dev));
23124 	} else {
23125 		return (scsi_watch_request_submit(SD_SCSI_DEVP(un),
23126 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
23127 		    (caddr_t)dev));
23128 	}
23129 }
23130 
23131 
23132 /*
23133  *    Function: sd_check_media
23134  *
23135  * Description: This utility routine implements the functionality for the
23136  *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
23137  *		driver state changes from that specified by the user
23138  *		(inserted or ejected). For example, if the user specifies
23139  *		DKIO_EJECTED and the current media state is inserted this
23140  *		routine will immediately return DKIO_INSERTED. However, if the
23141  *		current media state is not inserted the user thread will be
23142  *		blocked until the drive state changes. If DKIO_NONE is specified
23143  *		the user thread will block until a drive state change occurs.
 *
 *		When the media state goes to DKIO_INSERTED from anything else,
 *		the routine also re-reads the capacity, updates the block
 *		info, re-validates the label through cmlb and sends a
 *		door lock (SD_REMOVAL_PREVENT) before returning.
23144  *
23145  *   Arguments: dev  - the device number
23146  *		state  - the media state supplied by the caller. It is passed
23147  *			by value; this routine does not write it back.
 *			NOTE(review): the resulting state is left in
 *			un->un_mediastate, presumably for the caller to
 *			report -- confirm against the DKIOCSTATE caller.
23148  *
23149  * Return Code: 0	- success
 *		ENXIO	- no soft state for this unit
23150  *		EIO	- sd_pm_entry failed on the media-inserted path
23151  *		EAGAIN	- the watch request could not be submitted
23152  *		EINTR	- the wait for a state change was interrupted
 *		Otherwise the value returned by sd_send_scsi_READ_CAPACITY or
 *		sd_send_scsi_DOORLOCK on the media-inserted path.
23153  */
23154 
23155 static int
23156 sd_check_media(dev_t dev, enum dkio_state state)
23157 {
23158 	struct sd_lun		*un = NULL;
23159 	enum dkio_state		prev_state;
23160 	opaque_t		token = NULL;
23161 	int			rval = 0;
23162 	sd_ssc_t		*ssc;
23163 
23164 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23165 		return (ENXIO);
23166 	}
23167 
23168 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");
23169 
23170 	ssc = sd_ssc_init(un);
23171 
23172 	mutex_enter(SD_MUTEX(un));
23173 
23174 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
23175 	    "state=%x, mediastate=%x\n", state, un->un_mediastate);
23176 
	/* Remembered so that state transitions can be detected below */
23177 	prev_state = un->un_mediastate;
23178 
23179 	/* is there anything to do? */
23180 	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
23181 		/*
23182 		 * submit the request to the scsi_watch service;
23183 		 * sd_media_watch_cb() does the real work
23184 		 */
23185 		mutex_exit(SD_MUTEX(un));
23186 
23187 		/*
23188 		 * This change handles the case where a scsi watch request is
23189 		 * added to a device that is powered down. To accomplish this
23190 		 * we power up the device before adding the scsi watch request,
23191 		 * since the scsi watch sends a TUR directly to the device
23192 		 * which the device cannot handle if it is powered down.
23193 		 */
23194 		if (sd_pm_entry(un) != DDI_SUCCESS) {
			/*
			 * NOTE(review): rval is still 0 here, so this failure
			 * is reported to the caller as success, whereas the
			 * sd_pm_entry failure on the media-inserted path
			 * below returns EIO. Confirm this is intended.
			 */
23195 			mutex_enter(SD_MUTEX(un));
23196 			goto done;
23197 		}
23198 
23199 		token = sd_watch_request_submit(un);
23200 
23201 		sd_pm_exit(un);
23202 
23203 		mutex_enter(SD_MUTEX(un));
23204 		if (token == NULL) {
23205 			rval = EAGAIN;
23206 			goto done;
23207 		}
23208 
23209 		/*
23210 		 * This is a special case IOCTL that doesn't return
23211 		 * until the media state changes. Routine sdpower
23212 		 * knows about and handles this so don't count it
23213 		 * as an active cmd in the driver, which would
23214 		 * keep the device busy to the pm framework.
23215 		 * If the count isn't decremented the device can't
23216 		 * be powered down.
23217 		 */
23218 		un->un_ncmds_in_driver--;
23219 		ASSERT(un->un_ncmds_in_driver >= 0);
23220 
23221 		/*
23222 		 * if a prior request had been made, this will be the same
23223 		 * token, as scsi_watch was designed that way.
23224 		 */
23225 		un->un_swr_token = token;
23226 		un->un_specified_mediastate = state;
23227 
23228 		/*
23229 		 * now wait for media change
23230 		 * we will not be signalled unless mediastate == state but it is
23231 		 * still better to test for this condition, since there is a
23232 		 * 2 sec cv_broadcast delay when mediastate == DKIO_INSERTED
23233 		 */
23234 		SD_TRACE(SD_LOG_COMMON, un,
23235 		    "sd_check_media: waiting for media state change\n");
23236 		while (un->un_mediastate == state) {
23237 			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
23238 				SD_TRACE(SD_LOG_COMMON, un,
23239 				    "sd_check_media: waiting for media state "
23240 				    "was interrupted\n");
				/* Undo the decrement made before the wait */
23241 				un->un_ncmds_in_driver++;
23242 				rval = EINTR;
23243 				goto done;
23244 			}
23245 			SD_TRACE(SD_LOG_COMMON, un,
23246 			    "sd_check_media: received signal, state=%x\n",
23247 			    un->un_mediastate);
23248 		}
23249 		/*
23250 		 * Inc the counter to indicate the device once again
23251 		 * has an active outstanding cmd.
23252 		 */
23253 		un->un_ncmds_in_driver++;
23254 	}
23255 
23256 	/* invalidate geometry */
23257 	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
23258 		sr_ejected(un);
23259 	}
23260 
23261 	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
23262 		uint64_t	capacity;
23263 		uint_t		lbasize;
23264 
23265 		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
23266 		mutex_exit(SD_MUTEX(un));
23267 		/*
23268 		 * Since the following routines use SD_PATH_DIRECT, we must
23269 		 * call PM directly before the upcoming disk accesses. This
23270 		 * may cause the disk to be power/spin up.
23271 		 */
23272 
23273 		if (sd_pm_entry(un) == DDI_SUCCESS) {
23274 			rval = sd_send_scsi_READ_CAPACITY(ssc,
23275 			    &capacity, &lbasize, SD_PATH_DIRECT);
23276 			if (rval != 0) {
				/* Balance the sd_pm_entry before bailing out */
23277 				sd_pm_exit(un);
23278 				if (rval == EIO)
23279 					sd_ssc_assessment(ssc,
23280 					    SD_FMT_STATUS_CHECK);
23281 				else
23282 					sd_ssc_assessment(ssc, SD_FMT_IGNORE);
23283 				mutex_enter(SD_MUTEX(un));
23284 				goto done;
23285 			}
23286 		} else {
23287 			rval = EIO;
23288 			mutex_enter(SD_MUTEX(un));
23289 			goto done;
23290 		}
23291 		mutex_enter(SD_MUTEX(un));
23292 
23293 		sd_update_block_info(un, lbasize, capacity);
23294 
23295 		/*
23296 		 *  Check if the media in the device is writable or not
23297 		 */
23298 		if (ISCD(un)) {
23299 			sd_check_for_writable_cd(ssc, SD_PATH_DIRECT);
23300 		}
23301 
23302 		mutex_exit(SD_MUTEX(un));
		/* Discard the cached label and re-read it from the new media */
23303 		cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
23304 		if ((cmlb_validate(un->un_cmlbhandle, 0,
23305 		    (void *)SD_PATH_DIRECT) == 0) && un->un_f_pkstats_enabled) {
23306 			sd_set_pstats(un);
23307 			SD_TRACE(SD_LOG_IO_PARTITION, un,
23308 			    "sd_check_media: un:0x%p pstats created and "
23309 			    "set\n", un);
23310 		}
23311 
		/* The result of the door lock becomes the return value */
23312 		rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
23313 		    SD_PATH_DIRECT);
23314 
23315 		sd_pm_exit(un);
23316 
23317 		if (rval != 0) {
23318 			if (rval == EIO)
23319 				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
23320 			else
23321 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
23322 		}
23323 
23324 		mutex_enter(SD_MUTEX(un));
23325 	}
	/*
	 * Every path that reaches this label, by goto or by falling through,
	 * holds SD_MUTEX(un); it is released just before returning.
	 */
23326 done:
23327 	sd_ssc_fini(ssc);
23328 	un->un_f_watcht_stopped = FALSE;
	/* Only tear down the watch if this call submitted a request */
23329 	if (token != NULL && un->un_swr_token != NULL) {
23330 		/*
23331 		 * Use of this local token and the mutex ensures that we avoid
23332 		 * some race conditions associated with terminating the
23333 		 * scsi watch.
23334 		 */
23335 		token = un->un_swr_token;
23336 		mutex_exit(SD_MUTEX(un));
23337 		(void) scsi_watch_request_terminate(token,
23338 		    SCSI_WATCH_TERMINATE_WAIT);
		/* Clear the saved token only once no references remain */
23339 		if (scsi_watch_get_ref_count(token) == 0) {
23340 			mutex_enter(SD_MUTEX(un));
23341 			un->un_swr_token = (opaque_t)NULL;
23342 		} else {
23343 			mutex_enter(SD_MUTEX(un));
23344 		}
23345 	}
23346 
23347 	/*
23348 	 * Update the capacity kstat value, if no media previously
23349 	 * (capacity kstat is 0) and a media has been inserted
23350 	 * (un_f_blockcount_is_valid == TRUE)
23351 	 */
23352 	if (un->un_errstats) {
23353 		struct sd_errstats	*stp = NULL;
23354 
23355 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
23356 		if ((stp->sd_capacity.value.ui64 == 0) &&
23357 		    (un->un_f_blockcount_is_valid == TRUE)) {
23358 			stp->sd_capacity.value.ui64 =
23359 			    (uint64_t)((uint64_t)un->un_blockcount *
23360 			    un->un_sys_blocksize);
23361 		}
23362 	}
23363 	mutex_exit(SD_MUTEX(un));
23364 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
23365 	return (rval);
23366 }
23367 
23368 
23369 /*
23370  *    Function: sd_delayed_cv_broadcast
23371  *
23372  * Description: Delayed cv_broadcast to allow for target to recover from media
23373  *		insertion.
23374  *
23375  *   Arguments: arg - driver soft state (unit) structure
23376  */
23377 
23378 static void
23379 sd_delayed_cv_broadcast(void *arg)
23380 {
23381 	struct sd_lun *un = arg;
23382 
23383 	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
23384 
23385 	mutex_enter(SD_MUTEX(un));
23386 	un->un_dcvb_timeid = NULL;
23387 	cv_broadcast(&un->un_state_cv);
23388 	mutex_exit(SD_MUTEX(un));
23389 }
23390 
23391 
23392 /*
23393  *    Function: sd_media_watch_cb
23394  *
23395  * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
23396  *		routine processes the TUR sense data and updates the driver
23397  *		state if a transition has occurred. The user thread
23398  *		(sd_check_media) is then signalled.
23399  *
23400  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
23401  *			among multiple watches that share this callback function
23402  *		resultp - scsi watch facility result packet containing scsi
23403  *			  packet, status byte and sense data
23404  *
23405  * Return Code: 0 for success, -1 for failure
23406  */
23407 
23408 static int
23409 sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
23410 {
23411 	struct sd_lun			*un;
23412 	struct scsi_status		*statusp = resultp->statusp;
23413 	uint8_t				*sensep = (uint8_t *)resultp->sensep;
23414 	enum dkio_state			state = DKIO_NONE;
23415 	dev_t				dev = (dev_t)arg;
23416 	uchar_t				actual_sense_length;
23417 	uint8_t				skey, asc, ascq;
23418 
23419 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23420 		return (-1);
23421 	}
23422 	actual_sense_length = resultp->actual_sense_length;
23423 
23424 	mutex_enter(SD_MUTEX(un));
23425 	SD_TRACE(SD_LOG_COMMON, un,
23426 	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
23427 	    *((char *)statusp), (void *)sensep, actual_sense_length);
23428 
23429 	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
23430 		un->un_mediastate = DKIO_DEV_GONE;
23431 		cv_broadcast(&un->un_state_cv);
23432 		mutex_exit(SD_MUTEX(un));
23433 
23434 		return (0);
23435 	}
23436 
23437 	if (un->un_f_mmc_cap && un->un_f_mmc_gesn_polling) {
23438 		if (sd_gesn_media_data_valid(resultp->mmc_data)) {
23439 			if ((resultp->mmc_data[5] &
23440 			    SD_GESN_MEDIA_EVENT_STATUS_PRESENT) != 0) {
23441 				state = DKIO_INSERTED;
23442 			} else {
23443 				state = DKIO_EJECTED;
23444 			}
23445 			if ((resultp->mmc_data[4] & SD_GESN_MEDIA_EVENT_CODE) ==
23446 			    SD_GESN_MEDIA_EVENT_EJECTREQUEST) {
23447 				sd_log_eject_request_event(un, KM_NOSLEEP);
23448 			}
23449 		}
23450 	} else if (sensep != NULL) {
23451 		/*
23452 		 * If there was a check condition then sensep points to valid
23453 		 * sense data. If status was not a check condition but a
23454 		 * reservation or busy status then the new state is DKIO_NONE.
23455 		 */
23456 		skey = scsi_sense_key(sensep);
23457 		asc = scsi_sense_asc(sensep);
23458 		ascq = scsi_sense_ascq(sensep);
23459 
23460 		SD_INFO(SD_LOG_COMMON, un,
23461 		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
23462 		    skey, asc, ascq);
23463 		/* This routine only uses up to 13 bytes of sense data. */
23464 		if (actual_sense_length >= 13) {
23465 			if (skey == KEY_UNIT_ATTENTION) {
23466 				if (asc == 0x28) {
23467 					state = DKIO_INSERTED;
23468 				}
23469 			} else if (skey == KEY_NOT_READY) {
23470 				/*
23471 				 * Sense data of 02/06/00 means that the
23472 				 * drive could not read the media (No
23473 				 * reference position found). In this case
23474 				 * to prevent a hang on the DKIOCSTATE IOCTL
23475 				 * we set the media state to DKIO_INSERTED.
23476 				 */
23477 				if (asc == 0x06 && ascq == 0x00)
23478 					state = DKIO_INSERTED;
23479 
23480 				/*
23481 				 * if 02/04/02  means that the host
23482 				 * should send start command. Explicitly
23483 				 * leave the media state as is
23484 				 * (inserted) as the media is inserted
23485 				 * and host has stopped device for PM
23486 				 * reasons. Upon next true read/write
23487 				 * to this media will bring the
23488 				 * device to the right state good for
23489 				 * media access.
23490 				 */
23491 				if (asc == 0x3a) {
23492 					state = DKIO_EJECTED;
23493 				} else {
23494 					/*
23495 					 * If the drive is busy with an
23496 					 * operation or long write, keep the
23497 					 * media in an inserted state.
23498 					 */
23499 
23500 					if ((asc == 0x04) &&
23501 					    ((ascq == 0x02) ||
23502 					    (ascq == 0x07) ||
23503 					    (ascq == 0x08))) {
23504 						state = DKIO_INSERTED;
23505 					}
23506 				}
23507 			} else if (skey == KEY_NO_SENSE) {
23508 				if ((asc == 0x00) && (ascq == 0x00)) {
23509 					/*
23510 					 * Sense Data 00/00/00 does not provide
23511 					 * any information about the state of
23512 					 * the media. Ignore it.
23513 					 */
23514 					mutex_exit(SD_MUTEX(un));
23515 					return (0);
23516 				}
23517 			}
23518 		}
23519 	} else if ((*((char *)statusp) == STATUS_GOOD) &&
23520 	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
23521 		state = DKIO_INSERTED;
23522 	}
23523 
23524 	SD_TRACE(SD_LOG_COMMON, un,
23525 	    "sd_media_watch_cb: state=%x, specified=%x\n",
23526 	    state, un->un_specified_mediastate);
23527 
23528 	/*
23529 	 * now signal the waiting thread if this is *not* the specified state;
23530 	 * delay the signal if the state is DKIO_INSERTED to allow the target
23531 	 * to recover
23532 	 */
23533 	if (state != un->un_specified_mediastate) {
23534 		un->un_mediastate = state;
23535 		if (state == DKIO_INSERTED) {
23536 			/*
23537 			 * delay the signal to give the drive a chance
23538 			 * to do what it apparently needs to do
23539 			 */
23540 			SD_TRACE(SD_LOG_COMMON, un,
23541 			    "sd_media_watch_cb: delayed cv_broadcast\n");
23542 			if (un->un_dcvb_timeid == NULL) {
23543 				un->un_dcvb_timeid =
23544 				    timeout(sd_delayed_cv_broadcast, un,
23545 				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
23546 			}
23547 		} else {
23548 			SD_TRACE(SD_LOG_COMMON, un,
23549 			    "sd_media_watch_cb: immediate cv_broadcast\n");
23550 			cv_broadcast(&un->un_state_cv);
23551 		}
23552 	}
23553 	mutex_exit(SD_MUTEX(un));
23554 	return (0);
23555 }
23556 
23557 
23558 /*
23559  *    Function: sd_dkio_get_temp
23560  *
23561  * Description: This routine is the driver entry point for handling ioctl
23562  *		requests to get the disk temperature.
23563  *
23564  *   Arguments: dev  - the device number
23565  *		arg  - pointer to user provided dk_temperature structure.
23566  *		flag - this argument is a pass through to ddi_copyxxx()
23567  *		       directly from the mode argument of ioctl().
23568  *
23569  * Return Code: 0
23570  *		EFAULT
23571  *		ENXIO
23572  *		EAGAIN
23573  */
23574 
23575 static int
23576 sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
23577 {
23578 	struct sd_lun		*un = NULL;
23579 	struct dk_temperature	*dktemp = NULL;
23580 	uchar_t			*temperature_page;
23581 	int			rval = 0;
23582 	int			path_flag = SD_PATH_STANDARD;
23583 	sd_ssc_t		*ssc;
23584 
23585 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23586 		return (ENXIO);
23587 	}
23588 
23589 	ssc = sd_ssc_init(un);
23590 	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);
23591 
23592 	/* copyin the disk temp argument to get the user flags */
23593 	if (ddi_copyin((void *)arg, dktemp,
23594 	    sizeof (struct dk_temperature), flag) != 0) {
23595 		rval = EFAULT;
23596 		goto done;
23597 	}
23598 
23599 	/* Initialize the temperature to invalid. */
23600 	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
23601 	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
23602 
23603 	/*
23604 	 * Note: Investigate removing the "bypass pm" semantic.
23605 	 * Can we just bypass PM always?
23606 	 */
23607 	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
23608 		path_flag = SD_PATH_DIRECT;
23609 		ASSERT(!mutex_owned(&un->un_pm_mutex));
23610 		mutex_enter(&un->un_pm_mutex);
23611 		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
23612 			/*
23613 			 * If DKT_BYPASS_PM is set, and the drive happens to be
23614 			 * in low power mode, we can not wake it up, Need to
23615 			 * return EAGAIN.
23616 			 */
23617 			mutex_exit(&un->un_pm_mutex);
23618 			rval = EAGAIN;
23619 			goto done;
23620 		} else {
23621 			/*
23622 			 * Indicate to PM the device is busy. This is required
23623 			 * to avoid a race - i.e. the ioctl is issuing a
23624 			 * command and the pm framework brings down the device
23625 			 * to low power mode (possible power cut-off on some
23626 			 * platforms).
23627 			 */
23628 			mutex_exit(&un->un_pm_mutex);
23629 			if (sd_pm_entry(un) != DDI_SUCCESS) {
23630 				rval = EAGAIN;
23631 				goto done;
23632 			}
23633 		}
23634 	}
23635 
23636 	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);
23637 
23638 	rval = sd_send_scsi_LOG_SENSE(ssc, temperature_page,
23639 	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag);
23640 	if (rval != 0)
23641 		goto done2;
23642 
23643 	/*
23644 	 * For the current temperature verify that the parameter length is 0x02
23645 	 * and the parameter code is 0x00
23646 	 */
23647 	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
23648 	    (temperature_page[5] == 0x00)) {
23649 		if (temperature_page[9] == 0xFF) {
23650 			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
23651 		} else {
23652 			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
23653 		}
23654 	}
23655 
23656 	/*
23657 	 * For the reference temperature verify that the parameter
23658 	 * length is 0x02 and the parameter code is 0x01
23659 	 */
23660 	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
23661 	    (temperature_page[11] == 0x01)) {
23662 		if (temperature_page[15] == 0xFF) {
23663 			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
23664 		} else {
23665 			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
23666 		}
23667 	}
23668 
23669 	/* Do the copyout regardless of the temperature commands status. */
23670 	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
23671 	    flag) != 0) {
23672 		rval = EFAULT;
23673 		goto done1;
23674 	}
23675 
23676 done2:
23677 	if (rval != 0) {
23678 		if (rval == EIO)
23679 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
23680 		else
23681 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
23682 	}
23683 done1:
23684 	if (path_flag == SD_PATH_DIRECT) {
23685 		sd_pm_exit(un);
23686 	}
23687 
23688 	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
23689 done:
23690 	sd_ssc_fini(ssc);
23691 	if (dktemp != NULL) {
23692 		kmem_free(dktemp, sizeof (struct dk_temperature));
23693 	}
23694 
23695 	return (rval);
23696 }
23697 
23698 
23699 /*
23700  *    Function: sd_log_page_supported
23701  *
23702  * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
23703  *		supported log pages.
23704  *
23705  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
23706  *                      structure for this target.
23707  *		log_page -
23708  *
23709  * Return Code: -1 - on error (log sense is optional and may not be supported).
23710  *		0  - log page not found.
23711  *		1  - log page found.
23712  */
23713 
23714 static int
23715 sd_log_page_supported(sd_ssc_t *ssc, int log_page)
23716 {
23717 	uchar_t *log_page_data;
23718 	int	i;
23719 	int	match = 0;
23720 	int	log_size;
23721 	int	status = 0;
23722 	struct sd_lun	*un;
23723 
23724 	ASSERT(ssc != NULL);
23725 	un = ssc->ssc_un;
23726 	ASSERT(un != NULL);
23727 
23728 	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
23729 
23730 	status = sd_send_scsi_LOG_SENSE(ssc, log_page_data, 0xFF, 0, 0x01, 0,
23731 	    SD_PATH_DIRECT);
23732 
23733 	if (status != 0) {
23734 		if (status == EIO) {
23735 			/*
23736 			 * Some disks do not support log sense, we
23737 			 * should ignore this kind of error(sense key is
23738 			 * 0x5 - illegal request).
23739 			 */
23740 			uint8_t *sensep;
23741 			int senlen;
23742 
23743 			sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
23744 			senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
23745 			    ssc->ssc_uscsi_cmd->uscsi_rqresid);
23746 
23747 			if (senlen > 0 &&
23748 			    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
23749 				sd_ssc_assessment(ssc,
23750 				    SD_FMT_IGNORE_COMPROMISE);
23751 			} else {
23752 				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
23753 			}
23754 		} else {
23755 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
23756 		}
23757 
23758 		SD_ERROR(SD_LOG_COMMON, un,
23759 		    "sd_log_page_supported: failed log page retrieval\n");
23760 		kmem_free(log_page_data, 0xFF);
23761 		return (-1);
23762 	}
23763 
23764 	log_size = log_page_data[3];
23765 
23766 	/*
23767 	 * The list of supported log pages start from the fourth byte. Check
23768 	 * until we run out of log pages or a match is found.
23769 	 */
23770 	for (i = 4; (i < (log_size + 4)) && !match; i++) {
23771 		if (log_page_data[i] == log_page) {
23772 			match++;
23773 		}
23774 	}
23775 	kmem_free(log_page_data, 0xFF);
23776 	return (match);
23777 }
23778 
23779 
23780 /*
23781  *    Function: sd_mhdioc_failfast
23782  *
23783  * Description: This routine is the driver entry point for handling ioctl
23784  *		requests to enable/disable the multihost failfast option.
23785  *		(MHIOCENFAILFAST)
23786  *
23787  *   Arguments: dev	- the device number
23788  *		arg	- user specified probing interval.
23789  *		flag	- this argument is a pass through to ddi_copyxxx()
23790  *			  directly from the mode argument of ioctl().
23791  *
23792  * Return Code: 0
23793  *		EFAULT
23794  *		ENXIO
23795  */
23796 
23797 static int
23798 sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
23799 {
23800 	struct sd_lun	*un = NULL;
23801 	int		mh_time;
23802 	int		rval = 0;
23803 
23804 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23805 		return (ENXIO);
23806 	}
23807 
23808 	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
23809 		return (EFAULT);
23810 
23811 	if (mh_time) {
23812 		mutex_enter(SD_MUTEX(un));
23813 		un->un_resvd_status |= SD_FAILFAST;
23814 		mutex_exit(SD_MUTEX(un));
23815 		/*
23816 		 * If mh_time is INT_MAX, then this ioctl is being used for
23817 		 * SCSI-3 PGR purposes, and we don't need to spawn watch thread.
23818 		 */
23819 		if (mh_time != INT_MAX) {
23820 			rval = sd_check_mhd(dev, mh_time);
23821 		}
23822 	} else {
23823 		(void) sd_check_mhd(dev, 0);
23824 		mutex_enter(SD_MUTEX(un));
23825 		un->un_resvd_status &= ~SD_FAILFAST;
23826 		mutex_exit(SD_MUTEX(un));
23827 	}
23828 	return (rval);
23829 }
23830 
23831 
23832 /*
23833  *    Function: sd_mhdioc_takeown
23834  *
23835  * Description: This routine is the driver entry point for handling ioctl
23836  *		requests to forcefully acquire exclusive access rights to the
23837  *		multihost disk (MHIOCTKOWN).
23838  *
23839  *   Arguments: dev	- the device number
23840  *		arg	- user provided structure specifying the delay
23841  *			  parameters in milliseconds
23842  *		flag	- this argument is a pass through to ddi_copyxxx()
23843  *			  directly from the mode argument of ioctl().
23844  *
23845  * Return Code: 0
23846  *		EFAULT
23847  *		ENXIO
23848  */
23849 
23850 static int
23851 sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
23852 {
23853 	struct sd_lun		*un = NULL;
23854 	struct mhioctkown	*tkown = NULL;
23855 	int			rval = 0;
23856 
23857 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23858 		return (ENXIO);
23859 	}
23860 
23861 	if (arg != NULL) {
23862 		tkown = (struct mhioctkown *)
23863 		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
23864 		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
23865 		if (rval != 0) {
23866 			rval = EFAULT;
23867 			goto error;
23868 		}
23869 	}
23870 
23871 	rval = sd_take_ownership(dev, tkown);
23872 	mutex_enter(SD_MUTEX(un));
23873 	if (rval == 0) {
23874 		un->un_resvd_status |= SD_RESERVE;
23875 		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
23876 			sd_reinstate_resv_delay =
23877 			    tkown->reinstate_resv_delay * 1000;
23878 		} else {
23879 			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
23880 		}
23881 		/*
23882 		 * Give the scsi_watch routine interval set by
23883 		 * the MHIOCENFAILFAST ioctl precedence here.
23884 		 */
23885 		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
23886 			mutex_exit(SD_MUTEX(un));
23887 			(void) sd_check_mhd(dev,
23888 			    sd_reinstate_resv_delay / 1000);
23889 			SD_TRACE(SD_LOG_IOCTL_MHD, un,
23890 			    "sd_mhdioc_takeown : %d\n",
23891 			    sd_reinstate_resv_delay);
23892 		} else {
23893 			mutex_exit(SD_MUTEX(un));
23894 		}
23895 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
23896 		    sd_mhd_reset_notify_cb, (caddr_t)un);
23897 	} else {
23898 		un->un_resvd_status &= ~SD_RESERVE;
23899 		mutex_exit(SD_MUTEX(un));
23900 	}
23901 
23902 error:
23903 	if (tkown != NULL) {
23904 		kmem_free(tkown, sizeof (struct mhioctkown));
23905 	}
23906 	return (rval);
23907 }
23908 
23909 
23910 /*
23911  *    Function: sd_mhdioc_release
23912  *
23913  * Description: This routine is the driver entry point for handling ioctl
23914  *		requests to release exclusive access rights to the multihost
23915  *		disk (MHIOCRELEASE).
23916  *
23917  *   Arguments: dev	- the device number
23918  *
23919  * Return Code: 0
23920  *		ENXIO
23921  */
23922 
23923 static int
23924 sd_mhdioc_release(dev_t dev)
23925 {
23926 	struct sd_lun		*un = NULL;
23927 	timeout_id_t		resvd_timeid_save;
23928 	int			resvd_status_save;
23929 	int			rval = 0;
23930 
23931 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23932 		return (ENXIO);
23933 	}
23934 
23935 	mutex_enter(SD_MUTEX(un));
23936 	resvd_status_save = un->un_resvd_status;
23937 	un->un_resvd_status &=
23938 	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
23939 	if (un->un_resvd_timeid) {
23940 		resvd_timeid_save = un->un_resvd_timeid;
23941 		un->un_resvd_timeid = NULL;
23942 		mutex_exit(SD_MUTEX(un));
23943 		(void) untimeout(resvd_timeid_save);
23944 	} else {
23945 		mutex_exit(SD_MUTEX(un));
23946 	}
23947 
23948 	/*
23949 	 * destroy any pending timeout thread that may be attempting to
23950 	 * reinstate reservation on this device.
23951 	 */
23952 	sd_rmv_resv_reclaim_req(dev);
23953 
23954 	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
23955 		mutex_enter(SD_MUTEX(un));
23956 		if ((un->un_mhd_token) &&
23957 		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
23958 			mutex_exit(SD_MUTEX(un));
23959 			(void) sd_check_mhd(dev, 0);
23960 		} else {
23961 			mutex_exit(SD_MUTEX(un));
23962 		}
23963 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
23964 		    sd_mhd_reset_notify_cb, (caddr_t)un);
23965 	} else {
23966 		/*
23967 		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
23968 		 */
23969 		mutex_enter(SD_MUTEX(un));
23970 		un->un_resvd_status = resvd_status_save;
23971 		mutex_exit(SD_MUTEX(un));
23972 	}
23973 	return (rval);
23974 }
23975 
23976 
23977 /*
23978  *    Function: sd_mhdioc_register_devid
23979  *
23980  * Description: This routine is the driver entry point for handling ioctl
23981  *		requests to register the device id (MHIOCREREGISTERDEVID).
23982  *
23983  *		Note: The implementation for this ioctl has been updated to
23984  *		be consistent with the original PSARC case (1999/357)
23985  *		(4375899, 4241671, 4220005)
23986  *
23987  *   Arguments: dev	- the device number
23988  *
23989  * Return Code: 0
23990  *		ENXIO
23991  */
23992 
23993 static int
23994 sd_mhdioc_register_devid(dev_t dev)
23995 {
23996 	struct sd_lun	*un = NULL;
23997 	int		rval = 0;
23998 	sd_ssc_t	*ssc;
23999 
24000 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24001 		return (ENXIO);
24002 	}
24003 
24004 	ASSERT(!mutex_owned(SD_MUTEX(un)));
24005 
24006 	mutex_enter(SD_MUTEX(un));
24007 
24008 	/* If a devid already exists, de-register it */
24009 	if (un->un_devid != NULL) {
24010 		ddi_devid_unregister(SD_DEVINFO(un));
24011 		/*
24012 		 * After unregister devid, needs to free devid memory
24013 		 */
24014 		ddi_devid_free(un->un_devid);
24015 		un->un_devid = NULL;
24016 	}
24017 
24018 	/* Check for reservation conflict */
24019 	mutex_exit(SD_MUTEX(un));
24020 	ssc = sd_ssc_init(un);
24021 	rval = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
24022 	mutex_enter(SD_MUTEX(un));
24023 
24024 	switch (rval) {
24025 	case 0:
24026 		sd_register_devid(ssc, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
24027 		break;
24028 	case EACCES:
24029 		break;
24030 	default:
24031 		rval = EIO;
24032 	}
24033 
24034 	mutex_exit(SD_MUTEX(un));
24035 	if (rval != 0) {
24036 		if (rval == EIO)
24037 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
24038 		else
24039 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
24040 	}
24041 	sd_ssc_fini(ssc);
24042 	return (rval);
24043 }
24044 
24045 
24046 /*
24047  *    Function: sd_mhdioc_inkeys
24048  *
24049  * Description: This routine is the driver entry point for handling ioctl
24050  *		requests to issue the SCSI-3 Persistent In Read Keys command
24051  *		to the device (MHIOCGRP_INKEYS).
24052  *
24053  *   Arguments: dev	- the device number
24054  *		arg	- user provided in_keys structure
24055  *		flag	- this argument is a pass through to ddi_copyxxx()
24056  *			  directly from the mode argument of ioctl().
24057  *
24058  * Return Code: code returned by sd_persistent_reservation_in_read_keys()
24059  *		ENXIO
24060  *		EFAULT
24061  */
24062 
24063 static int
24064 sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
24065 {
24066 	struct sd_lun		*un;
24067 	mhioc_inkeys_t		inkeys;
24068 	int			rval = 0;
24069 
24070 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24071 		return (ENXIO);
24072 	}
24073 
24074 #ifdef _MULTI_DATAMODEL
24075 	switch (ddi_model_convert_from(flag & FMODELS)) {
24076 	case DDI_MODEL_ILP32: {
24077 		struct mhioc_inkeys32	inkeys32;
24078 
24079 		if (ddi_copyin(arg, &inkeys32,
24080 		    sizeof (struct mhioc_inkeys32), flag) != 0) {
24081 			return (EFAULT);
24082 		}
24083 		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
24084 		if ((rval = sd_persistent_reservation_in_read_keys(un,
24085 		    &inkeys, flag)) != 0) {
24086 			return (rval);
24087 		}
24088 		inkeys32.generation = inkeys.generation;
24089 		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
24090 		    flag) != 0) {
24091 			return (EFAULT);
24092 		}
24093 		break;
24094 	}
24095 	case DDI_MODEL_NONE:
24096 		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
24097 		    flag) != 0) {
24098 			return (EFAULT);
24099 		}
24100 		if ((rval = sd_persistent_reservation_in_read_keys(un,
24101 		    &inkeys, flag)) != 0) {
24102 			return (rval);
24103 		}
24104 		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
24105 		    flag) != 0) {
24106 			return (EFAULT);
24107 		}
24108 		break;
24109 	}
24110 
24111 #else /* ! _MULTI_DATAMODEL */
24112 
24113 	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
24114 		return (EFAULT);
24115 	}
24116 	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
24117 	if (rval != 0) {
24118 		return (rval);
24119 	}
24120 	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
24121 		return (EFAULT);
24122 	}
24123 
24124 #endif /* _MULTI_DATAMODEL */
24125 
24126 	return (rval);
24127 }
24128 
24129 
24130 /*
24131  *    Function: sd_mhdioc_inresv
24132  *
24133  * Description: This routine is the driver entry point for handling ioctl
24134  *		requests to issue the SCSI-3 Persistent In Read Reservations
24135  *		command to the device (MHIOCGRP_INKEYS).
24136  *
24137  *   Arguments: dev	- the device number
24138  *		arg	- user provided in_resv structure
24139  *		flag	- this argument is a pass through to ddi_copyxxx()
24140  *			  directly from the mode argument of ioctl().
24141  *
24142  * Return Code: code returned by sd_persistent_reservation_in_read_resv()
24143  *		ENXIO
24144  *		EFAULT
24145  */
24146 
24147 static int
24148 sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
24149 {
24150 	struct sd_lun		*un;
24151 	mhioc_inresvs_t		inresvs;
24152 	int			rval = 0;
24153 
24154 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24155 		return (ENXIO);
24156 	}
24157 
24158 #ifdef _MULTI_DATAMODEL
24159 
24160 	switch (ddi_model_convert_from(flag & FMODELS)) {
24161 	case DDI_MODEL_ILP32: {
24162 		struct mhioc_inresvs32	inresvs32;
24163 
24164 		if (ddi_copyin(arg, &inresvs32,
24165 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
24166 			return (EFAULT);
24167 		}
24168 		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
24169 		if ((rval = sd_persistent_reservation_in_read_resv(un,
24170 		    &inresvs, flag)) != 0) {
24171 			return (rval);
24172 		}
24173 		inresvs32.generation = inresvs.generation;
24174 		if (ddi_copyout(&inresvs32, arg,
24175 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
24176 			return (EFAULT);
24177 		}
24178 		break;
24179 	}
24180 	case DDI_MODEL_NONE:
24181 		if (ddi_copyin(arg, &inresvs,
24182 		    sizeof (mhioc_inresvs_t), flag) != 0) {
24183 			return (EFAULT);
24184 		}
24185 		if ((rval = sd_persistent_reservation_in_read_resv(un,
24186 		    &inresvs, flag)) != 0) {
24187 			return (rval);
24188 		}
24189 		if (ddi_copyout(&inresvs, arg,
24190 		    sizeof (mhioc_inresvs_t), flag) != 0) {
24191 			return (EFAULT);
24192 		}
24193 		break;
24194 	}
24195 
24196 #else /* ! _MULTI_DATAMODEL */
24197 
24198 	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
24199 		return (EFAULT);
24200 	}
24201 	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
24202 	if (rval != 0) {
24203 		return (rval);
24204 	}
24205 	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag)) {
24206 		return (EFAULT);
24207 	}
24208 
24209 #endif /* ! _MULTI_DATAMODEL */
24210 
24211 	return (rval);
24212 }
24213 
24214 
24215 /*
24216  * The following routines support the clustering functionality described below
24217  * and implement lost reservation reclaim functionality.
24218  *
24219  * Clustering
24220  * ----------
24221  * The clustering code uses two different, independent forms of SCSI
24222  * reservation. Traditional SCSI-2 Reserve/Release and the newer SCSI-3
24223  * Persistent Group Reservations. For any particular disk, it will use either
24224  * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
24225  *
24226  * SCSI-2
24227  * The cluster software takes ownership of a multi-hosted disk by issuing the
24228  * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
24229  * MHIOCRELEASE ioctl.  Closely related is the MHIOCENFAILFAST ioctl -- a
24230  * cluster, just after taking ownership of the disk with the MHIOCTKOWN ioctl
24231  * then issues the MHIOCENFAILFAST ioctl.  This ioctl "enables failfast" in the
24232  * driver. The meaning of failfast is that if the driver (on this host) ever
24233  * encounters the scsi error return code RESERVATION_CONFLICT from the device,
24234  * it should immediately panic the host. The motivation for this ioctl is that
24235  * if this host does encounter reservation conflict, the underlying cause is
24236  * that some other host of the cluster has decided that this host is no longer
24237  * in the cluster and has seized control of the disks for itself. Since this
24238  * host is no longer in the cluster, it ought to panic itself. The
24239  * MHIOCENFAILFAST ioctl does two things:
24240  *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
24241  *      error to panic the host
24242  *      (b) it sets up a periodic timer to test whether this host still has
24243  *      "access" (in that no other host has reserved the device):  if the
24244  *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
24245  *      purpose of that periodic timer is to handle scenarios where the host is
24246  *      otherwise temporarily quiescent, temporarily doing no real i/o.
24247  * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
24248  * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
24249  * the device itself.
24250  *
24251  * SCSI-3 PGR
24252  * A direct semantic implementation of the SCSI-3 Persistent Reservation
24253  * facility is supported through the shared multihost disk ioctls
24254  * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
24255  * MHIOCGRP_PREEMPTANDABORT, MHIOCGRP_CLEAR)
24256  *
24257  * Reservation Reclaim:
24258  * --------------------
 * To support the lost reservation reclaim operations this driver creates a
 * single thread to handle reinstating reservations on all devices that have
 * lost reservations. sd_resv_reclaim_requests are logged for all devices that
 * have LOST RESERVATIONS when the scsi watch facility calls back
 * sd_mhd_watch_cb, and the reservation reclaim thread loops through the
 * requests to regain the lost reservations.
24265  */
24266 
24267 /*
24268  *    Function: sd_check_mhd()
24269  *
24270  * Description: This function sets up and submits a scsi watch request or
24271  *		terminates an existing watch request. This routine is used in
24272  *		support of reservation reclaim.
24273  *
24274  *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
24275  *			 among multiple watches that share the callback function
24276  *		interval - the number of microseconds specifying the watch
24277  *			   interval for issuing TEST UNIT READY commands. If
24278  *			   set to 0 the watch should be terminated. If the
24279  *			   interval is set to 0 and if the device is required
24280  *			   to hold reservation while disabling failfast, the
24281  *			   watch is restarted with an interval of
24282  *			   reinstate_resv_delay.
24283  *
24284  * Return Code: 0	   - Successful submit/terminate of scsi watch request
24285  *		ENXIO      - Indicates an invalid device was specified
24286  *		EAGAIN     - Unable to submit the scsi watch request
24287  */
24288 
24289 static int
24290 sd_check_mhd(dev_t dev, int interval)
24291 {
24292 	struct sd_lun	*un;
24293 	opaque_t	token;
24294 
24295 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24296 		return (ENXIO);
24297 	}
24298 
24299 	/* is this a watch termination request? */
24300 	if (interval == 0) {
24301 		mutex_enter(SD_MUTEX(un));
24302 		/* if there is an existing watch task then terminate it */
24303 		if (un->un_mhd_token) {
24304 			token = un->un_mhd_token;
24305 			un->un_mhd_token = NULL;
24306 			mutex_exit(SD_MUTEX(un));
24307 			(void) scsi_watch_request_terminate(token,
24308 			    SCSI_WATCH_TERMINATE_ALL_WAIT);
24309 			mutex_enter(SD_MUTEX(un));
24310 		} else {
24311 			mutex_exit(SD_MUTEX(un));
24312 			/*
24313 			 * Note: If we return here we don't check for the
24314 			 * failfast case. This is the original legacy
24315 			 * implementation but perhaps we should be checking
24316 			 * the failfast case.
24317 			 */
24318 			return (0);
24319 		}
24320 		/*
24321 		 * If the device is required to hold reservation while
24322 		 * disabling failfast, we need to restart the scsi_watch
24323 		 * routine with an interval of reinstate_resv_delay.
24324 		 */
24325 		if (un->un_resvd_status & SD_RESERVE) {
24326 			interval = sd_reinstate_resv_delay / 1000;
24327 		} else {
24328 			/* no failfast so bail */
24329 			mutex_exit(SD_MUTEX(un));
24330 			return (0);
24331 		}
24332 		mutex_exit(SD_MUTEX(un));
24333 	}
24334 
24335 	/*
24336 	 * adjust minimum time interval to 1 second,
24337 	 * and convert from msecs to usecs
24338 	 */
24339 	if (interval > 0 && interval < 1000) {
24340 		interval = 1000;
24341 	}
24342 	interval *= 1000;
24343 
24344 	/*
24345 	 * submit the request to the scsi_watch service
24346 	 */
24347 	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
24348 	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
24349 	if (token == NULL) {
24350 		return (EAGAIN);
24351 	}
24352 
24353 	/*
24354 	 * save token for termination later on
24355 	 */
24356 	mutex_enter(SD_MUTEX(un));
24357 	un->un_mhd_token = token;
24358 	mutex_exit(SD_MUTEX(un));
24359 	return (0);
24360 }
24361 
24362 
24363 /*
24364  *    Function: sd_mhd_watch_cb()
24365  *
24366  * Description: This function is the call back function used by the scsi watch
24367  *		facility. The scsi watch facility sends the "Test Unit Ready"
24368  *		and processes the status. If applicable (i.e. a "Unit Attention"
24369  *		status and automatic "Request Sense" not used) the scsi watch
24370  *		facility will send a "Request Sense" and retrieve the sense data
24371  *		to be passed to this callback function. In either case the
24372  *		automatic "Request Sense" or the facility submitting one, this
24373  *		callback is passed the status and sense data.
24374  *
24375  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24376  *			among multiple watches that share this callback function
24377  *		resultp - scsi watch facility result packet containing scsi
24378  *			  packet, status byte and sense data
24379  *
24380  * Return Code: 0 - continue the watch task
24381  *		non-zero - terminate the watch task
24382  */
24383 
24384 static int
24385 sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
24386 {
24387 	struct sd_lun			*un;
24388 	struct scsi_status		*statusp;
24389 	uint8_t				*sensep;
24390 	struct scsi_pkt			*pkt;
24391 	uchar_t				actual_sense_length;
24392 	dev_t				dev = (dev_t)arg;
24393 
24394 	ASSERT(resultp != NULL);
24395 	statusp			= resultp->statusp;
24396 	sensep			= (uint8_t *)resultp->sensep;
24397 	pkt			= resultp->pkt;
24398 	actual_sense_length	= resultp->actual_sense_length;
24399 
24400 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24401 		return (ENXIO);
24402 	}
24403 
24404 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
24405 	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
24406 	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));
24407 
24408 	/* Begin processing of the status and/or sense data */
24409 	if (pkt->pkt_reason != CMD_CMPLT) {
24410 		/* Handle the incomplete packet */
24411 		sd_mhd_watch_incomplete(un, pkt);
24412 		return (0);
24413 	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
24414 		if (*((unsigned char *)statusp)
24415 		    == STATUS_RESERVATION_CONFLICT) {
24416 			/*
24417 			 * Handle a reservation conflict by panicking if
24418 			 * configured for failfast or by logging the conflict
24419 			 * and updating the reservation status
24420 			 */
24421 			mutex_enter(SD_MUTEX(un));
24422 			if ((un->un_resvd_status & SD_FAILFAST) &&
24423 			    (sd_failfast_enable)) {
24424 				sd_panic_for_res_conflict(un);
24425 				/*NOTREACHED*/
24426 			}
24427 			SD_INFO(SD_LOG_IOCTL_MHD, un,
24428 			    "sd_mhd_watch_cb: Reservation Conflict\n");
24429 			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
24430 			mutex_exit(SD_MUTEX(un));
24431 		}
24432 	}
24433 
24434 	if (sensep != NULL) {
24435 		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
24436 			mutex_enter(SD_MUTEX(un));
24437 			if ((scsi_sense_asc(sensep) ==
24438 			    SD_SCSI_RESET_SENSE_CODE) &&
24439 			    (un->un_resvd_status & SD_RESERVE)) {
24440 				/*
24441 				 * The additional sense code indicates a power
24442 				 * on or bus device reset has occurred; update
24443 				 * the reservation status.
24444 				 */
24445 				un->un_resvd_status |=
24446 				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
24447 				SD_INFO(SD_LOG_IOCTL_MHD, un,
24448 				    "sd_mhd_watch_cb: Lost Reservation\n");
24449 			}
24450 		} else {
24451 			return (0);
24452 		}
24453 	} else {
24454 		mutex_enter(SD_MUTEX(un));
24455 	}
24456 
24457 	if ((un->un_resvd_status & SD_RESERVE) &&
24458 	    (un->un_resvd_status & SD_LOST_RESERVE)) {
24459 		if (un->un_resvd_status & SD_WANT_RESERVE) {
24460 			/*
24461 			 * A reset occurred in between the last probe and this
24462 			 * one so if a timeout is pending cancel it.
24463 			 */
24464 			if (un->un_resvd_timeid) {
24465 				timeout_id_t temp_id = un->un_resvd_timeid;
24466 				un->un_resvd_timeid = NULL;
24467 				mutex_exit(SD_MUTEX(un));
24468 				(void) untimeout(temp_id);
24469 				mutex_enter(SD_MUTEX(un));
24470 			}
24471 			un->un_resvd_status &= ~SD_WANT_RESERVE;
24472 		}
24473 		if (un->un_resvd_timeid == 0) {
24474 			/* Schedule a timeout to handle the lost reservation */
24475 			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
24476 			    (void *)dev,
24477 			    drv_usectohz(sd_reinstate_resv_delay));
24478 		}
24479 	}
24480 	mutex_exit(SD_MUTEX(un));
24481 	return (0);
24482 }
24483 
24484 
24485 /*
24486  *    Function: sd_mhd_watch_incomplete()
24487  *
24488  * Description: This function is used to find out why a scsi pkt sent by the
24489  *		scsi watch facility was not completed. Under some scenarios this
24490  *		routine will return. Otherwise it will send a bus reset to see
24491  *		if the drive is still online.
24492  *
24493  *   Arguments: un  - driver soft state (unit) structure
24494  *		pkt - incomplete scsi pkt
24495  */
24496 
24497 static void
24498 sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
24499 {
24500 	int	be_chatty;
24501 	int	perr;
24502 
24503 	ASSERT(pkt != NULL);
24504 	ASSERT(un != NULL);
24505 	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
24506 	perr		= (pkt->pkt_statistics & STAT_PERR);
24507 
24508 	mutex_enter(SD_MUTEX(un));
24509 	if (un->un_state == SD_STATE_DUMPING) {
24510 		mutex_exit(SD_MUTEX(un));
24511 		return;
24512 	}
24513 
24514 	switch (pkt->pkt_reason) {
24515 	case CMD_UNX_BUS_FREE:
24516 		/*
24517 		 * If we had a parity error that caused the target to drop BSY*,
24518 		 * don't be chatty about it.
24519 		 */
24520 		if (perr && be_chatty) {
24521 			be_chatty = 0;
24522 		}
24523 		break;
24524 	case CMD_TAG_REJECT:
24525 		/*
24526 		 * The SCSI-2 spec states that a tag reject will be sent by the
24527 		 * target if tagged queuing is not supported. A tag reject may
24528 		 * also be sent during certain initialization periods or to
24529 		 * control internal resources. For the latter case the target
24530 		 * may also return Queue Full.
24531 		 *
24532 		 * If this driver receives a tag reject from a target that is
24533 		 * going through an init period or controlling internal
24534 		 * resources tagged queuing will be disabled. This is a less
24535 		 * than optimal behavior but the driver is unable to determine
24536 		 * the target state and assumes tagged queueing is not supported
24537 		 */
24538 		pkt->pkt_flags = 0;
24539 		un->un_tagflags = 0;
24540 
24541 		if (un->un_f_opt_queueing == TRUE) {
24542 			un->un_throttle = min(un->un_throttle, 3);
24543 		} else {
24544 			un->un_throttle = 1;
24545 		}
24546 		mutex_exit(SD_MUTEX(un));
24547 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
24548 		mutex_enter(SD_MUTEX(un));
24549 		break;
24550 	case CMD_INCOMPLETE:
24551 		/*
24552 		 * The transport stopped with an abnormal state, fallthrough and
24553 		 * reset the target and/or bus unless selection did not complete
24554 		 * (indicated by STATE_GOT_BUS) in which case we don't want to
24555 		 * go through a target/bus reset
24556 		 */
24557 		if (pkt->pkt_state == STATE_GOT_BUS) {
24558 			break;
24559 		}
24560 		/*FALLTHROUGH*/
24561 
24562 	case CMD_TIMEOUT:
24563 	default:
24564 		/*
24565 		 * The lun may still be running the command, so a lun reset
24566 		 * should be attempted. If the lun reset fails or cannot be
24567 		 * issued, than try a target reset. Lastly try a bus reset.
24568 		 */
24569 		if ((pkt->pkt_statistics &
24570 		    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) == 0) {
24571 			int reset_retval = 0;
24572 			mutex_exit(SD_MUTEX(un));
24573 			if (un->un_f_allow_bus_device_reset == TRUE) {
24574 				if (un->un_f_lun_reset_enabled == TRUE) {
24575 					reset_retval =
24576 					    scsi_reset(SD_ADDRESS(un),
24577 					    RESET_LUN);
24578 				}
24579 				if (reset_retval == 0) {
24580 					reset_retval =
24581 					    scsi_reset(SD_ADDRESS(un),
24582 					    RESET_TARGET);
24583 				}
24584 			}
24585 			if (reset_retval == 0) {
24586 				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
24587 			}
24588 			mutex_enter(SD_MUTEX(un));
24589 		}
24590 		break;
24591 	}
24592 
24593 	/* A device/bus reset has occurred; update the reservation status. */
24594 	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
24595 	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
24596 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
24597 			un->un_resvd_status |=
24598 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
24599 			SD_INFO(SD_LOG_IOCTL_MHD, un,
24600 			    "sd_mhd_watch_incomplete: Lost Reservation\n");
24601 		}
24602 	}
24603 
24604 	/*
24605 	 * The disk has been turned off; Update the device state.
24606 	 *
24607 	 * Note: Should we be offlining the disk here?
24608 	 */
24609 	if (pkt->pkt_state == STATE_GOT_BUS) {
24610 		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
24611 		    "Disk not responding to selection\n");
24612 		if (un->un_state != SD_STATE_OFFLINE) {
24613 			New_state(un, SD_STATE_OFFLINE);
24614 		}
24615 	} else if (be_chatty) {
24616 		/*
24617 		 * suppress messages if they are all the same pkt reason;
24618 		 * with TQ, many (up to 256) are returned with the same
24619 		 * pkt_reason
24620 		 */
24621 		if (pkt->pkt_reason != un->un_last_pkt_reason) {
24622 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
24623 			    "sd_mhd_watch_incomplete: "
24624 			    "SCSI transport failed: reason '%s'\n",
24625 			    scsi_rname(pkt->pkt_reason));
24626 		}
24627 	}
24628 	un->un_last_pkt_reason = pkt->pkt_reason;
24629 	mutex_exit(SD_MUTEX(un));
24630 }
24631 
24632 
24633 /*
24634  *    Function: sd_sname()
24635  *
24636  * Description: This is a simple little routine to return a string containing
24637  *		a printable description of command status byte for use in
24638  *		logging.
24639  *
24640  *   Arguments: status - pointer to a status byte
24641  *
24642  * Return Code: char * - string containing status description.
24643  */
24644 
24645 static char *
24646 sd_sname(uchar_t status)
24647 {
24648 	switch (status & STATUS_MASK) {
24649 	case STATUS_GOOD:
24650 		return ("good status");
24651 	case STATUS_CHECK:
24652 		return ("check condition");
24653 	case STATUS_MET:
24654 		return ("condition met");
24655 	case STATUS_BUSY:
24656 		return ("busy");
24657 	case STATUS_INTERMEDIATE:
24658 		return ("intermediate");
24659 	case STATUS_INTERMEDIATE_MET:
24660 		return ("intermediate - condition met");
24661 	case STATUS_RESERVATION_CONFLICT:
24662 		return ("reservation_conflict");
24663 	case STATUS_TERMINATED:
24664 		return ("command terminated");
24665 	case STATUS_QFULL:
24666 		return ("queue full");
24667 	default:
24668 		return ("<unknown status>");
24669 	}
24670 }
24671 
24672 
24673 /*
24674  *    Function: sd_mhd_resvd_recover()
24675  *
24676  * Description: This function adds a reservation entry to the
24677  *		sd_resv_reclaim_request list and signals the reservation
24678  *		reclaim thread that there is work pending. If the reservation
24679  *		reclaim thread has not been previously created this function
24680  *		will kick it off.
24681  *
24682  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24683  *			among multiple watches that share this callback function
24684  *
24685  *     Context: This routine is called by timeout() and is run in interrupt
24686  *		context. It must not sleep or call other functions which may
24687  *		sleep.
24688  */
24689 
24690 static void
24691 sd_mhd_resvd_recover(void *arg)
24692 {
24693 	dev_t			dev = (dev_t)arg;
24694 	struct sd_lun		*un;
24695 	struct sd_thr_request	*sd_treq = NULL;
24696 	struct sd_thr_request	*sd_cur = NULL;
24697 	struct sd_thr_request	*sd_prev = NULL;
24698 	int			already_there = 0;
24699 
24700 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24701 		return;
24702 	}
24703 
24704 	mutex_enter(SD_MUTEX(un));
24705 	un->un_resvd_timeid = NULL;
24706 	if (un->un_resvd_status & SD_WANT_RESERVE) {
24707 		/*
24708 		 * There was a reset so don't issue the reserve, allow the
24709 		 * sd_mhd_watch_cb callback function to notice this and
24710 		 * reschedule the timeout for reservation.
24711 		 */
24712 		mutex_exit(SD_MUTEX(un));
24713 		return;
24714 	}
24715 	mutex_exit(SD_MUTEX(un));
24716 
24717 	/*
24718 	 * Add this device to the sd_resv_reclaim_request list and the
24719 	 * sd_resv_reclaim_thread should take care of the rest.
24720 	 *
24721 	 * Note: We can't sleep in this context so if the memory allocation
24722 	 * fails allow the sd_mhd_watch_cb callback function to notice this and
24723 	 * reschedule the timeout for reservation.  (4378460)
24724 	 */
24725 	sd_treq = (struct sd_thr_request *)
24726 	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
24727 	if (sd_treq == NULL) {
24728 		return;
24729 	}
24730 
24731 	sd_treq->sd_thr_req_next = NULL;
24732 	sd_treq->dev = dev;
24733 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
24734 	if (sd_tr.srq_thr_req_head == NULL) {
24735 		sd_tr.srq_thr_req_head = sd_treq;
24736 	} else {
24737 		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
24738 		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
24739 			if (sd_cur->dev == dev) {
24740 				/*
24741 				 * already in Queue so don't log
24742 				 * another request for the device
24743 				 */
24744 				already_there = 1;
24745 				break;
24746 			}
24747 			sd_prev = sd_cur;
24748 		}
24749 		if (!already_there) {
24750 			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
24751 			    "logging request for %lx\n", dev);
24752 			sd_prev->sd_thr_req_next = sd_treq;
24753 		} else {
24754 			kmem_free(sd_treq, sizeof (struct sd_thr_request));
24755 		}
24756 	}
24757 
24758 	/*
24759 	 * Create a kernel thread to do the reservation reclaim and free up this
24760 	 * thread. We cannot block this thread while we go away to do the
24761 	 * reservation reclaim
24762 	 */
24763 	if (sd_tr.srq_resv_reclaim_thread == NULL)
24764 		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
24765 		    sd_resv_reclaim_thread, NULL,
24766 		    0, &p0, TS_RUN, v.v_maxsyspri - 2);
24767 
24768 	/* Tell the reservation reclaim thread that it has work to do */
24769 	cv_signal(&sd_tr.srq_resv_reclaim_cv);
24770 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
24771 }
24772 
24773 /*
24774  *    Function: sd_resv_reclaim_thread()
24775  *
24776  * Description: This function implements the reservation reclaim operations
24777  *
24778  *   Arguments: arg - the device 'dev_t' is used for context to discriminate
24779  *		      among multiple watches that share this callback function
24780  */
24781 
24782 static void
24783 sd_resv_reclaim_thread()
24784 {
24785 	struct sd_lun		*un;
24786 	struct sd_thr_request	*sd_mhreq;
24787 
24788 	/* Wait for work */
24789 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
24790 	if (sd_tr.srq_thr_req_head == NULL) {
24791 		cv_wait(&sd_tr.srq_resv_reclaim_cv,
24792 		    &sd_tr.srq_resv_reclaim_mutex);
24793 	}
24794 
24795 	/* Loop while we have work */
24796 	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
24797 		un = ddi_get_soft_state(sd_state,
24798 		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
24799 		if (un == NULL) {
24800 			/*
24801 			 * softstate structure is NULL so just
24802 			 * dequeue the request and continue
24803 			 */
24804 			sd_tr.srq_thr_req_head =
24805 			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
24806 			kmem_free(sd_tr.srq_thr_cur_req,
24807 			    sizeof (struct sd_thr_request));
24808 			continue;
24809 		}
24810 
24811 		/* dequeue the request */
24812 		sd_mhreq = sd_tr.srq_thr_cur_req;
24813 		sd_tr.srq_thr_req_head =
24814 		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
24815 		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
24816 
24817 		/*
24818 		 * Reclaim reservation only if SD_RESERVE is still set. There
24819 		 * may have been a call to MHIOCRELEASE before we got here.
24820 		 */
24821 		mutex_enter(SD_MUTEX(un));
24822 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
24823 			/*
24824 			 * Note: The SD_LOST_RESERVE flag is cleared before
24825 			 * reclaiming the reservation. If this is done after the
24826 			 * call to sd_reserve_release a reservation loss in the
24827 			 * window between pkt completion of reserve cmd and
24828 			 * mutex_enter below may not be recognized
24829 			 */
24830 			un->un_resvd_status &= ~SD_LOST_RESERVE;
24831 			mutex_exit(SD_MUTEX(un));
24832 
24833 			if (sd_reserve_release(sd_mhreq->dev,
24834 			    SD_RESERVE) == 0) {
24835 				mutex_enter(SD_MUTEX(un));
24836 				un->un_resvd_status |= SD_RESERVE;
24837 				mutex_exit(SD_MUTEX(un));
24838 				SD_INFO(SD_LOG_IOCTL_MHD, un,
24839 				    "sd_resv_reclaim_thread: "
24840 				    "Reservation Recovered\n");
24841 			} else {
24842 				mutex_enter(SD_MUTEX(un));
24843 				un->un_resvd_status |= SD_LOST_RESERVE;
24844 				mutex_exit(SD_MUTEX(un));
24845 				SD_INFO(SD_LOG_IOCTL_MHD, un,
24846 				    "sd_resv_reclaim_thread: Failed "
24847 				    "Reservation Recovery\n");
24848 			}
24849 		} else {
24850 			mutex_exit(SD_MUTEX(un));
24851 		}
24852 		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
24853 		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
24854 		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
24855 		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
24856 		/*
24857 		 * wakeup the destroy thread if anyone is waiting on
24858 		 * us to complete.
24859 		 */
24860 		cv_signal(&sd_tr.srq_inprocess_cv);
24861 		SD_TRACE(SD_LOG_IOCTL_MHD, un,
24862 		    "sd_resv_reclaim_thread: cv_signalling current request \n");
24863 	}
24864 
24865 	/*
24866 	 * cleanup the sd_tr structure now that this thread will not exist
24867 	 */
24868 	ASSERT(sd_tr.srq_thr_req_head == NULL);
24869 	ASSERT(sd_tr.srq_thr_cur_req == NULL);
24870 	sd_tr.srq_resv_reclaim_thread = NULL;
24871 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
24872 	thread_exit();
24873 }
24874 
24875 
24876 /*
24877  *    Function: sd_rmv_resv_reclaim_req()
24878  *
24879  * Description: This function removes any pending reservation reclaim requests
24880  *		for the specified device.
24881  *
24882  *   Arguments: dev - the device 'dev_t'
24883  */
24884 
24885 static void
24886 sd_rmv_resv_reclaim_req(dev_t dev)
24887 {
24888 	struct sd_thr_request *sd_mhreq;
24889 	struct sd_thr_request *sd_prev;
24890 
24891 	/* Remove a reservation reclaim request from the list */
24892 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
24893 	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
24894 		/*
24895 		 * We are attempting to reinstate reservation for
24896 		 * this device. We wait for sd_reserve_release()
24897 		 * to return before we return.
24898 		 */
24899 		cv_wait(&sd_tr.srq_inprocess_cv,
24900 		    &sd_tr.srq_resv_reclaim_mutex);
24901 	} else {
24902 		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
24903 		if (sd_mhreq && sd_mhreq->dev == dev) {
24904 			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
24905 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
24906 			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
24907 			return;
24908 		}
24909 		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
24910 			if (sd_mhreq && sd_mhreq->dev == dev) {
24911 				break;
24912 			}
24913 			sd_prev = sd_mhreq;
24914 		}
24915 		if (sd_mhreq != NULL) {
24916 			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
24917 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
24918 		}
24919 	}
24920 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
24921 }
24922 
24923 
24924 /*
24925  *    Function: sd_mhd_reset_notify_cb()
24926  *
24927  * Description: This is a call back function for scsi_reset_notify. This
24928  *		function updates the softstate reserved status and logs the
24929  *		reset. The driver scsi watch facility callback function
24930  *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
24931  *		will reclaim the reservation.
24932  *
24933  *   Arguments: arg  - driver soft state (unit) structure
24934  */
24935 
24936 static void
24937 sd_mhd_reset_notify_cb(caddr_t arg)
24938 {
24939 	struct sd_lun *un = (struct sd_lun *)arg;
24940 
24941 	mutex_enter(SD_MUTEX(un));
24942 	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
24943 		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
24944 		SD_INFO(SD_LOG_IOCTL_MHD, un,
24945 		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
24946 	}
24947 	mutex_exit(SD_MUTEX(un));
24948 }
24949 
24950 
24951 /*
24952  *    Function: sd_take_ownership()
24953  *
24954  * Description: This routine implements an algorithm to achieve a stable
24955  *		reservation on disks which don't implement priority reserve,
24956  *		and makes sure that other host lose re-reservation attempts.
24957  *		This algorithm contains of a loop that keeps issuing the RESERVE
24958  *		for some period of time (min_ownership_delay, default 6 seconds)
24959  *		During that loop, it looks to see if there has been a bus device
24960  *		reset or bus reset (both of which cause an existing reservation
24961  *		to be lost). If the reservation is lost issue RESERVE until a
24962  *		period of min_ownership_delay with no resets has gone by, or
24963  *		until max_ownership_delay has expired. This loop ensures that
24964  *		the host really did manage to reserve the device, in spite of
24965  *		resets. The looping for min_ownership_delay (default six
24966  *		seconds) is important to early generation clustering products,
24967  *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
24968  *		MHIOCENFAILFAST periodic timer of two seconds. By having
24969  *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
24970  *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
24971  *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
24972  *		have already noticed, via the MHIOCENFAILFAST polling, that it
24973  *		no longer "owns" the disk and will have panicked itself.  Thus,
24974  *		the host issuing the MHIOCTKOWN is assured (with timing
24975  *		dependencies) that by the time it actually starts to use the
24976  *		disk for real work, the old owner is no longer accessing it.
24977  *
24978  *		min_ownership_delay is the minimum amount of time for which the
24979  *		disk must be reserved continuously devoid of resets before the
24980  *		MHIOCTKOWN ioctl will return success.
24981  *
24982  *		max_ownership_delay indicates the amount of time by which the
24983  *		take ownership should succeed or timeout with an error.
24984  *
24985  *   Arguments: dev - the device 'dev_t'
24986  *		*p  - struct containing timing info.
24987  *
24988  * Return Code: 0 for success or error code
24989  */
24990 
24991 static int
24992 sd_take_ownership(dev_t dev, struct mhioctkown *p)
24993 {
24994 	struct sd_lun	*un;
24995 	int		rval;
24996 	int		err;
24997 	int		reservation_count   = 0;
24998 	int		min_ownership_delay =  6000000; /* in usec */
24999 	int		max_ownership_delay = 30000000; /* in usec */
25000 	clock_t		start_time;	/* starting time of this algorithm */
25001 	clock_t		end_time;	/* time limit for giving up */
25002 	clock_t		ownership_time;	/* time limit for stable ownership */
25003 	clock_t		current_time;
25004 	clock_t		previous_current_time;
25005 
25006 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25007 		return (ENXIO);
25008 	}
25009 
25010 	/*
25011 	 * Attempt a device reservation. A priority reservation is requested.
25012 	 */
25013 	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
25014 	    != SD_SUCCESS) {
25015 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
25016 		    "sd_take_ownership: return(1)=%d\n", rval);
25017 		return (rval);
25018 	}
25019 
25020 	/* Update the softstate reserved status to indicate the reservation */
25021 	mutex_enter(SD_MUTEX(un));
25022 	un->un_resvd_status |= SD_RESERVE;
25023 	un->un_resvd_status &=
25024 	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
25025 	mutex_exit(SD_MUTEX(un));
25026 
25027 	if (p != NULL) {
25028 		if (p->min_ownership_delay != 0) {
25029 			min_ownership_delay = p->min_ownership_delay * 1000;
25030 		}
25031 		if (p->max_ownership_delay != 0) {
25032 			max_ownership_delay = p->max_ownership_delay * 1000;
25033 		}
25034 	}
25035 	SD_INFO(SD_LOG_IOCTL_MHD, un,
25036 	    "sd_take_ownership: min, max delays: %d, %d\n",
25037 	    min_ownership_delay, max_ownership_delay);
25038 
25039 	start_time = ddi_get_lbolt();
25040 	current_time	= start_time;
25041 	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
25042 	end_time	= start_time + drv_usectohz(max_ownership_delay);
25043 
25044 	while (current_time - end_time < 0) {
25045 		delay(drv_usectohz(500000));
25046 
25047 		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
25048 			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
25049 				mutex_enter(SD_MUTEX(un));
25050 				rval = (un->un_resvd_status &
25051 				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
25052 				mutex_exit(SD_MUTEX(un));
25053 				break;
25054 			}
25055 		}
25056 		previous_current_time = current_time;
25057 		current_time = ddi_get_lbolt();
25058 		mutex_enter(SD_MUTEX(un));
25059 		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
25060 			ownership_time = ddi_get_lbolt() +
25061 			    drv_usectohz(min_ownership_delay);
25062 			reservation_count = 0;
25063 		} else {
25064 			reservation_count++;
25065 		}
25066 		un->un_resvd_status |= SD_RESERVE;
25067 		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
25068 		mutex_exit(SD_MUTEX(un));
25069 
25070 		SD_INFO(SD_LOG_IOCTL_MHD, un,
25071 		    "sd_take_ownership: ticks for loop iteration=%ld, "
25072 		    "reservation=%s\n", (current_time - previous_current_time),
25073 		    reservation_count ? "ok" : "reclaimed");
25074 
25075 		if (current_time - ownership_time >= 0 &&
25076 		    reservation_count >= 4) {
25077 			rval = 0; /* Achieved a stable ownership */
25078 			break;
25079 		}
25080 		if (current_time - end_time >= 0) {
25081 			rval = EACCES; /* No ownership in max possible time */
25082 			break;
25083 		}
25084 	}
25085 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
25086 	    "sd_take_ownership: return(2)=%d\n", rval);
25087 	return (rval);
25088 }
25089 
25090 
25091 /*
25092  *    Function: sd_reserve_release()
25093  *
25094  * Description: This function builds and sends scsi RESERVE, RELEASE, and
25095  *		PRIORITY RESERVE commands based on a user specified command type
25096  *
25097  *   Arguments: dev - the device 'dev_t'
25098  *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
25099  *		      SD_RESERVE, SD_RELEASE
25100  *
25101  * Return Code: 0 or Error Code
25102  */
25103 
25104 static int
25105 sd_reserve_release(dev_t dev, int cmd)
25106 {
25107 	struct uscsi_cmd	*com = NULL;
25108 	struct sd_lun		*un = NULL;
25109 	char			cdb[CDB_GROUP0];
25110 	int			rval;
25111 
25112 	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
25113 	    (cmd == SD_PRIORITY_RESERVE));
25114 
25115 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25116 		return (ENXIO);
25117 	}
25118 
25119 	/* instantiate and initialize the command and cdb */
25120 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25121 	bzero(cdb, CDB_GROUP0);
25122 	com->uscsi_flags   = USCSI_SILENT;
25123 	com->uscsi_timeout = un->un_reserve_release_time;
25124 	com->uscsi_cdblen  = CDB_GROUP0;
25125 	com->uscsi_cdb	   = cdb;
25126 	if (cmd == SD_RELEASE) {
25127 		cdb[0] = SCMD_RELEASE;
25128 	} else {
25129 		cdb[0] = SCMD_RESERVE;
25130 	}
25131 
25132 	/* Send the command. */
25133 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
25134 	    SD_PATH_STANDARD);
25135 
25136 	/*
25137 	 * "break" a reservation that is held by another host, by issuing a
25138 	 * reset if priority reserve is desired, and we could not get the
25139 	 * device.
25140 	 */
25141 	if ((cmd == SD_PRIORITY_RESERVE) &&
25142 	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
25143 		/*
25144 		 * First try to reset the LUN. If we cannot, then try a target
25145 		 * reset, followed by a bus reset if the target reset fails.
25146 		 */
25147 		int reset_retval = 0;
25148 		if (un->un_f_lun_reset_enabled == TRUE) {
25149 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
25150 		}
25151 		if (reset_retval == 0) {
25152 			/* The LUN reset either failed or was not issued */
25153 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
25154 		}
25155 		if ((reset_retval == 0) &&
25156 		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
25157 			rval = EIO;
25158 			kmem_free(com, sizeof (*com));
25159 			return (rval);
25160 		}
25161 
25162 		bzero(com, sizeof (struct uscsi_cmd));
25163 		com->uscsi_flags   = USCSI_SILENT;
25164 		com->uscsi_cdb	   = cdb;
25165 		com->uscsi_cdblen  = CDB_GROUP0;
25166 		com->uscsi_timeout = 5;
25167 
25168 		/*
25169 		 * Reissue the last reserve command, this time without request
25170 		 * sense.  Assume that it is just a regular reserve command.
25171 		 */
25172 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
25173 		    SD_PATH_STANDARD);
25174 	}
25175 
25176 	/* Return an error if still getting a reservation conflict. */
25177 	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
25178 		rval = EACCES;
25179 	}
25180 
25181 	kmem_free(com, sizeof (*com));
25182 	return (rval);
25183 }
25184 
25185 
25186 #define	SD_NDUMP_RETRIES	12
25187 /*
25188  *	System Crash Dump routine
25189  */
25190 
25191 static int
25192 sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
25193 {
25194 	int		instance;
25195 	int		partition;
25196 	int		i;
25197 	int		err;
25198 	struct sd_lun	*un;
25199 	struct scsi_pkt *wr_pktp;
25200 	struct buf	*wr_bp;
25201 	struct buf	wr_buf;
25202 	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
25203 	daddr_t		tgt_blkno;	/* rmw - blkno for target */
25204 	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
25205 	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
25206 	size_t		io_start_offset;
25207 	int		doing_rmw = FALSE;
25208 	int		rval;
25209 	ssize_t		dma_resid;
25210 	daddr_t		oblkno;
25211 	diskaddr_t	nblks = 0;
25212 	diskaddr_t	start_block;
25213 
25214 	instance = SDUNIT(dev);
25215 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
25216 	    !SD_IS_VALID_LABEL(un) || ISCD(un)) {
25217 		return (ENXIO);
25218 	}
25219 
25220 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))
25221 
25222 	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");
25223 
25224 	partition = SDPART(dev);
25225 	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);
25226 
25227 	if (!(NOT_DEVBSIZE(un))) {
25228 		int secmask = 0;
25229 		int blknomask = 0;
25230 
25231 		blknomask = (un->un_tgt_blocksize / DEV_BSIZE) - 1;
25232 		secmask = un->un_tgt_blocksize - 1;
25233 
25234 		if (blkno & blknomask) {
25235 			SD_TRACE(SD_LOG_DUMP, un,
25236 			    "sddump: dump start block not modulo %d\n",
25237 			    un->un_tgt_blocksize);
25238 			return (EINVAL);
25239 		}
25240 
25241 		if ((nblk * DEV_BSIZE) & secmask) {
25242 			SD_TRACE(SD_LOG_DUMP, un,
25243 			    "sddump: dump length not modulo %d\n",
25244 			    un->un_tgt_blocksize);
25245 			return (EINVAL);
25246 		}
25247 
25248 	}
25249 
25250 	/* Validate blocks to dump at against partition size. */
25251 
25252 	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
25253 	    &nblks, &start_block, NULL, NULL, (void *)SD_PATH_DIRECT);
25254 
25255 	if (NOT_DEVBSIZE(un)) {
25256 		if ((blkno + nblk) > nblks) {
25257 			SD_TRACE(SD_LOG_DUMP, un,
25258 			    "sddump: dump range larger than partition: "
25259 			    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
25260 			    blkno, nblk, nblks);
25261 			return (EINVAL);
25262 		}
25263 	} else {
25264 		if (((blkno / (un->un_tgt_blocksize / DEV_BSIZE)) +
25265 		    (nblk / (un->un_tgt_blocksize / DEV_BSIZE))) > nblks) {
25266 			SD_TRACE(SD_LOG_DUMP, un,
25267 			    "sddump: dump range larger than partition: "
25268 			    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
25269 			    blkno, nblk, nblks);
25270 			return (EINVAL);
25271 		}
25272 	}
25273 
25274 	mutex_enter(&un->un_pm_mutex);
25275 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
25276 		struct scsi_pkt *start_pktp;
25277 
25278 		mutex_exit(&un->un_pm_mutex);
25279 
25280 		/*
25281 		 * use pm framework to power on HBA 1st
25282 		 */
25283 		(void) pm_raise_power(SD_DEVINFO(un), 0,
25284 		    SD_PM_STATE_ACTIVE(un));
25285 
25286 		/*
25287 		 * Dump no long uses sdpower to power on a device, it's
25288 		 * in-line here so it can be done in polled mode.
25289 		 */
25290 
25291 		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");
25292 
25293 		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
25294 		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);
25295 
25296 		if (start_pktp == NULL) {
25297 			/* We were not given a SCSI packet, fail. */
25298 			return (EIO);
25299 		}
25300 		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
25301 		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
25302 		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
25303 		start_pktp->pkt_flags = FLAG_NOINTR;
25304 
25305 		mutex_enter(SD_MUTEX(un));
25306 		SD_FILL_SCSI1_LUN(un, start_pktp);
25307 		mutex_exit(SD_MUTEX(un));
25308 		/*
25309 		 * Scsi_poll returns 0 (success) if the command completes and
25310 		 * the status block is STATUS_GOOD.
25311 		 */
25312 		if (sd_scsi_poll(un, start_pktp) != 0) {
25313 			scsi_destroy_pkt(start_pktp);
25314 			return (EIO);
25315 		}
25316 		scsi_destroy_pkt(start_pktp);
25317 		(void) sd_pm_state_change(un, SD_PM_STATE_ACTIVE(un),
25318 		    SD_PM_STATE_CHANGE);
25319 	} else {
25320 		mutex_exit(&un->un_pm_mutex);
25321 	}
25322 
25323 	mutex_enter(SD_MUTEX(un));
25324 	un->un_throttle = 0;
25325 
25326 	/*
25327 	 * The first time through, reset the specific target device.
25328 	 * However, when cpr calls sddump we know that sd is in a
25329 	 * a good state so no bus reset is required.
25330 	 * Clear sense data via Request Sense cmd.
25331 	 * In sddump we don't care about allow_bus_device_reset anymore
25332 	 */
25333 
25334 	if ((un->un_state != SD_STATE_SUSPENDED) &&
25335 	    (un->un_state != SD_STATE_DUMPING)) {
25336 
25337 		New_state(un, SD_STATE_DUMPING);
25338 
25339 		if (un->un_f_is_fibre == FALSE) {
25340 			mutex_exit(SD_MUTEX(un));
25341 			/*
25342 			 * Attempt a bus reset for parallel scsi.
25343 			 *
25344 			 * Note: A bus reset is required because on some host
25345 			 * systems (i.e. E420R) a bus device reset is
25346 			 * insufficient to reset the state of the target.
25347 			 *
25348 			 * Note: Don't issue the reset for fibre-channel,
25349 			 * because this tends to hang the bus (loop) for
25350 			 * too long while everyone is logging out and in
25351 			 * and the deadman timer for dumping will fire
25352 			 * before the dump is complete.
25353 			 */
25354 			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
25355 				mutex_enter(SD_MUTEX(un));
25356 				Restore_state(un);
25357 				mutex_exit(SD_MUTEX(un));
25358 				return (EIO);
25359 			}
25360 
25361 			/* Delay to give the device some recovery time. */
25362 			drv_usecwait(10000);
25363 
25364 			if (sd_send_polled_RQS(un) == SD_FAILURE) {
25365 				SD_INFO(SD_LOG_DUMP, un,
25366 				    "sddump: sd_send_polled_RQS failed\n");
25367 			}
25368 			mutex_enter(SD_MUTEX(un));
25369 		}
25370 	}
25371 
25372 	/*
25373 	 * Convert the partition-relative block number to a
25374 	 * disk physical block number.
25375 	 */
25376 	if (NOT_DEVBSIZE(un)) {
25377 		blkno += start_block;
25378 	} else {
25379 		blkno = blkno / (un->un_tgt_blocksize / DEV_BSIZE);
25380 		blkno += start_block;
25381 	}
25382 
25383 	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);
25384 
25385 
25386 	/*
25387 	 * Check if the device has a non-512 block size.
25388 	 */
25389 	wr_bp = NULL;
25390 	if (NOT_DEVBSIZE(un)) {
25391 		tgt_byte_offset = blkno * un->un_sys_blocksize;
25392 		tgt_byte_count = nblk * un->un_sys_blocksize;
25393 		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
25394 		    (tgt_byte_count % un->un_tgt_blocksize)) {
25395 			doing_rmw = TRUE;
25396 			/*
25397 			 * Calculate the block number and number of block
25398 			 * in terms of the media block size.
25399 			 */
25400 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
25401 			tgt_nblk =
25402 			    ((tgt_byte_offset + tgt_byte_count +
25403 			    (un->un_tgt_blocksize - 1)) /
25404 			    un->un_tgt_blocksize) - tgt_blkno;
25405 
25406 			/*
25407 			 * Invoke the routine which is going to do read part
25408 			 * of read-modify-write.
25409 			 * Note that this routine returns a pointer to
25410 			 * a valid bp in wr_bp.
25411 			 */
25412 			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
25413 			    &wr_bp);
25414 			if (err) {
25415 				mutex_exit(SD_MUTEX(un));
25416 				return (err);
25417 			}
25418 			/*
25419 			 * Offset is being calculated as -
25420 			 * (original block # * system block size) -
25421 			 * (new block # * target block size)
25422 			 */
25423 			io_start_offset =
25424 			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
25425 			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));
25426 
25427 			ASSERT(io_start_offset < un->un_tgt_blocksize);
25428 			/*
25429 			 * Do the modify portion of read modify write.
25430 			 */
25431 			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
25432 			    (size_t)nblk * un->un_sys_blocksize);
25433 		} else {
25434 			doing_rmw = FALSE;
25435 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
25436 			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
25437 		}
25438 
25439 		/* Convert blkno and nblk to target blocks */
25440 		blkno = tgt_blkno;
25441 		nblk = tgt_nblk;
25442 	} else {
25443 		wr_bp = &wr_buf;
25444 		bzero(wr_bp, sizeof (struct buf));
25445 		wr_bp->b_flags		= B_BUSY;
25446 		wr_bp->b_un.b_addr	= addr;
25447 		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
25448 		wr_bp->b_resid		= 0;
25449 	}
25450 
25451 	mutex_exit(SD_MUTEX(un));
25452 
25453 	/*
25454 	 * Obtain a SCSI packet for the write command.
25455 	 * It should be safe to call the allocator here without
25456 	 * worrying about being locked for DVMA mapping because
25457 	 * the address we're passed is already a DVMA mapping
25458 	 *
25459 	 * We are also not going to worry about semaphore ownership
25460 	 * in the dump buffer. Dumping is single threaded at present.
25461 	 */
25462 
25463 	wr_pktp = NULL;
25464 
25465 	dma_resid = wr_bp->b_bcount;
25466 	oblkno = blkno;
25467 
25468 	if (!(NOT_DEVBSIZE(un))) {
25469 		nblk = nblk / (un->un_tgt_blocksize / DEV_BSIZE);
25470 	}
25471 
25472 	while (dma_resid != 0) {
25473 
25474 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
25475 		wr_bp->b_flags &= ~B_ERROR;
25476 
25477 		if (un->un_partial_dma_supported == 1) {
25478 			blkno = oblkno +
25479 			    ((wr_bp->b_bcount - dma_resid) /
25480 			    un->un_tgt_blocksize);
25481 			nblk = dma_resid / un->un_tgt_blocksize;
25482 
25483 			if (wr_pktp) {
25484 				/*
25485 				 * Partial DMA transfers after initial transfer
25486 				 */
25487 				rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
25488 				    blkno, nblk);
25489 			} else {
25490 				/* Initial transfer */
25491 				rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
25492 				    un->un_pkt_flags, NULL_FUNC, NULL,
25493 				    blkno, nblk);
25494 			}
25495 		} else {
25496 			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
25497 			    0, NULL_FUNC, NULL, blkno, nblk);
25498 		}
25499 
25500 		if (rval == 0) {
25501 			/* We were given a SCSI packet, continue. */
25502 			break;
25503 		}
25504 
25505 		if (i == 0) {
25506 			if (wr_bp->b_flags & B_ERROR) {
25507 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25508 				    "no resources for dumping; "
25509 				    "error code: 0x%x, retrying",
25510 				    geterror(wr_bp));
25511 			} else {
25512 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25513 				    "no resources for dumping; retrying");
25514 			}
25515 		} else if (i != (SD_NDUMP_RETRIES - 1)) {
25516 			if (wr_bp->b_flags & B_ERROR) {
25517 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
25518 				    "no resources for dumping; error code: "
25519 				    "0x%x, retrying\n", geterror(wr_bp));
25520 			}
25521 		} else {
25522 			if (wr_bp->b_flags & B_ERROR) {
25523 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
25524 				    "no resources for dumping; "
25525 				    "error code: 0x%x, retries failed, "
25526 				    "giving up.\n", geterror(wr_bp));
25527 			} else {
25528 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
25529 				    "no resources for dumping; "
25530 				    "retries failed, giving up.\n");
25531 			}
25532 			mutex_enter(SD_MUTEX(un));
25533 			Restore_state(un);
25534 			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
25535 				mutex_exit(SD_MUTEX(un));
25536 				scsi_free_consistent_buf(wr_bp);
25537 			} else {
25538 				mutex_exit(SD_MUTEX(un));
25539 			}
25540 			return (EIO);
25541 		}
25542 		drv_usecwait(10000);
25543 	}
25544 
25545 	if (un->un_partial_dma_supported == 1) {
25546 		/*
25547 		 * save the resid from PARTIAL_DMA
25548 		 */
25549 		dma_resid = wr_pktp->pkt_resid;
25550 		if (dma_resid != 0)
25551 			nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
25552 		wr_pktp->pkt_resid = 0;
25553 	} else {
25554 		dma_resid = 0;
25555 	}
25556 
25557 	/* SunBug 1222170 */
25558 	wr_pktp->pkt_flags = FLAG_NOINTR;
25559 
25560 	err = EIO;
25561 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
25562 
25563 		/*
25564 		 * Scsi_poll returns 0 (success) if the command completes and
25565 		 * the status block is STATUS_GOOD.  We should only check
25566 		 * errors if this condition is not true.  Even then we should
25567 		 * send our own request sense packet only if we have a check
25568 		 * condition and auto request sense has not been performed by
25569 		 * the hba.
25570 		 */
25571 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");
25572 
25573 		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
25574 		    (wr_pktp->pkt_resid == 0)) {
25575 			err = SD_SUCCESS;
25576 			break;
25577 		}
25578 
25579 		/*
25580 		 * Check CMD_DEV_GONE 1st, give up if device is gone.
25581 		 */
25582 		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
25583 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25584 			    "Error while dumping state...Device is gone\n");
25585 			break;
25586 		}
25587 
25588 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
25589 			SD_INFO(SD_LOG_DUMP, un,
25590 			    "sddump: write failed with CHECK, try # %d\n", i);
25591 			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
25592 				(void) sd_send_polled_RQS(un);
25593 			}
25594 
25595 			continue;
25596 		}
25597 
25598 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
25599 			int reset_retval = 0;
25600 
25601 			SD_INFO(SD_LOG_DUMP, un,
25602 			    "sddump: write failed with BUSY, try # %d\n", i);
25603 
25604 			if (un->un_f_lun_reset_enabled == TRUE) {
25605 				reset_retval = scsi_reset(SD_ADDRESS(un),
25606 				    RESET_LUN);
25607 			}
25608 			if (reset_retval == 0) {
25609 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
25610 			}
25611 			(void) sd_send_polled_RQS(un);
25612 
25613 		} else {
25614 			SD_INFO(SD_LOG_DUMP, un,
25615 			    "sddump: write failed with 0x%x, try # %d\n",
25616 			    SD_GET_PKT_STATUS(wr_pktp), i);
25617 			mutex_enter(SD_MUTEX(un));
25618 			sd_reset_target(un, wr_pktp);
25619 			mutex_exit(SD_MUTEX(un));
25620 		}
25621 
25622 		/*
25623 		 * If we are not getting anywhere with lun/target resets,
25624 		 * let's reset the bus.
25625 		 */
25626 		if (i == SD_NDUMP_RETRIES / 2) {
25627 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
25628 			(void) sd_send_polled_RQS(un);
25629 		}
25630 	}
25631 	}
25632 
25633 	scsi_destroy_pkt(wr_pktp);
25634 	mutex_enter(SD_MUTEX(un));
25635 	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
25636 		mutex_exit(SD_MUTEX(un));
25637 		scsi_free_consistent_buf(wr_bp);
25638 	} else {
25639 		mutex_exit(SD_MUTEX(un));
25640 	}
25641 	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
25642 	return (err);
25643 }
25644 
25645 /*
25646  *    Function: sd_scsi_poll()
25647  *
25648  * Description: This is a wrapper for the scsi_poll call.
25649  *
25650  *   Arguments: sd_lun - The unit structure
25651  *              scsi_pkt - The scsi packet being sent to the device.
25652  *
25653  * Return Code: 0 - Command completed successfully with good status
25654  *             -1 - Command failed.  This could indicate a check condition
25655  *                  or other status value requiring recovery action.
25656  *
25657  * NOTE: This code is only called off sddump().
25658  */
25659 
25660 static int
25661 sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
25662 {
25663 	int status;
25664 
25665 	ASSERT(un != NULL);
25666 	ASSERT(!mutex_owned(SD_MUTEX(un)));
25667 	ASSERT(pktp != NULL);
25668 
25669 	status = SD_SUCCESS;
25670 
25671 	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
25672 		pktp->pkt_flags |= un->un_tagflags;
25673 		pktp->pkt_flags &= ~FLAG_NODISCON;
25674 	}
25675 
25676 	status = sd_ddi_scsi_poll(pktp);
25677 	/*
25678 	 * Scsi_poll returns 0 (success) if the command completes and the
25679 	 * status block is STATUS_GOOD.  We should only check errors if this
25680 	 * condition is not true.  Even then we should send our own request
25681 	 * sense packet only if we have a check condition and auto
25682 	 * request sense has not been performed by the hba.
25683 	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
25684 	 */
25685 	if ((status != SD_SUCCESS) &&
25686 	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
25687 	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
25688 	    (pktp->pkt_reason != CMD_DEV_GONE))
25689 		(void) sd_send_polled_RQS(un);
25690 
25691 	return (status);
25692 }
25693 
25694 /*
25695  *    Function: sd_send_polled_RQS()
25696  *
25697  * Description: This sends the request sense command to a device.
25698  *
25699  *   Arguments: sd_lun - The unit structure
25700  *
25701  * Return Code: 0 - Command completed successfully with good status
25702  *             -1 - Command failed.
25703  *
25704  */
25705 
25706 static int
25707 sd_send_polled_RQS(struct sd_lun *un)
25708 {
25709 	int	ret_val;
25710 	struct	scsi_pkt	*rqs_pktp;
25711 	struct	buf		*rqs_bp;
25712 
25713 	ASSERT(un != NULL);
25714 	ASSERT(!mutex_owned(SD_MUTEX(un)));
25715 
25716 	ret_val = SD_SUCCESS;
25717 
25718 	rqs_pktp = un->un_rqs_pktp;
25719 	rqs_bp	 = un->un_rqs_bp;
25720 
25721 	mutex_enter(SD_MUTEX(un));
25722 
25723 	if (un->un_sense_isbusy) {
25724 		ret_val = SD_FAILURE;
25725 		mutex_exit(SD_MUTEX(un));
25726 		return (ret_val);
25727 	}
25728 
25729 	/*
25730 	 * If the request sense buffer (and packet) is not in use,
25731 	 * let's set the un_sense_isbusy and send our packet
25732 	 */
25733 	un->un_sense_isbusy = 1;
25734 	rqs_pktp->pkt_resid = 0;
25735 	rqs_pktp->pkt_reason = 0;
25736 	rqs_pktp->pkt_flags |= FLAG_NOINTR;
25737 	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
25738 
25739 	mutex_exit(SD_MUTEX(un));
25740 
25741 	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
25742 	    " 0x%p\n", rqs_bp->b_un.b_addr);
25743 
25744 	/*
25745 	 * Can't send this to sd_scsi_poll, we wrap ourselves around the
25746 	 * axle - it has a call into us!
25747 	 */
25748 	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
25749 		SD_INFO(SD_LOG_COMMON, un,
25750 		    "sd_send_polled_RQS: RQS failed\n");
25751 	}
25752 
25753 	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
25754 	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
25755 
25756 	mutex_enter(SD_MUTEX(un));
25757 	un->un_sense_isbusy = 0;
25758 	mutex_exit(SD_MUTEX(un));
25759 
25760 	return (ret_val);
25761 }
25762 
25763 /*
25764  * Defines needed for localized version of the scsi_poll routine.
25765  */
25766 #define	CSEC		10000			/* usecs */
25767 #define	SEC_TO_CSEC	(1000000 / CSEC)
25768 
25769 /*
25770  *    Function: sd_ddi_scsi_poll()
25771  *
25772  * Description: Localized version of the scsi_poll routine.  The purpose is to
25773  *		send a scsi_pkt to a device as a polled command.  This version
25774  *		is to ensure more robust handling of transport errors.
25775  *		Specifically this routine cures not ready, coming ready
25776  *		transition for power up and reset.
25777  *
25778  *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
25779  *
25780  * Return Code: 0 - Command completed successfully with good status
25781  *             -1 - Command failed.
25782  *
25783  * NOTE: This code is almost identical to scsi_poll, however before 6668774 can
25784  * be fixed (removing this code), we need to determine how to handle the
25785  * KEY_UNIT_ATTENTION condition below in conditions not as limited as sddump().
25786  *
25787  * NOTE: This code is only called off sddump().
25788  */
25789 static int
25790 sd_ddi_scsi_poll(struct scsi_pkt *pkt)
25791 {
25792 	int			rval = -1;
25793 	int			savef;
25794 	long			savet;
25795 	void			(*savec)();
25796 	int			timeout;
25797 	int			busy_count;
25798 	int			poll_delay;
25799 	int			rc;
25800 	uint8_t			*sensep;
25801 	struct scsi_arq_status	*arqstat;
25802 	extern int		do_polled_io;
25803 
25804 	ASSERT(pkt->pkt_scbp);
25805 
25806 	/*
25807 	 * save old flags..
25808 	 */
25809 	savef = pkt->pkt_flags;
25810 	savec = pkt->pkt_comp;
25811 	savet = pkt->pkt_time;
25812 
25813 	pkt->pkt_flags |= FLAG_NOINTR;
25814 
25815 	/*
25816 	 * XXX there is nothing in the SCSA spec that states that we should not
25817 	 * do a callback for polled cmds; however, removing this will break sd
25818 	 * and probably other target drivers
25819 	 */
25820 	pkt->pkt_comp = NULL;
25821 
25822 	/*
25823 	 * we don't like a polled command without timeout.
25824 	 * 60 seconds seems long enough.
25825 	 */
25826 	if (pkt->pkt_time == 0)
25827 		pkt->pkt_time = SCSI_POLL_TIMEOUT;
25828 
25829 	/*
25830 	 * Send polled cmd.
25831 	 *
25832 	 * We do some error recovery for various errors.  Tran_busy,
25833 	 * queue full, and non-dispatched commands are retried every 10 msec.
25834 	 * as they are typically transient failures.  Busy status and Not
25835 	 * Ready are retried every second as this status takes a while to
25836 	 * change.
25837 	 */
25838 	timeout = pkt->pkt_time * SEC_TO_CSEC;
25839 
25840 	for (busy_count = 0; busy_count < timeout; busy_count++) {
25841 		/*
25842 		 * Initialize pkt status variables.
25843 		 */
25844 		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;
25845 
25846 		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
25847 			if (rc != TRAN_BUSY) {
25848 				/* Transport failed - give up. */
25849 				break;
25850 			} else {
25851 				/* Transport busy - try again. */
25852 				poll_delay = 1 * CSEC;		/* 10 msec. */
25853 			}
25854 		} else {
25855 			/*
25856 			 * Transport accepted - check pkt status.
25857 			 */
25858 			rc = (*pkt->pkt_scbp) & STATUS_MASK;
25859 			if ((pkt->pkt_reason == CMD_CMPLT) &&
25860 			    (rc == STATUS_CHECK) &&
25861 			    (pkt->pkt_state & STATE_ARQ_DONE)) {
25862 				arqstat =
25863 				    (struct scsi_arq_status *)(pkt->pkt_scbp);
25864 				sensep = (uint8_t *)&arqstat->sts_sensedata;
25865 			} else {
25866 				sensep = NULL;
25867 			}
25868 
25869 			if ((pkt->pkt_reason == CMD_CMPLT) &&
25870 			    (rc == STATUS_GOOD)) {
25871 				/* No error - we're done */
25872 				rval = 0;
25873 				break;
25874 
25875 			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
25876 				/* Lost connection - give up */
25877 				break;
25878 
25879 			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
25880 			    (pkt->pkt_state == 0)) {
25881 				/* Pkt not dispatched - try again. */
25882 				poll_delay = 1 * CSEC;		/* 10 msec. */
25883 
25884 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
25885 			    (rc == STATUS_QFULL)) {
25886 				/* Queue full - try again. */
25887 				poll_delay = 1 * CSEC;		/* 10 msec. */
25888 
25889 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
25890 			    (rc == STATUS_BUSY)) {
25891 				/* Busy - try again. */
25892 				poll_delay = 100 * CSEC;	/* 1 sec. */
25893 				busy_count += (SEC_TO_CSEC - 1);
25894 
25895 			} else if ((sensep != NULL) &&
25896 			    (scsi_sense_key(sensep) == KEY_UNIT_ATTENTION)) {
25897 				/*
25898 				 * Unit Attention - try again.
25899 				 * Pretend it took 1 sec.
25900 				 * NOTE: 'continue' avoids poll_delay
25901 				 */
25902 				busy_count += (SEC_TO_CSEC - 1);
25903 				continue;
25904 
25905 			} else if ((sensep != NULL) &&
25906 			    (scsi_sense_key(sensep) == KEY_NOT_READY) &&
25907 			    (scsi_sense_asc(sensep) == 0x04) &&
25908 			    (scsi_sense_ascq(sensep) == 0x01)) {
25909 				/*
25910 				 * Not ready -> ready - try again.
25911 				 * 04h/01h: LUN IS IN PROCESS OF BECOMING READY
25912 				 * ...same as STATUS_BUSY
25913 				 */
25914 				poll_delay = 100 * CSEC;	/* 1 sec. */
25915 				busy_count += (SEC_TO_CSEC - 1);
25916 
25917 			} else {
25918 				/* BAD status - give up. */
25919 				break;
25920 			}
25921 		}
25922 
25923 		if (((curthread->t_flag & T_INTR_THREAD) == 0) &&
25924 		    !do_polled_io) {
25925 			delay(drv_usectohz(poll_delay));
25926 		} else {
25927 			/* we busy wait during cpr_dump or interrupt threads */
25928 			drv_usecwait(poll_delay);
25929 		}
25930 	}
25931 
25932 	pkt->pkt_flags = savef;
25933 	pkt->pkt_comp = savec;
25934 	pkt->pkt_time = savet;
25935 
25936 	/* return on error */
25937 	if (rval)
25938 		return (rval);
25939 
25940 	/*
25941 	 * This is not a performance critical code path.
25942 	 *
25943 	 * As an accommodation for scsi_poll callers, to avoid ddi_dma_sync()
25944 	 * issues associated with looking at DMA memory prior to
25945 	 * scsi_pkt_destroy(), we scsi_sync_pkt() prior to return.
25946 	 */
25947 	scsi_sync_pkt(pkt);
25948 	return (0);
25949 }
25950 
25951 
25952 
25953 /*
25954  *    Function: sd_persistent_reservation_in_read_keys
25955  *
25956  * Description: This routine is the driver entry point for handling CD-ROM
25957  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
25958  *		by sending the SCSI-3 PRIN commands to the device.
25959  *		Processes the read keys command response by copying the
25960  *		reservation key information into the user provided buffer.
25961  *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
25962  *
25963  *   Arguments: un   -  Pointer to soft state struct for the target.
25964  *		usrp -	user provided pointer to multihost Persistent In Read
25965  *			Keys structure (mhioc_inkeys_t)
25966  *		flag -	this argument is a pass through to ddi_copyxxx()
25967  *			directly from the mode argument of ioctl().
25968  *
25969  * Return Code: 0   - Success
25970  *		EACCES
25971  *		ENOTSUP
25972  *		errno return code from sd_send_scsi_cmd()
25973  *
25974  *     Context: Can sleep. Does not return until command is completed.
25975  */
25976 
25977 static int
25978 sd_persistent_reservation_in_read_keys(struct sd_lun *un,
25979     mhioc_inkeys_t *usrp, int flag)
25980 {
25981 #ifdef _MULTI_DATAMODEL
25982 	struct mhioc_key_list32	li32;
25983 #endif
25984 	sd_prin_readkeys_t	*in;
25985 	mhioc_inkeys_t		*ptr;
25986 	mhioc_key_list_t	li;
25987 	uchar_t			*data_bufp = NULL;
25988 	int			data_len = 0;
25989 	int			rval = 0;
25990 	size_t			copysz = 0;
25991 	sd_ssc_t		*ssc;
25992 
25993 	if ((ptr = (mhioc_inkeys_t *)usrp) == NULL) {
25994 		return (EINVAL);
25995 	}
25996 	bzero(&li, sizeof (mhioc_key_list_t));
25997 
25998 	ssc = sd_ssc_init(un);
25999 
26000 	/*
26001 	 * Get the listsize from user
26002 	 */
26003 #ifdef _MULTI_DATAMODEL
26004 	switch (ddi_model_convert_from(flag & FMODELS)) {
26005 	case DDI_MODEL_ILP32:
26006 		copysz = sizeof (struct mhioc_key_list32);
26007 		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
26008 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26009 			    "sd_persistent_reservation_in_read_keys: "
26010 			    "failed ddi_copyin: mhioc_key_list32_t\n");
26011 			rval = EFAULT;
26012 			goto done;
26013 		}
26014 		li.listsize = li32.listsize;
26015 		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
26016 		break;
26017 
26018 	case DDI_MODEL_NONE:
26019 		copysz = sizeof (mhioc_key_list_t);
26020 		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26021 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26022 			    "sd_persistent_reservation_in_read_keys: "
26023 			    "failed ddi_copyin: mhioc_key_list_t\n");
26024 			rval = EFAULT;
26025 			goto done;
26026 		}
26027 		break;
26028 	}
26029 
26030 #else /* ! _MULTI_DATAMODEL */
26031 	copysz = sizeof (mhioc_key_list_t);
26032 	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26033 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26034 		    "sd_persistent_reservation_in_read_keys: "
26035 		    "failed ddi_copyin: mhioc_key_list_t\n");
26036 		rval = EFAULT;
26037 		goto done;
26038 	}
26039 #endif
26040 
26041 	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
26042 	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
26043 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
26044 
26045 	rval = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_KEYS,
26046 	    data_len, data_bufp);
26047 	if (rval != 0) {
26048 		if (rval == EIO)
26049 			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
26050 		else
26051 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
26052 		goto done;
26053 	}
26054 	in = (sd_prin_readkeys_t *)data_bufp;
26055 	ptr->generation = BE_32(in->generation);
26056 	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;
26057 
26058 	/*
26059 	 * Return the min(listsize, listlen) keys
26060 	 */
26061 #ifdef _MULTI_DATAMODEL
26062 
26063 	switch (ddi_model_convert_from(flag & FMODELS)) {
26064 	case DDI_MODEL_ILP32:
26065 		li32.listlen = li.listlen;
26066 		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
26067 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26068 			    "sd_persistent_reservation_in_read_keys: "
26069 			    "failed ddi_copyout: mhioc_key_list32_t\n");
26070 			rval = EFAULT;
26071 			goto done;
26072 		}
26073 		break;
26074 
26075 	case DDI_MODEL_NONE:
26076 		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
26077 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26078 			    "sd_persistent_reservation_in_read_keys: "
26079 			    "failed ddi_copyout: mhioc_key_list_t\n");
26080 			rval = EFAULT;
26081 			goto done;
26082 		}
26083 		break;
26084 	}
26085 
26086 #else /* ! _MULTI_DATAMODEL */
26087 
26088 	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
26089 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26090 		    "sd_persistent_reservation_in_read_keys: "
26091 		    "failed ddi_copyout: mhioc_key_list_t\n");
26092 		rval = EFAULT;
26093 		goto done;
26094 	}
26095 
26096 #endif /* _MULTI_DATAMODEL */
26097 
26098 	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
26099 	    li.listsize * MHIOC_RESV_KEY_SIZE);
26100 	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
26101 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26102 		    "sd_persistent_reservation_in_read_keys: "
26103 		    "failed ddi_copyout: keylist\n");
26104 		rval = EFAULT;
26105 	}
26106 done:
26107 	sd_ssc_fini(ssc);
26108 	kmem_free(data_bufp, data_len);
26109 	return (rval);
26110 }
26111 
26112 
26113 /*
26114  *    Function: sd_persistent_reservation_in_read_resv
26115  *
26116  * Description: This routine is the driver entry point for handling CD-ROM
26117  *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
26118  *		by sending the SCSI-3 PRIN commands to the device.
26119  *		Process the read persistent reservations command response by
26120  *		copying the reservation information into the user provided
26121  *		buffer. Support for the 32/64 _MULTI_DATAMODEL is implemented.
26122  *
26123  *   Arguments: un   -  Pointer to soft state struct for the target.
 *		usrp -	user provided pointer to multihost Persistent In Read
 *			Reservations structure (mhioc_inresvs_t)
26126  *		flag -	this argument is a pass through to ddi_copyxxx()
26127  *			directly from the mode argument of ioctl().
26128  *
26129  * Return Code: 0   - Success
26130  *		EACCES
26131  *		ENOTSUP
26132  *		errno return code from sd_send_scsi_cmd()
26133  *
26134  *     Context: Can sleep. Does not return until command is completed.
26135  */
26136 
26137 static int
26138 sd_persistent_reservation_in_read_resv(struct sd_lun *un,
26139     mhioc_inresvs_t *usrp, int flag)
26140 {
26141 #ifdef _MULTI_DATAMODEL
26142 	struct mhioc_resv_desc_list32 resvlist32;
26143 #endif
26144 	sd_prin_readresv_t	*in;
26145 	mhioc_inresvs_t		*ptr;
26146 	sd_readresv_desc_t	*readresv_ptr;
26147 	mhioc_resv_desc_list_t	resvlist;
26148 	mhioc_resv_desc_t	resvdesc;
26149 	uchar_t			*data_bufp = NULL;
26150 	int			data_len;
26151 	int			rval = 0;
26152 	int			i;
26153 	size_t			copysz = 0;
26154 	mhioc_resv_desc_t	*bufp;
26155 	sd_ssc_t		*ssc;
26156 
26157 	if ((ptr = usrp) == NULL) {
26158 		return (EINVAL);
26159 	}
26160 
26161 	ssc = sd_ssc_init(un);
26162 
26163 	/*
26164 	 * Get the listsize from user
26165 	 */
26166 #ifdef _MULTI_DATAMODEL
26167 	switch (ddi_model_convert_from(flag & FMODELS)) {
26168 	case DDI_MODEL_ILP32:
26169 		copysz = sizeof (struct mhioc_resv_desc_list32);
26170 		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
26171 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26172 			    "sd_persistent_reservation_in_read_resv: "
26173 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
26174 			rval = EFAULT;
26175 			goto done;
26176 		}
26177 		resvlist.listsize = resvlist32.listsize;
26178 		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
26179 		break;
26180 
26181 	case DDI_MODEL_NONE:
26182 		copysz = sizeof (mhioc_resv_desc_list_t);
26183 		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
26184 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26185 			    "sd_persistent_reservation_in_read_resv: "
26186 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
26187 			rval = EFAULT;
26188 			goto done;
26189 		}
26190 		break;
26191 	}
26192 #else /* ! _MULTI_DATAMODEL */
26193 	copysz = sizeof (mhioc_resv_desc_list_t);
26194 	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
26195 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26196 		    "sd_persistent_reservation_in_read_resv: "
26197 		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
26198 		rval = EFAULT;
26199 		goto done;
26200 	}
26201 #endif /* ! _MULTI_DATAMODEL */
26202 
26203 	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
26204 	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
26205 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
26206 
26207 	rval = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_RESV,
26208 	    data_len, data_bufp);
26209 	if (rval != 0) {
26210 		if (rval == EIO)
26211 			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
26212 		else
26213 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
26214 		goto done;
26215 	}
26216 	in = (sd_prin_readresv_t *)data_bufp;
26217 	ptr->generation = BE_32(in->generation);
26218 	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;
26219 
26220 	/*
26221 	 * Return the min(listsize, listlen( keys
26222 	 */
26223 #ifdef _MULTI_DATAMODEL
26224 
26225 	switch (ddi_model_convert_from(flag & FMODELS)) {
26226 	case DDI_MODEL_ILP32:
26227 		resvlist32.listlen = resvlist.listlen;
26228 		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
26229 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26230 			    "sd_persistent_reservation_in_read_resv: "
26231 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
26232 			rval = EFAULT;
26233 			goto done;
26234 		}
26235 		break;
26236 
26237 	case DDI_MODEL_NONE:
26238 		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
26239 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26240 			    "sd_persistent_reservation_in_read_resv: "
26241 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
26242 			rval = EFAULT;
26243 			goto done;
26244 		}
26245 		break;
26246 	}
26247 
26248 #else /* ! _MULTI_DATAMODEL */
26249 
26250 	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
26251 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26252 		    "sd_persistent_reservation_in_read_resv: "
26253 		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
26254 		rval = EFAULT;
26255 		goto done;
26256 	}
26257 
26258 #endif /* ! _MULTI_DATAMODEL */
26259 
26260 	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
26261 	bufp = resvlist.list;
26262 	copysz = sizeof (mhioc_resv_desc_t);
26263 	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
26264 	    i++, readresv_ptr++, bufp++) {
26265 
26266 		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
26267 		    MHIOC_RESV_KEY_SIZE);
26268 		resvdesc.type  = readresv_ptr->type;
26269 		resvdesc.scope = readresv_ptr->scope;
26270 		resvdesc.scope_specific_addr =
26271 		    BE_32(readresv_ptr->scope_specific_addr);
26272 
26273 		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
26274 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26275 			    "sd_persistent_reservation_in_read_resv: "
26276 			    "failed ddi_copyout: resvlist\n");
26277 			rval = EFAULT;
26278 			goto done;
26279 		}
26280 	}
26281 done:
26282 	sd_ssc_fini(ssc);
26283 	/* only if data_bufp is allocated, we need to free it */
26284 	if (data_bufp) {
26285 		kmem_free(data_bufp, data_len);
26286 	}
26287 	return (rval);
26288 }
26289 
26290 
26291 /*
26292  *    Function: sr_change_blkmode()
26293  *
26294  * Description: This routine is the driver entry point for handling CD-ROM
26295  *		block mode ioctl requests. Support for returning and changing
26296  *		the current block size in use by the device is implemented. The
26297  *		LBA size is changed via a MODE SELECT Block Descriptor.
26298  *
26299  *		This routine issues a mode sense with an allocation length of
26300  *		12 bytes for the mode page header and a single block descriptor.
26301  *
26302  *   Arguments: dev - the device 'dev_t'
26303  *		cmd - the request type; one of CDROMGBLKMODE (get) or
26304  *		      CDROMSBLKMODE (set)
26305  *		data - current block size or requested block size
26306  *		flag - this argument is a pass through to ddi_copyxxx() directly
26307  *		       from the mode argument of ioctl().
26308  *
26309  * Return Code: the code returned by sd_send_scsi_cmd()
26310  *		EINVAL if invalid arguments are provided
26311  *		EFAULT if ddi_copyxxx() fails
26312  *		ENXIO if fail ddi_get_soft_state
26313  *		EIO if invalid mode sense block descriptor length
26314  *
26315  */
26316 
26317 static int
26318 sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
26319 {
26320 	struct sd_lun			*un = NULL;
26321 	struct mode_header		*sense_mhp, *select_mhp;
26322 	struct block_descriptor		*sense_desc, *select_desc;
26323 	int				current_bsize;
26324 	int				rval = EINVAL;
26325 	uchar_t				*sense = NULL;
26326 	uchar_t				*select = NULL;
26327 	sd_ssc_t			*ssc;
26328 
26329 	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));
26330 
26331 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26332 		return (ENXIO);
26333 	}
26334 
26335 	/*
26336 	 * The block length is changed via the Mode Select block descriptor, the
26337 	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
26338 	 * required as part of this routine. Therefore the mode sense allocation
26339 	 * length is specified to be the length of a mode page header and a
26340 	 * block descriptor.
26341 	 */
26342 	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
26343 
26344 	ssc = sd_ssc_init(un);
26345 	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
26346 	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD);
26347 	sd_ssc_fini(ssc);
26348 	if (rval != 0) {
26349 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26350 		    "sr_change_blkmode: Mode Sense Failed\n");
26351 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
26352 		return (rval);
26353 	}
26354 
26355 	/* Check the block descriptor len to handle only 1 block descriptor */
26356 	sense_mhp = (struct mode_header *)sense;
26357 	if ((sense_mhp->bdesc_length == 0) ||
26358 	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
26359 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26360 		    "sr_change_blkmode: Mode Sense returned invalid block"
26361 		    " descriptor length\n");
26362 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
26363 		return (EIO);
26364 	}
26365 	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
26366 	current_bsize = ((sense_desc->blksize_hi << 16) |
26367 	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);
26368 
26369 	/* Process command */
26370 	switch (cmd) {
26371 	case CDROMGBLKMODE:
26372 		/* Return the block size obtained during the mode sense */
26373 		if (ddi_copyout(&current_bsize, (void *)data,
26374 		    sizeof (int), flag) != 0)
26375 			rval = EFAULT;
26376 		break;
26377 	case CDROMSBLKMODE:
26378 		/* Validate the requested block size */
26379 		switch (data) {
26380 		case CDROM_BLK_512:
26381 		case CDROM_BLK_1024:
26382 		case CDROM_BLK_2048:
26383 		case CDROM_BLK_2056:
26384 		case CDROM_BLK_2336:
26385 		case CDROM_BLK_2340:
26386 		case CDROM_BLK_2352:
26387 		case CDROM_BLK_2368:
26388 		case CDROM_BLK_2448:
26389 		case CDROM_BLK_2646:
26390 		case CDROM_BLK_2647:
26391 			break;
26392 		default:
26393 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26394 			    "sr_change_blkmode: "
26395 			    "Block Size '%ld' Not Supported\n", data);
26396 			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
26397 			return (EINVAL);
26398 		}
26399 
26400 		/*
26401 		 * The current block size matches the requested block size so
26402 		 * there is no need to send the mode select to change the size
26403 		 */
26404 		if (current_bsize == data) {
26405 			break;
26406 		}
26407 
26408 		/* Build the select data for the requested block size */
26409 		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
26410 		select_mhp = (struct mode_header *)select;
26411 		select_desc =
26412 		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
26413 		/*
26414 		 * The LBA size is changed via the block descriptor, so the
26415 		 * descriptor is built according to the user data
26416 		 */
26417 		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
26418 		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
26419 		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
26420 		select_desc->blksize_lo  = (char)((data) & 0x000000ff);
26421 
26422 		/* Send the mode select for the requested block size */
26423 		ssc = sd_ssc_init(un);
26424 		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0,
26425 		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
26426 		    SD_PATH_STANDARD);
26427 		sd_ssc_fini(ssc);
26428 		if (rval != 0) {
26429 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26430 			    "sr_change_blkmode: Mode Select Failed\n");
26431 			/*
26432 			 * The mode select failed for the requested block size,
26433 			 * so reset the data for the original block size and
26434 			 * send it to the target. The error is indicated by the
26435 			 * return value for the failed mode select.
26436 			 */
26437 			select_desc->blksize_hi  = sense_desc->blksize_hi;
26438 			select_desc->blksize_mid = sense_desc->blksize_mid;
26439 			select_desc->blksize_lo  = sense_desc->blksize_lo;
26440 			ssc = sd_ssc_init(un);
26441 			(void) sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0,
26442 			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
26443 			    SD_PATH_STANDARD);
26444 			sd_ssc_fini(ssc);
26445 		} else {
26446 			ASSERT(!mutex_owned(SD_MUTEX(un)));
26447 			mutex_enter(SD_MUTEX(un));
26448 			sd_update_block_info(un, (uint32_t)data, 0);
26449 			mutex_exit(SD_MUTEX(un));
26450 		}
26451 		break;
26452 	default:
26453 		/* should not reach here, but check anyway */
26454 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26455 		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
26456 		rval = EINVAL;
26457 		break;
26458 	}
26459 
26460 	if (select) {
26461 		kmem_free(select, BUFLEN_CHG_BLK_MODE);
26462 	}
26463 	if (sense) {
26464 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
26465 	}
26466 	return (rval);
26467 }
26468 
26469 
26470 /*
26471  * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
26472  * implement driver support for getting and setting the CD speed. The command
26473  * set used will be based on the device type. If the device has not been
26474  * identified as MMC the Toshiba vendor specific mode page will be used. If
26475  * the device is MMC but does not support the Real Time Streaming feature
26476  * the SET CD SPEED command will be used to set speed and mode page 0x2A will
26477  * be used to read the speed.
26478  */
26479 
26480 /*
26481  *    Function: sr_change_speed()
26482  *
26483  * Description: This routine is the driver entry point for handling CD-ROM
26484  *		drive speed ioctl requests for devices supporting the Toshiba
26485  *		vendor specific drive speed mode page. Support for returning
26486  *		and changing the current drive speed in use by the device is
26487  *		implemented.
26488  *
26489  *   Arguments: dev - the device 'dev_t'
26490  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
26491  *		      CDROMSDRVSPEED (set)
26492  *		data - current drive speed or requested drive speed
26493  *		flag - this argument is a pass through to ddi_copyxxx() directly
26494  *		       from the mode argument of ioctl().
26495  *
26496  * Return Code: the code returned by sd_send_scsi_cmd()
26497  *		EINVAL if invalid arguments are provided
26498  *		EFAULT if ddi_copyxxx() fails
26499  *		ENXIO if fail ddi_get_soft_state
26500  *		EIO if invalid mode sense block descriptor length
26501  */
26502 
26503 static int
26504 sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
26505 {
26506 	struct sd_lun			*un = NULL;
26507 	struct mode_header		*sense_mhp, *select_mhp;
26508 	struct mode_speed		*sense_page, *select_page;
26509 	int				current_speed;
26510 	int				rval = EINVAL;
26511 	int				bd_len;
26512 	uchar_t				*sense = NULL;
26513 	uchar_t				*select = NULL;
26514 	sd_ssc_t			*ssc;
26515 
26516 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
26517 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26518 		return (ENXIO);
26519 	}
26520 
26521 	/*
26522 	 * Note: The drive speed is being modified here according to a Toshiba
26523 	 * vendor specific mode page (0x31).
26524 	 */
26525 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
26526 
26527 	ssc = sd_ssc_init(un);
26528 	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
26529 	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
26530 	    SD_PATH_STANDARD);
26531 	sd_ssc_fini(ssc);
26532 	if (rval != 0) {
26533 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26534 		    "sr_change_speed: Mode Sense Failed\n");
26535 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
26536 		return (rval);
26537 	}
26538 	sense_mhp  = (struct mode_header *)sense;
26539 
26540 	/* Check the block descriptor len to handle only 1 block descriptor */
26541 	bd_len = sense_mhp->bdesc_length;
26542 	if (bd_len > MODE_BLK_DESC_LENGTH) {
26543 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26544 		    "sr_change_speed: Mode Sense returned invalid block "
26545 		    "descriptor length\n");
26546 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
26547 		return (EIO);
26548 	}
26549 
26550 	sense_page = (struct mode_speed *)
26551 	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
26552 	current_speed = sense_page->speed;
26553 
26554 	/* Process command */
26555 	switch (cmd) {
26556 	case CDROMGDRVSPEED:
26557 		/* Return the drive speed obtained during the mode sense */
26558 		if (current_speed == 0x2) {
26559 			current_speed = CDROM_TWELVE_SPEED;
26560 		}
26561 		if (ddi_copyout(&current_speed, (void *)data,
26562 		    sizeof (int), flag) != 0) {
26563 			rval = EFAULT;
26564 		}
26565 		break;
26566 	case CDROMSDRVSPEED:
26567 		/* Validate the requested drive speed */
26568 		switch ((uchar_t)data) {
26569 		case CDROM_TWELVE_SPEED:
26570 			data = 0x2;
26571 			/*FALLTHROUGH*/
26572 		case CDROM_NORMAL_SPEED:
26573 		case CDROM_DOUBLE_SPEED:
26574 		case CDROM_QUAD_SPEED:
26575 		case CDROM_MAXIMUM_SPEED:
26576 			break;
26577 		default:
26578 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26579 			    "sr_change_speed: "
26580 			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
26581 			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
26582 			return (EINVAL);
26583 		}
26584 
26585 		/*
26586 		 * The current drive speed matches the requested drive speed so
26587 		 * there is no need to send the mode select to change the speed
26588 		 */
26589 		if (current_speed == data) {
26590 			break;
26591 		}
26592 
26593 		/* Build the select data for the requested drive speed */
26594 		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
26595 		select_mhp = (struct mode_header *)select;
26596 		select_mhp->bdesc_length = 0;
26597 		select_page =
26598 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
26599 		select_page =
26600 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
26601 		select_page->mode_page.code = CDROM_MODE_SPEED;
26602 		select_page->mode_page.length = 2;
26603 		select_page->speed = (uchar_t)data;
26604 
26605 		/* Send the mode select for the requested block size */
26606 		ssc = sd_ssc_init(un);
26607 		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
26608 		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
26609 		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
26610 		sd_ssc_fini(ssc);
26611 		if (rval != 0) {
26612 			/*
26613 			 * The mode select failed for the requested drive speed,
26614 			 * so reset the data for the original drive speed and
26615 			 * send it to the target. The error is indicated by the
26616 			 * return value for the failed mode select.
26617 			 */
26618 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26619 			    "sr_drive_speed: Mode Select Failed\n");
26620 			select_page->speed = sense_page->speed;
26621 			ssc = sd_ssc_init(un);
26622 			(void) sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
26623 			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
26624 			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
26625 			sd_ssc_fini(ssc);
26626 		}
26627 		break;
26628 	default:
26629 		/* should not reach here, but check anyway */
26630 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26631 		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
26632 		rval = EINVAL;
26633 		break;
26634 	}
26635 
26636 	if (select) {
26637 		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
26638 	}
26639 	if (sense) {
26640 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
26641 	}
26642 
26643 	return (rval);
26644 }
26645 
26646 
26647 /*
26648  *    Function: sr_atapi_change_speed()
26649  *
26650  * Description: This routine is the driver entry point for handling CD-ROM
26651  *		drive speed ioctl requests for MMC devices that do not support
26652  *		the Real Time Streaming feature (0x107).
26653  *
26654  *		Note: This routine will use the SET SPEED command which may not
26655  *		be supported by all devices.
26656  *
26657  *   Arguments: dev- the device 'dev_t'
26658  *		cmd- the request type; one of CDROMGDRVSPEED (get) or
26659  *		     CDROMSDRVSPEED (set)
26660  *		data- current drive speed or requested drive speed
26661  *		flag- this argument is a pass through to ddi_copyxxx() directly
26662  *		      from the mode argument of ioctl().
26663  *
26664  * Return Code: the code returned by sd_send_scsi_cmd()
26665  *		EINVAL if invalid arguments are provided
26666  *		EFAULT if ddi_copyxxx() fails
26667  *		ENXIO if fail ddi_get_soft_state
26668  *		EIO if invalid mode sense block descriptor length
26669  */
26670 
26671 static int
26672 sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
26673 {
26674 	struct sd_lun			*un;
26675 	struct uscsi_cmd		*com = NULL;
26676 	struct mode_header_grp2		*sense_mhp;
26677 	uchar_t				*sense_page;
26678 	uchar_t				*sense = NULL;
26679 	char				cdb[CDB_GROUP5];
26680 	int				bd_len;
26681 	int				current_speed = 0;
26682 	int				max_speed = 0;
26683 	int				rval;
26684 	sd_ssc_t			*ssc;
26685 
26686 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
26687 
26688 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26689 		return (ENXIO);
26690 	}
26691 
26692 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
26693 
26694 	ssc = sd_ssc_init(un);
26695 	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense,
26696 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
26697 	    SD_PATH_STANDARD);
26698 	sd_ssc_fini(ssc);
26699 	if (rval != 0) {
26700 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26701 		    "sr_atapi_change_speed: Mode Sense Failed\n");
26702 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
26703 		return (rval);
26704 	}
26705 
26706 	/* Check the block descriptor len to handle only 1 block descriptor */
26707 	sense_mhp = (struct mode_header_grp2 *)sense;
26708 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
26709 	if (bd_len > MODE_BLK_DESC_LENGTH) {
26710 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26711 		    "sr_atapi_change_speed: Mode Sense returned invalid "
26712 		    "block descriptor length\n");
26713 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
26714 		return (EIO);
26715 	}
26716 
26717 	/* Calculate the current and maximum drive speeds */
26718 	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
26719 	current_speed = (sense_page[14] << 8) | sense_page[15];
26720 	max_speed = (sense_page[8] << 8) | sense_page[9];
26721 
26722 	/* Process the command */
26723 	switch (cmd) {
26724 	case CDROMGDRVSPEED:
26725 		current_speed /= SD_SPEED_1X;
26726 		if (ddi_copyout(&current_speed, (void *)data,
26727 		    sizeof (int), flag) != 0)
26728 			rval = EFAULT;
26729 		break;
26730 	case CDROMSDRVSPEED:
26731 		/* Convert the speed code to KB/sec */
26732 		switch ((uchar_t)data) {
26733 		case CDROM_NORMAL_SPEED:
26734 			current_speed = SD_SPEED_1X;
26735 			break;
26736 		case CDROM_DOUBLE_SPEED:
26737 			current_speed = 2 * SD_SPEED_1X;
26738 			break;
26739 		case CDROM_QUAD_SPEED:
26740 			current_speed = 4 * SD_SPEED_1X;
26741 			break;
26742 		case CDROM_TWELVE_SPEED:
26743 			current_speed = 12 * SD_SPEED_1X;
26744 			break;
26745 		case CDROM_MAXIMUM_SPEED:
26746 			current_speed = 0xffff;
26747 			break;
26748 		default:
26749 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26750 			    "sr_atapi_change_speed: invalid drive speed %d\n",
26751 			    (uchar_t)data);
26752 			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
26753 			return (EINVAL);
26754 		}
26755 
26756 		/* Check the request against the drive's max speed. */
26757 		if (current_speed != 0xffff) {
26758 			if (current_speed > max_speed) {
26759 				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
26760 				return (EINVAL);
26761 			}
26762 		}
26763 
26764 		/*
26765 		 * Build and send the SET SPEED command
26766 		 *
26767 		 * Note: The SET SPEED (0xBB) command used in this routine is
26768 		 * obsolete per the SCSI MMC spec but still supported in the
26769 		 * MT FUJI vendor spec. Most equipment is adhereing to MT FUJI
26770 		 * therefore the command is still implemented in this routine.
26771 		 */
26772 		bzero(cdb, sizeof (cdb));
26773 		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
26774 		cdb[2] = (uchar_t)(current_speed >> 8);
26775 		cdb[3] = (uchar_t)current_speed;
26776 		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26777 		com->uscsi_cdb	   = (caddr_t)cdb;
26778 		com->uscsi_cdblen  = CDB_GROUP5;
26779 		com->uscsi_bufaddr = NULL;
26780 		com->uscsi_buflen  = 0;
26781 		com->uscsi_flags   = USCSI_DIAGNOSE | USCSI_SILENT;
26782 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, 0, SD_PATH_STANDARD);
26783 		break;
26784 	default:
26785 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26786 		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
26787 		rval = EINVAL;
26788 	}
26789 
26790 	if (sense) {
26791 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
26792 	}
26793 	if (com) {
26794 		kmem_free(com, sizeof (*com));
26795 	}
26796 	return (rval);
26797 }
26798 
26799 
26800 /*
26801  *    Function: sr_pause_resume()
26802  *
26803  * Description: This routine is the driver entry point for handling CD-ROM
26804  *		pause/resume ioctl requests. This only affects the audio play
26805  *		operation.
26806  *
26807  *   Arguments: dev - the device 'dev_t'
26808  *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
26809  *		      for setting the resume bit of the cdb.
26810  *
26811  * Return Code: the code returned by sd_send_scsi_cmd()
26812  *		EINVAL if invalid mode specified
26813  *
26814  */
26815 
26816 static int
26817 sr_pause_resume(dev_t dev, int cmd)
26818 {
26819 	struct sd_lun		*un;
26820 	struct uscsi_cmd	*com;
26821 	char			cdb[CDB_GROUP1];
26822 	int			rval;
26823 
26824 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26825 		return (ENXIO);
26826 	}
26827 
26828 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26829 	bzero(cdb, CDB_GROUP1);
26830 	cdb[0] = SCMD_PAUSE_RESUME;
26831 	switch (cmd) {
26832 	case CDROMRESUME:
26833 		cdb[8] = 1;
26834 		break;
26835 	case CDROMPAUSE:
26836 		cdb[8] = 0;
26837 		break;
26838 	default:
26839 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
26840 		    " Command '%x' Not Supported\n", cmd);
26841 		rval = EINVAL;
26842 		goto done;
26843 	}
26844 
26845 	com->uscsi_cdb    = cdb;
26846 	com->uscsi_cdblen = CDB_GROUP1;
26847 	com->uscsi_flags  = USCSI_DIAGNOSE | USCSI_SILENT;
26848 
26849 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26850 	    SD_PATH_STANDARD);
26851 
26852 done:
26853 	kmem_free(com, sizeof (*com));
26854 	return (rval);
26855 }
26856 
26857 
26858 /*
26859  *    Function: sr_play_msf()
26860  *
26861  * Description: This routine is the driver entry point for handling CD-ROM
26862  *		ioctl requests to output the audio signals at the specified
26863  *		starting address and continue the audio play until the specified
26864  *		ending address (CDROMPLAYMSF) The address is in Minute Second
26865  *		Frame (MSF) format.
26866  *
26867  *   Arguments: dev	- the device 'dev_t'
26868  *		data	- pointer to user provided audio msf structure,
26869  *		          specifying start/end addresses.
26870  *		flag	- this argument is a pass through to ddi_copyxxx()
26871  *		          directly from the mode argument of ioctl().
26872  *
26873  * Return Code: the code returned by sd_send_scsi_cmd()
26874  *		EFAULT if ddi_copyxxx() fails
26875  *		ENXIO if fail ddi_get_soft_state
26876  *		EINVAL if data pointer is NULL
26877  */
26878 
26879 static int
26880 sr_play_msf(dev_t dev, caddr_t data, int flag)
26881 {
26882 	struct sd_lun		*un;
26883 	struct uscsi_cmd	*com;
26884 	struct cdrom_msf	msf_struct;
26885 	struct cdrom_msf	*msf = &msf_struct;
26886 	char			cdb[CDB_GROUP1];
26887 	int			rval;
26888 
26889 	if (data == NULL) {
26890 		return (EINVAL);
26891 	}
26892 
26893 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26894 		return (ENXIO);
26895 	}
26896 
26897 	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
26898 		return (EFAULT);
26899 	}
26900 
26901 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26902 	bzero(cdb, CDB_GROUP1);
26903 	cdb[0] = SCMD_PLAYAUDIO_MSF;
26904 	if (un->un_f_cfg_playmsf_bcd == TRUE) {
26905 		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
26906 		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
26907 		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
26908 		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
26909 		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
26910 		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
26911 	} else {
26912 		cdb[3] = msf->cdmsf_min0;
26913 		cdb[4] = msf->cdmsf_sec0;
26914 		cdb[5] = msf->cdmsf_frame0;
26915 		cdb[6] = msf->cdmsf_min1;
26916 		cdb[7] = msf->cdmsf_sec1;
26917 		cdb[8] = msf->cdmsf_frame1;
26918 	}
26919 	com->uscsi_cdb    = cdb;
26920 	com->uscsi_cdblen = CDB_GROUP1;
26921 	com->uscsi_flags  = USCSI_DIAGNOSE | USCSI_SILENT;
26922 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26923 	    SD_PATH_STANDARD);
26924 	kmem_free(com, sizeof (*com));
26925 	return (rval);
26926 }
26927 
26928 
26929 /*
26930  *    Function: sr_play_trkind()
26931  *
26932  * Description: This routine is the driver entry point for handling CD-ROM
26933  *		ioctl requests to output the audio signals at the specified
26934  *		starting address and continue the audio play until the specified
26935  *		ending address (CDROMPLAYTRKIND). The address is in Track Index
26936  *		format.
26937  *
26938  *   Arguments: dev	- the device 'dev_t'
26939  *		data	- pointer to user provided audio track/index structure,
26940  *		          specifying start/end addresses.
26941  *		flag	- this argument is a pass through to ddi_copyxxx()
26942  *		          directly from the mode argument of ioctl().
26943  *
26944  * Return Code: the code returned by sd_send_scsi_cmd()
26945  *		EFAULT if ddi_copyxxx() fails
26946  *		ENXIO if fail ddi_get_soft_state
26947  *		EINVAL if data pointer is NULL
26948  */
26949 
26950 static int
26951 sr_play_trkind(dev_t dev, caddr_t data, int flag)
26952 {
26953 	struct cdrom_ti		ti_struct;
26954 	struct cdrom_ti		*ti = &ti_struct;
26955 	struct uscsi_cmd	*com = NULL;
26956 	char			cdb[CDB_GROUP1];
26957 	int			rval;
26958 
26959 	if (data == NULL) {
26960 		return (EINVAL);
26961 	}
26962 
26963 	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
26964 		return (EFAULT);
26965 	}
26966 
26967 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26968 	bzero(cdb, CDB_GROUP1);
26969 	cdb[0] = SCMD_PLAYAUDIO_TI;
26970 	cdb[4] = ti->cdti_trk0;
26971 	cdb[5] = ti->cdti_ind0;
26972 	cdb[7] = ti->cdti_trk1;
26973 	cdb[8] = ti->cdti_ind1;
26974 	com->uscsi_cdb    = cdb;
26975 	com->uscsi_cdblen = CDB_GROUP1;
26976 	com->uscsi_flags  = USCSI_DIAGNOSE | USCSI_SILENT;
26977 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26978 	    SD_PATH_STANDARD);
26979 	kmem_free(com, sizeof (*com));
26980 	return (rval);
26981 }
26982 
26983 
26984 /*
26985  *    Function: sr_read_all_subcodes()
26986  *
26987  * Description: This routine is the driver entry point for handling CD-ROM
26988  *		ioctl requests to return raw subcode data while the target is
26989  *		playing audio (CDROMSUBCODE).
26990  *
26991  *   Arguments: dev	- the device 'dev_t'
26992  *		data	- pointer to user provided cdrom subcode structure,
26993  *		          specifying the transfer length and address.
26994  *		flag	- this argument is a pass through to ddi_copyxxx()
26995  *		          directly from the mode argument of ioctl().
26996  *
26997  * Return Code: the code returned by sd_send_scsi_cmd()
26998  *		EFAULT if ddi_copyxxx() fails
26999  *		ENXIO if fail ddi_get_soft_state
27000  *		EINVAL if data pointer is NULL
27001  */
27002 
27003 static int
27004 sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
27005 {
27006 	struct sd_lun		*un = NULL;
27007 	struct uscsi_cmd	*com = NULL;
27008 	struct cdrom_subcode	*subcode = NULL;
27009 	int			rval;
27010 	size_t			buflen;
27011 	char			cdb[CDB_GROUP5];
27012 
27013 #ifdef _MULTI_DATAMODEL
27014 	/* To support ILP32 applications in an LP64 world */
27015 	struct cdrom_subcode32		cdrom_subcode32;
27016 	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
27017 #endif
27018 	if (data == NULL) {
27019 		return (EINVAL);
27020 	}
27021 
27022 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27023 		return (ENXIO);
27024 	}
27025 
27026 	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);
27027 
27028 #ifdef _MULTI_DATAMODEL
27029 	switch (ddi_model_convert_from(flag & FMODELS)) {
27030 	case DDI_MODEL_ILP32:
27031 		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
27032 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27033 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
27034 			kmem_free(subcode, sizeof (struct cdrom_subcode));
27035 			return (EFAULT);
27036 		}
27037 		/* Convert the ILP32 uscsi data from the application to LP64 */
27038 		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
27039 		break;
27040 	case DDI_MODEL_NONE:
27041 		if (ddi_copyin(data, subcode,
27042 		    sizeof (struct cdrom_subcode), flag)) {
27043 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27044 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
27045 			kmem_free(subcode, sizeof (struct cdrom_subcode));
27046 			return (EFAULT);
27047 		}
27048 		break;
27049 	}
27050 #else /* ! _MULTI_DATAMODEL */
27051 	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
27052 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27053 		    "sr_read_all_subcodes: ddi_copyin Failed\n");
27054 		kmem_free(subcode, sizeof (struct cdrom_subcode));
27055 		return (EFAULT);
27056 	}
27057 #endif /* _MULTI_DATAMODEL */
27058 
27059 	/*
27060 	 * Since MMC-2 expects max 3 bytes for length, check if the
27061 	 * length input is greater than 3 bytes
27062 	 */
27063 	if ((subcode->cdsc_length & 0xFF000000) != 0) {
27064 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27065 		    "sr_read_all_subcodes: "
27066 		    "cdrom transfer length too large: %d (limit %d)\n",
27067 		    subcode->cdsc_length, 0xFFFFFF);
27068 		kmem_free(subcode, sizeof (struct cdrom_subcode));
27069 		return (EINVAL);
27070 	}
27071 
27072 	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
27073 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27074 	bzero(cdb, CDB_GROUP5);
27075 
27076 	if (un->un_f_mmc_cap == TRUE) {
27077 		cdb[0] = (char)SCMD_READ_CD;
27078 		cdb[2] = (char)0xff;
27079 		cdb[3] = (char)0xff;
27080 		cdb[4] = (char)0xff;
27081 		cdb[5] = (char)0xff;
27082 		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
27083 		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
27084 		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
27085 		cdb[10] = 1;
27086 	} else {
27087 		/*
27088 		 * Note: A vendor specific command (0xDF) is being used here to
27089 		 * request a read of all subcodes.
27090 		 */
27091 		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
27092 		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
27093 		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
27094 		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
27095 		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
27096 	}
27097 	com->uscsi_cdb	   = cdb;
27098 	com->uscsi_cdblen  = CDB_GROUP5;
27099 	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
27100 	com->uscsi_buflen  = buflen;
27101 	com->uscsi_flags   = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
27102 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
27103 	    SD_PATH_STANDARD);
27104 	kmem_free(subcode, sizeof (struct cdrom_subcode));
27105 	kmem_free(com, sizeof (*com));
27106 	return (rval);
27107 }
27108 
27109 
27110 /*
27111  *    Function: sr_read_subchannel()
27112  *
27113  * Description: This routine is the driver entry point for handling CD-ROM
27114  *		ioctl requests to return the Q sub-channel data of the CD
27115  *		current position block. (CDROMSUBCHNL) The data includes the
27116  *		track number, index number, absolute CD-ROM address (LBA or MSF
27117  *		format per the user) , track relative CD-ROM address (LBA or MSF
27118  *		format per the user), control data and audio status.
27119  *
27120  *   Arguments: dev	- the device 'dev_t'
27121  *		data	- pointer to user provided cdrom sub-channel structure
27122  *		flag	- this argument is a pass through to ddi_copyxxx()
27123  *		          directly from the mode argument of ioctl().
27124  *
27125  * Return Code: the code returned by sd_send_scsi_cmd()
27126  *		EFAULT if ddi_copyxxx() fails
27127  *		ENXIO if fail ddi_get_soft_state
27128  *		EINVAL if data pointer is NULL
27129  */
27130 
27131 static int
27132 sr_read_subchannel(dev_t dev, caddr_t data, int flag)
27133 {
27134 	struct sd_lun		*un;
27135 	struct uscsi_cmd	*com;
27136 	struct cdrom_subchnl	subchanel;
27137 	struct cdrom_subchnl	*subchnl = &subchanel;
27138 	char			cdb[CDB_GROUP1];
27139 	caddr_t			buffer;
27140 	int			rval;
27141 
27142 	if (data == NULL) {
27143 		return (EINVAL);
27144 	}
27145 
27146 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27147 	    (un->un_state == SD_STATE_OFFLINE)) {
27148 		return (ENXIO);
27149 	}
27150 
27151 	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
27152 		return (EFAULT);
27153 	}
27154 
27155 	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
27156 	bzero(cdb, CDB_GROUP1);
27157 	cdb[0] = SCMD_READ_SUBCHANNEL;
27158 	/* Set the MSF bit based on the user requested address format */
27159 	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
27160 	/*
27161 	 * Set the Q bit in byte 2 to indicate that Q sub-channel data be
27162 	 * returned
27163 	 */
27164 	cdb[2] = 0x40;
27165 	/*
27166 	 * Set byte 3 to specify the return data format. A value of 0x01
27167 	 * indicates that the CD-ROM current position should be returned.
27168 	 */
27169 	cdb[3] = 0x01;
27170 	cdb[8] = 0x10;
27171 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27172 	com->uscsi_cdb	   = cdb;
27173 	com->uscsi_cdblen  = CDB_GROUP1;
27174 	com->uscsi_bufaddr = buffer;
27175 	com->uscsi_buflen  = 16;
27176 	com->uscsi_flags   = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
27177 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27178 	    SD_PATH_STANDARD);
27179 	if (rval != 0) {
27180 		kmem_free(buffer, 16);
27181 		kmem_free(com, sizeof (*com));
27182 		return (rval);
27183 	}
27184 
27185 	/* Process the returned Q sub-channel data */
27186 	subchnl->cdsc_audiostatus = buffer[1];
27187 	subchnl->cdsc_adr	= (buffer[5] & 0xF0) >> 4;
27188 	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
27189 	subchnl->cdsc_trk	= buffer[6];
27190 	subchnl->cdsc_ind	= buffer[7];
27191 	if (subchnl->cdsc_format & CDROM_LBA) {
27192 		subchnl->cdsc_absaddr.lba =
27193 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
27194 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
27195 		subchnl->cdsc_reladdr.lba =
27196 		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
27197 		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
27198 	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
27199 		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
27200 		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
27201 		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
27202 		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
27203 		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
27204 		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
27205 	} else {
27206 		subchnl->cdsc_absaddr.msf.minute = buffer[9];
27207 		subchnl->cdsc_absaddr.msf.second = buffer[10];
27208 		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
27209 		subchnl->cdsc_reladdr.msf.minute = buffer[13];
27210 		subchnl->cdsc_reladdr.msf.second = buffer[14];
27211 		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
27212 	}
27213 	kmem_free(buffer, 16);
27214 	kmem_free(com, sizeof (*com));
27215 	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
27216 	    != 0) {
27217 		return (EFAULT);
27218 	}
27219 	return (rval);
27220 }
27221 
27222 
27223 /*
27224  *    Function: sr_read_tocentry()
27225  *
27226  * Description: This routine is the driver entry point for handling CD-ROM
27227  *		ioctl requests to read from the Table of Contents (TOC)
27228  *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
27229  *		fields, the starting address (LBA or MSF format per the user)
27230  *		and the data mode if the user specified track is a data track.
27231  *
27232  *		Note: The READ HEADER (0x44) command used in this routine is
27233  *		obsolete per the SCSI MMC spec but still supported in the
27234  *		MT FUJI vendor spec. Most equipment is adhereing to MT FUJI
27235  *		therefore the command is still implemented in this routine.
27236  *
27237  *   Arguments: dev	- the device 'dev_t'
27238  *		data	- pointer to user provided toc entry structure,
27239  *			  specifying the track # and the address format
27240  *			  (LBA or MSF).
27241  *		flag	- this argument is a pass through to ddi_copyxxx()
27242  *		          directly from the mode argument of ioctl().
27243  *
27244  * Return Code: the code returned by sd_send_scsi_cmd()
27245  *		EFAULT if ddi_copyxxx() fails
27246  *		ENXIO if fail ddi_get_soft_state
27247  *		EINVAL if data pointer is NULL
27248  */
27249 
27250 static int
27251 sr_read_tocentry(dev_t dev, caddr_t data, int flag)
27252 {
27253 	struct sd_lun		*un = NULL;
27254 	struct uscsi_cmd	*com;
27255 	struct cdrom_tocentry	toc_entry;
27256 	struct cdrom_tocentry	*entry = &toc_entry;
27257 	caddr_t			buffer;
27258 	int			rval;
27259 	char			cdb[CDB_GROUP1];
27260 
27261 	if (data == NULL) {
27262 		return (EINVAL);
27263 	}
27264 
27265 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27266 	    (un->un_state == SD_STATE_OFFLINE)) {
27267 		return (ENXIO);
27268 	}
27269 
27270 	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
27271 		return (EFAULT);
27272 	}
27273 
27274 	/* Validate the requested track and address format */
27275 	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
27276 		return (EINVAL);
27277 	}
27278 
27279 	if (entry->cdte_track == 0) {
27280 		return (EINVAL);
27281 	}
27282 
27283 	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
27284 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27285 	bzero(cdb, CDB_GROUP1);
27286 
27287 	cdb[0] = SCMD_READ_TOC;
27288 	/* Set the MSF bit based on the user requested address format  */
27289 	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
27290 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
27291 		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
27292 	} else {
27293 		cdb[6] = entry->cdte_track;
27294 	}
27295 
27296 	/*
27297 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
27298 	 * (4 byte TOC response header + 8 byte track descriptor)
27299 	 */
27300 	cdb[8] = 12;
27301 	com->uscsi_cdb	   = cdb;
27302 	com->uscsi_cdblen  = CDB_GROUP1;
27303 	com->uscsi_bufaddr = buffer;
27304 	com->uscsi_buflen  = 0x0C;
27305 	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
27306 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27307 	    SD_PATH_STANDARD);
27308 	if (rval != 0) {
27309 		kmem_free(buffer, 12);
27310 		kmem_free(com, sizeof (*com));
27311 		return (rval);
27312 	}
27313 
27314 	/* Process the toc entry */
27315 	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
27316 	entry->cdte_ctrl	= (buffer[5] & 0x0F);
27317 	if (entry->cdte_format & CDROM_LBA) {
27318 		entry->cdte_addr.lba =
27319 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
27320 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
27321 	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
27322 		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
27323 		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
27324 		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
27325 		/*
27326 		 * Send a READ TOC command using the LBA address format to get
27327 		 * the LBA for the track requested so it can be used in the
27328 		 * READ HEADER request
27329 		 *
27330 		 * Note: The MSF bit of the READ HEADER command specifies the
27331 		 * output format. The block address specified in that command
27332 		 * must be in LBA format.
27333 		 */
27334 		cdb[1] = 0;
27335 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27336 		    SD_PATH_STANDARD);
27337 		if (rval != 0) {
27338 			kmem_free(buffer, 12);
27339 			kmem_free(com, sizeof (*com));
27340 			return (rval);
27341 		}
27342 	} else {
27343 		entry->cdte_addr.msf.minute	= buffer[9];
27344 		entry->cdte_addr.msf.second	= buffer[10];
27345 		entry->cdte_addr.msf.frame	= buffer[11];
27346 		/*
27347 		 * Send a READ TOC command using the LBA address format to get
27348 		 * the LBA for the track requested so it can be used in the
27349 		 * READ HEADER request
27350 		 *
27351 		 * Note: The MSF bit of the READ HEADER command specifies the
27352 		 * output format. The block address specified in that command
27353 		 * must be in LBA format.
27354 		 */
27355 		cdb[1] = 0;
27356 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27357 		    SD_PATH_STANDARD);
27358 		if (rval != 0) {
27359 			kmem_free(buffer, 12);
27360 			kmem_free(com, sizeof (*com));
27361 			return (rval);
27362 		}
27363 	}
27364 
27365 	/*
27366 	 * Build and send the READ HEADER command to determine the data mode of
27367 	 * the user specified track.
27368 	 */
27369 	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
27370 	    (entry->cdte_track != CDROM_LEADOUT)) {
27371 		bzero(cdb, CDB_GROUP1);
27372 		cdb[0] = SCMD_READ_HEADER;
27373 		cdb[2] = buffer[8];
27374 		cdb[3] = buffer[9];
27375 		cdb[4] = buffer[10];
27376 		cdb[5] = buffer[11];
27377 		cdb[8] = 0x08;
27378 		com->uscsi_buflen = 0x08;
27379 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27380 		    SD_PATH_STANDARD);
27381 		if (rval == 0) {
27382 			entry->cdte_datamode = buffer[0];
27383 		} else {
27384 			/*
27385 			 * READ HEADER command failed, since this is
27386 			 * obsoleted in one spec, its better to return
27387 			 * -1 for an invlid track so that we can still
27388 			 * receive the rest of the TOC data.
27389 			 */
27390 			entry->cdte_datamode = (uchar_t)-1;
27391 		}
27392 	} else {
27393 		entry->cdte_datamode = (uchar_t)-1;
27394 	}
27395 
27396 	kmem_free(buffer, 12);
27397 	kmem_free(com, sizeof (*com));
27398 	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
27399 		return (EFAULT);
27400 
27401 	return (rval);
27402 }
27403 
27404 
27405 /*
27406  *    Function: sr_read_tochdr()
27407  *
27408  * Description: This routine is the driver entry point for handling CD-ROM
27409  *		ioctl requests to read the Table of Contents (TOC) header
27410  *		(CDROMREADTOHDR). The TOC header consists of the disk starting
27411  *		and ending track numbers
27412  *
27413  *   Arguments: dev	- the device 'dev_t'
27414  *		data	- pointer to user provided toc header structure,
27415  *			  specifying the starting and ending track numbers.
27416  *		flag	- this argument is a pass through to ddi_copyxxx()
27417  *			  directly from the mode argument of ioctl().
27418  *
27419  * Return Code: the code returned by sd_send_scsi_cmd()
27420  *		EFAULT if ddi_copyxxx() fails
27421  *		ENXIO if fail ddi_get_soft_state
27422  *		EINVAL if data pointer is NULL
27423  */
27424 
27425 static int
27426 sr_read_tochdr(dev_t dev, caddr_t data, int flag)
27427 {
27428 	struct sd_lun		*un;
27429 	struct uscsi_cmd	*com;
27430 	struct cdrom_tochdr	toc_header;
27431 	struct cdrom_tochdr	*hdr = &toc_header;
27432 	char			cdb[CDB_GROUP1];
27433 	int			rval;
27434 	caddr_t			buffer;
27435 
27436 	if (data == NULL) {
27437 		return (EINVAL);
27438 	}
27439 
27440 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27441 	    (un->un_state == SD_STATE_OFFLINE)) {
27442 		return (ENXIO);
27443 	}
27444 
27445 	buffer = kmem_zalloc(4, KM_SLEEP);
27446 	bzero(cdb, CDB_GROUP1);
27447 	cdb[0] = SCMD_READ_TOC;
27448 	/*
27449 	 * Specifying a track number of 0x00 in the READ TOC command indicates
27450 	 * that the TOC header should be returned
27451 	 */
27452 	cdb[6] = 0x00;
27453 	/*
27454 	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
27455 	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
27456 	 */
27457 	cdb[8] = 0x04;
27458 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27459 	com->uscsi_cdb	   = cdb;
27460 	com->uscsi_cdblen  = CDB_GROUP1;
27461 	com->uscsi_bufaddr = buffer;
27462 	com->uscsi_buflen  = 0x04;
27463 	com->uscsi_timeout = 300;
27464 	com->uscsi_flags   = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
27465 
27466 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27467 	    SD_PATH_STANDARD);
27468 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
27469 		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
27470 		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
27471 	} else {
27472 		hdr->cdth_trk0 = buffer[2];
27473 		hdr->cdth_trk1 = buffer[3];
27474 	}
27475 	kmem_free(buffer, 4);
27476 	kmem_free(com, sizeof (*com));
27477 	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
27478 		return (EFAULT);
27479 	}
27480 	return (rval);
27481 }
27482 
27483 
27484 /*
27485  * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
27486  * sr_read_cdda(), sr_read_cdxa(), routines implement driver support for
27487  * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
27488  * digital audio and extended architecture digital audio. These modes are
27489  * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
27490  * MMC specs.
27491  *
27492  * In addition to support for the various data formats these routines also
27493  * include support for devices that implement only the direct access READ
27494  * commands (0x08, 0x28), devices that implement the READ_CD commands
27495  * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
27496  * READ CDXA commands (0xD8, 0xDB)
27497  */
27498 
27499 /*
27500  *    Function: sr_read_mode1()
27501  *
27502  * Description: This routine is the driver entry point for handling CD-ROM
27503  *		ioctl read mode1 requests (CDROMREADMODE1).
27504  *
27505  *   Arguments: dev	- the device 'dev_t'
27506  *		data	- pointer to user provided cd read structure specifying
27507  *			  the lba buffer address and length.
27508  *		flag	- this argument is a pass through to ddi_copyxxx()
27509  *			  directly from the mode argument of ioctl().
27510  *
27511  * Return Code: the code returned by sd_send_scsi_cmd()
27512  *		EFAULT if ddi_copyxxx() fails
27513  *		ENXIO if fail ddi_get_soft_state
27514  *		EINVAL if data pointer is NULL
27515  */
27516 
27517 static int
27518 sr_read_mode1(dev_t dev, caddr_t data, int flag)
27519 {
27520 	struct sd_lun		*un;
27521 	struct cdrom_read	mode1_struct;
27522 	struct cdrom_read	*mode1 = &mode1_struct;
27523 	int			rval;
27524 	sd_ssc_t		*ssc;
27525 
27526 #ifdef _MULTI_DATAMODEL
27527 	/* To support ILP32 applications in an LP64 world */
27528 	struct cdrom_read32	cdrom_read32;
27529 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
27530 #endif /* _MULTI_DATAMODEL */
27531 
27532 	if (data == NULL) {
27533 		return (EINVAL);
27534 	}
27535 
27536 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27537 	    (un->un_state == SD_STATE_OFFLINE)) {
27538 		return (ENXIO);
27539 	}
27540 
27541 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
27542 	    "sd_read_mode1: entry: un:0x%p\n", un);
27543 
27544 #ifdef _MULTI_DATAMODEL
27545 	switch (ddi_model_convert_from(flag & FMODELS)) {
27546 	case DDI_MODEL_ILP32:
27547 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
27548 			return (EFAULT);
27549 		}
27550 		/* Convert the ILP32 uscsi data from the application to LP64 */
27551 		cdrom_read32tocdrom_read(cdrd32, mode1);
27552 		break;
27553 	case DDI_MODEL_NONE:
27554 		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
27555 			return (EFAULT);
27556 		}
27557 	}
27558 #else /* ! _MULTI_DATAMODEL */
27559 	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
27560 		return (EFAULT);
27561 	}
27562 #endif /* _MULTI_DATAMODEL */
27563 
27564 	ssc = sd_ssc_init(un);
27565 	rval = sd_send_scsi_READ(ssc, mode1->cdread_bufaddr,
27566 	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
27567 	sd_ssc_fini(ssc);
27568 
27569 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
27570 	    "sd_read_mode1: exit: un:0x%p\n", un);
27571 
27572 	return (rval);
27573 }
27574 
27575 
27576 /*
27577  *    Function: sr_read_cd_mode2()
27578  *
27579  * Description: This routine is the driver entry point for handling CD-ROM
27580  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
27581  *		support the READ CD (0xBE) command or the 1st generation
27582  *		READ CD (0xD4) command.
27583  *
27584  *   Arguments: dev	- the device 'dev_t'
27585  *		data	- pointer to user provided cd read structure specifying
27586  *			  the lba buffer address and length.
27587  *		flag	- this argument is a pass through to ddi_copyxxx()
27588  *			  directly from the mode argument of ioctl().
27589  *
27590  * Return Code: the code returned by sd_send_scsi_cmd()
27591  *		EFAULT if ddi_copyxxx() fails
27592  *		ENXIO if fail ddi_get_soft_state
27593  *		EINVAL if data pointer is NULL
27594  */
27595 
27596 static int
27597 sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
27598 {
27599 	struct sd_lun		*un;
27600 	struct uscsi_cmd	*com;
27601 	struct cdrom_read	mode2_struct;
27602 	struct cdrom_read	*mode2 = &mode2_struct;
27603 	uchar_t			cdb[CDB_GROUP5];
27604 	int			nblocks;
27605 	int			rval;
27606 #ifdef _MULTI_DATAMODEL
27607 	/*  To support ILP32 applications in an LP64 world */
27608 	struct cdrom_read32	cdrom_read32;
27609 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
27610 #endif /* _MULTI_DATAMODEL */
27611 
27612 	if (data == NULL) {
27613 		return (EINVAL);
27614 	}
27615 
27616 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27617 	    (un->un_state == SD_STATE_OFFLINE)) {
27618 		return (ENXIO);
27619 	}
27620 
27621 #ifdef _MULTI_DATAMODEL
27622 	switch (ddi_model_convert_from(flag & FMODELS)) {
27623 	case DDI_MODEL_ILP32:
27624 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
27625 			return (EFAULT);
27626 		}
27627 		/* Convert the ILP32 uscsi data from the application to LP64 */
27628 		cdrom_read32tocdrom_read(cdrd32, mode2);
27629 		break;
27630 	case DDI_MODEL_NONE:
27631 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
27632 			return (EFAULT);
27633 		}
27634 		break;
27635 	}
27636 
27637 #else /* ! _MULTI_DATAMODEL */
27638 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
27639 		return (EFAULT);
27640 	}
27641 #endif /* _MULTI_DATAMODEL */
27642 
27643 	bzero(cdb, sizeof (cdb));
27644 	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
27645 		/* Read command supported by 1st generation atapi drives */
27646 		cdb[0] = SCMD_READ_CDD4;
27647 	} else {
27648 		/* Universal CD Access Command */
27649 		cdb[0] = SCMD_READ_CD;
27650 	}
27651 
27652 	/*
27653 	 * Set expected sector type to: 2336s byte, Mode 2 Yellow Book
27654 	 */
27655 	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
27656 
27657 	/* set the start address */
27658 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0XFF);
27659 	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0XFF);
27660 	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
27661 	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
27662 
27663 	/* set the transfer length */
27664 	nblocks = mode2->cdread_buflen / 2336;
27665 	cdb[6] = (uchar_t)(nblocks >> 16);
27666 	cdb[7] = (uchar_t)(nblocks >> 8);
27667 	cdb[8] = (uchar_t)nblocks;
27668 
27669 	/* set the filter bits */
27670 	cdb[9] = CDROM_READ_CD_USERDATA;
27671 
27672 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27673 	com->uscsi_cdb = (caddr_t)cdb;
27674 	com->uscsi_cdblen = sizeof (cdb);
27675 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
27676 	com->uscsi_buflen = mode2->cdread_buflen;
27677 	com->uscsi_flags = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
27678 
27679 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
27680 	    SD_PATH_STANDARD);
27681 	kmem_free(com, sizeof (*com));
27682 	return (rval);
27683 }
27684 
27685 
27686 /*
27687  *    Function: sr_read_mode2()
27688  *
27689  * Description: This routine is the driver entry point for handling CD-ROM
27690  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
27691  *		do not support the READ CD (0xBE) command.
27692  *
27693  *   Arguments: dev	- the device 'dev_t'
27694  *		data	- pointer to user provided cd read structure specifying
27695  *			  the lba buffer address and length.
27696  *		flag	- this argument is a pass through to ddi_copyxxx()
27697  *			  directly from the mode argument of ioctl().
27698  *
27699  * Return Code: the code returned by sd_send_scsi_cmd()
27700  *		EFAULT if ddi_copyxxx() fails
27701  *		ENXIO if fail ddi_get_soft_state
27702  *		EINVAL if data pointer is NULL
27703  *		EIO if fail to reset block size
27704  *		EAGAIN if commands are in progress in the driver
27705  */
27706 
27707 static int
27708 sr_read_mode2(dev_t dev, caddr_t data, int flag)
27709 {
27710 	struct sd_lun		*un;
27711 	struct cdrom_read	mode2_struct;
27712 	struct cdrom_read	*mode2 = &mode2_struct;
27713 	int			rval;
27714 	uint32_t		restore_blksize;
27715 	struct uscsi_cmd	*com;
27716 	uchar_t			cdb[CDB_GROUP0];
27717 	int			nblocks;
27718 
27719 #ifdef _MULTI_DATAMODEL
27720 	/* To support ILP32 applications in an LP64 world */
27721 	struct cdrom_read32	cdrom_read32;
27722 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
27723 #endif /* _MULTI_DATAMODEL */
27724 
27725 	if (data == NULL) {
27726 		return (EINVAL);
27727 	}
27728 
27729 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27730 	    (un->un_state == SD_STATE_OFFLINE)) {
27731 		return (ENXIO);
27732 	}
27733 
27734 	/*
27735 	 * Because this routine will update the device and driver block size
27736 	 * being used we want to make sure there are no commands in progress.
27737 	 * If commands are in progress the user will have to try again.
27738 	 *
27739 	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
27740 	 * in sdioctl to protect commands from sdioctl through to the top of
27741 	 * sd_uscsi_strategy. See sdioctl for details.
27742 	 */
27743 	mutex_enter(SD_MUTEX(un));
27744 	if (un->un_ncmds_in_driver != 1) {
27745 		mutex_exit(SD_MUTEX(un));
27746 		return (EAGAIN);
27747 	}
27748 	mutex_exit(SD_MUTEX(un));
27749 
27750 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
27751 	    "sd_read_mode2: entry: un:0x%p\n", un);
27752 
27753 #ifdef _MULTI_DATAMODEL
27754 	switch (ddi_model_convert_from(flag & FMODELS)) {
27755 	case DDI_MODEL_ILP32:
27756 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
27757 			return (EFAULT);
27758 		}
27759 		/* Convert the ILP32 uscsi data from the application to LP64 */
27760 		cdrom_read32tocdrom_read(cdrd32, mode2);
27761 		break;
27762 	case DDI_MODEL_NONE:
27763 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
27764 			return (EFAULT);
27765 		}
27766 		break;
27767 	}
27768 #else /* ! _MULTI_DATAMODEL */
27769 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
27770 		return (EFAULT);
27771 	}
27772 #endif /* _MULTI_DATAMODEL */
27773 
27774 	/* Store the current target block size for restoration later */
27775 	restore_blksize = un->un_tgt_blocksize;
27776 
27777 	/* Change the device and soft state target block size to 2336 */
27778 	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
27779 		rval = EIO;
27780 		goto done;
27781 	}
27782 
27783 
27784 	bzero(cdb, sizeof (cdb));
27785 
27786 	/* set READ operation */
27787 	cdb[0] = SCMD_READ;
27788 
27789 	/* adjust lba for 2kbyte blocks from 512 byte blocks */
27790 	mode2->cdread_lba >>= 2;
27791 
27792 	/* set the start address */
27793 	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0X1F);
27794 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
27795 	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);
27796 
27797 	/* set the transfer length */
27798 	nblocks = mode2->cdread_buflen / 2336;
27799 	cdb[4] = (uchar_t)nblocks & 0xFF;
27800 
27801 	/* build command */
27802 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27803 	com->uscsi_cdb = (caddr_t)cdb;
27804 	com->uscsi_cdblen = sizeof (cdb);
27805 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
27806 	com->uscsi_buflen = mode2->cdread_buflen;
27807 	com->uscsi_flags = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
27808 
27809 	/*
27810 	 * Issue SCSI command with user space address for read buffer.
27811 	 *
27812 	 * This sends the command through main channel in the driver.
27813 	 *
27814 	 * Since this is accessed via an IOCTL call, we go through the
27815 	 * standard path, so that if the device was powered down, then
27816 	 * it would be 'awakened' to handle the command.
27817 	 */
27818 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
27819 	    SD_PATH_STANDARD);
27820 
27821 	kmem_free(com, sizeof (*com));
27822 
27823 	/* Restore the device and soft state target block size */
27824 	if (sr_sector_mode(dev, restore_blksize) != 0) {
27825 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27826 		    "can't do switch back to mode 1\n");
27827 		/*
27828 		 * If sd_send_scsi_READ succeeded we still need to report
27829 		 * an error because we failed to reset the block size
27830 		 */
27831 		if (rval == 0) {
27832 			rval = EIO;
27833 		}
27834 	}
27835 
27836 done:
27837 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
27838 	    "sd_read_mode2: exit: un:0x%p\n", un);
27839 
27840 	return (rval);
27841 }
27842 
27843 
27844 /*
27845  *    Function: sr_sector_mode()
27846  *
27847  * Description: This utility function is used by sr_read_mode2 to set the target
27848  *		block size based on the user specified size. This is a legacy
27849  *		implementation based upon a vendor specific mode page
27850  *
27851  *   Arguments: dev	- the device 'dev_t'
27852  *		data	- flag indicating if block size is being set to 2336 or
27853  *			  512.
27854  *
27855  * Return Code: the code returned by sd_send_scsi_cmd()
27856  *		EFAULT if ddi_copyxxx() fails
27857  *		ENXIO if fail ddi_get_soft_state
27858  *		EINVAL if data pointer is NULL
27859  */
27860 
27861 static int
27862 sr_sector_mode(dev_t dev, uint32_t blksize)
27863 {
27864 	struct sd_lun	*un;
27865 	uchar_t		*sense;
27866 	uchar_t		*select;
27867 	int		rval;
27868 	sd_ssc_t	*ssc;
27869 
27870 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27871 	    (un->un_state == SD_STATE_OFFLINE)) {
27872 		return (ENXIO);
27873 	}
27874 
27875 	sense = kmem_zalloc(20, KM_SLEEP);
27876 
27877 	/* Note: This is a vendor specific mode page (0x81) */
27878 	ssc = sd_ssc_init(un);
27879 	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense, 20, 0x81,
27880 	    SD_PATH_STANDARD);
27881 	sd_ssc_fini(ssc);
27882 	if (rval != 0) {
27883 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
27884 		    "sr_sector_mode: Mode Sense failed\n");
27885 		kmem_free(sense, 20);
27886 		return (rval);
27887 	}
27888 	select = kmem_zalloc(20, KM_SLEEP);
27889 	select[3] = 0x08;
27890 	select[10] = ((blksize >> 8) & 0xff);
27891 	select[11] = (blksize & 0xff);
27892 	select[12] = 0x01;
27893 	select[13] = 0x06;
27894 	select[14] = sense[14];
27895 	select[15] = sense[15];
27896 	if (blksize == SD_MODE2_BLKSIZE) {
27897 		select[14] |= 0x01;
27898 	}
27899 
27900 	ssc = sd_ssc_init(un);
27901 	rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select, 20,
27902 	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27903 	sd_ssc_fini(ssc);
27904 	if (rval != 0) {
27905 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
27906 		    "sr_sector_mode: Mode Select failed\n");
27907 	} else {
27908 		/*
27909 		 * Only update the softstate block size if we successfully
27910 		 * changed the device block mode.
27911 		 */
27912 		mutex_enter(SD_MUTEX(un));
27913 		sd_update_block_info(un, blksize, 0);
27914 		mutex_exit(SD_MUTEX(un));
27915 	}
27916 	kmem_free(sense, 20);
27917 	kmem_free(select, 20);
27918 	return (rval);
27919 }
27920 
27921 
27922 /*
27923  *    Function: sr_read_cdda()
27924  *
27925  * Description: This routine is the driver entry point for handling CD-ROM
27926  *		ioctl requests to return CD-DA or subcode data. (CDROMCDDA) If
27927  *		the target supports CDDA these requests are handled via a vendor
27928  *		specific command (0xD8) If the target does not support CDDA
27929  *		these requests are handled via the READ CD command (0xBE).
27930  *
27931  *   Arguments: dev	- the device 'dev_t'
27932  *		data	- pointer to user provided CD-DA structure specifying
27933  *			  the track starting address, transfer length, and
27934  *			  subcode options.
27935  *		flag	- this argument is a pass through to ddi_copyxxx()
27936  *			  directly from the mode argument of ioctl().
27937  *
27938  * Return Code: the code returned by sd_send_scsi_cmd()
27939  *		EFAULT if ddi_copyxxx() fails
27940  *		ENXIO if fail ddi_get_soft_state
27941  *		EINVAL if invalid arguments are provided
27942  *		ENOTTY
27943  */
27944 
27945 static int
27946 sr_read_cdda(dev_t dev, caddr_t data, int flag)
27947 {
27948 	struct sd_lun			*un;
27949 	struct uscsi_cmd		*com;
27950 	struct cdrom_cdda		*cdda;
27951 	int				rval;
27952 	size_t				buflen;
27953 	char				cdb[CDB_GROUP5];
27954 
27955 #ifdef _MULTI_DATAMODEL
27956 	/* To support ILP32 applications in an LP64 world */
27957 	struct cdrom_cdda32	cdrom_cdda32;
27958 	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
27959 #endif /* _MULTI_DATAMODEL */
27960 
27961 	if (data == NULL) {
27962 		return (EINVAL);
27963 	}
27964 
27965 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27966 		return (ENXIO);
27967 	}
27968 
27969 	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);
27970 
27971 #ifdef _MULTI_DATAMODEL
27972 	switch (ddi_model_convert_from(flag & FMODELS)) {
27973 	case DDI_MODEL_ILP32:
27974 		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
27975 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27976 			    "sr_read_cdda: ddi_copyin Failed\n");
27977 			kmem_free(cdda, sizeof (struct cdrom_cdda));
27978 			return (EFAULT);
27979 		}
27980 		/* Convert the ILP32 uscsi data from the application to LP64 */
27981 		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
27982 		break;
27983 	case DDI_MODEL_NONE:
27984 		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
27985 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27986 			    "sr_read_cdda: ddi_copyin Failed\n");
27987 			kmem_free(cdda, sizeof (struct cdrom_cdda));
27988 			return (EFAULT);
27989 		}
27990 		break;
27991 	}
27992 #else /* ! _MULTI_DATAMODEL */
27993 	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
27994 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27995 		    "sr_read_cdda: ddi_copyin Failed\n");
27996 		kmem_free(cdda, sizeof (struct cdrom_cdda));
27997 		return (EFAULT);
27998 	}
27999 #endif /* _MULTI_DATAMODEL */
28000 
28001 	/*
28002 	 * Since MMC-2 expects max 3 bytes for length, check if the
28003 	 * length input is greater than 3 bytes
28004 	 */
28005 	if ((cdda->cdda_length & 0xFF000000) != 0) {
28006 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
28007 		    "cdrom transfer length too large: %d (limit %d)\n",
28008 		    cdda->cdda_length, 0xFFFFFF);
28009 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28010 		return (EINVAL);
28011 	}
28012 
28013 	switch (cdda->cdda_subcode) {
28014 	case CDROM_DA_NO_SUBCODE:
28015 		buflen = CDROM_BLK_2352 * cdda->cdda_length;
28016 		break;
28017 	case CDROM_DA_SUBQ:
28018 		buflen = CDROM_BLK_2368 * cdda->cdda_length;
28019 		break;
28020 	case CDROM_DA_ALL_SUBCODE:
28021 		buflen = CDROM_BLK_2448 * cdda->cdda_length;
28022 		break;
28023 	case CDROM_DA_SUBCODE_ONLY:
28024 		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
28025 		break;
28026 	default:
28027 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28028 		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
28029 		    cdda->cdda_subcode);
28030 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28031 		return (EINVAL);
28032 	}
28033 
28034 	/* Build and send the command */
28035 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28036 	bzero(cdb, CDB_GROUP5);
28037 
28038 	if (un->un_f_cfg_cdda == TRUE) {
28039 		cdb[0] = (char)SCMD_READ_CD;
28040 		cdb[1] = 0x04;
28041 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
28042 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
28043 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
28044 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
28045 		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
28046 		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
28047 		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
28048 		cdb[9] = 0x10;
28049 		switch (cdda->cdda_subcode) {
28050 		case CDROM_DA_NO_SUBCODE :
28051 			cdb[10] = 0x0;
28052 			break;
28053 		case CDROM_DA_SUBQ :
28054 			cdb[10] = 0x2;
28055 			break;
28056 		case CDROM_DA_ALL_SUBCODE :
28057 			cdb[10] = 0x1;
28058 			break;
28059 		case CDROM_DA_SUBCODE_ONLY :
28060 			/* FALLTHROUGH */
28061 		default :
28062 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28063 			kmem_free(com, sizeof (*com));
28064 			return (ENOTTY);
28065 		}
28066 	} else {
28067 		cdb[0] = (char)SCMD_READ_CDDA;
28068 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
28069 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
28070 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
28071 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
28072 		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
28073 		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
28074 		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
28075 		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
28076 		cdb[10] = cdda->cdda_subcode;
28077 	}
28078 
28079 	com->uscsi_cdb = cdb;
28080 	com->uscsi_cdblen = CDB_GROUP5;
28081 	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
28082 	com->uscsi_buflen = buflen;
28083 	com->uscsi_flags = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
28084 
28085 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
28086 	    SD_PATH_STANDARD);
28087 
28088 	kmem_free(cdda, sizeof (struct cdrom_cdda));
28089 	kmem_free(com, sizeof (*com));
28090 	return (rval);
28091 }
28092 
28093 
28094 /*
28095  *    Function: sr_read_cdxa()
28096  *
28097  * Description: This routine is the driver entry point for handling CD-ROM
28098  *		ioctl requests to return CD-XA (Extended Architecture) data.
28099  *		(CDROMCDXA).
28100  *
28101  *   Arguments: dev	- the device 'dev_t'
28102  *		data	- pointer to user provided CD-XA structure specifying
28103  *			  the data starting address, transfer length, and format
28104  *		flag	- this argument is a pass through to ddi_copyxxx()
28105  *			  directly from the mode argument of ioctl().
28106  *
28107  * Return Code: the code returned by sd_send_scsi_cmd()
28108  *		EFAULT if ddi_copyxxx() fails
28109  *		ENXIO if fail ddi_get_soft_state
28110  *		EINVAL if data pointer is NULL
28111  */
28112 
28113 static int
28114 sr_read_cdxa(dev_t dev, caddr_t data, int flag)
28115 {
28116 	struct sd_lun		*un;
28117 	struct uscsi_cmd	*com;
28118 	struct cdrom_cdxa	*cdxa;
28119 	int			rval;
28120 	size_t			buflen;
28121 	char			cdb[CDB_GROUP5];
28122 	uchar_t			read_flags;
28123 
28124 #ifdef _MULTI_DATAMODEL
28125 	/* To support ILP32 applications in an LP64 world */
28126 	struct cdrom_cdxa32		cdrom_cdxa32;
28127 	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
28128 #endif /* _MULTI_DATAMODEL */
28129 
28130 	if (data == NULL) {
28131 		return (EINVAL);
28132 	}
28133 
28134 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28135 		return (ENXIO);
28136 	}
28137 
28138 	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);
28139 
28140 #ifdef _MULTI_DATAMODEL
28141 	switch (ddi_model_convert_from(flag & FMODELS)) {
28142 	case DDI_MODEL_ILP32:
28143 		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
28144 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28145 			return (EFAULT);
28146 		}
28147 		/*
28148 		 * Convert the ILP32 uscsi data from the
28149 		 * application to LP64 for internal use.
28150 		 */
28151 		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
28152 		break;
28153 	case DDI_MODEL_NONE:
28154 		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
28155 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28156 			return (EFAULT);
28157 		}
28158 		break;
28159 	}
28160 #else /* ! _MULTI_DATAMODEL */
28161 	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
28162 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28163 		return (EFAULT);
28164 	}
28165 #endif /* _MULTI_DATAMODEL */
28166 
28167 	/*
28168 	 * Since MMC-2 expects max 3 bytes for length, check if the
28169 	 * length input is greater than 3 bytes
28170 	 */
28171 	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
28172 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
28173 		    "cdrom transfer length too large: %d (limit %d)\n",
28174 		    cdxa->cdxa_length, 0xFFFFFF);
28175 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28176 		return (EINVAL);
28177 	}
28178 
28179 	switch (cdxa->cdxa_format) {
28180 	case CDROM_XA_DATA:
28181 		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
28182 		read_flags = 0x10;
28183 		break;
28184 	case CDROM_XA_SECTOR_DATA:
28185 		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
28186 		read_flags = 0xf8;
28187 		break;
28188 	case CDROM_XA_DATA_W_ERROR:
28189 		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
28190 		read_flags = 0xfc;
28191 		break;
28192 	default:
28193 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28194 		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
28195 		    cdxa->cdxa_format);
28196 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28197 		return (EINVAL);
28198 	}
28199 
28200 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28201 	bzero(cdb, CDB_GROUP5);
28202 	if (un->un_f_mmc_cap == TRUE) {
28203 		cdb[0] = (char)SCMD_READ_CD;
28204 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
28205 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
28206 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
28207 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
28208 		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
28209 		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
28210 		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
28211 		cdb[9] = (char)read_flags;
28212 	} else {
28213 		/*
28214 		 * Note: A vendor specific command (0xDB) is being used her to
28215 		 * request a read of all subcodes.
28216 		 */
28217 		cdb[0] = (char)SCMD_READ_CDXA;
28218 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
28219 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
28220 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
28221 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
28222 		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
28223 		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
28224 		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
28225 		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
28226 		cdb[10] = cdxa->cdxa_format;
28227 	}
28228 	com->uscsi_cdb	   = cdb;
28229 	com->uscsi_cdblen  = CDB_GROUP5;
28230 	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
28231 	com->uscsi_buflen  = buflen;
28232 	com->uscsi_flags   = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
28233 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
28234 	    SD_PATH_STANDARD);
28235 	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28236 	kmem_free(com, sizeof (*com));
28237 	return (rval);
28238 }
28239 
28240 
28241 /*
28242  *    Function: sr_eject()
28243  *
28244  * Description: This routine is the driver entry point for handling CD-ROM
28245  *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
28246  *
28247  *   Arguments: dev	- the device 'dev_t'
28248  *
28249  * Return Code: the code returned by sd_send_scsi_cmd()
28250  */
28251 
28252 static int
28253 sr_eject(dev_t dev)
28254 {
28255 	struct sd_lun	*un;
28256 	int		rval;
28257 	sd_ssc_t	*ssc;
28258 
28259 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28260 	    (un->un_state == SD_STATE_OFFLINE)) {
28261 		return (ENXIO);
28262 	}
28263 
28264 	/*
28265 	 * To prevent race conditions with the eject
28266 	 * command, keep track of an eject command as
28267 	 * it progresses. If we are already handling
28268 	 * an eject command in the driver for the given
28269 	 * unit and another request to eject is received
28270 	 * immediately return EAGAIN so we don't lose
28271 	 * the command if the current eject command fails.
28272 	 */
28273 	mutex_enter(SD_MUTEX(un));
28274 	if (un->un_f_ejecting == TRUE) {
28275 		mutex_exit(SD_MUTEX(un));
28276 		return (EAGAIN);
28277 	}
28278 	un->un_f_ejecting = TRUE;
28279 	mutex_exit(SD_MUTEX(un));
28280 
28281 	ssc = sd_ssc_init(un);
28282 	rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_ALLOW,
28283 	    SD_PATH_STANDARD);
28284 	sd_ssc_fini(ssc);
28285 
28286 	if (rval != 0) {
28287 		mutex_enter(SD_MUTEX(un));
28288 		un->un_f_ejecting = FALSE;
28289 		mutex_exit(SD_MUTEX(un));
28290 		return (rval);
28291 	}
28292 
28293 	ssc = sd_ssc_init(un);
28294 	rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
28295 	    SD_TARGET_EJECT, SD_PATH_STANDARD);
28296 	sd_ssc_fini(ssc);
28297 
28298 	if (rval == 0) {
28299 		mutex_enter(SD_MUTEX(un));
28300 		sr_ejected(un);
28301 		un->un_mediastate = DKIO_EJECTED;
28302 		un->un_f_ejecting = FALSE;
28303 		cv_broadcast(&un->un_state_cv);
28304 		mutex_exit(SD_MUTEX(un));
28305 	} else {
28306 		mutex_enter(SD_MUTEX(un));
28307 		un->un_f_ejecting = FALSE;
28308 		mutex_exit(SD_MUTEX(un));
28309 	}
28310 	return (rval);
28311 }
28312 
28313 
28314 /*
28315  *    Function: sr_ejected()
28316  *
28317  * Description: This routine updates the soft state structure to invalidate the
28318  *		geometry information after the media has been ejected or a
28319  *		media eject has been detected.
28320  *
28321  *   Arguments: un - driver soft state (unit) structure
28322  */
28323 
28324 static void
28325 sr_ejected(struct sd_lun *un)
28326 {
28327 	struct sd_errstats *stp;
28328 
28329 	ASSERT(un != NULL);
28330 	ASSERT(mutex_owned(SD_MUTEX(un)));
28331 
28332 	un->un_f_blockcount_is_valid	= FALSE;
28333 	un->un_f_tgt_blocksize_is_valid	= FALSE;
28334 	mutex_exit(SD_MUTEX(un));
28335 	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
28336 	mutex_enter(SD_MUTEX(un));
28337 
28338 	if (un->un_errstats != NULL) {
28339 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
28340 		stp->sd_capacity.value.ui64 = 0;
28341 	}
28342 }
28343 
28344 
28345 /*
28346  *    Function: sr_check_wp()
28347  *
28348  * Description: This routine checks the write protection of a removable
28349  *      media disk and hotpluggable devices via the write protect bit of
28350  *      the Mode Page Header device specific field. Some devices choke
28351  *      on unsupported mode page. In order to workaround this issue,
28352  *      this routine has been implemented to use 0x3f mode page(request
28353  *      for all pages) for all device types.
28354  *
28355  *   Arguments: dev             - the device 'dev_t'
28356  *
28357  * Return Code: int indicating if the device is write protected (1) or not (0)
28358  *
28359  *     Context: Kernel thread.
28360  *
28361  */
28362 
28363 static int
28364 sr_check_wp(dev_t dev)
28365 {
28366 	struct sd_lun	*un;
28367 	uchar_t		device_specific;
28368 	uchar_t		*sense;
28369 	int		hdrlen;
28370 	int		rval = FALSE;
28371 	int		status;
28372 	sd_ssc_t	*ssc;
28373 
28374 	/*
28375 	 * Note: The return codes for this routine should be reworked to
28376 	 * properly handle the case of a NULL softstate.
28377 	 */
28378 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28379 		return (FALSE);
28380 	}
28381 
28382 	if (un->un_f_cfg_is_atapi == TRUE) {
28383 		/*
28384 		 * The mode page contents are not required; set the allocation
28385 		 * length for the mode page header only
28386 		 */
28387 		hdrlen = MODE_HEADER_LENGTH_GRP2;
28388 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
28389 		ssc = sd_ssc_init(un);
28390 		status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense, hdrlen,
28391 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD);
28392 		sd_ssc_fini(ssc);
28393 		if (status != 0)
28394 			goto err_exit;
28395 		device_specific =
28396 		    ((struct mode_header_grp2 *)sense)->device_specific;
28397 	} else {
28398 		hdrlen = MODE_HEADER_LENGTH;
28399 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
28400 		ssc = sd_ssc_init(un);
28401 		status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense, hdrlen,
28402 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD);
28403 		sd_ssc_fini(ssc);
28404 		if (status != 0)
28405 			goto err_exit;
28406 		device_specific =
28407 		    ((struct mode_header *)sense)->device_specific;
28408 	}
28409 
28410 
28411 	/*
28412 	 * Write protect mode sense failed; not all disks
28413 	 * understand this query. Return FALSE assuming that
28414 	 * these devices are not writable.
28415 	 */
28416 	if (device_specific & WRITE_PROTECT) {
28417 		rval = TRUE;
28418 	}
28419 
28420 err_exit:
28421 	kmem_free(sense, hdrlen);
28422 	return (rval);
28423 }
28424 
28425 /*
28426  *    Function: sr_volume_ctrl()
28427  *
28428  * Description: This routine is the driver entry point for handling CD-ROM
28429  *		audio output volume ioctl requests. (CDROMVOLCTRL)
28430  *
28431  *   Arguments: dev	- the device 'dev_t'
28432  *		data	- pointer to user audio volume control structure
28433  *		flag	- this argument is a pass through to ddi_copyxxx()
28434  *			  directly from the mode argument of ioctl().
28435  *
28436  * Return Code: the code returned by sd_send_scsi_cmd()
28437  *		EFAULT if ddi_copyxxx() fails
28438  *		ENXIO if fail ddi_get_soft_state
28439  *		EINVAL if data pointer is NULL
28440  *
28441  */
28442 
28443 static int
28444 sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
28445 {
28446 	struct sd_lun		*un;
28447 	struct cdrom_volctrl    volume;
28448 	struct cdrom_volctrl    *vol = &volume;
28449 	uchar_t			*sense_page;
28450 	uchar_t			*select_page;
28451 	uchar_t			*sense;
28452 	uchar_t			*select;
28453 	int			sense_buflen;
28454 	int			select_buflen;
28455 	int			rval;
28456 	sd_ssc_t		*ssc;
28457 
28458 	if (data == NULL) {
28459 		return (EINVAL);
28460 	}
28461 
28462 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28463 	    (un->un_state == SD_STATE_OFFLINE)) {
28464 		return (ENXIO);
28465 	}
28466 
28467 	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
28468 		return (EFAULT);
28469 	}
28470 
28471 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
28472 		struct mode_header_grp2		*sense_mhp;
28473 		struct mode_header_grp2		*select_mhp;
28474 		int				bd_len;
28475 
28476 		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
28477 		select_buflen = MODE_HEADER_LENGTH_GRP2 +
28478 		    MODEPAGE_AUDIO_CTRL_LEN;
28479 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
28480 		select = kmem_zalloc(select_buflen, KM_SLEEP);
28481 		ssc = sd_ssc_init(un);
28482 		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense,
28483 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
28484 		    SD_PATH_STANDARD);
28485 		sd_ssc_fini(ssc);
28486 
28487 		if (rval != 0) {
28488 			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28489 			    "sr_volume_ctrl: Mode Sense Failed\n");
28490 			kmem_free(sense, sense_buflen);
28491 			kmem_free(select, select_buflen);
28492 			return (rval);
28493 		}
28494 		sense_mhp = (struct mode_header_grp2 *)sense;
28495 		select_mhp = (struct mode_header_grp2 *)select;
28496 		bd_len = (sense_mhp->bdesc_length_hi << 8) |
28497 		    sense_mhp->bdesc_length_lo;
28498 		if (bd_len > MODE_BLK_DESC_LENGTH) {
28499 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28500 			    "sr_volume_ctrl: Mode Sense returned invalid "
28501 			    "block descriptor length\n");
28502 			kmem_free(sense, sense_buflen);
28503 			kmem_free(select, select_buflen);
28504 			return (EIO);
28505 		}
28506 		sense_page = (uchar_t *)
28507 		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
28508 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
28509 		select_mhp->length_msb = 0;
28510 		select_mhp->length_lsb = 0;
28511 		select_mhp->bdesc_length_hi = 0;
28512 		select_mhp->bdesc_length_lo = 0;
28513 	} else {
28514 		struct mode_header		*sense_mhp, *select_mhp;
28515 
28516 		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
28517 		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
28518 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
28519 		select = kmem_zalloc(select_buflen, KM_SLEEP);
28520 		ssc = sd_ssc_init(un);
28521 		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
28522 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
28523 		    SD_PATH_STANDARD);
28524 		sd_ssc_fini(ssc);
28525 
28526 		if (rval != 0) {
28527 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28528 			    "sr_volume_ctrl: Mode Sense Failed\n");
28529 			kmem_free(sense, sense_buflen);
28530 			kmem_free(select, select_buflen);
28531 			return (rval);
28532 		}
28533 		sense_mhp  = (struct mode_header *)sense;
28534 		select_mhp = (struct mode_header *)select;
28535 		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
28536 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28537 			    "sr_volume_ctrl: Mode Sense returned invalid "
28538 			    "block descriptor length\n");
28539 			kmem_free(sense, sense_buflen);
28540 			kmem_free(select, select_buflen);
28541 			return (EIO);
28542 		}
28543 		sense_page = (uchar_t *)
28544 		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
28545 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
28546 		select_mhp->length = 0;
28547 		select_mhp->bdesc_length = 0;
28548 	}
28549 	/*
28550 	 * Note: An audio control data structure could be created and overlayed
28551 	 * on the following in place of the array indexing method implemented.
28552 	 */
28553 
28554 	/* Build the select data for the user volume data */
28555 	select_page[0] = MODEPAGE_AUDIO_CTRL;
28556 	select_page[1] = 0xE;
28557 	/* Set the immediate bit */
28558 	select_page[2] = 0x04;
28559 	/* Zero out reserved fields */
28560 	select_page[3] = 0x00;
28561 	select_page[4] = 0x00;
28562 	/* Return sense data for fields not to be modified */
28563 	select_page[5] = sense_page[5];
28564 	select_page[6] = sense_page[6];
28565 	select_page[7] = sense_page[7];
28566 	/* Set the user specified volume levels for channel 0 and 1 */
28567 	select_page[8] = 0x01;
28568 	select_page[9] = vol->channel0;
28569 	select_page[10] = 0x02;
28570 	select_page[11] = vol->channel1;
28571 	/* Channel 2 and 3 are currently unsupported so return the sense data */
28572 	select_page[12] = sense_page[12];
28573 	select_page[13] = sense_page[13];
28574 	select_page[14] = sense_page[14];
28575 	select_page[15] = sense_page[15];
28576 
28577 	ssc = sd_ssc_init(un);
28578 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
28579 		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP1, select,
28580 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
28581 	} else {
28582 		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
28583 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
28584 	}
28585 	sd_ssc_fini(ssc);
28586 
28587 	kmem_free(sense, sense_buflen);
28588 	kmem_free(select, select_buflen);
28589 	return (rval);
28590 }
28591 
28592 
28593 /*
28594  *    Function: sr_read_sony_session_offset()
28595  *
28596  * Description: This routine is the driver entry point for handling CD-ROM
28597  *		ioctl requests for session offset information. (CDROMREADOFFSET)
28598  *		The address of the first track in the last session of a
28599  *		multi-session CD-ROM is returned
28600  *
28601  *		Note: This routine uses a vendor specific key value in the
28602  *		command control field without implementing any vendor check here
28603  *		or in the ioctl routine.
28604  *
28605  *   Arguments: dev	- the device 'dev_t'
28606  *		data	- pointer to an int to hold the requested address
28607  *		flag	- this argument is a pass through to ddi_copyxxx()
28608  *			  directly from the mode argument of ioctl().
28609  *
28610  * Return Code: the code returned by sd_send_scsi_cmd()
28611  *		EFAULT if ddi_copyxxx() fails
28612  *		ENXIO if fail ddi_get_soft_state
28613  *		EINVAL if data pointer is NULL
28614  */
28615 
28616 static int
28617 sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
28618 {
28619 	struct sd_lun		*un;
28620 	struct uscsi_cmd	*com;
28621 	caddr_t			buffer;
28622 	char			cdb[CDB_GROUP1];
28623 	int			session_offset = 0;
28624 	int			rval;
28625 
28626 	if (data == NULL) {
28627 		return (EINVAL);
28628 	}
28629 
28630 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28631 	    (un->un_state == SD_STATE_OFFLINE)) {
28632 		return (ENXIO);
28633 	}
28634 
28635 	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
28636 	bzero(cdb, CDB_GROUP1);
28637 	cdb[0] = SCMD_READ_TOC;
28638 	/*
28639 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
28640 	 * (4 byte TOC response header + 8 byte response data)
28641 	 */
28642 	cdb[8] = SONY_SESSION_OFFSET_LEN;
28643 	/* Byte 9 is the control byte. A vendor specific value is used */
28644 	cdb[9] = SONY_SESSION_OFFSET_KEY;
28645 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28646 	com->uscsi_cdb = cdb;
28647 	com->uscsi_cdblen = CDB_GROUP1;
28648 	com->uscsi_bufaddr = buffer;
28649 	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
28650 	com->uscsi_flags = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
28651 
28652 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
28653 	    SD_PATH_STANDARD);
28654 	if (rval != 0) {
28655 		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
28656 		kmem_free(com, sizeof (*com));
28657 		return (rval);
28658 	}
28659 	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
28660 		session_offset =
28661 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
28662 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
28663 		/*
28664 		 * Offset returned offset in current lbasize block's. Convert to
28665 		 * 2k block's to return to the user
28666 		 */
28667 		if (un->un_tgt_blocksize == CDROM_BLK_512) {
28668 			session_offset >>= 2;
28669 		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
28670 			session_offset >>= 1;
28671 		}
28672 	}
28673 
28674 	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
28675 		rval = EFAULT;
28676 	}
28677 
28678 	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
28679 	kmem_free(com, sizeof (*com));
28680 	return (rval);
28681 }
28682 
28683 
28684 /*
28685  *    Function: sd_wm_cache_constructor()
28686  *
28687  * Description: Cache Constructor for the wmap cache for the read/modify/write
28688  *		devices.
28689  *
28690  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
28691  *		un	- sd_lun structure for the device.
28692  *		flag	- the km flags passed to constructor
28693  *
28694  * Return Code: 0 on success.
28695  *		-1 on failure.
28696  */
28697 
28698 /*ARGSUSED*/
28699 static int
28700 sd_wm_cache_constructor(void *wm, void *un, int flags)
28701 {
28702 	bzero(wm, sizeof (struct sd_w_map));
28703 	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
28704 	return (0);
28705 }
28706 
28707 
28708 /*
28709  *    Function: sd_wm_cache_destructor()
28710  *
28711  * Description: Cache destructor for the wmap cache for the read/modify/write
28712  *		devices.
28713  *
28714  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
28715  *		un	- sd_lun structure for the device.
28716  */
28717 /*ARGSUSED*/
28718 static void
28719 sd_wm_cache_destructor(void *wm, void *un)
28720 {
28721 	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
28722 }
28723 
28724 
28725 /*
28726  *    Function: sd_range_lock()
28727  *
28728  * Description: Lock the range of blocks specified as parameter to ensure
28729  *		that read, modify write is atomic and no other i/o writes
28730  *		to the same location. The range is specified in terms
28731  *		of start and end blocks. Block numbers are the actual
28732  *		media block numbers and not system.
28733  *
28734  *   Arguments: un	- sd_lun structure for the device.
28735  *		startb - The starting block number
28736  *		endb - The end block number
28737  *		typ - type of i/o - simple/read_modify_write
28738  *
28739  * Return Code: wm  - pointer to the wmap structure.
28740  *
28741  *     Context: This routine can sleep.
28742  */
28743 
28744 static struct sd_w_map *
28745 sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
28746 {
28747 	struct sd_w_map *wmp = NULL;
28748 	struct sd_w_map *sl_wmp = NULL;
28749 	struct sd_w_map *tmp_wmp;
28750 	wm_state state = SD_WM_CHK_LIST;
28751 
28752 
28753 	ASSERT(un != NULL);
28754 	ASSERT(!mutex_owned(SD_MUTEX(un)));
28755 
28756 	mutex_enter(SD_MUTEX(un));
28757 
28758 	while (state != SD_WM_DONE) {
28759 
28760 		switch (state) {
28761 		case SD_WM_CHK_LIST:
28762 			/*
28763 			 * This is the starting state. Check the wmap list
28764 			 * to see if the range is currently available.
28765 			 */
28766 			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
28767 				/*
28768 				 * If this is a simple write and no rmw
28769 				 * i/o is pending then try to lock the
28770 				 * range as the range should be available.
28771 				 */
28772 				state = SD_WM_LOCK_RANGE;
28773 			} else {
28774 				tmp_wmp = sd_get_range(un, startb, endb);
28775 				if (tmp_wmp != NULL) {
28776 					if ((wmp != NULL) && ONLIST(un, wmp)) {
28777 						/*
28778 						 * Should not keep onlist wmps
28779 						 * while waiting this macro
28780 						 * will also do wmp = NULL;
28781 						 */
28782 						FREE_ONLIST_WMAP(un, wmp);
28783 					}
28784 					/*
28785 					 * sl_wmp is the wmap on which wait
28786 					 * is done, since the tmp_wmp points
28787 					 * to the inuse wmap, set sl_wmp to
28788 					 * tmp_wmp and change the state to sleep
28789 					 */
28790 					sl_wmp = tmp_wmp;
28791 					state = SD_WM_WAIT_MAP;
28792 				} else {
28793 					state = SD_WM_LOCK_RANGE;
28794 				}
28795 
28796 			}
28797 			break;
28798 
28799 		case SD_WM_LOCK_RANGE:
28800 			ASSERT(un->un_wm_cache);
28801 			/*
28802 			 * The range need to be locked, try to get a wmap.
28803 			 * First attempt it with NO_SLEEP, want to avoid a sleep
28804 			 * if possible as we will have to release the sd mutex
28805 			 * if we have to sleep.
28806 			 */
28807 			if (wmp == NULL)
28808 				wmp = kmem_cache_alloc(un->un_wm_cache,
28809 				    KM_NOSLEEP);
28810 			if (wmp == NULL) {
28811 				mutex_exit(SD_MUTEX(un));
28812 				_NOTE(DATA_READABLE_WITHOUT_LOCK
28813 				    (sd_lun::un_wm_cache))
28814 				wmp = kmem_cache_alloc(un->un_wm_cache,
28815 				    KM_SLEEP);
28816 				mutex_enter(SD_MUTEX(un));
28817 				/*
28818 				 * we released the mutex so recheck and go to
28819 				 * check list state.
28820 				 */
28821 				state = SD_WM_CHK_LIST;
28822 			} else {
28823 				/*
28824 				 * We exit out of state machine since we
28825 				 * have the wmap. Do the housekeeping first.
28826 				 * place the wmap on the wmap list if it is not
28827 				 * on it already and then set the state to done.
28828 				 */
28829 				wmp->wm_start = startb;
28830 				wmp->wm_end = endb;
28831 				wmp->wm_flags = typ | SD_WM_BUSY;
28832 				if (typ & SD_WTYPE_RMW) {
28833 					un->un_rmw_count++;
28834 				}
28835 				/*
28836 				 * If not already on the list then link
28837 				 */
28838 				if (!ONLIST(un, wmp)) {
28839 					wmp->wm_next = un->un_wm;
28840 					wmp->wm_prev = NULL;
28841 					if (wmp->wm_next)
28842 						wmp->wm_next->wm_prev = wmp;
28843 					un->un_wm = wmp;
28844 				}
28845 				state = SD_WM_DONE;
28846 			}
28847 			break;
28848 
28849 		case SD_WM_WAIT_MAP:
28850 			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
28851 			/*
28852 			 * Wait is done on sl_wmp, which is set in the
28853 			 * check_list state.
28854 			 */
28855 			sl_wmp->wm_wanted_count++;
28856 			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
28857 			sl_wmp->wm_wanted_count--;
28858 			/*
28859 			 * We can reuse the memory from the completed sl_wmp
28860 			 * lock range for our new lock, but only if noone is
28861 			 * waiting for it.
28862 			 */
28863 			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
28864 			if (sl_wmp->wm_wanted_count == 0) {
28865 				if (wmp != NULL) {
28866 					CHK_N_FREEWMP(un, wmp);
28867 				}
28868 				wmp = sl_wmp;
28869 			}
28870 			sl_wmp = NULL;
28871 			/*
28872 			 * After waking up, need to recheck for availability of
28873 			 * range.
28874 			 */
28875 			state = SD_WM_CHK_LIST;
28876 			break;
28877 
28878 		default:
28879 			panic("sd_range_lock: "
28880 			    "Unknown state %d in sd_range_lock", state);
28881 			/*NOTREACHED*/
28882 		} /* switch(state) */
28883 
28884 	} /* while(state != SD_WM_DONE) */
28885 
28886 	mutex_exit(SD_MUTEX(un));
28887 
28888 	ASSERT(wmp != NULL);
28889 
28890 	return (wmp);
28891 }
28892 
28893 
28894 /*
28895  *    Function: sd_get_range()
28896  *
28897  * Description: Find if there any overlapping I/O to this one
28898  *		Returns the write-map of 1st such I/O, NULL otherwise.
28899  *
28900  *   Arguments: un	- sd_lun structure for the device.
28901  *		startb - The starting block number
28902  *		endb - The end block number
28903  *
28904  * Return Code: wm  - pointer to the wmap structure.
28905  */
28906 
28907 static struct sd_w_map *
28908 sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
28909 {
28910 	struct sd_w_map *wmp;
28911 
28912 	ASSERT(un != NULL);
28913 
28914 	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
28915 		if (!(wmp->wm_flags & SD_WM_BUSY)) {
28916 			continue;
28917 		}
28918 		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
28919 			break;
28920 		}
28921 		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
28922 			break;
28923 		}
28924 	}
28925 
28926 	return (wmp);
28927 }
28928 
28929 
28930 /*
28931  *    Function: sd_free_inlist_wmap()
28932  *
28933  * Description: Unlink and free a write map struct.
28934  *
28935  *   Arguments: un      - sd_lun structure for the device.
28936  *		wmp	- sd_w_map which needs to be unlinked.
28937  */
28938 
28939 static void
28940 sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
28941 {
28942 	ASSERT(un != NULL);
28943 
28944 	if (un->un_wm == wmp) {
28945 		un->un_wm = wmp->wm_next;
28946 	} else {
28947 		wmp->wm_prev->wm_next = wmp->wm_next;
28948 	}
28949 
28950 	if (wmp->wm_next) {
28951 		wmp->wm_next->wm_prev = wmp->wm_prev;
28952 	}
28953 
28954 	wmp->wm_next = wmp->wm_prev = NULL;
28955 
28956 	kmem_cache_free(un->un_wm_cache, wmp);
28957 }
28958 
28959 
28960 /*
28961  *    Function: sd_range_unlock()
28962  *
28963  * Description: Unlock the range locked by wm.
28964  *		Free write map if nobody else is waiting on it.
28965  *
28966  *   Arguments: un      - sd_lun structure for the device.
28967  *              wmp     - sd_w_map which needs to be unlinked.
28968  */
28969 
28970 static void
28971 sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
28972 {
28973 	ASSERT(un != NULL);
28974 	ASSERT(wm != NULL);
28975 	ASSERT(!mutex_owned(SD_MUTEX(un)));
28976 
28977 	mutex_enter(SD_MUTEX(un));
28978 
28979 	if (wm->wm_flags & SD_WTYPE_RMW) {
28980 		un->un_rmw_count--;
28981 	}
28982 
28983 	if (wm->wm_wanted_count) {
28984 		wm->wm_flags = 0;
28985 		/*
28986 		 * Broadcast that the wmap is available now.
28987 		 */
28988 		cv_broadcast(&wm->wm_avail);
28989 	} else {
28990 		/*
28991 		 * If no one is waiting on the map, it should be free'ed.
28992 		 */
28993 		sd_free_inlist_wmap(un, wm);
28994 	}
28995 
28996 	mutex_exit(SD_MUTEX(un));
28997 }
28998 
28999 
29000 /*
29001  *    Function: sd_read_modify_write_task
29002  *
29003  * Description: Called from a taskq thread to initiate the write phase of
29004  *		a read-modify-write request.  This is used for targets where
29005  *		un->un_sys_blocksize != un->un_tgt_blocksize.
29006  *
29007  *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
29008  *
29009  *     Context: Called under taskq thread context.
29010  */
29011 
29012 static void
29013 sd_read_modify_write_task(void *arg)
29014 {
29015 	struct sd_mapblocksize_info	*bsp;
29016 	struct buf	*bp;
29017 	struct sd_xbuf	*xp;
29018 	struct sd_lun	*un;
29019 
29020 	bp = arg;	/* The bp is given in arg */
29021 	ASSERT(bp != NULL);
29022 
29023 	/* Get the pointer to the layer-private data struct */
29024 	xp = SD_GET_XBUF(bp);
29025 	ASSERT(xp != NULL);
29026 	bsp = xp->xb_private;
29027 	ASSERT(bsp != NULL);
29028 
29029 	un = SD_GET_UN(bp);
29030 	ASSERT(un != NULL);
29031 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29032 
29033 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29034 	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
29035 
29036 	/*
29037 	 * This is the write phase of a read-modify-write request, called
29038 	 * under the context of a taskq thread in response to the completion
29039 	 * of the read portion of the rmw request completing under interrupt
29040 	 * context. The write request must be sent from here down the iostart
29041 	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
29042 	 * we use the layer index saved in the layer-private data area.
29043 	 */
29044 	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
29045 
29046 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29047 	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
29048 }
29049 
29050 
29051 /*
29052  *    Function: sddump_do_read_of_rmw()
29053  *
29054  * Description: This routine will be called from sddump, If sddump is called
29055  *		with an I/O which not aligned on device blocksize boundary
29056  *		then the write has to be converted to read-modify-write.
29057  *		Do the read part here in order to keep sddump simple.
29058  *		Note - That the sd_mutex is held across the call to this
29059  *		routine.
29060  *
29061  *   Arguments: un	- sd_lun
29062  *		blkno	- block number in terms of media block size.
29063  *		nblk	- number of blocks.
29064  *		bpp	- pointer to pointer to the buf structure. On return
29065  *			from this function, *bpp points to the valid buffer
29066  *			to which the write has to be done.
29067  *
29068  * Return Code: 0 for success or errno-type return code
29069  */
29070 
29071 static int
29072 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
29073     struct buf **bpp)
29074 {
29075 	int err;
29076 	int i;
29077 	int rval;
29078 	struct buf *bp;
29079 	struct scsi_pkt *pkt = NULL;
29080 	uint32_t target_blocksize;
29081 
29082 	ASSERT(un != NULL);
29083 	ASSERT(mutex_owned(SD_MUTEX(un)));
29084 
29085 	target_blocksize = un->un_tgt_blocksize;
29086 
29087 	mutex_exit(SD_MUTEX(un));
29088 
29089 	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
29090 	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
29091 	if (bp == NULL) {
29092 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29093 		    "no resources for dumping; giving up");
29094 		err = ENOMEM;
29095 		goto done;
29096 	}
29097 
29098 	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
29099 	    blkno, nblk);
29100 	if (rval != 0) {
29101 		scsi_free_consistent_buf(bp);
29102 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29103 		    "no resources for dumping; giving up");
29104 		err = ENOMEM;
29105 		goto done;
29106 	}
29107 
29108 	pkt->pkt_flags |= FLAG_NOINTR;
29109 
29110 	err = EIO;
29111 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
29112 
29113 		/*
29114 		 * Scsi_poll returns 0 (success) if the command completes and
29115 		 * the status block is STATUS_GOOD.  We should only check
29116 		 * errors if this condition is not true.  Even then we should
29117 		 * send our own request sense packet only if we have a check
29118 		 * condition and auto request sense has not been performed by
29119 		 * the hba.
29120 		 */
29121 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");
29122 
29123 		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
29124 			err = 0;
29125 			break;
29126 		}
29127 
29128 		/*
29129 		 * Check CMD_DEV_GONE 1st, give up if device is gone,
29130 		 * no need to read RQS data.
29131 		 */
29132 		if (pkt->pkt_reason == CMD_DEV_GONE) {
29133 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29134 			    "Error while dumping state with rmw..."
29135 			    "Device is gone\n");
29136 			break;
29137 		}
29138 
29139 		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
29140 			SD_INFO(SD_LOG_DUMP, un,
29141 			    "sddump: read failed with CHECK, try # %d\n", i);
29142 			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
29143 				(void) sd_send_polled_RQS(un);
29144 			}
29145 
29146 			continue;
29147 		}
29148 
29149 		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
29150 			int reset_retval = 0;
29151 
29152 			SD_INFO(SD_LOG_DUMP, un,
29153 			    "sddump: read failed with BUSY, try # %d\n", i);
29154 
29155 			if (un->un_f_lun_reset_enabled == TRUE) {
29156 				reset_retval = scsi_reset(SD_ADDRESS(un),
29157 				    RESET_LUN);
29158 			}
29159 			if (reset_retval == 0) {
29160 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
29161 			}
29162 			(void) sd_send_polled_RQS(un);
29163 
29164 		} else {
29165 			SD_INFO(SD_LOG_DUMP, un,
29166 			    "sddump: read failed with 0x%x, try # %d\n",
29167 			    SD_GET_PKT_STATUS(pkt), i);
29168 			mutex_enter(SD_MUTEX(un));
29169 			sd_reset_target(un, pkt);
29170 			mutex_exit(SD_MUTEX(un));
29171 		}
29172 
29173 		/*
29174 		 * If we are not getting anywhere with lun/target resets,
29175 		 * let's reset the bus.
29176 		 */
29177 		if (i > SD_NDUMP_RETRIES / 2) {
29178 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
29179 			(void) sd_send_polled_RQS(un);
29180 		}
29181 
29182 	}
29183 	scsi_destroy_pkt(pkt);
29184 
29185 	if (err != 0) {
29186 		scsi_free_consistent_buf(bp);
29187 		*bpp = NULL;
29188 	} else {
29189 		*bpp = bp;
29190 	}
29191 
29192 done:
29193 	mutex_enter(SD_MUTEX(un));
29194 	return (err);
29195 }
29196 
29197 
29198 /*
29199  *    Function: sd_failfast_flushq
29200  *
29201  * Description: Take all bp's on the wait queue that have B_FAILFAST set
29202  *		in b_flags and move them onto the failfast queue, then kick
29203  *		off a thread to return all bp's on the failfast queue to
29204  *		their owners with an error set.
29205  *
29206  *   Arguments: un - pointer to the soft state struct for the instance.
29207  *
29208  *     Context: may execute in interrupt context.
29209  */
29210 
29211 static void
29212 sd_failfast_flushq(struct sd_lun *un)
29213 {
29214 	struct buf *bp;
29215 	struct buf *next_waitq_bp;
29216 	struct buf *prev_waitq_bp = NULL;
29217 
29218 	ASSERT(un != NULL);
29219 	ASSERT(mutex_owned(SD_MUTEX(un)));
29220 	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
29221 	ASSERT(un->un_failfast_bp == NULL);
29222 
29223 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
29224 	    "sd_failfast_flushq: entry: un:0x%p\n", un);
29225 
29226 	/*
29227 	 * Check if we should flush all bufs when entering failfast state, or
29228 	 * just those with B_FAILFAST set.
29229 	 */
29230 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
29231 		/*
29232 		 * Move *all* bp's on the wait queue to the failfast flush
29233 		 * queue, including those that do NOT have B_FAILFAST set.
29234 		 */
29235 		if (un->un_failfast_headp == NULL) {
29236 			ASSERT(un->un_failfast_tailp == NULL);
29237 			un->un_failfast_headp = un->un_waitq_headp;
29238 		} else {
29239 			ASSERT(un->un_failfast_tailp != NULL);
29240 			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
29241 		}
29242 
29243 		un->un_failfast_tailp = un->un_waitq_tailp;
29244 
29245 		/* update kstat for each bp moved out of the waitq */
29246 		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
29247 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
29248 		}
29249 
29250 		/* empty the waitq */
29251 		un->un_waitq_headp = un->un_waitq_tailp = NULL;
29252 
29253 	} else {
29254 		/*
29255 		 * Go thru the wait queue, pick off all entries with
29256 		 * B_FAILFAST set, and move these onto the failfast queue.
29257 		 */
29258 		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
29259 			/*
29260 			 * Save the pointer to the next bp on the wait queue,
29261 			 * so we get to it on the next iteration of this loop.
29262 			 */
29263 			next_waitq_bp = bp->av_forw;
29264 
29265 			/*
29266 			 * If this bp from the wait queue does NOT have
29267 			 * B_FAILFAST set, just move on to the next element
29268 			 * in the wait queue. Note, this is the only place
29269 			 * where it is correct to set prev_waitq_bp.
29270 			 */
29271 			if ((bp->b_flags & B_FAILFAST) == 0) {
29272 				prev_waitq_bp = bp;
29273 				continue;
29274 			}
29275 
29276 			/*
29277 			 * Remove the bp from the wait queue.
29278 			 */
29279 			if (bp == un->un_waitq_headp) {
29280 				/* The bp is the first element of the waitq. */
29281 				un->un_waitq_headp = next_waitq_bp;
29282 				if (un->un_waitq_headp == NULL) {
29283 					/* The wait queue is now empty */
29284 					un->un_waitq_tailp = NULL;
29285 				}
29286 			} else {
29287 				/*
29288 				 * The bp is either somewhere in the middle
29289 				 * or at the end of the wait queue.
29290 				 */
29291 				ASSERT(un->un_waitq_headp != NULL);
29292 				ASSERT(prev_waitq_bp != NULL);
29293 				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
29294 				    == 0);
29295 				if (bp == un->un_waitq_tailp) {
29296 					/* bp is the last entry on the waitq. */
29297 					ASSERT(next_waitq_bp == NULL);
29298 					un->un_waitq_tailp = prev_waitq_bp;
29299 				}
29300 				prev_waitq_bp->av_forw = next_waitq_bp;
29301 			}
29302 			bp->av_forw = NULL;
29303 
29304 			/*
29305 			 * update kstat since the bp is moved out of
29306 			 * the waitq
29307 			 */
29308 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
29309 
29310 			/*
29311 			 * Now put the bp onto the failfast queue.
29312 			 */
29313 			if (un->un_failfast_headp == NULL) {
29314 				/* failfast queue is currently empty */
29315 				ASSERT(un->un_failfast_tailp == NULL);
29316 				un->un_failfast_headp =
29317 				    un->un_failfast_tailp = bp;
29318 			} else {
29319 				/* Add the bp to the end of the failfast q */
29320 				ASSERT(un->un_failfast_tailp != NULL);
29321 				ASSERT(un->un_failfast_tailp->b_flags &
29322 				    B_FAILFAST);
29323 				un->un_failfast_tailp->av_forw = bp;
29324 				un->un_failfast_tailp = bp;
29325 			}
29326 		}
29327 	}
29328 
29329 	/*
29330 	 * Now return all bp's on the failfast queue to their owners.
29331 	 */
29332 	while ((bp = un->un_failfast_headp) != NULL) {
29333 
29334 		un->un_failfast_headp = bp->av_forw;
29335 		if (un->un_failfast_headp == NULL) {
29336 			un->un_failfast_tailp = NULL;
29337 		}
29338 
29339 		/*
29340 		 * We want to return the bp with a failure error code, but
29341 		 * we do not want a call to sd_start_cmds() to occur here,
29342 		 * so use sd_return_failed_command_no_restart() instead of
29343 		 * sd_return_failed_command().
29344 		 */
29345 		sd_return_failed_command_no_restart(un, bp, EIO);
29346 	}
29347 
29348 	/* Flush the xbuf queues if required. */
29349 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
29350 		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
29351 	}
29352 
29353 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
29354 	    "sd_failfast_flushq: exit: un:0x%p\n", un);
29355 }
29356 
29357 
29358 /*
29359  *    Function: sd_failfast_flushq_callback
29360  *
29361  * Description: Return TRUE if the given bp meets the criteria for failfast
29362  *		flushing. Used with ddi_xbuf_flushq(9F).
29363  *
29364  *   Arguments: bp - ptr to buf struct to be examined.
29365  *
29366  *     Context: Any
29367  */
29368 
29369 static int
29370 sd_failfast_flushq_callback(struct buf *bp)
29371 {
29372 	/*
29373 	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
29374 	 * state is entered; OR (2) the given bp has B_FAILFAST set.
29375 	 */
29376 	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
29377 	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
29378 }
29379 
29380 
29381 
29382 /*
29383  * Function: sd_setup_next_xfer
29384  *
29385  * Description: Prepare next I/O operation using DMA_PARTIAL
29386  *
29387  */
29388 
29389 static int
29390 sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
29391     struct scsi_pkt *pkt, struct sd_xbuf *xp)
29392 {
29393 	ssize_t	num_blks_not_xfered;
29394 	daddr_t	strt_blk_num;
29395 	ssize_t	bytes_not_xfered;
29396 	int	rval;
29397 
29398 	ASSERT(pkt->pkt_resid == 0);
29399 
29400 	/*
29401 	 * Calculate next block number and amount to be transferred.
29402 	 *
29403 	 * How much data NOT transfered to the HBA yet.
29404 	 */
29405 	bytes_not_xfered = xp->xb_dma_resid;
29406 
29407 	/*
29408 	 * figure how many blocks NOT transfered to the HBA yet.
29409 	 */
29410 	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
29411 
29412 	/*
29413 	 * set starting block number to the end of what WAS transfered.
29414 	 */
29415 	strt_blk_num = xp->xb_blkno +
29416 	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
29417 
29418 	/*
29419 	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
29420 	 * will call scsi_initpkt with NULL_FUNC so we do not have to release
29421 	 * the disk mutex here.
29422 	 */
29423 	rval = sd_setup_next_rw_pkt(un, pkt, bp,
29424 	    strt_blk_num, num_blks_not_xfered);
29425 
29426 	if (rval == 0) {
29427 
29428 		/*
29429 		 * Success.
29430 		 *
29431 		 * Adjust things if there are still more blocks to be
29432 		 * transfered.
29433 		 */
29434 		xp->xb_dma_resid = pkt->pkt_resid;
29435 		pkt->pkt_resid = 0;
29436 
29437 		return (1);
29438 	}
29439 
29440 	/*
29441 	 * There's really only one possible return value from
29442 	 * sd_setup_next_rw_pkt which occurs when scsi_init_pkt
29443 	 * returns NULL.
29444 	 */
29445 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
29446 
29447 	bp->b_resid = bp->b_bcount;
29448 	bp->b_flags |= B_ERROR;
29449 
29450 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29451 	    "Error setting up next portion of DMA transfer\n");
29452 
29453 	return (0);
29454 }
29455 
29456 /*
29457  *    Function: sd_panic_for_res_conflict
29458  *
29459  * Description: Call panic with a string formatted with "Reservation Conflict"
29460  *		and a human readable identifier indicating the SD instance
29461  *		that experienced the reservation conflict.
29462  *
29463  *   Arguments: un - pointer to the soft state struct for the instance.
29464  *
29465  *     Context: may execute in interrupt context.
29466  */
29467 
29468 #define	SD_RESV_CONFLICT_FMT_LEN 40
29469 void
29470 sd_panic_for_res_conflict(struct sd_lun *un)
29471 {
29472 	char panic_str[SD_RESV_CONFLICT_FMT_LEN + MAXPATHLEN];
29473 	char path_str[MAXPATHLEN];
29474 
29475 	(void) snprintf(panic_str, sizeof (panic_str),
29476 	    "Reservation Conflict\nDisk: %s",
29477 	    ddi_pathname(SD_DEVINFO(un), path_str));
29478 
29479 	panic(panic_str);
29480 }
29481 
/*
 * Note: The following sd_faultinjection_ioctl(), sd_injection_log() and
 * sd_faultinjection() routines implement driver support for fault
 * injection. They are used for error analysis, by causing faults in
 * multiple layers of the driver.
 */
29488 
29489 #ifdef SD_FAULT_INJECTION
/*
 * File-scope switch for fault injection.  sd_faultinjection_ioctl() sets it
 * to 1 for SDIOCRUN and to 0 for each of the other commands it handles;
 * sd_faultinjection() returns without injecting anything while it is 0.
 */
static uint_t   sd_fault_injection_on = 0;
29491 
29492 /*
29493  *    Function: sd_faultinjection_ioctl()
29494  *
29495  * Description: This routine is the driver entry point for handling
29496  *              faultinjection ioctls to inject errors into the
29497  *              layer model
29498  *
29499  *   Arguments: cmd	- the ioctl cmd received
29500  *		arg	- the arguments from user and returns
29501  */
29502 
29503 static void
29504 sd_faultinjection_ioctl(int cmd, intptr_t arg,  struct sd_lun *un)
29505 {
29506 	uint_t i = 0;
29507 	uint_t rval;
29508 
29509 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
29510 
29511 	mutex_enter(SD_MUTEX(un));
29512 
29513 	switch (cmd) {
29514 	case SDIOCRUN:
29515 		/* Allow pushed faults to be injected */
29516 		SD_INFO(SD_LOG_SDTEST, un,
29517 		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
29518 
29519 		sd_fault_injection_on = 1;
29520 
29521 		SD_INFO(SD_LOG_IOERR, un,
29522 		    "sd_faultinjection_ioctl: run finished\n");
29523 		break;
29524 
29525 	case SDIOCSTART:
29526 		/* Start Injection Session */
29527 		SD_INFO(SD_LOG_SDTEST, un,
29528 		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
29529 
29530 		sd_fault_injection_on = 0;
29531 		un->sd_injection_mask = 0xFFFFFFFF;
29532 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
29533 			un->sd_fi_fifo_pkt[i] = NULL;
29534 			un->sd_fi_fifo_xb[i] = NULL;
29535 			un->sd_fi_fifo_un[i] = NULL;
29536 			un->sd_fi_fifo_arq[i] = NULL;
29537 		}
29538 		un->sd_fi_fifo_start = 0;
29539 		un->sd_fi_fifo_end = 0;
29540 
29541 		mutex_enter(&(un->un_fi_mutex));
29542 		un->sd_fi_log[0] = '\0';
29543 		un->sd_fi_buf_len = 0;
29544 		mutex_exit(&(un->un_fi_mutex));
29545 
29546 		SD_INFO(SD_LOG_IOERR, un,
29547 		    "sd_faultinjection_ioctl: start finished\n");
29548 		break;
29549 
29550 	case SDIOCSTOP:
29551 		/* Stop Injection Session */
29552 		SD_INFO(SD_LOG_SDTEST, un,
29553 		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
29554 		sd_fault_injection_on = 0;
29555 		un->sd_injection_mask = 0x0;
29556 
29557 		/* Empty stray or unuseds structs from fifo */
29558 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
29559 			if (un->sd_fi_fifo_pkt[i] != NULL) {
29560 				kmem_free(un->sd_fi_fifo_pkt[i],
29561 				    sizeof (struct sd_fi_pkt));
29562 			}
29563 			if (un->sd_fi_fifo_xb[i] != NULL) {
29564 				kmem_free(un->sd_fi_fifo_xb[i],
29565 				    sizeof (struct sd_fi_xb));
29566 			}
29567 			if (un->sd_fi_fifo_un[i] != NULL) {
29568 				kmem_free(un->sd_fi_fifo_un[i],
29569 				    sizeof (struct sd_fi_un));
29570 			}
29571 			if (un->sd_fi_fifo_arq[i] != NULL) {
29572 				kmem_free(un->sd_fi_fifo_arq[i],
29573 				    sizeof (struct sd_fi_arq));
29574 			}
29575 			un->sd_fi_fifo_pkt[i] = NULL;
29576 			un->sd_fi_fifo_un[i] = NULL;
29577 			un->sd_fi_fifo_xb[i] = NULL;
29578 			un->sd_fi_fifo_arq[i] = NULL;
29579 		}
29580 		un->sd_fi_fifo_start = 0;
29581 		un->sd_fi_fifo_end = 0;
29582 
29583 		SD_INFO(SD_LOG_IOERR, un,
29584 		    "sd_faultinjection_ioctl: stop finished\n");
29585 		break;
29586 
29587 	case SDIOCINSERTPKT:
29588 		/* Store a packet struct to be pushed onto fifo */
29589 		SD_INFO(SD_LOG_SDTEST, un,
29590 		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
29591 
29592 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
29593 
29594 		sd_fault_injection_on = 0;
29595 
29596 		/* No more that SD_FI_MAX_ERROR allowed in Queue */
29597 		if (un->sd_fi_fifo_pkt[i] != NULL) {
29598 			kmem_free(un->sd_fi_fifo_pkt[i],
29599 			    sizeof (struct sd_fi_pkt));
29600 		}
29601 		if (arg != (uintptr_t)NULL) {
29602 			un->sd_fi_fifo_pkt[i] =
29603 			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
29604 			if (un->sd_fi_fifo_pkt[i] == NULL) {
29605 				/* Alloc failed don't store anything */
29606 				break;
29607 			}
29608 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
29609 			    sizeof (struct sd_fi_pkt), 0);
29610 			if (rval == -1) {
29611 				kmem_free(un->sd_fi_fifo_pkt[i],
29612 				    sizeof (struct sd_fi_pkt));
29613 				un->sd_fi_fifo_pkt[i] = NULL;
29614 			}
29615 		} else {
29616 			SD_INFO(SD_LOG_IOERR, un,
29617 			    "sd_faultinjection_ioctl: pkt null\n");
29618 		}
29619 		break;
29620 
29621 	case SDIOCINSERTXB:
29622 		/* Store a xb struct to be pushed onto fifo */
29623 		SD_INFO(SD_LOG_SDTEST, un,
29624 		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
29625 
29626 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
29627 
29628 		sd_fault_injection_on = 0;
29629 
29630 		if (un->sd_fi_fifo_xb[i] != NULL) {
29631 			kmem_free(un->sd_fi_fifo_xb[i],
29632 			    sizeof (struct sd_fi_xb));
29633 			un->sd_fi_fifo_xb[i] = NULL;
29634 		}
29635 		if (arg != (uintptr_t)NULL) {
29636 			un->sd_fi_fifo_xb[i] =
29637 			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
29638 			if (un->sd_fi_fifo_xb[i] == NULL) {
29639 				/* Alloc failed don't store anything */
29640 				break;
29641 			}
29642 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
29643 			    sizeof (struct sd_fi_xb), 0);
29644 
29645 			if (rval == -1) {
29646 				kmem_free(un->sd_fi_fifo_xb[i],
29647 				    sizeof (struct sd_fi_xb));
29648 				un->sd_fi_fifo_xb[i] = NULL;
29649 			}
29650 		} else {
29651 			SD_INFO(SD_LOG_IOERR, un,
29652 			    "sd_faultinjection_ioctl: xb null\n");
29653 		}
29654 		break;
29655 
29656 	case SDIOCINSERTUN:
29657 		/* Store a un struct to be pushed onto fifo */
29658 		SD_INFO(SD_LOG_SDTEST, un,
29659 		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
29660 
29661 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
29662 
29663 		sd_fault_injection_on = 0;
29664 
29665 		if (un->sd_fi_fifo_un[i] != NULL) {
29666 			kmem_free(un->sd_fi_fifo_un[i],
29667 			    sizeof (struct sd_fi_un));
29668 			un->sd_fi_fifo_un[i] = NULL;
29669 		}
29670 		if (arg != (uintptr_t)NULL) {
29671 			un->sd_fi_fifo_un[i] =
29672 			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
29673 			if (un->sd_fi_fifo_un[i] == NULL) {
29674 				/* Alloc failed don't store anything */
29675 				break;
29676 			}
29677 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
29678 			    sizeof (struct sd_fi_un), 0);
29679 			if (rval == -1) {
29680 				kmem_free(un->sd_fi_fifo_un[i],
29681 				    sizeof (struct sd_fi_un));
29682 				un->sd_fi_fifo_un[i] = NULL;
29683 			}
29684 
29685 		} else {
29686 			SD_INFO(SD_LOG_IOERR, un,
29687 			    "sd_faultinjection_ioctl: un null\n");
29688 		}
29689 
29690 		break;
29691 
29692 	case SDIOCINSERTARQ:
29693 		/* Store a arq struct to be pushed onto fifo */
29694 		SD_INFO(SD_LOG_SDTEST, un,
29695 		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
29696 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
29697 
29698 		sd_fault_injection_on = 0;
29699 
29700 		if (un->sd_fi_fifo_arq[i] != NULL) {
29701 			kmem_free(un->sd_fi_fifo_arq[i],
29702 			    sizeof (struct sd_fi_arq));
29703 			un->sd_fi_fifo_arq[i] = NULL;
29704 		}
29705 		if (arg != (uintptr_t)NULL) {
29706 			un->sd_fi_fifo_arq[i] =
29707 			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
29708 			if (un->sd_fi_fifo_arq[i] == NULL) {
29709 				/* Alloc failed don't store anything */
29710 				break;
29711 			}
29712 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
29713 			    sizeof (struct sd_fi_arq), 0);
29714 			if (rval == -1) {
29715 				kmem_free(un->sd_fi_fifo_arq[i],
29716 				    sizeof (struct sd_fi_arq));
29717 				un->sd_fi_fifo_arq[i] = NULL;
29718 			}
29719 
29720 		} else {
29721 			SD_INFO(SD_LOG_IOERR, un,
29722 			    "sd_faultinjection_ioctl: arq null\n");
29723 		}
29724 
29725 		break;
29726 
29727 	case SDIOCPUSH:
29728 		/* Push stored xb, pkt, un, and arq onto fifo */
29729 		sd_fault_injection_on = 0;
29730 
29731 		if (arg != (uintptr_t)NULL) {
29732 			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
29733 			if (rval != -1 &&
29734 			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
29735 				un->sd_fi_fifo_end += i;
29736 			}
29737 		} else {
29738 			SD_INFO(SD_LOG_IOERR, un,
29739 			    "sd_faultinjection_ioctl: push arg null\n");
29740 			if (un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
29741 				un->sd_fi_fifo_end++;
29742 			}
29743 		}
29744 		SD_INFO(SD_LOG_IOERR, un,
29745 		    "sd_faultinjection_ioctl: push to end=%d\n",
29746 		    un->sd_fi_fifo_end);
29747 		break;
29748 
29749 	case SDIOCRETRIEVE:
29750 		/* Return buffer of log from Injection session */
29751 		SD_INFO(SD_LOG_SDTEST, un,
29752 		    "sd_faultinjection_ioctl: Injecting Fault Retreive");
29753 
29754 		sd_fault_injection_on = 0;
29755 
29756 		mutex_enter(&(un->un_fi_mutex));
29757 		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
29758 		    un->sd_fi_buf_len+1, 0);
29759 		mutex_exit(&(un->un_fi_mutex));
29760 
29761 		if (rval == -1) {
29762 			/*
29763 			 * arg is possibly invalid setting
29764 			 * it to NULL for return
29765 			 */
29766 			arg = (uintptr_t)NULL;
29767 		}
29768 		break;
29769 	}
29770 
29771 	mutex_exit(SD_MUTEX(un));
29772 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: exit\n");
29773 }
29774 
29775 
29776 /*
29777  *    Function: sd_injection_log()
29778  *
29779  * Description: This routine adds buff to the already existing injection log
29780  *              for retrieval via faultinjection_ioctl for use in fault
29781  *              detection and recovery
29782  *
29783  *   Arguments: buf - the string to add to the log
29784  */
29785 
29786 static void
29787 sd_injection_log(char *buf, struct sd_lun *un)
29788 {
29789 	uint_t len;
29790 
29791 	ASSERT(un != NULL);
29792 	ASSERT(buf != NULL);
29793 
29794 	mutex_enter(&(un->un_fi_mutex));
29795 
29796 	len = min(strlen(buf), 255);
29797 	/* Add logged value to Injection log to be returned later */
29798 	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
29799 		uint_t	offset = strlen((char *)un->sd_fi_log);
29800 		char *destp = (char *)un->sd_fi_log + offset;
29801 		int i;
29802 		for (i = 0; i < len; i++) {
29803 			*destp++ = *buf++;
29804 		}
29805 		un->sd_fi_buf_len += len;
29806 		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
29807 	}
29808 
29809 	mutex_exit(&(un->un_fi_mutex));
29810 }
29811 
29812 
29813 /*
29814  *    Function: sd_faultinjection()
29815  *
29816  * Description: This routine takes the pkt and changes its
29817  *		content based on error injection scenerio.
29818  *
29819  *   Arguments: pktp	- packet to be changed
29820  */
29821 
29822 static void
29823 sd_faultinjection(struct scsi_pkt *pktp)
29824 {
29825 	uint_t i;
29826 	struct sd_fi_pkt *fi_pkt;
29827 	struct sd_fi_xb *fi_xb;
29828 	struct sd_fi_un *fi_un;
29829 	struct sd_fi_arq *fi_arq;
29830 	struct buf *bp;
29831 	struct sd_xbuf *xb;
29832 	struct sd_lun *un;
29833 
29834 	ASSERT(pktp != NULL);
29835 
29836 	/* pull bp xb and un from pktp */
29837 	bp = (struct buf *)pktp->pkt_private;
29838 	xb = SD_GET_XBUF(bp);
29839 	un = SD_GET_UN(bp);
29840 
29841 	ASSERT(un != NULL);
29842 
29843 	mutex_enter(SD_MUTEX(un));
29844 
29845 	SD_TRACE(SD_LOG_SDTEST, un,
29846 	    "sd_faultinjection: entry Injection from sdintr\n");
29847 
29848 	/* if injection is off return */
29849 	if (sd_fault_injection_on == 0 ||
29850 	    un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
29851 		mutex_exit(SD_MUTEX(un));
29852 		return;
29853 	}
29854 
29855 	SD_INFO(SD_LOG_SDTEST, un,
29856 	    "sd_faultinjection: is working for copying\n");
29857 
29858 	/* take next set off fifo */
29859 	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;
29860 
29861 	fi_pkt = un->sd_fi_fifo_pkt[i];
29862 	fi_xb = un->sd_fi_fifo_xb[i];
29863 	fi_un = un->sd_fi_fifo_un[i];
29864 	fi_arq = un->sd_fi_fifo_arq[i];
29865 
29866 
29867 	/* set variables accordingly */
29868 	/* set pkt if it was on fifo */
29869 	if (fi_pkt != NULL) {
29870 		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
29871 		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
29872 		if (fi_pkt->pkt_cdbp != 0xff)
29873 			SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
29874 		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
29875 		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
29876 		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");
29877 
29878 	}
29879 	/* set xb if it was on fifo */
29880 	if (fi_xb != NULL) {
29881 		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
29882 		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
29883 		if (fi_xb->xb_retry_count != 0)
29884 			SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
29885 		SD_CONDSET(xb, xb, xb_victim_retry_count,
29886 		    "xb_victim_retry_count");
29887 		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
29888 		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
29889 		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");
29890 
29891 		/* copy in block data from sense */
29892 		/*
29893 		 * if (fi_xb->xb_sense_data[0] != -1) {
29894 		 *	bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
29895 		 *	SENSE_LENGTH);
29896 		 * }
29897 		 */
29898 		bcopy(fi_xb->xb_sense_data, xb->xb_sense_data, SENSE_LENGTH);
29899 
29900 		/* copy in extended sense codes */
29901 		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
29902 		    xb, es_code, "es_code");
29903 		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
29904 		    xb, es_key, "es_key");
29905 		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
29906 		    xb, es_add_code, "es_add_code");
29907 		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
29908 		    xb, es_qual_code, "es_qual_code");
29909 		struct scsi_extended_sense *esp;
29910 		esp = (struct scsi_extended_sense *)xb->xb_sense_data;
29911 		esp->es_class = CLASS_EXTENDED_SENSE;
29912 	}
29913 
29914 	/* set un if it was on fifo */
29915 	if (fi_un != NULL) {
29916 		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
29917 		SD_CONDSET(un, un, un_ctype, "un_ctype");
29918 		SD_CONDSET(un, un, un_reset_retry_count,
29919 		    "un_reset_retry_count");
29920 		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
29921 		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
29922 		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
29923 		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
29924 		    "un_f_allow_bus_device_reset");
29925 		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");
29926 
29927 	}
29928 
29929 	/* copy in auto request sense if it was on fifo */
29930 	if (fi_arq != NULL) {
29931 		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
29932 	}
29933 
29934 	/* free structs */
29935 	if (un->sd_fi_fifo_pkt[i] != NULL) {
29936 		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
29937 	}
29938 	if (un->sd_fi_fifo_xb[i] != NULL) {
29939 		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
29940 	}
29941 	if (un->sd_fi_fifo_un[i] != NULL) {
29942 		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
29943 	}
29944 	if (un->sd_fi_fifo_arq[i] != NULL) {
29945 		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
29946 	}
29947 
29948 	/*
29949 	 * kmem_free does not set the freed pointers to NULL.
29950 	 * Since we use these pointers to determine whether to
29951 	 * set values or not, make sure they are always NULL
29952 	 * after the free.
29953 	 */
29954 	un->sd_fi_fifo_pkt[i] = NULL;
29955 	un->sd_fi_fifo_un[i] = NULL;
29956 	un->sd_fi_fifo_xb[i] = NULL;
29957 	un->sd_fi_fifo_arq[i] = NULL;
29958 
29959 	un->sd_fi_fifo_start++;
29960 
29961 	mutex_exit(SD_MUTEX(un));
29962 
29963 	SD_INFO(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
29964 }
29965 
29966 #endif /* SD_FAULT_INJECTION */
29967 
29968 /*
29969  * This routine is invoked in sd_unit_attach(). Before calling it, the
29970  * properties in conf file should be processed already, and "hotpluggable"
29971  * property was processed also.
29972  *
29973  * The sd driver distinguishes 3 different type of devices: removable media,
29974  * non-removable media, and hotpluggable. Below the differences are defined:
29975  *
29976  * 1. Device ID
29977  *
29978  *     The device ID of a device is used to identify this device. Refer to
29979  *     ddi_devid_register(9F).
29980  *
29981  *     For a non-removable media disk device which can provide 0x80 or 0x83
29982  *     VPD page (refer to INQUIRY command of SCSI SPC specification), a unique
29983  *     device ID is created to identify this device. For other non-removable
29984  *     media devices, a default device ID is created only if this device has
29985  *     at least 2 alternate cylinders. Otherwise, this device has no devid.
29986  *
29987  *     -------------------------------------------------------
29988  *     removable media   hotpluggable  | Can Have Device ID
29989  *     -------------------------------------------------------
29990  *         false             false     |     Yes
29991  *         false             true      |     Yes
29992  *         true                x       |     No
29993  *     ------------------------------------------------------
29994  *
29995  *
29996  * 2. SCSI group 4 commands
29997  *
29998  *     In SCSI specs, only some commands in group 4 command set can use
29999  *     8-byte addresses that can be used to access >2TB storage spaces.
30000  *     Other commands have no such capability. Without supporting group4,
30001  *     it is impossible to make full use of storage spaces of a disk with
30002  *     capacity larger than 2TB.
30003  *
30004  *     -----------------------------------------------
30005  *     removable media   hotpluggable   LP64  |  Group
30006  *     -----------------------------------------------
30007  *           false          false       false |   1
30008  *           false          false       true  |   4
30009  *           false          true        false |   1
30010  *           false          true        true  |   4
30011  *           true             x           x   |   5
30012  *     -----------------------------------------------
30013  *
30014  *
30015  * 3. Check for VTOC Label
30016  *
30017  *     If a direct-access disk has no EFI label, sd will check if it has a
30018  *     valid VTOC label. Now, sd also does that check for removable media
30019  *     and hotpluggable devices.
30020  *
30021  *     --------------------------------------------------------------
30022  *     Direct-Access   removable media    hotpluggable |  Check Label
30023  *     -------------------------------------------------------------
30024  *         false          false           false        |   No
30025  *         false          false           true         |   No
30026  *         false          true            false        |   Yes
30027  *         false          true            true         |   Yes
30028  *         true            x                x          |   Yes
30029  *     --------------------------------------------------------------
30030  *
30031  *
30032  * 4. Building default VTOC label
30033  *
30034  *     As section 3 says, sd checks if some kinds of devices have VTOC label.
30035  *     If those devices have no valid VTOC label, sd(4D) will attempt to
30036  *     create default VTOC for them. Currently sd creates default VTOC label
30037  *     for all devices on x86 platform (VTOC_16), but only for removable
30038  *     media devices on SPARC (VTOC_8).
30039  *
30040  *     -----------------------------------------------------------
30041  *       removable media hotpluggable platform   |   Default Label
30042  *     -----------------------------------------------------------
30043  *             false          false    sparc     |     No
30044  *             false          true      x86      |     Yes
30045  *             false          true     sparc     |     Yes
30046  *             true             x        x       |     Yes
30047  *     ----------------------------------------------------------
30048  *
30049  *
30050  * 5. Supported blocksizes of target devices
30051  *
30052  *     Sd supports non-512-byte blocksize for removable media devices only.
30053  *     For other devices, only 512-byte blocksize is supported. This may be
30054  *     changed in near future because some RAID devices require non-512-byte
30055  *     blocksize
30056  *
30057  *     -----------------------------------------------------------
30058  *     removable media    hotpluggable    | non-512-byte blocksize
30059  *     -----------------------------------------------------------
30060  *           false          false         |   No
30061  *           false          true          |   No
30062  *           true             x           |   Yes
30063  *     -----------------------------------------------------------
30064  *
30065  *
30066  * 6. Automatic mount & unmount
30067  *
30068  *     The sd(4D) driver provides the DKIOCREMOVABLE ioctl, which is used to
30069  *     query whether a device is a removable media device. It returns 1 for
30070  *     removable media devices, and 0 for others.
30071  *
30072  *     The automatic mounting subsystem should distinguish between the types
30073  *     of devices and apply automounting policies to each.
30074  *
30075  *
30076  * 7. fdisk partition management
30077  *
30078  *     Fdisk is the traditional partitioning method on the x86 platform. The
30079  *     sd(4D) driver supports fdisk partitions only on x86. On the SPARC
30080  *     platform, sd doesn't support fdisk partitions at all. Note: pcfs(4FS)
30081  *     can recognize fdisk partitions on both x86 and SPARC platforms.
30082  *
30083  *     -----------------------------------------------------------
30084  *       platform   removable media  USB/1394  |  fdisk supported
30085  *     -----------------------------------------------------------
30086  *        x86         X               X        |       true
30087  *     ------------------------------------------------------------
30088  *        sparc       X               X        |       false
30089  *     ------------------------------------------------------------
30090  *
30091  *
30092  * 8. MBOOT/MBR
30093  *
30094  *     Although sd(4D) doesn't support fdisk on SPARC platform, it does support
30095  *     read/write mboot for removable media devices on sparc platform.
30096  *
30097  *     -----------------------------------------------------------
30098  *       platform   removable media  USB/1394  |  mboot supported
30099  *     -----------------------------------------------------------
30100  *        x86         X               X        |       true
30101  *     ------------------------------------------------------------
30102  *        sparc      false           false     |       false
30103  *        sparc      false           true      |       true
30104  *        sparc      true            false     |       true
30105  *        sparc      true            true      |       true
30106  *     ------------------------------------------------------------
30107  *
30108  *
30109  * 9.  error handling during opening device
30110  *
30111  *     If opening a disk device fails, an errno is returned. For some kinds
30112  *     of errors, a different errno is returned depending on whether this
30113  *     device is a removable media device. This brings USB/1394 hard disks in
30114  *     line with expected hard disk behavior. It is not expected that this
30115  *     breaks any application.
30116  *
30117  *     ------------------------------------------------------
30118  *       removable media    hotpluggable   |  errno
30119  *     ------------------------------------------------------
30120  *             false          false        |   EIO
30121  *             false          true         |   EIO
30122  *             true             x          |   ENXIO
30123  *     ------------------------------------------------------
30124  *
30125  *
30126  * 11. ioctls: DKIOCEJECT, CDROMEJECT
30127  *
30128  *     These IOCTLs are applicable only to removable media devices.
30129  *
30130  *     -----------------------------------------------------------
30131  *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
30132  *     -----------------------------------------------------------
30133  *             false          false        |     No
30134  *             false          true         |     No
30135  *             true            x           |     Yes
30136  *     -----------------------------------------------------------
30137  *
30138  *
30139  * 12. Kstats for partitions
30140  *
30141  *     sd creates partition kstat for non-removable media devices. USB and
30142  *     Firewire hard disks now have partition kstats
30143  *
30144  *      ------------------------------------------------------
30145  *       removable media    hotpluggable   |   kstat
30146  *      ------------------------------------------------------
30147  *             false          false        |    Yes
30148  *             false          true         |    Yes
30149  *             true             x          |    No
30150  *       ------------------------------------------------------
30151  *
30152  *
30153  * 13. Removable media & hotpluggable properties
30154  *
30155  *     Sd driver creates a "removable-media" property for removable media
30156  *     devices. Parent nexus drivers create a "hotpluggable" property if
30157  *     it supports hotplugging.
30158  *
30159  *     ---------------------------------------------------------------------
30160  *     removable media   hotpluggable |  "removable-media"   " hotpluggable"
30161  *     ---------------------------------------------------------------------
30162  *       false            false       |    No                   No
30163  *       false            true        |    No                   Yes
30164  *       true             false       |    Yes                  No
30165  *       true             true        |    Yes                  Yes
30166  *     ---------------------------------------------------------------------
30167  *
30168  *
30169  * 14. Power Management
30170  *
30171  *     sd only power manages removable media devices or devices that support
30172  *     LOG_SENSE or have a "pm-capable" property  (PSARC/2002/250)
30173  *
30174  *     A parent nexus that supports hotplugging can also set "pm-capable"
30175  *     if the disk can be power managed.
30176  *
30177  *     ------------------------------------------------------------
30178  *       removable media hotpluggable pm-capable  |   power manage
30179  *     ------------------------------------------------------------
30180  *             false          false     false     |     No
30181  *             false          false     true      |     Yes
30182  *             false          true      false     |     No
30183  *             false          true      true      |     Yes
30184  *             true             x        x        |     Yes
30185  *     ------------------------------------------------------------
30186  *
30187  *      USB and firewire hard disks can now be power managed independently
30188  *      of the framebuffer
30189  *
30190  *
30191  * 15. Support for USB disks with capacity larger than 1TB
30192  *
30193  *     Currently, sd doesn't permit a fixed disk device with capacity
30194  *     larger than 1TB to be used in a 32-bit operating system environment.
30195  *     However, sd doesn't do that for removable media devices. Instead, it
30196  *     assumes that removable media devices cannot have a capacity larger
30197  *     than 1TB. Therefore, using those devices on 32-bit system is partially
30198  *     supported, which can cause some unexpected results.
30199  *
30200  *     ---------------------------------------------------------------------
30201  *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
30202  *     ---------------------------------------------------------------------
30203  *             false          false  |   true         |     no
30204  *             false          true   |   true         |     no
30205  *             true           false  |   true         |     Yes
30206  *             true           true   |   true         |     Yes
30207  *     ---------------------------------------------------------------------
30208  *
30209  *
30210  * 16. Check write-protection at open time
30211  *
30212  *     When a removable media device is being opened for writing without the
30213  *     NDELAY flag, sd will check if this device is writable. An attempt to
30214  *     open a write-protected device without the NDELAY flag will be aborted.
30215  *
30216  *     ------------------------------------------------------------
30217  *       removable media    USB/1394   |   WP Check
30218  *     ------------------------------------------------------------
30219  *             false          false    |     No
30220  *             false          true     |     No
30221  *             true           false    |     Yes
30222  *             true           true     |     Yes
30223  *     ------------------------------------------------------------
30224  *
30225  *
30226  * 17. syslog when corrupted VTOC is encountered
30227  *
30228  *      Currently, if an invalid VTOC is encountered, sd only prints a syslog
30229  *      message for fixed SCSI disks.
30230  *     ------------------------------------------------------------
30231  *       removable media    USB/1394   |   print syslog
30232  *     ------------------------------------------------------------
30233  *             false          false    |     Yes
30234  *             false          true     |     No
30235  *             true           false    |     No
30236  *             true           true     |     No
30237  *     ------------------------------------------------------------
30238  */
30239 static void
30240 sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
30241 {
30242 	int	pm_cap;
30243 
30244 	ASSERT(un->un_sd);
30245 	ASSERT(un->un_sd->sd_inq);
30246 
30247 	/*
30248 	 * Enable SYNC CACHE support for all devices.
30249 	 */
30250 	un->un_f_sync_cache_supported = TRUE;
30251 
30252 	/*
30253 	 * Set the sync cache required flag to false.
30254 	 * This would ensure that there is no SYNC CACHE
30255 	 * sent when there are no writes
30256 	 */
30257 	un->un_f_sync_cache_required = FALSE;
30258 
30259 	if (un->un_sd->sd_inq->inq_rmb) {
30260 		/*
30261 		 * The media of this device is removable. And for this kind
30262 		 * of devices, it is possible to change medium after opening
30263 		 * devices. Thus we should support this operation.
30264 		 */
30265 		un->un_f_has_removable_media = TRUE;
30266 
30267 		/*
30268 		 * support non-512-byte blocksize of removable media devices
30269 		 */
30270 		un->un_f_non_devbsize_supported = TRUE;
30271 
30272 		/*
30273 		 * Assume that all removable media devices support DOOR_LOCK
30274 		 */
30275 		un->un_f_doorlock_supported = TRUE;
30276 
30277 		/*
30278 		 * For a removable media device, it is possible to be opened
30279 		 * with NDELAY flag when there is no media in drive, in this
30280 		 * case we don't care if device is writable. But if without
30281 		 * NDELAY flag, we need to check if media is write-protected.
30282 		 */
30283 		un->un_f_chk_wp_open = TRUE;
30284 
30285 		/*
30286 		 * need to start a SCSI watch thread to monitor media state,
30287 		 * when media is being inserted or ejected, notify syseventd.
30288 		 */
30289 		un->un_f_monitor_media_state = TRUE;
30290 
30291 		/*
30292 		 * Some devices don't support START_STOP_UNIT command.
30293 		 * Therefore, we'd better check if a device supports it
30294 		 * before sending it.
30295 		 */
30296 		un->un_f_check_start_stop = TRUE;
30297 
30298 		/*
30299 		 * support eject media ioctl:
30300 		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
30301 		 */
30302 		un->un_f_eject_media_supported = TRUE;
30303 
30304 		/*
30305 		 * Because many removable-media devices don't support
30306 		 * LOG_SENSE, we couldn't use this command to check if
30307 		 * a removable media device support power-management.
30308 		 * We assume that they support power-management via
30309 		 * START_STOP_UNIT command and can be spun up and down
30310 		 * without limitations.
30311 		 */
30312 		un->un_f_pm_supported = TRUE;
30313 
30314 		/*
30315 		 * Need to create a zero length (Boolean) property
30316 		 * removable-media for the removable media devices.
30317 		 * Note that the return value of the property is not being
30318 		 * checked, since if unable to create the property
30319 		 * then do not want the attach to fail altogether. Consistent
30320 		 * with other property creation in attach.
30321 		 */
30322 		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
30323 		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
30324 
30325 	} else {
30326 		/*
30327 		 * create device ID for device
30328 		 */
30329 		un->un_f_devid_supported = TRUE;
30330 
30331 		/*
30332 		 * Spin up non-removable-media devices once it is attached
30333 		 */
30334 		un->un_f_attach_spinup = TRUE;
30335 
30336 		/*
30337 		 * According to SCSI specification, Sense data has two kinds of
30338 		 * format: fixed format, and descriptor format. At present, we
30339 		 * don't support descriptor format sense data for removable
30340 		 * media.
30341 		 */
30342 		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
30343 			un->un_f_descr_format_supported = TRUE;
30344 		}
30345 
30346 		/*
30347 		 * kstats are created only for non-removable media devices.
30348 		 *
30349 		 * Set this in sd.conf to 0 in order to disable kstats.  The
30350 		 * default is 1, so they are enabled by default.
30351 		 */
30352 		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
30353 		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
30354 		    "enable-partition-kstats", 1));
30355 
30356 		/*
30357 		 * Check if HBA has set the "pm-capable" property.
30358 		 * If "pm-capable" exists and is non-zero then we can
30359 		 * power manage the device without checking the start/stop
30360 		 * cycle count log sense page.
30361 		 *
30362 		 * If "pm-capable" exists and is set to be false (0),
30363 		 * then we should not power manage the device.
30364 		 *
30365 		 * If "pm-capable" doesn't exist then pm_cap will
30366 		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
30367 		 * sd will check the start/stop cycle count log sense page
30368 		 * and power manage the device if the cycle count limit has
30369 		 * not been exceeded.
30370 		 */
30371 		pm_cap = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
30372 		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
30373 		if (SD_PM_CAPABLE_IS_UNDEFINED(pm_cap)) {
30374 			un->un_f_log_sense_supported = TRUE;
30375 			if (!un->un_f_power_condition_disabled &&
30376 			    SD_INQUIRY(un)->inq_ansi == 6) {
30377 				un->un_f_power_condition_supported = TRUE;
30378 			}
30379 		} else {
30380 			/*
30381 			 * pm-capable property exists.
30382 			 *
30383 			 * Convert "TRUE" values for pm_cap to
30384 			 * SD_PM_CAPABLE_IS_TRUE to make it easier to check
30385 			 * later. "TRUE" values are any values defined in
30386 			 * inquiry.h.
30387 			 */
30388 			if (SD_PM_CAPABLE_IS_FALSE(pm_cap)) {
30389 				un->un_f_log_sense_supported = FALSE;
30390 			} else {
30391 				/* SD_PM_CAPABLE_IS_TRUE case */
30392 				un->un_f_pm_supported = TRUE;
30393 				if (!un->un_f_power_condition_disabled &&
30394 				    SD_PM_CAPABLE_IS_SPC_4(pm_cap)) {
30395 					un->un_f_power_condition_supported =
30396 					    TRUE;
30397 				}
30398 				if (SD_PM_CAP_LOG_SUPPORTED(pm_cap)) {
30399 					un->un_f_log_sense_supported = TRUE;
30400 					un->un_f_pm_log_sense_smart =
30401 					    SD_PM_CAP_SMART_LOG(pm_cap);
30402 				}
30403 			}
30404 
30405 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
30406 			    "sd_unit_attach: un:0x%p pm-capable "
30407 			    "property set to %d.\n", un, un->un_f_pm_supported);
30408 		}
30409 	}
30410 
30411 	if (un->un_f_is_hotpluggable) {
30412 
30413 		/*
30414 		 * Have to watch hotpluggable devices as well, since
30415 		 * that's the only way for userland applications to
30416 		 * detect hot removal while device is busy/mounted.
30417 		 */
30418 		un->un_f_monitor_media_state = TRUE;
30419 
30420 		un->un_f_check_start_stop = TRUE;
30421 
30422 	}
30423 }
30424 
30425 /*
30426  * sd_tg_rdwr:
30427  * Provides rdwr access for cmlb via sd_tgops. The start_block is
30428  * in sys block size, req_length in bytes.
30429  *
30430  */
30431 static int
30432 sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
30433     diskaddr_t start_block, size_t reqlength, void *tg_cookie)
30434 {
30435 	struct sd_lun *un;
30436 	int path_flag = (int)(uintptr_t)tg_cookie;
30437 	char *dkl = NULL;
30438 	diskaddr_t real_addr = start_block;
30439 	diskaddr_t first_byte, end_block;
30440 
30441 	size_t	buffer_size = reqlength;
30442 	int rval = 0;
30443 	diskaddr_t	cap;
30444 	uint32_t	lbasize;
30445 	sd_ssc_t	*ssc;
30446 
30447 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
30448 	if (un == NULL)
30449 		return (ENXIO);
30450 
30451 	if (cmd != TG_READ && cmd != TG_WRITE)
30452 		return (EINVAL);
30453 
30454 	ssc = sd_ssc_init(un);
30455 	mutex_enter(SD_MUTEX(un));
30456 	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
30457 		mutex_exit(SD_MUTEX(un));
30458 		rval = sd_send_scsi_READ_CAPACITY(ssc, (uint64_t *)&cap,
30459 		    &lbasize, path_flag);
30460 		if (rval != 0)
30461 			goto done1;
30462 		mutex_enter(SD_MUTEX(un));
30463 		sd_update_block_info(un, lbasize, cap);
30464 		if ((un->un_f_tgt_blocksize_is_valid == FALSE)) {
30465 			mutex_exit(SD_MUTEX(un));
30466 			rval = EIO;
30467 			goto done;
30468 		}
30469 	}
30470 
30471 	if (NOT_DEVBSIZE(un)) {
30472 		/*
30473 		 * sys_blocksize != tgt_blocksize, need to re-adjust
30474 		 * blkno and save the index to beginning of dk_label
30475 		 */
30476 		first_byte  = SD_SYSBLOCKS2BYTES(start_block);
30477 		real_addr = first_byte / un->un_tgt_blocksize;
30478 
30479 		end_block = (first_byte + reqlength +
30480 		    un->un_tgt_blocksize - 1) / un->un_tgt_blocksize;
30481 
30482 		/* round up buffer size to multiple of target block size */
30483 		buffer_size = (end_block - real_addr) * un->un_tgt_blocksize;
30484 
30485 		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_tg_rdwr",
30486 		    "label_addr: 0x%x allocation size: 0x%x\n",
30487 		    real_addr, buffer_size);
30488 
30489 		if (((first_byte % un->un_tgt_blocksize) != 0) ||
30490 		    (reqlength % un->un_tgt_blocksize) != 0)
30491 			/* the request is not aligned */
30492 			dkl = kmem_zalloc(buffer_size, KM_SLEEP);
30493 	}
30494 
30495 	/*
30496 	 * The MMC standard allows READ CAPACITY to be
30497 	 * inaccurate by a bounded amount (in the interest of
30498 	 * response latency).  As a result, failed READs are
30499 	 * commonplace (due to the reading of metadata and not
30500 	 * data). Depending on the per-Vendor/drive Sense data,
30501 	 * the failed READ can cause many (unnecessary) retries.
30502 	 */
30503 
30504 	if (ISCD(un) && (cmd == TG_READ) &&
30505 	    (un->un_f_blockcount_is_valid == TRUE) &&
30506 	    ((start_block == (un->un_blockcount - 1)) ||
30507 	    (start_block == (un->un_blockcount - 2)))) {
30508 			path_flag = SD_PATH_DIRECT_PRIORITY;
30509 	}
30510 
30511 	mutex_exit(SD_MUTEX(un));
30512 	if (cmd == TG_READ) {
30513 		rval = sd_send_scsi_READ(ssc, (dkl != NULL) ? dkl : bufaddr,
30514 		    buffer_size, real_addr, path_flag);
30515 		if (dkl != NULL)
30516 			bcopy(dkl + SD_TGTBYTEOFFSET(un, start_block,
30517 			    real_addr), bufaddr, reqlength);
30518 	} else {
30519 		if (dkl) {
30520 			rval = sd_send_scsi_READ(ssc, dkl, buffer_size,
30521 			    real_addr, path_flag);
30522 			if (rval) {
30523 				goto done1;
30524 			}
30525 			bcopy(bufaddr, dkl + SD_TGTBYTEOFFSET(un, start_block,
30526 			    real_addr), reqlength);
30527 		}
30528 		rval = sd_send_scsi_WRITE(ssc, (dkl != NULL) ? dkl : bufaddr,
30529 		    buffer_size, real_addr, path_flag);
30530 	}
30531 
30532 done1:
30533 	if (dkl != NULL)
30534 		kmem_free(dkl, buffer_size);
30535 
30536 	if (rval != 0) {
30537 		if (rval == EIO)
30538 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
30539 		else
30540 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
30541 	}
30542 done:
30543 	sd_ssc_fini(ssc);
30544 	return (rval);
30545 }
30546 
30547 
30548 static int
30549 sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie)
30550 {
30551 
30552 	struct sd_lun *un;
30553 	diskaddr_t	cap;
30554 	uint32_t	lbasize;
30555 	int		path_flag = (int)(uintptr_t)tg_cookie;
30556 	int		ret = 0;
30557 
30558 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
30559 	if (un == NULL)
30560 		return (ENXIO);
30561 
30562 	switch (cmd) {
30563 	case TG_GETPHYGEOM:
30564 	case TG_GETVIRTGEOM:
30565 	case TG_GETCAPACITY:
30566 	case TG_GETBLOCKSIZE:
30567 		mutex_enter(SD_MUTEX(un));
30568 
30569 		if ((un->un_f_blockcount_is_valid == TRUE) &&
30570 		    (un->un_f_tgt_blocksize_is_valid == TRUE)) {
30571 			cap = un->un_blockcount;
30572 			lbasize = un->un_tgt_blocksize;
30573 			mutex_exit(SD_MUTEX(un));
30574 		} else {
30575 			sd_ssc_t	*ssc;
30576 			mutex_exit(SD_MUTEX(un));
30577 			ssc = sd_ssc_init(un);
30578 			ret = sd_send_scsi_READ_CAPACITY(ssc, (uint64_t *)&cap,
30579 			    &lbasize, path_flag);
30580 			if (ret != 0) {
30581 				if (ret == EIO)
30582 					sd_ssc_assessment(ssc,
30583 					    SD_FMT_STATUS_CHECK);
30584 				else
30585 					sd_ssc_assessment(ssc,
30586 					    SD_FMT_IGNORE);
30587 				sd_ssc_fini(ssc);
30588 				return (ret);
30589 			}
30590 			sd_ssc_fini(ssc);
30591 			mutex_enter(SD_MUTEX(un));
30592 			sd_update_block_info(un, lbasize, cap);
30593 			if ((un->un_f_blockcount_is_valid == FALSE) ||
30594 			    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
30595 				mutex_exit(SD_MUTEX(un));
30596 				return (EIO);
30597 			}
30598 			mutex_exit(SD_MUTEX(un));
30599 		}
30600 
30601 		if (cmd == TG_GETCAPACITY) {
30602 			*(diskaddr_t *)arg = cap;
30603 			return (0);
30604 		}
30605 
30606 		if (cmd == TG_GETBLOCKSIZE) {
30607 			*(uint32_t *)arg = lbasize;
30608 			return (0);
30609 		}
30610 
30611 		if (cmd == TG_GETPHYGEOM)
30612 			ret = sd_get_physical_geometry(un, (cmlb_geom_t *)arg,
30613 			    cap, lbasize, path_flag);
30614 		else
30615 			/* TG_GETVIRTGEOM */
30616 			ret = sd_get_virtual_geometry(un,
30617 			    (cmlb_geom_t *)arg, cap, lbasize);
30618 
30619 		return (ret);
30620 
30621 	case TG_GETATTR:
30622 		mutex_enter(SD_MUTEX(un));
30623 		((tg_attribute_t *)arg)->media_is_writable =
30624 		    un->un_f_mmc_writable_media;
30625 		((tg_attribute_t *)arg)->media_is_solid_state =
30626 		    un->un_f_is_solid_state;
30627 		((tg_attribute_t *)arg)->media_is_rotational =
30628 		    un->un_f_is_rotational;
30629 		mutex_exit(SD_MUTEX(un));
30630 		return (0);
30631 	default:
30632 		return (ENOTTY);
30633 
30634 	}
30635 }
30636 
30637 /*
30638  *    Function: sd_ssc_ereport_post
30639  *
30640  * Description: Will be called when SD driver need to post an ereport.
30641  *
30642  *    Context: Kernel thread or interrupt context.
30643  */
30644 
/*
 * Expands to the three comma-separated "devid" payload arguments (name,
 * type, value) passed to scsi_fm_ereport_post() below; when the devid
 * string d is NULL the value is the literal "unknown".
 */
#define	DEVID_IF_KNOWN(d)	\
	"devid", DATA_TYPE_STRING, ((d) ? (d) : "unknown")
30646 
30647 static void
30648 sd_ssc_ereport_post(sd_ssc_t *ssc, enum sd_driver_assessment drv_assess)
30649 {
30650 	int uscsi_path_instance = 0;
30651 	uchar_t	uscsi_pkt_reason;
30652 	uint32_t uscsi_pkt_state;
30653 	uint32_t uscsi_pkt_statistics;
30654 	uint64_t uscsi_ena;
30655 	uchar_t op_code;
30656 	uint8_t *sensep;
30657 	union scsi_cdb *cdbp;
30658 	uint_t cdblen = 0;
30659 	uint_t senlen = 0;
30660 	struct sd_lun *un;
30661 	dev_info_t *dip;
30662 	char *devid;
30663 	int ssc_invalid_flags = SSC_FLAGS_INVALID_PKT_REASON |
30664 	    SSC_FLAGS_INVALID_STATUS |
30665 	    SSC_FLAGS_INVALID_SENSE |
30666 	    SSC_FLAGS_INVALID_DATA;
30667 	char assessment[16];
30668 
30669 	ASSERT(ssc != NULL);
30670 	ASSERT(ssc->ssc_uscsi_cmd != NULL);
30671 	ASSERT(ssc->ssc_uscsi_info != NULL);
30672 
30673 	un = ssc->ssc_un;
30674 	ASSERT(un != NULL);
30675 
30676 	dip = un->un_sd->sd_dev;
30677 
30678 	/*
30679 	 * Get the devid:
30680 	 *	devid will only be passed to non-transport error reports.
30681 	 */
30682 	devid = DEVI(dip)->devi_devid_str;
30683 
30684 	/*
30685 	 * If we are syncing or dumping, the command will not be executed
30686 	 * so we bypass this situation.
30687 	 */
30688 	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
30689 	    (un->un_state == SD_STATE_DUMPING))
30690 		return;
30691 
30692 	uscsi_pkt_reason = ssc->ssc_uscsi_info->ui_pkt_reason;
30693 	uscsi_path_instance = ssc->ssc_uscsi_cmd->uscsi_path_instance;
30694 	uscsi_pkt_state = ssc->ssc_uscsi_info->ui_pkt_state;
30695 	uscsi_pkt_statistics = ssc->ssc_uscsi_info->ui_pkt_statistics;
30696 	uscsi_ena = ssc->ssc_uscsi_info->ui_ena;
30697 
30698 	sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
30699 	cdbp = (union scsi_cdb *)ssc->ssc_uscsi_cmd->uscsi_cdb;
30700 
30701 	/* In rare cases, EG:DOORLOCK, the cdb could be NULL */
30702 	if (cdbp == NULL) {
30703 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
30704 		    "sd_ssc_ereport_post meet empty cdb\n");
30705 		return;
30706 	}
30707 
30708 	op_code = cdbp->scc_cmd;
30709 
30710 	cdblen = (int)ssc->ssc_uscsi_cmd->uscsi_cdblen;
30711 	senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
30712 	    ssc->ssc_uscsi_cmd->uscsi_rqresid);
30713 
30714 	if (senlen > 0)
30715 		ASSERT(sensep != NULL);
30716 
30717 	/*
30718 	 * Initialize drv_assess to corresponding values.
30719 	 * SD_FM_DRV_FATAL will be mapped to "fail" or "fatal" depending
30720 	 * on the sense-key returned back.
30721 	 */
30722 	switch (drv_assess) {
30723 		case SD_FM_DRV_RECOVERY:
30724 			(void) sprintf(assessment, "%s", "recovered");
30725 			break;
30726 		case SD_FM_DRV_RETRY:
30727 			(void) sprintf(assessment, "%s", "retry");
30728 			break;
30729 		case SD_FM_DRV_NOTICE:
30730 			(void) sprintf(assessment, "%s", "info");
30731 			break;
30732 		case SD_FM_DRV_FATAL:
30733 		default:
30734 			(void) sprintf(assessment, "%s", "unknown");
30735 	}
30736 	/*
30737 	 * If drv_assess == SD_FM_DRV_RECOVERY, this should be a recovered
30738 	 * command, we will post ereport.io.scsi.cmd.disk.recovered.
30739 	 * driver-assessment will always be "recovered" here.
30740 	 */
30741 	if (drv_assess == SD_FM_DRV_RECOVERY) {
30742 		scsi_fm_ereport_post(un->un_sd, uscsi_path_instance, NULL,
30743 		    "cmd.disk.recovered", uscsi_ena, devid, NULL,
30744 		    DDI_NOSLEEP, NULL,
30745 		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
30746 		    DEVID_IF_KNOWN(devid),
30747 		    "driver-assessment", DATA_TYPE_STRING, assessment,
30748 		    "op-code", DATA_TYPE_UINT8, op_code,
30749 		    "cdb", DATA_TYPE_UINT8_ARRAY,
30750 		    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
30751 		    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
30752 		    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
30753 		    "pkt-stats", DATA_TYPE_UINT32, uscsi_pkt_statistics,
30754 		    NULL);
30755 		return;
30756 	}
30757 
30758 	/*
30759 	 * If there is un-expected/un-decodable data, we should post
30760 	 * ereport.io.scsi.cmd.disk.dev.uderr.
30761 	 * driver-assessment will be set based on parameter drv_assess.
30762 	 * SSC_FLAGS_INVALID_SENSE - invalid sense data sent back.
30763 	 * SSC_FLAGS_INVALID_PKT_REASON - invalid pkt-reason encountered.
30764 	 * SSC_FLAGS_INVALID_STATUS - invalid stat-code encountered.
30765 	 * SSC_FLAGS_INVALID_DATA - invalid data sent back.
30766 	 */
30767 	if (ssc->ssc_flags & ssc_invalid_flags) {
30768 		if (ssc->ssc_flags & SSC_FLAGS_INVALID_SENSE) {
30769 			scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
30770 			    NULL, "cmd.disk.dev.uderr", uscsi_ena, devid,
30771 			    NULL, DDI_NOSLEEP, NULL,
30772 			    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
30773 			    DEVID_IF_KNOWN(devid),
30774 			    "driver-assessment", DATA_TYPE_STRING,
30775 			    drv_assess == SD_FM_DRV_FATAL ?
30776 			    "fail" : assessment,
30777 			    "op-code", DATA_TYPE_UINT8, op_code,
30778 			    "cdb", DATA_TYPE_UINT8_ARRAY,
30779 			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
30780 			    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
30781 			    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
30782 			    "pkt-stats", DATA_TYPE_UINT32,
30783 			    uscsi_pkt_statistics,
30784 			    "stat-code", DATA_TYPE_UINT8,
30785 			    ssc->ssc_uscsi_cmd->uscsi_status,
30786 			    "un-decode-info", DATA_TYPE_STRING,
30787 			    ssc->ssc_info,
30788 			    "un-decode-value", DATA_TYPE_UINT8_ARRAY,
30789 			    senlen, sensep,
30790 			    NULL);
30791 		} else {
30792 			/*
30793 			 * For other type of invalid data, the
30794 			 * un-decode-value field would be empty because the
30795 			 * un-decodable content could be seen from upper
30796 			 * level payload or inside un-decode-info.
30797 			 */
30798 			scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
30799 			    NULL,
30800 			    "cmd.disk.dev.uderr", uscsi_ena, devid,
30801 			    NULL, DDI_NOSLEEP, NULL,
30802 			    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
30803 			    DEVID_IF_KNOWN(devid),
30804 			    "driver-assessment", DATA_TYPE_STRING,
30805 			    drv_assess == SD_FM_DRV_FATAL ?
30806 			    "fail" : assessment,
30807 			    "op-code", DATA_TYPE_UINT8, op_code,
30808 			    "cdb", DATA_TYPE_UINT8_ARRAY,
30809 			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
30810 			    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
30811 			    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
30812 			    "pkt-stats", DATA_TYPE_UINT32,
30813 			    uscsi_pkt_statistics,
30814 			    "stat-code", DATA_TYPE_UINT8,
30815 			    ssc->ssc_uscsi_cmd->uscsi_status,
30816 			    "un-decode-info", DATA_TYPE_STRING,
30817 			    ssc->ssc_info,
30818 			    "un-decode-value", DATA_TYPE_UINT8_ARRAY,
30819 			    0, NULL,
30820 			    NULL);
30821 		}
30822 		ssc->ssc_flags &= ~ssc_invalid_flags;
30823 		return;
30824 	}
30825 
30826 	if (uscsi_pkt_reason != CMD_CMPLT ||
30827 	    (ssc->ssc_flags & SSC_FLAGS_TRAN_ABORT)) {
30828 		/*
30829 		 * pkt-reason != CMD_CMPLT or SSC_FLAGS_TRAN_ABORT was
30830 		 * set inside sd_start_cmds due to errors(bad packet or
30831 		 * fatal transport error), we should take it as a
30832 		 * transport error, so we post ereport.io.scsi.cmd.disk.tran.
30833 		 * driver-assessment will be set based on drv_assess.
30834 		 * We will set devid to NULL because it is a transport
30835 		 * error.
30836 		 */
30837 		if (ssc->ssc_flags & SSC_FLAGS_TRAN_ABORT)
30838 			ssc->ssc_flags &= ~SSC_FLAGS_TRAN_ABORT;
30839 
30840 		scsi_fm_ereport_post(un->un_sd, uscsi_path_instance, NULL,
30841 		    "cmd.disk.tran", uscsi_ena, NULL, NULL, DDI_NOSLEEP, NULL,
30842 		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
30843 		    DEVID_IF_KNOWN(devid),
30844 		    "driver-assessment", DATA_TYPE_STRING,
30845 		    drv_assess == SD_FM_DRV_FATAL ? "fail" : assessment,
30846 		    "op-code", DATA_TYPE_UINT8, op_code,
30847 		    "cdb", DATA_TYPE_UINT8_ARRAY,
30848 		    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
30849 		    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
30850 		    "pkt-state", DATA_TYPE_UINT8, uscsi_pkt_state,
30851 		    "pkt-stats", DATA_TYPE_UINT32, uscsi_pkt_statistics,
30852 		    NULL);
30853 	} else {
30854 		/*
30855 		 * If we got here, we have a completed command, and we need
30856 		 * to further investigate the sense data to see what kind
30857 		 * of ereport we should post.
30858 		 * No ereport is needed if sense-key is KEY_RECOVERABLE_ERROR
30859 		 * and asc/ascq is "ATA PASS-THROUGH INFORMATION AVAILABLE".
30860 		 * Post ereport.io.scsi.cmd.disk.dev.rqs.merr if sense-key is
30861 		 * KEY_MEDIUM_ERROR.
30862 		 * Post ereport.io.scsi.cmd.disk.dev.rqs.derr otherwise.
30863 		 * driver-assessment will be set based on the parameter
30864 		 * drv_assess.
30865 		 */
30866 		if (senlen > 0) {
30867 			/*
30868 			 * Here we have sense data available.
30869 			 */
30870 			uint8_t sense_key = scsi_sense_key(sensep);
30871 			uint8_t sense_asc = scsi_sense_asc(sensep);
30872 			uint8_t sense_ascq = scsi_sense_ascq(sensep);
30873 
30874 			if (sense_key == KEY_RECOVERABLE_ERROR &&
30875 			    sense_asc == 0x00 && sense_ascq == 0x1d)
30876 				return;
30877 
30878 			if (sense_key == KEY_MEDIUM_ERROR) {
30879 				/*
30880 				 * driver-assessment should be "fatal" if
30881 				 * drv_assess is SD_FM_DRV_FATAL.
30882 				 */
30883 				scsi_fm_ereport_post(un->un_sd,
30884 				    uscsi_path_instance, NULL,
30885 				    "cmd.disk.dev.rqs.merr",
30886 				    uscsi_ena, devid, NULL, DDI_NOSLEEP, NULL,
30887 				    FM_VERSION, DATA_TYPE_UINT8,
30888 				    FM_EREPORT_VERS0,
30889 				    DEVID_IF_KNOWN(devid),
30890 				    "driver-assessment",
30891 				    DATA_TYPE_STRING,
30892 				    drv_assess == SD_FM_DRV_FATAL ?
30893 				    "fatal" : assessment,
30894 				    "op-code",
30895 				    DATA_TYPE_UINT8, op_code,
30896 				    "cdb",
30897 				    DATA_TYPE_UINT8_ARRAY, cdblen,
30898 				    ssc->ssc_uscsi_cmd->uscsi_cdb,
30899 				    "pkt-reason",
30900 				    DATA_TYPE_UINT8, uscsi_pkt_reason,
30901 				    "pkt-state",
30902 				    DATA_TYPE_UINT8, uscsi_pkt_state,
30903 				    "pkt-stats",
30904 				    DATA_TYPE_UINT32,
30905 				    uscsi_pkt_statistics,
30906 				    "stat-code",
30907 				    DATA_TYPE_UINT8,
30908 				    ssc->ssc_uscsi_cmd->uscsi_status,
30909 				    "key",
30910 				    DATA_TYPE_UINT8,
30911 				    scsi_sense_key(sensep),
30912 				    "asc",
30913 				    DATA_TYPE_UINT8,
30914 				    scsi_sense_asc(sensep),
30915 				    "ascq",
30916 				    DATA_TYPE_UINT8,
30917 				    scsi_sense_ascq(sensep),
30918 				    "sense-data",
30919 				    DATA_TYPE_UINT8_ARRAY,
30920 				    senlen, sensep,
30921 				    "lba",
30922 				    DATA_TYPE_UINT64,
30923 				    ssc->ssc_uscsi_info->ui_lba,
30924 				    NULL);
30925 			} else {
30926 				/*
30927 				 * if sense-key == 0x4(hardware
30928 				 * error), driver-assessment should
30929 				 * be "fatal" if drv_assess is
30930 				 * SD_FM_DRV_FATAL.
30931 				 */
30932 				scsi_fm_ereport_post(un->un_sd,
30933 				    uscsi_path_instance, NULL,
30934 				    "cmd.disk.dev.rqs.derr",
30935 				    uscsi_ena, devid,
30936 				    NULL, DDI_NOSLEEP, NULL,
30937 				    FM_VERSION,
30938 				    DATA_TYPE_UINT8, FM_EREPORT_VERS0,
30939 				    DEVID_IF_KNOWN(devid),
30940 				    "driver-assessment",
30941 				    DATA_TYPE_STRING,
30942 				    drv_assess == SD_FM_DRV_FATAL ?
30943 				    (sense_key == 0x4 ?
30944 				    "fatal" : "fail") : assessment,
30945 				    "op-code",
30946 				    DATA_TYPE_UINT8, op_code,
30947 				    "cdb",
30948 				    DATA_TYPE_UINT8_ARRAY, cdblen,
30949 				    ssc->ssc_uscsi_cmd->uscsi_cdb,
30950 				    "pkt-reason",
30951 				    DATA_TYPE_UINT8, uscsi_pkt_reason,
30952 				    "pkt-state",
30953 				    DATA_TYPE_UINT8, uscsi_pkt_state,
30954 				    "pkt-stats",
30955 				    DATA_TYPE_UINT32,
30956 				    uscsi_pkt_statistics,
30957 				    "stat-code",
30958 				    DATA_TYPE_UINT8,
30959 				    ssc->ssc_uscsi_cmd->uscsi_status,
30960 				    "key",
30961 				    DATA_TYPE_UINT8,
30962 				    scsi_sense_key(sensep),
30963 				    "asc",
30964 				    DATA_TYPE_UINT8,
30965 				    scsi_sense_asc(sensep),
30966 				    "ascq",
30967 				    DATA_TYPE_UINT8,
30968 				    scsi_sense_ascq(sensep),
30969 				    "sense-data",
30970 				    DATA_TYPE_UINT8_ARRAY,
30971 				    senlen, sensep,
30972 				    NULL);
30973 			}
30974 		} else {
30975 			/*
30976 			 * For stat_code == STATUS_GOOD, this is not a
30977 			 * hardware error.
30978 			 */
30979 			if (ssc->ssc_uscsi_cmd->uscsi_status == STATUS_GOOD)
30980 				return;
30981 
30982 			/*
30983 			 * Post ereport.io.scsi.cmd.disk.dev.serr if we got the
30984 			 * stat-code but with sense data unavailable.
30985 			 * driver-assessment will be set based on parameter
30986 			 * drv_assess.
30987 			 */
30988 			scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
30989 			    NULL,
30990 			    "cmd.disk.dev.serr", uscsi_ena,
30991 			    devid, NULL, DDI_NOSLEEP, NULL,
30992 			    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
30993 			    DEVID_IF_KNOWN(devid),
30994 			    "driver-assessment", DATA_TYPE_STRING,
30995 			    drv_assess == SD_FM_DRV_FATAL ? "fail" : assessment,
30996 			    "op-code", DATA_TYPE_UINT8, op_code,
30997 			    "cdb",
30998 			    DATA_TYPE_UINT8_ARRAY,
30999 			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
31000 			    "pkt-reason",
31001 			    DATA_TYPE_UINT8, uscsi_pkt_reason,
31002 			    "pkt-state",
31003 			    DATA_TYPE_UINT8, uscsi_pkt_state,
31004 			    "pkt-stats",
31005 			    DATA_TYPE_UINT32, uscsi_pkt_statistics,
31006 			    "stat-code",
31007 			    DATA_TYPE_UINT8,
31008 			    ssc->ssc_uscsi_cmd->uscsi_status,
31009 			    NULL);
31010 		}
31011 	}
31012 }
31013 
31014 /*
31015  *     Function: sd_ssc_extract_info
31016  *
31017  * Description: Extract information available to help generate ereport.
31018  *
31019  *     Context: Kernel thread or interrupt context.
31020  */
31021 static void
31022 sd_ssc_extract_info(sd_ssc_t *ssc, struct sd_lun *un, struct scsi_pkt *pktp,
31023     struct buf *bp, struct sd_xbuf *xp)
31024 {
31025 	size_t senlen = 0;
31026 	union scsi_cdb *cdbp;
31027 	int path_instance;
31028 	/*
31029 	 * Need scsi_cdb_size array to determine the cdb length.
31030 	 */
31031 	extern uchar_t	scsi_cdb_size[];
31032 
31033 	ASSERT(un != NULL);
31034 	ASSERT(pktp != NULL);
31035 	ASSERT(bp != NULL);
31036 	ASSERT(xp != NULL);
31037 	ASSERT(ssc != NULL);
31038 	ASSERT(mutex_owned(SD_MUTEX(un)));
31039 
31040 	/*
31041 	 * Transfer the cdb buffer pointer here.
31042 	 */
31043 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
31044 
31045 	ssc->ssc_uscsi_cmd->uscsi_cdblen = scsi_cdb_size[GETGROUP(cdbp)];
31046 	ssc->ssc_uscsi_cmd->uscsi_cdb = (caddr_t)cdbp;
31047 
31048 	/*
31049 	 * Transfer the sense data buffer pointer if sense data is available,
31050 	 * calculate the sense data length first.
31051 	 */
31052 	if ((xp->xb_sense_state & STATE_XARQ_DONE) ||
31053 	    (xp->xb_sense_state & STATE_ARQ_DONE)) {
31054 		/*
31055 		 * For arq case, we will enter here.
31056 		 */
31057 		if (xp->xb_sense_state & STATE_XARQ_DONE) {
31058 			senlen = MAX_SENSE_LENGTH - xp->xb_sense_resid;
31059 		} else {
31060 			senlen = SENSE_LENGTH;
31061 		}
31062 	} else {
31063 		/*
31064 		 * For non-arq case, we will enter this branch.
31065 		 */
31066 		if (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK &&
31067 		    (xp->xb_sense_state & STATE_XFERRED_DATA)) {
31068 			senlen = SENSE_LENGTH - xp->xb_sense_resid;
31069 		}
31070 
31071 	}
31072 
31073 	ssc->ssc_uscsi_cmd->uscsi_rqlen = (senlen & 0xff);
31074 	ssc->ssc_uscsi_cmd->uscsi_rqresid = 0;
31075 	ssc->ssc_uscsi_cmd->uscsi_rqbuf = (caddr_t)xp->xb_sense_data;
31076 
31077 	ssc->ssc_uscsi_cmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
31078 
31079 	/*
31080 	 * Only transfer path_instance when scsi_pkt was properly allocated.
31081 	 */
31082 	path_instance = pktp->pkt_path_instance;
31083 	if (scsi_pkt_allocated_correctly(pktp) && path_instance)
31084 		ssc->ssc_uscsi_cmd->uscsi_path_instance = path_instance;
31085 	else
31086 		ssc->ssc_uscsi_cmd->uscsi_path_instance = 0;
31087 
31088 	/*
31089 	 * Copy in the other fields we may need when posting ereport.
31090 	 */
31091 	ssc->ssc_uscsi_info->ui_pkt_reason = pktp->pkt_reason;
31092 	ssc->ssc_uscsi_info->ui_pkt_state = pktp->pkt_state;
31093 	ssc->ssc_uscsi_info->ui_pkt_statistics = pktp->pkt_statistics;
31094 	ssc->ssc_uscsi_info->ui_lba = (uint64_t)SD_GET_BLKNO(bp);
31095 
31096 	/*
31097 	 * For partially read/write command, we will not create ena
31098 	 * in case of a successful command be reconized as recovered.
31099 	 */
31100 	if ((pktp->pkt_reason == CMD_CMPLT) &&
31101 	    (ssc->ssc_uscsi_cmd->uscsi_status == STATUS_GOOD) &&
31102 	    (senlen == 0)) {
31103 		return;
31104 	}
31105 
31106 	/*
31107 	 * To associate ereports of a single command execution flow, we
31108 	 * need a shared ena for a specific command.
31109 	 */
31110 	if (xp->xb_ena == 0)
31111 		xp->xb_ena = fm_ena_generate(0, FM_ENA_FMT1);
31112 	ssc->ssc_uscsi_info->ui_ena = xp->xb_ena;
31113 }
31114 
31115 
31116 /*
31117  *     Function: sd_check_bdc_vpd
31118  *
31119  * Description: Query the optional INQUIRY VPD page 0xb1. If the device
31120  *              supports VPD page 0xb1, sd examines the MEDIUM ROTATION
31121  *              RATE.
31122  *
31123  *		Set the following based on RPM value:
31124  *		= 0	device is not solid state, non-rotational
31125  *		= 1	device is solid state, non-rotational
31126  *		> 1	device is not solid state, rotational
31127  *
31128  *     Context: Kernel thread or interrupt context.
31129  */
31130 
31131 static void
31132 sd_check_bdc_vpd(sd_ssc_t *ssc)
31133 {
31134 	int		rval		= 0;
31135 	uchar_t		*inqb1		= NULL;
31136 	size_t		inqb1_len	= MAX_INQUIRY_SIZE;
31137 	size_t		inqb1_resid	= 0;
31138 	struct sd_lun	*un;
31139 
31140 	ASSERT(ssc != NULL);
31141 	un = ssc->ssc_un;
31142 	ASSERT(un != NULL);
31143 	ASSERT(!mutex_owned(SD_MUTEX(un)));
31144 
31145 	mutex_enter(SD_MUTEX(un));
31146 	un->un_f_is_rotational = TRUE;
31147 	un->un_f_is_solid_state = FALSE;
31148 
31149 	if (ISCD(un)) {
31150 		mutex_exit(SD_MUTEX(un));
31151 		return;
31152 	}
31153 
31154 	if (sd_check_vpd_page_support(ssc) == 0 &&
31155 	    un->un_vpd_page_mask & SD_VPD_DEV_CHARACTER_PG) {
31156 		mutex_exit(SD_MUTEX(un));
31157 		/* collect page b1 data */
31158 		inqb1 = kmem_zalloc(inqb1_len, KM_SLEEP);
31159 
31160 		rval = sd_send_scsi_INQUIRY(ssc, inqb1, inqb1_len,
31161 		    0x01, 0xB1, &inqb1_resid);
31162 
31163 		if (rval == 0 && (inqb1_len - inqb1_resid > 5)) {
31164 			SD_TRACE(SD_LOG_COMMON, un,
31165 			    "sd_check_bdc_vpd: \
31166 			    successfully get VPD page: %x \
31167 			    PAGE LENGTH: %x BYTE 4: %x \
31168 			    BYTE 5: %x", inqb1[1], inqb1[3], inqb1[4],
31169 			    inqb1[5]);
31170 
31171 			mutex_enter(SD_MUTEX(un));
31172 			/*
31173 			 * Check the MEDIUM ROTATION RATE.
31174 			 */
31175 			if (inqb1[4] == 0) {
31176 				if (inqb1[5] == 0) {
31177 					un->un_f_is_rotational = FALSE;
31178 				} else if (inqb1[5] == 1) {
31179 					un->un_f_is_rotational = FALSE;
31180 					un->un_f_is_solid_state = TRUE;
31181 					/*
31182 					 * Solid state drives don't need
31183 					 * disksort.
31184 					 */
31185 					un->un_f_disksort_disabled = TRUE;
31186 				}
31187 			}
31188 			mutex_exit(SD_MUTEX(un));
31189 		} else if (rval != 0) {
31190 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
31191 		}
31192 
31193 		kmem_free(inqb1, inqb1_len);
31194 	} else {
31195 		mutex_exit(SD_MUTEX(un));
31196 	}
31197 }
31198 
31199 /*
31200  *	Function: sd_check_emulation_mode
31201  *
31202  *   Description: Check whether the SSD is at emulation mode
31203  *		  by issuing READ_CAPACITY_16 to see whether
31204  *		  we can get physical block size of the drive.
31205  *
31206  *	 Context: Kernel thread or interrupt context.
31207  */
31208 
31209 static void
31210 sd_check_emulation_mode(sd_ssc_t *ssc)
31211 {
31212 	int		rval = 0;
31213 	uint64_t	capacity;
31214 	uint_t		lbasize;
31215 	uint_t		pbsize;
31216 	int		i;
31217 	int		devid_len;
31218 	struct sd_lun	*un;
31219 
31220 	ASSERT(ssc != NULL);
31221 	un = ssc->ssc_un;
31222 	ASSERT(un != NULL);
31223 	ASSERT(!mutex_owned(SD_MUTEX(un)));
31224 
31225 	mutex_enter(SD_MUTEX(un));
31226 	if (ISCD(un)) {
31227 		mutex_exit(SD_MUTEX(un));
31228 		return;
31229 	}
31230 
31231 	if (un->un_f_descr_format_supported) {
31232 		mutex_exit(SD_MUTEX(un));
31233 		rval = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity, &lbasize,
31234 		    &pbsize, SD_PATH_DIRECT);
31235 		mutex_enter(SD_MUTEX(un));
31236 
31237 		if (rval != 0) {
31238 			un->un_phy_blocksize = DEV_BSIZE;
31239 		} else {
31240 			if (!ISP2(pbsize % DEV_BSIZE) || pbsize == 0) {
31241 				un->un_phy_blocksize = DEV_BSIZE;
31242 			} else if (pbsize > un->un_phy_blocksize) {
31243 				/*
31244 				 * Don't reset the physical blocksize
31245 				 * unless we've detected a larger value.
31246 				 */
31247 				un->un_phy_blocksize = pbsize;
31248 			}
31249 		}
31250 	}
31251 
31252 	for (i = 0; i < sd_flash_dev_table_size; i++) {
31253 		devid_len = (int)strlen(sd_flash_dev_table[i]);
31254 		if (sd_sdconf_id_match(un, sd_flash_dev_table[i], devid_len)
31255 		    == SD_SUCCESS) {
31256 			un->un_phy_blocksize = SSD_SECSIZE;
31257 			if (un->un_f_is_solid_state &&
31258 			    un->un_phy_blocksize != un->un_tgt_blocksize)
31259 				un->un_f_enable_rmw = TRUE;
31260 		}
31261 	}
31262 
31263 	mutex_exit(SD_MUTEX(un));
31264 }
31265