1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
24  * Copyright (c) 2013 by Delphix. All rights reserved.
25  */
26 
27 #include <sys/conf.h>
28 #include <sys/file.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/modctl.h>
32 #include <sys/scsi/scsi.h>
33 #include <sys/scsi/impl/scsi_reset_notify.h>
34 #include <sys/scsi/generic/mode.h>
35 #include <sys/disp.h>
36 #include <sys/byteorder.h>
37 #include <sys/atomic.h>
38 #include <sys/sdt.h>
39 #include <sys/dkio.h>
40 #include <sys/dkioc_free_util.h>
41 
42 #include <sys/stmf.h>
43 #include <sys/lpif.h>
44 #include <sys/portif.h>
45 #include <sys/stmf_ioctl.h>
46 #include <sys/stmf_sbd_ioctl.h>
47 
48 #include "stmf_sbd.h"
49 #include "sbd_impl.h"
50 
51 #define	SCSI2_CONFLICT_FREE_CMDS(cdb)	( \
52 	/* ----------------------- */                                      \
53 	/* Refer Both		   */                                      \
54 	/* SPC-2 (rev 20) Table 10 */                                      \
55 	/* SPC-3 (rev 23) Table 31 */                                      \
56 	/* ----------------------- */                                      \
57 	((cdb[0]) == SCMD_INQUIRY)					|| \
58 	((cdb[0]) == SCMD_LOG_SENSE_G1)					|| \
59 	((cdb[0]) == SCMD_RELEASE)					|| \
60 	((cdb[0]) == SCMD_RELEASE_G1)					|| \
61 	((cdb[0]) == SCMD_REPORT_LUNS)					|| \
62 	((cdb[0]) == SCMD_REQUEST_SENSE)				|| \
63 	/* PREVENT ALLOW MEDIUM REMOVAL with prevent == 0 */               \
64 	((((cdb[0]) == SCMD_DOORLOCK) && (((cdb[4]) & 0x3) == 0)))	|| \
65 	/* SERVICE ACTION IN with READ MEDIA SERIAL NUMBER (0x01) */       \
66 	(((cdb[0]) == SCMD_SVC_ACTION_IN_G5) && (                          \
67 	    ((cdb[1]) & 0x1F) == 0x01))					|| \
68 	/* MAINTENANCE IN with service actions REPORT ALIASES (0x0Bh) */   \
69 	/* REPORT DEVICE IDENTIFIER (0x05)  REPORT PRIORITY (0x0Eh) */     \
70 	/* REPORT TARGET PORT GROUPS (0x0A) REPORT TIMESTAMP (0x0F) */     \
71 	(((cdb[0]) == SCMD_MAINTENANCE_IN) && (                            \
72 	    (((cdb[1]) & 0x1F) == 0x0B) ||                                 \
73 	    (((cdb[1]) & 0x1F) == 0x05) ||                                 \
74 	    (((cdb[1]) & 0x1F) == 0x0E) ||                                 \
75 	    (((cdb[1]) & 0x1F) == 0x0A) ||                                 \
76 	    (((cdb[1]) & 0x1F) == 0x0F)))				|| \
77 	/* ----------------------- */                                      \
78 	/* SBC-3 (rev 17) Table 3  */                                      \
79 	/* ----------------------- */                                      \
80 	/* READ CAPACITY(10) */                                            \
81 	((cdb[0]) == SCMD_READ_CAPACITY)				|| \
82 	/* READ CAPACITY(16) */                                            \
83 	(((cdb[0]) == SCMD_SVC_ACTION_IN_G4) && (                          \
84 	    ((cdb[1]) & 0x1F) == 0x10))					|| \
85 	/* START STOP UNIT with START bit 0 and POWER CONDITION 0  */      \
86 	(((cdb[0]) == SCMD_START_STOP) && (                                \
87 	    (((cdb[4]) & 0xF0) == 0) && (((cdb[4]) & 0x01) == 0))))
88 /* End of SCSI2_CONFLICT_FREE_CMDS */
89 
90 stmf_status_t sbd_lu_reset_state(stmf_lu_t *lu);
91 static void sbd_handle_sync_cache(struct scsi_task *task,
92     struct stmf_data_buf *initial_dbuf);
93 void sbd_handle_read_xfer_completion(struct scsi_task *task,
94     sbd_cmd_t *scmd, struct stmf_data_buf *dbuf);
95 void sbd_handle_short_write_xfer_completion(scsi_task_t *task,
96     stmf_data_buf_t *dbuf);
97 void sbd_handle_short_write_transfers(scsi_task_t *task,
98     stmf_data_buf_t *dbuf, uint32_t cdb_xfer_size);
99 void sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf,
100     uint32_t buflen);
101 void sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf);
102 void sbd_handle_identifying_info(scsi_task_t *task, stmf_data_buf_t *dbuf);
103 
104 static void sbd_handle_unmap_xfer(scsi_task_t *task, uint8_t *buf,
105     uint32_t buflen);
106 static void sbd_handle_unmap(scsi_task_t *task, stmf_data_buf_t *dbuf);
107 
108 extern void sbd_pgr_initialize_it(scsi_task_t *, sbd_it_data_t *);
109 extern int sbd_pgr_reservation_conflict(scsi_task_t *);
110 extern void sbd_pgr_reset(sbd_lu_t *);
111 extern void sbd_pgr_remove_it_handle(sbd_lu_t *, sbd_it_data_t *);
112 extern void sbd_handle_pgr_in_cmd(scsi_task_t *, stmf_data_buf_t *);
113 extern void sbd_handle_pgr_out_cmd(scsi_task_t *, stmf_data_buf_t *);
114 extern void sbd_handle_pgr_out_data(scsi_task_t *, stmf_data_buf_t *);
115 void sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
116     int first_xfer);
117 static void sbd_handle_write_same(scsi_task_t *task,
118     struct stmf_data_buf *initial_dbuf);
119 static void sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
120     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable);
121 static void sbd_handle_write_same_xfer_completion(struct scsi_task *task,
122     sbd_cmd_t *scmd, struct stmf_data_buf *dbuf, uint8_t dbuf_reusable);
123 /*
124  * IMPORTANT NOTE:
125  * =================
126  * The whole world here is based on the assumption that everything within
127  * a scsi task executes in a single threaded manner, even the aborts.
128  * Dont ever change that. There wont be any performance gain but there
129  * will be tons of race conditions.
130  */
131 
132 void
133 sbd_do_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
134     struct stmf_data_buf *dbuf)
135 {
136 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
137 	uint64_t laddr;
138 	uint32_t len, buflen, iolen;
139 	int ndx;
140 	int bufs_to_take;
141 
142 	/* Lets try not to hog all the buffers the port has. */
143 	bufs_to_take = ((task->task_max_nbufs > 2) &&
144 	    (task->task_cmd_xfer_length < (32 * 1024))) ? 2 :
145 	    task->task_max_nbufs;
146 
147 	len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size : scmd->len;
148 	laddr = scmd->addr + scmd->current_ro;
149 
150 	for (buflen = 0, ndx = 0; (buflen < len) &&
151 	    (ndx < dbuf->db_sglist_length); ndx++) {
152 		iolen = min(len - buflen, dbuf->db_sglist[ndx].seg_length);
153 		if (iolen == 0)
154 			break;
155 		if (sbd_data_read(sl, task, laddr, (uint64_t)iolen,
156 		    dbuf->db_sglist[ndx].seg_addr) != STMF_SUCCESS) {
157 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
158 			/* Do not need to do xfer anymore, just complete it */
159 			dbuf->db_data_size = 0;
160 			dbuf->db_xfer_status = STMF_SUCCESS;
161 			sbd_handle_read_xfer_completion(task, scmd, dbuf);
162 			return;
163 		}
164 		buflen += iolen;
165 		laddr += (uint64_t)iolen;
166 	}
167 	dbuf->db_relative_offset = scmd->current_ro;
168 	dbuf->db_data_size = buflen;
169 	dbuf->db_flags = DB_DIRECTION_TO_RPORT;
170 	(void) stmf_xfer_data(task, dbuf, 0);
171 	scmd->len -= buflen;
172 	scmd->current_ro += buflen;
173 	if (scmd->len && (scmd->nbufs < bufs_to_take)) {
174 		uint32_t maxsize, minsize, old_minsize;
175 
176 		maxsize = (scmd->len > (128*1024)) ? 128*1024 : scmd->len;
177 		minsize = maxsize >> 2;
178 		do {
179 			/*
180 			 * A bad port implementation can keep on failing the
181 			 * the request but keep on sending us a false
182 			 * minsize.
183 			 */
184 			old_minsize = minsize;
185 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
186 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
187 		    (minsize >= 512));
188 		if (dbuf == NULL) {
189 			return;
190 		}
191 		scmd->nbufs++;
192 		sbd_do_read_xfer(task, scmd, dbuf);
193 	}
194 }
195 
196 /*
197  * sbd_zcopy: Bail-out switch for reduced copy path.
198  *
199  * 0 - read & write off
200  * 1 - read & write on
201  * 2 - only read on
202  * 4 - only write on
203  */
204 int sbd_zcopy = 1;	/* enable zcopy read & write path */
205 uint32_t sbd_max_xfer_len = 0;		/* Valid if non-zero */
206 uint32_t sbd_1st_xfer_len = 0;		/* Valid if non-zero */
207 uint32_t sbd_copy_threshold = 0;		/* Valid if non-zero */
208 
209 static void
210 sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
211 {
212 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
213 	sbd_zvol_io_t *zvio;
214 	int ret, final_xfer;
215 	uint64_t offset;
216 	uint32_t xfer_len, max_len, first_len;
217 	stmf_status_t xstat;
218 	stmf_data_buf_t *dbuf;
219 	uint_t nblks;
220 	uint64_t blksize = sl->sl_blksize;
221 	size_t db_private_sz;
222 	uintptr_t pad;
223 
224 	ASSERT(rw_read_held(&sl->sl_access_state_lock));
225 	ASSERT((sl->sl_flags & SL_MEDIA_LOADED) != 0);
226 
227 	/*
228 	 * Calculate the limits on xfer_len to the minimum of :
229 	 *    - task limit
230 	 *    - lun limit
231 	 *    - sbd global limit if set
232 	 *    - first xfer limit if set
233 	 *
234 	 * First, protect against silly over-ride value
235 	 */
236 	if (sbd_max_xfer_len && ((sbd_max_xfer_len % DEV_BSIZE) != 0)) {
237 		cmn_err(CE_WARN, "sbd_max_xfer_len invalid %d, resetting\n",
238 		    sbd_max_xfer_len);
239 		sbd_max_xfer_len = 0;
240 	}
241 	if (sbd_1st_xfer_len && ((sbd_1st_xfer_len % DEV_BSIZE) != 0)) {
242 		cmn_err(CE_WARN, "sbd_1st_xfer_len invalid %d, resetting\n",
243 		    sbd_1st_xfer_len);
244 		sbd_1st_xfer_len = 0;
245 	}
246 
247 	max_len = MIN(task->task_max_xfer_len, sl->sl_max_xfer_len);
248 	if (sbd_max_xfer_len)
249 		max_len = MIN(max_len, sbd_max_xfer_len);
250 	/*
251 	 * Special case the first xfer if hints are set.
252 	 */
253 	if (first_xfer && (sbd_1st_xfer_len || task->task_1st_xfer_len)) {
254 		/* global over-ride has precedence */
255 		if (sbd_1st_xfer_len)
256 			first_len = sbd_1st_xfer_len;
257 		else
258 			first_len = task->task_1st_xfer_len;
259 	} else {
260 		first_len = 0;
261 	}
262 
263 	while (scmd->len && scmd->nbufs < task->task_max_nbufs) {
264 
265 		xfer_len = MIN(max_len, scmd->len);
266 		if (first_len) {
267 			xfer_len = MIN(xfer_len, first_len);
268 			first_len = 0;
269 		}
270 		if (scmd->len == xfer_len) {
271 			final_xfer = 1;
272 		} else {
273 			/*
274 			 * Attempt to end xfer on a block boundary.
275 			 * The only way this does not happen is if the
276 			 * xfer_len is small enough to stay contained
277 			 * within the same block.
278 			 */
279 			uint64_t xfer_offset, xfer_aligned_end;
280 
281 			final_xfer = 0;
282 			xfer_offset = scmd->addr + scmd->current_ro;
283 			xfer_aligned_end =
284 			    P2ALIGN(xfer_offset+xfer_len, blksize);
285 			if (xfer_aligned_end > xfer_offset)
286 				xfer_len = xfer_aligned_end - xfer_offset;
287 		}
288 		/*
289 		 * Allocate object to track the read and reserve
290 		 * enough space for scatter/gather list.
291 		 */
292 		offset = scmd->addr + scmd->current_ro;
293 		nblks = sbd_zvol_numsegs(sl, offset, xfer_len);
294 
295 		db_private_sz = sizeof (*zvio) + sizeof (uintptr_t) /* PAD */ +
296 		    (nblks * sizeof (stmf_sglist_ent_t));
297 		dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, db_private_sz,
298 		    AF_DONTZERO);
299 		/*
300 		 * Setup the dbuf
301 		 *
302 		 * XXX Framework does not handle variable length sglists
303 		 * properly, so setup db_lu_private and db_port_private
304 		 * fields here. db_stmf_private is properly set for
305 		 * calls to stmf_free.
306 		 */
307 		if (dbuf->db_port_private == NULL) {
308 			/*
309 			 * XXX Framework assigns space to PP after db_sglist[0]
310 			 */
311 			cmn_err(CE_PANIC, "db_port_private == NULL");
312 		}
313 		pad = (uintptr_t)&dbuf->db_sglist[nblks];
314 		dbuf->db_lu_private = (void *)P2ROUNDUP(pad, sizeof (pad));
315 		dbuf->db_port_private = NULL;
316 		dbuf->db_buf_size = xfer_len;
317 		dbuf->db_data_size = xfer_len;
318 		dbuf->db_relative_offset = scmd->current_ro;
319 		dbuf->db_sglist_length = (uint16_t)nblks;
320 		dbuf->db_xfer_status = 0;
321 		dbuf->db_handle = 0;
322 
323 		dbuf->db_flags = (DB_DONT_CACHE | DB_DONT_REUSE |
324 		    DB_DIRECTION_TO_RPORT | DB_LU_DATA_BUF);
325 		if (final_xfer)
326 			dbuf->db_flags |= DB_SEND_STATUS_GOOD;
327 
328 		zvio = dbuf->db_lu_private;
329 		/* Need absolute offset for zvol access */
330 		zvio->zvio_offset = offset;
331 		zvio->zvio_flags = ZVIO_SYNC;
332 
333 		/*
334 		 * Accounting for start of read.
335 		 * Note there is no buffer address for the probe yet.
336 		 */
337 		DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
338 		    uint8_t *, NULL, uint64_t, xfer_len,
339 		    uint64_t, offset, scsi_task_t *, task);
340 
341 		ret = sbd_zvol_alloc_read_bufs(sl, dbuf);
342 
343 		DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
344 		    uint8_t *, NULL, uint64_t, xfer_len,
345 		    uint64_t, offset, int, ret, scsi_task_t *, task);
346 
347 		if (ret != 0) {
348 			/*
349 			 * Read failure from the backend.
350 			 */
351 			stmf_free(dbuf);
352 			if (scmd->nbufs == 0) {
353 				/* nothing queued, just finish */
354 				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
355 				stmf_scsilib_send_status(task, STATUS_CHECK,
356 				    STMF_SAA_READ_ERROR);
357 				rw_exit(&sl->sl_access_state_lock);
358 			} else {
359 				/* process failure when other dbufs finish */
360 				scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
361 			}
362 			return;
363 		}
364 
365 
366 		/*
367 		 * Allow PP to do setup
368 		 */
369 		xstat = stmf_setup_dbuf(task, dbuf, 0);
370 		if (xstat != STMF_SUCCESS) {
371 			/*
372 			 * This could happen if the driver cannot get the
373 			 * DDI resources it needs for this request.
374 			 * If other dbufs are queued, try again when the next
375 			 * one completes, otherwise give up.
376 			 */
377 			sbd_zvol_rele_read_bufs(sl, dbuf);
378 			stmf_free(dbuf);
379 			if (scmd->nbufs > 0) {
380 				/* completion of previous dbuf will retry */
381 				return;
382 			}
383 			/*
384 			 * Done with this command.
385 			 */
386 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
387 			if (first_xfer)
388 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
389 			else
390 				stmf_scsilib_send_status(task, STATUS_CHECK,
391 				    STMF_SAA_READ_ERROR);
392 			rw_exit(&sl->sl_access_state_lock);
393 			return;
394 		}
395 		/*
396 		 * dbuf is now queued on task
397 		 */
398 		scmd->nbufs++;
399 
400 		/* XXX leave this in for FW? */
401 		DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
402 		    struct stmf_data_buf *, dbuf, uint64_t, offset,
403 		    uint32_t, xfer_len);
404 		/*
405 		 * Do not pass STMF_IOF_LU_DONE so that the zvol
406 		 * state can be released in the completion callback.
407 		 */
408 		xstat = stmf_xfer_data(task, dbuf, 0);
409 		switch (xstat) {
410 		case STMF_SUCCESS:
411 			break;
412 		case STMF_BUSY:
413 			/*
414 			 * The dbuf is queued on the task, but unknown
415 			 * to the PP, thus no completion will occur.
416 			 */
417 			sbd_zvol_rele_read_bufs(sl, dbuf);
418 			stmf_teardown_dbuf(task, dbuf);
419 			stmf_free(dbuf);
420 			scmd->nbufs--;
421 			if (scmd->nbufs > 0) {
422 				/* completion of previous dbuf will retry */
423 				return;
424 			}
425 			/*
426 			 * Done with this command.
427 			 */
428 			rw_exit(&sl->sl_access_state_lock);
429 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
430 			if (first_xfer)
431 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
432 			else
433 				stmf_scsilib_send_status(task, STATUS_CHECK,
434 				    STMF_SAA_READ_ERROR);
435 			return;
436 		case STMF_ABORTED:
437 			/*
438 			 * Completion from task_done will cleanup
439 			 */
440 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
441 			return;
442 		}
443 		/*
444 		 * Update the xfer progress.
445 		 */
446 		ASSERT(scmd->len >= xfer_len);
447 		scmd->len -= xfer_len;
448 		scmd->current_ro += xfer_len;
449 	}
450 }
451 
452 void
453 sbd_handle_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
454     struct stmf_data_buf *dbuf)
455 {
456 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
457 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
458 		    dbuf->db_xfer_status, NULL);
459 		return;
460 	}
461 	task->task_nbytes_transferred += dbuf->db_data_size;
462 	if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
463 		stmf_free_dbuf(task, dbuf);
464 		scmd->nbufs--;
465 		if (scmd->nbufs)
466 			return;	/* wait for all buffers to complete */
467 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
468 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL)
469 			stmf_scsilib_send_status(task, STATUS_CHECK,
470 			    STMF_SAA_READ_ERROR);
471 		else
472 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
473 		return;
474 	}
475 	if (dbuf->db_flags & DB_DONT_REUSE) {
476 		/* allocate new dbuf */
477 		uint32_t maxsize, minsize, old_minsize;
478 		stmf_free_dbuf(task, dbuf);
479 
480 		maxsize = (scmd->len > (128*1024)) ? 128*1024 : scmd->len;
481 		minsize = maxsize >> 2;
482 		do {
483 			old_minsize = minsize;
484 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
485 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
486 		    (minsize >= 512));
487 		if (dbuf == NULL) {
488 			scmd->nbufs --;
489 			if (scmd->nbufs == 0) {
490 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
491 				    STMF_ALLOC_FAILURE, NULL);
492 			}
493 			return;
494 		}
495 	}
496 	sbd_do_read_xfer(task, scmd, dbuf);
497 }
498 
499 /*
500  * This routine must release the DMU resources and free the dbuf
501  * in all cases.  If this is the final dbuf of the task, then drop
502  * the reader lock on the LU state. If there are no errors and more
503  * work to do, then queue more xfer operations.
504  */
505 void
506 sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
507     struct stmf_data_buf *dbuf)
508 {
509 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
510 	stmf_status_t xfer_status;
511 	uint32_t data_size;
512 	int scmd_err;
513 
514 	ASSERT(dbuf->db_lu_private);
515 	ASSERT(scmd->cmd_type == SBD_CMD_SCSI_READ);
516 
517 	scmd->nbufs--;	/* account for this dbuf */
518 	/*
519 	 * Release the DMU resources.
520 	 */
521 	sbd_zvol_rele_read_bufs(sl, dbuf);
522 	/*
523 	 * Release the dbuf after retrieving needed fields.
524 	 */
525 	xfer_status = dbuf->db_xfer_status;
526 	data_size = dbuf->db_data_size;
527 	stmf_teardown_dbuf(task, dbuf);
528 	stmf_free(dbuf);
529 	/*
530 	 * Release the state lock if this is the last completion.
531 	 * If this is the last dbuf on task and all data has been
532 	 * transferred or an error encountered, then no more dbufs
533 	 * will be queued.
534 	 */
535 	scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
536 	    (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
537 	    (xfer_status != STMF_SUCCESS));
538 	if (scmd->nbufs == 0 && (scmd->len == 0 || scmd_err)) {
539 		/* all DMU state has been released */
540 		rw_exit(&sl->sl_access_state_lock);
541 	}
542 
543 	/*
544 	 * If there have been no errors, either complete the task
545 	 * or issue more data xfer operations.
546 	 */
547 	if (!scmd_err) {
548 		/*
549 		 * This chunk completed successfully
550 		 */
551 		task->task_nbytes_transferred += data_size;
552 		if (scmd->nbufs == 0 && scmd->len == 0) {
553 			/*
554 			 * This command completed successfully
555 			 *
556 			 * Status was sent along with data, so no status
557 			 * completion will occur. Tell stmf we are done.
558 			 */
559 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
560 			stmf_task_lu_done(task);
561 			return;
562 		}
563 		/*
564 		 * Start more xfers
565 		 */
566 		sbd_do_sgl_read_xfer(task, scmd, 0);
567 		return;
568 	}
569 	/*
570 	 * Sort out the failure
571 	 */
572 	if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
573 		/*
574 		 * If a previous error occurred, leave the command active
575 		 * and wait for the last completion to send the status check.
576 		 */
577 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
578 			if (scmd->nbufs == 0) {
579 				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
580 				stmf_scsilib_send_status(task, STATUS_CHECK,
581 				    STMF_SAA_READ_ERROR);
582 			}
583 			return;
584 		}
585 		/*
586 		 * Must have been a failure on current dbuf
587 		 */
588 		ASSERT(xfer_status != STMF_SUCCESS);
589 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
590 		stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
591 	}
592 }
593 
594 void
595 sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
596     struct stmf_data_buf *dbuf)
597 {
598 	sbd_zvol_io_t *zvio = dbuf->db_lu_private;
599 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
600 	int ret;
601 	int scmd_err, scmd_xfer_done;
602 	stmf_status_t xfer_status = dbuf->db_xfer_status;
603 	uint32_t data_size = dbuf->db_data_size;
604 
605 	ASSERT(zvio);
606 
607 	/*
608 	 * Allow PP to free up resources before releasing the write bufs
609 	 * as writing to the backend could take some time.
610 	 */
611 	stmf_teardown_dbuf(task, dbuf);
612 
613 	scmd->nbufs--;	/* account for this dbuf */
614 	/*
615 	 * All data was queued and this is the last completion,
616 	 * but there could still be an error.
617 	 */
618 	scmd_xfer_done = (scmd->len == 0 && scmd->nbufs == 0);
619 	scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
620 	    (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
621 	    (xfer_status != STMF_SUCCESS));
622 
623 	DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
624 	    uint8_t *, NULL, uint64_t, data_size,
625 	    uint64_t, zvio->zvio_offset, scsi_task_t *, task);
626 
627 	if (scmd_err) {
628 		/* just return the write buffers */
629 		sbd_zvol_rele_write_bufs_abort(sl, dbuf);
630 		ret = 0;
631 	} else {
632 		if (scmd_xfer_done)
633 			zvio->zvio_flags = ZVIO_COMMIT;
634 		else
635 			zvio->zvio_flags = 0;
636 		/* write the data */
637 		ret = sbd_zvol_rele_write_bufs(sl, dbuf);
638 	}
639 
640 	DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
641 	    uint8_t *, NULL, uint64_t, data_size,
642 	    uint64_t, zvio->zvio_offset, int, ret,  scsi_task_t *, task);
643 
644 	if (ret != 0) {
645 		/* update the error flag */
646 		scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
647 		scmd_err = 1;
648 	}
649 
650 	/* Release the dbuf */
651 	stmf_free(dbuf);
652 
653 	/*
654 	 * Release the state lock if this is the last completion.
655 	 * If this is the last dbuf on task and all data has been
656 	 * transferred or an error encountered, then no more dbufs
657 	 * will be queued.
658 	 */
659 	if (scmd->nbufs == 0 && (scmd->len == 0 || scmd_err)) {
660 		/* all DMU state has been released */
661 		rw_exit(&sl->sl_access_state_lock);
662 	}
663 	/*
664 	 * If there have been no errors, either complete the task
665 	 * or issue more data xfer operations.
666 	 */
667 	if (!scmd_err) {
668 		/* This chunk completed successfully */
669 		task->task_nbytes_transferred += data_size;
670 		if (scmd_xfer_done) {
671 			/* This command completed successfully */
672 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
673 			if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
674 			    (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
675 				stmf_scsilib_send_status(task, STATUS_CHECK,
676 				    STMF_SAA_WRITE_ERROR);
677 			} else {
678 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
679 			}
680 			return;
681 		}
682 		/*
683 		 * Start more xfers
684 		 */
685 		sbd_do_sgl_write_xfer(task, scmd, 0);
686 		return;
687 	}
688 	/*
689 	 * Sort out the failure
690 	 */
691 	if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
692 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
693 			if (scmd->nbufs == 0) {
694 				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
695 				stmf_scsilib_send_status(task, STATUS_CHECK,
696 				    STMF_SAA_WRITE_ERROR);
697 			}
698 			/*
699 			 * Leave the command active until last dbuf completes.
700 			 */
701 			return;
702 		}
703 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
704 		ASSERT(xfer_status != STMF_SUCCESS);
705 		stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
706 	}
707 }
708 
709 /*
710  * Handle a copy operation using the zvol interface.
711  *
712  * Similar to the sbd_data_read/write path, except it goes directly through
713  * the zvol interfaces. It can pass a port provider sglist in the
714  * form of uio which is lost through the vn_rdwr path.
715  *
716  * Returns:
717  *	STMF_SUCCESS - request handled
718  *	STMF_FAILURE - request not handled, caller must deal with error
719  */
720 static stmf_status_t
721 sbd_copy_rdwr(scsi_task_t *task, uint64_t laddr, stmf_data_buf_t *dbuf,
722     int cmd, int commit)
723 {
724 	sbd_lu_t		*sl = task->task_lu->lu_provider_private;
725 	struct uio		uio;
726 	struct iovec		*iov, *tiov, iov1[8];
727 	uint32_t		len, resid;
728 	int			ret, i, iovcnt, flags;
729 	boolean_t		is_read;
730 
731 	ASSERT(cmd == SBD_CMD_SCSI_READ || cmd == SBD_CMD_SCSI_WRITE);
732 
733 	is_read = (cmd == SBD_CMD_SCSI_READ) ? B_TRUE : B_FALSE;
734 	iovcnt = dbuf->db_sglist_length;
735 	/* use the stack for small iovecs */
736 	if (iovcnt > 8) {
737 		iov = kmem_alloc(iovcnt * sizeof (*iov), KM_SLEEP);
738 	} else {
739 		iov = &iov1[0];
740 	}
741 
742 	/* Convert dbuf sglist to iovec format */
743 	len = dbuf->db_data_size;
744 	resid = len;
745 	tiov = iov;
746 	for (i = 0; i < iovcnt; i++) {
747 		tiov->iov_base = (caddr_t)dbuf->db_sglist[i].seg_addr;
748 		tiov->iov_len = MIN(resid, dbuf->db_sglist[i].seg_length);
749 		resid -= tiov->iov_len;
750 		tiov++;
751 	}
752 	if (resid != 0) {
753 		cmn_err(CE_WARN, "inconsistant sglist rem %d", resid);
754 		if (iov != &iov1[0])
755 			kmem_free(iov, iovcnt * sizeof (*iov));
756 		return (STMF_FAILURE);
757 	}
758 	/* Setup the uio struct */
759 	uio.uio_iov = iov;
760 	uio.uio_iovcnt = iovcnt;
761 	uio.uio_loffset = laddr;
762 	uio.uio_segflg = (short)UIO_SYSSPACE;
763 	uio.uio_resid = (uint64_t)len;
764 	uio.uio_llimit = RLIM64_INFINITY;
765 
766 	if (is_read == B_TRUE) {
767 		uio.uio_fmode = FREAD;
768 		uio.uio_extflg = UIO_COPY_CACHED;
769 		DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
770 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr,
771 		    scsi_task_t *, task);
772 
773 		/* Fetch the data */
774 		ret = sbd_zvol_copy_read(sl, &uio);
775 
776 		DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
777 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
778 		    scsi_task_t *, task);
779 	} else {
780 		uio.uio_fmode = FWRITE;
781 		uio.uio_extflg = UIO_COPY_DEFAULT;
782 		DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
783 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr,
784 		    scsi_task_t *, task);
785 
786 		flags = (commit) ? ZVIO_COMMIT : 0;
787 		/* Write the data */
788 		ret = sbd_zvol_copy_write(sl, &uio, flags);
789 
790 		DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
791 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
792 		    scsi_task_t *, task);
793 	}
794 
795 	if (iov != &iov1[0])
796 		kmem_free(iov, iovcnt * sizeof (*iov));
797 	if (ret != 0) {
798 		/* Backend I/O error */
799 		return (STMF_FAILURE);
800 	}
801 	return (STMF_SUCCESS);
802 }
803 
804 void
805 sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
806 {
807 	uint64_t lba, laddr;
808 	uint32_t len;
809 	uint8_t op = task->task_cdb[0];
810 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
811 	sbd_cmd_t *scmd;
812 	stmf_data_buf_t *dbuf;
813 	int fast_path;
814 
815 	if (op == SCMD_READ) {
816 		lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
817 		len = (uint32_t)task->task_cdb[4];
818 
819 		if (len == 0) {
820 			len = 256;
821 		}
822 	} else if (op == SCMD_READ_G1) {
823 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
824 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
825 	} else if (op == SCMD_READ_G5) {
826 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
827 		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
828 	} else if (op == SCMD_READ_G4) {
829 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
830 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
831 	} else {
832 		stmf_scsilib_send_status(task, STATUS_CHECK,
833 		    STMF_SAA_INVALID_OPCODE);
834 		return;
835 	}
836 
837 	laddr = lba << sl->sl_data_blocksize_shift;
838 	len <<= sl->sl_data_blocksize_shift;
839 
840 	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
841 		stmf_scsilib_send_status(task, STATUS_CHECK,
842 		    STMF_SAA_LBA_OUT_OF_RANGE);
843 		return;
844 	}
845 
846 	task->task_cmd_xfer_length = len;
847 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
848 		task->task_expected_xfer_length = len;
849 	}
850 
851 	if (len != task->task_expected_xfer_length) {
852 		fast_path = 0;
853 		len = (len > task->task_expected_xfer_length) ?
854 		    task->task_expected_xfer_length : len;
855 	} else {
856 		fast_path = 1;
857 	}
858 
859 	if (len == 0) {
860 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
861 		return;
862 	}
863 
864 	/*
865 	 * Determine if this read can directly use DMU buffers.
866 	 */
867 	if (sbd_zcopy & (2|1) &&		/* Debug switch */
868 	    initial_dbuf == NULL &&		/* No PP buffer passed in */
869 	    sl->sl_flags & SL_CALL_ZVOL &&	/* zvol backing store */
870 	    (task->task_additional_flags &
871 	    TASK_AF_ACCEPT_LU_DBUF))		/* PP allows it */
872 	{
873 		/*
874 		 * Reduced copy path
875 		 */
876 		uint32_t copy_threshold, minsize;
877 		int ret;
878 
879 		/*
880 		 * The sl_access_state_lock will be held shared
881 		 * for the entire request and released when all
882 		 * dbufs have completed.
883 		 */
884 		rw_enter(&sl->sl_access_state_lock, RW_READER);
885 		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
886 			rw_exit(&sl->sl_access_state_lock);
887 			stmf_scsilib_send_status(task, STATUS_CHECK,
888 			    STMF_SAA_READ_ERROR);
889 			return;
890 		}
891 
892 		/*
893 		 * Check if setup is more expensive than copying the data.
894 		 *
895 		 * Use the global over-ride sbd_zcopy_threshold if set.
896 		 */
897 		copy_threshold = (sbd_copy_threshold > 0) ?
898 		    sbd_copy_threshold : task->task_copy_threshold;
899 		minsize = len;
900 		if (len < copy_threshold &&
901 		    (dbuf = stmf_alloc_dbuf(task, len, &minsize, 0)) != 0) {
902 
903 			ret = sbd_copy_rdwr(task, laddr, dbuf,
904 			    SBD_CMD_SCSI_READ, 0);
905 			/* done with the backend */
906 			rw_exit(&sl->sl_access_state_lock);
907 			if (ret != 0) {
908 				/* backend error */
909 				stmf_scsilib_send_status(task, STATUS_CHECK,
910 				    STMF_SAA_READ_ERROR);
911 			} else {
912 				/* send along good data */
913 				dbuf->db_relative_offset = 0;
914 				dbuf->db_data_size = len;
915 				dbuf->db_flags = DB_SEND_STATUS_GOOD |
916 				    DB_DIRECTION_TO_RPORT;
917 				/* XXX keep for FW? */
918 				DTRACE_PROBE4(sbd__xfer,
919 				    struct scsi_task *, task,
920 				    struct stmf_data_buf *, dbuf,
921 				    uint64_t, laddr, uint32_t, len);
922 				(void) stmf_xfer_data(task, dbuf,
923 				    STMF_IOF_LU_DONE);
924 			}
925 			return;
926 		}
927 
928 		/* committed to reduced copy */
929 		if (task->task_lu_private) {
930 			scmd = (sbd_cmd_t *)task->task_lu_private;
931 		} else {
932 			scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t),
933 			    KM_SLEEP);
934 			task->task_lu_private = scmd;
935 		}
936 		/*
937 		 * Setup scmd to track read progress.
938 		 */
939 		scmd->flags = SBD_SCSI_CMD_ACTIVE;
940 		scmd->cmd_type = SBD_CMD_SCSI_READ;
941 		scmd->nbufs = 0;
942 		scmd->addr = laddr;
943 		scmd->len = len;
944 		scmd->current_ro = 0;
945 
946 		/*
947 		 * Kick-off the read.
948 		 */
949 		sbd_do_sgl_read_xfer(task, scmd, 1);
950 		return;
951 	}
952 
953 	if (initial_dbuf == NULL) {
954 		uint32_t maxsize, minsize, old_minsize;
955 
956 		maxsize = (len > (128*1024)) ? 128*1024 : len;
957 		minsize = maxsize >> 2;
958 		do {
959 			old_minsize = minsize;
960 			initial_dbuf = stmf_alloc_dbuf(task, maxsize,
961 			    &minsize, 0);
962 		} while ((initial_dbuf == NULL) && (old_minsize > minsize) &&
963 		    (minsize >= 512));
964 		if (initial_dbuf == NULL) {
965 			stmf_scsilib_send_status(task, STATUS_QFULL, 0);
966 			return;
967 		}
968 	}
969 	dbuf = initial_dbuf;
970 
971 	if ((dbuf->db_buf_size >= len) && fast_path &&
972 	    (dbuf->db_sglist_length == 1)) {
973 		if (sbd_data_read(sl, task, laddr, (uint64_t)len,
974 		    dbuf->db_sglist[0].seg_addr) == STMF_SUCCESS) {
975 			dbuf->db_relative_offset = 0;
976 			dbuf->db_data_size = len;
977 			dbuf->db_flags = DB_SEND_STATUS_GOOD |
978 			    DB_DIRECTION_TO_RPORT;
979 			/* XXX keep for FW? */
980 			DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
981 			    struct stmf_data_buf *, dbuf,
982 			    uint64_t, laddr, uint32_t, len);
983 			(void) stmf_xfer_data(task, dbuf, STMF_IOF_LU_DONE);
984 		} else {
985 			stmf_scsilib_send_status(task, STATUS_CHECK,
986 			    STMF_SAA_READ_ERROR);
987 		}
988 		return;
989 	}
990 
991 	if (task->task_lu_private) {
992 		scmd = (sbd_cmd_t *)task->task_lu_private;
993 	} else {
994 		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
995 		task->task_lu_private = scmd;
996 	}
997 	scmd->flags = SBD_SCSI_CMD_ACTIVE;
998 	scmd->cmd_type = SBD_CMD_SCSI_READ;
999 	scmd->nbufs = 1;
1000 	scmd->addr = laddr;
1001 	scmd->len = len;
1002 	scmd->current_ro = 0;
1003 
1004 	sbd_do_read_xfer(task, scmd, dbuf);
1005 }
1006 
1007 void
1008 sbd_do_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
1009     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
1010 {
1011 	uint32_t len;
1012 	int bufs_to_take;
1013 
1014 	if (scmd->len == 0) {
1015 		goto DO_WRITE_XFER_DONE;
1016 	}
1017 
1018 	/* Lets try not to hog all the buffers the port has. */
1019 	bufs_to_take = ((task->task_max_nbufs > 2) &&
1020 	    (task->task_cmd_xfer_length < (32 * 1024))) ? 2 :
1021 	    task->task_max_nbufs;
1022 
1023 	if ((dbuf != NULL) &&
1024 	    ((dbuf->db_flags & DB_DONT_REUSE) || (dbuf_reusable == 0))) {
1025 		/* free current dbuf and allocate a new one */
1026 		stmf_free_dbuf(task, dbuf);
1027 		dbuf = NULL;
1028 	}
1029 	if (scmd->nbufs >= bufs_to_take) {
1030 		goto DO_WRITE_XFER_DONE;
1031 	}
1032 	if (dbuf == NULL) {
1033 		uint32_t maxsize, minsize, old_minsize;
1034 
1035 		maxsize = (scmd->len > (128*1024)) ? 128*1024 :
1036 		    scmd->len;
1037 		minsize = maxsize >> 2;
1038 		do {
1039 			old_minsize = minsize;
1040 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
1041 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
1042 		    (minsize >= 512));
1043 		if (dbuf == NULL) {
1044 			if (scmd->nbufs == 0) {
1045 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1046 				    STMF_ALLOC_FAILURE, NULL);
1047 			}
1048 			return;
1049 		}
1050 	}
1051 
1052 	len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size :
1053 	    scmd->len;
1054 
1055 	dbuf->db_relative_offset = scmd->current_ro;
1056 	dbuf->db_data_size = len;
1057 	dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
1058 	(void) stmf_xfer_data(task, dbuf, 0);
1059 	scmd->nbufs++; /* outstanding port xfers and bufs used */
1060 	scmd->len -= len;
1061 	scmd->current_ro += len;
1062 
1063 	if ((scmd->len != 0) && (scmd->nbufs < bufs_to_take)) {
1064 		sbd_do_write_xfer(task, scmd, NULL, 0);
1065 	}
1066 	return;
1067 
1068 DO_WRITE_XFER_DONE:
1069 	if (dbuf != NULL) {
1070 		stmf_free_dbuf(task, dbuf);
1071 	}
1072 }
1073 
1074 void
1075 sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
1076 {
1077 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1078 	sbd_zvol_io_t *zvio;
1079 	int ret;
1080 	uint32_t xfer_len, max_len, first_len;
1081 	stmf_status_t xstat;
1082 	stmf_data_buf_t *dbuf;
1083 	uint_t nblks;
1084 	uint64_t blksize = sl->sl_blksize;
1085 	uint64_t offset;
1086 	size_t db_private_sz;
1087 	uintptr_t pad;
1088 
1089 	ASSERT(rw_read_held(&sl->sl_access_state_lock));
1090 	ASSERT((sl->sl_flags & SL_MEDIA_LOADED) != 0);
1091 
1092 	/*
1093 	 * Calculate the limits on xfer_len to the minimum of :
1094 	 *    - task limit
1095 	 *    - lun limit
1096 	 *    - sbd global limit if set
1097 	 *    - first xfer limit if set
1098 	 *
1099 	 * First, protect against silly over-ride value
1100 	 */
1101 	if (sbd_max_xfer_len && ((sbd_max_xfer_len % DEV_BSIZE) != 0)) {
1102 		cmn_err(CE_WARN, "sbd_max_xfer_len invalid %d, resetting\n",
1103 		    sbd_max_xfer_len);
1104 		sbd_max_xfer_len = 0;
1105 	}
1106 	if (sbd_1st_xfer_len && ((sbd_1st_xfer_len % DEV_BSIZE) != 0)) {
1107 		cmn_err(CE_WARN, "sbd_1st_xfer_len invalid %d, resetting\n",
1108 		    sbd_1st_xfer_len);
1109 		sbd_1st_xfer_len = 0;
1110 	}
1111 
1112 	max_len = MIN(task->task_max_xfer_len, sl->sl_max_xfer_len);
1113 	if (sbd_max_xfer_len)
1114 		max_len = MIN(max_len, sbd_max_xfer_len);
1115 	/*
1116 	 * Special case the first xfer if hints are set.
1117 	 */
1118 	if (first_xfer && (sbd_1st_xfer_len || task->task_1st_xfer_len)) {
1119 		/* global over-ride has precedence */
1120 		if (sbd_1st_xfer_len)
1121 			first_len = sbd_1st_xfer_len;
1122 		else
1123 			first_len = task->task_1st_xfer_len;
1124 	} else {
1125 		first_len = 0;
1126 	}
1127 
1128 
1129 	while (scmd->len && scmd->nbufs < task->task_max_nbufs) {
1130 
1131 		xfer_len = MIN(max_len, scmd->len);
1132 		if (first_len) {
1133 			xfer_len = MIN(xfer_len, first_len);
1134 			first_len = 0;
1135 		}
1136 		if (xfer_len < scmd->len) {
1137 			/*
1138 			 * Attempt to end xfer on a block boundary.
1139 			 * The only way this does not happen is if the
1140 			 * xfer_len is small enough to stay contained
1141 			 * within the same block.
1142 			 */
1143 			uint64_t xfer_offset, xfer_aligned_end;
1144 
1145 			xfer_offset = scmd->addr + scmd->current_ro;
1146 			xfer_aligned_end =
1147 			    P2ALIGN(xfer_offset+xfer_len, blksize);
1148 			if (xfer_aligned_end > xfer_offset)
1149 				xfer_len = xfer_aligned_end - xfer_offset;
1150 		}
1151 		/*
1152 		 * Allocate object to track the write and reserve
1153 		 * enough space for scatter/gather list.
1154 		 */
1155 		offset = scmd->addr + scmd->current_ro;
1156 		nblks = sbd_zvol_numsegs(sl, offset, xfer_len);
1157 		db_private_sz = sizeof (*zvio) + sizeof (uintptr_t) /* PAD */ +
1158 		    (nblks * sizeof (stmf_sglist_ent_t));
1159 		dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, db_private_sz,
1160 		    AF_DONTZERO);
1161 
1162 		/*
1163 		 * Setup the dbuf
1164 		 *
1165 		 * XXX Framework does not handle variable length sglists
1166 		 * properly, so setup db_lu_private and db_port_private
1167 		 * fields here. db_stmf_private is properly set for
1168 		 * calls to stmf_free.
1169 		 */
1170 		if (dbuf->db_port_private == NULL) {
1171 			/*
1172 			 * XXX Framework assigns space to PP after db_sglist[0]
1173 			 */
1174 			cmn_err(CE_PANIC, "db_port_private == NULL");
1175 		}
1176 		pad = (uintptr_t)&dbuf->db_sglist[nblks];
1177 		dbuf->db_lu_private = (void *)P2ROUNDUP(pad, sizeof (pad));
1178 		dbuf->db_port_private = NULL;
1179 		dbuf->db_buf_size = xfer_len;
1180 		dbuf->db_data_size = xfer_len;
1181 		dbuf->db_relative_offset = scmd->current_ro;
1182 		dbuf->db_sglist_length = (uint16_t)nblks;
1183 		dbuf->db_xfer_status = 0;
1184 		dbuf->db_handle = 0;
1185 		dbuf->db_flags = (DB_DONT_CACHE | DB_DONT_REUSE |
1186 		    DB_DIRECTION_FROM_RPORT | DB_LU_DATA_BUF);
1187 
1188 		zvio = dbuf->db_lu_private;
1189 		zvio->zvio_offset = offset;
1190 
1191 		/* get the buffers */
1192 		ret = sbd_zvol_alloc_write_bufs(sl, dbuf);
1193 		if (ret != 0) {
1194 			/*
1195 			 * Could not allocate buffers from the backend;
1196 			 * treat it like an IO error.
1197 			 */
1198 			stmf_free(dbuf);
1199 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1200 			if (scmd->nbufs == 0) {
1201 				/*
1202 				 * Nothing queued, so no completions coming
1203 				 */
1204 				stmf_scsilib_send_status(task, STATUS_CHECK,
1205 				    STMF_SAA_WRITE_ERROR);
1206 				rw_exit(&sl->sl_access_state_lock);
1207 			}
1208 			/*
1209 			 * Completions of previous buffers will cleanup.
1210 			 */
1211 			return;
1212 		}
1213 
1214 		/*
1215 		 * Allow PP to do setup
1216 		 */
1217 		xstat = stmf_setup_dbuf(task, dbuf, 0);
1218 		if (xstat != STMF_SUCCESS) {
1219 			/*
1220 			 * This could happen if the driver cannot get the
1221 			 * DDI resources it needs for this request.
1222 			 * If other dbufs are queued, try again when the next
1223 			 * one completes, otherwise give up.
1224 			 */
1225 			sbd_zvol_rele_write_bufs_abort(sl, dbuf);
1226 			stmf_free(dbuf);
1227 			if (scmd->nbufs > 0) {
1228 				/* completion of previous dbuf will retry */
1229 				return;
1230 			}
1231 			/*
1232 			 * Done with this command.
1233 			 */
1234 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1235 			if (first_xfer)
1236 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1237 			else
1238 				stmf_scsilib_send_status(task, STATUS_CHECK,
1239 				    STMF_SAA_WRITE_ERROR);
1240 			rw_exit(&sl->sl_access_state_lock);
1241 			return;
1242 		}
1243 
1244 		/*
1245 		 * dbuf is now queued on task
1246 		 */
1247 		scmd->nbufs++;
1248 
1249 		xstat = stmf_xfer_data(task, dbuf, 0);
1250 		switch (xstat) {
1251 		case STMF_SUCCESS:
1252 			break;
1253 		case STMF_BUSY:
1254 			/*
1255 			 * The dbuf is queued on the task, but unknown
1256 			 * to the PP, thus no completion will occur.
1257 			 */
1258 			sbd_zvol_rele_write_bufs_abort(sl, dbuf);
1259 			stmf_teardown_dbuf(task, dbuf);
1260 			stmf_free(dbuf);
1261 			scmd->nbufs--;
1262 			if (scmd->nbufs > 0) {
1263 				/* completion of previous dbuf will retry */
1264 				return;
1265 			}
1266 			/*
1267 			 * Done with this command.
1268 			 */
1269 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1270 			if (first_xfer)
1271 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1272 			else
1273 				stmf_scsilib_send_status(task, STATUS_CHECK,
1274 				    STMF_SAA_WRITE_ERROR);
1275 			rw_exit(&sl->sl_access_state_lock);
1276 			return;
1277 		case STMF_ABORTED:
1278 			/*
1279 			 * Completion code will cleanup.
1280 			 */
1281 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1282 			return;
1283 		}
1284 		/*
1285 		 * Update the xfer progress.
1286 		 */
1287 		scmd->len -= xfer_len;
1288 		scmd->current_ro += xfer_len;
1289 	}
1290 }
1291 
1292 void
1293 sbd_handle_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
1294     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
1295 {
1296 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1297 	uint64_t laddr;
1298 	uint32_t buflen, iolen;
1299 	int ndx;
1300 
1301 	if (scmd->nbufs > 0) {
1302 		/*
1303 		 * Decrement the count to indicate the port xfer
1304 		 * into the dbuf has completed even though the buf is
1305 		 * still in use here in the LU provider.
1306 		 */
1307 		scmd->nbufs--;
1308 	}
1309 
1310 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
1311 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1312 		    dbuf->db_xfer_status, NULL);
1313 		return;
1314 	}
1315 
1316 	if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1317 		goto WRITE_XFER_DONE;
1318 	}
1319 
1320 	if (scmd->len != 0) {
1321 		/*
1322 		 * Initiate the next port xfer to occur in parallel
1323 		 * with writing this buf.
1324 		 */
1325 		sbd_do_write_xfer(task, scmd, NULL, 0);
1326 	}
1327 
1328 	laddr = scmd->addr + dbuf->db_relative_offset;
1329 
1330 	/*
1331 	 * If this is going to a zvol, use the direct call to
1332 	 * sbd_zvol_copy_{read,write}. The direct call interface is
1333 	 * restricted to PPs that accept sglists, but that is not required.
1334 	 */
1335 	if (sl->sl_flags & SL_CALL_ZVOL &&
1336 	    (task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF) &&
1337 	    (sbd_zcopy & (4|1))) {
1338 		int commit;
1339 
1340 		commit = (scmd->len == 0 && scmd->nbufs == 0);
1341 		if (sbd_copy_rdwr(task, laddr, dbuf, SBD_CMD_SCSI_WRITE,
1342 		    commit) != STMF_SUCCESS)
1343 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1344 		buflen = dbuf->db_data_size;
1345 	} else {
1346 		for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
1347 		    (ndx < dbuf->db_sglist_length); ndx++) {
1348 			iolen = min(dbuf->db_data_size - buflen,
1349 			    dbuf->db_sglist[ndx].seg_length);
1350 			if (iolen == 0)
1351 				break;
1352 			if (sbd_data_write(sl, task, laddr, (uint64_t)iolen,
1353 			    dbuf->db_sglist[ndx].seg_addr) != STMF_SUCCESS) {
1354 				scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1355 				break;
1356 			}
1357 			buflen += iolen;
1358 			laddr += (uint64_t)iolen;
1359 		}
1360 	}
1361 	task->task_nbytes_transferred += buflen;
1362 WRITE_XFER_DONE:
1363 	if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1364 		stmf_free_dbuf(task, dbuf);
1365 		if (scmd->nbufs)
1366 			return;	/* wait for all buffers to complete */
1367 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1368 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1369 			stmf_scsilib_send_status(task, STATUS_CHECK,
1370 			    STMF_SAA_WRITE_ERROR);
1371 		} else {
1372 			/*
1373 			 * If SYNC_WRITE flag is on then we need to flush
1374 			 * cache before sending status.
1375 			 * Note: this may be a no-op because of how
1376 			 * SL_WRITEBACK_CACHE_DISABLE and
1377 			 * SL_FLUSH_ON_DISABLED_WRITECACHE are set, but not
1378 			 * worth code complexity of checking those in this code
1379 			 * path, SBD_SCSI_CMD_SYNC_WRITE is rarely set.
1380 			 */
1381 			if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
1382 			    (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
1383 				stmf_scsilib_send_status(task, STATUS_CHECK,
1384 				    STMF_SAA_WRITE_ERROR);
1385 			} else {
1386 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1387 			}
1388 		}
1389 		return;
1390 	}
1391 	sbd_do_write_xfer(task, scmd, dbuf, dbuf_reusable);
1392 }
1393 
1394 /*
1395  * Return true if copy avoidance is beneficial.
1396  */
1397 static int
1398 sbd_zcopy_write_useful(scsi_task_t *task, uint64_t laddr, uint32_t len,
1399     uint64_t blksize)
1400 {
1401 	/*
1402 	 * If there is a global copy threshold over-ride, use it.
1403 	 * Otherwise use the PP value with the caveat that at least
1404 	 * 1/2 the data must avoid being copied to be useful.
1405 	 */
1406 	if (sbd_copy_threshold > 0) {
1407 		return (len >= sbd_copy_threshold);
1408 	} else {
1409 		uint64_t no_copy_span;
1410 
1411 		/* sub-blocksize writes always copy */
1412 		if (len < task->task_copy_threshold || len < blksize)
1413 			return (0);
1414 		/*
1415 		 * Calculate amount of data that will avoid the copy path.
1416 		 * The calculation is only valid if len >= blksize.
1417 		 */
1418 		no_copy_span = P2ALIGN(laddr+len, blksize) -
1419 		    P2ROUNDUP(laddr, blksize);
1420 		return (no_copy_span >= len/2);
1421 	}
1422 }
1423 
1424 void
1425 sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
1426 {
1427 	uint64_t lba, laddr;
1428 	uint32_t len;
1429 	uint8_t op = task->task_cdb[0], do_immediate_data = 0;
1430 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1431 	sbd_cmd_t *scmd;
1432 	stmf_data_buf_t *dbuf;
1433 	uint8_t	sync_wr_flag = 0;
1434 
1435 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
1436 		stmf_scsilib_send_status(task, STATUS_CHECK,
1437 		    STMF_SAA_WRITE_PROTECTED);
1438 		return;
1439 	}
1440 	if (op == SCMD_WRITE) {
1441 		lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
1442 		len = (uint32_t)task->task_cdb[4];
1443 
1444 		if (len == 0) {
1445 			len = 256;
1446 		}
1447 	} else if (op == SCMD_WRITE_G1) {
1448 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1449 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
1450 	} else if (op == SCMD_WRITE_G5) {
1451 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1452 		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
1453 	} else if (op == SCMD_WRITE_G4) {
1454 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
1455 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1456 	} else if (op == SCMD_WRITE_VERIFY) {
1457 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1458 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
1459 		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1460 	} else if (op == SCMD_WRITE_VERIFY_G5) {
1461 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1462 		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
1463 		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1464 	} else if (op == SCMD_WRITE_VERIFY_G4) {
1465 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
1466 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1467 		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1468 	} else {
1469 		stmf_scsilib_send_status(task, STATUS_CHECK,
1470 		    STMF_SAA_INVALID_OPCODE);
1471 		return;
1472 	}
1473 
1474 	laddr = lba << sl->sl_data_blocksize_shift;
1475 	len <<= sl->sl_data_blocksize_shift;
1476 
1477 	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
1478 		stmf_scsilib_send_status(task, STATUS_CHECK,
1479 		    STMF_SAA_LBA_OUT_OF_RANGE);
1480 		return;
1481 	}
1482 
1483 	task->task_cmd_xfer_length = len;
1484 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1485 		task->task_expected_xfer_length = len;
1486 	}
1487 
1488 	len = (len > task->task_expected_xfer_length) ?
1489 	    task->task_expected_xfer_length : len;
1490 
1491 	if (len == 0) {
1492 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1493 		return;
1494 	}
1495 
1496 	if (sbd_zcopy & (4|1) &&		/* Debug switch */
1497 	    initial_dbuf == NULL &&		/* No PP buf passed in */
1498 	    sl->sl_flags & SL_CALL_ZVOL &&	/* zvol backing store */
1499 	    (task->task_additional_flags &
1500 	    TASK_AF_ACCEPT_LU_DBUF) &&		/* PP allows it */
1501 	    sbd_zcopy_write_useful(task, laddr, len, sl->sl_blksize)) {
1502 
1503 		/*
1504 		 * XXX Note that disallowing initial_dbuf will eliminate
1505 		 * iSCSI from participating. For small writes, that is
1506 		 * probably ok. For large writes, it may be best to just
1507 		 * copy the data from the initial dbuf and use zcopy for
1508 		 * the rest.
1509 		 */
1510 		rw_enter(&sl->sl_access_state_lock, RW_READER);
1511 		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
1512 			rw_exit(&sl->sl_access_state_lock);
1513 			stmf_scsilib_send_status(task, STATUS_CHECK,
1514 			    STMF_SAA_READ_ERROR);
1515 			return;
1516 		}
1517 		/*
1518 		 * Setup scmd to track the write progress.
1519 		 */
1520 		if (task->task_lu_private) {
1521 			scmd = (sbd_cmd_t *)task->task_lu_private;
1522 		} else {
1523 			scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t),
1524 			    KM_SLEEP);
1525 			task->task_lu_private = scmd;
1526 		}
1527 		scmd->flags = SBD_SCSI_CMD_ACTIVE | sync_wr_flag;
1528 		scmd->cmd_type = SBD_CMD_SCSI_WRITE;
1529 		scmd->nbufs = 0;
1530 		scmd->addr = laddr;
1531 		scmd->len = len;
1532 		scmd->current_ro = 0;
1533 		sbd_do_sgl_write_xfer(task, scmd, 1);
1534 		return;
1535 	}
1536 
1537 	if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) {
1538 		if (initial_dbuf->db_data_size > len) {
1539 			if (initial_dbuf->db_data_size >
1540 			    task->task_expected_xfer_length) {
1541 				/* protocol error */
1542 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1543 				    STMF_INVALID_ARG, NULL);
1544 				return;
1545 			}
1546 			initial_dbuf->db_data_size = len;
1547 		}
1548 		do_immediate_data = 1;
1549 	}
1550 	dbuf = initial_dbuf;
1551 
1552 	if (task->task_lu_private) {
1553 		scmd = (sbd_cmd_t *)task->task_lu_private;
1554 	} else {
1555 		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
1556 		task->task_lu_private = scmd;
1557 	}
1558 	scmd->flags = SBD_SCSI_CMD_ACTIVE | sync_wr_flag;
1559 	scmd->cmd_type = SBD_CMD_SCSI_WRITE;
1560 	scmd->nbufs = 0;
1561 	scmd->addr = laddr;
1562 	scmd->len = len;
1563 	scmd->current_ro = 0;
1564 
1565 	if (do_immediate_data) {
1566 		/*
1567 		 * Account for data passed in this write command
1568 		 */
1569 		(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
1570 		scmd->len -= dbuf->db_data_size;
1571 		scmd->current_ro += dbuf->db_data_size;
1572 		dbuf->db_xfer_status = STMF_SUCCESS;
1573 		sbd_handle_write_xfer_completion(task, scmd, dbuf, 0);
1574 	} else {
1575 		sbd_do_write_xfer(task, scmd, dbuf, 0);
1576 	}
1577 }
1578 
1579 /*
1580  * Utility routine to handle small non performance data transfers to the
1581  * initiators. dbuf is an initial data buf (if any), 'p' points to a data
1582  * buffer which is source of data for transfer, cdb_xfer_size is the
1583  * transfer size based on CDB, cmd_xfer_size is the actual amount of data
1584  * which this command would transfer (the size of data pointed to by 'p').
1585  */
1586 void
1587 sbd_handle_short_read_transfers(scsi_task_t *task, stmf_data_buf_t *dbuf,
1588     uint8_t *p, uint32_t cdb_xfer_size, uint32_t cmd_xfer_size)
1589 {
1590 	uint32_t bufsize, ndx;
1591 	sbd_cmd_t *scmd;
1592 
1593 	cmd_xfer_size = min(cmd_xfer_size, cdb_xfer_size);
1594 
1595 	task->task_cmd_xfer_length = cmd_xfer_size;
1596 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1597 		task->task_expected_xfer_length = cmd_xfer_size;
1598 	} else {
1599 		cmd_xfer_size = min(cmd_xfer_size,
1600 		    task->task_expected_xfer_length);
1601 	}
1602 
1603 	if (cmd_xfer_size == 0) {
1604 		stmf_scsilib_send_status(task, STATUS_CHECK,
1605 		    STMF_SAA_INVALID_FIELD_IN_CDB);
1606 		return;
1607 	}
1608 	if (dbuf == NULL) {
1609 		uint32_t minsize = cmd_xfer_size;
1610 
1611 		dbuf = stmf_alloc_dbuf(task, cmd_xfer_size, &minsize, 0);
1612 	}
1613 	if (dbuf == NULL) {
1614 		stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1615 		return;
1616 	}
1617 
1618 	for (bufsize = 0, ndx = 0; bufsize < cmd_xfer_size; ndx++) {
1619 		uint8_t *d;
1620 		uint32_t s;
1621 
1622 		d = dbuf->db_sglist[ndx].seg_addr;
1623 		s = min((cmd_xfer_size - bufsize),
1624 		    dbuf->db_sglist[ndx].seg_length);
1625 		bcopy(p+bufsize, d, s);
1626 		bufsize += s;
1627 	}
1628 	dbuf->db_relative_offset = 0;
1629 	dbuf->db_data_size = cmd_xfer_size;
1630 	dbuf->db_flags = DB_DIRECTION_TO_RPORT;
1631 
1632 	if (task->task_lu_private == NULL) {
1633 		task->task_lu_private =
1634 		    kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
1635 	}
1636 	scmd = (sbd_cmd_t *)task->task_lu_private;
1637 
1638 	scmd->cmd_type = SBD_CMD_SMALL_READ;
1639 	scmd->flags = SBD_SCSI_CMD_ACTIVE;
1640 	(void) stmf_xfer_data(task, dbuf, 0);
1641 }
1642 
1643 void
1644 sbd_handle_short_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
1645     struct stmf_data_buf *dbuf)
1646 {
1647 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
1648 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1649 		    dbuf->db_xfer_status, NULL);
1650 		return;
1651 	}
1652 	task->task_nbytes_transferred = dbuf->db_data_size;
1653 	scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1654 	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1655 }
1656 
1657 void
1658 sbd_handle_short_write_transfers(scsi_task_t *task,
1659     stmf_data_buf_t *dbuf, uint32_t cdb_xfer_size)
1660 {
1661 	sbd_cmd_t *scmd;
1662 
1663 	task->task_cmd_xfer_length = cdb_xfer_size;
1664 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1665 		task->task_expected_xfer_length = cdb_xfer_size;
1666 	} else {
1667 		cdb_xfer_size = min(cdb_xfer_size,
1668 		    task->task_expected_xfer_length);
1669 	}
1670 
1671 	if (cdb_xfer_size == 0) {
1672 		stmf_scsilib_send_status(task, STATUS_CHECK,
1673 		    STMF_SAA_INVALID_FIELD_IN_CDB);
1674 		return;
1675 	}
1676 	if (task->task_lu_private == NULL) {
1677 		task->task_lu_private = kmem_zalloc(sizeof (sbd_cmd_t),
1678 		    KM_SLEEP);
1679 	} else {
1680 		bzero(task->task_lu_private, sizeof (sbd_cmd_t));
1681 	}
1682 	scmd = (sbd_cmd_t *)task->task_lu_private;
1683 	scmd->cmd_type = SBD_CMD_SMALL_WRITE;
1684 	scmd->flags = SBD_SCSI_CMD_ACTIVE;
1685 	scmd->len = cdb_xfer_size;
1686 	if (dbuf == NULL) {
1687 		uint32_t minsize = cdb_xfer_size;
1688 
1689 		dbuf = stmf_alloc_dbuf(task, cdb_xfer_size, &minsize, 0);
1690 		if (dbuf == NULL) {
1691 			stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1692 			    STMF_ALLOC_FAILURE, NULL);
1693 			return;
1694 		}
1695 		dbuf->db_data_size = cdb_xfer_size;
1696 		dbuf->db_relative_offset = 0;
1697 		dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
1698 		(void) stmf_xfer_data(task, dbuf, 0);
1699 	} else {
1700 		if (dbuf->db_data_size < cdb_xfer_size) {
1701 			stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1702 			    STMF_ABORTED, NULL);
1703 			return;
1704 		}
1705 		dbuf->db_data_size = cdb_xfer_size;
1706 		sbd_handle_short_write_xfer_completion(task, dbuf);
1707 	}
1708 }
1709 
1710 void
1711 sbd_handle_short_write_xfer_completion(scsi_task_t *task,
1712     stmf_data_buf_t *dbuf)
1713 {
1714 	sbd_cmd_t *scmd;
1715 	stmf_status_t st_ret;
1716 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1717 
1718 	/*
1719 	 * For now lets assume we will get only one sglist element
1720 	 * for short writes. If that ever changes, we should allocate
1721 	 * a local buffer and copy all the sg elements to one linear space.
1722 	 */
1723 	if ((dbuf->db_xfer_status != STMF_SUCCESS) ||
1724 	    (dbuf->db_sglist_length > 1)) {
1725 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1726 		    dbuf->db_xfer_status, NULL);
1727 		return;
1728 	}
1729 
1730 	task->task_nbytes_transferred = dbuf->db_data_size;
1731 	scmd = (sbd_cmd_t *)task->task_lu_private;
1732 	scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1733 
1734 	/* Lets find out who to call */
1735 	switch (task->task_cdb[0]) {
1736 	case SCMD_MODE_SELECT:
1737 	case SCMD_MODE_SELECT_G1:
1738 		if (sl->sl_access_state == SBD_LU_STANDBY) {
1739 			st_ret = stmf_proxy_scsi_cmd(task, dbuf);
1740 			if (st_ret != STMF_SUCCESS) {
1741 				stmf_scsilib_send_status(task, STATUS_CHECK,
1742 				    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
1743 			}
1744 		} else {
1745 			sbd_handle_mode_select_xfer(task,
1746 			    dbuf->db_sglist[0].seg_addr, dbuf->db_data_size);
1747 		}
1748 		break;
1749 	case SCMD_UNMAP:
1750 		sbd_handle_unmap_xfer(task,
1751 		    dbuf->db_sglist[0].seg_addr, dbuf->db_data_size);
1752 		break;
1753 	case SCMD_PERSISTENT_RESERVE_OUT:
1754 		if (sl->sl_access_state == SBD_LU_STANDBY) {
1755 			st_ret = stmf_proxy_scsi_cmd(task, dbuf);
1756 			if (st_ret != STMF_SUCCESS) {
1757 				stmf_scsilib_send_status(task, STATUS_CHECK,
1758 				    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
1759 			}
1760 		} else {
1761 			sbd_handle_pgr_out_data(task, dbuf);
1762 		}
1763 		break;
1764 	default:
1765 		/* This should never happen */
1766 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1767 		    STMF_ABORTED, NULL);
1768 	}
1769 }
1770 
1771 void
1772 sbd_handle_read_capacity(struct scsi_task *task,
1773     struct stmf_data_buf *initial_dbuf)
1774 {
1775 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1776 	uint32_t cdb_len;
1777 	uint8_t p[32];
1778 	uint64_t s;
1779 	uint16_t blksize;
1780 
1781 	s = sl->sl_lu_size >> sl->sl_data_blocksize_shift;
1782 	s--;
1783 	blksize = ((uint16_t)1) << sl->sl_data_blocksize_shift;
1784 
1785 	switch (task->task_cdb[0]) {
1786 	case SCMD_READ_CAPACITY:
1787 		if (s & 0xffffffff00000000ull) {
1788 			p[0] = p[1] = p[2] = p[3] = 0xFF;
1789 		} else {
1790 			p[0] = (s >> 24) & 0xff;
1791 			p[1] = (s >> 16) & 0xff;
1792 			p[2] = (s >> 8) & 0xff;
1793 			p[3] = s & 0xff;
1794 		}
1795 		p[4] = 0; p[5] = 0;
1796 		p[6] = (blksize >> 8) & 0xff;
1797 		p[7] = blksize & 0xff;
1798 		sbd_handle_short_read_transfers(task, initial_dbuf, p, 8, 8);
1799 		break;
1800 
1801 	case SCMD_SVC_ACTION_IN_G4:
1802 		cdb_len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1803 		bzero(p, 32);
1804 		p[0] = (s >> 56) & 0xff;
1805 		p[1] = (s >> 48) & 0xff;
1806 		p[2] = (s >> 40) & 0xff;
1807 		p[3] = (s >> 32) & 0xff;
1808 		p[4] = (s >> 24) & 0xff;
1809 		p[5] = (s >> 16) & 0xff;
1810 		p[6] = (s >> 8) & 0xff;
1811 		p[7] = s & 0xff;
1812 		p[10] = (blksize >> 8) & 0xff;
1813 		p[11] = blksize & 0xff;
1814 		if (sl->sl_flags & SL_UNMAP_ENABLED) {
1815 			p[14] = 0x80;
1816 		}
1817 		sbd_handle_short_read_transfers(task, initial_dbuf, p,
1818 		    cdb_len, 32);
1819 		break;
1820 	}
1821 }
1822 
1823 void
1824 sbd_calc_geometry(uint64_t s, uint16_t blksize, uint8_t *nsectors,
1825     uint8_t *nheads, uint32_t *ncyl)
1826 {
1827 	if (s < (4ull * 1024ull * 1024ull * 1024ull)) {
1828 		*nsectors = 32;
1829 		*nheads = 8;
1830 	} else {
1831 		*nsectors = 254;
1832 		*nheads = 254;
1833 	}
1834 	*ncyl = s / ((uint64_t)blksize * (uint64_t)(*nsectors) *
1835 	    (uint64_t)(*nheads));
1836 }
1837 
1838 void
1839 sbd_handle_mode_sense(struct scsi_task *task,
1840     struct stmf_data_buf *initial_dbuf, uint8_t *buf)
1841 {
1842 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1843 	uint32_t cmd_size, n;
1844 	uint8_t *cdb;
1845 	uint32_t ncyl;
1846 	uint8_t nsectors, nheads;
1847 	uint8_t page, ctrl, header_size, pc_valid;
1848 	uint16_t nbytes;
1849 	uint8_t *p;
1850 	uint64_t s = sl->sl_lu_size;
1851 	uint32_t dev_spec_param_offset;
1852 
1853 	p = buf;	/* buf is assumed to be zeroed out and large enough */
1854 	n = 0;
1855 	cdb = &task->task_cdb[0];
1856 	page = cdb[2] & 0x3F;
1857 	ctrl = (cdb[2] >> 6) & 3;
1858 	cmd_size = (cdb[0] == SCMD_MODE_SENSE) ? cdb[4] :
1859 	    READ_SCSI16(&cdb[7], uint32_t);
1860 
1861 	if (cdb[0] == SCMD_MODE_SENSE) {
1862 		header_size = 4;
1863 		dev_spec_param_offset = 2;
1864 	} else {
1865 		header_size = 8;
1866 		dev_spec_param_offset = 3;
1867 	}
1868 
1869 	/* Now validate the command */
1870 	if ((cdb[2] == 0) || (page == MODEPAGE_ALLPAGES) || (page == 0x08) ||
1871 	    (page == 0x0A) || (page == 0x03) || (page == 0x04)) {
1872 		pc_valid = 1;
1873 	} else {
1874 		pc_valid = 0;
1875 	}
1876 	if ((cmd_size < header_size) || (pc_valid == 0)) {
1877 		stmf_scsilib_send_status(task, STATUS_CHECK,
1878 		    STMF_SAA_INVALID_FIELD_IN_CDB);
1879 		return;
1880 	}
1881 
1882 	/* We will update the length in the mode header at the end */
1883 
1884 	/* Block dev device specific param in mode param header has wp bit */
1885 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
1886 		p[n + dev_spec_param_offset] = BIT_7;
1887 	}
1888 	n += header_size;
1889 	/* We are not going to return any block descriptor */
1890 
1891 	nbytes = ((uint16_t)1) << sl->sl_data_blocksize_shift;
1892 	sbd_calc_geometry(s, nbytes, &nsectors, &nheads, &ncyl);
1893 
1894 	if ((page == 0x03) || (page == MODEPAGE_ALLPAGES)) {
1895 		p[n] = 0x03;
1896 		p[n+1] = 0x16;
1897 		if (ctrl != 1) {
1898 			p[n + 11] = nsectors;
1899 			p[n + 12] = nbytes >> 8;
1900 			p[n + 13] = nbytes & 0xff;
1901 			p[n + 20] = 0x80;
1902 		}
1903 		n += 24;
1904 	}
1905 	if ((page == 0x04) || (page == MODEPAGE_ALLPAGES)) {
1906 		p[n] = 0x04;
1907 		p[n + 1] = 0x16;
1908 		if (ctrl != 1) {
1909 			p[n + 2] = ncyl >> 16;
1910 			p[n + 3] = ncyl >> 8;
1911 			p[n + 4] = ncyl & 0xff;
1912 			p[n + 5] = nheads;
1913 			p[n + 20] = 0x15;
1914 			p[n + 21] = 0x18;
1915 		}
1916 		n += 24;
1917 	}
1918 	if ((page == MODEPAGE_CACHING) || (page == MODEPAGE_ALLPAGES)) {
1919 		struct mode_caching *mode_caching_page;
1920 
1921 		mode_caching_page = (struct mode_caching *)&p[n];
1922 
1923 		mode_caching_page->mode_page.code = MODEPAGE_CACHING;
1924 		mode_caching_page->mode_page.ps = 1; /* A saveable page */
1925 		mode_caching_page->mode_page.length = 0x12;
1926 
1927 		switch (ctrl) {
1928 		case (0):
1929 			/* Current */
1930 			if ((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) == 0) {
1931 				mode_caching_page->wce = 1;
1932 			}
1933 			break;
1934 
1935 		case (1):
1936 			/* Changeable */
1937 			if ((sl->sl_flags &
1938 			    SL_WRITEBACK_CACHE_SET_UNSUPPORTED) == 0) {
1939 				mode_caching_page->wce = 1;
1940 			}
1941 			break;
1942 
1943 		default:
1944 			if ((sl->sl_flags &
1945 			    SL_SAVED_WRITE_CACHE_DISABLE) == 0) {
1946 				mode_caching_page->wce = 1;
1947 			}
1948 			break;
1949 		}
1950 		n += (sizeof (struct mode_page) +
1951 		    mode_caching_page->mode_page.length);
1952 	}
1953 	if ((page == MODEPAGE_CTRL_MODE) || (page == MODEPAGE_ALLPAGES)) {
1954 		struct mode_control_scsi3 *mode_control_page;
1955 
1956 		mode_control_page = (struct mode_control_scsi3 *)&p[n];
1957 
1958 		mode_control_page->mode_page.code = MODEPAGE_CTRL_MODE;
1959 		mode_control_page->mode_page.length =
1960 		    PAGELENGTH_MODE_CONTROL_SCSI3;
1961 		if (ctrl != 1) {
1962 			/* If not looking for changeable values, report this. */
1963 			mode_control_page->que_mod = CTRL_QMOD_UNRESTRICT;
1964 		}
1965 		n += (sizeof (struct mode_page) +
1966 		    mode_control_page->mode_page.length);
1967 	}
1968 
1969 	if (cdb[0] == SCMD_MODE_SENSE) {
1970 		if (n > 255) {
1971 			stmf_scsilib_send_status(task, STATUS_CHECK,
1972 			    STMF_SAA_INVALID_FIELD_IN_CDB);
1973 			return;
1974 		}
1975 		/*
1976 		 * Mode parameter header length doesn't include the number
1977 		 * of bytes in the length field, so adjust the count.
1978 		 * Byte count minus header length field size.
1979 		 */
1980 		buf[0] = (n - 1) & 0xff;
1981 	} else {
1982 		/* Byte count minus header length field size. */
1983 		buf[1] = (n - 2) & 0xff;
1984 		buf[0] = ((n - 2) >> 8) & 0xff;
1985 	}
1986 
1987 	sbd_handle_short_read_transfers(task, initial_dbuf, buf,
1988 	    cmd_size, n);
1989 }
1990 
1991 void
1992 sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf)
1993 {
1994 	uint32_t cmd_xfer_len;
1995 
1996 	if (task->task_cdb[0] == SCMD_MODE_SELECT) {
1997 		cmd_xfer_len = (uint32_t)task->task_cdb[4];
1998 	} else {
1999 		cmd_xfer_len = READ_SCSI16(&task->task_cdb[7], uint32_t);
2000 	}
2001 
2002 	if ((task->task_cdb[1] & 0xFE) != 0x10) {
2003 		stmf_scsilib_send_status(task, STATUS_CHECK,
2004 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2005 		return;
2006 	}
2007 
2008 	if (cmd_xfer_len == 0) {
2009 		/* zero byte mode selects are allowed */
2010 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2011 		return;
2012 	}
2013 
2014 	sbd_handle_short_write_transfers(task, dbuf, cmd_xfer_len);
2015 }
2016 
2017 void
2018 sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf, uint32_t buflen)
2019 {
2020 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2021 	sbd_it_data_t *it;
2022 	int hdr_len, bd_len;
2023 	sbd_status_t sret;
2024 	int i;
2025 
2026 	if (task->task_cdb[0] == SCMD_MODE_SELECT) {
2027 		hdr_len = 4;
2028 	} else {
2029 		hdr_len = 8;
2030 	}
2031 
2032 	if (buflen < hdr_len)
2033 		goto mode_sel_param_len_err;
2034 
2035 	bd_len = hdr_len == 4 ? buf[3] : READ_SCSI16(&buf[6], int);
2036 
2037 	if (buflen < (hdr_len + bd_len + 2))
2038 		goto mode_sel_param_len_err;
2039 
2040 	buf += hdr_len + bd_len;
2041 	buflen -= hdr_len + bd_len;
2042 
2043 	if ((buf[0] != 8) || (buflen != ((uint32_t)buf[1] + 2))) {
2044 		goto mode_sel_param_len_err;
2045 	}
2046 
2047 	if (buf[2] & 0xFB) {
2048 		goto mode_sel_param_field_err;
2049 	}
2050 
2051 	for (i = 3; i < (buf[1] + 2); i++) {
2052 		if (buf[i]) {
2053 			goto mode_sel_param_field_err;
2054 		}
2055 	}
2056 
2057 	sret = SBD_SUCCESS;
2058 
2059 	/* All good. Lets handle the write cache change, if any */
2060 	if (buf[2] & BIT_2) {
2061 		sret = sbd_wcd_set(0, sl);
2062 	} else {
2063 		sret = sbd_wcd_set(1, sl);
2064 	}
2065 
2066 	if (sret != SBD_SUCCESS) {
2067 		stmf_scsilib_send_status(task, STATUS_CHECK,
2068 		    STMF_SAA_WRITE_ERROR);
2069 		return;
2070 	}
2071 
2072 	/* set on the device passed, now set the flags */
2073 	mutex_enter(&sl->sl_lock);
2074 	if (buf[2] & BIT_2) {
2075 		sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
2076 	} else {
2077 		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
2078 	}
2079 
2080 	for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
2081 		if (it == task->task_lu_itl_handle)
2082 			continue;
2083 		it->sbd_it_ua_conditions |= SBD_UA_MODE_PARAMETERS_CHANGED;
2084 	}
2085 
2086 	if (task->task_cdb[1] & 1) {
2087 		if (buf[2] & BIT_2) {
2088 			sl->sl_flags &= ~SL_SAVED_WRITE_CACHE_DISABLE;
2089 		} else {
2090 			sl->sl_flags |= SL_SAVED_WRITE_CACHE_DISABLE;
2091 		}
2092 		mutex_exit(&sl->sl_lock);
2093 		sret = sbd_write_lu_info(sl);
2094 	} else {
2095 		mutex_exit(&sl->sl_lock);
2096 	}
2097 	if (sret == SBD_SUCCESS) {
2098 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2099 	} else {
2100 		stmf_scsilib_send_status(task, STATUS_CHECK,
2101 		    STMF_SAA_WRITE_ERROR);
2102 	}
2103 	return;
2104 
2105 mode_sel_param_len_err:
2106 	stmf_scsilib_send_status(task, STATUS_CHECK,
2107 	    STMF_SAA_PARAM_LIST_LENGTH_ERROR);
2108 	return;
2109 mode_sel_param_field_err:
2110 	stmf_scsilib_send_status(task, STATUS_CHECK,
2111 	    STMF_SAA_INVALID_FIELD_IN_PARAM_LIST);
2112 }
2113 
2114 /*
2115  * Command support added from SPC-4 r24
2116  * Supports info type 0, 2, 127
2117  */
2118 void
2119 sbd_handle_identifying_info(struct scsi_task *task,
2120     stmf_data_buf_t *initial_dbuf)
2121 {
2122 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2123 	uint8_t *cdb;
2124 	uint32_t cmd_size;
2125 	uint32_t param_len;
2126 	uint32_t xfer_size;
2127 	uint8_t info_type;
2128 	uint8_t *buf, *p;
2129 
2130 	cdb = &task->task_cdb[0];
2131 	cmd_size = READ_SCSI32(&cdb[6], uint32_t);
2132 	info_type = cdb[10]>>1;
2133 
2134 	/* Validate the command */
2135 	if (cmd_size < 4) {
2136 		stmf_scsilib_send_status(task, STATUS_CHECK,
2137 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2138 		return;
2139 	}
2140 
2141 	p = buf = kmem_zalloc(260, KM_SLEEP);
2142 
2143 	switch (info_type) {
2144 		case 0:
2145 			/*
2146 			 * No value is supplied but this info type
2147 			 * is mandatory.
2148 			 */
2149 			xfer_size = 4;
2150 			break;
2151 		case 2:
2152 			mutex_enter(&sl->sl_lock);
2153 			param_len = strlcpy((char *)(p+4), sl->sl_alias, 256);
2154 			mutex_exit(&sl->sl_lock);
2155 			/* text info must be null terminated */
2156 			if (++param_len > 256)
2157 				param_len = 256;
2158 			SCSI_WRITE16(p+2, param_len);
2159 			xfer_size = param_len + 4;
2160 			break;
2161 		case 127:
2162 			/* 0 and 2 descriptor supported */
2163 			SCSI_WRITE16(p+2, 8); /* set param length */
2164 			p += 8;
2165 			*p = 4; /* set type to 2 (7 hi bits) */
2166 			p += 2;
2167 			SCSI_WRITE16(p, 256); /* 256 max length */
2168 			xfer_size = 12;
2169 			break;
2170 		default:
2171 			stmf_scsilib_send_status(task, STATUS_CHECK,
2172 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2173 			kmem_free(buf, 260);
2174 			return;
2175 	}
2176 	sbd_handle_short_read_transfers(task, initial_dbuf, buf,
2177 	    cmd_size, xfer_size);
2178 	kmem_free(buf, 260);
2179 }
2180 
2181 /*
2182  * This function parse through a string, passed to it as a pointer to a string,
2183  * by adjusting the pointer to the first non-space character and returns
2184  * the count/length of the first bunch of non-space characters. Multiple
2185  * Management URLs are stored as a space delimited string in sl_mgmt_url
2186  * field of sbd_lu_t. This function is used to retrieve one url at a time.
2187  *
2188  * i/p : pointer to pointer to a url string
2189  * o/p : Adjust the pointer to the url to the first non white character
2190  *       and returns the length of the URL
2191  */
2192 uint16_t
2193 sbd_parse_mgmt_url(char **url_addr)
2194 {
2195 	uint16_t url_length = 0;
2196 	char *url;
2197 	url = *url_addr;
2198 
2199 	while (*url != '\0') {
2200 		if (*url == ' ' || *url == '\t' || *url == '\n') {
2201 			(*url_addr)++;
2202 			url = *url_addr;
2203 		} else {
2204 			break;
2205 		}
2206 	}
2207 
2208 	while (*url != '\0') {
2209 		if (*url == ' ' || *url == '\t' ||
2210 		    *url == '\n' || *url == '\0') {
2211 			break;
2212 		}
2213 		url++;
2214 		url_length++;
2215 	}
2216 	return (url_length);
2217 }
2218 
2219 /* Try to make this the size of a kmem allocation cache. */
2220 static uint_t sbd_write_same_optimal_chunk = 128 * 1024;
2221 
2222 static sbd_status_t
2223 sbd_write_same_data(struct scsi_task *task, sbd_cmd_t *scmd)
2224 {
2225 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2226 	uint64_t addr, len, sz_done;
2227 	uint32_t big_buf_size, xfer_size, off;
2228 	uint8_t *big_buf;
2229 	sbd_status_t ret;
2230 
2231 	if (task->task_cdb[0] == SCMD_WRITE_SAME_G1) {
2232 		addr = READ_SCSI32(&task->task_cdb[2], uint64_t);
2233 		len = READ_SCSI16(&task->task_cdb[7], uint64_t);
2234 	} else {
2235 		addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
2236 		len = READ_SCSI32(&task->task_cdb[10], uint64_t);
2237 	}
2238 	addr <<= sl->sl_data_blocksize_shift;
2239 	len <<= sl->sl_data_blocksize_shift;
2240 
2241 	/*
2242 	 * Reminders:
2243 	 *    "len" is total size of what we wish to "write same".
2244 	 *
2245 	 *    xfer_size will be scmd->trans_data_len, which is the length
2246 	 *    of the pattern we wish to replicate over "len".  We replicate
2247 	 *    "xfer_size" of pattern over "len".
2248 	 *
2249 	 *    big_buf_size is set to an ideal actual-write size for an output
2250 	 *    operation.  It may be the same as "len".  If it's not, it should
2251 	 *    be an exact multiple of "xfer_size" so we don't get pattern
2252 	 *    breakage until the very end of "len".
2253 	 */
2254 	big_buf_size = len > sbd_write_same_optimal_chunk ?
2255 	    sbd_write_same_optimal_chunk : (uint32_t)len;
2256 	xfer_size = scmd->trans_data_len;
2257 
2258 	/*
2259 	 * All transfers should be an integral multiple of the sector size.
2260 	 */
2261 	ASSERT((big_buf_size % xfer_size) == 0);
2262 
2263 	/*
2264 	 * Don't sleep for the allocation, and don't make the system
2265 	 * reclaim memory.  Trade higher I/Os if in a low-memory situation.
2266 	 */
2267 	big_buf = kmem_alloc(big_buf_size, KM_NOSLEEP | KM_NORMALPRI);
2268 
2269 	if (big_buf == NULL) {
2270 		/*
2271 		 * Just send it in terms of of the transmitted data.  This
2272 		 * will be very slow.
2273 		 */
2274 		DTRACE_PROBE1(write__same__low__memory, uint64_t, big_buf_size);
2275 		big_buf = scmd->trans_data;
2276 		big_buf_size = scmd->trans_data_len;
2277 	} else {
2278 		/*
2279 		 * We already ASSERT()ed big_buf_size is an integral multiple
2280 		 * of xfer_size.
2281 		 */
2282 		for (off = 0; off < big_buf_size; off += xfer_size)
2283 			bcopy(scmd->trans_data, big_buf + off, xfer_size);
2284 	}
2285 
2286 	/* Do the actual I/O.  Recycle xfer_size now to be write size. */
2287 	DTRACE_PROBE1(write__same__io__begin, uint64_t, len);
2288 	for (sz_done = 0; sz_done < len; sz_done += (uint64_t)xfer_size) {
2289 		xfer_size = ((big_buf_size + sz_done) <= len) ? big_buf_size :
2290 		    len - sz_done;
2291 		ret = sbd_data_write(sl, task, addr + sz_done,
2292 		    (uint64_t)xfer_size, big_buf);
2293 		if (ret != SBD_SUCCESS)
2294 			break;
2295 	}
2296 	DTRACE_PROBE2(write__same__io__end, uint64_t, len, uint64_t, sz_done);
2297 
2298 	if (big_buf != scmd->trans_data)
2299 		kmem_free(big_buf, big_buf_size);
2300 
2301 	return (ret);
2302 }
2303 
2304 static void
2305 sbd_handle_write_same_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
2306     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
2307 {
2308 	uint64_t laddr;
2309 	uint32_t buflen, iolen;
2310 	int ndx, ret;
2311 
2312 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
2313 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2314 		    dbuf->db_xfer_status, NULL);
2315 		return;
2316 	}
2317 
2318 	if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2319 		goto write_same_xfer_done;
2320 	}
2321 
2322 	if (scmd->len != 0) {
2323 		/*
2324 		 * Initiate the next port xfer to occur in parallel
2325 		 * with writing this buf.
2326 		 */
2327 		sbd_do_write_same_xfer(task, scmd, NULL, 0);
2328 	}
2329 
2330 	laddr = dbuf->db_relative_offset;
2331 
2332 	for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
2333 	    (ndx < dbuf->db_sglist_length); ndx++) {
2334 		iolen = min(dbuf->db_data_size - buflen,
2335 		    dbuf->db_sglist[ndx].seg_length);
2336 		if (iolen == 0)
2337 			break;
2338 		bcopy(dbuf->db_sglist[ndx].seg_addr, &scmd->trans_data[laddr],
2339 		    iolen);
2340 		buflen += iolen;
2341 		laddr += (uint64_t)iolen;
2342 	}
2343 	task->task_nbytes_transferred += buflen;
2344 
2345 write_same_xfer_done:
2346 	if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2347 		stmf_free_dbuf(task, dbuf);
2348 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
2349 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2350 			stmf_scsilib_send_status(task, STATUS_CHECK,
2351 			    STMF_SAA_WRITE_ERROR);
2352 		} else {
2353 			ret = sbd_write_same_data(task, scmd);
2354 			if (ret != SBD_SUCCESS) {
2355 				stmf_scsilib_send_status(task, STATUS_CHECK,
2356 				    STMF_SAA_WRITE_ERROR);
2357 			} else {
2358 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2359 			}
2360 		}
2361 		/*
2362 		 * Only way we should get here is via handle_write_same(),
2363 		 * and that should make the following assertion always pass.
2364 		 */
2365 		ASSERT((scmd->flags & SBD_SCSI_CMD_TRANS_DATA) &&
2366 		    scmd->trans_data != NULL);
2367 		kmem_free(scmd->trans_data, scmd->trans_data_len);
2368 		scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
2369 		return;
2370 	}
2371 	sbd_do_write_same_xfer(task, scmd, dbuf, dbuf_reusable);
2372 }
2373 
2374 static void
2375 sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
2376     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
2377 {
2378 	uint32_t len;
2379 
2380 	if (scmd->len == 0) {
2381 		if (dbuf != NULL)
2382 			stmf_free_dbuf(task, dbuf);
2383 		return;
2384 	}
2385 
2386 	if ((dbuf != NULL) &&
2387 	    ((dbuf->db_flags & DB_DONT_REUSE) || (dbuf_reusable == 0))) {
2388 		/* free current dbuf and allocate a new one */
2389 		stmf_free_dbuf(task, dbuf);
2390 		dbuf = NULL;
2391 	}
2392 	if (dbuf == NULL) {
2393 		uint32_t maxsize, minsize, old_minsize;
2394 
2395 		maxsize = (scmd->len > (128*1024)) ? 128*1024 :
2396 		    scmd->len;
2397 		minsize = maxsize >> 2;
2398 		do {
2399 			old_minsize = minsize;
2400 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
2401 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
2402 		    (minsize >= 512));
2403 		if (dbuf == NULL) {
2404 			if (scmd->nbufs == 0) {
2405 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2406 				    STMF_ALLOC_FAILURE, NULL);
2407 			}
2408 			return;
2409 		}
2410 	}
2411 
2412 	len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size :
2413 	    scmd->len;
2414 
2415 	dbuf->db_relative_offset = scmd->current_ro;
2416 	dbuf->db_data_size = len;
2417 	dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
2418 	(void) stmf_xfer_data(task, dbuf, 0);
2419 	scmd->nbufs++; /* outstanding port xfers and bufs used */
2420 	scmd->len -= len;
2421 	scmd->current_ro += len;
2422 }
2423 
2424 static void
2425 sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf)
2426 {
2427 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2428 	uint64_t addr, len;
2429 	sbd_cmd_t *scmd;
2430 	stmf_data_buf_t *dbuf;
2431 	uint8_t unmap;
2432 	uint8_t do_immediate_data = 0;
2433 
2434 	task->task_cmd_xfer_length = 0;
2435 	if (task->task_additional_flags &
2436 	    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2437 		task->task_expected_xfer_length = 0;
2438 	}
2439 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
2440 		stmf_scsilib_send_status(task, STATUS_CHECK,
2441 		    STMF_SAA_WRITE_PROTECTED);
2442 		return;
2443 	}
2444 	if (task->task_cdb[1] & 0xF7) {
2445 		stmf_scsilib_send_status(task, STATUS_CHECK,
2446 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2447 		return;
2448 	}
2449 	unmap = task->task_cdb[1] & 0x08;
2450 	if (unmap && ((sl->sl_flags & SL_UNMAP_ENABLED) == 0)) {
2451 		stmf_scsilib_send_status(task, STATUS_CHECK,
2452 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2453 		return;
2454 	}
2455 	if (task->task_cdb[0] == SCMD_WRITE_SAME_G1) {
2456 		addr = READ_SCSI32(&task->task_cdb[2], uint64_t);
2457 		len = READ_SCSI16(&task->task_cdb[7], uint64_t);
2458 	} else {
2459 		addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
2460 		len = READ_SCSI32(&task->task_cdb[10], uint64_t);
2461 	}
2462 	if (len == 0) {
2463 		stmf_scsilib_send_status(task, STATUS_CHECK,
2464 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2465 		return;
2466 	}
2467 	addr <<= sl->sl_data_blocksize_shift;
2468 	len <<= sl->sl_data_blocksize_shift;
2469 
2470 	/* Check if the command is for the unmap function */
2471 	if (unmap) {
2472 		dkioc_free_list_t *dfl = kmem_zalloc(DFL_SZ(1), KM_SLEEP);
2473 
2474 		dfl->dfl_num_exts = 1;
2475 		dfl->dfl_exts[0].dfle_start = addr;
2476 		dfl->dfl_exts[0].dfle_length = len;
2477 		if (sbd_unmap(sl, dfl) != 0) {
2478 			stmf_scsilib_send_status(task, STATUS_CHECK,
2479 			    STMF_SAA_LBA_OUT_OF_RANGE);
2480 		} else {
2481 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2482 		}
2483 		dfl_free(dfl);
2484 		return;
2485 	}
2486 
2487 	/* Write same function */
2488 
2489 	task->task_cmd_xfer_length = 1 << sl->sl_data_blocksize_shift;
2490 	if (task->task_additional_flags &
2491 	    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2492 		task->task_expected_xfer_length = task->task_cmd_xfer_length;
2493 	}
2494 	if ((addr + len) > sl->sl_lu_size) {
2495 		stmf_scsilib_send_status(task, STATUS_CHECK,
2496 		    STMF_SAA_LBA_OUT_OF_RANGE);
2497 		return;
2498 	}
2499 
2500 	/* For rest of this I/O the transfer length is 1 block */
2501 	len = ((uint64_t)1) << sl->sl_data_blocksize_shift;
2502 
2503 	/* Some basic checks */
2504 	if ((len == 0) || (len != task->task_expected_xfer_length)) {
2505 		stmf_scsilib_send_status(task, STATUS_CHECK,
2506 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2507 		return;
2508 	}
2509 
2510 
2511 	if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) {
2512 		if (initial_dbuf->db_data_size > len) {
2513 			if (initial_dbuf->db_data_size >
2514 			    task->task_expected_xfer_length) {
2515 				/* protocol error */
2516 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2517 				    STMF_INVALID_ARG, NULL);
2518 				return;
2519 			}
2520 			initial_dbuf->db_data_size = (uint32_t)len;
2521 		}
2522 		do_immediate_data = 1;
2523 	}
2524 	dbuf = initial_dbuf;
2525 
2526 	if (task->task_lu_private) {
2527 		scmd = (sbd_cmd_t *)task->task_lu_private;
2528 	} else {
2529 		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
2530 		task->task_lu_private = scmd;
2531 	}
2532 	scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_TRANS_DATA;
2533 	scmd->cmd_type = SBD_CMD_SCSI_WRITE;
2534 	scmd->nbufs = 0;
2535 	scmd->len = (uint32_t)len;
2536 	scmd->trans_data_len = (uint32_t)len;
2537 	scmd->trans_data = kmem_alloc((size_t)len, KM_SLEEP);
2538 	scmd->current_ro = 0;
2539 
2540 	if (do_immediate_data) {
2541 		/*
2542 		 * Account for data passed in this write command
2543 		 */
2544 		(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
2545 		scmd->len -= dbuf->db_data_size;
2546 		scmd->current_ro += dbuf->db_data_size;
2547 		dbuf->db_xfer_status = STMF_SUCCESS;
2548 		sbd_handle_write_same_xfer_completion(task, scmd, dbuf, 0);
2549 	} else {
2550 		sbd_do_write_same_xfer(task, scmd, dbuf, 0);
2551 	}
2552 }
2553 
2554 static void
2555 sbd_handle_unmap(scsi_task_t *task, stmf_data_buf_t *dbuf)
2556 {
2557 	uint32_t cmd_xfer_len;
2558 
2559 	cmd_xfer_len = READ_SCSI16(&task->task_cdb[7], uint32_t);
2560 
2561 	if (task->task_cdb[1] & 1) {
2562 		stmf_scsilib_send_status(task, STATUS_CHECK,
2563 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2564 		return;
2565 	}
2566 
2567 	if (cmd_xfer_len == 0) {
2568 		task->task_cmd_xfer_length = 0;
2569 		if (task->task_additional_flags &
2570 		    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2571 			task->task_expected_xfer_length = 0;
2572 		}
2573 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2574 		return;
2575 	}
2576 
2577 	sbd_handle_short_write_transfers(task, dbuf, cmd_xfer_len);
2578 }
2579 
2580 static void
2581 sbd_handle_unmap_xfer(scsi_task_t *task, uint8_t *buf, uint32_t buflen)
2582 {
2583 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2584 	uint32_t ulen, dlen, num_desc;
2585 	uint64_t addr, len;
2586 	uint8_t *p;
2587 	dkioc_free_list_t *dfl;
2588 	int ret;
2589 	int i;
2590 
2591 	if (buflen < 24) {
2592 		stmf_scsilib_send_status(task, STATUS_CHECK,
2593 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2594 		return;
2595 	}
2596 	ulen = READ_SCSI16(buf, uint32_t);
2597 	dlen = READ_SCSI16(buf + 2, uint32_t);
2598 	num_desc = dlen >> 4;
2599 	if (((ulen + 2) != buflen) || ((dlen + 8) != buflen) || (dlen & 0xf) ||
2600 	    (num_desc == 0)) {
2601 		stmf_scsilib_send_status(task, STATUS_CHECK,
2602 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2603 		return;
2604 	}
2605 
2606 	dfl = kmem_zalloc(DFL_SZ(num_desc), KM_SLEEP);
2607 	dfl->dfl_num_exts = num_desc;
2608 	for (p = buf + 8, i = 0; num_desc; num_desc--, p += 16, i++) {
2609 		addr = READ_SCSI64(p, uint64_t);
2610 		addr <<= sl->sl_data_blocksize_shift;
2611 		len = READ_SCSI32(p+8, uint64_t);
2612 		len <<= sl->sl_data_blocksize_shift;
2613 		/* Prepare a list of extents to unmap */
2614 		dfl->dfl_exts[i].dfle_start = addr;
2615 		dfl->dfl_exts[i].dfle_length = len;
2616 	}
2617 	ASSERT(i == dfl->dfl_num_exts);
2618 
2619 	/* Finally execute the unmap operations in a single step */
2620 	ret = sbd_unmap(sl, dfl);
2621 	dfl_free(dfl);
2622 	if (ret != 0) {
2623 		stmf_scsilib_send_status(task, STATUS_CHECK,
2624 		    STMF_SAA_LBA_OUT_OF_RANGE);
2625 		return;
2626 	}
2627 
2628 	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2629 }
2630 
2631 void
2632 sbd_handle_inquiry(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
2633 {
2634 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2635 	uint8_t *cdbp = (uint8_t *)&task->task_cdb[0];
2636 	uint8_t *p;
2637 	uint8_t byte0;
2638 	uint8_t page_length;
2639 	uint16_t bsize = 512;
2640 	uint16_t cmd_size;
2641 	uint32_t xfer_size = 4;
2642 	uint32_t mgmt_url_size = 0;
2643 	uint8_t exp;
2644 	uint64_t s;
2645 	char *mgmt_url = NULL;
2646 
2647 
2648 	byte0 = DTYPE_DIRECT;
2649 	/*
2650 	 * Basic protocol checks.
2651 	 */
2652 
2653 	if ((((cdbp[1] & 1) == 0) && cdbp[2]) || cdbp[5]) {
2654 		stmf_scsilib_send_status(task, STATUS_CHECK,
2655 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2656 		return;
2657 	}
2658 
2659 	/*
2660 	 * Zero byte allocation length is not an error.  Just
2661 	 * return success.
2662 	 */
2663 
2664 	cmd_size = (((uint16_t)cdbp[3]) << 8) | cdbp[4];
2665 
2666 	if (cmd_size == 0) {
2667 		task->task_cmd_xfer_length = 0;
2668 		if (task->task_additional_flags &
2669 		    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2670 			task->task_expected_xfer_length = 0;
2671 		}
2672 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2673 		return;
2674 	}
2675 
2676 	/*
2677 	 * Standard inquiry
2678 	 */
2679 
2680 	if ((cdbp[1] & 1) == 0) {
2681 		int	i;
2682 		struct scsi_inquiry *inq;
2683 
2684 		p = (uint8_t *)kmem_zalloc(bsize, KM_SLEEP);
2685 		inq = (struct scsi_inquiry *)p;
2686 
2687 		page_length = 69;
2688 		xfer_size = page_length + 5;
2689 
2690 		inq->inq_dtype = DTYPE_DIRECT;
2691 		inq->inq_ansi = 5;	/* SPC-3 */
2692 		inq->inq_hisup = 1;
2693 		inq->inq_rdf = 2;	/* Response data format for SPC-3 */
2694 		inq->inq_len = page_length;
2695 
2696 		inq->inq_tpgs = TPGS_FAILOVER_IMPLICIT;
2697 		inq->inq_cmdque = 1;
2698 
2699 		if (sl->sl_flags & SL_VID_VALID) {
2700 			bcopy(sl->sl_vendor_id, inq->inq_vid, 8);
2701 		} else {
2702 			bcopy(sbd_vendor_id, inq->inq_vid, 8);
2703 		}
2704 
2705 		if (sl->sl_flags & SL_PID_VALID) {
2706 			bcopy(sl->sl_product_id, inq->inq_pid, 16);
2707 		} else {
2708 			bcopy(sbd_product_id, inq->inq_pid, 16);
2709 		}
2710 
2711 		if (sl->sl_flags & SL_REV_VALID) {
2712 			bcopy(sl->sl_revision, inq->inq_revision, 4);
2713 		} else {
2714 			bcopy(sbd_revision, inq->inq_revision, 4);
2715 		}
2716 
2717 		/* Adding Version Descriptors */
2718 		i = 0;
2719 		/* SAM-3 no version */
2720 		inq->inq_vd[i].inq_vd_msb = 0x00;
2721 		inq->inq_vd[i].inq_vd_lsb = 0x60;
2722 		i++;
2723 
2724 		/* transport */
2725 		switch (task->task_lport->lport_id->protocol_id) {
2726 		case PROTOCOL_FIBRE_CHANNEL:
2727 			inq->inq_vd[i].inq_vd_msb = 0x09;
2728 			inq->inq_vd[i].inq_vd_lsb = 0x00;
2729 			i++;
2730 			break;
2731 
2732 		case PROTOCOL_PARALLEL_SCSI:
2733 		case PROTOCOL_SSA:
2734 		case PROTOCOL_IEEE_1394:
2735 			/* Currently no claims of conformance */
2736 			break;
2737 
2738 		case PROTOCOL_SRP:
2739 			inq->inq_vd[i].inq_vd_msb = 0x09;
2740 			inq->inq_vd[i].inq_vd_lsb = 0x40;
2741 			i++;
2742 			break;
2743 
2744 		case PROTOCOL_iSCSI:
2745 			inq->inq_vd[i].inq_vd_msb = 0x09;
2746 			inq->inq_vd[i].inq_vd_lsb = 0x60;
2747 			i++;
2748 			break;
2749 
2750 		case PROTOCOL_SAS:
2751 		case PROTOCOL_ADT:
2752 		case PROTOCOL_ATAPI:
2753 		default:
2754 			/* Currently no claims of conformance */
2755 			break;
2756 		}
2757 
2758 		/* SPC-3 no version */
2759 		inq->inq_vd[i].inq_vd_msb = 0x03;
2760 		inq->inq_vd[i].inq_vd_lsb = 0x00;
2761 		i++;
2762 
2763 		/* SBC-2 no version */
2764 		inq->inq_vd[i].inq_vd_msb = 0x03;
2765 		inq->inq_vd[i].inq_vd_lsb = 0x20;
2766 
2767 		sbd_handle_short_read_transfers(task, initial_dbuf, p, cmd_size,
2768 		    min(cmd_size, xfer_size));
2769 		kmem_free(p, bsize);
2770 
2771 		return;
2772 	}
2773 
2774 	rw_enter(&sbd_global_prop_lock, RW_READER);
2775 	if (sl->sl_mgmt_url) {
2776 		mgmt_url_size = strlen(sl->sl_mgmt_url);
2777 		mgmt_url = sl->sl_mgmt_url;
2778 	} else if (sbd_mgmt_url) {
2779 		mgmt_url_size = strlen(sbd_mgmt_url);
2780 		mgmt_url = sbd_mgmt_url;
2781 	}
2782 
2783 	/*
2784 	 * EVPD handling
2785 	 */
2786 
2787 	/* Default 512 bytes may not be enough, increase bsize if necessary */
2788 	if (cdbp[2] == 0x83 || cdbp[2] == 0x85) {
2789 		if (bsize <  cmd_size)
2790 			bsize = cmd_size;
2791 	}
2792 	p = (uint8_t *)kmem_zalloc(bsize, KM_SLEEP);
2793 
2794 	switch (cdbp[2]) {
2795 	case 0x00:
2796 		page_length = 4 + (mgmt_url_size ? 1 : 0);
2797 		if (sl->sl_flags & SL_UNMAP_ENABLED)
2798 			page_length += 2;
2799 
2800 		p[0] = byte0;
2801 		p[3] = page_length;
2802 		/* Supported VPD pages in ascending order */
2803 		{
2804 			uint8_t i = 5;
2805 
2806 			p[i++] = 0x80;
2807 			p[i++] = 0x83;
2808 			if (mgmt_url_size != 0)
2809 				p[i++] = 0x85;
2810 			p[i++] = 0x86;
2811 			if (sl->sl_flags & SL_UNMAP_ENABLED) {
2812 				p[i++] = 0xb0;
2813 				p[i++] = 0xb2;
2814 			}
2815 		}
2816 		xfer_size = page_length + 4;
2817 		break;
2818 
2819 	case 0x80:
2820 		if (sl->sl_serial_no_size) {
2821 			page_length = sl->sl_serial_no_size;
2822 			bcopy(sl->sl_serial_no, p + 4, sl->sl_serial_no_size);
2823 		} else {
2824 			/* if no serial num is specified set 4 spaces */
2825 			page_length = 4;
2826 			bcopy("    ", p + 4, 4);
2827 		}
2828 		p[0] = byte0;
2829 		p[1] = 0x80;
2830 		p[3] = page_length;
2831 		xfer_size = page_length + 4;
2832 		break;
2833 
2834 	case 0x83:
2835 		xfer_size = stmf_scsilib_prepare_vpd_page83(task, p,
2836 		    bsize, byte0, STMF_VPD_LU_ID|STMF_VPD_TARGET_ID|
2837 		    STMF_VPD_TP_GROUP|STMF_VPD_RELATIVE_TP_ID);
2838 		break;
2839 
2840 	case 0x85:
2841 		if (mgmt_url_size == 0) {
2842 			stmf_scsilib_send_status(task, STATUS_CHECK,
2843 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2844 			goto err_done;
2845 		}
2846 		{
2847 			uint16_t idx, newidx, sz, url_size;
2848 			char *url;
2849 
2850 			p[0] = byte0;
2851 			p[1] = 0x85;
2852 
2853 			idx = 4;
2854 			url = mgmt_url;
2855 			url_size = sbd_parse_mgmt_url(&url);
2856 			/* Creating Network Service Descriptors */
2857 			while (url_size != 0) {
2858 				/* Null terminated and 4 Byte aligned */
2859 				sz = url_size + 1;
2860 				sz += (sz % 4) ? 4 - (sz % 4) : 0;
2861 				newidx = idx + sz + 4;
2862 
2863 				if (newidx < bsize) {
2864 					/*
2865 					 * SPC-3r23 : Table 320  (Sec 7.6.5)
2866 					 * (Network service descriptor format
2867 					 *
2868 					 * Note: Hard coding service type as
2869 					 * "Storage Configuration Service".
2870 					 */
2871 					p[idx] = 1;
2872 					SCSI_WRITE16(p + idx + 2, sz);
2873 					bcopy(url, p + idx + 4, url_size);
2874 					xfer_size = newidx + 4;
2875 				}
2876 				idx = newidx;
2877 
2878 				/* skip to next mgmt url if any */
2879 				url += url_size;
2880 				url_size = sbd_parse_mgmt_url(&url);
2881 			}
2882 
2883 			/* Total descriptor length */
2884 			SCSI_WRITE16(p + 2, idx - 4);
2885 			break;
2886 		}
2887 
2888 	case 0x86:
2889 		page_length = 0x3c;
2890 
2891 		p[0] = byte0;
2892 		p[1] = 0x86;		/* Page 86 response */
2893 		p[3] = page_length;
2894 
2895 		/*
2896 		 * Bits 0, 1, and 2 will need to be updated
2897 		 * to reflect the queue tag handling if/when
2898 		 * that is implemented.  For now, we're going
2899 		 * to claim support only for Simple TA.
2900 		 */
2901 		p[5] = 1;
2902 		xfer_size = page_length + 4;
2903 		break;
2904 
2905 	case 0xb0:
2906 		if ((sl->sl_flags & SL_UNMAP_ENABLED) == 0) {
2907 			stmf_scsilib_send_status(task, STATUS_CHECK,
2908 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2909 			goto err_done;
2910 		}
2911 		page_length = 0x3c;
2912 		p[0] = byte0;
2913 		p[1] = 0xb0;
2914 		p[3] = page_length;
2915 		p[20] = p[21] = p[22] = p[23] = 0xFF;
2916 		p[24] = p[25] = p[26] = p[27] = 0xFF;
2917 		xfer_size = page_length + 4;
2918 		break;
2919 
2920 	case 0xb2:
2921 		if ((sl->sl_flags & SL_UNMAP_ENABLED) == 0) {
2922 			stmf_scsilib_send_status(task, STATUS_CHECK,
2923 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2924 			goto err_done;
2925 		}
2926 		page_length = 4;
2927 		p[0] = byte0;
2928 		p[1] = 0xb2;
2929 		p[3] = page_length;
2930 
2931 		exp = (uint8_t)sl->sl_data_blocksize_shift;
2932 		s = sl->sl_lu_size >> sl->sl_data_blocksize_shift;
2933 		while (s & ((uint64_t)0xFFFFFFFF80000000ull)) {
2934 			s >>= 1;
2935 			exp++;
2936 		}
2937 		p[4] = exp;
2938 		p[5] = 0xc0;
2939 		xfer_size = page_length + 4;
2940 		break;
2941 
2942 	default:
2943 		stmf_scsilib_send_status(task, STATUS_CHECK,
2944 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2945 		goto err_done;
2946 	}
2947 
2948 	sbd_handle_short_read_transfers(task, initial_dbuf, p, cmd_size,
2949 	    min(cmd_size, xfer_size));
2950 err_done:
2951 	kmem_free(p, bsize);
2952 	rw_exit(&sbd_global_prop_lock);
2953 }
2954 
2955 stmf_status_t
2956 sbd_task_alloc(struct scsi_task *task)
2957 {
2958 	if ((task->task_lu_private =
2959 	    kmem_alloc(sizeof (sbd_cmd_t), KM_NOSLEEP)) != NULL) {
2960 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
2961 		scmd->flags = 0;
2962 		return (STMF_SUCCESS);
2963 	}
2964 	return (STMF_ALLOC_FAILURE);
2965 }
2966 
2967 void
2968 sbd_remove_it_handle(sbd_lu_t *sl, sbd_it_data_t *it)
2969 {
2970 	sbd_it_data_t **ppit;
2971 
2972 	sbd_pgr_remove_it_handle(sl, it);
2973 	mutex_enter(&sl->sl_lock);
2974 	for (ppit = &sl->sl_it_list; *ppit != NULL;
2975 	    ppit = &((*ppit)->sbd_it_next)) {
2976 		if ((*ppit) == it) {
2977 			*ppit = it->sbd_it_next;
2978 			break;
2979 		}
2980 	}
2981 	mutex_exit(&sl->sl_lock);
2982 
2983 	DTRACE_PROBE2(itl__nexus__end, stmf_lu_t *, sl->sl_lu,
2984 	    sbd_it_data_t *, it);
2985 
2986 	kmem_free(it, sizeof (*it));
2987 }
2988 
2989 void
2990 sbd_check_and_clear_scsi2_reservation(sbd_lu_t *sl, sbd_it_data_t *it)
2991 {
2992 	mutex_enter(&sl->sl_lock);
2993 	if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) == 0) {
2994 		/* If we dont have any reservations, just get out. */
2995 		mutex_exit(&sl->sl_lock);
2996 		return;
2997 	}
2998 
2999 	if (it == NULL) {
3000 		/* Find the I_T nexus which is holding the reservation. */
3001 		for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
3002 			if (it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) {
3003 				ASSERT(it->sbd_it_session_id ==
3004 				    sl->sl_rs_owner_session_id);
3005 				break;
3006 			}
3007 		}
3008 		ASSERT(it != NULL);
3009 	} else {
3010 		/*
3011 		 * We were passed an I_T nexus. If this nexus does not hold
3012 		 * the reservation, do nothing. This is why this function is
3013 		 * called "check_and_clear".
3014 		 */
3015 		if ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0) {
3016 			mutex_exit(&sl->sl_lock);
3017 			return;
3018 		}
3019 	}
3020 	it->sbd_it_flags &= ~SBD_IT_HAS_SCSI2_RESERVATION;
3021 	sl->sl_flags &= ~SL_LU_HAS_SCSI2_RESERVATION;
3022 	mutex_exit(&sl->sl_lock);
3023 }
3024 
3025 
3026 
3027 void
3028 sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
3029 {
3030 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
3031 	sbd_it_data_t *it;
3032 	uint8_t cdb0, cdb1;
3033 	stmf_status_t st_ret;
3034 
3035 	if ((it = task->task_lu_itl_handle) == NULL) {
3036 		mutex_enter(&sl->sl_lock);
3037 		for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
3038 			if (it->sbd_it_session_id ==
3039 			    task->task_session->ss_session_id) {
3040 				mutex_exit(&sl->sl_lock);
3041 				stmf_scsilib_send_status(task, STATUS_BUSY, 0);
3042 				return;
3043 			}
3044 		}
3045 		it = (sbd_it_data_t *)kmem_zalloc(sizeof (*it), KM_NOSLEEP);
3046 		if (it == NULL) {
3047 			mutex_exit(&sl->sl_lock);
3048 			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
3049 			return;
3050 		}
3051 		it->sbd_it_session_id = task->task_session->ss_session_id;
3052 		bcopy(task->task_lun_no, it->sbd_it_lun, 8);
3053 		it->sbd_it_next = sl->sl_it_list;
3054 		sl->sl_it_list = it;
3055 		mutex_exit(&sl->sl_lock);
3056 
3057 		DTRACE_PROBE1(itl__nexus__start, scsi_task *, task);
3058 
3059 		sbd_pgr_initialize_it(task, it);
3060 		if (stmf_register_itl_handle(task->task_lu, task->task_lun_no,
3061 		    task->task_session, it->sbd_it_session_id, it)
3062 		    != STMF_SUCCESS) {
3063 			sbd_remove_it_handle(sl, it);
3064 			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
3065 			return;
3066 		}
3067 		task->task_lu_itl_handle = it;
3068 		if (sl->sl_access_state != SBD_LU_STANDBY) {
3069 			it->sbd_it_ua_conditions = SBD_UA_POR;
3070 		}
3071 	} else if (it->sbd_it_flags & SBD_IT_PGR_CHECK_FLAG) {
3072 		mutex_enter(&sl->sl_lock);
3073 		it->sbd_it_flags &= ~SBD_IT_PGR_CHECK_FLAG;
3074 		mutex_exit(&sl->sl_lock);
3075 		sbd_pgr_initialize_it(task, it);
3076 	}
3077 
3078 	if (task->task_mgmt_function) {
3079 		stmf_scsilib_handle_task_mgmt(task);
3080 		return;
3081 	}
3082 
3083 	/*
3084 	 * if we're transitioning between access
3085 	 * states, return NOT READY
3086 	 */
3087 	if (sl->sl_access_state == SBD_LU_TRANSITION_TO_STANDBY ||
3088 	    sl->sl_access_state == SBD_LU_TRANSITION_TO_ACTIVE) {
3089 		stmf_scsilib_send_status(task, STATUS_CHECK,
3090 		    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
3091 		return;
3092 	}
3093 
3094 	/* Checking ua conditions as per SAM3R14 5.3.2 specified order */
3095 	if ((it->sbd_it_ua_conditions) && (task->task_cdb[0] != SCMD_INQUIRY)) {
3096 		uint32_t saa = 0;
3097 
3098 		mutex_enter(&sl->sl_lock);
3099 		if (it->sbd_it_ua_conditions & SBD_UA_POR) {
3100 			it->sbd_it_ua_conditions &= ~SBD_UA_POR;
3101 			saa = STMF_SAA_POR;
3102 		}
3103 		mutex_exit(&sl->sl_lock);
3104 		if (saa) {
3105 			stmf_scsilib_send_status(task, STATUS_CHECK, saa);
3106 			return;
3107 		}
3108 	}
3109 
3110 	/* Reservation conflict checks */
3111 	if (sl->sl_access_state == SBD_LU_ACTIVE) {
3112 		if (SBD_PGR_RSVD(sl->sl_pgr)) {
3113 			if (sbd_pgr_reservation_conflict(task)) {
3114 				stmf_scsilib_send_status(task,
3115 				    STATUS_RESERVATION_CONFLICT, 0);
3116 				return;
3117 			}
3118 		} else if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) &&
3119 		    ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0)) {
3120 			if (!(SCSI2_CONFLICT_FREE_CMDS(task->task_cdb))) {
3121 				stmf_scsilib_send_status(task,
3122 				    STATUS_RESERVATION_CONFLICT, 0);
3123 				return;
3124 			}
3125 		}
3126 	}
3127 
3128 	/* Rest of the ua conndition checks */
3129 	if ((it->sbd_it_ua_conditions) && (task->task_cdb[0] != SCMD_INQUIRY)) {
3130 		uint32_t saa = 0;
3131 
3132 		mutex_enter(&sl->sl_lock);
3133 		if (it->sbd_it_ua_conditions & SBD_UA_CAPACITY_CHANGED) {
3134 			it->sbd_it_ua_conditions &= ~SBD_UA_CAPACITY_CHANGED;
3135 			if ((task->task_cdb[0] == SCMD_READ_CAPACITY) ||
3136 			    ((task->task_cdb[0] == SCMD_SVC_ACTION_IN_G4) &&
3137 			    (task->task_cdb[1] ==
3138 			    SSVC_ACTION_READ_CAPACITY_G4))) {
3139 				saa = 0;
3140 			} else {
3141 				saa = STMF_SAA_CAPACITY_DATA_HAS_CHANGED;
3142 			}
3143 		} else if (it->sbd_it_ua_conditions &
3144 		    SBD_UA_MODE_PARAMETERS_CHANGED) {
3145 			it->sbd_it_ua_conditions &=
3146 			    ~SBD_UA_MODE_PARAMETERS_CHANGED;
3147 			saa = STMF_SAA_MODE_PARAMETERS_CHANGED;
3148 		} else if (it->sbd_it_ua_conditions &
3149 		    SBD_UA_ASYMMETRIC_ACCESS_CHANGED) {
3150 			it->sbd_it_ua_conditions &=
3151 			    ~SBD_UA_ASYMMETRIC_ACCESS_CHANGED;
3152 			saa = STMF_SAA_ASYMMETRIC_ACCESS_CHANGED;
3153 		} else if (it->sbd_it_ua_conditions &
3154 		    SBD_UA_ACCESS_STATE_TRANSITION) {
3155 			it->sbd_it_ua_conditions &=
3156 			    ~SBD_UA_ACCESS_STATE_TRANSITION;
3157 			saa = STMF_SAA_LU_NO_ACCESS_TRANSITION;
3158 		} else {
3159 			it->sbd_it_ua_conditions = 0;
3160 			saa = 0;
3161 		}
3162 		mutex_exit(&sl->sl_lock);
3163 		if (saa) {
3164 			stmf_scsilib_send_status(task, STATUS_CHECK, saa);
3165 			return;
3166 		}
3167 	}
3168 
3169 	cdb0 = task->task_cdb[0];
3170 	cdb1 = task->task_cdb[1];
3171 
3172 	if (sl->sl_access_state == SBD_LU_STANDBY) {
3173 		if (cdb0 != SCMD_INQUIRY &&
3174 		    cdb0 != SCMD_MODE_SENSE &&
3175 		    cdb0 != SCMD_MODE_SENSE_G1 &&
3176 		    cdb0 != SCMD_MODE_SELECT &&
3177 		    cdb0 != SCMD_MODE_SELECT_G1 &&
3178 		    cdb0 != SCMD_RESERVE &&
3179 		    cdb0 != SCMD_RELEASE &&
3180 		    cdb0 != SCMD_PERSISTENT_RESERVE_OUT &&
3181 		    cdb0 != SCMD_PERSISTENT_RESERVE_IN &&
3182 		    cdb0 != SCMD_REQUEST_SENSE &&
3183 		    cdb0 != SCMD_READ_CAPACITY &&
3184 		    cdb0 != SCMD_TEST_UNIT_READY &&
3185 		    cdb0 != SCMD_START_STOP &&
3186 		    cdb0 != SCMD_READ &&
3187 		    cdb0 != SCMD_READ_G1 &&
3188 		    cdb0 != SCMD_READ_G4 &&
3189 		    cdb0 != SCMD_READ_G5 &&
3190 		    !(cdb0 == SCMD_SVC_ACTION_IN_G4 &&
3191 		    cdb1 == SSVC_ACTION_READ_CAPACITY_G4) &&
3192 		    !(cdb0 == SCMD_MAINTENANCE_IN &&
3193 		    (cdb1 & 0x1F) == 0x05) &&
3194 		    !(cdb0 == SCMD_MAINTENANCE_IN &&
3195 		    (cdb1 & 0x1F) == 0x0A)) {
3196 			stmf_scsilib_send_status(task, STATUS_CHECK,
3197 			    STMF_SAA_LU_NO_ACCESS_STANDBY);
3198 			return;
3199 		}
3200 
3201 		/*
3202 		 * is this a short write?
3203 		 * if so, we'll need to wait until we have the buffer
3204 		 * before proxying the command
3205 		 */
3206 		switch (cdb0) {
3207 			case SCMD_MODE_SELECT:
3208 			case SCMD_MODE_SELECT_G1:
3209 			case SCMD_PERSISTENT_RESERVE_OUT:
3210 				break;
3211 			default:
3212 				st_ret = stmf_proxy_scsi_cmd(task,
3213 				    initial_dbuf);
3214 				if (st_ret != STMF_SUCCESS) {
3215 					stmf_scsilib_send_status(task,
3216 					    STATUS_CHECK,
3217 					    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
3218 				}
3219 				return;
3220 		}
3221 	}
3222 
3223 	cdb0 = task->task_cdb[0] & 0x1F;
3224 
3225 	if ((cdb0 == SCMD_READ) || (cdb0 == SCMD_WRITE)) {
3226 		if (task->task_additional_flags & TASK_AF_PORT_LOAD_HIGH) {
3227 			stmf_scsilib_send_status(task, STATUS_QFULL, 0);
3228 			return;
3229 		}
3230 		if (cdb0 == SCMD_READ) {
3231 			sbd_handle_read(task, initial_dbuf);
3232 			return;
3233 		}
3234 		sbd_handle_write(task, initial_dbuf);
3235 		return;
3236 	}
3237 
3238 	cdb0 = task->task_cdb[0];
3239 	cdb1 = task->task_cdb[1];
3240 
3241 	if (cdb0 == SCMD_INQUIRY) {		/* Inquiry */
3242 		sbd_handle_inquiry(task, initial_dbuf);
3243 		return;
3244 	}
3245 
3246 	if (cdb0  == SCMD_PERSISTENT_RESERVE_OUT) {
3247 		sbd_handle_pgr_out_cmd(task, initial_dbuf);
3248 		return;
3249 	}
3250 
3251 	if (cdb0  == SCMD_PERSISTENT_RESERVE_IN) {
3252 		sbd_handle_pgr_in_cmd(task, initial_dbuf);
3253 		return;
3254 	}
3255 
3256 	if (cdb0 == SCMD_RELEASE) {
3257 		if (cdb1) {
3258 			stmf_scsilib_send_status(task, STATUS_CHECK,
3259 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3260 			return;
3261 		}
3262 
3263 		mutex_enter(&sl->sl_lock);
3264 		if (sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) {
3265 			/* If not owner don't release it, just return good */
3266 			if (it->sbd_it_session_id !=
3267 			    sl->sl_rs_owner_session_id) {
3268 				mutex_exit(&sl->sl_lock);
3269 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3270 				return;
3271 			}
3272 		}
3273 		sl->sl_flags &= ~SL_LU_HAS_SCSI2_RESERVATION;
3274 		it->sbd_it_flags &= ~SBD_IT_HAS_SCSI2_RESERVATION;
3275 		mutex_exit(&sl->sl_lock);
3276 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3277 		return;
3278 	}
3279 
3280 	if (cdb0 == SCMD_RESERVE) {
3281 		if (cdb1) {
3282 			stmf_scsilib_send_status(task, STATUS_CHECK,
3283 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3284 			return;
3285 		}
3286 
3287 		mutex_enter(&sl->sl_lock);
3288 		if (sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) {
3289 			/* If not owner, return conflict status */
3290 			if (it->sbd_it_session_id !=
3291 			    sl->sl_rs_owner_session_id) {
3292 				mutex_exit(&sl->sl_lock);
3293 				stmf_scsilib_send_status(task,
3294 				    STATUS_RESERVATION_CONFLICT, 0);
3295 				return;
3296 			}
3297 		}
3298 		sl->sl_flags |= SL_LU_HAS_SCSI2_RESERVATION;
3299 		it->sbd_it_flags |= SBD_IT_HAS_SCSI2_RESERVATION;
3300 		sl->sl_rs_owner_session_id = it->sbd_it_session_id;
3301 		mutex_exit(&sl->sl_lock);
3302 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3303 		return;
3304 	}
3305 
3306 	if (cdb0 == SCMD_REQUEST_SENSE) {
3307 		/*
3308 		 * LU provider needs to store unretrieved sense data
3309 		 * (e.g. after power-on/reset).  For now, we'll just
3310 		 * return good status with no sense.
3311 		 */
3312 
3313 		if ((cdb1 & ~1) || task->task_cdb[2] || task->task_cdb[3] ||
3314 		    task->task_cdb[5]) {
3315 			stmf_scsilib_send_status(task, STATUS_CHECK,
3316 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3317 		} else {
3318 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3319 		}
3320 
3321 		return;
3322 	}
3323 
3324 	/* Report Target Port Groups */
3325 	if ((cdb0 == SCMD_MAINTENANCE_IN) &&
3326 	    ((cdb1 & 0x1F) == 0x0A)) {
3327 		stmf_scsilib_handle_report_tpgs(task, initial_dbuf);
3328 		return;
3329 	}
3330 
3331 	/* Report Identifying Information */
3332 	if ((cdb0 == SCMD_MAINTENANCE_IN) &&
3333 	    ((cdb1 & 0x1F) == 0x05)) {
3334 		sbd_handle_identifying_info(task, initial_dbuf);
3335 		return;
3336 	}
3337 
3338 	if (cdb0 == SCMD_START_STOP) {			/* Start stop */
3339 		task->task_cmd_xfer_length = 0;
3340 		if (task->task_cdb[4] & 0xFC) {
3341 			stmf_scsilib_send_status(task, STATUS_CHECK,
3342 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3343 			return;
3344 		}
3345 		if (task->task_cdb[4] & 2) {
3346 			stmf_scsilib_send_status(task, STATUS_CHECK,
3347 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3348 		} else {
3349 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3350 		}
3351 		return;
3352 
3353 	}
3354 
3355 	if ((cdb0 == SCMD_MODE_SENSE) || (cdb0 == SCMD_MODE_SENSE_G1)) {
3356 		uint8_t *p;
3357 		p = kmem_zalloc(512, KM_SLEEP);
3358 		sbd_handle_mode_sense(task, initial_dbuf, p);
3359 		kmem_free(p, 512);
3360 		return;
3361 	}
3362 
3363 	if ((cdb0 == SCMD_MODE_SELECT) || (cdb0 == SCMD_MODE_SELECT_G1)) {
3364 		sbd_handle_mode_select(task, initial_dbuf);
3365 		return;
3366 	}
3367 
3368 	if ((cdb0 == SCMD_UNMAP) && (sl->sl_flags & SL_UNMAP_ENABLED)) {
3369 		sbd_handle_unmap(task, initial_dbuf);
3370 		return;
3371 	}
3372 
3373 	if ((cdb0 == SCMD_WRITE_SAME_G4) || (cdb0 == SCMD_WRITE_SAME_G1)) {
3374 		sbd_handle_write_same(task, initial_dbuf);
3375 		return;
3376 	}
3377 
3378 	if (cdb0 == SCMD_TEST_UNIT_READY) {	/* Test unit ready */
3379 		task->task_cmd_xfer_length = 0;
3380 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3381 		return;
3382 	}
3383 
3384 	if (cdb0 == SCMD_READ_CAPACITY) {		/* Read Capacity */
3385 		sbd_handle_read_capacity(task, initial_dbuf);
3386 		return;
3387 	}
3388 
3389 	if (cdb0 == SCMD_SVC_ACTION_IN_G4) { /* Read Capacity or read long */
3390 		if (cdb1 == SSVC_ACTION_READ_CAPACITY_G4) {
3391 			sbd_handle_read_capacity(task, initial_dbuf);
3392 			return;
3393 		/*
3394 		 * } else if (cdb1 == SSVC_ACTION_READ_LONG_G4) {
3395 		 * 	sbd_handle_read(task, initial_dbuf);
3396 		 * 	return;
3397 		 */
3398 		}
3399 	}
3400 
3401 	/*
3402 	 * if (cdb0 == SCMD_SVC_ACTION_OUT_G4) {
3403 	 *	if (cdb1 == SSVC_ACTION_WRITE_LONG_G4) {
3404 	 *		 sbd_handle_write(task, initial_dbuf);
3405 	 * 		return;
3406 	 *	}
3407 	 * }
3408 	 */
3409 
3410 	if (cdb0 == SCMD_VERIFY) {
3411 		/*
3412 		 * Something more likely needs to be done here.
3413 		 */
3414 		task->task_cmd_xfer_length = 0;
3415 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3416 		return;
3417 	}
3418 
3419 	if (cdb0 == SCMD_SYNCHRONIZE_CACHE ||
3420 	    cdb0 == SCMD_SYNCHRONIZE_CACHE_G4) {
3421 		sbd_handle_sync_cache(task, initial_dbuf);
3422 		return;
3423 	}
3424 
3425 	/*
3426 	 * Write and Verify use the same path as write, but don't clutter the
3427 	 * performance path above with checking for write_verify opcodes.  We
3428 	 * rely on zfs's integrity checks for the "Verify" part of Write &
3429 	 * Verify.  (Even if we did a read to "verify" we'd merely be reading
3430 	 * cache, not actual media.)
3431 	 * Therefore we
3432 	 *   a) only support this if sbd_is_zvol, and
3433 	 *   b) run the IO through the normal write path with a forced
3434 	 *	sbd_flush_data_cache at the end.
3435 	 */
3436 
3437 	if ((sl->sl_flags & SL_ZFS_META) && (
3438 	    cdb0 == SCMD_WRITE_VERIFY ||
3439 	    cdb0 == SCMD_WRITE_VERIFY_G4 ||
3440 	    cdb0 == SCMD_WRITE_VERIFY_G5)) {
3441 		sbd_handle_write(task, initial_dbuf);
3442 		return;
3443 	}
3444 
3445 	stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_INVALID_OPCODE);
3446 }
3447 
3448 void
3449 sbd_dbuf_xfer_done(struct scsi_task *task, struct stmf_data_buf *dbuf)
3450 {
3451 	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3452 
3453 	if (dbuf->db_flags & DB_LU_DATA_BUF) {
3454 		/*
3455 		 * Buffers passed in from the LU always complete
3456 		 * even if the task is no longer active.
3457 		 */
3458 		ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
3459 		ASSERT(scmd);
3460 		switch (scmd->cmd_type) {
3461 		case (SBD_CMD_SCSI_READ):
3462 			sbd_handle_sgl_read_xfer_completion(task, scmd, dbuf);
3463 			break;
3464 		case (SBD_CMD_SCSI_WRITE):
3465 			sbd_handle_sgl_write_xfer_completion(task, scmd, dbuf);
3466 			break;
3467 		default:
3468 			cmn_err(CE_PANIC, "Unknown cmd type, task = %p",
3469 			    (void *)task);
3470 			break;
3471 		}
3472 		return;
3473 	}
3474 
3475 	if ((scmd == NULL) || ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0))
3476 		return;
3477 
3478 	switch (scmd->cmd_type) {
3479 	case (SBD_CMD_SCSI_READ):
3480 		sbd_handle_read_xfer_completion(task, scmd, dbuf);
3481 		break;
3482 
3483 	case (SBD_CMD_SCSI_WRITE):
3484 		if ((task->task_cdb[0] == SCMD_WRITE_SAME_G1) ||
3485 		    (task->task_cdb[0] == SCMD_WRITE_SAME_G4)) {
3486 			sbd_handle_write_same_xfer_completion(task, scmd, dbuf,
3487 			    1);
3488 		} else {
3489 			sbd_handle_write_xfer_completion(task, scmd, dbuf, 1);
3490 		}
3491 		break;
3492 
3493 	case (SBD_CMD_SMALL_READ):
3494 		sbd_handle_short_read_xfer_completion(task, scmd, dbuf);
3495 		break;
3496 
3497 	case (SBD_CMD_SMALL_WRITE):
3498 		sbd_handle_short_write_xfer_completion(task, dbuf);
3499 		break;
3500 
3501 	default:
3502 		cmn_err(CE_PANIC, "Unknown cmd type, task = %p", (void *)task);
3503 		break;
3504 	}
3505 }
3506 
3507 /* ARGSUSED */
3508 void
3509 sbd_send_status_done(struct scsi_task *task)
3510 {
3511 	cmn_err(CE_PANIC,
3512 	    "sbd_send_status_done: this should not have been called");
3513 }
3514 
3515 void
3516 sbd_task_free(struct scsi_task *task)
3517 {
3518 	if (task->task_lu_private) {
3519 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3520 		if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
3521 			cmn_err(CE_PANIC, "cmd is active, task = %p",
3522 			    (void *)task);
3523 		}
3524 		kmem_free(scmd, sizeof (sbd_cmd_t));
3525 	}
3526 }
3527 
3528 /*
3529  * Aborts are synchronus w.r.t. I/O AND
3530  * All the I/O which SBD does is synchronous AND
3531  * Everything within a task is single threaded.
3532  *   IT MEANS
3533  * If this function is called, we are doing nothing with this task
3534  * inside of sbd module.
3535  */
3536 /* ARGSUSED */
3537 stmf_status_t
3538 sbd_abort(struct stmf_lu *lu, int abort_cmd, void *arg, uint32_t flags)
3539 {
3540 	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3541 	scsi_task_t *task;
3542 
3543 	if (abort_cmd == STMF_LU_RESET_STATE) {
3544 		return (sbd_lu_reset_state(lu));
3545 	}
3546 
3547 	if (abort_cmd == STMF_LU_ITL_HANDLE_REMOVED) {
3548 		sbd_check_and_clear_scsi2_reservation(sl, (sbd_it_data_t *)arg);
3549 		sbd_remove_it_handle(sl, (sbd_it_data_t *)arg);
3550 		return (STMF_SUCCESS);
3551 	}
3552 
3553 	ASSERT(abort_cmd == STMF_LU_ABORT_TASK);
3554 	task = (scsi_task_t *)arg;
3555 	if (task->task_lu_private) {
3556 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3557 
3558 		if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
3559 			if (scmd->flags & SBD_SCSI_CMD_TRANS_DATA) {
3560 				kmem_free(scmd->trans_data,
3561 				    scmd->trans_data_len);
3562 				scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
3563 			}
3564 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
3565 			return (STMF_ABORT_SUCCESS);
3566 		}
3567 	}
3568 
3569 	return (STMF_NOT_FOUND);
3570 }
3571 
3572 /*
3573  * This function is called during task clean-up if the
3574  * DB_LU_FLAG is set on the dbuf. This should only be called for
3575  * abort processing after sbd_abort has been called for the task.
3576  */
3577 void
3578 sbd_dbuf_free(struct scsi_task *task, struct stmf_data_buf *dbuf)
3579 {
3580 	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3581 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
3582 
3583 	ASSERT(dbuf->db_lu_private);
3584 	ASSERT(scmd && scmd->nbufs > 0);
3585 	ASSERT((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0);
3586 	ASSERT(dbuf->db_flags & DB_LU_DATA_BUF);
3587 	ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
3588 	ASSERT((curthread->t_flag & T_INTR_THREAD) == 0);
3589 
3590 	if (scmd->cmd_type == SBD_CMD_SCSI_READ) {
3591 		sbd_zvol_rele_read_bufs(sl, dbuf);
3592 	} else if (scmd->cmd_type == SBD_CMD_SCSI_WRITE) {
3593 		sbd_zvol_rele_write_bufs_abort(sl, dbuf);
3594 	} else {
3595 		cmn_err(CE_PANIC, "Unknown cmd type %d, task = %p",
3596 		    scmd->cmd_type, (void *)task);
3597 	}
3598 	if (--scmd->nbufs == 0)
3599 		rw_exit(&sl->sl_access_state_lock);
3600 	stmf_teardown_dbuf(task, dbuf);
3601 	stmf_free(dbuf);
3602 }
3603 
3604 /* ARGSUSED */
3605 void
3606 sbd_ctl(struct stmf_lu *lu, int cmd, void *arg)
3607 {
3608 	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3609 	stmf_change_status_t st;
3610 
3611 	ASSERT((cmd == STMF_CMD_LU_ONLINE) ||
3612 	    (cmd == STMF_CMD_LU_OFFLINE) ||
3613 	    (cmd == STMF_ACK_LU_ONLINE_COMPLETE) ||
3614 	    (cmd == STMF_ACK_LU_OFFLINE_COMPLETE));
3615 
3616 	st.st_completion_status = STMF_SUCCESS;
3617 	st.st_additional_info = NULL;
3618 
3619 	switch (cmd) {
3620 	case STMF_CMD_LU_ONLINE:
3621 		if (sl->sl_state == STMF_STATE_ONLINE)
3622 			st.st_completion_status = STMF_ALREADY;
3623 		else if (sl->sl_state != STMF_STATE_OFFLINE)
3624 			st.st_completion_status = STMF_FAILURE;
3625 		if (st.st_completion_status == STMF_SUCCESS) {
3626 			sl->sl_state = STMF_STATE_ONLINE;
3627 			sl->sl_state_not_acked = 1;
3628 		}
3629 		(void) stmf_ctl(STMF_CMD_LU_ONLINE_COMPLETE, lu, &st);
3630 		break;
3631 
3632 	case STMF_CMD_LU_OFFLINE:
3633 		if (sl->sl_state == STMF_STATE_OFFLINE)
3634 			st.st_completion_status = STMF_ALREADY;
3635 		else if (sl->sl_state != STMF_STATE_ONLINE)
3636 			st.st_completion_status = STMF_FAILURE;
3637 		if (st.st_completion_status == STMF_SUCCESS) {
3638 			sl->sl_flags &= ~(SL_MEDIUM_REMOVAL_PREVENTED |
3639 			    SL_LU_HAS_SCSI2_RESERVATION);
3640 			sl->sl_state = STMF_STATE_OFFLINE;
3641 			sl->sl_state_not_acked = 1;
3642 			sbd_pgr_reset(sl);
3643 		}
3644 		(void) stmf_ctl(STMF_CMD_LU_OFFLINE_COMPLETE, lu, &st);
3645 		break;
3646 
3647 	case STMF_ACK_LU_ONLINE_COMPLETE:
3648 		/* Fallthrough */
3649 	case STMF_ACK_LU_OFFLINE_COMPLETE:
3650 		sl->sl_state_not_acked = 0;
3651 		break;
3652 
3653 	}
3654 }
3655 
3656 /* ARGSUSED */
3657 stmf_status_t
3658 sbd_info(uint32_t cmd, stmf_lu_t *lu, void *arg, uint8_t *buf,
3659     uint32_t *bufsizep)
3660 {
3661 	return (STMF_NOT_SUPPORTED);
3662 }
3663 
3664 stmf_status_t
3665 sbd_lu_reset_state(stmf_lu_t *lu)
3666 {
3667 	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3668 
3669 	mutex_enter(&sl->sl_lock);
3670 	if (sl->sl_flags & SL_SAVED_WRITE_CACHE_DISABLE) {
3671 		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
3672 		mutex_exit(&sl->sl_lock);
3673 		if (sl->sl_access_state == SBD_LU_ACTIVE) {
3674 			(void) sbd_wcd_set(1, sl);
3675 		}
3676 	} else {
3677 		sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
3678 		mutex_exit(&sl->sl_lock);
3679 		if (sl->sl_access_state == SBD_LU_ACTIVE) {
3680 			(void) sbd_wcd_set(0, sl);
3681 		}
3682 	}
3683 	sbd_pgr_reset(sl);
3684 	sbd_check_and_clear_scsi2_reservation(sl, NULL);
3685 	if (stmf_deregister_all_lu_itl_handles(lu) != STMF_SUCCESS) {
3686 		return (STMF_FAILURE);
3687 	}
3688 	return (STMF_SUCCESS);
3689 }
3690 
3691 sbd_status_t
3692 sbd_flush_data_cache(sbd_lu_t *sl, int fsync_done)
3693 {
3694 	int r = 0;
3695 	int ret;
3696 
3697 	if (fsync_done)
3698 		goto over_fsync;
3699 	if ((sl->sl_data_vtype == VREG) || (sl->sl_data_vtype == VBLK)) {
3700 		if (VOP_FSYNC(sl->sl_data_vp, FSYNC, kcred, NULL))
3701 			return (SBD_FAILURE);
3702 	}
3703 over_fsync:
3704 	if (((sl->sl_data_vtype == VCHR) || (sl->sl_data_vtype == VBLK)) &&
3705 	    ((sl->sl_flags & SL_NO_DATA_DKIOFLUSH) == 0)) {
3706 		ret = VOP_IOCTL(sl->sl_data_vp, DKIOCFLUSHWRITECACHE, NULL,
3707 		    FKIOCTL, kcred, &r, NULL);
3708 		if ((ret == ENOTTY) || (ret == ENOTSUP)) {
3709 			mutex_enter(&sl->sl_lock);
3710 			sl->sl_flags |= SL_NO_DATA_DKIOFLUSH;
3711 			mutex_exit(&sl->sl_lock);
3712 		} else if (ret != 0) {
3713 			return (SBD_FAILURE);
3714 		}
3715 	}
3716 
3717 	return (SBD_SUCCESS);
3718 }
3719 
3720 /* ARGSUSED */
3721 static void
3722 sbd_handle_sync_cache(struct scsi_task *task,
3723     struct stmf_data_buf *initial_dbuf)
3724 {
3725 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
3726 	uint64_t	lba, laddr;
3727 	sbd_status_t	sret;
3728 	uint32_t	len;
3729 	int		is_g4 = 0;
3730 	int		immed;
3731 
3732 	task->task_cmd_xfer_length = 0;
3733 	/*
3734 	 * Determine if this is a 10 or 16 byte CDB
3735 	 */
3736 
3737 	if (task->task_cdb[0] == SCMD_SYNCHRONIZE_CACHE_G4)
3738 		is_g4 = 1;
3739 
3740 	/*
3741 	 * Determine other requested parameters
3742 	 *
3743 	 * We don't have a non-volatile cache, so don't care about SYNC_NV.
3744 	 * Do not support the IMMED bit.
3745 	 */
3746 
3747 	immed = (task->task_cdb[1] & 0x02);
3748 
3749 	if (immed) {
3750 		stmf_scsilib_send_status(task, STATUS_CHECK,
3751 		    STMF_SAA_INVALID_FIELD_IN_CDB);
3752 		return;
3753 	}
3754 
3755 	/*
3756 	 * Check to be sure we're not being asked to sync an LBA
3757 	 * that is out of range.  While checking, verify reserved fields.
3758 	 */
3759 
3760 	if (is_g4) {
3761 		if ((task->task_cdb[1] & 0xf9) || task->task_cdb[14] ||
3762 		    task->task_cdb[15]) {
3763 			stmf_scsilib_send_status(task, STATUS_CHECK,
3764 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3765 			return;
3766 		}
3767 
3768 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
3769 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
3770 	} else {
3771 		if ((task->task_cdb[1] & 0xf9) || task->task_cdb[6] ||
3772 		    task->task_cdb[9]) {
3773 			stmf_scsilib_send_status(task, STATUS_CHECK,
3774 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3775 			return;
3776 		}
3777 
3778 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
3779 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
3780 	}
3781 
3782 	laddr = lba << sl->sl_data_blocksize_shift;
3783 	len <<= sl->sl_data_blocksize_shift;
3784 
3785 	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
3786 		stmf_scsilib_send_status(task, STATUS_CHECK,
3787 		    STMF_SAA_LBA_OUT_OF_RANGE);
3788 		return;
3789 	}
3790 
3791 	sret = sbd_flush_data_cache(sl, 0);
3792 	if (sret != SBD_SUCCESS) {
3793 		stmf_scsilib_send_status(task, STATUS_CHECK,
3794 		    STMF_SAA_WRITE_ERROR);
3795 		return;
3796 	}
3797 
3798 	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3799 }
3800