/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/conf.h>
#include <sys/file.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/scsi/scsi.h>
#include <sys/scsi/impl/scsi_reset_notify.h>
#include <sys/scsi/generic/mode.h>
#include <sys/disp.h>
#include <sys/byteorder.h>
#include <sys/atomic.h>
#include <sys/sdt.h>
#include <sys/dkio.h>

#include <sys/stmf.h>
#include <sys/lpif.h>
#include <sys/portif.h>
#include <sys/stmf_ioctl.h>
#include <sys/stmf_sbd_ioctl.h>

#include "stmf_sbd.h"
#include "sbd_impl.h"

#define	SCSI2_CONFLICT_FREE_CMDS(cdb)	( \
	/* ----------------------- */                                      \
	/* Refer Both		   */                                      \
	/* SPC-2 (rev 20) Table 10 */                                      \
	/* SPC-3 (rev 23) Table 31 */                                      \
	/* ----------------------- */                                      \
	((cdb[0]) == SCMD_INQUIRY)					|| \
	((cdb[0]) == SCMD_LOG_SENSE_G1)					|| \
	((cdb[0]) == SCMD_RELEASE)					|| \
	((cdb[0]) == SCMD_RELEASE_G1)					|| \
	((cdb[0]) == SCMD_REPORT_LUNS)					|| \
	((cdb[0]) == SCMD_REQUEST_SENSE)				|| \
	/* PREVENT ALLOW MEDIUM REMOVAL with prevent == 0 */               \
	((((cdb[0]) == SCMD_DOORLOCK) && (((cdb[4]) & 0x3) == 0)))	|| \
	/* SERVICE ACTION IN with READ MEDIA SERIAL NUMBER (0x01) */       \
	(((cdb[0]) == SCMD_SVC_ACTION_IN_G5) && (                          \
	    ((cdb[1]) & 0x1F) == 0x01))					|| \
	/* MAINTENANCE IN with service actions REPORT ALIASES (0x0B) */    \
	/* REPORT DEVICE IDENTIFIER (0x05)  REPORT PRIORITY (0x0E) */      \
	/* REPORT TARGET PORT GROUPS (0x0A) REPORT TIMESTAMP (0x0F) */     \
	(((cdb[0]) == SCMD_MAINTENANCE_IN) && (                            \
	    (((cdb[1]) & 0x1F) == 0x0B) ||                                 \
	    (((cdb[1]) & 0x1F) == 0x05) ||                                 \
	    (((cdb[1]) & 0x1F) == 0x0E) ||                                 \
	    (((cdb[1]) & 0x1F) == 0x0A) ||                                 \
	    (((cdb[1]) & 0x1F) == 0x0F)))				|| \
	/* ----------------------- */                                      \
	/* SBC-3 (rev 17) Table 3  */                                      \
	/* ----------------------- */                                      \
	/* READ CAPACITY(10) */                                            \
	((cdb[0]) == SCMD_READ_CAPACITY)				|| \
	/* READ CAPACITY(16) */                                            \
	(((cdb[0]) == SCMD_SVC_ACTION_IN_G4) && (                          \
	    ((cdb[1]) & 0x1F) == 0x10))					|| \
	/* START STOP UNIT with START bit 0 and POWER CONDITION 0 */       \
	(((cdb[0]) == SCMD_START_STOP) && (                                \
	    (((cdb[4]) & 0xF0) == 0) && (((cdb[4]) & 0x01) == 0))))
/* End of SCSI2_CONFLICT_FREE_CMDS */
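/*
 * Illustrative note (not part of the macro logic above): for commands
 * that encode a service action, the action lives in the low 5 bits of
 * cdb[1]. Assuming a REPORT TARGET PORT GROUPS request, the CDB carries
 * cdb[0] == 0xA3 (MAINTENANCE IN) and (cdb[1] & 0x1F) == 0x0A, so the
 * macro evaluates to true and the command is allowed through even when
 * another initiator holds a SCSI-2 reservation.
 */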

stmf_status_t sbd_lu_reset_state(stmf_lu_t *lu);
static void sbd_handle_sync_cache(struct scsi_task *task,
    struct stmf_data_buf *initial_dbuf);
void sbd_handle_read_xfer_completion(struct scsi_task *task,
    sbd_cmd_t *scmd, struct stmf_data_buf *dbuf);
void sbd_handle_short_write_xfer_completion(scsi_task_t *task,
    stmf_data_buf_t *dbuf);
void sbd_handle_short_write_transfers(scsi_task_t *task,
    stmf_data_buf_t *dbuf, uint32_t cdb_xfer_size);
void sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf,
    uint32_t buflen);
void sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf);
void sbd_handle_identifying_info(scsi_task_t *task, stmf_data_buf_t *dbuf);

extern void sbd_pgr_initialize_it(scsi_task_t *, sbd_it_data_t *);
extern int sbd_pgr_reservation_conflict(scsi_task_t *);
extern void sbd_pgr_reset(sbd_lu_t *);
extern void sbd_pgr_remove_it_handle(sbd_lu_t *, sbd_it_data_t *);
extern void sbd_handle_pgr_in_cmd(scsi_task_t *, stmf_data_buf_t *);
extern void sbd_handle_pgr_out_cmd(scsi_task_t *, stmf_data_buf_t *);
extern void sbd_handle_pgr_out_data(scsi_task_t *, stmf_data_buf_t *);
void sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
    int first_xfer);
/*
 * IMPORTANT NOTE:
 * =================
 * The whole world here is based on the assumption that everything within
 * a scsi task executes in a single-threaded manner, even the aborts.
 * Don't ever change that. There won't be any performance gain, but there
 * will be tons of race conditions.
 */

void
sbd_do_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
					struct stmf_data_buf *dbuf)
{
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	uint64_t laddr;
	uint32_t len, buflen, iolen;
	int ndx;
	int bufs_to_take;

	/* Let's try not to hog all the buffers the port has. */
	bufs_to_take = ((task->task_max_nbufs > 2) &&
	    (task->task_cmd_xfer_length < (32 * 1024))) ? 2 :
	    task->task_max_nbufs;

	len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size : scmd->len;
	laddr = scmd->addr + scmd->current_ro;

	for (buflen = 0, ndx = 0; (buflen < len) &&
	    (ndx < dbuf->db_sglist_length); ndx++) {
		iolen = min(len - buflen, dbuf->db_sglist[ndx].seg_length);
		if (iolen == 0)
			break;
		if (sbd_data_read(sl, task, laddr, (uint64_t)iolen,
		    dbuf->db_sglist[ndx].seg_addr) != STMF_SUCCESS) {
			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
			/* Do not need to do xfer anymore, just complete it */
			dbuf->db_data_size = 0;
			dbuf->db_xfer_status = STMF_SUCCESS;
			sbd_handle_read_xfer_completion(task, scmd, dbuf);
			return;
		}
		buflen += iolen;
		laddr += (uint64_t)iolen;
	}
	dbuf->db_relative_offset = scmd->current_ro;
	dbuf->db_data_size = buflen;
	dbuf->db_flags = DB_DIRECTION_TO_RPORT;
	(void) stmf_xfer_data(task, dbuf, 0);
	scmd->len -= buflen;
	scmd->current_ro += buflen;
	if (scmd->len && (scmd->nbufs < bufs_to_take)) {
		uint32_t maxsize, minsize, old_minsize;

		maxsize = (scmd->len > (128*1024)) ? 128*1024 : scmd->len;
		minsize = maxsize >> 2;
		do {
			/*
			 * A bad port implementation can keep on failing the
			 * request but keep on sending us a false minsize.
			 */
			old_minsize = minsize;
			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
		} while ((dbuf == NULL) && (old_minsize > minsize) &&
		    (minsize >= 512));
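		/*
		 * Worked example of the retry loop above (values assumed):
		 * with scmd->len = 96K, the first pass asks for
		 * maxsize = 96K with a floor of minsize = 24K. A port that
		 * cannot satisfy that may return NULL and lower minsize
		 * (say, to 8K); the loop then retries with the smaller
		 * floor and only gives up once the port stops lowering
		 * minsize or the floor drops below 512 bytes.
		 */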
		if (dbuf == NULL) {
			return;
		}
		scmd->nbufs++;
		sbd_do_read_xfer(task, scmd, dbuf);
	}
}

/*
 * sbd_zcopy: Bail-out switch for reduced copy path.
 *
 * 0 - read & write off
 * 1 - read & write on
 * 2 - only read on
 * 4 - only write on
 */
int sbd_zcopy = 1;	/* enable zcopy read & write path */
uint32_t sbd_max_xfer_len = 0;		/* Valid if non-zero */
uint32_t sbd_1st_xfer_len = 0;		/* Valid if non-zero */
uint32_t sbd_copy_threshold = 0;	/* Valid if non-zero */
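
/*
 * Note on how the sbd_zcopy bits are consumed below: the read path tests
 * (sbd_zcopy & (2|1)) and the write path tests (sbd_zcopy & (4|1)). For
 * example, setting sbd_zcopy = 4 enables the reduced copy path for writes
 * only, since 4 & (2|1) == 0.
 */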

static void
sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
{
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	sbd_zvol_io_t *zvio;
	int ret, final_xfer;
	uint64_t offset;
	uint32_t xfer_len, max_len, first_len;
	stmf_status_t xstat;
	stmf_data_buf_t *dbuf;
	uint_t nblks;
	uint64_t blksize = sl->sl_blksize;
	size_t db_private_sz;
	hrtime_t xfer_start, xfer_elapsed;
	uintptr_t pad;

	ASSERT(rw_read_held(&sl->sl_access_state_lock));
	ASSERT((sl->sl_flags & SL_MEDIA_LOADED) != 0);

	/*
	 * Calculate the limits on xfer_len to the minimum of:
	 *    - task limit
	 *    - lun limit
	 *    - sbd global limit if set
	 *    - first xfer limit if set
	 *
	 * First, protect against silly over-ride value
	 */
	if (sbd_max_xfer_len && ((sbd_max_xfer_len % DEV_BSIZE) != 0)) {
		cmn_err(CE_WARN, "sbd_max_xfer_len invalid %d, resetting\n",
		    sbd_max_xfer_len);
		sbd_max_xfer_len = 0;
	}
	if (sbd_1st_xfer_len && ((sbd_1st_xfer_len % DEV_BSIZE) != 0)) {
		cmn_err(CE_WARN, "sbd_1st_xfer_len invalid %d, resetting\n",
		    sbd_1st_xfer_len);
		sbd_1st_xfer_len = 0;
	}

	max_len = MIN(task->task_max_xfer_len, sl->sl_max_xfer_len);
	if (sbd_max_xfer_len)
		max_len = MIN(max_len, sbd_max_xfer_len);
	/*
	 * Special case the first xfer if hints are set.
	 */
	if (first_xfer && (sbd_1st_xfer_len || task->task_1st_xfer_len)) {
		/* global over-ride has precedence */
		if (sbd_1st_xfer_len)
			first_len = sbd_1st_xfer_len;
		else
			first_len = task->task_1st_xfer_len;
	} else {
		first_len = 0;
	}

	while (scmd->len && scmd->nbufs < task->task_max_nbufs) {

		xfer_len = MIN(max_len, scmd->len);
		if (first_len) {
			xfer_len = MIN(xfer_len, first_len);
			first_len = 0;
		}
		if (scmd->len == xfer_len) {
			final_xfer = 1;
		} else {
			/*
			 * Attempt to end xfer on a block boundary.
			 * The only way this does not happen is if the
			 * xfer_len is small enough to stay contained
			 * within the same block.
			 */
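			/*
			 * For example, assuming blksize = 512 with
			 * xfer_offset = 1000 and xfer_len = 3000:
			 * P2ALIGN(4000, 512) = 3584 > 1000, so xfer_len
			 * becomes 2584 and this xfer ends exactly on the
			 * block boundary at offset 3584.
			 */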
			uint64_t xfer_offset, xfer_aligned_end;

			final_xfer = 0;
			xfer_offset = scmd->addr + scmd->current_ro;
			xfer_aligned_end =
			    P2ALIGN(xfer_offset + xfer_len, blksize);
			if (xfer_aligned_end > xfer_offset)
				xfer_len = xfer_aligned_end - xfer_offset;
		}
		/*
		 * Allocate object to track the read and reserve
		 * enough space for scatter/gather list.
		 */
		offset = scmd->addr + scmd->current_ro;
		nblks = sbd_zvol_numsegs(sl, offset, xfer_len);

		db_private_sz = sizeof (*zvio) + sizeof (uintptr_t) /* PAD */ +
		    (nblks * sizeof (stmf_sglist_ent_t));
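		/*
		 * Sketch of the resulting layout (inferred from the pointer
		 * math below, not a framework guarantee): the sglist entries
		 * live at db_sglist[0..nblks-1] inside the dbuf allocation,
		 * and the zvio object is placed just past db_sglist[nblks],
		 * rounded up to pointer alignment; the extra
		 * sizeof (uintptr_t) in db_private_sz pays for that round-up.
		 */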
		dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, db_private_sz,
		    AF_DONTZERO);
		/*
		 * Setup the dbuf
		 *
		 * XXX Framework does not handle variable length sglists
		 * properly, so setup db_lu_private and db_port_private
		 * fields here. db_stmf_private is properly set for
		 * calls to stmf_free.
		 */
		if (dbuf->db_port_private == NULL) {
			/*
			 * XXX Framework assigns space to PP after db_sglist[0]
			 */
			cmn_err(CE_PANIC, "db_port_private == NULL");
		}
		pad = (uintptr_t)&dbuf->db_sglist[nblks];
		dbuf->db_lu_private = (void *)P2ROUNDUP(pad, sizeof (pad));
		dbuf->db_port_private = NULL;
		dbuf->db_buf_size = xfer_len;
		dbuf->db_data_size = xfer_len;
		dbuf->db_relative_offset = scmd->current_ro;
		dbuf->db_sglist_length = (uint16_t)nblks;
		dbuf->db_xfer_status = 0;
		dbuf->db_handle = 0;

		dbuf->db_flags = (DB_DONT_CACHE | DB_DONT_REUSE |
		    DB_DIRECTION_TO_RPORT | DB_LU_DATA_BUF);
		if (final_xfer)
			dbuf->db_flags |= DB_SEND_STATUS_GOOD;

		zvio = dbuf->db_lu_private;
		/* Need absolute offset for zvol access */
		zvio->zvio_offset = offset;
		zvio->zvio_flags = ZVIO_SYNC;

		/*
		 * Accounting for start of read.
		 * Note there is no buffer address for the probe yet.
		 */
		stmf_lu_xfer_start(task);
		DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
		    uint8_t *, NULL, uint64_t, xfer_len,
		    uint64_t, offset, scsi_task_t *, task);
		xfer_start = gethrtime();

		ret = sbd_zvol_alloc_read_bufs(sl, dbuf);

		xfer_elapsed = gethrtime() - xfer_start;

		stmf_lu_xfer_done(task, B_TRUE /* read */, (uint64_t)xfer_len,
		    xfer_elapsed);
		DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
		    uint8_t *, NULL, uint64_t, xfer_len,
		    uint64_t, offset, int, ret, scsi_task_t *, task);

		if (ret != 0) {
			/*
			 * Read failure from the backend.
			 */
			stmf_free(dbuf);
			if (scmd->nbufs == 0) {
				/* nothing queued, just finish */
				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
				stmf_scsilib_send_status(task, STATUS_CHECK,
				    STMF_SAA_READ_ERROR);
				rw_exit(&sl->sl_access_state_lock);
			} else {
				/* process failure when other dbufs finish */
				scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
			}
			return;
		}

		/*
		 * Allow PP to do setup
		 */
		xstat = stmf_setup_dbuf(task, dbuf, 0);
		if (xstat != STMF_SUCCESS) {
			/*
			 * This could happen if the driver cannot get the
			 * DDI resources it needs for this request.
			 * If other dbufs are queued, try again when the next
			 * one completes, otherwise give up.
			 */
			sbd_zvol_rele_read_bufs(sl, dbuf);
			stmf_free(dbuf);
			if (scmd->nbufs > 0) {
				/* completion of previous dbuf will retry */
				return;
			}
			/*
			 * Done with this command.
			 */
			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
			if (first_xfer)
				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
			else
				stmf_scsilib_send_status(task, STATUS_CHECK,
				    STMF_SAA_READ_ERROR);
			rw_exit(&sl->sl_access_state_lock);
			return;
		}
		/*
		 * dbuf is now queued on task
		 */
		scmd->nbufs++;

		/* XXX leave this in for FW? */
		DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
		    struct stmf_data_buf *, dbuf, uint64_t, offset,
		    uint32_t, xfer_len);
		/*
		 * Do not pass STMF_IOF_LU_DONE so that the zvol
		 * state can be released in the completion callback.
		 */
		xstat = stmf_xfer_data(task, dbuf, 0);
		switch (xstat) {
		case STMF_SUCCESS:
			break;
		case STMF_BUSY:
			/*
			 * The dbuf is queued on the task, but unknown
			 * to the PP, thus no completion will occur.
			 */
			sbd_zvol_rele_read_bufs(sl, dbuf);
			stmf_teardown_dbuf(task, dbuf);
			stmf_free(dbuf);
			scmd->nbufs--;
			if (scmd->nbufs > 0) {
				/* completion of previous dbuf will retry */
				return;
			}
			/*
			 * Done with this command.
			 */
			rw_exit(&sl->sl_access_state_lock);
			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
			if (first_xfer)
				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
			else
				stmf_scsilib_send_status(task, STATUS_CHECK,
				    STMF_SAA_READ_ERROR);
			return;
		case STMF_ABORTED:
			/*
			 * Completion from task_done will cleanup
			 */
			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
			return;
		}
		/*
		 * Update the xfer progress.
		 */
		ASSERT(scmd->len >= xfer_len);
		scmd->len -= xfer_len;
		scmd->current_ro += xfer_len;
	}
}

void
sbd_handle_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
				struct stmf_data_buf *dbuf)
{
	if (dbuf->db_xfer_status != STMF_SUCCESS) {
		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
		    dbuf->db_xfer_status, NULL);
		return;
	}
	task->task_nbytes_transferred += dbuf->db_data_size;
	if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
		stmf_free_dbuf(task, dbuf);
		scmd->nbufs--;
		if (scmd->nbufs)
			return;	/* wait for all buffers to complete */
		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL)
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_READ_ERROR);
		else
			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
		return;
	}
	if (dbuf->db_flags & DB_DONT_REUSE) {
		/* allocate new dbuf */
		uint32_t maxsize, minsize, old_minsize;
		stmf_free_dbuf(task, dbuf);

		maxsize = (scmd->len > (128*1024)) ? 128*1024 : scmd->len;
		minsize = maxsize >> 2;
		do {
			old_minsize = minsize;
			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
		} while ((dbuf == NULL) && (old_minsize > minsize) &&
		    (minsize >= 512));
		if (dbuf == NULL) {
			scmd->nbufs--;
			if (scmd->nbufs == 0) {
				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
				    STMF_ALLOC_FAILURE, NULL);
			}
			return;
		}
	}
	sbd_do_read_xfer(task, scmd, dbuf);
}

/*
 * This routine must release the DMU resources and free the dbuf
 * in all cases.  If this is the final dbuf of the task, then drop
 * the reader lock on the LU state. If there are no errors and more
 * work to do, then queue more xfer operations.
 */
void
sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
				struct stmf_data_buf *dbuf)
{
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	stmf_status_t xfer_status;
	uint32_t data_size;
	int scmd_err;

	ASSERT(dbuf->db_lu_private);
	ASSERT(scmd->cmd_type == SBD_CMD_SCSI_READ);

	scmd->nbufs--;	/* account for this dbuf */
	/*
	 * Release the DMU resources.
	 */
	sbd_zvol_rele_read_bufs(sl, dbuf);
	/*
	 * Release the dbuf after retrieving needed fields.
	 */
	xfer_status = dbuf->db_xfer_status;
	data_size = dbuf->db_data_size;
	stmf_teardown_dbuf(task, dbuf);
	stmf_free(dbuf);
	/*
	 * Release the state lock if this is the last completion.
	 * If this is the last dbuf on task and all data has been
	 * transferred or an error encountered, then no more dbufs
	 * will be queued.
	 */
	scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
	    (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
	    (xfer_status != STMF_SUCCESS));
	if (scmd->nbufs == 0 && (scmd->len == 0 || scmd_err)) {
		/* all DMU state has been released */
		rw_exit(&sl->sl_access_state_lock);
	}

	/*
	 * If there have been no errors, either complete the task
	 * or issue more data xfer operations.
	 */
	if (!scmd_err) {
		/*
		 * This chunk completed successfully
		 */
		task->task_nbytes_transferred += data_size;
		if (scmd->nbufs == 0 && scmd->len == 0) {
			/*
			 * This command completed successfully
			 *
			 * Status was sent along with data, so no status
			 * completion will occur. Tell stmf we are done.
			 */
			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
			stmf_task_lu_done(task);
			return;
		}
		/*
		 * Start more xfers
		 */
		sbd_do_sgl_read_xfer(task, scmd, 0);
		return;
	}
	/*
	 * Sort out the failure
	 */
	if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
		/*
		 * If a previous error occurred, leave the command active
		 * and wait for the last completion to send the status check.
		 */
		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
			if (scmd->nbufs == 0) {
				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
				stmf_scsilib_send_status(task, STATUS_CHECK,
				    STMF_SAA_READ_ERROR);
			}
			return;
		}
		/*
		 * Must have been a failure on current dbuf
		 */
		ASSERT(xfer_status != STMF_SUCCESS);
		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
		stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
	}
}

void
sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
				struct stmf_data_buf *dbuf)
{
	sbd_zvol_io_t *zvio = dbuf->db_lu_private;
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	int ret;
	int scmd_err, scmd_xfer_done;
	stmf_status_t xfer_status = dbuf->db_xfer_status;
	uint32_t data_size = dbuf->db_data_size;
	hrtime_t xfer_start;

	ASSERT(zvio);

	/*
	 * Allow PP to free up resources before releasing the write bufs
	 * as writing to the backend could take some time.
	 */
	stmf_teardown_dbuf(task, dbuf);

	scmd->nbufs--;	/* account for this dbuf */
	/*
	 * All data was queued and this is the last completion,
	 * but there could still be an error.
	 */
	scmd_xfer_done = (scmd->len == 0 && scmd->nbufs == 0);
	scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
	    (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
	    (xfer_status != STMF_SUCCESS));

	/* start the accounting clock */
	stmf_lu_xfer_start(task);
	DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
	    uint8_t *, NULL, uint64_t, data_size,
	    uint64_t, zvio->zvio_offset, scsi_task_t *, task);
	xfer_start = gethrtime();

	if (scmd_err) {
		/* just return the write buffers */
		sbd_zvol_rele_write_bufs_abort(sl, dbuf);
		ret = 0;
	} else {
		if (scmd_xfer_done)
			zvio->zvio_flags = ZVIO_COMMIT;
		else
			zvio->zvio_flags = 0;
		/* write the data */
		ret = sbd_zvol_rele_write_bufs(sl, dbuf);
	}

	/* finalize accounting */
	stmf_lu_xfer_done(task, B_FALSE /* not read */, data_size,
	    (gethrtime() - xfer_start));
	DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
	    uint8_t *, NULL, uint64_t, data_size,
	    uint64_t, zvio->zvio_offset, int, ret, scsi_task_t *, task);

	if (ret != 0) {
		/* update the error flag */
		scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
		scmd_err = 1;
	}

	/* Release the dbuf */
	stmf_free(dbuf);

	/*
	 * Release the state lock if this is the last completion.
	 * If this is the last dbuf on task and all data has been
	 * transferred or an error encountered, then no more dbufs
	 * will be queued.
	 */
	if (scmd->nbufs == 0 && (scmd->len == 0 || scmd_err)) {
		/* all DMU state has been released */
		rw_exit(&sl->sl_access_state_lock);
	}
	/*
	 * If there have been no errors, either complete the task
	 * or issue more data xfer operations.
	 */
	if (!scmd_err) {
		/* This chunk completed successfully */
		task->task_nbytes_transferred += data_size;
		if (scmd_xfer_done) {
			/* This command completed successfully */
			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
			if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
			    (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
				stmf_scsilib_send_status(task, STATUS_CHECK,
				    STMF_SAA_WRITE_ERROR);
			} else {
				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
			}
			return;
		}
		/*
		 * Start more xfers
		 */
		sbd_do_sgl_write_xfer(task, scmd, 0);
		return;
	}
	/*
	 * Sort out the failure
	 */
	if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
			if (scmd->nbufs == 0) {
				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
				stmf_scsilib_send_status(task, STATUS_CHECK,
				    STMF_SAA_WRITE_ERROR);
			}
			/*
			 * Leave the command active until last dbuf completes.
			 */
			return;
		}
		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
		ASSERT(xfer_status != STMF_SUCCESS);
		stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
	}
}

/*
 * Handle a copy operation using the zvol interface.
 *
 * Similar to the sbd_data_read/write path, except it goes directly through
 * the zvol interfaces. It can pass a port provider sglist in the form of a
 * uio, which would otherwise be lost through the vn_rdwr path.
 *
 * Returns:
 *	STMF_SUCCESS - request handled
 *	STMF_FAILURE - request not handled, caller must deal with error
 */
static stmf_status_t
sbd_copy_rdwr(scsi_task_t *task, uint64_t laddr, stmf_data_buf_t *dbuf,
    int cmd, int commit)
{
	sbd_lu_t		*sl = task->task_lu->lu_provider_private;
	struct uio		uio;
	struct iovec		*iov, *tiov, iov1[8];
	uint32_t		len, resid;
	int			ret, i, iovcnt, flags;
	hrtime_t		xfer_start;
	boolean_t		is_read;

	ASSERT(cmd == SBD_CMD_SCSI_READ || cmd == SBD_CMD_SCSI_WRITE);

	is_read = (cmd == SBD_CMD_SCSI_READ) ? B_TRUE : B_FALSE;
	iovcnt = dbuf->db_sglist_length;
	/* use the stack for small iovecs */
	if (iovcnt > 8) {
		iov = kmem_alloc(iovcnt * sizeof (*iov), KM_SLEEP);
	} else {
		iov = &iov1[0];
	}

	/* Convert dbuf sglist to iovec format */
	len = dbuf->db_data_size;
	resid = len;
	tiov = iov;
	for (i = 0; i < iovcnt; i++) {
		tiov->iov_base = (caddr_t)dbuf->db_sglist[i].seg_addr;
		tiov->iov_len = MIN(resid, dbuf->db_sglist[i].seg_length);
		resid -= tiov->iov_len;
		tiov++;
	}
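	/*
	 * Example of the conversion above (segment sizes assumed): with
	 * db_data_size = 12K and two 8K segments, iov[0].iov_len = 8K
	 * (resid drops to 4K) and iov[1].iov_len = MIN(4K, 8K) = 4K,
	 * leaving resid = 0. The MIN() against resid is what trims the
	 * final segment when the sglist covers more than the request.
	 */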
	if (resid != 0) {
		cmn_err(CE_WARN, "inconsistent sglist rem %d", resid);
		if (iov != &iov1[0])
			kmem_free(iov, iovcnt * sizeof (*iov));
		return (STMF_FAILURE);
	}
	/* Setup the uio struct */
	uio.uio_iov = iov;
	uio.uio_iovcnt = iovcnt;
	uio.uio_loffset = laddr;
	uio.uio_segflg = (short)UIO_SYSSPACE;
	uio.uio_resid = (uint64_t)len;
	uio.uio_llimit = RLIM64_INFINITY;

	/* start the accounting clock */
	stmf_lu_xfer_start(task);
	xfer_start = gethrtime();
	if (is_read == B_TRUE) {
		uio.uio_fmode = FREAD;
		uio.uio_extflg = UIO_COPY_CACHED;
		DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr,
		    scsi_task_t *, task);

		/* Fetch the data */
		ret = sbd_zvol_copy_read(sl, &uio);

		DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
		    scsi_task_t *, task);
	} else {
		uio.uio_fmode = FWRITE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr,
		    scsi_task_t *, task);

		flags = (commit) ? ZVIO_COMMIT : 0;
		/* Write the data */
		ret = sbd_zvol_copy_write(sl, &uio, flags);

		DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
		    scsi_task_t *, task);
	}
	/* finalize accounting */
	stmf_lu_xfer_done(task, is_read, (uint64_t)len,
	    (gethrtime() - xfer_start));

	if (iov != &iov1[0])
		kmem_free(iov, iovcnt * sizeof (*iov));
	if (ret != 0) {
		/* Backend I/O error */
		return (STMF_FAILURE);
	}
	return (STMF_SUCCESS);
}

void
sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
{
	uint64_t lba, laddr;
	uint32_t len;
	uint8_t op = task->task_cdb[0];
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	sbd_cmd_t *scmd;
	stmf_data_buf_t *dbuf;
	int fast_path;

	if (op == SCMD_READ) {
		lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
		len = (uint32_t)task->task_cdb[4];

		if (len == 0) {
			len = 256;
		}
	} else if (op == SCMD_READ_G1) {
		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
	} else if (op == SCMD_READ_G5) {
		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
	} else if (op == SCMD_READ_G4) {
		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
	} else {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_INVALID_OPCODE);
		return;
	}
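	/*
	 * CDB layout refresher for the decode above, using READ(10)
	 * (SCMD_READ_G1, opcode 0x28) as the example: bytes 2-5 carry the
	 * big-endian LBA and bytes 7-8 the big-endian transfer length in
	 * blocks. So lba = 0x1000 with len = 0x80 on a 512-byte-block LU
	 * becomes laddr = 0x200000 and a 64K transfer below.
	 */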

	laddr = lba << sl->sl_data_blocksize_shift;
	len <<= sl->sl_data_blocksize_shift;

	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_LBA_OUT_OF_RANGE);
		return;
	}

	task->task_cmd_xfer_length = len;
	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
		task->task_expected_xfer_length = len;
	}

	if (len != task->task_expected_xfer_length) {
		fast_path = 0;
		len = (len > task->task_expected_xfer_length) ?
		    task->task_expected_xfer_length : len;
	} else {
		fast_path = 1;
	}

	if (len == 0) {
		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
		return;
	}

	/*
	 * Determine if this read can directly use DMU buffers.
	 */
	if (sbd_zcopy & (2|1) &&		/* Debug switch */
	    initial_dbuf == NULL &&		/* No PP buffer passed in */
	    sl->sl_flags & SL_CALL_ZVOL &&	/* zvol backing store */
	    (task->task_additional_flags &
	    TASK_AF_ACCEPT_LU_DBUF))		/* PP allows it */
	{
		/*
		 * Reduced copy path
		 */
		uint32_t copy_threshold, minsize;
		int ret;

		/*
		 * The sl_access_state_lock will be held shared
		 * for the entire request and released when all
		 * dbufs have completed.
		 */
		rw_enter(&sl->sl_access_state_lock, RW_READER);
		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
			rw_exit(&sl->sl_access_state_lock);
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_READ_ERROR);
			return;
		}

		/*
		 * Check if setup is more expensive than copying the data.
		 *
		 * Use the global over-ride sbd_copy_threshold if set.
		 */
		copy_threshold = (sbd_copy_threshold > 0) ?
		    sbd_copy_threshold : task->task_copy_threshold;
		minsize = len;
		if (len < copy_threshold &&
		    (dbuf = stmf_alloc_dbuf(task, len, &minsize, 0)) != 0) {

			ret = sbd_copy_rdwr(task, laddr, dbuf,
			    SBD_CMD_SCSI_READ, 0);
			/* done with the backend */
			rw_exit(&sl->sl_access_state_lock);
			if (ret != 0) {
				/* backend error */
				stmf_scsilib_send_status(task, STATUS_CHECK,
				    STMF_SAA_READ_ERROR);
			} else {
				/* send along good data */
				dbuf->db_relative_offset = 0;
				dbuf->db_data_size = len;
				dbuf->db_flags = DB_SEND_STATUS_GOOD |
				    DB_DIRECTION_TO_RPORT;
				/* XXX keep for FW? */
				DTRACE_PROBE4(sbd__xfer,
				    struct scsi_task *, task,
				    struct stmf_data_buf *, dbuf,
				    uint64_t, laddr, uint32_t, len);
				(void) stmf_xfer_data(task, dbuf,
				    STMF_IOF_LU_DONE);
			}
			return;
		}

		/* committed to reduced copy */
		if (task->task_lu_private) {
			scmd = (sbd_cmd_t *)task->task_lu_private;
		} else {
			scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t),
			    KM_SLEEP);
			task->task_lu_private = scmd;
		}
		/*
		 * Setup scmd to track read progress.
		 */
		scmd->flags = SBD_SCSI_CMD_ACTIVE;
		scmd->cmd_type = SBD_CMD_SCSI_READ;
		scmd->nbufs = 0;
		scmd->addr = laddr;
		scmd->len = len;
		scmd->current_ro = 0;

		/*
		 * Kick-off the read.
		 */
		sbd_do_sgl_read_xfer(task, scmd, 1);
		return;
	}

	if (initial_dbuf == NULL) {
		uint32_t maxsize, minsize, old_minsize;

		maxsize = (len > (128*1024)) ? 128*1024 : len;
		minsize = maxsize >> 2;
		do {
			old_minsize = minsize;
			initial_dbuf = stmf_alloc_dbuf(task, maxsize,
			    &minsize, 0);
		} while ((initial_dbuf == NULL) && (old_minsize > minsize) &&
		    (minsize >= 512));
		if (initial_dbuf == NULL) {
			stmf_scsilib_send_status(task, STATUS_QFULL, 0);
			return;
		}
	}
	dbuf = initial_dbuf;

	if ((dbuf->db_buf_size >= len) && fast_path &&
	    (dbuf->db_sglist_length == 1)) {
		if (sbd_data_read(sl, task, laddr, (uint64_t)len,
		    dbuf->db_sglist[0].seg_addr) == STMF_SUCCESS) {
			dbuf->db_relative_offset = 0;
			dbuf->db_data_size = len;
			dbuf->db_flags = DB_SEND_STATUS_GOOD |
			    DB_DIRECTION_TO_RPORT;
			/* XXX keep for FW? */
			DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
			    struct stmf_data_buf *, dbuf,
			    uint64_t, laddr, uint32_t, len);
			(void) stmf_xfer_data(task, dbuf, STMF_IOF_LU_DONE);
		} else {
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_READ_ERROR);
		}
		return;
	}

	if (task->task_lu_private) {
		scmd = (sbd_cmd_t *)task->task_lu_private;
	} else {
		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
		task->task_lu_private = scmd;
	}
	scmd->flags = SBD_SCSI_CMD_ACTIVE;
	scmd->cmd_type = SBD_CMD_SCSI_READ;
	scmd->nbufs = 1;
	scmd->addr = laddr;
	scmd->len = len;
	scmd->current_ro = 0;

	sbd_do_read_xfer(task, scmd, dbuf);
}

void
sbd_do_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
    struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
{
	uint32_t len;
	int bufs_to_take;

	if (scmd->len == 0) {
		goto DO_WRITE_XFER_DONE;
	}

	/* Let's try not to hog all the buffers the port has. */
	bufs_to_take = ((task->task_max_nbufs > 2) &&
	    (task->task_cmd_xfer_length < (32 * 1024))) ? 2 :
	    task->task_max_nbufs;

	if ((dbuf != NULL) &&
	    ((dbuf->db_flags & DB_DONT_REUSE) || (dbuf_reusable == 0))) {
		/* free current dbuf and allocate a new one */
		stmf_free_dbuf(task, dbuf);
		dbuf = NULL;
	}
	if (scmd->nbufs >= bufs_to_take) {
		goto DO_WRITE_XFER_DONE;
	}
	if (dbuf == NULL) {
		uint32_t maxsize, minsize, old_minsize;

		maxsize = (scmd->len > (128*1024)) ? 128*1024 :
		    scmd->len;
		minsize = maxsize >> 2;
		do {
			old_minsize = minsize;
			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
		} while ((dbuf == NULL) && (old_minsize > minsize) &&
		    (minsize >= 512));
		if (dbuf == NULL) {
			if (scmd->nbufs == 0) {
				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
				    STMF_ALLOC_FAILURE, NULL);
			}
			return;
		}
	}

	len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size :
	    scmd->len;

	dbuf->db_relative_offset = scmd->current_ro;
	dbuf->db_data_size = len;
	dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
	(void) stmf_xfer_data(task, dbuf, 0);
	scmd->nbufs++; /* outstanding port xfers and bufs used */
	scmd->len -= len;
	scmd->current_ro += len;

	if ((scmd->len != 0) && (scmd->nbufs < bufs_to_take)) {
		sbd_do_write_xfer(task, scmd, NULL, 0);
	}
	return;

DO_WRITE_XFER_DONE:
	if (dbuf != NULL) {
		stmf_free_dbuf(task, dbuf);
	}
}

void
sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
{
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	sbd_zvol_io_t *zvio;
	int ret;
	uint32_t xfer_len, max_len, first_len;
	stmf_status_t xstat;
	stmf_data_buf_t *dbuf;
	uint_t nblks;
	uint64_t blksize = sl->sl_blksize;
	uint64_t offset;
	size_t db_private_sz;
	uintptr_t pad;

	ASSERT(rw_read_held(&sl->sl_access_state_lock));
	ASSERT((sl->sl_flags & SL_MEDIA_LOADED) != 0);

	/*
	 * Calculate the limits on xfer_len to the minimum of:
	 *    - task limit
	 *    - lun limit
	 *    - sbd global limit if set
	 *    - first xfer limit if set
	 *
	 * First, protect against silly over-ride value
	 */
	if (sbd_max_xfer_len && ((sbd_max_xfer_len % DEV_BSIZE) != 0)) {
		cmn_err(CE_WARN, "sbd_max_xfer_len invalid %d, resetting\n",
		    sbd_max_xfer_len);
		sbd_max_xfer_len = 0;
	}
	if (sbd_1st_xfer_len && ((sbd_1st_xfer_len % DEV_BSIZE) != 0)) {
		cmn_err(CE_WARN, "sbd_1st_xfer_len invalid %d, resetting\n",
		    sbd_1st_xfer_len);
		sbd_1st_xfer_len = 0;
	}

	max_len = MIN(task->task_max_xfer_len, sl->sl_max_xfer_len);
	if (sbd_max_xfer_len)
		max_len = MIN(max_len, sbd_max_xfer_len);
	/*
	 * Special case the first xfer if hints are set.
	 */
	if (first_xfer && (sbd_1st_xfer_len || task->task_1st_xfer_len)) {
		/* global over-ride has precedence */
		if (sbd_1st_xfer_len)
			first_len = sbd_1st_xfer_len;
		else
			first_len = task->task_1st_xfer_len;
	} else {
		first_len = 0;
	}

	while (scmd->len && scmd->nbufs < task->task_max_nbufs) {

		xfer_len = MIN(max_len, scmd->len);
		if (first_len) {
			xfer_len = MIN(xfer_len, first_len);
			first_len = 0;
		}
		if (xfer_len < scmd->len) {
			/*
			 * Attempt to end xfer on a block boundary.
			 * The only way this does not happen is if the
			 * xfer_len is small enough to stay contained
			 * within the same block.
			 */
			uint64_t xfer_offset, xfer_aligned_end;

			xfer_offset = scmd->addr + scmd->current_ro;
			xfer_aligned_end =
			    P2ALIGN(xfer_offset + xfer_len, blksize);
			if (xfer_aligned_end > xfer_offset)
				xfer_len = xfer_aligned_end - xfer_offset;
		}
		/*
		 * Allocate object to track the write and reserve
		 * enough space for scatter/gather list.
		 */
		offset = scmd->addr + scmd->current_ro;
		nblks = sbd_zvol_numsegs(sl, offset, xfer_len);
		db_private_sz = sizeof (*zvio) + sizeof (uintptr_t) /* PAD */ +
		    (nblks * sizeof (stmf_sglist_ent_t));
		dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, db_private_sz,
		    AF_DONTZERO);

		/*
		 * Setup the dbuf
		 *
		 * XXX Framework does not handle variable length sglists
		 * properly, so setup db_lu_private and db_port_private
		 * fields here. db_stmf_private is properly set for
		 * calls to stmf_free.
		 */
		if (dbuf->db_port_private == NULL) {
			/*
			 * XXX Framework assigns space to PP after db_sglist[0]
			 */
			cmn_err(CE_PANIC, "db_port_private == NULL");
		}
		pad = (uintptr_t)&dbuf->db_sglist[nblks];
		dbuf->db_lu_private = (void *)P2ROUNDUP(pad, sizeof (pad));
		dbuf->db_port_private = NULL;
		dbuf->db_buf_size = xfer_len;
		dbuf->db_data_size = xfer_len;
		dbuf->db_relative_offset = scmd->current_ro;
		dbuf->db_sglist_length = (uint16_t)nblks;
		dbuf->db_xfer_status = 0;
		dbuf->db_handle = 0;
		dbuf->db_flags = (DB_DONT_CACHE | DB_DONT_REUSE |
		    DB_DIRECTION_FROM_RPORT | DB_LU_DATA_BUF);

		zvio = dbuf->db_lu_private;
		zvio->zvio_offset = offset;

		/* get the buffers */
		ret = sbd_zvol_alloc_write_bufs(sl, dbuf);
		if (ret != 0) {
			/*
			 * Could not allocate buffers from the backend;
			 * treat it like an IO error.
			 */
			stmf_free(dbuf);
			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
			if (scmd->nbufs == 0) {
				/*
				 * Nothing queued, so no completions coming
				 */
				stmf_scsilib_send_status(task, STATUS_CHECK,
				    STMF_SAA_WRITE_ERROR);
				rw_exit(&sl->sl_access_state_lock);
			}
			/*
			 * Completions of previous buffers will cleanup.
			 */
			return;
		}

		/*
		 * Allow PP to do setup
		 */
		xstat = stmf_setup_dbuf(task, dbuf, 0);
		if (xstat != STMF_SUCCESS) {
			/*
			 * This could happen if the driver cannot get the
			 * DDI resources it needs for this request.
			 * If other dbufs are queued, try again when the next
			 * one completes, otherwise give up.
			 */
			sbd_zvol_rele_write_bufs_abort(sl, dbuf);
			stmf_free(dbuf);
			if (scmd->nbufs > 0) {
				/* completion of previous dbuf will retry */
				return;
			}
			/*
			 * Done with this command.
			 */
			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
			if (first_xfer)
				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
			else
				stmf_scsilib_send_status(task, STATUS_CHECK,
				    STMF_SAA_WRITE_ERROR);
			rw_exit(&sl->sl_access_state_lock);
			return;
		}

		/*
		 * dbuf is now queued on task
		 */
		scmd->nbufs++;

		xstat = stmf_xfer_data(task, dbuf, 0);
		switch (xstat) {
		case STMF_SUCCESS:
			break;
		case STMF_BUSY:
			/*
			 * The dbuf is queued on the task, but unknown
			 * to the PP, thus no completion will occur.
			 */
			sbd_zvol_rele_write_bufs_abort(sl, dbuf);
			stmf_teardown_dbuf(task, dbuf);
			stmf_free(dbuf);
			scmd->nbufs--;
			if (scmd->nbufs > 0) {
				/* completion of previous dbuf will retry */
				return;
			}
			/*
			 * Done with this command.
			 */
			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
			if (first_xfer)
				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
			else
				stmf_scsilib_send_status(task, STATUS_CHECK,
				    STMF_SAA_WRITE_ERROR);
			rw_exit(&sl->sl_access_state_lock);
			return;
		case STMF_ABORTED:
			/*
			 * Completion code will cleanup.
			 */
			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
			return;
		}
		/*
		 * Update the xfer progress.
		 */
		scmd->len -= xfer_len;
		scmd->current_ro += xfer_len;
	}
}

void
sbd_handle_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
    struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
{
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	uint64_t laddr;
	uint32_t buflen, iolen;
	int ndx;

	if (scmd->nbufs > 0) {
		/*
		 * Decrement the count to indicate the port xfer
		 * into the dbuf has completed even though the buf is
		 * still in use here in the LU provider.
		 */
		scmd->nbufs--;
	}

	if (dbuf->db_xfer_status != STMF_SUCCESS) {
		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
		    dbuf->db_xfer_status, NULL);
		return;
	}

	if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
		goto WRITE_XFER_DONE;
	}

	if (scmd->len != 0) {
		/*
		 * Initiate the next port xfer to occur in parallel
		 * with writing this buf.
		 */
		sbd_do_write_xfer(task, scmd, NULL, 0);
	}

	laddr = scmd->addr + dbuf->db_relative_offset;

	/*
	 * If this is going to a zvol, use the direct call to
	 * sbd_zvol_copy_{read,write}. The direct call interface is
	 * restricted to PPs that accept sglists, but that is not required.
	 */
	if (sl->sl_flags & SL_CALL_ZVOL &&
	    (task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF) &&
	    (sbd_zcopy & (4|1))) {
		int commit;

		commit = (scmd->len == 0 && scmd->nbufs == 0);
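		/*
		 * commit is only set for the final buffer of the command,
		 * so sbd_copy_rdwr passes ZVIO_COMMIT to the backend
		 * exactly once per write; intermediate buffers skip it.
		 */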
		if (sbd_copy_rdwr(task, laddr, dbuf, SBD_CMD_SCSI_WRITE,
		    commit) != STMF_SUCCESS)
			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
		buflen = dbuf->db_data_size;
	} else {
		for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
		    (ndx < dbuf->db_sglist_length); ndx++) {
			iolen = min(dbuf->db_data_size - buflen,
			    dbuf->db_sglist[ndx].seg_length);
			if (iolen == 0)
				break;
			if (sbd_data_write(sl, task, laddr, (uint64_t)iolen,
			    dbuf->db_sglist[ndx].seg_addr) != STMF_SUCCESS) {
				scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
				break;
			}
			buflen += iolen;
			laddr += (uint64_t)iolen;
		}
	}
	task->task_nbytes_transferred += buflen;
WRITE_XFER_DONE:
	if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
		stmf_free_dbuf(task, dbuf);
		if (scmd->nbufs)
			return;	/* wait for all buffers to complete */
		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_WRITE_ERROR);
		} else {
			/*
			 * If SYNC_WRITE flag is on then we need to flush
			 * cache before sending status.
			 * Note: this may be a no-op because of how
			 * SL_WRITEBACK_CACHE_DISABLE and
			 * SL_FLUSH_ON_DISABLED_WRITECACHE are set, but not
			 * worth code complexity of checking those in this code
			 * path, SBD_SCSI_CMD_SYNC_WRITE is rarely set.
			 */
			if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
			    (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
				stmf_scsilib_send_status(task, STATUS_CHECK,
				    STMF_SAA_WRITE_ERROR);
			} else {
				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
			}
		}
		return;
	}
	sbd_do_write_xfer(task, scmd, dbuf, dbuf_reusable);
}

/*
 * Return true if copy avoidance is beneficial.
 */
static int
sbd_zcopy_write_useful(scsi_task_t *task, uint64_t laddr, uint32_t len,
    uint64_t blksize)
{
	/*
	 * If there is a global copy threshold over-ride, use it.
	 * Otherwise use the PP value with the caveat that at least
	 * 1/2 the data must avoid being copied to be useful.
	 */
	if (sbd_copy_threshold > 0) {
		return (len >= sbd_copy_threshold);
	} else {
		uint64_t no_copy_span;

		/* sub-blocksize writes always copy */
		if (len < task->task_copy_threshold || len < blksize)
			return (0);
		/*
		 * Calculate amount of data that will avoid the copy path.
		 * The calculation is only valid if len >= blksize.
		 */
		no_copy_span = P2ALIGN(laddr + len, blksize) -
		    P2ROUNDUP(laddr, blksize);
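		/*
		 * Worked example (values assumed): laddr = 1000,
		 * len = 8192, blksize = 512 gives
		 * P2ALIGN(9192, 512) = 8704 and
		 * P2ROUNDUP(1000, 512) = 1024, so no_copy_span = 7680,
		 * which is >= len/2 = 4096, and zcopy is judged useful.
		 */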
		return (no_copy_span >= len/2);
	}
}

void
sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
{
	uint64_t lba, laddr;
	uint32_t len;
	uint8_t op = task->task_cdb[0], do_immediate_data = 0;
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	sbd_cmd_t *scmd;
	stmf_data_buf_t *dbuf;
	uint8_t	sync_wr_flag = 0;

	if (sl->sl_flags & SL_WRITE_PROTECTED) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_WRITE_PROTECTED);
		return;
	}
	if (op == SCMD_WRITE) {
		lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
		len = (uint32_t)task->task_cdb[4];

		if (len == 0) {
			len = 256;
		}
	} else if (op == SCMD_WRITE_G1) {
		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
	} else if (op == SCMD_WRITE_G5) {
		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
	} else if (op == SCMD_WRITE_G4) {
		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
	} else if (op == SCMD_WRITE_VERIFY) {
		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
	} else if (op == SCMD_WRITE_VERIFY_G5) {
		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
	} else if (op == SCMD_WRITE_VERIFY_G4) {
		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
	} else {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_INVALID_OPCODE);
		return;
	}

	laddr = lba << sl->sl_data_blocksize_shift;
	len <<= sl->sl_data_blocksize_shift;

	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_LBA_OUT_OF_RANGE);
		return;
	}

	task->task_cmd_xfer_length = len;
	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
		task->task_expected_xfer_length = len;
	}

	len = (len > task->task_expected_xfer_length) ?
	    task->task_expected_xfer_length : len;

	if (len == 0) {
		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
		return;
	}

	if (sbd_zcopy & (4|1) &&		/* Debug switch */
	    initial_dbuf == NULL &&		/* No PP buf passed in */
	    sl->sl_flags & SL_CALL_ZVOL &&	/* zvol backing store */
	    (task->task_additional_flags &
	    TASK_AF_ACCEPT_LU_DBUF) &&		/* PP allows it */
	    sbd_zcopy_write_useful(task, laddr, len, sl->sl_blksize)) {

		/*
		 * XXX Note that disallowing initial_dbuf will eliminate
		 * iSCSI from participating. For small writes, that is
		 * probably ok. For large writes, it may be best to just
		 * copy the data from the initial dbuf and use zcopy for
		 * the rest.
		 */
		rw_enter(&sl->sl_access_state_lock, RW_READER);
		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
			rw_exit(&sl->sl_access_state_lock);
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_READ_ERROR);
			return;
		}
		/*
		 * Setup scmd to track the write progress.
		 */
		if (task->task_lu_private) {
			scmd = (sbd_cmd_t *)task->task_lu_private;
		} else {
			scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t),
			    KM_SLEEP);
			task->task_lu_private = scmd;
		}
		scmd->flags = SBD_SCSI_CMD_ACTIVE | sync_wr_flag;
		scmd->cmd_type = SBD_CMD_SCSI_WRITE;
		scmd->nbufs = 0;
		scmd->addr = laddr;
		scmd->len = len;
		scmd->current_ro = 0;
		sbd_do_sgl_write_xfer(task, scmd, 1);
		return;
	}

	if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) {
		if (initial_dbuf->db_data_size > len) {
			if (initial_dbuf->db_data_size >
			    task->task_expected_xfer_length) {
				/* protocol error */
				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
				    STMF_INVALID_ARG, NULL);
				return;
			}
			initial_dbuf->db_data_size = len;
		}
		do_immediate_data = 1;
	}
	dbuf = initial_dbuf;

	if (task->task_lu_private) {
		scmd = (sbd_cmd_t *)task->task_lu_private;
	} else {
		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
		task->task_lu_private = scmd;
	}
	scmd->flags = SBD_SCSI_CMD_ACTIVE | sync_wr_flag;
	scmd->cmd_type = SBD_CMD_SCSI_WRITE;
	scmd->nbufs = 0;
	scmd->addr = laddr;
	scmd->len = len;
	scmd->current_ro = 0;

	if (do_immediate_data) {
		/*
		 * Account for data passed in this write command
		 */
		(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
		scmd->len -= dbuf->db_data_size;
		scmd->current_ro += dbuf->db_data_size;
		dbuf->db_xfer_status = STMF_SUCCESS;
		sbd_handle_write_xfer_completion(task, scmd, dbuf, 0);
	} else {
		sbd_do_write_xfer(task, scmd, dbuf, 0);
	}
}

/*
 * Utility routine to handle small non-performance data transfers to the
 * initiators. dbuf is an initial data buf (if any), 'p' points to the data
 * buffer which is the source of data for the transfer, cdb_xfer_size is the
 * transfer size based on the CDB, and cmd_xfer_size is the actual amount of
 * data which this command would transfer (the size of the data pointed to
 * by 'p').
 */
void
sbd_handle_short_read_transfers(scsi_task_t *task, stmf_data_buf_t *dbuf,
    uint8_t *p, uint32_t cdb_xfer_size, uint32_t cmd_xfer_size)
{
	uint32_t bufsize, ndx;
	sbd_cmd_t *scmd;

	cmd_xfer_size = min(cmd_xfer_size, cdb_xfer_size);

	task->task_cmd_xfer_length = cmd_xfer_size;
	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
		task->task_expected_xfer_length = cmd_xfer_size;
	} else {
		cmd_xfer_size = min(cmd_xfer_size,
		    task->task_expected_xfer_length);
	}

	if (cmd_xfer_size == 0) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_INVALID_FIELD_IN_CDB);
		return;
	}
	if (dbuf == NULL) {
		uint32_t minsize = cmd_xfer_size;

		dbuf = stmf_alloc_dbuf(task, cmd_xfer_size, &minsize, 0);
	}
	if (dbuf == NULL) {
		stmf_scsilib_send_status(task, STATUS_QFULL, 0);
		return;
	}

	for (bufsize = 0, ndx = 0; bufsize < cmd_xfer_size; ndx++) {
		uint8_t *d;
		uint32_t s;

		d = dbuf->db_sglist[ndx].seg_addr;
		s = min((cmd_xfer_size - bufsize),
		    dbuf->db_sglist[ndx].seg_length);
		bcopy(p+bufsize, d, s);
		bufsize += s;
	}
	dbuf->db_relative_offset = 0;
	dbuf->db_data_size = cmd_xfer_size;
	dbuf->db_flags = DB_DIRECTION_TO_RPORT;

	if (task->task_lu_private == NULL) {
		task->task_lu_private =
		    kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
	}
	scmd = (sbd_cmd_t *)task->task_lu_private;

	scmd->cmd_type = SBD_CMD_SMALL_READ;
	scmd->flags = SBD_SCSI_CMD_ACTIVE;
	(void) stmf_xfer_data(task, dbuf, 0);
}

void
sbd_handle_short_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
				struct stmf_data_buf *dbuf)
{
	if (dbuf->db_xfer_status != STMF_SUCCESS) {
		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
		    dbuf->db_xfer_status, NULL);
		return;
	}
	task->task_nbytes_transferred = dbuf->db_data_size;
	scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
}

void
sbd_handle_short_write_transfers(scsi_task_t *task,
    stmf_data_buf_t *dbuf, uint32_t cdb_xfer_size)
{
	sbd_cmd_t *scmd;

	task->task_cmd_xfer_length = cdb_xfer_size;
	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
		task->task_expected_xfer_length = cdb_xfer_size;
	} else {
		cdb_xfer_size = min(cdb_xfer_size,
		    task->task_expected_xfer_length);
	}

	if (cdb_xfer_size == 0) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_INVALID_FIELD_IN_CDB);
		return;
	}
	if (task->task_lu_private == NULL) {
		task->task_lu_private = kmem_zalloc(sizeof (sbd_cmd_t),
		    KM_SLEEP);
	} else {
		bzero(task->task_lu_private, sizeof (sbd_cmd_t));
	}
	scmd = (sbd_cmd_t *)task->task_lu_private;
	scmd->cmd_type = SBD_CMD_SMALL_WRITE;
	scmd->flags = SBD_SCSI_CMD_ACTIVE;
	scmd->len = cdb_xfer_size;
	if (dbuf == NULL) {
		uint32_t minsize = cdb_xfer_size;

		dbuf = stmf_alloc_dbuf(task, cdb_xfer_size, &minsize, 0);
		if (dbuf == NULL) {
			stmf_abort(STMF_QUEUE_TASK_ABORT, task,
			    STMF_ALLOC_FAILURE, NULL);
			return;
		}
		dbuf->db_data_size = cdb_xfer_size;
		dbuf->db_relative_offset = 0;
		dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
		(void) stmf_xfer_data(task, dbuf, 0);
	} else {
		if (dbuf->db_data_size < cdb_xfer_size) {
			stmf_abort(STMF_QUEUE_TASK_ABORT, task,
			    STMF_ABORTED, NULL);
			return;
		}
		dbuf->db_data_size = cdb_xfer_size;
		sbd_handle_short_write_xfer_completion(task, dbuf);
	}
}

void
sbd_handle_short_write_xfer_completion(scsi_task_t *task,
    stmf_data_buf_t *dbuf)
{
	sbd_cmd_t *scmd;
	stmf_status_t st_ret;
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;

	/*
1727 	 * For now lets assume we will get only one sglist element
1728 	 * for short writes. If that ever changes, we should allocate
1729 	 * a local buffer and copy all the sg elements to one linear space.
1730 	 */
1731 	if ((dbuf->db_xfer_status != STMF_SUCCESS) ||
1732 	    (dbuf->db_sglist_length > 1)) {
1733 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1734 		    dbuf->db_xfer_status, NULL);
1735 		return;
1736 	}
1737 
1738 	task->task_nbytes_transferred = dbuf->db_data_size;
1739 	scmd = (sbd_cmd_t *)task->task_lu_private;
1740 	scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1741 
1742 	/* Lets find out who to call */
1743 	switch (task->task_cdb[0]) {
1744 	case SCMD_MODE_SELECT:
1745 	case SCMD_MODE_SELECT_G1:
1746 		if (sl->sl_access_state == SBD_LU_STANDBY) {
1747 			st_ret = stmf_proxy_scsi_cmd(task, dbuf);
1748 			if (st_ret != STMF_SUCCESS) {
1749 				stmf_scsilib_send_status(task, STATUS_CHECK,
1750 				    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
1751 			}
1752 		} else {
1753 			sbd_handle_mode_select_xfer(task,
1754 			    dbuf->db_sglist[0].seg_addr, dbuf->db_data_size);
1755 		}
1756 		break;
1757 	case SCMD_PERSISTENT_RESERVE_OUT:
1758 		if (sl->sl_access_state == SBD_LU_STANDBY) {
1759 			st_ret = stmf_proxy_scsi_cmd(task, dbuf);
1760 			if (st_ret != STMF_SUCCESS) {
1761 				stmf_scsilib_send_status(task, STATUS_CHECK,
1762 				    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
1763 			}
1764 		} else {
1765 			sbd_handle_pgr_out_data(task, dbuf);
1766 		}
1767 		break;
1768 	default:
1769 		/* This should never happen */
1770 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1771 		    STMF_ABORTED, NULL);
1772 	}
1773 }
1774 
1775 void
1776 sbd_handle_read_capacity(struct scsi_task *task,
1777     struct stmf_data_buf *initial_dbuf)
1778 {
1779 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1780 	uint32_t cdb_len;
1781 	uint8_t p[32];
1782 	uint64_t s;
1783 	uint16_t blksize;
1784 
1785 	s = sl->sl_lu_size >> sl->sl_data_blocksize_shift;
1786 	s--;
1787 	blksize = ((uint16_t)1) << sl->sl_data_blocksize_shift;
1788 
1789 	switch (task->task_cdb[0]) {
1790 	case SCMD_READ_CAPACITY:
1791 		if (s & 0xffffffff00000000ull) {
1792 			p[0] = p[1] = p[2] = p[3] = 0xFF;
1793 		} else {
1794 			p[0] = (s >> 24) & 0xff;
1795 			p[1] = (s >> 16) & 0xff;
1796 			p[2] = (s >> 8) & 0xff;
1797 			p[3] = s & 0xff;
1798 		}
1799 		p[4] = 0; p[5] = 0;
1800 		p[6] = (blksize >> 8) & 0xff;
1801 		p[7] = blksize & 0xff;
1802 		sbd_handle_short_read_transfers(task, initial_dbuf, p, 8, 8);
1803 		break;
1804 
1805 	case SCMD_SVC_ACTION_IN_G4:
1806 		cdb_len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1807 		bzero(p, 32);
1808 		p[0] = (s >> 56) & 0xff;
1809 		p[1] = (s >> 48) & 0xff;
1810 		p[2] = (s >> 40) & 0xff;
1811 		p[3] = (s >> 32) & 0xff;
1812 		p[4] = (s >> 24) & 0xff;
1813 		p[5] = (s >> 16) & 0xff;
1814 		p[6] = (s >> 8) & 0xff;
1815 		p[7] = s & 0xff;
1816 		p[10] = (blksize >> 8) & 0xff;
1817 		p[11] = blksize & 0xff;
1818 		sbd_handle_short_read_transfers(task, initial_dbuf, p,
1819 		    cdb_len, 32);
1820 		break;
1821 	}
1822 }
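
/*
 * Worked example (illustrative): for a 1 GiB LU with 512-byte blocks,
 * sl_lu_size >> 9 gives 2097152 blocks, so the "last LBA" reported is
 * 2097151 (0x001FFFFF) and bytes 0-3 of the READ CAPACITY(10) payload
 * are 00 1F FF FF (big endian); bytes 6-7 carry the block length,
 * 02 00 (512).  Only when the last LBA does not fit in 32 bits does
 * the payload report FF FF FF FF, telling the initiator to issue
 * READ CAPACITY(16) instead.
 */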

void
sbd_calc_geometry(uint64_t s, uint16_t blksize, uint8_t *nsectors,
    uint8_t *nheads, uint32_t *ncyl)
{
	if (s < (4ull * 1024ull * 1024ull * 1024ull)) {
		*nsectors = 32;
		*nheads = 8;
	} else {
		*nsectors = 254;
		*nheads = 254;
	}
	*ncyl = s / ((uint64_t)blksize * (uint64_t)(*nsectors) *
	    (uint64_t)(*nheads));
}
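
/*
 * Worked example (illustrative): for a 2 GiB LU (s = 2147483648 bytes)
 * with blksize = 512, the small-LU branch picks 32 sectors and
 * 8 heads, so ncyl = 2147483648 / (512 * 32 * 8) = 16384 cylinders.
 * Note that s is a byte count, not a block count, which is why
 * blksize appears in the divisor.
 */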

void
sbd_handle_mode_sense(struct scsi_task *task,
    struct stmf_data_buf *initial_dbuf, uint8_t *buf)
{
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	uint32_t cmd_size, n;
	uint8_t *cdb;
	uint32_t ncyl;
	uint8_t nsectors, nheads;
	uint8_t page, ctrl, header_size, pc_valid;
	uint16_t nbytes;
	uint8_t *p;
	uint64_t s = sl->sl_lu_size;
	uint32_t dev_spec_param_offset;

	p = buf;	/* buf is assumed to be zeroed out and large enough */
	n = 0;
	cdb = &task->task_cdb[0];
	page = cdb[2] & 0x3F;
	ctrl = (cdb[2] >> 6) & 3;
	cmd_size = (cdb[0] == SCMD_MODE_SENSE) ? cdb[4] :
	    READ_SCSI16(&cdb[7], uint32_t);

	if (cdb[0] == SCMD_MODE_SENSE) {
		header_size = 4;
		dev_spec_param_offset = 2;
	} else {
		header_size = 8;
		dev_spec_param_offset = 3;
	}

	/* Now validate the command */
	if ((cdb[2] == 0) || (page == MODEPAGE_ALLPAGES) || (page == 0x08) ||
	    (page == 0x0A) || (page == 0x03) || (page == 0x04)) {
		pc_valid = 1;
	} else {
		pc_valid = 0;
	}
	if ((cmd_size < header_size) || (pc_valid == 0)) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_INVALID_FIELD_IN_CDB);
		return;
	}

	/* We will update the length in the mode header at the end */

	/* The device-specific parameter in the mode header carries WP */
	if (sl->sl_flags & SL_WRITE_PROTECTED) {
		p[n + dev_spec_param_offset] = BIT_7;
	}
	n += header_size;
	/* We are not going to return any block descriptor */

	nbytes = ((uint16_t)1) << sl->sl_data_blocksize_shift;
	sbd_calc_geometry(s, nbytes, &nsectors, &nheads, &ncyl);

	if ((page == 0x03) || (page == MODEPAGE_ALLPAGES)) {
		p[n] = 0x03;
		p[n+1] = 0x16;
		if (ctrl != 1) {
			p[n + 11] = nsectors;
			p[n + 12] = nbytes >> 8;
			p[n + 13] = nbytes & 0xff;
			p[n + 20] = 0x80;
		}
		n += 24;
	}
	if ((page == 0x04) || (page == MODEPAGE_ALLPAGES)) {
		p[n] = 0x04;
		p[n + 1] = 0x16;
		if (ctrl != 1) {
			p[n + 2] = ncyl >> 16;
			p[n + 3] = ncyl >> 8;
			p[n + 4] = ncyl & 0xff;
			p[n + 5] = nheads;
			p[n + 20] = 0x15;
			p[n + 21] = 0x18;
		}
		n += 24;
	}
	if ((page == MODEPAGE_CACHING) || (page == MODEPAGE_ALLPAGES)) {
		struct mode_caching *mode_caching_page;

		mode_caching_page = (struct mode_caching *)&p[n];

		mode_caching_page->mode_page.code = MODEPAGE_CACHING;
		mode_caching_page->mode_page.ps = 1; /* A saveable page */
		mode_caching_page->mode_page.length = 0x12;

		switch (ctrl) {
		case (0):
			/* Current */
			if ((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) == 0) {
				mode_caching_page->wce = 1;
			}
			break;

		case (1):
			/* Changeable */
			if ((sl->sl_flags &
			    SL_WRITEBACK_CACHE_SET_UNSUPPORTED) == 0) {
				mode_caching_page->wce = 1;
			}
			break;

		default:
			if ((sl->sl_flags &
			    SL_SAVED_WRITE_CACHE_DISABLE) == 0) {
				mode_caching_page->wce = 1;
			}
			break;
		}
		n += (sizeof (struct mode_page) +
		    mode_caching_page->mode_page.length);
	}
	if ((page == MODEPAGE_CTRL_MODE) || (page == MODEPAGE_ALLPAGES)) {
		struct mode_control_scsi3 *mode_control_page;

		mode_control_page = (struct mode_control_scsi3 *)&p[n];

		mode_control_page->mode_page.code = MODEPAGE_CTRL_MODE;
		mode_control_page->mode_page.length =
		    PAGELENGTH_MODE_CONTROL_SCSI3;
		if (ctrl != 1) {
			/* If not looking for changeable values, report this. */
			mode_control_page->que_mod = CTRL_QMOD_UNRESTRICT;
		}
		n += (sizeof (struct mode_page) +
		    mode_control_page->mode_page.length);
	}

	if (cdb[0] == SCMD_MODE_SENSE) {
		if (n > 255) {
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_INVALID_FIELD_IN_CDB);
			return;
		}
		/*
		 * The mode data length in the header does not count the
		 * length field itself, which is 1 byte here.
		 */
		buf[0] = (n - 1) & 0xff;
	} else {
		/* Here the length field is 2 bytes, so subtract 2. */
		buf[1] = (n - 2) & 0xff;
		buf[0] = ((n - 2) >> 8) & 0xff;
	}

	sbd_handle_short_read_transfers(task, initial_dbuf, buf,
	    cmd_size, n);
}
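
/*
 * Layout sketch (illustrative): the two mode parameter headers built
 * above differ only in field widths.  For MODE SENSE(6):
 *
 *	byte 0	mode data length (n - 1)
 *	byte 1	medium type (0)
 *	byte 2	device-specific parameter (BIT_7 = write protected)
 *	byte 3	block descriptor length (0; none returned)
 *
 * For MODE SENSE(10) the same fields span 8 bytes: a 2-byte mode data
 * length (n - 2) in bytes 0-1, medium type in byte 2, the
 * device-specific parameter in byte 3, and a 2-byte block descriptor
 * length (again 0) in bytes 6-7.
 */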

void
sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf)
{
	uint32_t cmd_xfer_len;

	if (task->task_cdb[0] == SCMD_MODE_SELECT) {
		cmd_xfer_len = (uint32_t)task->task_cdb[4];
	} else {
		cmd_xfer_len = READ_SCSI16(&task->task_cdb[7], uint32_t);
	}

	if ((task->task_cdb[1] & 0xFE) != 0x10) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_INVALID_FIELD_IN_CDB);
		return;
	}

	if (cmd_xfer_len == 0) {
		/* Zero byte mode selects are allowed */
		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
		return;
	}

	sbd_handle_short_write_transfers(task, dbuf, cmd_xfer_len);
}

void
sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf, uint32_t buflen)
{
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	sbd_it_data_t *it;
	int hdr_len, bd_len;
	sbd_status_t sret;
	int i;

	if (task->task_cdb[0] == SCMD_MODE_SELECT) {
		hdr_len = 4;
	} else {
		hdr_len = 8;
	}

	if (buflen < hdr_len)
		goto mode_sel_param_len_err;

	bd_len = hdr_len == 4 ? buf[3] : READ_SCSI16(&buf[6], int);

	if (buflen < (hdr_len + bd_len + 2))
		goto mode_sel_param_len_err;

	buf += hdr_len + bd_len;
	buflen -= hdr_len + bd_len;

	if ((buf[0] != 8) || (buflen != ((uint32_t)buf[1] + 2))) {
		goto mode_sel_param_len_err;
	}

	if (buf[2] & 0xFB) {
		goto mode_sel_param_field_err;
	}

	for (i = 3; i < (buf[1] + 2); i++) {
		if (buf[i]) {
			goto mode_sel_param_field_err;
		}
	}

	sret = SBD_SUCCESS;

	/* All good. Let's handle the write cache change, if any */
	if (buf[2] & BIT_2) {
		sret = sbd_wcd_set(0, sl);
	} else {
		sret = sbd_wcd_set(1, sl);
	}

	if (sret != SBD_SUCCESS) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_WRITE_ERROR);
		return;
	}

	/* The device accepted the setting; now update the flags */
	mutex_enter(&sl->sl_lock);
	if (buf[2] & BIT_2) {
		sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
	} else {
		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
	}

	for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
		if (it == task->task_lu_itl_handle)
			continue;
		it->sbd_it_ua_conditions |= SBD_UA_MODE_PARAMETERS_CHANGED;
	}

	if (task->task_cdb[1] & 1) {
		if (buf[2] & BIT_2) {
			sl->sl_flags &= ~SL_SAVED_WRITE_CACHE_DISABLE;
		} else {
			sl->sl_flags |= SL_SAVED_WRITE_CACHE_DISABLE;
		}
		mutex_exit(&sl->sl_lock);
		sret = sbd_write_lu_info(sl);
	} else {
		mutex_exit(&sl->sl_lock);
	}
	if (sret == SBD_SUCCESS) {
		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
	} else {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_WRITE_ERROR);
	}
	return;

mode_sel_param_len_err:
	stmf_scsilib_send_status(task, STATUS_CHECK,
	    STMF_SAA_PARAM_LIST_LENGTH_ERROR);
	return;
mode_sel_param_field_err:
	stmf_scsilib_send_status(task, STATUS_CHECK,
	    STMF_SAA_INVALID_FIELD_IN_PARAM_LIST);
}
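
/*
 * Data sketch (illustrative; the name pl is hypothetical): a minimal
 * MODE SELECT(6) parameter list that enables write-back caching, as
 * parsed above.  Four header bytes (all zero, no block descriptors),
 * then a caching mode page whose only non-zero mode byte is WCE:
 *
 *	uint8_t pl[24] = { 0 };
 *
 *	pl[4] = 0x08;		page code 8 (caching), PS must be 0
 *	pl[5] = 0x12;		page length (18 bytes follow)
 *	pl[6] = BIT_2;		WCE = 1; all remaining bytes stay 0
 *
 * The CDB must have PF = 1 (cdb[1] == 0x10, or 0x11 to also save the
 * setting persistently via sbd_write_lu_info()).
 */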

/*
 * Command support added from SPC-4 r24
 * Supports info type 0, 2, 127
 */
void
sbd_handle_identifying_info(struct scsi_task *task,
    stmf_data_buf_t *initial_dbuf)
{
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	uint8_t *cdb;
	uint32_t cmd_size;
	uint32_t param_len;
	uint32_t xfer_size;
	uint8_t info_type;
	uint8_t *buf, *p;

	cdb = &task->task_cdb[0];
	cmd_size = READ_SCSI32(&cdb[6], uint32_t);
	info_type = cdb[10]>>1;

	/* Validate the command */
	if (cmd_size < 4) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_INVALID_FIELD_IN_CDB);
		return;
	}

	p = buf = kmem_zalloc(260, KM_SLEEP);

	switch (info_type) {
		case 0:
			/*
			 * No value is supplied but this info type
			 * is mandatory.
			 */
			xfer_size = 4;
			break;
		case 2:
			mutex_enter(&sl->sl_lock);
			param_len = strlcpy((char *)(p+4), sl->sl_alias, 256);
			mutex_exit(&sl->sl_lock);
			/* Text info must be null terminated */
			if (++param_len > 256)
				param_len = 256;
			SCSI_WRITE16(p+2, param_len);
			xfer_size = param_len + 4;
			break;
		case 127:
			/* Info type 0 and 2 descriptors are supported */
			SCSI_WRITE16(p+2, 8); /* set param length */
			p += 8;
			*p = 4; /* info type 2 in the upper 7 bits */
			p += 2;
			SCSI_WRITE16(p, 256); /* 256 max length */
			xfer_size = 12;
			break;
		default:
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_INVALID_FIELD_IN_CDB);
			kmem_free(buf, 260);
			return;
	}
	sbd_handle_short_read_transfers(task, initial_dbuf, buf,
	    cmd_size, xfer_size);
	kmem_free(buf, 260);
}

/*
 * This function parses through a string, passed to it as a pointer to a
 * string, by advancing the pointer to the first non-space character, and
 * returns the count/length of the first run of non-space characters.
 * Multiple management URLs are stored as a space-delimited string in the
 * sl_mgmt_url field of sbd_lu_t. This function is used to retrieve one
 * URL at a time.
 *
 * i/p : pointer to pointer to a URL string
 * o/p : advances the URL pointer to the first non-whitespace character
 *       and returns the length of the URL
 */
uint16_t
sbd_parse_mgmt_url(char **url_addr)
{
	uint16_t url_length = 0;
	char *url;
	url = *url_addr;

	while (*url != '\0') {
		if (*url == ' ' || *url == '\t' || *url == '\n') {
			(*url_addr)++;
			url = *url_addr;
		} else {
			break;
		}
	}

	while (*url != '\0') {
		if (*url == ' ' || *url == '\t' ||
		    *url == '\n' || *url == '\0') {
			break;
		}
		url++;
		url_length++;
	}
	return (url_length);
}
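
/*
 * Usage sketch (illustrative only; the URLs are made up): walking a
 * space-delimited URL list with the parser above.  Each call skips
 * leading whitespace and returns the length of the next token; the
 * caller then advances past the token itself.
 *
 *	char *url = "http://a.example/ http://b.example/";
 *	uint16_t len;
 *
 *	while ((len = sbd_parse_mgmt_url(&url)) != 0) {
 *		use url[0 .. len-1] here
 *		url += len;
 *	}
 */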

void
sbd_handle_inquiry(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
{
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	uint8_t *cdbp = (uint8_t *)&task->task_cdb[0];
	uint8_t *p;
	uint8_t byte0;
	uint8_t page_length;
	uint16_t bsize = 512;
	uint16_t cmd_size;
	uint32_t xfer_size = 4;
	uint32_t mgmt_url_size = 0;
	char *mgmt_url = NULL;

	byte0 = DTYPE_DIRECT;
	/*
	 * Basic protocol checks.
	 */

	if ((((cdbp[1] & 1) == 0) && cdbp[2]) || cdbp[5]) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_INVALID_FIELD_IN_CDB);
		return;
	}

	/*
	 * Zero byte allocation length is not an error.  Just
	 * return success.
	 */

	cmd_size = (((uint16_t)cdbp[3]) << 8) | cdbp[4];

	if (cmd_size == 0) {
		task->task_cmd_xfer_length = 0;
		if (task->task_additional_flags &
		    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
			task->task_expected_xfer_length = 0;
		}
		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
		return;
	}

	/*
	 * Standard inquiry
	 */

	if ((cdbp[1] & 1) == 0) {
		int	i;
		struct scsi_inquiry *inq;

		p = (uint8_t *)kmem_zalloc(bsize, KM_SLEEP);
		inq = (struct scsi_inquiry *)p;

		page_length = 69;
		xfer_size = page_length + 5;

		inq->inq_dtype = DTYPE_DIRECT;
		inq->inq_ansi = 5;	/* SPC-3 */
		inq->inq_hisup = 1;
		inq->inq_rdf = 2;	/* Response data format for SPC-3 */
		inq->inq_len = page_length;

		inq->inq_tpgs = TPGS_FAILOVER_IMPLICIT;
		inq->inq_cmdque = 1;

		if (sl->sl_flags & SL_VID_VALID) {
			bcopy(sl->sl_vendor_id, inq->inq_vid, 8);
		} else {
			bcopy(sbd_vendor_id, inq->inq_vid, 8);
		}

		if (sl->sl_flags & SL_PID_VALID) {
			bcopy(sl->sl_product_id, inq->inq_pid, 16);
		} else {
			bcopy(sbd_product_id, inq->inq_pid, 16);
		}

		if (sl->sl_flags & SL_REV_VALID) {
			bcopy(sl->sl_revision, inq->inq_revision, 4);
		} else {
			bcopy(sbd_revision, inq->inq_revision, 4);
		}

		/* Adding Version Descriptors */
		i = 0;
		/* SAM-3 no version */
		inq->inq_vd[i].inq_vd_msb = 0x00;
		inq->inq_vd[i].inq_vd_lsb = 0x60;
		i++;

		/* transport */
		switch (task->task_lport->lport_id->protocol_id) {
		case PROTOCOL_FIBRE_CHANNEL:
			inq->inq_vd[i].inq_vd_msb = 0x09;
			inq->inq_vd[i].inq_vd_lsb = 0x00;
			i++;
			break;

		case PROTOCOL_PARALLEL_SCSI:
		case PROTOCOL_SSA:
		case PROTOCOL_IEEE_1394:
			/* Currently no claims of conformance */
			break;

		case PROTOCOL_SRP:
			inq->inq_vd[i].inq_vd_msb = 0x09;
			inq->inq_vd[i].inq_vd_lsb = 0x40;
			i++;
			break;

		case PROTOCOL_iSCSI:
			inq->inq_vd[i].inq_vd_msb = 0x09;
			inq->inq_vd[i].inq_vd_lsb = 0x60;
			i++;
			break;

		case PROTOCOL_SAS:
		case PROTOCOL_ADT:
		case PROTOCOL_ATAPI:
		default:
			/* Currently no claims of conformance */
			break;
		}

		/* SPC-3 no version */
		inq->inq_vd[i].inq_vd_msb = 0x03;
		inq->inq_vd[i].inq_vd_lsb = 0x00;
		i++;

		/* SBC-2 no version */
		inq->inq_vd[i].inq_vd_msb = 0x03;
		inq->inq_vd[i].inq_vd_lsb = 0x20;

		sbd_handle_short_read_transfers(task, initial_dbuf, p, cmd_size,
		    min(cmd_size, xfer_size));
		kmem_free(p, bsize);

		return;
	}

	rw_enter(&sbd_global_prop_lock, RW_READER);
	if (sl->sl_mgmt_url) {
		mgmt_url_size = strlen(sl->sl_mgmt_url);
		mgmt_url = sl->sl_mgmt_url;
	} else if (sbd_mgmt_url) {
		mgmt_url_size = strlen(sbd_mgmt_url);
		mgmt_url = sbd_mgmt_url;
	}

	/*
	 * EVPD handling
	 */

	/* Default 512 bytes may not be enough, increase bsize if necessary */
	if (cdbp[2] == 0x83 || cdbp[2] == 0x85) {
		if (bsize < cmd_size)
			bsize = cmd_size;
	}
	p = (uint8_t *)kmem_zalloc(bsize, KM_SLEEP);

	switch (cdbp[2]) {
	case 0x00:
		page_length = 4 + (mgmt_url_size ? 1 : 0);

		p[0] = byte0;
		p[3] = page_length;
		/* Supported VPD pages in ascending order */
		{
			uint8_t i = 5;

			p[i++] = 0x80;
			p[i++] = 0x83;
			if (mgmt_url_size != 0)
				p[i++] = 0x85;
			p[i++] = 0x86;
		}
		xfer_size = page_length + 4;
		break;

	case 0x80:
		if (sl->sl_serial_no_size) {
			page_length = sl->sl_serial_no_size;
			bcopy(sl->sl_serial_no, p + 4, sl->sl_serial_no_size);
		} else {
			/* If no serial number is specified, return 4 spaces */
			page_length = 4;
			bcopy("    ", p + 4, 4);
		}
		p[0] = byte0;
		p[1] = 0x80;
		p[3] = page_length;
		xfer_size = page_length + 4;
		break;

	case 0x83:
		xfer_size = stmf_scsilib_prepare_vpd_page83(task, p,
		    bsize, byte0, STMF_VPD_LU_ID|STMF_VPD_TARGET_ID|
		    STMF_VPD_TP_GROUP|STMF_VPD_RELATIVE_TP_ID);
		break;

	case 0x85:
		if (mgmt_url_size == 0) {
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_INVALID_FIELD_IN_CDB);
			goto err_done;
		}
		{
			uint16_t idx, newidx, sz, url_size;
			char *url;

			p[0] = byte0;
			p[1] = 0x85;

			idx = 4;
			url = mgmt_url;
			url_size = sbd_parse_mgmt_url(&url);
			/* Creating Network Service Descriptors */
			while (url_size != 0) {
				/* Null terminated and 4 byte aligned */
				sz = url_size + 1;
				sz += (sz % 4) ? 4 - (sz % 4) : 0;
				newidx = idx + sz + 4;

				if (newidx < bsize) {
					/*
					 * SPC-3r23 : Table 320  (Sec 7.6.5)
					 * (Network service descriptor format)
					 *
					 * Note: Hard coding service type as
					 * "Storage Configuration Service".
					 */
					p[idx] = 1;
					SCSI_WRITE16(p + idx + 2, sz);
					bcopy(url, p + idx + 4, url_size);
					xfer_size = newidx + 4;
				}
				idx = newidx;

				/* Skip to the next mgmt url, if any */
				url += url_size;
				url_size = sbd_parse_mgmt_url(&url);
			}

			/* Total descriptor length */
			SCSI_WRITE16(p + 2, idx - 4);
			break;
		}

	case 0x86:
		page_length = 0x3c;

		p[0] = byte0;
		p[1] = 0x86;		/* Page 86 response */
		p[3] = page_length;

		/*
		 * Bits 0, 1, and 2 will need to be updated
		 * to reflect the queue tag handling if/when
		 * that is implemented.  For now, we're going
		 * to claim support only for Simple TA.
		 */
		p[5] = 1;
		xfer_size = page_length + 4;
		break;

	default:
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_INVALID_FIELD_IN_CDB);
		goto err_done;
	}

	sbd_handle_short_read_transfers(task, initial_dbuf, p, cmd_size,
	    min(cmd_size, xfer_size));
err_done:
	kmem_free(p, bsize);
	rw_exit(&sbd_global_prop_lock);
}
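
/*
 * Layout sketch (illustrative; the serial number is hypothetical):
 * a unit serial number page (VPD 0x80) response for serial "SN01"
 * as built above:
 *
 *	byte 0	0x00	peripheral qualifier/device type (DTYPE_DIRECT)
 *	byte 1	0x80	page code
 *	byte 2	0x00	reserved
 *	byte 3	0x04	page length
 *	byte 4+	'S' 'N' '0' '1'
 *
 * xfer_size is therefore page_length + 4 = 8 bytes.
 */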

stmf_status_t
sbd_task_alloc(struct scsi_task *task)
{
	if ((task->task_lu_private =
	    kmem_alloc(sizeof (sbd_cmd_t), KM_NOSLEEP)) != NULL) {
		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
		scmd->flags = 0;
		return (STMF_SUCCESS);
	}
	return (STMF_ALLOC_FAILURE);
}

void
sbd_remove_it_handle(sbd_lu_t *sl, sbd_it_data_t *it)
{
	sbd_it_data_t **ppit;

	sbd_pgr_remove_it_handle(sl, it);
	mutex_enter(&sl->sl_lock);
	for (ppit = &sl->sl_it_list; *ppit != NULL;
	    ppit = &((*ppit)->sbd_it_next)) {
		if ((*ppit) == it) {
			*ppit = it->sbd_it_next;
			break;
		}
	}
	mutex_exit(&sl->sl_lock);

	DTRACE_PROBE2(itl__nexus__end, stmf_lu_t *, sl->sl_lu,
	    sbd_it_data_t *, it);

	kmem_free(it, sizeof (*it));
}

void
sbd_check_and_clear_scsi2_reservation(sbd_lu_t *sl, sbd_it_data_t *it)
{
	mutex_enter(&sl->sl_lock);
	if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) == 0) {
		/* If we don't have any reservations, just get out. */
		mutex_exit(&sl->sl_lock);
		return;
	}

	if (it == NULL) {
		/* Find the I_T nexus which is holding the reservation. */
		for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
			if (it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) {
				ASSERT(it->sbd_it_session_id ==
				    sl->sl_rs_owner_session_id);
				break;
			}
		}
		ASSERT(it != NULL);
	} else {
		/*
		 * We were passed an I_T nexus. If this nexus does not hold
		 * the reservation, do nothing. This is why this function is
		 * called "check_and_clear".
		 */
		if ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0) {
			mutex_exit(&sl->sl_lock);
			return;
		}
	}
	it->sbd_it_flags &= ~SBD_IT_HAS_SCSI2_RESERVATION;
	sl->sl_flags &= ~SL_LU_HAS_SCSI2_RESERVATION;
	mutex_exit(&sl->sl_lock);
}

void
sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
{
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	sbd_it_data_t *it;
	uint8_t cdb0, cdb1;
	stmf_status_t st_ret;

	if ((it = task->task_lu_itl_handle) == NULL) {
		mutex_enter(&sl->sl_lock);
		for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
			if (it->sbd_it_session_id ==
			    task->task_session->ss_session_id) {
				mutex_exit(&sl->sl_lock);
				stmf_scsilib_send_status(task, STATUS_BUSY, 0);
				return;
			}
		}
		it = (sbd_it_data_t *)kmem_zalloc(sizeof (*it), KM_NOSLEEP);
		if (it == NULL) {
			mutex_exit(&sl->sl_lock);
			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
			return;
		}
		it->sbd_it_session_id = task->task_session->ss_session_id;
		bcopy(task->task_lun_no, it->sbd_it_lun, 8);
		it->sbd_it_next = sl->sl_it_list;
		sl->sl_it_list = it;
		mutex_exit(&sl->sl_lock);

		DTRACE_PROBE1(itl__nexus__start, scsi_task *, task);

		sbd_pgr_initialize_it(task, it);
		if (stmf_register_itl_handle(task->task_lu, task->task_lun_no,
		    task->task_session, it->sbd_it_session_id, it)
		    != STMF_SUCCESS) {
			sbd_remove_it_handle(sl, it);
			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
			return;
		}
		task->task_lu_itl_handle = it;
		if (sl->sl_access_state != SBD_LU_STANDBY) {
			it->sbd_it_ua_conditions = SBD_UA_POR;
		}
	} else if (it->sbd_it_flags & SBD_IT_PGR_CHECK_FLAG) {
		mutex_enter(&sl->sl_lock);
		it->sbd_it_flags &= ~SBD_IT_PGR_CHECK_FLAG;
		mutex_exit(&sl->sl_lock);
		sbd_pgr_initialize_it(task, it);
	}

	if (task->task_mgmt_function) {
		stmf_scsilib_handle_task_mgmt(task);
		return;
	}

	/*
	 * If we're transitioning between access
	 * states, return NOT READY.
	 */
	if (sl->sl_access_state == SBD_LU_TRANSITION_TO_STANDBY ||
	    sl->sl_access_state == SBD_LU_TRANSITION_TO_ACTIVE) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
		return;
	}

	/* Check UA conditions in the order specified in SAM-3 (r14) 5.3.2 */
	if ((it->sbd_it_ua_conditions) && (task->task_cdb[0] != SCMD_INQUIRY)) {
		uint32_t saa = 0;

		mutex_enter(&sl->sl_lock);
		if (it->sbd_it_ua_conditions & SBD_UA_POR) {
			it->sbd_it_ua_conditions &= ~SBD_UA_POR;
			saa = STMF_SAA_POR;
		}
		mutex_exit(&sl->sl_lock);
		if (saa) {
			stmf_scsilib_send_status(task, STATUS_CHECK, saa);
			return;
		}
	}

	/* Reservation conflict checks */
	if (sl->sl_access_state == SBD_LU_ACTIVE) {
		if (SBD_PGR_RSVD(sl->sl_pgr)) {
			if (sbd_pgr_reservation_conflict(task)) {
				stmf_scsilib_send_status(task,
				    STATUS_RESERVATION_CONFLICT, 0);
				return;
			}
		} else if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) &&
		    ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0)) {
			if (!(SCSI2_CONFLICT_FREE_CMDS(task->task_cdb))) {
				stmf_scsilib_send_status(task,
				    STATUS_RESERVATION_CONFLICT, 0);
				return;
			}
		}
	}

	/* Remaining UA condition checks */
	if ((it->sbd_it_ua_conditions) && (task->task_cdb[0] != SCMD_INQUIRY)) {
		uint32_t saa = 0;

		mutex_enter(&sl->sl_lock);
		if (it->sbd_it_ua_conditions & SBD_UA_CAPACITY_CHANGED) {
			it->sbd_it_ua_conditions &= ~SBD_UA_CAPACITY_CHANGED;
			if ((task->task_cdb[0] == SCMD_READ_CAPACITY) ||
			    ((task->task_cdb[0] == SCMD_SVC_ACTION_IN_G4) &&
			    (task->task_cdb[1] ==
			    SSVC_ACTION_READ_CAPACITY_G4))) {
				saa = 0;
			} else {
				saa = STMF_SAA_CAPACITY_DATA_HAS_CHANGED;
			}
		} else if (it->sbd_it_ua_conditions &
		    SBD_UA_MODE_PARAMETERS_CHANGED) {
			it->sbd_it_ua_conditions &=
			    ~SBD_UA_MODE_PARAMETERS_CHANGED;
			saa = STMF_SAA_MODE_PARAMETERS_CHANGED;
		} else if (it->sbd_it_ua_conditions &
		    SBD_UA_ASYMMETRIC_ACCESS_CHANGED) {
			it->sbd_it_ua_conditions &=
			    ~SBD_UA_ASYMMETRIC_ACCESS_CHANGED;
			saa = STMF_SAA_ASYMMETRIC_ACCESS_CHANGED;
		} else if (it->sbd_it_ua_conditions &
		    SBD_UA_ACCESS_STATE_TRANSITION) {
			it->sbd_it_ua_conditions &=
			    ~SBD_UA_ACCESS_STATE_TRANSITION;
			saa = STMF_SAA_LU_NO_ACCESS_TRANSITION;
		} else {
			it->sbd_it_ua_conditions = 0;
			saa = 0;
		}
		mutex_exit(&sl->sl_lock);
		if (saa) {
			stmf_scsilib_send_status(task, STATUS_CHECK, saa);
			return;
		}
	}

	cdb0 = task->task_cdb[0];
	cdb1 = task->task_cdb[1];

	if (sl->sl_access_state == SBD_LU_STANDBY) {
		if (cdb0 != SCMD_INQUIRY &&
		    cdb0 != SCMD_MODE_SENSE &&
		    cdb0 != SCMD_MODE_SENSE_G1 &&
		    cdb0 != SCMD_MODE_SELECT &&
		    cdb0 != SCMD_MODE_SELECT_G1 &&
		    cdb0 != SCMD_RESERVE &&
		    cdb0 != SCMD_RELEASE &&
		    cdb0 != SCMD_PERSISTENT_RESERVE_OUT &&
		    cdb0 != SCMD_PERSISTENT_RESERVE_IN &&
		    cdb0 != SCMD_REQUEST_SENSE &&
		    cdb0 != SCMD_READ_CAPACITY &&
		    cdb0 != SCMD_TEST_UNIT_READY &&
		    cdb0 != SCMD_START_STOP &&
		    cdb0 != SCMD_READ &&
		    cdb0 != SCMD_READ_G1 &&
		    cdb0 != SCMD_READ_G4 &&
		    cdb0 != SCMD_READ_G5 &&
		    !(cdb0 == SCMD_SVC_ACTION_IN_G4 &&
		    cdb1 == SSVC_ACTION_READ_CAPACITY_G4) &&
		    !(cdb0 == SCMD_MAINTENANCE_IN &&
		    (cdb1 & 0x1F) == 0x05) &&
		    !(cdb0 == SCMD_MAINTENANCE_IN &&
		    (cdb1 & 0x1F) == 0x0A)) {
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_LU_NO_ACCESS_STANDBY);
			return;
		}

		/*
		 * Is this a short write?
		 * If so, we'll need to wait until we have the buffer
		 * before proxying the command.
		 */
		switch (cdb0) {
			case SCMD_MODE_SELECT:
			case SCMD_MODE_SELECT_G1:
			case SCMD_PERSISTENT_RESERVE_OUT:
				break;
			default:
				st_ret = stmf_proxy_scsi_cmd(task,
				    initial_dbuf);
				if (st_ret != STMF_SUCCESS) {
					stmf_scsilib_send_status(task,
					    STATUS_CHECK,
					    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
				}
				return;
		}
	}

	cdb0 = task->task_cdb[0] & 0x1F;

	if ((cdb0 == SCMD_READ) || (cdb0 == SCMD_WRITE)) {
		if (task->task_additional_flags & TASK_AF_PORT_LOAD_HIGH) {
			stmf_scsilib_send_status(task, STATUS_QFULL, 0);
			return;
		}
		if (cdb0 == SCMD_READ) {
			sbd_handle_read(task, initial_dbuf);
			return;
		}
		sbd_handle_write(task, initial_dbuf);
		return;
	}

	cdb0 = task->task_cdb[0];
	cdb1 = task->task_cdb[1];

	if (cdb0 == SCMD_INQUIRY) {		/* Inquiry */
		sbd_handle_inquiry(task, initial_dbuf);
		return;
	}

	if (cdb0 == SCMD_PERSISTENT_RESERVE_OUT) {
		sbd_handle_pgr_out_cmd(task, initial_dbuf);
		return;
	}

	if (cdb0 == SCMD_PERSISTENT_RESERVE_IN) {
		sbd_handle_pgr_in_cmd(task, initial_dbuf);
		return;
	}

	if (cdb0 == SCMD_RELEASE) {
		if (cdb1) {
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_INVALID_FIELD_IN_CDB);
			return;
		}

		mutex_enter(&sl->sl_lock);
		if (sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) {
			/* If not owner don't release it, just return good */
			if (it->sbd_it_session_id !=
			    sl->sl_rs_owner_session_id) {
				mutex_exit(&sl->sl_lock);
				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
				return;
			}
		}
		sl->sl_flags &= ~SL_LU_HAS_SCSI2_RESERVATION;
		it->sbd_it_flags &= ~SBD_IT_HAS_SCSI2_RESERVATION;
		mutex_exit(&sl->sl_lock);
		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
		return;
	}

	if (cdb0 == SCMD_RESERVE) {
		if (cdb1) {
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_INVALID_FIELD_IN_CDB);
			return;
		}

		mutex_enter(&sl->sl_lock);
		if (sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) {
			/* If not owner, return conflict status */
			if (it->sbd_it_session_id !=
			    sl->sl_rs_owner_session_id) {
				mutex_exit(&sl->sl_lock);
				stmf_scsilib_send_status(task,
				    STATUS_RESERVATION_CONFLICT, 0);
				return;
			}
		}
		sl->sl_flags |= SL_LU_HAS_SCSI2_RESERVATION;
		it->sbd_it_flags |= SBD_IT_HAS_SCSI2_RESERVATION;
		sl->sl_rs_owner_session_id = it->sbd_it_session_id;
		mutex_exit(&sl->sl_lock);
		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
		return;
	}

	if (cdb0 == SCMD_REQUEST_SENSE) {
		/*
		 * LU provider needs to store unretrieved sense data
		 * (e.g. after power-on/reset).  For now, we'll just
		 * return good status with no sense.
		 */

		if ((cdb1 & ~1) || task->task_cdb[2] || task->task_cdb[3] ||
		    task->task_cdb[5]) {
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_INVALID_FIELD_IN_CDB);
		} else {
			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
		}

		return;
	}

	/* Report Target Port Groups */
	if ((cdb0 == SCMD_MAINTENANCE_IN) &&
	    ((cdb1 & 0x1F) == 0x0A)) {
		stmf_scsilib_handle_report_tpgs(task, initial_dbuf);
		return;
	}

	/* Report Identifying Information */
	if ((cdb0 == SCMD_MAINTENANCE_IN) &&
	    ((cdb1 & 0x1F) == 0x05)) {
		sbd_handle_identifying_info(task, initial_dbuf);
		return;
	}

	if (cdb0 == SCMD_START_STOP) {			/* Start stop */
		task->task_cmd_xfer_length = 0;
		if (task->task_cdb[4] & 0xFC) {
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_INVALID_FIELD_IN_CDB);
			return;
		}
		if (task->task_cdb[4] & 2) {
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_INVALID_FIELD_IN_CDB);
		} else {
			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
		}
		return;
	}

	if ((cdb0 == SCMD_MODE_SENSE) || (cdb0 == SCMD_MODE_SENSE_G1)) {
		uint8_t *p;
		p = kmem_zalloc(512, KM_SLEEP);
		sbd_handle_mode_sense(task, initial_dbuf, p);
		kmem_free(p, 512);
		return;
	}

	if ((cdb0 == SCMD_MODE_SELECT) || (cdb0 == SCMD_MODE_SELECT_G1)) {
		sbd_handle_mode_select(task, initial_dbuf);
		return;
	}

	if (cdb0 == SCMD_TEST_UNIT_READY) {	/* Test unit ready */
		task->task_cmd_xfer_length = 0;
		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
		return;
	}

	if (cdb0 == SCMD_READ_CAPACITY) {		/* Read Capacity */
		sbd_handle_read_capacity(task, initial_dbuf);
		return;
	}

	if (cdb0 == SCMD_SVC_ACTION_IN_G4) {	/* Read Capacity or read long */
		if (cdb1 == SSVC_ACTION_READ_CAPACITY_G4) {
			sbd_handle_read_capacity(task, initial_dbuf);
			return;
		/*
		 * } else if (cdb1 == SSVC_ACTION_READ_LONG_G4) {
		 * 	sbd_handle_read(task, initial_dbuf);
		 * 	return;
		 */
		}
	}

	/*
	 * if (cdb0 == SCMD_SVC_ACTION_OUT_G4) {
	 *	if (cdb1 == SSVC_ACTION_WRITE_LONG_G4) {
	 *		 sbd_handle_write(task, initial_dbuf);
	 * 		return;
	 *	}
	 * }
	 */

	if (cdb0 == SCMD_VERIFY) {
		/*
		 * Something more likely needs to be done here.
		 */
		task->task_cmd_xfer_length = 0;
		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
		return;
	}

	if (cdb0 == SCMD_SYNCHRONIZE_CACHE ||
	    cdb0 == SCMD_SYNCHRONIZE_CACHE_G4) {
		sbd_handle_sync_cache(task, initial_dbuf);
		return;
	}

	/*
	 * Write and Verify use the same path as write, but we don't clutter
	 * the performance path above with checking for write_verify opcodes.
	 * We rely on zfs's integrity checks for the "Verify" part of Write &
	 * Verify.  (Even if we did a read to "verify" we'd merely be reading
	 * cache, not actual media.)
	 * Therefore we
	 *   a) only support this if sbd_is_zvol, and
	 *   b) run the IO through the normal write path with a forced
	 *	sbd_flush_data_cache at the end.
	 */

	if ((sl->sl_flags & SL_ZFS_META) && (
	    cdb0 == SCMD_WRITE_VERIFY ||
	    cdb0 == SCMD_WRITE_VERIFY_G4 ||
	    cdb0 == SCMD_WRITE_VERIFY_G5)) {
		sbd_handle_write(task, initial_dbuf);
		return;
	}

	stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_INVALID_OPCODE);
}
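
/*
 * Flow sketch (illustrative): the ordering of the checks above
 * matters.  For an I_T nexus with a pending power-on UA and no
 * reservation, a READ(10) proceeds as:
 *
 *	1. nexus lookup/creation (task_lu_itl_handle)
 *	2. POR UA check            -> CHECK CONDITION, STMF_SAA_POR
 *	3. retry from initiator    -> reservation checks pass
 *	4. remaining UA checks     -> none left
 *	5. opcode dispatch         -> sbd_handle_read()
 *
 * INQUIRY is the one opcode this function exempts from the UA
 * checks, per the SAM-3 ordering cited above.
 */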

void
sbd_dbuf_xfer_done(struct scsi_task *task, struct stmf_data_buf *dbuf)
{
	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;

	if (dbuf->db_flags & DB_LU_DATA_BUF) {
		/*
		 * Buffers passed in from the LU always complete
		 * even if the task is no longer active.
		 */
		ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
		ASSERT(scmd);
		switch (scmd->cmd_type) {
		case (SBD_CMD_SCSI_READ):
			sbd_handle_sgl_read_xfer_completion(task, scmd, dbuf);
			break;
		case (SBD_CMD_SCSI_WRITE):
			sbd_handle_sgl_write_xfer_completion(task, scmd, dbuf);
			break;
		default:
			cmn_err(CE_PANIC, "Unknown cmd type, task = %p",
			    (void *)task);
			break;
		}
		return;
	}

	if ((scmd == NULL) || ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0))
		return;

	switch (scmd->cmd_type) {
	case (SBD_CMD_SCSI_READ):
		sbd_handle_read_xfer_completion(task, scmd, dbuf);
		break;

	case (SBD_CMD_SCSI_WRITE):
		sbd_handle_write_xfer_completion(task, scmd, dbuf, 1);
		break;

	case (SBD_CMD_SMALL_READ):
		sbd_handle_short_read_xfer_completion(task, scmd, dbuf);
		break;

	case (SBD_CMD_SMALL_WRITE):
		sbd_handle_short_write_xfer_completion(task, dbuf);
		break;

	default:
		cmn_err(CE_PANIC, "Unknown cmd type, task = %p", (void *)task);
		break;
	}
}

/* ARGSUSED */
void
sbd_send_status_done(struct scsi_task *task)
{
	cmn_err(CE_PANIC,
	    "sbd_send_status_done: this should not have been called");
}

void
sbd_task_free(struct scsi_task *task)
{
	if (task->task_lu_private) {
		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
		if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
			cmn_err(CE_PANIC, "cmd is active, task = %p",
			    (void *)task);
		}
		kmem_free(scmd, sizeof (sbd_cmd_t));
	}
}

/*
 * Aborts are synchronous w.r.t. I/O, all the I/O which SBD does is
 * synchronous, and everything within a task is single threaded.
 *   IT MEANS
 * If this function is called, we are doing nothing with this task
 * inside of the sbd module.
 */
/* ARGSUSED */
stmf_status_t
sbd_abort(struct stmf_lu *lu, int abort_cmd, void *arg, uint32_t flags)
{
	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
	scsi_task_t *task;

	if (abort_cmd == STMF_LU_RESET_STATE) {
		return (sbd_lu_reset_state(lu));
	}

	if (abort_cmd == STMF_LU_ITL_HANDLE_REMOVED) {
		sbd_check_and_clear_scsi2_reservation(sl, (sbd_it_data_t *)arg);
		sbd_remove_it_handle(sl, (sbd_it_data_t *)arg);
		return (STMF_SUCCESS);
	}

	ASSERT(abort_cmd == STMF_LU_ABORT_TASK);
	task = (scsi_task_t *)arg;
	if (task->task_lu_private) {
		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;

		if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
			return (STMF_ABORT_SUCCESS);
		}
	}

	return (STMF_NOT_FOUND);
}
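
/*
 * Consequence sketch (illustrative): because aborts are synchronous
 * with I/O here, clearing SBD_SCSI_CMD_ACTIVE is the entire abort.
 * Completion paths re-check the flag before doing more work, e.g.
 * the guard in sbd_dbuf_xfer_done() above:
 *
 *	if ((scmd == NULL) || ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0))
 *		return;
 *
 * so an aborted task simply falls out of the state machine on its
 * next event.
 */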

/*
 * This function is called during task clean-up if the
 * DB_LU_DATA_BUF flag is set on the dbuf. This should only be called
 * for abort processing after sbd_abort has been called for the task.
 */
void
sbd_dbuf_free(struct scsi_task *task, struct stmf_data_buf *dbuf)
{
	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;

	ASSERT(dbuf->db_lu_private);
	ASSERT(scmd && scmd->nbufs > 0);
	ASSERT((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0);
	ASSERT(dbuf->db_flags & DB_LU_DATA_BUF);
	ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
	ASSERT((curthread->t_flag & T_INTR_THREAD) == 0);

	if (scmd->cmd_type == SBD_CMD_SCSI_READ) {
		sbd_zvol_rele_read_bufs(sl, dbuf);
	} else if (scmd->cmd_type == SBD_CMD_SCSI_WRITE) {
		sbd_zvol_rele_write_bufs_abort(sl, dbuf);
	} else {
		cmn_err(CE_PANIC, "Unknown cmd type %d, task = %p",
		    scmd->cmd_type, (void *)task);
	}
	if (--scmd->nbufs == 0)
		rw_exit(&sl->sl_access_state_lock);
	stmf_teardown_dbuf(task, dbuf);
	stmf_free(dbuf);
}

/* ARGSUSED */
void
sbd_ctl(struct stmf_lu *lu, int cmd, void *arg)
{
	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
	stmf_change_status_t st;

	ASSERT((cmd == STMF_CMD_LU_ONLINE) ||
	    (cmd == STMF_CMD_LU_OFFLINE) ||
	    (cmd == STMF_ACK_LU_ONLINE_COMPLETE) ||
	    (cmd == STMF_ACK_LU_OFFLINE_COMPLETE));

	st.st_completion_status = STMF_SUCCESS;
	st.st_additional_info = NULL;

	switch (cmd) {
	case STMF_CMD_LU_ONLINE:
		if (sl->sl_state == STMF_STATE_ONLINE)
			st.st_completion_status = STMF_ALREADY;
		else if (sl->sl_state != STMF_STATE_OFFLINE)
			st.st_completion_status = STMF_FAILURE;
		if (st.st_completion_status == STMF_SUCCESS) {
			sl->sl_state = STMF_STATE_ONLINE;
			sl->sl_state_not_acked = 1;
		}
		(void) stmf_ctl(STMF_CMD_LU_ONLINE_COMPLETE, lu, &st);
		break;

	case STMF_CMD_LU_OFFLINE:
		if (sl->sl_state == STMF_STATE_OFFLINE)
			st.st_completion_status = STMF_ALREADY;
		else if (sl->sl_state != STMF_STATE_ONLINE)
			st.st_completion_status = STMF_FAILURE;
		if (st.st_completion_status == STMF_SUCCESS) {
			sl->sl_flags &= ~(SL_MEDIUM_REMOVAL_PREVENTED |
			    SL_LU_HAS_SCSI2_RESERVATION);
			sl->sl_state = STMF_STATE_OFFLINE;
			sl->sl_state_not_acked = 1;
			sbd_pgr_reset(sl);
		}
		(void) stmf_ctl(STMF_CMD_LU_OFFLINE_COMPLETE, lu, &st);
		break;

	case STMF_ACK_LU_ONLINE_COMPLETE:
		/* Fallthrough */
	case STMF_ACK_LU_OFFLINE_COMPLETE:
		sl->sl_state_not_acked = 0;
		break;

	}
}

/* ARGSUSED */
stmf_status_t
sbd_info(uint32_t cmd, stmf_lu_t *lu, void *arg, uint8_t *buf,
    uint32_t *bufsizep)
{
	return (STMF_NOT_SUPPORTED);
}

stmf_status_t
sbd_lu_reset_state(stmf_lu_t *lu)
{
	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;

	mutex_enter(&sl->sl_lock);
	if (sl->sl_flags & SL_SAVED_WRITE_CACHE_DISABLE) {
		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
		mutex_exit(&sl->sl_lock);
		if (sl->sl_access_state == SBD_LU_ACTIVE) {
			(void) sbd_wcd_set(1, sl);
		}
	} else {
		sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
		mutex_exit(&sl->sl_lock);
		if (sl->sl_access_state == SBD_LU_ACTIVE) {
			(void) sbd_wcd_set(0, sl);
		}
	}
	sbd_pgr_reset(sl);
	sbd_check_and_clear_scsi2_reservation(sl, NULL);
	if (stmf_deregister_all_lu_itl_handles(lu) != STMF_SUCCESS) {
		return (STMF_FAILURE);
	}
	return (STMF_SUCCESS);
}

sbd_status_t
sbd_flush_data_cache(sbd_lu_t *sl, int fsync_done)
{
	int r = 0;
	int ret;

	if (fsync_done)
		goto over_fsync;
	if ((sl->sl_data_vtype == VREG) || (sl->sl_data_vtype == VBLK)) {
		if (VOP_FSYNC(sl->sl_data_vp, FSYNC, kcred, NULL))
			return (SBD_FAILURE);
	}
over_fsync:
	if (((sl->sl_data_vtype == VCHR) || (sl->sl_data_vtype == VBLK)) &&
	    ((sl->sl_flags & SL_NO_DATA_DKIOFLUSH) == 0)) {
		ret = VOP_IOCTL(sl->sl_data_vp, DKIOCFLUSHWRITECACHE, NULL,
		    FKIOCTL, kcred, &r, NULL);
		if ((ret == ENOTTY) || (ret == ENOTSUP)) {
			mutex_enter(&sl->sl_lock);
			sl->sl_flags |= SL_NO_DATA_DKIOFLUSH;
			mutex_exit(&sl->sl_lock);
		} else if (ret != 0) {
			return (SBD_FAILURE);
		}
	}

	return (SBD_SUCCESS);
}
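
/*
 * Usage sketch (illustrative): callers that have already fsync'ed the
 * backing vnode pass fsync_done = 1 to skip straight to the
 * DKIOCFLUSHWRITECACHE ioctl; everyone else passes 0 and gets both
 * steps.  The SYNCHRONIZE CACHE handler below is the pass-0 case:
 *
 *	if (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)
 *		fail the command with STMF_SAA_WRITE_ERROR
 */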

/* ARGSUSED */
static void
sbd_handle_sync_cache(struct scsi_task *task,
    struct stmf_data_buf *initial_dbuf)
{
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	uint64_t	lba, laddr;
	sbd_status_t	sret;
	uint32_t	len;
	int		is_g4 = 0;
	int		immed;

	task->task_cmd_xfer_length = 0;
	/*
	 * Determine if this is a 10 or 16 byte CDB
	 */

	if (task->task_cdb[0] == SCMD_SYNCHRONIZE_CACHE_G4)
		is_g4 = 1;

	/*
	 * Determine other requested parameters
	 *
	 * We don't have a non-volatile cache, so we don't care about SYNC_NV.
	 * We do not support the IMMED bit.
	 */

	immed = (task->task_cdb[1] & 0x02);

	if (immed) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_INVALID_FIELD_IN_CDB);
		return;
	}

	/*
	 * Check to be sure we're not being asked to sync an LBA
	 * that is out of range.  While checking, verify reserved fields.
	 */

	if (is_g4) {
		if ((task->task_cdb[1] & 0xf9) || task->task_cdb[14] ||
		    task->task_cdb[15]) {
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_INVALID_FIELD_IN_CDB);
			return;
		}

		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
	} else {
		if ((task->task_cdb[1] & 0xf9) || task->task_cdb[6] ||
		    task->task_cdb[9]) {
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_INVALID_FIELD_IN_CDB);
			return;
		}

		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
	}

	laddr = lba << sl->sl_data_blocksize_shift;
	len <<= sl->sl_data_blocksize_shift;

	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_LBA_OUT_OF_RANGE);
		return;
	}

	sret = sbd_flush_data_cache(sl, 0);
	if (sret != SBD_SUCCESS) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_WRITE_ERROR);
		return;
	}

	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
}
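
/*
 * Worked example (illustrative): a SYNCHRONIZE CACHE(10) CDB of
 *
 *	35 00 00 00 00 10 00 00 20 00
 *
 * parses as lba = 0x10 and len = 0x20 blocks.  With 512-byte blocks
 * (blocksize shift 9) that is laddr = 0x2000 and len = 0x4000 bytes,
 * which must lie within sl_lu_size.  Note that while the LBA range is
 * validated, the flush itself covers the LU's entire cache.
 */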