1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23  *
24  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
25  */
26 
27 #include <sys/conf.h>
28 #include <sys/file.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/modctl.h>
32 #include <sys/scsi/scsi.h>
33 #include <sys/scsi/impl/scsi_reset_notify.h>
34 #include <sys/scsi/generic/mode.h>
35 #include <sys/disp.h>
36 #include <sys/byteorder.h>
37 #include <sys/atomic.h>
38 #include <sys/sdt.h>
39 #include <sys/dkio.h>
40 
41 #include <sys/stmf.h>
42 #include <sys/lpif.h>
43 #include <sys/portif.h>
44 #include <sys/stmf_ioctl.h>
45 #include <sys/stmf_sbd_ioctl.h>
46 
47 #include "stmf_sbd.h"
48 #include "sbd_impl.h"
49 
50 #define	SCSI2_CONFLICT_FREE_CMDS(cdb)	( \
51 	/* ----------------------- */                                      \
52 	/* Refer Both		   */                                      \
53 	/* SPC-2 (rev 20) Table 10 */                                      \
54 	/* SPC-3 (rev 23) Table 31 */                                      \
55 	/* ----------------------- */                                      \
56 	((cdb[0]) == SCMD_INQUIRY)					|| \
57 	((cdb[0]) == SCMD_LOG_SENSE_G1)					|| \
58 	((cdb[0]) == SCMD_RELEASE)					|| \
59 	((cdb[0]) == SCMD_RELEASE_G1)					|| \
60 	((cdb[0]) == SCMD_REPORT_LUNS)					|| \
61 	((cdb[0]) == SCMD_REQUEST_SENSE)				|| \
62 	/* PREVENT ALLOW MEDIUM REMOVAL with prevent == 0 */               \
63 	((((cdb[0]) == SCMD_DOORLOCK) && (((cdb[4]) & 0x3) == 0)))	|| \
64 	/* SERVICE ACTION IN with READ MEDIA SERIAL NUMBER (0x01) */       \
65 	(((cdb[0]) == SCMD_SVC_ACTION_IN_G5) && (                          \
66 	    ((cdb[1]) & 0x1F) == 0x01))					|| \
67 	/* MAINTENANCE IN with service actions REPORT ALIASES (0x0Bh) */   \
68 	/* REPORT DEVICE IDENTIFIER (0x05)  REPORT PRIORITY (0x0Eh) */     \
69 	/* REPORT TARGET PORT GROUPS (0x0A) REPORT TIMESTAMP (0x0F) */     \
70 	(((cdb[0]) == SCMD_MAINTENANCE_IN) && (                            \
71 	    (((cdb[1]) & 0x1F) == 0x0B) ||                                 \
72 	    (((cdb[1]) & 0x1F) == 0x05) ||                                 \
73 	    (((cdb[1]) & 0x1F) == 0x0E) ||                                 \
74 	    (((cdb[1]) & 0x1F) == 0x0A) ||                                 \
75 	    (((cdb[1]) & 0x1F) == 0x0F)))				|| \
76 	/* ----------------------- */                                      \
77 	/* SBC-3 (rev 17) Table 3  */                                      \
78 	/* ----------------------- */                                      \
79 	/* READ CAPACITY(10) */                                            \
80 	((cdb[0]) == SCMD_READ_CAPACITY)				|| \
81 	/* READ CAPACITY(16) */                                            \
82 	(((cdb[0]) == SCMD_SVC_ACTION_IN_G4) && (                          \
83 	    ((cdb[1]) & 0x1F) == 0x10))					|| \
84 	/* START STOP UNIT with START bit 0 and POWER CONDITION 0  */      \
85 	(((cdb[0]) == SCMD_START_STOP) && (                                \
86 	    (((cdb[4]) & 0xF0) == 0) && (((cdb[4]) & 0x01) == 0))))
87 /* End of SCSI2_CONFLICT_FREE_CMDS */
88 
89 stmf_status_t sbd_lu_reset_state(stmf_lu_t *lu);
90 static void sbd_handle_sync_cache(struct scsi_task *task,
91     struct stmf_data_buf *initial_dbuf);
92 void sbd_handle_read_xfer_completion(struct scsi_task *task,
93     sbd_cmd_t *scmd, struct stmf_data_buf *dbuf);
94 void sbd_handle_short_write_xfer_completion(scsi_task_t *task,
95     stmf_data_buf_t *dbuf);
96 void sbd_handle_short_write_transfers(scsi_task_t *task,
97     stmf_data_buf_t *dbuf, uint32_t cdb_xfer_size);
98 void sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf,
99     uint32_t buflen);
100 void sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf);
101 void sbd_handle_identifying_info(scsi_task_t *task, stmf_data_buf_t *dbuf);
102 
103 static void sbd_handle_unmap_xfer(scsi_task_t *task, uint8_t *buf,
104     uint32_t buflen);
105 static void sbd_handle_unmap(scsi_task_t *task, stmf_data_buf_t *dbuf);
106 
107 extern void sbd_pgr_initialize_it(scsi_task_t *, sbd_it_data_t *);
108 extern int sbd_pgr_reservation_conflict(scsi_task_t *);
109 extern void sbd_pgr_reset(sbd_lu_t *);
110 extern void sbd_pgr_remove_it_handle(sbd_lu_t *, sbd_it_data_t *);
111 extern void sbd_handle_pgr_in_cmd(scsi_task_t *, stmf_data_buf_t *);
112 extern void sbd_handle_pgr_out_cmd(scsi_task_t *, stmf_data_buf_t *);
113 extern void sbd_handle_pgr_out_data(scsi_task_t *, stmf_data_buf_t *);
114 void sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
115     int first_xfer);
116 static void sbd_handle_write_same(scsi_task_t *task,
117     struct stmf_data_buf *initial_dbuf);
118 static void sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
119     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable);
120 static void sbd_handle_write_same_xfer_completion(struct scsi_task *task,
121     sbd_cmd_t *scmd, struct stmf_data_buf *dbuf, uint8_t dbuf_reusable);
122 /*
123  * IMPORTANT NOTE:
124  * =================
125  * The whole world here is based on the assumption that everything within
126  * a scsi task executes in a single threaded manner, even the aborts.
127  * Dont ever change that. There wont be any performance gain but there
128  * will be tons of race conditions.
129  */
130 
131 void
132 sbd_do_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
133 					struct stmf_data_buf *dbuf)
134 {
135 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
136 	uint64_t laddr;
137 	uint32_t len, buflen, iolen;
138 	int ndx;
139 	int bufs_to_take;
140 
141 	/* Lets try not to hog all the buffers the port has. */
142 	bufs_to_take = ((task->task_max_nbufs > 2) &&
143 	    (task->task_cmd_xfer_length < (32 * 1024))) ? 2 :
144 	    task->task_max_nbufs;
145 
146 	len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size : scmd->len;
147 	laddr = scmd->addr + scmd->current_ro;
148 
149 	for (buflen = 0, ndx = 0; (buflen < len) &&
150 	    (ndx < dbuf->db_sglist_length); ndx++) {
151 		iolen = min(len - buflen, dbuf->db_sglist[ndx].seg_length);
152 		if (iolen == 0)
153 			break;
154 		if (sbd_data_read(sl, task, laddr, (uint64_t)iolen,
155 		    dbuf->db_sglist[ndx].seg_addr) != STMF_SUCCESS) {
156 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
157 			/* Do not need to do xfer anymore, just complete it */
158 			dbuf->db_data_size = 0;
159 			dbuf->db_xfer_status = STMF_SUCCESS;
160 			sbd_handle_read_xfer_completion(task, scmd, dbuf);
161 			return;
162 		}
163 		buflen += iolen;
164 		laddr += (uint64_t)iolen;
165 	}
166 	dbuf->db_relative_offset = scmd->current_ro;
167 	dbuf->db_data_size = buflen;
168 	dbuf->db_flags = DB_DIRECTION_TO_RPORT;
169 	(void) stmf_xfer_data(task, dbuf, 0);
170 	scmd->len -= buflen;
171 	scmd->current_ro += buflen;
172 	if (scmd->len && (scmd->nbufs < bufs_to_take)) {
173 		uint32_t maxsize, minsize, old_minsize;
174 
175 		maxsize = (scmd->len > (128*1024)) ? 128*1024 : scmd->len;
176 		minsize = maxsize >> 2;
177 		do {
178 			/*
179 			 * A bad port implementation can keep on failing the
180 			 * the request but keep on sending us a false
181 			 * minsize.
182 			 */
183 			old_minsize = minsize;
184 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
185 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
186 		    (minsize >= 512));
187 		if (dbuf == NULL) {
188 			return;
189 		}
190 		scmd->nbufs++;
191 		sbd_do_read_xfer(task, scmd, dbuf);
192 	}
193 }
194 
195 /*
196  * sbd_zcopy: Bail-out switch for reduced copy path.
197  *
198  * 0 - read & write off
199  * 1 - read & write on
200  * 2 - only read on
201  * 4 - only write on
202  */
203 int sbd_zcopy = 1;	/* enable zcopy read & write path */
204 uint32_t sbd_max_xfer_len = 0;		/* Valid if non-zero */
205 uint32_t sbd_1st_xfer_len = 0;		/* Valid if non-zero */
206 uint32_t sbd_copy_threshold = 0;		/* Valid if non-zero */
207 
208 static void
209 sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
210 {
211 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
212 	sbd_zvol_io_t *zvio;
213 	int ret, final_xfer;
214 	uint64_t offset;
215 	uint32_t xfer_len, max_len, first_len;
216 	stmf_status_t xstat;
217 	stmf_data_buf_t *dbuf;
218 	uint_t nblks;
219 	uint64_t blksize = sl->sl_blksize;
220 	size_t db_private_sz;
221 	hrtime_t xfer_start, xfer_elapsed;
222 	uintptr_t pad;
223 
224 	ASSERT(rw_read_held(&sl->sl_access_state_lock));
225 	ASSERT((sl->sl_flags & SL_MEDIA_LOADED) != 0);
226 
227 	/*
228 	 * Calculate the limits on xfer_len to the minimum of :
229 	 *    - task limit
230 	 *    - lun limit
231 	 *    - sbd global limit if set
232 	 *    - first xfer limit if set
233 	 *
234 	 * First, protect against silly over-ride value
235 	 */
236 	if (sbd_max_xfer_len && ((sbd_max_xfer_len % DEV_BSIZE) != 0)) {
237 		cmn_err(CE_WARN, "sbd_max_xfer_len invalid %d, resetting\n",
238 		    sbd_max_xfer_len);
239 		sbd_max_xfer_len = 0;
240 	}
241 	if (sbd_1st_xfer_len && ((sbd_1st_xfer_len % DEV_BSIZE) != 0)) {
242 		cmn_err(CE_WARN, "sbd_1st_xfer_len invalid %d, resetting\n",
243 		    sbd_1st_xfer_len);
244 		sbd_1st_xfer_len = 0;
245 	}
246 
247 	max_len = MIN(task->task_max_xfer_len, sl->sl_max_xfer_len);
248 	if (sbd_max_xfer_len)
249 		max_len = MIN(max_len, sbd_max_xfer_len);
250 	/*
251 	 * Special case the first xfer if hints are set.
252 	 */
253 	if (first_xfer && (sbd_1st_xfer_len || task->task_1st_xfer_len)) {
254 		/* global over-ride has precedence */
255 		if (sbd_1st_xfer_len)
256 			first_len = sbd_1st_xfer_len;
257 		else
258 			first_len = task->task_1st_xfer_len;
259 	} else {
260 		first_len = 0;
261 	}
262 
263 	while (scmd->len && scmd->nbufs < task->task_max_nbufs) {
264 
265 		xfer_len = MIN(max_len, scmd->len);
266 		if (first_len) {
267 			xfer_len = MIN(xfer_len, first_len);
268 			first_len = 0;
269 		}
270 		if (scmd->len == xfer_len) {
271 			final_xfer = 1;
272 		} else {
273 			/*
274 			 * Attempt to end xfer on a block boundary.
275 			 * The only way this does not happen is if the
276 			 * xfer_len is small enough to stay contained
277 			 * within the same block.
278 			 */
279 			uint64_t xfer_offset, xfer_aligned_end;
280 
281 			final_xfer = 0;
282 			xfer_offset = scmd->addr + scmd->current_ro;
283 			xfer_aligned_end =
284 			    P2ALIGN(xfer_offset+xfer_len, blksize);
285 			if (xfer_aligned_end > xfer_offset)
286 				xfer_len = xfer_aligned_end - xfer_offset;
287 		}
288 		/*
289 		 * Allocate object to track the read and reserve
290 		 * enough space for scatter/gather list.
291 		 */
292 		offset = scmd->addr + scmd->current_ro;
293 		nblks = sbd_zvol_numsegs(sl, offset, xfer_len);
294 
295 		db_private_sz = sizeof (*zvio) + sizeof (uintptr_t) /* PAD */ +
296 		    (nblks * sizeof (stmf_sglist_ent_t));
297 		dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, db_private_sz,
298 		    AF_DONTZERO);
299 		/*
300 		 * Setup the dbuf
301 		 *
302 		 * XXX Framework does not handle variable length sglists
303 		 * properly, so setup db_lu_private and db_port_private
304 		 * fields here. db_stmf_private is properly set for
305 		 * calls to stmf_free.
306 		 */
307 		if (dbuf->db_port_private == NULL) {
308 			/*
309 			 * XXX Framework assigns space to PP after db_sglist[0]
310 			 */
311 			cmn_err(CE_PANIC, "db_port_private == NULL");
312 		}
313 		pad = (uintptr_t)&dbuf->db_sglist[nblks];
314 		dbuf->db_lu_private = (void *)P2ROUNDUP(pad, sizeof (pad));
315 		dbuf->db_port_private = NULL;
316 		dbuf->db_buf_size = xfer_len;
317 		dbuf->db_data_size = xfer_len;
318 		dbuf->db_relative_offset = scmd->current_ro;
319 		dbuf->db_sglist_length = (uint16_t)nblks;
320 		dbuf->db_xfer_status = 0;
321 		dbuf->db_handle = 0;
322 
323 		dbuf->db_flags = (DB_DONT_CACHE | DB_DONT_REUSE |
324 		    DB_DIRECTION_TO_RPORT | DB_LU_DATA_BUF);
325 		if (final_xfer)
326 			dbuf->db_flags |= DB_SEND_STATUS_GOOD;
327 
328 		zvio = dbuf->db_lu_private;
329 		/* Need absolute offset for zvol access */
330 		zvio->zvio_offset = offset;
331 		zvio->zvio_flags = ZVIO_SYNC;
332 
333 		/*
334 		 * Accounting for start of read.
335 		 * Note there is no buffer address for the probe yet.
336 		 */
337 		stmf_lu_xfer_start(task);
338 		DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
339 		    uint8_t *, NULL, uint64_t, xfer_len,
340 		    uint64_t, offset, scsi_task_t *, task);
341 		xfer_start = gethrtime();
342 
343 		ret = sbd_zvol_alloc_read_bufs(sl, dbuf);
344 
345 		xfer_elapsed = gethrtime() - xfer_start;
346 
347 		stmf_lu_xfer_done(task, B_TRUE /* read */, (uint64_t)xfer_len,
348 		    xfer_elapsed);
349 		DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
350 		    uint8_t *, NULL, uint64_t, xfer_len,
351 		    uint64_t, offset, int, ret, scsi_task_t *, task);
352 
353 		if (ret != 0) {
354 			/*
355 			 * Read failure from the backend.
356 			 */
357 			stmf_free(dbuf);
358 			if (scmd->nbufs == 0) {
359 				/* nothing queued, just finish */
360 				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
361 				stmf_scsilib_send_status(task, STATUS_CHECK,
362 				    STMF_SAA_READ_ERROR);
363 				rw_exit(&sl->sl_access_state_lock);
364 			} else {
365 				/* process failure when other dbufs finish */
366 				scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
367 			}
368 			return;
369 		}
370 
371 
372 		/*
373 		 * Allow PP to do setup
374 		 */
375 		xstat = stmf_setup_dbuf(task, dbuf, 0);
376 		if (xstat != STMF_SUCCESS) {
377 			/*
378 			 * This could happen if the driver cannot get the
379 			 * DDI resources it needs for this request.
380 			 * If other dbufs are queued, try again when the next
381 			 * one completes, otherwise give up.
382 			 */
383 			sbd_zvol_rele_read_bufs(sl, dbuf);
384 			stmf_free(dbuf);
385 			if (scmd->nbufs > 0) {
386 				/* completion of previous dbuf will retry */
387 				return;
388 			}
389 			/*
390 			 * Done with this command.
391 			 */
392 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
393 			if (first_xfer)
394 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
395 			else
396 				stmf_scsilib_send_status(task, STATUS_CHECK,
397 				    STMF_SAA_READ_ERROR);
398 			rw_exit(&sl->sl_access_state_lock);
399 			return;
400 		}
401 		/*
402 		 * dbuf is now queued on task
403 		 */
404 		scmd->nbufs++;
405 
406 		/* XXX leave this in for FW? */
407 		DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
408 		    struct stmf_data_buf *, dbuf, uint64_t, offset,
409 		    uint32_t, xfer_len);
410 		/*
411 		 * Do not pass STMF_IOF_LU_DONE so that the zvol
412 		 * state can be released in the completion callback.
413 		 */
414 		xstat = stmf_xfer_data(task, dbuf, 0);
415 		switch (xstat) {
416 		case STMF_SUCCESS:
417 			break;
418 		case STMF_BUSY:
419 			/*
420 			 * The dbuf is queued on the task, but unknown
421 			 * to the PP, thus no completion will occur.
422 			 */
423 			sbd_zvol_rele_read_bufs(sl, dbuf);
424 			stmf_teardown_dbuf(task, dbuf);
425 			stmf_free(dbuf);
426 			scmd->nbufs--;
427 			if (scmd->nbufs > 0) {
428 				/* completion of previous dbuf will retry */
429 				return;
430 			}
431 			/*
432 			 * Done with this command.
433 			 */
434 			rw_exit(&sl->sl_access_state_lock);
435 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
436 			if (first_xfer)
437 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
438 			else
439 				stmf_scsilib_send_status(task, STATUS_CHECK,
440 				    STMF_SAA_READ_ERROR);
441 			return;
442 		case STMF_ABORTED:
443 			/*
444 			 * Completion from task_done will cleanup
445 			 */
446 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
447 			return;
448 		}
449 		/*
450 		 * Update the xfer progress.
451 		 */
452 		ASSERT(scmd->len >= xfer_len);
453 		scmd->len -= xfer_len;
454 		scmd->current_ro += xfer_len;
455 	}
456 }
457 
458 void
459 sbd_handle_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
460 				struct stmf_data_buf *dbuf)
461 {
462 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
463 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
464 		    dbuf->db_xfer_status, NULL);
465 		return;
466 	}
467 	task->task_nbytes_transferred += dbuf->db_data_size;
468 	if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
469 		stmf_free_dbuf(task, dbuf);
470 		scmd->nbufs--;
471 		if (scmd->nbufs)
472 			return;	/* wait for all buffers to complete */
473 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
474 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL)
475 			stmf_scsilib_send_status(task, STATUS_CHECK,
476 			    STMF_SAA_READ_ERROR);
477 		else
478 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
479 		return;
480 	}
481 	if (dbuf->db_flags & DB_DONT_REUSE) {
482 		/* allocate new dbuf */
483 		uint32_t maxsize, minsize, old_minsize;
484 		stmf_free_dbuf(task, dbuf);
485 
486 		maxsize = (scmd->len > (128*1024)) ? 128*1024 : scmd->len;
487 		minsize = maxsize >> 2;
488 		do {
489 			old_minsize = minsize;
490 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
491 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
492 		    (minsize >= 512));
493 		if (dbuf == NULL) {
494 			scmd->nbufs --;
495 			if (scmd->nbufs == 0) {
496 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
497 				    STMF_ALLOC_FAILURE, NULL);
498 			}
499 			return;
500 		}
501 	}
502 	sbd_do_read_xfer(task, scmd, dbuf);
503 }
504 
505 /*
506  * This routine must release the DMU resources and free the dbuf
507  * in all cases.  If this is the final dbuf of the task, then drop
508  * the reader lock on the LU state. If there are no errors and more
509  * work to do, then queue more xfer operations.
510  */
511 void
512 sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
513 				struct stmf_data_buf *dbuf)
514 {
515 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
516 	stmf_status_t xfer_status;
517 	uint32_t data_size;
518 	int scmd_err;
519 
520 	ASSERT(dbuf->db_lu_private);
521 	ASSERT(scmd->cmd_type == SBD_CMD_SCSI_READ);
522 
523 	scmd->nbufs--;	/* account for this dbuf */
524 	/*
525 	 * Release the DMU resources.
526 	 */
527 	sbd_zvol_rele_read_bufs(sl, dbuf);
528 	/*
529 	 * Release the dbuf after retrieving needed fields.
530 	 */
531 	xfer_status = dbuf->db_xfer_status;
532 	data_size = dbuf->db_data_size;
533 	stmf_teardown_dbuf(task, dbuf);
534 	stmf_free(dbuf);
535 	/*
536 	 * Release the state lock if this is the last completion.
537 	 * If this is the last dbuf on task and all data has been
538 	 * transferred or an error encountered, then no more dbufs
539 	 * will be queued.
540 	 */
541 	scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
542 	    (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
543 	    (xfer_status != STMF_SUCCESS));
544 	if (scmd->nbufs == 0 && (scmd->len == 0 || scmd_err)) {
545 		/* all DMU state has been released */
546 		rw_exit(&sl->sl_access_state_lock);
547 	}
548 
549 	/*
550 	 * If there have been no errors, either complete the task
551 	 * or issue more data xfer operations.
552 	 */
553 	if (!scmd_err) {
554 		/*
555 		 * This chunk completed successfully
556 		 */
557 		task->task_nbytes_transferred += data_size;
558 		if (scmd->nbufs == 0 && scmd->len == 0) {
559 			/*
560 			 * This command completed successfully
561 			 *
562 			 * Status was sent along with data, so no status
563 			 * completion will occur. Tell stmf we are done.
564 			 */
565 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
566 			stmf_task_lu_done(task);
567 			return;
568 		}
569 		/*
570 		 * Start more xfers
571 		 */
572 		sbd_do_sgl_read_xfer(task, scmd, 0);
573 		return;
574 	}
575 	/*
576 	 * Sort out the failure
577 	 */
578 	if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
579 		/*
580 		 * If a previous error occurred, leave the command active
581 		 * and wait for the last completion to send the status check.
582 		 */
583 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
584 			if (scmd->nbufs == 0) {
585 				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
586 				stmf_scsilib_send_status(task, STATUS_CHECK,
587 				    STMF_SAA_READ_ERROR);
588 			}
589 			return;
590 		}
591 		/*
592 		 * Must have been a failure on current dbuf
593 		 */
594 		ASSERT(xfer_status != STMF_SUCCESS);
595 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
596 		stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
597 	}
598 }
599 
600 void
601 sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
602 				struct stmf_data_buf *dbuf)
603 {
604 	sbd_zvol_io_t *zvio = dbuf->db_lu_private;
605 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
606 	int ret;
607 	int scmd_err, scmd_xfer_done;
608 	stmf_status_t xfer_status = dbuf->db_xfer_status;
609 	uint32_t data_size = dbuf->db_data_size;
610 	hrtime_t xfer_start;
611 
612 	ASSERT(zvio);
613 
614 	/*
615 	 * Allow PP to free up resources before releasing the write bufs
616 	 * as writing to the backend could take some time.
617 	 */
618 	stmf_teardown_dbuf(task, dbuf);
619 
620 	scmd->nbufs--;	/* account for this dbuf */
621 	/*
622 	 * All data was queued and this is the last completion,
623 	 * but there could still be an error.
624 	 */
625 	scmd_xfer_done = (scmd->len == 0 && scmd->nbufs == 0);
626 	scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
627 	    (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
628 	    (xfer_status != STMF_SUCCESS));
629 
630 	/* start the accounting clock */
631 	stmf_lu_xfer_start(task);
632 	DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
633 	    uint8_t *, NULL, uint64_t, data_size,
634 	    uint64_t, zvio->zvio_offset, scsi_task_t *, task);
635 	xfer_start = gethrtime();
636 
637 	if (scmd_err) {
638 		/* just return the write buffers */
639 		sbd_zvol_rele_write_bufs_abort(sl, dbuf);
640 		ret = 0;
641 	} else {
642 		if (scmd_xfer_done)
643 			zvio->zvio_flags = ZVIO_COMMIT;
644 		else
645 			zvio->zvio_flags = 0;
646 		/* write the data */
647 		ret = sbd_zvol_rele_write_bufs(sl, dbuf);
648 	}
649 
650 	/* finalize accounting */
651 	stmf_lu_xfer_done(task, B_FALSE /* not read */, data_size,
652 	    (gethrtime() - xfer_start));
653 	DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
654 	    uint8_t *, NULL, uint64_t, data_size,
655 	    uint64_t, zvio->zvio_offset, int, ret,  scsi_task_t *, task);
656 
657 	if (ret != 0) {
658 		/* update the error flag */
659 		scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
660 		scmd_err = 1;
661 	}
662 
663 	/* Release the dbuf */
664 	stmf_free(dbuf);
665 
666 	/*
667 	 * Release the state lock if this is the last completion.
668 	 * If this is the last dbuf on task and all data has been
669 	 * transferred or an error encountered, then no more dbufs
670 	 * will be queued.
671 	 */
672 	if (scmd->nbufs == 0 && (scmd->len == 0 || scmd_err)) {
673 		/* all DMU state has been released */
674 		rw_exit(&sl->sl_access_state_lock);
675 	}
676 	/*
677 	 * If there have been no errors, either complete the task
678 	 * or issue more data xfer operations.
679 	 */
680 	if (!scmd_err) {
681 		/* This chunk completed successfully */
682 		task->task_nbytes_transferred += data_size;
683 		if (scmd_xfer_done) {
684 			/* This command completed successfully */
685 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
686 			if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
687 			    (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
688 				stmf_scsilib_send_status(task, STATUS_CHECK,
689 				    STMF_SAA_WRITE_ERROR);
690 			} else {
691 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
692 			}
693 			return;
694 		}
695 		/*
696 		 * Start more xfers
697 		 */
698 		sbd_do_sgl_write_xfer(task, scmd, 0);
699 		return;
700 	}
701 	/*
702 	 * Sort out the failure
703 	 */
704 	if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
705 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
706 			if (scmd->nbufs == 0) {
707 				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
708 				stmf_scsilib_send_status(task, STATUS_CHECK,
709 				    STMF_SAA_WRITE_ERROR);
710 			}
711 			/*
712 			 * Leave the command active until last dbuf completes.
713 			 */
714 			return;
715 		}
716 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
717 		ASSERT(xfer_status != STMF_SUCCESS);
718 		stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
719 	}
720 }
721 
722 /*
723  * Handle a copy operation using the zvol interface.
724  *
725  * Similar to the sbd_data_read/write path, except it goes directly through
726  * the zvol interfaces. It can pass a port provider sglist in the
727  * form of uio which is lost through the vn_rdwr path.
728  *
729  * Returns:
730  *	STMF_SUCCESS - request handled
731  *	STMF_FAILURE - request not handled, caller must deal with error
732  */
733 static stmf_status_t
734 sbd_copy_rdwr(scsi_task_t *task, uint64_t laddr, stmf_data_buf_t *dbuf,
735     int cmd, int commit)
736 {
737 	sbd_lu_t		*sl = task->task_lu->lu_provider_private;
738 	struct uio		uio;
739 	struct iovec		*iov, *tiov, iov1[8];
740 	uint32_t		len, resid;
741 	int			ret, i, iovcnt, flags;
742 	hrtime_t		xfer_start;
743 	boolean_t		is_read;
744 
745 	ASSERT(cmd == SBD_CMD_SCSI_READ || cmd == SBD_CMD_SCSI_WRITE);
746 
747 	is_read = (cmd == SBD_CMD_SCSI_READ) ? B_TRUE : B_FALSE;
748 	iovcnt = dbuf->db_sglist_length;
749 	/* use the stack for small iovecs */
750 	if (iovcnt > 8) {
751 		iov = kmem_alloc(iovcnt * sizeof (*iov), KM_SLEEP);
752 	} else {
753 		iov = &iov1[0];
754 	}
755 
756 	/* Convert dbuf sglist to iovec format */
757 	len = dbuf->db_data_size;
758 	resid = len;
759 	tiov = iov;
760 	for (i = 0; i < iovcnt; i++) {
761 		tiov->iov_base = (caddr_t)dbuf->db_sglist[i].seg_addr;
762 		tiov->iov_len = MIN(resid, dbuf->db_sglist[i].seg_length);
763 		resid -= tiov->iov_len;
764 		tiov++;
765 	}
766 	if (resid != 0) {
767 		cmn_err(CE_WARN, "inconsistant sglist rem %d", resid);
768 		if (iov != &iov1[0])
769 			kmem_free(iov, iovcnt * sizeof (*iov));
770 		return (STMF_FAILURE);
771 	}
772 	/* Setup the uio struct */
773 	uio.uio_iov = iov;
774 	uio.uio_iovcnt = iovcnt;
775 	uio.uio_loffset = laddr;
776 	uio.uio_segflg = (short)UIO_SYSSPACE;
777 	uio.uio_resid = (uint64_t)len;
778 	uio.uio_llimit = RLIM64_INFINITY;
779 
780 	/* start the accounting clock */
781 	stmf_lu_xfer_start(task);
782 	xfer_start = gethrtime();
783 	if (is_read == B_TRUE) {
784 		uio.uio_fmode = FREAD;
785 		uio.uio_extflg = UIO_COPY_CACHED;
786 		DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
787 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr,
788 		    scsi_task_t *, task);
789 
790 		/* Fetch the data */
791 		ret = sbd_zvol_copy_read(sl, &uio);
792 
793 		DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
794 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
795 		    scsi_task_t *, task);
796 	} else {
797 		uio.uio_fmode = FWRITE;
798 		uio.uio_extflg = UIO_COPY_DEFAULT;
799 		DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
800 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr,
801 		    scsi_task_t *, task);
802 
803 		flags = (commit) ? ZVIO_COMMIT : 0;
804 		/* Write the data */
805 		ret = sbd_zvol_copy_write(sl, &uio, flags);
806 
807 		DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
808 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
809 		    scsi_task_t *, task);
810 	}
811 	/* finalize accounting */
812 	stmf_lu_xfer_done(task, is_read, (uint64_t)len,
813 	    (gethrtime() - xfer_start));
814 
815 	if (iov != &iov1[0])
816 		kmem_free(iov, iovcnt * sizeof (*iov));
817 	if (ret != 0) {
818 		/* Backend I/O error */
819 		return (STMF_FAILURE);
820 	}
821 	return (STMF_SUCCESS);
822 }
823 
824 void
825 sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
826 {
827 	uint64_t lba, laddr;
828 	uint32_t len;
829 	uint8_t op = task->task_cdb[0];
830 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
831 	sbd_cmd_t *scmd;
832 	stmf_data_buf_t *dbuf;
833 	int fast_path;
834 
835 	if (op == SCMD_READ) {
836 		lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
837 		len = (uint32_t)task->task_cdb[4];
838 
839 		if (len == 0) {
840 			len = 256;
841 		}
842 	} else if (op == SCMD_READ_G1) {
843 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
844 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
845 	} else if (op == SCMD_READ_G5) {
846 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
847 		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
848 	} else if (op == SCMD_READ_G4) {
849 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
850 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
851 	} else {
852 		stmf_scsilib_send_status(task, STATUS_CHECK,
853 		    STMF_SAA_INVALID_OPCODE);
854 		return;
855 	}
856 
857 	laddr = lba << sl->sl_data_blocksize_shift;
858 	len <<= sl->sl_data_blocksize_shift;
859 
860 	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
861 		stmf_scsilib_send_status(task, STATUS_CHECK,
862 		    STMF_SAA_LBA_OUT_OF_RANGE);
863 		return;
864 	}
865 
866 	task->task_cmd_xfer_length = len;
867 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
868 		task->task_expected_xfer_length = len;
869 	}
870 
871 	if (len != task->task_expected_xfer_length) {
872 		fast_path = 0;
873 		len = (len > task->task_expected_xfer_length) ?
874 		    task->task_expected_xfer_length : len;
875 	} else {
876 		fast_path = 1;
877 	}
878 
879 	if (len == 0) {
880 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
881 		return;
882 	}
883 
884 	/*
885 	 * Determine if this read can directly use DMU buffers.
886 	 */
887 	if (sbd_zcopy & (2|1) &&		/* Debug switch */
888 	    initial_dbuf == NULL &&		/* No PP buffer passed in */
889 	    sl->sl_flags & SL_CALL_ZVOL &&	/* zvol backing store */
890 	    (task->task_additional_flags &
891 	    TASK_AF_ACCEPT_LU_DBUF))		/* PP allows it */
892 	{
893 		/*
894 		 * Reduced copy path
895 		 */
896 		uint32_t copy_threshold, minsize;
897 		int ret;
898 
899 		/*
900 		 * The sl_access_state_lock will be held shared
901 		 * for the entire request and released when all
902 		 * dbufs have completed.
903 		 */
904 		rw_enter(&sl->sl_access_state_lock, RW_READER);
905 		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
906 			rw_exit(&sl->sl_access_state_lock);
907 			stmf_scsilib_send_status(task, STATUS_CHECK,
908 			    STMF_SAA_READ_ERROR);
909 			return;
910 		}
911 
912 		/*
913 		 * Check if setup is more expensive than copying the data.
914 		 *
915 		 * Use the global over-ride sbd_zcopy_threshold if set.
916 		 */
917 		copy_threshold = (sbd_copy_threshold > 0) ?
918 		    sbd_copy_threshold : task->task_copy_threshold;
919 		minsize = len;
920 		if (len < copy_threshold &&
921 		    (dbuf = stmf_alloc_dbuf(task, len, &minsize, 0)) != 0) {
922 
923 			ret = sbd_copy_rdwr(task, laddr, dbuf,
924 			    SBD_CMD_SCSI_READ, 0);
925 			/* done with the backend */
926 			rw_exit(&sl->sl_access_state_lock);
927 			if (ret != 0) {
928 				/* backend error */
929 				stmf_scsilib_send_status(task, STATUS_CHECK,
930 				    STMF_SAA_READ_ERROR);
931 			} else {
932 				/* send along good data */
933 				dbuf->db_relative_offset = 0;
934 				dbuf->db_data_size = len;
935 				dbuf->db_flags = DB_SEND_STATUS_GOOD |
936 				    DB_DIRECTION_TO_RPORT;
937 				/* XXX keep for FW? */
938 				DTRACE_PROBE4(sbd__xfer,
939 				    struct scsi_task *, task,
940 				    struct stmf_data_buf *, dbuf,
941 				    uint64_t, laddr, uint32_t, len);
942 				(void) stmf_xfer_data(task, dbuf,
943 				    STMF_IOF_LU_DONE);
944 			}
945 			return;
946 		}
947 
948 		/* committed to reduced copy */
949 		if (task->task_lu_private) {
950 			scmd = (sbd_cmd_t *)task->task_lu_private;
951 		} else {
952 			scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t),
953 			    KM_SLEEP);
954 			task->task_lu_private = scmd;
955 		}
956 		/*
957 		 * Setup scmd to track read progress.
958 		 */
959 		scmd->flags = SBD_SCSI_CMD_ACTIVE;
960 		scmd->cmd_type = SBD_CMD_SCSI_READ;
961 		scmd->nbufs = 0;
962 		scmd->addr = laddr;
963 		scmd->len = len;
964 		scmd->current_ro = 0;
965 
966 		/*
967 		 * Kick-off the read.
968 		 */
969 		sbd_do_sgl_read_xfer(task, scmd, 1);
970 		return;
971 	}
972 
973 	if (initial_dbuf == NULL) {
974 		uint32_t maxsize, minsize, old_minsize;
975 
976 		maxsize = (len > (128*1024)) ? 128*1024 : len;
977 		minsize = maxsize >> 2;
978 		do {
979 			old_minsize = minsize;
980 			initial_dbuf = stmf_alloc_dbuf(task, maxsize,
981 			    &minsize, 0);
982 		} while ((initial_dbuf == NULL) && (old_minsize > minsize) &&
983 		    (minsize >= 512));
984 		if (initial_dbuf == NULL) {
985 			stmf_scsilib_send_status(task, STATUS_QFULL, 0);
986 			return;
987 		}
988 	}
989 	dbuf = initial_dbuf;
990 
991 	if ((dbuf->db_buf_size >= len) && fast_path &&
992 	    (dbuf->db_sglist_length == 1)) {
993 		if (sbd_data_read(sl, task, laddr, (uint64_t)len,
994 		    dbuf->db_sglist[0].seg_addr) == STMF_SUCCESS) {
995 			dbuf->db_relative_offset = 0;
996 			dbuf->db_data_size = len;
997 			dbuf->db_flags = DB_SEND_STATUS_GOOD |
998 			    DB_DIRECTION_TO_RPORT;
999 			/* XXX keep for FW? */
1000 			DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
1001 			    struct stmf_data_buf *, dbuf,
1002 			    uint64_t, laddr, uint32_t, len);
1003 			(void) stmf_xfer_data(task, dbuf, STMF_IOF_LU_DONE);
1004 		} else {
1005 			stmf_scsilib_send_status(task, STATUS_CHECK,
1006 			    STMF_SAA_READ_ERROR);
1007 		}
1008 		return;
1009 	}
1010 
1011 	if (task->task_lu_private) {
1012 		scmd = (sbd_cmd_t *)task->task_lu_private;
1013 	} else {
1014 		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
1015 		task->task_lu_private = scmd;
1016 	}
1017 	scmd->flags = SBD_SCSI_CMD_ACTIVE;
1018 	scmd->cmd_type = SBD_CMD_SCSI_READ;
1019 	scmd->nbufs = 1;
1020 	scmd->addr = laddr;
1021 	scmd->len = len;
1022 	scmd->current_ro = 0;
1023 
1024 	sbd_do_read_xfer(task, scmd, dbuf);
1025 }
1026 
1027 void
1028 sbd_do_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
1029     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
1030 {
1031 	uint32_t len;
1032 	int bufs_to_take;
1033 
1034 	if (scmd->len == 0) {
1035 		goto DO_WRITE_XFER_DONE;
1036 	}
1037 
1038 	/* Lets try not to hog all the buffers the port has. */
1039 	bufs_to_take = ((task->task_max_nbufs > 2) &&
1040 	    (task->task_cmd_xfer_length < (32 * 1024))) ? 2 :
1041 	    task->task_max_nbufs;
1042 
1043 	if ((dbuf != NULL) &&
1044 	    ((dbuf->db_flags & DB_DONT_REUSE) || (dbuf_reusable == 0))) {
1045 		/* free current dbuf and allocate a new one */
1046 		stmf_free_dbuf(task, dbuf);
1047 		dbuf = NULL;
1048 	}
1049 	if (scmd->nbufs >= bufs_to_take) {
1050 		goto DO_WRITE_XFER_DONE;
1051 	}
1052 	if (dbuf == NULL) {
1053 		uint32_t maxsize, minsize, old_minsize;
1054 
1055 		maxsize = (scmd->len > (128*1024)) ? 128*1024 :
1056 		    scmd->len;
1057 		minsize = maxsize >> 2;
1058 		do {
1059 			old_minsize = minsize;
1060 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
1061 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
1062 		    (minsize >= 512));
1063 		if (dbuf == NULL) {
1064 			if (scmd->nbufs == 0) {
1065 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1066 				    STMF_ALLOC_FAILURE, NULL);
1067 			}
1068 			return;
1069 		}
1070 	}
1071 
1072 	len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size :
1073 	    scmd->len;
1074 
1075 	dbuf->db_relative_offset = scmd->current_ro;
1076 	dbuf->db_data_size = len;
1077 	dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
1078 	(void) stmf_xfer_data(task, dbuf, 0);
1079 	scmd->nbufs++; /* outstanding port xfers and bufs used */
1080 	scmd->len -= len;
1081 	scmd->current_ro += len;
1082 
1083 	if ((scmd->len != 0) && (scmd->nbufs < bufs_to_take)) {
1084 		sbd_do_write_xfer(task, scmd, NULL, 0);
1085 	}
1086 	return;
1087 
1088 DO_WRITE_XFER_DONE:
1089 	if (dbuf != NULL) {
1090 		stmf_free_dbuf(task, dbuf);
1091 	}
1092 }
1093 
1094 void
1095 sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
1096 {
1097 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1098 	sbd_zvol_io_t *zvio;
1099 	int ret;
1100 	uint32_t xfer_len, max_len, first_len;
1101 	stmf_status_t xstat;
1102 	stmf_data_buf_t *dbuf;
1103 	uint_t nblks;
1104 	uint64_t blksize = sl->sl_blksize;
1105 	uint64_t offset;
1106 	size_t db_private_sz;
1107 	uintptr_t pad;
1108 
1109 	ASSERT(rw_read_held(&sl->sl_access_state_lock));
1110 	ASSERT((sl->sl_flags & SL_MEDIA_LOADED) != 0);
1111 
1112 	/*
1113 	 * Calculate the limits on xfer_len to the minimum of :
1114 	 *    - task limit
1115 	 *    - lun limit
1116 	 *    - sbd global limit if set
1117 	 *    - first xfer limit if set
1118 	 *
1119 	 * First, protect against silly over-ride value
1120 	 */
1121 	if (sbd_max_xfer_len && ((sbd_max_xfer_len % DEV_BSIZE) != 0)) {
1122 		cmn_err(CE_WARN, "sbd_max_xfer_len invalid %d, resetting\n",
1123 		    sbd_max_xfer_len);
1124 		sbd_max_xfer_len = 0;
1125 	}
1126 	if (sbd_1st_xfer_len && ((sbd_1st_xfer_len % DEV_BSIZE) != 0)) {
1127 		cmn_err(CE_WARN, "sbd_1st_xfer_len invalid %d, resetting\n",
1128 		    sbd_1st_xfer_len);
1129 		sbd_1st_xfer_len = 0;
1130 	}
1131 
1132 	max_len = MIN(task->task_max_xfer_len, sl->sl_max_xfer_len);
1133 	if (sbd_max_xfer_len)
1134 		max_len = MIN(max_len, sbd_max_xfer_len);
1135 	/*
1136 	 * Special case the first xfer if hints are set.
1137 	 */
1138 	if (first_xfer && (sbd_1st_xfer_len || task->task_1st_xfer_len)) {
1139 		/* global over-ride has precedence */
1140 		if (sbd_1st_xfer_len)
1141 			first_len = sbd_1st_xfer_len;
1142 		else
1143 			first_len = task->task_1st_xfer_len;
1144 	} else {
1145 		first_len = 0;
1146 	}
1147 
1148 
1149 	while (scmd->len && scmd->nbufs < task->task_max_nbufs) {
1150 
1151 		xfer_len = MIN(max_len, scmd->len);
1152 		if (first_len) {
1153 			xfer_len = MIN(xfer_len, first_len);
1154 			first_len = 0;
1155 		}
1156 		if (xfer_len < scmd->len) {
1157 			/*
1158 			 * Attempt to end xfer on a block boundary.
1159 			 * The only way this does not happen is if the
1160 			 * xfer_len is small enough to stay contained
1161 			 * within the same block.
1162 			 */
1163 			uint64_t xfer_offset, xfer_aligned_end;
1164 
1165 			xfer_offset = scmd->addr + scmd->current_ro;
1166 			xfer_aligned_end =
1167 			    P2ALIGN(xfer_offset+xfer_len, blksize);
1168 			if (xfer_aligned_end > xfer_offset)
1169 				xfer_len = xfer_aligned_end - xfer_offset;
1170 		}
1171 		/*
1172 		 * Allocate object to track the write and reserve
1173 		 * enough space for scatter/gather list.
1174 		 */
1175 		offset = scmd->addr + scmd->current_ro;
1176 		nblks = sbd_zvol_numsegs(sl, offset, xfer_len);
1177 		db_private_sz = sizeof (*zvio) + sizeof (uintptr_t) /* PAD */ +
1178 		    (nblks * sizeof (stmf_sglist_ent_t));
1179 		dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, db_private_sz,
1180 		    AF_DONTZERO);
1181 
1182 		/*
1183 		 * Setup the dbuf
1184 		 *
1185 		 * XXX Framework does not handle variable length sglists
1186 		 * properly, so setup db_lu_private and db_port_private
1187 		 * fields here. db_stmf_private is properly set for
1188 		 * calls to stmf_free.
1189 		 */
1190 		if (dbuf->db_port_private == NULL) {
1191 			/*
1192 			 * XXX Framework assigns space to PP after db_sglist[0]
1193 			 */
1194 			cmn_err(CE_PANIC, "db_port_private == NULL");
1195 		}
1196 		pad = (uintptr_t)&dbuf->db_sglist[nblks];
1197 		dbuf->db_lu_private = (void *)P2ROUNDUP(pad, sizeof (pad));
1198 		dbuf->db_port_private = NULL;
1199 		dbuf->db_buf_size = xfer_len;
1200 		dbuf->db_data_size = xfer_len;
1201 		dbuf->db_relative_offset = scmd->current_ro;
1202 		dbuf->db_sglist_length = (uint16_t)nblks;
1203 		dbuf->db_xfer_status = 0;
1204 		dbuf->db_handle = 0;
1205 		dbuf->db_flags = (DB_DONT_CACHE | DB_DONT_REUSE |
1206 		    DB_DIRECTION_FROM_RPORT | DB_LU_DATA_BUF);
1207 
1208 		zvio = dbuf->db_lu_private;
1209 		zvio->zvio_offset = offset;
1210 
1211 		/* get the buffers */
1212 		ret = sbd_zvol_alloc_write_bufs(sl, dbuf);
1213 		if (ret != 0) {
1214 			/*
1215 			 * Could not allocate buffers from the backend;
1216 			 * treat it like an IO error.
1217 			 */
1218 			stmf_free(dbuf);
1219 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1220 			if (scmd->nbufs == 0) {
1221 				/*
1222 				 * Nothing queued, so no completions coming
1223 				 */
1224 				stmf_scsilib_send_status(task, STATUS_CHECK,
1225 				    STMF_SAA_WRITE_ERROR);
1226 				rw_exit(&sl->sl_access_state_lock);
1227 			}
1228 			/*
1229 			 * Completions of previous buffers will cleanup.
1230 			 */
1231 			return;
1232 		}
1233 
1234 		/*
1235 		 * Allow PP to do setup
1236 		 */
1237 		xstat = stmf_setup_dbuf(task, dbuf, 0);
1238 		if (xstat != STMF_SUCCESS) {
1239 			/*
1240 			 * This could happen if the driver cannot get the
1241 			 * DDI resources it needs for this request.
1242 			 * If other dbufs are queued, try again when the next
1243 			 * one completes, otherwise give up.
1244 			 */
1245 			sbd_zvol_rele_write_bufs_abort(sl, dbuf);
1246 			stmf_free(dbuf);
1247 			if (scmd->nbufs > 0) {
1248 				/* completion of previous dbuf will retry */
1249 				return;
1250 			}
1251 			/*
1252 			 * Done with this command.
1253 			 */
1254 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1255 			if (first_xfer)
1256 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1257 			else
1258 				stmf_scsilib_send_status(task, STATUS_CHECK,
1259 				    STMF_SAA_WRITE_ERROR);
1260 			rw_exit(&sl->sl_access_state_lock);
1261 			return;
1262 		}
1263 
1264 		/*
1265 		 * dbuf is now queued on task
1266 		 */
1267 		scmd->nbufs++;
1268 
1269 		xstat = stmf_xfer_data(task, dbuf, 0);
1270 		switch (xstat) {
1271 		case STMF_SUCCESS:
1272 			break;
1273 		case STMF_BUSY:
1274 			/*
1275 			 * The dbuf is queued on the task, but unknown
1276 			 * to the PP, thus no completion will occur.
1277 			 */
1278 			sbd_zvol_rele_write_bufs_abort(sl, dbuf);
1279 			stmf_teardown_dbuf(task, dbuf);
1280 			stmf_free(dbuf);
1281 			scmd->nbufs--;
1282 			if (scmd->nbufs > 0) {
1283 				/* completion of previous dbuf will retry */
1284 				return;
1285 			}
1286 			/*
1287 			 * Done with this command.
1288 			 */
1289 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1290 			if (first_xfer)
1291 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1292 			else
1293 				stmf_scsilib_send_status(task, STATUS_CHECK,
1294 				    STMF_SAA_WRITE_ERROR);
1295 			rw_exit(&sl->sl_access_state_lock);
1296 			return;
1297 		case STMF_ABORTED:
1298 			/*
1299 			 * Completion code will cleanup.
1300 			 */
1301 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1302 			return;
1303 		}
1304 		/*
1305 		 * Update the xfer progress.
1306 		 */
1307 		scmd->len -= xfer_len;
1308 		scmd->current_ro += xfer_len;
1309 	}
1310 }
1311 
1312 void
1313 sbd_handle_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
1314     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
1315 {
1316 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1317 	uint64_t laddr;
1318 	uint32_t buflen, iolen;
1319 	int ndx;
1320 
1321 	if (scmd->nbufs > 0) {
1322 		/*
1323 		 * Decrement the count to indicate the port xfer
1324 		 * into the dbuf has completed even though the buf is
1325 		 * still in use here in the LU provider.
1326 		 */
1327 		scmd->nbufs--;
1328 	}
1329 
1330 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
1331 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1332 		    dbuf->db_xfer_status, NULL);
1333 		return;
1334 	}
1335 
1336 	if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1337 		goto WRITE_XFER_DONE;
1338 	}
1339 
1340 	if (scmd->len != 0) {
1341 		/*
1342 		 * Initiate the next port xfer to occur in parallel
1343 		 * with writing this buf.
1344 		 */
1345 		sbd_do_write_xfer(task, scmd, NULL, 0);
1346 	}
1347 
1348 	laddr = scmd->addr + dbuf->db_relative_offset;
1349 
1350 	/*
1351 	 * If this is going to a zvol, use the direct call to
1352 	 * sbd_zvol_copy_{read,write}. The direct call interface is
1353 	 * restricted to PPs that accept sglists, but that is not required.
1354 	 */
1355 	if (sl->sl_flags & SL_CALL_ZVOL &&
1356 	    (task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF) &&
1357 	    (sbd_zcopy & (4|1))) {
1358 		int commit;
1359 
1360 		commit = (scmd->len == 0 && scmd->nbufs == 0);
1361 		if (sbd_copy_rdwr(task, laddr, dbuf, SBD_CMD_SCSI_WRITE,
1362 		    commit) != STMF_SUCCESS)
1363 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1364 		buflen = dbuf->db_data_size;
1365 	} else {
1366 		for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
1367 		    (ndx < dbuf->db_sglist_length); ndx++) {
1368 			iolen = min(dbuf->db_data_size - buflen,
1369 			    dbuf->db_sglist[ndx].seg_length);
1370 			if (iolen == 0)
1371 				break;
1372 			if (sbd_data_write(sl, task, laddr, (uint64_t)iolen,
1373 			    dbuf->db_sglist[ndx].seg_addr) != STMF_SUCCESS) {
1374 				scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1375 				break;
1376 			}
1377 			buflen += iolen;
1378 			laddr += (uint64_t)iolen;
1379 		}
1380 	}
1381 	task->task_nbytes_transferred += buflen;
1382 WRITE_XFER_DONE:
1383 	if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1384 		stmf_free_dbuf(task, dbuf);
1385 		if (scmd->nbufs)
1386 			return;	/* wait for all buffers to complete */
1387 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1388 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1389 			stmf_scsilib_send_status(task, STATUS_CHECK,
1390 			    STMF_SAA_WRITE_ERROR);
1391 		} else {
1392 			/*
1393 			 * If SYNC_WRITE flag is on then we need to flush
1394 			 * cache before sending status.
1395 			 * Note: this may be a no-op because of how
1396 			 * SL_WRITEBACK_CACHE_DISABLE and
1397 			 * SL_FLUSH_ON_DISABLED_WRITECACHE are set, but not
1398 			 * worth code complexity of checking those in this code
1399 			 * path, SBD_SCSI_CMD_SYNC_WRITE is rarely set.
1400 			 */
1401 			if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
1402 			    (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
1403 				stmf_scsilib_send_status(task, STATUS_CHECK,
1404 				    STMF_SAA_WRITE_ERROR);
1405 			} else {
1406 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1407 			}
1408 		}
1409 		return;
1410 	}
1411 	sbd_do_write_xfer(task, scmd, dbuf, dbuf_reusable);
1412 }
1413 
1414 /*
1415  * Return true if copy avoidance is beneficial.
1416  */
1417 static int
1418 sbd_zcopy_write_useful(scsi_task_t *task, uint64_t laddr, uint32_t len,
1419     uint64_t blksize)
1420 {
1421 	/*
1422 	 * If there is a global copy threshold over-ride, use it.
1423 	 * Otherwise use the PP value with the caveat that at least
1424 	 * 1/2 the data must avoid being copied to be useful.
1425 	 */
1426 	if (sbd_copy_threshold > 0) {
1427 		return (len >= sbd_copy_threshold);
1428 	} else {
1429 		uint64_t no_copy_span;
1430 
1431 		/* sub-blocksize writes always copy */
1432 		if (len < task->task_copy_threshold || len < blksize)
1433 			return (0);
1434 		/*
1435 		 * Calculate amount of data that will avoid the copy path.
1436 		 * The calculation is only valid if len >= blksize.
1437 		 */
1438 		no_copy_span = P2ALIGN(laddr+len, blksize) -
1439 		    P2ROUNDUP(laddr, blksize);
1440 		return (no_copy_span >= len/2);
1441 	}
1442 }
1443 
1444 void
1445 sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
1446 {
1447 	uint64_t lba, laddr;
1448 	uint32_t len;
1449 	uint8_t op = task->task_cdb[0], do_immediate_data = 0;
1450 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1451 	sbd_cmd_t *scmd;
1452 	stmf_data_buf_t *dbuf;
1453 	uint8_t	sync_wr_flag = 0;
1454 
1455 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
1456 		stmf_scsilib_send_status(task, STATUS_CHECK,
1457 		    STMF_SAA_WRITE_PROTECTED);
1458 		return;
1459 	}
1460 	if (op == SCMD_WRITE) {
1461 		lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
1462 		len = (uint32_t)task->task_cdb[4];
1463 
1464 		if (len == 0) {
1465 			len = 256;
1466 		}
1467 	} else if (op == SCMD_WRITE_G1) {
1468 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1469 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
1470 	} else if (op == SCMD_WRITE_G5) {
1471 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1472 		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
1473 	} else if (op == SCMD_WRITE_G4) {
1474 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
1475 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1476 	} else if (op == SCMD_WRITE_VERIFY) {
1477 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1478 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
1479 		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1480 	} else if (op == SCMD_WRITE_VERIFY_G5) {
1481 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1482 		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
1483 		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1484 	} else if (op == SCMD_WRITE_VERIFY_G4) {
1485 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
1486 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1487 		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1488 	} else {
1489 		stmf_scsilib_send_status(task, STATUS_CHECK,
1490 		    STMF_SAA_INVALID_OPCODE);
1491 		return;
1492 	}
1493 
1494 	laddr = lba << sl->sl_data_blocksize_shift;
1495 	len <<= sl->sl_data_blocksize_shift;
1496 
1497 	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
1498 		stmf_scsilib_send_status(task, STATUS_CHECK,
1499 		    STMF_SAA_LBA_OUT_OF_RANGE);
1500 		return;
1501 	}
1502 
1503 	task->task_cmd_xfer_length = len;
1504 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1505 		task->task_expected_xfer_length = len;
1506 	}
1507 
1508 	len = (len > task->task_expected_xfer_length) ?
1509 	    task->task_expected_xfer_length : len;
1510 
1511 	if (len == 0) {
1512 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1513 		return;
1514 	}
1515 
1516 	if (sbd_zcopy & (4|1) &&		/* Debug switch */
1517 	    initial_dbuf == NULL &&		/* No PP buf passed in */
1518 	    sl->sl_flags & SL_CALL_ZVOL &&	/* zvol backing store */
1519 	    (task->task_additional_flags &
1520 	    TASK_AF_ACCEPT_LU_DBUF) &&		/* PP allows it */
1521 	    sbd_zcopy_write_useful(task, laddr, len, sl->sl_blksize)) {
1522 
1523 		/*
1524 		 * XXX Note that disallowing initial_dbuf will eliminate
1525 		 * iSCSI from participating. For small writes, that is
1526 		 * probably ok. For large writes, it may be best to just
1527 		 * copy the data from the initial dbuf and use zcopy for
1528 		 * the rest.
1529 		 */
1530 		rw_enter(&sl->sl_access_state_lock, RW_READER);
1531 		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
1532 			rw_exit(&sl->sl_access_state_lock);
1533 			stmf_scsilib_send_status(task, STATUS_CHECK,
1534 			    STMF_SAA_READ_ERROR);
1535 			return;
1536 		}
1537 		/*
1538 		 * Setup scmd to track the write progress.
1539 		 */
1540 		if (task->task_lu_private) {
1541 			scmd = (sbd_cmd_t *)task->task_lu_private;
1542 		} else {
1543 			scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t),
1544 			    KM_SLEEP);
1545 			task->task_lu_private = scmd;
1546 		}
1547 		scmd->flags = SBD_SCSI_CMD_ACTIVE | sync_wr_flag;
1548 		scmd->cmd_type = SBD_CMD_SCSI_WRITE;
1549 		scmd->nbufs = 0;
1550 		scmd->addr = laddr;
1551 		scmd->len = len;
1552 		scmd->current_ro = 0;
1553 		sbd_do_sgl_write_xfer(task, scmd, 1);
1554 		return;
1555 	}
1556 
1557 	if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) {
1558 		if (initial_dbuf->db_data_size > len) {
1559 			if (initial_dbuf->db_data_size >
1560 			    task->task_expected_xfer_length) {
1561 				/* protocol error */
1562 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1563 				    STMF_INVALID_ARG, NULL);
1564 				return;
1565 			}
1566 			initial_dbuf->db_data_size = len;
1567 		}
1568 		do_immediate_data = 1;
1569 	}
1570 	dbuf = initial_dbuf;
1571 
1572 	if (task->task_lu_private) {
1573 		scmd = (sbd_cmd_t *)task->task_lu_private;
1574 	} else {
1575 		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
1576 		task->task_lu_private = scmd;
1577 	}
1578 	scmd->flags = SBD_SCSI_CMD_ACTIVE | sync_wr_flag;
1579 	scmd->cmd_type = SBD_CMD_SCSI_WRITE;
1580 	scmd->nbufs = 0;
1581 	scmd->addr = laddr;
1582 	scmd->len = len;
1583 	scmd->current_ro = 0;
1584 
1585 	if (do_immediate_data) {
1586 		/*
1587 		 * Account for data passed in this write command
1588 		 */
1589 		(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
1590 		scmd->len -= dbuf->db_data_size;
1591 		scmd->current_ro += dbuf->db_data_size;
1592 		dbuf->db_xfer_status = STMF_SUCCESS;
1593 		sbd_handle_write_xfer_completion(task, scmd, dbuf, 0);
1594 	} else {
1595 		sbd_do_write_xfer(task, scmd, dbuf, 0);
1596 	}
1597 }
1598 
1599 /*
1600  * Utility routine to handle small non performance data transfers to the
1601  * initiators. dbuf is an initial data buf (if any), 'p' points to a data
1602  * buffer which is source of data for transfer, cdb_xfer_size is the
1603  * transfer size based on CDB, cmd_xfer_size is the actual amount of data
1604  * which this command would transfer (the size of data pointed to by 'p').
1605  */
1606 void
1607 sbd_handle_short_read_transfers(scsi_task_t *task, stmf_data_buf_t *dbuf,
1608     uint8_t *p, uint32_t cdb_xfer_size, uint32_t cmd_xfer_size)
1609 {
1610 	uint32_t bufsize, ndx;
1611 	sbd_cmd_t *scmd;
1612 
1613 	cmd_xfer_size = min(cmd_xfer_size, cdb_xfer_size);
1614 
1615 	task->task_cmd_xfer_length = cmd_xfer_size;
1616 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1617 		task->task_expected_xfer_length = cmd_xfer_size;
1618 	} else {
1619 		cmd_xfer_size = min(cmd_xfer_size,
1620 		    task->task_expected_xfer_length);
1621 	}
1622 
1623 	if (cmd_xfer_size == 0) {
1624 		stmf_scsilib_send_status(task, STATUS_CHECK,
1625 		    STMF_SAA_INVALID_FIELD_IN_CDB);
1626 		return;
1627 	}
1628 	if (dbuf == NULL) {
1629 		uint32_t minsize = cmd_xfer_size;
1630 
1631 		dbuf = stmf_alloc_dbuf(task, cmd_xfer_size, &minsize, 0);
1632 	}
1633 	if (dbuf == NULL) {
1634 		stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1635 		return;
1636 	}
1637 
1638 	for (bufsize = 0, ndx = 0; bufsize < cmd_xfer_size; ndx++) {
1639 		uint8_t *d;
1640 		uint32_t s;
1641 
1642 		d = dbuf->db_sglist[ndx].seg_addr;
1643 		s = min((cmd_xfer_size - bufsize),
1644 		    dbuf->db_sglist[ndx].seg_length);
1645 		bcopy(p+bufsize, d, s);
1646 		bufsize += s;
1647 	}
1648 	dbuf->db_relative_offset = 0;
1649 	dbuf->db_data_size = cmd_xfer_size;
1650 	dbuf->db_flags = DB_DIRECTION_TO_RPORT;
1651 
1652 	if (task->task_lu_private == NULL) {
1653 		task->task_lu_private =
1654 		    kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
1655 	}
1656 	scmd = (sbd_cmd_t *)task->task_lu_private;
1657 
1658 	scmd->cmd_type = SBD_CMD_SMALL_READ;
1659 	scmd->flags = SBD_SCSI_CMD_ACTIVE;
1660 	(void) stmf_xfer_data(task, dbuf, 0);
1661 }
1662 
1663 void
1664 sbd_handle_short_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
1665 				struct stmf_data_buf *dbuf)
1666 {
1667 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
1668 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1669 		    dbuf->db_xfer_status, NULL);
1670 		return;
1671 	}
1672 	task->task_nbytes_transferred = dbuf->db_data_size;
1673 	scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1674 	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1675 }
1676 
1677 void
1678 sbd_handle_short_write_transfers(scsi_task_t *task,
1679     stmf_data_buf_t *dbuf, uint32_t cdb_xfer_size)
1680 {
1681 	sbd_cmd_t *scmd;
1682 
1683 	task->task_cmd_xfer_length = cdb_xfer_size;
1684 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1685 		task->task_expected_xfer_length = cdb_xfer_size;
1686 	} else {
1687 		cdb_xfer_size = min(cdb_xfer_size,
1688 		    task->task_expected_xfer_length);
1689 	}
1690 
1691 	if (cdb_xfer_size == 0) {
1692 		stmf_scsilib_send_status(task, STATUS_CHECK,
1693 		    STMF_SAA_INVALID_FIELD_IN_CDB);
1694 		return;
1695 	}
1696 	if (task->task_lu_private == NULL) {
1697 		task->task_lu_private = kmem_zalloc(sizeof (sbd_cmd_t),
1698 		    KM_SLEEP);
1699 	} else {
1700 		bzero(task->task_lu_private, sizeof (sbd_cmd_t));
1701 	}
1702 	scmd = (sbd_cmd_t *)task->task_lu_private;
1703 	scmd->cmd_type = SBD_CMD_SMALL_WRITE;
1704 	scmd->flags = SBD_SCSI_CMD_ACTIVE;
1705 	scmd->len = cdb_xfer_size;
1706 	if (dbuf == NULL) {
1707 		uint32_t minsize = cdb_xfer_size;
1708 
1709 		dbuf = stmf_alloc_dbuf(task, cdb_xfer_size, &minsize, 0);
1710 		if (dbuf == NULL) {
1711 			stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1712 			    STMF_ALLOC_FAILURE, NULL);
1713 			return;
1714 		}
1715 		dbuf->db_data_size = cdb_xfer_size;
1716 		dbuf->db_relative_offset = 0;
1717 		dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
1718 		(void) stmf_xfer_data(task, dbuf, 0);
1719 	} else {
1720 		if (dbuf->db_data_size < cdb_xfer_size) {
1721 			stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1722 			    STMF_ABORTED, NULL);
1723 			return;
1724 		}
1725 		dbuf->db_data_size = cdb_xfer_size;
1726 		sbd_handle_short_write_xfer_completion(task, dbuf);
1727 	}
1728 }
1729 
1730 void
1731 sbd_handle_short_write_xfer_completion(scsi_task_t *task,
1732     stmf_data_buf_t *dbuf)
1733 {
1734 	sbd_cmd_t *scmd;
1735 	stmf_status_t st_ret;
1736 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1737 
1738 	/*
1739 	 * For now lets assume we will get only one sglist element
1740 	 * for short writes. If that ever changes, we should allocate
1741 	 * a local buffer and copy all the sg elements to one linear space.
1742 	 */
1743 	if ((dbuf->db_xfer_status != STMF_SUCCESS) ||
1744 	    (dbuf->db_sglist_length > 1)) {
1745 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1746 		    dbuf->db_xfer_status, NULL);
1747 		return;
1748 	}
1749 
1750 	task->task_nbytes_transferred = dbuf->db_data_size;
1751 	scmd = (sbd_cmd_t *)task->task_lu_private;
1752 	scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1753 
1754 	/* Lets find out who to call */
1755 	switch (task->task_cdb[0]) {
1756 	case SCMD_MODE_SELECT:
1757 	case SCMD_MODE_SELECT_G1:
1758 		if (sl->sl_access_state == SBD_LU_STANDBY) {
1759 			st_ret = stmf_proxy_scsi_cmd(task, dbuf);
1760 			if (st_ret != STMF_SUCCESS) {
1761 				stmf_scsilib_send_status(task, STATUS_CHECK,
1762 				    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
1763 			}
1764 		} else {
1765 			sbd_handle_mode_select_xfer(task,
1766 			    dbuf->db_sglist[0].seg_addr, dbuf->db_data_size);
1767 		}
1768 		break;
1769 	case SCMD_UNMAP:
1770 		sbd_handle_unmap_xfer(task,
1771 		    dbuf->db_sglist[0].seg_addr, dbuf->db_data_size);
1772 		break;
1773 	case SCMD_PERSISTENT_RESERVE_OUT:
1774 		if (sl->sl_access_state == SBD_LU_STANDBY) {
1775 			st_ret = stmf_proxy_scsi_cmd(task, dbuf);
1776 			if (st_ret != STMF_SUCCESS) {
1777 				stmf_scsilib_send_status(task, STATUS_CHECK,
1778 				    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
1779 			}
1780 		} else {
1781 			sbd_handle_pgr_out_data(task, dbuf);
1782 		}
1783 		break;
1784 	default:
1785 		/* This should never happen */
1786 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1787 		    STMF_ABORTED, NULL);
1788 	}
1789 }
1790 
1791 void
1792 sbd_handle_read_capacity(struct scsi_task *task,
1793     struct stmf_data_buf *initial_dbuf)
1794 {
1795 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1796 	uint32_t cdb_len;
1797 	uint8_t p[32];
1798 	uint64_t s;
1799 	uint16_t blksize;
1800 
1801 	s = sl->sl_lu_size >> sl->sl_data_blocksize_shift;
1802 	s--;
1803 	blksize = ((uint16_t)1) << sl->sl_data_blocksize_shift;
1804 
1805 	switch (task->task_cdb[0]) {
1806 	case SCMD_READ_CAPACITY:
1807 		if (s & 0xffffffff00000000ull) {
1808 			p[0] = p[1] = p[2] = p[3] = 0xFF;
1809 		} else {
1810 			p[0] = (s >> 24) & 0xff;
1811 			p[1] = (s >> 16) & 0xff;
1812 			p[2] = (s >> 8) & 0xff;
1813 			p[3] = s & 0xff;
1814 		}
1815 		p[4] = 0; p[5] = 0;
1816 		p[6] = (blksize >> 8) & 0xff;
1817 		p[7] = blksize & 0xff;
1818 		sbd_handle_short_read_transfers(task, initial_dbuf, p, 8, 8);
1819 		break;
1820 
1821 	case SCMD_SVC_ACTION_IN_G4:
1822 		cdb_len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1823 		bzero(p, 32);
1824 		p[0] = (s >> 56) & 0xff;
1825 		p[1] = (s >> 48) & 0xff;
1826 		p[2] = (s >> 40) & 0xff;
1827 		p[3] = (s >> 32) & 0xff;
1828 		p[4] = (s >> 24) & 0xff;
1829 		p[5] = (s >> 16) & 0xff;
1830 		p[6] = (s >> 8) & 0xff;
1831 		p[7] = s & 0xff;
1832 		p[10] = (blksize >> 8) & 0xff;
1833 		p[11] = blksize & 0xff;
1834 		if (sl->sl_flags & SL_UNMAP_ENABLED) {
1835 			p[14] = 0x80;
1836 		}
1837 		sbd_handle_short_read_transfers(task, initial_dbuf, p,
1838 		    cdb_len, 32);
1839 		break;
1840 	}
1841 }
1842 
1843 void
1844 sbd_calc_geometry(uint64_t s, uint16_t blksize, uint8_t *nsectors,
1845     uint8_t *nheads, uint32_t *ncyl)
1846 {
1847 	if (s < (4ull * 1024ull * 1024ull * 1024ull)) {
1848 		*nsectors = 32;
1849 		*nheads = 8;
1850 	} else {
1851 		*nsectors = 254;
1852 		*nheads = 254;
1853 	}
1854 	*ncyl = s / ((uint64_t)blksize * (uint64_t)(*nsectors) *
1855 	    (uint64_t)(*nheads));
1856 }
1857 
1858 void
1859 sbd_handle_mode_sense(struct scsi_task *task,
1860     struct stmf_data_buf *initial_dbuf, uint8_t *buf)
1861 {
1862 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1863 	uint32_t cmd_size, n;
1864 	uint8_t *cdb;
1865 	uint32_t ncyl;
1866 	uint8_t nsectors, nheads;
1867 	uint8_t page, ctrl, header_size, pc_valid;
1868 	uint16_t nbytes;
1869 	uint8_t *p;
1870 	uint64_t s = sl->sl_lu_size;
1871 	uint32_t dev_spec_param_offset;
1872 
1873 	p = buf;	/* buf is assumed to be zeroed out and large enough */
1874 	n = 0;
1875 	cdb = &task->task_cdb[0];
1876 	page = cdb[2] & 0x3F;
1877 	ctrl = (cdb[2] >> 6) & 3;
1878 	cmd_size = (cdb[0] == SCMD_MODE_SENSE) ? cdb[4] :
1879 	    READ_SCSI16(&cdb[7], uint32_t);
1880 
1881 	if (cdb[0] == SCMD_MODE_SENSE) {
1882 		header_size = 4;
1883 		dev_spec_param_offset = 2;
1884 	} else {
1885 		header_size = 8;
1886 		dev_spec_param_offset = 3;
1887 	}
1888 
1889 	/* Now validate the command */
1890 	if ((cdb[2] == 0) || (page == MODEPAGE_ALLPAGES) || (page == 0x08) ||
1891 	    (page == 0x0A) || (page == 0x03) || (page == 0x04)) {
1892 		pc_valid = 1;
1893 	} else {
1894 		pc_valid = 0;
1895 	}
1896 	if ((cmd_size < header_size) || (pc_valid == 0)) {
1897 		stmf_scsilib_send_status(task, STATUS_CHECK,
1898 		    STMF_SAA_INVALID_FIELD_IN_CDB);
1899 		return;
1900 	}
1901 
1902 	/* We will update the length in the mode header at the end */
1903 
1904 	/* Block dev device specific param in mode param header has wp bit */
1905 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
1906 		p[n + dev_spec_param_offset] = BIT_7;
1907 	}
1908 	n += header_size;
1909 	/* We are not going to return any block descriptor */
1910 
1911 	nbytes = ((uint16_t)1) << sl->sl_data_blocksize_shift;
1912 	sbd_calc_geometry(s, nbytes, &nsectors, &nheads, &ncyl);
1913 
1914 	if ((page == 0x03) || (page == MODEPAGE_ALLPAGES)) {
1915 		p[n] = 0x03;
1916 		p[n+1] = 0x16;
1917 		if (ctrl != 1) {
1918 			p[n + 11] = nsectors;
1919 			p[n + 12] = nbytes >> 8;
1920 			p[n + 13] = nbytes & 0xff;
1921 			p[n + 20] = 0x80;
1922 		}
1923 		n += 24;
1924 	}
1925 	if ((page == 0x04) || (page == MODEPAGE_ALLPAGES)) {
1926 		p[n] = 0x04;
1927 		p[n + 1] = 0x16;
1928 		if (ctrl != 1) {
1929 			p[n + 2] = ncyl >> 16;
1930 			p[n + 3] = ncyl >> 8;
1931 			p[n + 4] = ncyl & 0xff;
1932 			p[n + 5] = nheads;
1933 			p[n + 20] = 0x15;
1934 			p[n + 21] = 0x18;
1935 		}
1936 		n += 24;
1937 	}
1938 	if ((page == MODEPAGE_CACHING) || (page == MODEPAGE_ALLPAGES)) {
1939 		struct mode_caching *mode_caching_page;
1940 
1941 		mode_caching_page = (struct mode_caching *)&p[n];
1942 
1943 		mode_caching_page->mode_page.code = MODEPAGE_CACHING;
1944 		mode_caching_page->mode_page.ps = 1; /* A saveable page */
1945 		mode_caching_page->mode_page.length = 0x12;
1946 
1947 		switch (ctrl) {
1948 		case (0):
1949 			/* Current */
1950 			if ((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) == 0) {
1951 				mode_caching_page->wce = 1;
1952 			}
1953 			break;
1954 
1955 		case (1):
1956 			/* Changeable */
1957 			if ((sl->sl_flags &
1958 			    SL_WRITEBACK_CACHE_SET_UNSUPPORTED) == 0) {
1959 				mode_caching_page->wce = 1;
1960 			}
1961 			break;
1962 
1963 		default:
1964 			if ((sl->sl_flags &
1965 			    SL_SAVED_WRITE_CACHE_DISABLE) == 0) {
1966 				mode_caching_page->wce = 1;
1967 			}
1968 			break;
1969 		}
1970 		n += (sizeof (struct mode_page) +
1971 		    mode_caching_page->mode_page.length);
1972 	}
1973 	if ((page == MODEPAGE_CTRL_MODE) || (page == MODEPAGE_ALLPAGES)) {
1974 		struct mode_control_scsi3 *mode_control_page;
1975 
1976 		mode_control_page = (struct mode_control_scsi3 *)&p[n];
1977 
1978 		mode_control_page->mode_page.code = MODEPAGE_CTRL_MODE;
1979 		mode_control_page->mode_page.length =
1980 		    PAGELENGTH_MODE_CONTROL_SCSI3;
1981 		if (ctrl != 1) {
1982 			/* If not looking for changeable values, report this. */
1983 			mode_control_page->que_mod = CTRL_QMOD_UNRESTRICT;
1984 		}
1985 		n += (sizeof (struct mode_page) +
1986 		    mode_control_page->mode_page.length);
1987 	}
1988 
1989 	if (cdb[0] == SCMD_MODE_SENSE) {
1990 		if (n > 255) {
1991 			stmf_scsilib_send_status(task, STATUS_CHECK,
1992 			    STMF_SAA_INVALID_FIELD_IN_CDB);
1993 			return;
1994 		}
1995 		/*
1996 		 * Mode parameter header length doesn't include the number
1997 		 * of bytes in the length field, so adjust the count.
1998 		 * Byte count minus header length field size.
1999 		 */
2000 		buf[0] = (n - 1) & 0xff;
2001 	} else {
2002 		/* Byte count minus header length field size. */
2003 		buf[1] = (n - 2) & 0xff;
2004 		buf[0] = ((n - 2) >> 8) & 0xff;
2005 	}
2006 
2007 	sbd_handle_short_read_transfers(task, initial_dbuf, buf,
2008 	    cmd_size, n);
2009 }
2010 
2011 void
2012 sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf)
2013 {
2014 	uint32_t cmd_xfer_len;
2015 
2016 	if (task->task_cdb[0] == SCMD_MODE_SELECT) {
2017 		cmd_xfer_len = (uint32_t)task->task_cdb[4];
2018 	} else {
2019 		cmd_xfer_len = READ_SCSI16(&task->task_cdb[7], uint32_t);
2020 	}
2021 
2022 	if ((task->task_cdb[1] & 0xFE) != 0x10) {
2023 		stmf_scsilib_send_status(task, STATUS_CHECK,
2024 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2025 		return;
2026 	}
2027 
2028 	if (cmd_xfer_len == 0) {
2029 		/* zero byte mode selects are allowed */
2030 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2031 		return;
2032 	}
2033 
2034 	sbd_handle_short_write_transfers(task, dbuf, cmd_xfer_len);
2035 }
2036 
2037 void
2038 sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf, uint32_t buflen)
2039 {
2040 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2041 	sbd_it_data_t *it;
2042 	int hdr_len, bd_len;
2043 	sbd_status_t sret;
2044 	int i;
2045 
2046 	if (task->task_cdb[0] == SCMD_MODE_SELECT) {
2047 		hdr_len = 4;
2048 	} else {
2049 		hdr_len = 8;
2050 	}
2051 
2052 	if (buflen < hdr_len)
2053 		goto mode_sel_param_len_err;
2054 
2055 	bd_len = hdr_len == 4 ? buf[3] : READ_SCSI16(&buf[6], int);
2056 
2057 	if (buflen < (hdr_len + bd_len + 2))
2058 		goto mode_sel_param_len_err;
2059 
2060 	buf += hdr_len + bd_len;
2061 	buflen -= hdr_len + bd_len;
2062 
2063 	if ((buf[0] != 8) || (buflen != ((uint32_t)buf[1] + 2))) {
2064 		goto mode_sel_param_len_err;
2065 	}
2066 
2067 	if (buf[2] & 0xFB) {
2068 		goto mode_sel_param_field_err;
2069 	}
2070 
2071 	for (i = 3; i < (buf[1] + 2); i++) {
2072 		if (buf[i]) {
2073 			goto mode_sel_param_field_err;
2074 		}
2075 	}
2076 
2077 	sret = SBD_SUCCESS;
2078 
2079 	/* All good. Lets handle the write cache change, if any */
2080 	if (buf[2] & BIT_2) {
2081 		sret = sbd_wcd_set(0, sl);
2082 	} else {
2083 		sret = sbd_wcd_set(1, sl);
2084 	}
2085 
2086 	if (sret != SBD_SUCCESS) {
2087 		stmf_scsilib_send_status(task, STATUS_CHECK,
2088 		    STMF_SAA_WRITE_ERROR);
2089 		return;
2090 	}
2091 
2092 	/* set on the device passed, now set the flags */
2093 	mutex_enter(&sl->sl_lock);
2094 	if (buf[2] & BIT_2) {
2095 		sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
2096 	} else {
2097 		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
2098 	}
2099 
2100 	for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
2101 		if (it == task->task_lu_itl_handle)
2102 			continue;
2103 		it->sbd_it_ua_conditions |= SBD_UA_MODE_PARAMETERS_CHANGED;
2104 	}
2105 
2106 	if (task->task_cdb[1] & 1) {
2107 		if (buf[2] & BIT_2) {
2108 			sl->sl_flags &= ~SL_SAVED_WRITE_CACHE_DISABLE;
2109 		} else {
2110 			sl->sl_flags |= SL_SAVED_WRITE_CACHE_DISABLE;
2111 		}
2112 		mutex_exit(&sl->sl_lock);
2113 		sret = sbd_write_lu_info(sl);
2114 	} else {
2115 		mutex_exit(&sl->sl_lock);
2116 	}
2117 	if (sret == SBD_SUCCESS) {
2118 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2119 	} else {
2120 		stmf_scsilib_send_status(task, STATUS_CHECK,
2121 		    STMF_SAA_WRITE_ERROR);
2122 	}
2123 	return;
2124 
2125 mode_sel_param_len_err:
2126 	stmf_scsilib_send_status(task, STATUS_CHECK,
2127 	    STMF_SAA_PARAM_LIST_LENGTH_ERROR);
2128 	return;
2129 mode_sel_param_field_err:
2130 	stmf_scsilib_send_status(task, STATUS_CHECK,
2131 	    STMF_SAA_INVALID_FIELD_IN_PARAM_LIST);
2132 }
2133 
2134 /*
2135  * Command support added from SPC-4 r24
2136  * Supports info type 0, 2, 127
2137  */
2138 void
2139 sbd_handle_identifying_info(struct scsi_task *task,
2140     stmf_data_buf_t *initial_dbuf)
2141 {
2142 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2143 	uint8_t *cdb;
2144 	uint32_t cmd_size;
2145 	uint32_t param_len;
2146 	uint32_t xfer_size;
2147 	uint8_t info_type;
2148 	uint8_t *buf, *p;
2149 
2150 	cdb = &task->task_cdb[0];
2151 	cmd_size = READ_SCSI32(&cdb[6], uint32_t);
2152 	info_type = cdb[10]>>1;
2153 
2154 	/* Validate the command */
2155 	if (cmd_size < 4) {
2156 		stmf_scsilib_send_status(task, STATUS_CHECK,
2157 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2158 		return;
2159 	}
2160 
2161 	p = buf = kmem_zalloc(260, KM_SLEEP);
2162 
2163 	switch (info_type) {
2164 		case 0:
2165 			/*
2166 			 * No value is supplied but this info type
2167 			 * is mandatory.
2168 			 */
2169 			xfer_size = 4;
2170 			break;
2171 		case 2:
2172 			mutex_enter(&sl->sl_lock);
2173 			param_len = strlcpy((char *)(p+4), sl->sl_alias, 256);
2174 			mutex_exit(&sl->sl_lock);
2175 			/* text info must be null terminated */
2176 			if (++param_len > 256)
2177 				param_len = 256;
2178 			SCSI_WRITE16(p+2, param_len);
2179 			xfer_size = param_len + 4;
2180 			break;
2181 		case 127:
2182 			/* 0 and 2 descriptor supported */
2183 			SCSI_WRITE16(p+2, 8); /* set param length */
2184 			p += 8;
2185 			*p = 4; /* set type to 2 (7 hi bits) */
2186 			p += 2;
2187 			SCSI_WRITE16(p, 256); /* 256 max length */
2188 			xfer_size = 12;
2189 			break;
2190 		default:
2191 			stmf_scsilib_send_status(task, STATUS_CHECK,
2192 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2193 			kmem_free(buf, 260);
2194 			return;
2195 	}
2196 	sbd_handle_short_read_transfers(task, initial_dbuf, buf,
2197 	    cmd_size, xfer_size);
2198 	kmem_free(buf, 260);
2199 }
2200 
2201 /*
2202  * This function parse through a string, passed to it as a pointer to a string,
2203  * by adjusting the pointer to the first non-space character and returns
2204  * the count/length of the first bunch of non-space characters. Multiple
2205  * Management URLs are stored as a space delimited string in sl_mgmt_url
2206  * field of sbd_lu_t. This function is used to retrieve one url at a time.
2207  *
2208  * i/p : pointer to pointer to a url string
2209  * o/p : Adjust the pointer to the url to the first non white character
2210  *       and returns the length of the URL
2211  */
2212 uint16_t
2213 sbd_parse_mgmt_url(char **url_addr) {
2214 	uint16_t url_length = 0;
2215 	char *url;
2216 	url = *url_addr;
2217 
2218 	while (*url != '\0') {
2219 		if (*url == ' ' || *url == '\t' || *url == '\n') {
2220 			(*url_addr)++;
2221 			url = *url_addr;
2222 		} else {
2223 			break;
2224 		}
2225 	}
2226 
2227 	while (*url != '\0') {
2228 		if (*url == ' ' || *url == '\t' ||
2229 		    *url == '\n' || *url == '\0') {
2230 			break;
2231 		}
2232 		url++;
2233 		url_length++;
2234 	}
2235 	return (url_length);
2236 }
2237 
2238 /* Try to make this the size of a kmem allocation cache. */
2239 static uint_t sbd_write_same_optimal_chunk = 128 * 1024;
2240 
2241 static sbd_status_t
2242 sbd_write_same_data(struct scsi_task *task, sbd_cmd_t *scmd)
2243 {
2244 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2245 	uint64_t addr, len, sz_done;
2246 	uint32_t big_buf_size, xfer_size, off;
2247 	uint8_t *big_buf;
2248 	sbd_status_t ret;
2249 
2250 	if (task->task_cdb[0] == SCMD_WRITE_SAME_G1) {
2251 		addr = READ_SCSI32(&task->task_cdb[2], uint64_t);
2252 		len = READ_SCSI16(&task->task_cdb[7], uint64_t);
2253 	} else {
2254 		addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
2255 		len = READ_SCSI32(&task->task_cdb[10], uint64_t);
2256 	}
2257 	addr <<= sl->sl_data_blocksize_shift;
2258 	len <<= sl->sl_data_blocksize_shift;
2259 
2260 	/*
2261 	 * Reminders:
2262 	 *    "len" is total size of what we wish to "write same".
2263 	 *
2264 	 *    xfer_size will be scmd->trans_data_len, which is the length
2265 	 *    of the pattern we wish to replicate over "len".  We replicate
2266 	 *    "xfer_size" of pattern over "len".
2267 	 *
2268 	 *    big_buf_size is set to an ideal actual-write size for an output
2269 	 *    operation.  It may be the same as "len".  If it's not, it should
2270 	 *    be an exact multiple of "xfer_size" so we don't get pattern
2271 	 *    breakage until the very end of "len".
2272 	 */
2273 	big_buf_size = len > sbd_write_same_optimal_chunk ?
2274 	    sbd_write_same_optimal_chunk : (uint32_t)len;
2275 	xfer_size = scmd->trans_data_len;
2276 
2277 	/*
2278 	 * All transfers should be an integral multiple of the sector size.
2279 	 */
2280 	ASSERT((big_buf_size % xfer_size) == 0);
2281 
2282 	/*
2283 	 * Don't sleep for the allocation, and don't make the system
2284 	 * reclaim memory.  Trade higher I/Os if in a low-memory situation.
2285 	 */
2286 	big_buf = kmem_alloc(big_buf_size, KM_NOSLEEP | KM_NORMALPRI);
2287 
2288 	if (big_buf == NULL) {
2289 		/*
2290 		 * Just send it in terms of of the transmitted data.  This
2291 		 * will be very slow.
2292 		 */
2293 		DTRACE_PROBE1(write__same__low__memory, uint64_t, big_buf_size);
2294 		big_buf = scmd->trans_data;
2295 		big_buf_size = scmd->trans_data_len;
2296 	} else {
2297 		/*
2298 		 * We already ASSERT()ed big_buf_size is an integral multiple
2299 		 * of xfer_size.
2300 		 */
2301 		for (off = 0; off < big_buf_size; off += xfer_size)
2302 			bcopy(scmd->trans_data, big_buf + off, xfer_size);
2303 	}
2304 
2305 	/* Do the actual I/O.  Recycle xfer_size now to be write size. */
2306 	DTRACE_PROBE1(write__same__io__begin, uint64_t, len);
2307 	for (sz_done = 0; sz_done < len; sz_done += (uint64_t)xfer_size) {
2308 		xfer_size = ((big_buf_size + sz_done) <= len) ? big_buf_size :
2309 		    len - sz_done;
2310 		ret = sbd_data_write(sl, task, addr + sz_done,
2311 		    (uint64_t)xfer_size, big_buf);
2312 		if (ret != SBD_SUCCESS)
2313 			break;
2314 	}
2315 	DTRACE_PROBE2(write__same__io__end, uint64_t, len, uint64_t, sz_done);
2316 
2317 	if (big_buf != scmd->trans_data)
2318 		kmem_free(big_buf, big_buf_size);
2319 
2320 	return (ret);
2321 }
2322 
2323 static void
2324 sbd_handle_write_same_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
2325     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
2326 {
2327 	uint64_t laddr;
2328 	uint32_t buflen, iolen;
2329 	int ndx, ret;
2330 
2331 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
2332 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2333 		    dbuf->db_xfer_status, NULL);
2334 		return;
2335 	}
2336 
2337 	if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2338 		goto write_same_xfer_done;
2339 	}
2340 
2341 	if (scmd->len != 0) {
2342 		/*
2343 		 * Initiate the next port xfer to occur in parallel
2344 		 * with writing this buf.
2345 		 */
2346 		sbd_do_write_same_xfer(task, scmd, NULL, 0);
2347 	}
2348 
2349 	laddr = dbuf->db_relative_offset;
2350 
2351 	for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
2352 	    (ndx < dbuf->db_sglist_length); ndx++) {
2353 		iolen = min(dbuf->db_data_size - buflen,
2354 		    dbuf->db_sglist[ndx].seg_length);
2355 		if (iolen == 0)
2356 			break;
2357 		bcopy(dbuf->db_sglist[ndx].seg_addr, &scmd->trans_data[laddr],
2358 		    iolen);
2359 		buflen += iolen;
2360 		laddr += (uint64_t)iolen;
2361 	}
2362 	task->task_nbytes_transferred += buflen;
2363 
2364 write_same_xfer_done:
2365 	if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2366 		stmf_free_dbuf(task, dbuf);
2367 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
2368 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2369 			stmf_scsilib_send_status(task, STATUS_CHECK,
2370 			    STMF_SAA_WRITE_ERROR);
2371 		} else {
2372 			ret = sbd_write_same_data(task, scmd);
2373 			if (ret != SBD_SUCCESS) {
2374 				stmf_scsilib_send_status(task, STATUS_CHECK,
2375 				    STMF_SAA_WRITE_ERROR);
2376 			} else {
2377 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2378 			}
2379 		}
2380 		/*
2381 		 * Only way we should get here is via handle_write_same(),
2382 		 * and that should make the following assertion always pass.
2383 		 */
2384 		ASSERT((scmd->flags & SBD_SCSI_CMD_TRANS_DATA) &&
2385 		    scmd->trans_data != NULL);
2386 		kmem_free(scmd->trans_data, scmd->trans_data_len);
2387 		scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
2388 		return;
2389 	}
2390 	sbd_do_write_same_xfer(task, scmd, dbuf, dbuf_reusable);
2391 }
2392 
2393 static void
2394 sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
2395     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
2396 {
2397 	uint32_t len;
2398 
2399 	if (scmd->len == 0) {
2400 		if (dbuf != NULL)
2401 			stmf_free_dbuf(task, dbuf);
2402 		return;
2403 	}
2404 
2405 	if ((dbuf != NULL) &&
2406 	    ((dbuf->db_flags & DB_DONT_REUSE) || (dbuf_reusable == 0))) {
2407 		/* free current dbuf and allocate a new one */
2408 		stmf_free_dbuf(task, dbuf);
2409 		dbuf = NULL;
2410 	}
2411 	if (dbuf == NULL) {
2412 		uint32_t maxsize, minsize, old_minsize;
2413 
2414 		maxsize = (scmd->len > (128*1024)) ? 128*1024 :
2415 		    scmd->len;
2416 		minsize = maxsize >> 2;
2417 		do {
2418 			old_minsize = minsize;
2419 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
2420 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
2421 		    (minsize >= 512));
2422 		if (dbuf == NULL) {
2423 			if (scmd->nbufs == 0) {
2424 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2425 				    STMF_ALLOC_FAILURE, NULL);
2426 			}
2427 			return;
2428 		}
2429 	}
2430 
2431 	len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size :
2432 	    scmd->len;
2433 
2434 	dbuf->db_relative_offset = scmd->current_ro;
2435 	dbuf->db_data_size = len;
2436 	dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
2437 	(void) stmf_xfer_data(task, dbuf, 0);
2438 	scmd->nbufs++; /* outstanding port xfers and bufs used */
2439 	scmd->len -= len;
2440 	scmd->current_ro += len;
2441 }
2442 
2443 static void
2444 sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf)
2445 {
2446 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2447 	uint64_t addr, len;
2448 	sbd_cmd_t *scmd;
2449 	stmf_data_buf_t *dbuf;
2450 	uint8_t unmap;
2451 	uint8_t do_immediate_data = 0;
2452 
2453 	task->task_cmd_xfer_length = 0;
2454 	if (task->task_additional_flags &
2455 	    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2456 		task->task_expected_xfer_length = 0;
2457 	}
2458 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
2459 		stmf_scsilib_send_status(task, STATUS_CHECK,
2460 		    STMF_SAA_WRITE_PROTECTED);
2461 		return;
2462 	}
2463 	if (task->task_cdb[1] & 0xF7) {
2464 		stmf_scsilib_send_status(task, STATUS_CHECK,
2465 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2466 		return;
2467 	}
2468 	unmap = task->task_cdb[1] & 0x08;
2469 	if (unmap && ((sl->sl_flags & SL_UNMAP_ENABLED) == 0)) {
2470 		stmf_scsilib_send_status(task, STATUS_CHECK,
2471 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2472 		return;
2473 	}
2474 	if (task->task_cdb[0] == SCMD_WRITE_SAME_G1) {
2475 		addr = READ_SCSI32(&task->task_cdb[2], uint64_t);
2476 		len = READ_SCSI16(&task->task_cdb[7], uint64_t);
2477 	} else {
2478 		addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
2479 		len = READ_SCSI32(&task->task_cdb[10], uint64_t);
2480 	}
2481 	if (len == 0) {
2482 		stmf_scsilib_send_status(task, STATUS_CHECK,
2483 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2484 		return;
2485 	}
2486 	addr <<= sl->sl_data_blocksize_shift;
2487 	len <<= sl->sl_data_blocksize_shift;
2488 
2489 	/* Check if the command is for the unmap function */
2490 	if (unmap) {
2491 		if (sbd_unmap(sl, addr, len) != 0) {
2492 			stmf_scsilib_send_status(task, STATUS_CHECK,
2493 			    STMF_SAA_LBA_OUT_OF_RANGE);
2494 		} else {
2495 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2496 		}
2497 		return;
2498 	}
2499 
2500 	/* Write same function */
2501 
2502 	task->task_cmd_xfer_length = 1 << sl->sl_data_blocksize_shift;
2503 	if (task->task_additional_flags &
2504 	    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2505 		task->task_expected_xfer_length = task->task_cmd_xfer_length;
2506 	}
2507 	if ((addr + len) > sl->sl_lu_size) {
2508 		stmf_scsilib_send_status(task, STATUS_CHECK,
2509 		    STMF_SAA_LBA_OUT_OF_RANGE);
2510 		return;
2511 	}
2512 
2513 	/* For rest of this I/O the transfer length is 1 block */
2514 	len = ((uint64_t)1) << sl->sl_data_blocksize_shift;
2515 
2516 	/* Some basic checks */
2517 	if ((len == 0) || (len != task->task_expected_xfer_length)) {
2518 		stmf_scsilib_send_status(task, STATUS_CHECK,
2519 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2520 		return;
2521 	}
2522 
2523 
2524 	if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) {
2525 		if (initial_dbuf->db_data_size > len) {
2526 			if (initial_dbuf->db_data_size >
2527 			    task->task_expected_xfer_length) {
2528 				/* protocol error */
2529 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2530 				    STMF_INVALID_ARG, NULL);
2531 				return;
2532 			}
2533 			initial_dbuf->db_data_size = (uint32_t)len;
2534 		}
2535 		do_immediate_data = 1;
2536 	}
2537 	dbuf = initial_dbuf;
2538 
2539 	if (task->task_lu_private) {
2540 		scmd = (sbd_cmd_t *)task->task_lu_private;
2541 	} else {
2542 		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
2543 		task->task_lu_private = scmd;
2544 	}
2545 	scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_TRANS_DATA;
2546 	scmd->cmd_type = SBD_CMD_SCSI_WRITE;
2547 	scmd->nbufs = 0;
2548 	scmd->len = (uint32_t)len;
2549 	scmd->trans_data_len = (uint32_t)len;
2550 	scmd->trans_data = kmem_alloc((size_t)len, KM_SLEEP);
2551 	scmd->current_ro = 0;
2552 
2553 	if (do_immediate_data) {
2554 		/*
2555 		 * Account for data passed in this write command
2556 		 */
2557 		(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
2558 		scmd->len -= dbuf->db_data_size;
2559 		scmd->current_ro += dbuf->db_data_size;
2560 		dbuf->db_xfer_status = STMF_SUCCESS;
2561 		sbd_handle_write_same_xfer_completion(task, scmd, dbuf, 0);
2562 	} else {
2563 		sbd_do_write_same_xfer(task, scmd, dbuf, 0);
2564 	}
2565 }
2566 
2567 static void
2568 sbd_handle_unmap(scsi_task_t *task, stmf_data_buf_t *dbuf)
2569 {
2570 	uint32_t cmd_xfer_len;
2571 
2572 	cmd_xfer_len = READ_SCSI16(&task->task_cdb[7], uint32_t);
2573 
2574 	if (task->task_cdb[1] & 1) {
2575 		stmf_scsilib_send_status(task, STATUS_CHECK,
2576 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2577 		return;
2578 	}
2579 
2580 	if (cmd_xfer_len == 0) {
2581 		task->task_cmd_xfer_length = 0;
2582 		if (task->task_additional_flags &
2583 		    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2584 			task->task_expected_xfer_length = 0;
2585 		}
2586 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2587 		return;
2588 	}
2589 
2590 	sbd_handle_short_write_transfers(task, dbuf, cmd_xfer_len);
2591 }
2592 
2593 static void
2594 sbd_handle_unmap_xfer(scsi_task_t *task, uint8_t *buf, uint32_t buflen)
2595 {
2596 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2597 	uint32_t ulen, dlen, num_desc;
2598 	uint64_t addr, len;
2599 	uint8_t *p;
2600 	int ret;
2601 
2602 	if (buflen < 24) {
2603 		stmf_scsilib_send_status(task, STATUS_CHECK,
2604 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2605 		return;
2606 	}
2607 	ulen = READ_SCSI16(buf, uint32_t);
2608 	dlen = READ_SCSI16(buf + 2, uint32_t);
2609 	num_desc = dlen >> 4;
2610 	if (((ulen + 2) != buflen) || ((dlen + 8) != buflen) || (dlen & 0xf) ||
2611 	    (num_desc == 0)) {
2612 		stmf_scsilib_send_status(task, STATUS_CHECK,
2613 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2614 		return;
2615 	}
2616 
2617 	for (p = buf + 8; num_desc; num_desc--, p += 16) {
2618 		addr = READ_SCSI64(p, uint64_t);
2619 		addr <<= sl->sl_data_blocksize_shift;
2620 		len = READ_SCSI32(p+8, uint64_t);
2621 		len <<= sl->sl_data_blocksize_shift;
2622 		ret = sbd_unmap(sl, addr, len);
2623 		if (ret != 0) {
2624 			stmf_scsilib_send_status(task, STATUS_CHECK,
2625 			    STMF_SAA_LBA_OUT_OF_RANGE);
2626 			return;
2627 		}
2628 	}
2629 
2630 unmap_done:
2631 	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2632 }
2633 
2634 void
2635 sbd_handle_inquiry(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
2636 {
2637 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2638 	uint8_t *cdbp = (uint8_t *)&task->task_cdb[0];
2639 	uint8_t *p;
2640 	uint8_t byte0;
2641 	uint8_t page_length;
2642 	uint16_t bsize = 512;
2643 	uint16_t cmd_size;
2644 	uint32_t xfer_size = 4;
2645 	uint32_t mgmt_url_size = 0;
2646 	uint8_t exp;
2647 	uint64_t s;
2648 	char *mgmt_url = NULL;
2649 
2650 
2651 	byte0 = DTYPE_DIRECT;
2652 	/*
2653 	 * Basic protocol checks.
2654 	 */
2655 
2656 	if ((((cdbp[1] & 1) == 0) && cdbp[2]) || cdbp[5]) {
2657 		stmf_scsilib_send_status(task, STATUS_CHECK,
2658 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2659 		return;
2660 	}
2661 
2662 	/*
2663 	 * Zero byte allocation length is not an error.  Just
2664 	 * return success.
2665 	 */
2666 
2667 	cmd_size = (((uint16_t)cdbp[3]) << 8) | cdbp[4];
2668 
2669 	if (cmd_size == 0) {
2670 		task->task_cmd_xfer_length = 0;
2671 		if (task->task_additional_flags &
2672 		    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2673 			task->task_expected_xfer_length = 0;
2674 		}
2675 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2676 		return;
2677 	}
2678 
2679 	/*
2680 	 * Standard inquiry
2681 	 */
2682 
2683 	if ((cdbp[1] & 1) == 0) {
2684 		int	i;
2685 		struct scsi_inquiry *inq;
2686 
2687 		p = (uint8_t *)kmem_zalloc(bsize, KM_SLEEP);
2688 		inq = (struct scsi_inquiry *)p;
2689 
2690 		page_length = 69;
2691 		xfer_size = page_length + 5;
2692 
2693 		inq->inq_dtype = DTYPE_DIRECT;
2694 		inq->inq_ansi = 5;	/* SPC-3 */
2695 		inq->inq_hisup = 1;
2696 		inq->inq_rdf = 2;	/* Response data format for SPC-3 */
2697 		inq->inq_len = page_length;
2698 
2699 		inq->inq_tpgs = TPGS_FAILOVER_IMPLICIT;
2700 		inq->inq_cmdque = 1;
2701 
2702 		if (sl->sl_flags & SL_VID_VALID) {
2703 			bcopy(sl->sl_vendor_id, inq->inq_vid, 8);
2704 		} else {
2705 			bcopy(sbd_vendor_id, inq->inq_vid, 8);
2706 		}
2707 
2708 		if (sl->sl_flags & SL_PID_VALID) {
2709 			bcopy(sl->sl_product_id, inq->inq_pid, 16);
2710 		} else {
2711 			bcopy(sbd_product_id, inq->inq_pid, 16);
2712 		}
2713 
2714 		if (sl->sl_flags & SL_REV_VALID) {
2715 			bcopy(sl->sl_revision, inq->inq_revision, 4);
2716 		} else {
2717 			bcopy(sbd_revision, inq->inq_revision, 4);
2718 		}
2719 
2720 		/* Adding Version Descriptors */
2721 		i = 0;
2722 		/* SAM-3 no version */
2723 		inq->inq_vd[i].inq_vd_msb = 0x00;
2724 		inq->inq_vd[i].inq_vd_lsb = 0x60;
2725 		i++;
2726 
2727 		/* transport */
2728 		switch (task->task_lport->lport_id->protocol_id) {
2729 		case PROTOCOL_FIBRE_CHANNEL:
2730 			inq->inq_vd[i].inq_vd_msb = 0x09;
2731 			inq->inq_vd[i].inq_vd_lsb = 0x00;
2732 			i++;
2733 			break;
2734 
2735 		case PROTOCOL_PARALLEL_SCSI:
2736 		case PROTOCOL_SSA:
2737 		case PROTOCOL_IEEE_1394:
2738 			/* Currently no claims of conformance */
2739 			break;
2740 
2741 		case PROTOCOL_SRP:
2742 			inq->inq_vd[i].inq_vd_msb = 0x09;
2743 			inq->inq_vd[i].inq_vd_lsb = 0x40;
2744 			i++;
2745 			break;
2746 
2747 		case PROTOCOL_iSCSI:
2748 			inq->inq_vd[i].inq_vd_msb = 0x09;
2749 			inq->inq_vd[i].inq_vd_lsb = 0x60;
2750 			i++;
2751 			break;
2752 
2753 		case PROTOCOL_SAS:
2754 		case PROTOCOL_ADT:
2755 		case PROTOCOL_ATAPI:
2756 		default:
2757 			/* Currently no claims of conformance */
2758 			break;
2759 		}
2760 
2761 		/* SPC-3 no version */
2762 		inq->inq_vd[i].inq_vd_msb = 0x03;
2763 		inq->inq_vd[i].inq_vd_lsb = 0x00;
2764 		i++;
2765 
2766 		/* SBC-2 no version */
2767 		inq->inq_vd[i].inq_vd_msb = 0x03;
2768 		inq->inq_vd[i].inq_vd_lsb = 0x20;
2769 
2770 		sbd_handle_short_read_transfers(task, initial_dbuf, p, cmd_size,
2771 		    min(cmd_size, xfer_size));
2772 		kmem_free(p, bsize);
2773 
2774 		return;
2775 	}
2776 
2777 	rw_enter(&sbd_global_prop_lock, RW_READER);
2778 	if (sl->sl_mgmt_url) {
2779 		mgmt_url_size = strlen(sl->sl_mgmt_url);
2780 		mgmt_url = sl->sl_mgmt_url;
2781 	} else if (sbd_mgmt_url) {
2782 		mgmt_url_size = strlen(sbd_mgmt_url);
2783 		mgmt_url = sbd_mgmt_url;
2784 	}
2785 
2786 	/*
2787 	 * EVPD handling
2788 	 */
2789 
2790 	/* Default 512 bytes may not be enough, increase bsize if necessary */
2791 	if (cdbp[2] == 0x83 || cdbp[2] == 0x85) {
2792 		if (bsize <  cmd_size)
2793 			bsize = cmd_size;
2794 	}
2795 	p = (uint8_t *)kmem_zalloc(bsize, KM_SLEEP);
2796 
2797 	switch (cdbp[2]) {
2798 	case 0x00:
2799 		page_length = 4 + (mgmt_url_size ? 1 : 0);
2800 		if (sl->sl_flags & SL_UNMAP_ENABLED)
2801 			page_length += 2;
2802 
2803 		p[0] = byte0;
2804 		p[3] = page_length;
2805 		/* Supported VPD pages in ascending order */
2806 		{
2807 			uint8_t i = 5;
2808 
2809 			p[i++] = 0x80;
2810 			p[i++] = 0x83;
2811 			if (mgmt_url_size != 0)
2812 				p[i++] = 0x85;
2813 			p[i++] = 0x86;
2814 			if (sl->sl_flags & SL_UNMAP_ENABLED) {
2815 				p[i++] = 0xb0;
2816 				p[i++] = 0xb2;
2817 			}
2818 		}
2819 		xfer_size = page_length + 4;
2820 		break;
2821 
2822 	case 0x80:
2823 		if (sl->sl_serial_no_size) {
2824 			page_length = sl->sl_serial_no_size;
2825 			bcopy(sl->sl_serial_no, p + 4, sl->sl_serial_no_size);
2826 		} else {
2827 			/* if no serial num is specified set 4 spaces */
2828 			page_length = 4;
2829 			bcopy("    ", p + 4, 4);
2830 		}
2831 		p[0] = byte0;
2832 		p[1] = 0x80;
2833 		p[3] = page_length;
2834 		xfer_size = page_length + 4;
2835 		break;
2836 
2837 	case 0x83:
2838 		xfer_size = stmf_scsilib_prepare_vpd_page83(task, p,
2839 		    bsize, byte0, STMF_VPD_LU_ID|STMF_VPD_TARGET_ID|
2840 		    STMF_VPD_TP_GROUP|STMF_VPD_RELATIVE_TP_ID);
2841 		break;
2842 
2843 	case 0x85:
2844 		if (mgmt_url_size == 0) {
2845 			stmf_scsilib_send_status(task, STATUS_CHECK,
2846 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2847 			goto err_done;
2848 		}
2849 		{
2850 			uint16_t idx, newidx, sz, url_size;
2851 			char *url;
2852 
2853 			p[0] = byte0;
2854 			p[1] = 0x85;
2855 
2856 			idx = 4;
2857 			url = mgmt_url;
2858 			url_size = sbd_parse_mgmt_url(&url);
2859 			/* Creating Network Service Descriptors */
2860 			while (url_size != 0) {
2861 				/* Null terminated and 4 Byte aligned */
2862 				sz = url_size + 1;
2863 				sz += (sz % 4) ? 4 - (sz % 4) : 0;
2864 				newidx = idx + sz + 4;
2865 
2866 				if (newidx < bsize) {
2867 					/*
2868 					 * SPC-3r23 : Table 320  (Sec 7.6.5)
2869 					 * (Network service descriptor format
2870 					 *
2871 					 * Note: Hard coding service type as
2872 					 * "Storage Configuration Service".
2873 					 */
2874 					p[idx] = 1;
2875 					SCSI_WRITE16(p + idx + 2, sz);
2876 					bcopy(url, p + idx + 4, url_size);
2877 					xfer_size = newidx + 4;
2878 				}
2879 				idx = newidx;
2880 
2881 				/* skip to next mgmt url if any */
2882 				url += url_size;
2883 				url_size = sbd_parse_mgmt_url(&url);
2884 			}
2885 
2886 			/* Total descriptor length */
2887 			SCSI_WRITE16(p + 2, idx - 4);
2888 			break;
2889 		}
2890 
2891 	case 0x86:
2892 		page_length = 0x3c;
2893 
2894 		p[0] = byte0;
2895 		p[1] = 0x86;		/* Page 86 response */
2896 		p[3] = page_length;
2897 
2898 		/*
2899 		 * Bits 0, 1, and 2 will need to be updated
2900 		 * to reflect the queue tag handling if/when
2901 		 * that is implemented.  For now, we're going
2902 		 * to claim support only for Simple TA.
2903 		 */
2904 		p[5] = 1;
2905 		xfer_size = page_length + 4;
2906 		break;
2907 
2908 	case 0xb0:
2909 		if ((sl->sl_flags & SL_UNMAP_ENABLED) == 0) {
2910 			stmf_scsilib_send_status(task, STATUS_CHECK,
2911 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2912 			goto err_done;
2913 		}
2914 		page_length = 0x3c;
2915 		p[0] = byte0;
2916 		p[1] = 0xb0;
2917 		p[3] = page_length;
2918 		p[20] = p[21] = p[22] = p[23] = 0xFF;
2919 		p[24] = p[25] = p[26] = p[27] = 0xFF;
2920 		xfer_size = page_length + 4;
2921 		break;
2922 
2923 	case 0xb2:
2924 		if ((sl->sl_flags & SL_UNMAP_ENABLED) == 0) {
2925 			stmf_scsilib_send_status(task, STATUS_CHECK,
2926 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2927 			goto err_done;
2928 		}
2929 		page_length = 4;
2930 		p[0] = byte0;
2931 		p[1] = 0xb2;
2932 		p[3] = page_length;
2933 
2934 		exp = (uint8_t)sl->sl_data_blocksize_shift;
2935 		s = sl->sl_lu_size >> sl->sl_data_blocksize_shift;
2936 		while (s & ((uint64_t)0xFFFFFFFF80000000ull)) {
2937 			s >>= 1;
2938 			exp++;
2939 		}
2940 		p[4] = exp;
2941 		p[5] = 0xc0;
2942 		xfer_size = page_length + 4;
2943 		break;
2944 
2945 	default:
2946 		stmf_scsilib_send_status(task, STATUS_CHECK,
2947 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2948 		goto err_done;
2949 	}
2950 
2951 	sbd_handle_short_read_transfers(task, initial_dbuf, p, cmd_size,
2952 	    min(cmd_size, xfer_size));
2953 err_done:
2954 	kmem_free(p, bsize);
2955 	rw_exit(&sbd_global_prop_lock);
2956 }
2957 
2958 stmf_status_t
2959 sbd_task_alloc(struct scsi_task *task)
2960 {
2961 	if ((task->task_lu_private =
2962 	    kmem_alloc(sizeof (sbd_cmd_t), KM_NOSLEEP)) != NULL) {
2963 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
2964 		scmd->flags = 0;
2965 		return (STMF_SUCCESS);
2966 	}
2967 	return (STMF_ALLOC_FAILURE);
2968 }
2969 
2970 void
2971 sbd_remove_it_handle(sbd_lu_t *sl, sbd_it_data_t *it)
2972 {
2973 	sbd_it_data_t **ppit;
2974 
2975 	sbd_pgr_remove_it_handle(sl, it);
2976 	mutex_enter(&sl->sl_lock);
2977 	for (ppit = &sl->sl_it_list; *ppit != NULL;
2978 	    ppit = &((*ppit)->sbd_it_next)) {
2979 		if ((*ppit) == it) {
2980 			*ppit = it->sbd_it_next;
2981 			break;
2982 		}
2983 	}
2984 	mutex_exit(&sl->sl_lock);
2985 
2986 	DTRACE_PROBE2(itl__nexus__end, stmf_lu_t *, sl->sl_lu,
2987 	    sbd_it_data_t *, it);
2988 
2989 	kmem_free(it, sizeof (*it));
2990 }
2991 
2992 void
2993 sbd_check_and_clear_scsi2_reservation(sbd_lu_t *sl, sbd_it_data_t *it)
2994 {
2995 	mutex_enter(&sl->sl_lock);
2996 	if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) == 0) {
2997 		/* If we dont have any reservations, just get out. */
2998 		mutex_exit(&sl->sl_lock);
2999 		return;
3000 	}
3001 
3002 	if (it == NULL) {
3003 		/* Find the I_T nexus which is holding the reservation. */
3004 		for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
3005 			if (it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) {
3006 				ASSERT(it->sbd_it_session_id ==
3007 				    sl->sl_rs_owner_session_id);
3008 				break;
3009 			}
3010 		}
3011 		ASSERT(it != NULL);
3012 	} else {
3013 		/*
3014 		 * We were passed an I_T nexus. If this nexus does not hold
3015 		 * the reservation, do nothing. This is why this function is
3016 		 * called "check_and_clear".
3017 		 */
3018 		if ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0) {
3019 			mutex_exit(&sl->sl_lock);
3020 			return;
3021 		}
3022 	}
3023 	it->sbd_it_flags &= ~SBD_IT_HAS_SCSI2_RESERVATION;
3024 	sl->sl_flags &= ~SL_LU_HAS_SCSI2_RESERVATION;
3025 	mutex_exit(&sl->sl_lock);
3026 }
3027 
3028 
3029 
3030 void
3031 sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
3032 {
3033 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
3034 	sbd_it_data_t *it;
3035 	uint8_t cdb0, cdb1;
3036 	stmf_status_t st_ret;
3037 
3038 	if ((it = task->task_lu_itl_handle) == NULL) {
3039 		mutex_enter(&sl->sl_lock);
3040 		for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
3041 			if (it->sbd_it_session_id ==
3042 			    task->task_session->ss_session_id) {
3043 				mutex_exit(&sl->sl_lock);
3044 				stmf_scsilib_send_status(task, STATUS_BUSY, 0);
3045 				return;
3046 			}
3047 		}
3048 		it = (sbd_it_data_t *)kmem_zalloc(sizeof (*it), KM_NOSLEEP);
3049 		if (it == NULL) {
3050 			mutex_exit(&sl->sl_lock);
3051 			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
3052 			return;
3053 		}
3054 		it->sbd_it_session_id = task->task_session->ss_session_id;
3055 		bcopy(task->task_lun_no, it->sbd_it_lun, 8);
3056 		it->sbd_it_next = sl->sl_it_list;
3057 		sl->sl_it_list = it;
3058 		mutex_exit(&sl->sl_lock);
3059 
3060 		DTRACE_PROBE1(itl__nexus__start, scsi_task *, task);
3061 
3062 		sbd_pgr_initialize_it(task, it);
3063 		if (stmf_register_itl_handle(task->task_lu, task->task_lun_no,
3064 		    task->task_session, it->sbd_it_session_id, it)
3065 		    != STMF_SUCCESS) {
3066 			sbd_remove_it_handle(sl, it);
3067 			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
3068 			return;
3069 		}
3070 		task->task_lu_itl_handle = it;
3071 		if (sl->sl_access_state != SBD_LU_STANDBY) {
3072 			it->sbd_it_ua_conditions = SBD_UA_POR;
3073 		}
3074 	} else if (it->sbd_it_flags & SBD_IT_PGR_CHECK_FLAG) {
3075 		mutex_enter(&sl->sl_lock);
3076 		it->sbd_it_flags &= ~SBD_IT_PGR_CHECK_FLAG;
3077 		mutex_exit(&sl->sl_lock);
3078 		sbd_pgr_initialize_it(task, it);
3079 	}
3080 
3081 	if (task->task_mgmt_function) {
3082 		stmf_scsilib_handle_task_mgmt(task);
3083 		return;
3084 	}
3085 
3086 	/*
3087 	 * if we're transitioning between access
3088 	 * states, return NOT READY
3089 	 */
3090 	if (sl->sl_access_state == SBD_LU_TRANSITION_TO_STANDBY ||
3091 	    sl->sl_access_state == SBD_LU_TRANSITION_TO_ACTIVE) {
3092 		stmf_scsilib_send_status(task, STATUS_CHECK,
3093 		    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
3094 		return;
3095 	}
3096 
3097 	/* Checking ua conditions as per SAM3R14 5.3.2 specified order */
3098 	if ((it->sbd_it_ua_conditions) && (task->task_cdb[0] != SCMD_INQUIRY)) {
3099 		uint32_t saa = 0;
3100 
3101 		mutex_enter(&sl->sl_lock);
3102 		if (it->sbd_it_ua_conditions & SBD_UA_POR) {
3103 			it->sbd_it_ua_conditions &= ~SBD_UA_POR;
3104 			saa = STMF_SAA_POR;
3105 		}
3106 		mutex_exit(&sl->sl_lock);
3107 		if (saa) {
3108 			stmf_scsilib_send_status(task, STATUS_CHECK, saa);
3109 			return;
3110 		}
3111 	}
3112 
3113 	/* Reservation conflict checks */
3114 	if (sl->sl_access_state == SBD_LU_ACTIVE) {
3115 		if (SBD_PGR_RSVD(sl->sl_pgr)) {
3116 			if (sbd_pgr_reservation_conflict(task)) {
3117 				stmf_scsilib_send_status(task,
3118 				    STATUS_RESERVATION_CONFLICT, 0);
3119 				return;
3120 			}
3121 		} else if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) &&
3122 		    ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0)) {
3123 			if (!(SCSI2_CONFLICT_FREE_CMDS(task->task_cdb))) {
3124 				stmf_scsilib_send_status(task,
3125 				    STATUS_RESERVATION_CONFLICT, 0);
3126 				return;
3127 			}
3128 		}
3129 	}
3130 
3131 	/* Rest of the ua conndition checks */
3132 	if ((it->sbd_it_ua_conditions) && (task->task_cdb[0] != SCMD_INQUIRY)) {
3133 		uint32_t saa = 0;
3134 
3135 		mutex_enter(&sl->sl_lock);
3136 		if (it->sbd_it_ua_conditions & SBD_UA_CAPACITY_CHANGED) {
3137 			it->sbd_it_ua_conditions &= ~SBD_UA_CAPACITY_CHANGED;
3138 			if ((task->task_cdb[0] == SCMD_READ_CAPACITY) ||
3139 			    ((task->task_cdb[0] == SCMD_SVC_ACTION_IN_G4) &&
3140 			    (task->task_cdb[1] ==
3141 			    SSVC_ACTION_READ_CAPACITY_G4))) {
3142 				saa = 0;
3143 			} else {
3144 				saa = STMF_SAA_CAPACITY_DATA_HAS_CHANGED;
3145 			}
3146 		} else if (it->sbd_it_ua_conditions &
3147 		    SBD_UA_MODE_PARAMETERS_CHANGED) {
3148 			it->sbd_it_ua_conditions &=
3149 			    ~SBD_UA_MODE_PARAMETERS_CHANGED;
3150 			saa = STMF_SAA_MODE_PARAMETERS_CHANGED;
3151 		} else if (it->sbd_it_ua_conditions &
3152 		    SBD_UA_ASYMMETRIC_ACCESS_CHANGED) {
3153 			it->sbd_it_ua_conditions &=
3154 			    ~SBD_UA_ASYMMETRIC_ACCESS_CHANGED;
3155 			saa = STMF_SAA_ASYMMETRIC_ACCESS_CHANGED;
3156 		} else if (it->sbd_it_ua_conditions &
3157 		    SBD_UA_ACCESS_STATE_TRANSITION) {
3158 			it->sbd_it_ua_conditions &=
3159 			    ~SBD_UA_ACCESS_STATE_TRANSITION;
3160 			saa = STMF_SAA_LU_NO_ACCESS_TRANSITION;
3161 		} else {
3162 			it->sbd_it_ua_conditions = 0;
3163 			saa = 0;
3164 		}
3165 		mutex_exit(&sl->sl_lock);
3166 		if (saa) {
3167 			stmf_scsilib_send_status(task, STATUS_CHECK, saa);
3168 			return;
3169 		}
3170 	}
3171 
3172 	cdb0 = task->task_cdb[0];
3173 	cdb1 = task->task_cdb[1];
3174 
3175 	if (sl->sl_access_state == SBD_LU_STANDBY) {
3176 		if (cdb0 != SCMD_INQUIRY &&
3177 		    cdb0 != SCMD_MODE_SENSE &&
3178 		    cdb0 != SCMD_MODE_SENSE_G1 &&
3179 		    cdb0 != SCMD_MODE_SELECT &&
3180 		    cdb0 != SCMD_MODE_SELECT_G1 &&
3181 		    cdb0 != SCMD_RESERVE &&
3182 		    cdb0 != SCMD_RELEASE &&
3183 		    cdb0 != SCMD_PERSISTENT_RESERVE_OUT &&
3184 		    cdb0 != SCMD_PERSISTENT_RESERVE_IN &&
3185 		    cdb0 != SCMD_REQUEST_SENSE &&
3186 		    cdb0 != SCMD_READ_CAPACITY &&
3187 		    cdb0 != SCMD_TEST_UNIT_READY &&
3188 		    cdb0 != SCMD_START_STOP &&
3189 		    cdb0 != SCMD_READ &&
3190 		    cdb0 != SCMD_READ_G1 &&
3191 		    cdb0 != SCMD_READ_G4 &&
3192 		    cdb0 != SCMD_READ_G5 &&
3193 		    !(cdb0 == SCMD_SVC_ACTION_IN_G4 &&
3194 		    cdb1 == SSVC_ACTION_READ_CAPACITY_G4) &&
3195 		    !(cdb0 == SCMD_MAINTENANCE_IN &&
3196 		    (cdb1 & 0x1F) == 0x05) &&
3197 		    !(cdb0 == SCMD_MAINTENANCE_IN &&
3198 		    (cdb1 & 0x1F) == 0x0A)) {
3199 			stmf_scsilib_send_status(task, STATUS_CHECK,
3200 			    STMF_SAA_LU_NO_ACCESS_STANDBY);
3201 			return;
3202 		}
3203 
3204 		/*
3205 		 * is this a short write?
3206 		 * if so, we'll need to wait until we have the buffer
3207 		 * before proxying the command
3208 		 */
3209 		switch (cdb0) {
3210 			case SCMD_MODE_SELECT:
3211 			case SCMD_MODE_SELECT_G1:
3212 			case SCMD_PERSISTENT_RESERVE_OUT:
3213 				break;
3214 			default:
3215 				st_ret = stmf_proxy_scsi_cmd(task,
3216 				    initial_dbuf);
3217 				if (st_ret != STMF_SUCCESS) {
3218 					stmf_scsilib_send_status(task,
3219 					    STATUS_CHECK,
3220 					    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
3221 				}
3222 				return;
3223 		}
3224 	}
3225 
3226 	cdb0 = task->task_cdb[0] & 0x1F;
3227 
3228 	if ((cdb0 == SCMD_READ) || (cdb0 == SCMD_WRITE)) {
3229 		if (task->task_additional_flags & TASK_AF_PORT_LOAD_HIGH) {
3230 			stmf_scsilib_send_status(task, STATUS_QFULL, 0);
3231 			return;
3232 		}
3233 		if (cdb0 == SCMD_READ) {
3234 			sbd_handle_read(task, initial_dbuf);
3235 			return;
3236 		}
3237 		sbd_handle_write(task, initial_dbuf);
3238 		return;
3239 	}
3240 
3241 	cdb0 = task->task_cdb[0];
3242 	cdb1 = task->task_cdb[1];
3243 
3244 	if (cdb0 == SCMD_INQUIRY) {		/* Inquiry */
3245 		sbd_handle_inquiry(task, initial_dbuf);
3246 		return;
3247 	}
3248 
3249 	if (cdb0  == SCMD_PERSISTENT_RESERVE_OUT) {
3250 		sbd_handle_pgr_out_cmd(task, initial_dbuf);
3251 		return;
3252 	}
3253 
3254 	if (cdb0  == SCMD_PERSISTENT_RESERVE_IN) {
3255 		sbd_handle_pgr_in_cmd(task, initial_dbuf);
3256 		return;
3257 	}
3258 
3259 	if (cdb0 == SCMD_RELEASE) {
3260 		if (cdb1) {
3261 			stmf_scsilib_send_status(task, STATUS_CHECK,
3262 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3263 			return;
3264 		}
3265 
3266 		mutex_enter(&sl->sl_lock);
3267 		if (sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) {
3268 			/* If not owner don't release it, just return good */
3269 			if (it->sbd_it_session_id !=
3270 			    sl->sl_rs_owner_session_id) {
3271 				mutex_exit(&sl->sl_lock);
3272 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3273 				return;
3274 			}
3275 		}
3276 		sl->sl_flags &= ~SL_LU_HAS_SCSI2_RESERVATION;
3277 		it->sbd_it_flags &= ~SBD_IT_HAS_SCSI2_RESERVATION;
3278 		mutex_exit(&sl->sl_lock);
3279 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3280 		return;
3281 	}
3282 
3283 	if (cdb0 == SCMD_RESERVE) {
3284 		if (cdb1) {
3285 			stmf_scsilib_send_status(task, STATUS_CHECK,
3286 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3287 			return;
3288 		}
3289 
3290 		mutex_enter(&sl->sl_lock);
3291 		if (sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) {
3292 			/* If not owner, return conflict status */
3293 			if (it->sbd_it_session_id !=
3294 			    sl->sl_rs_owner_session_id) {
3295 				mutex_exit(&sl->sl_lock);
3296 				stmf_scsilib_send_status(task,
3297 				    STATUS_RESERVATION_CONFLICT, 0);
3298 				return;
3299 			}
3300 		}
3301 		sl->sl_flags |= SL_LU_HAS_SCSI2_RESERVATION;
3302 		it->sbd_it_flags |= SBD_IT_HAS_SCSI2_RESERVATION;
3303 		sl->sl_rs_owner_session_id = it->sbd_it_session_id;
3304 		mutex_exit(&sl->sl_lock);
3305 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3306 		return;
3307 	}
3308 
3309 	if (cdb0 == SCMD_REQUEST_SENSE) {
3310 		/*
3311 		 * LU provider needs to store unretrieved sense data
3312 		 * (e.g. after power-on/reset).  For now, we'll just
3313 		 * return good status with no sense.
3314 		 */
3315 
3316 		if ((cdb1 & ~1) || task->task_cdb[2] || task->task_cdb[3] ||
3317 		    task->task_cdb[5]) {
3318 			stmf_scsilib_send_status(task, STATUS_CHECK,
3319 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3320 		} else {
3321 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3322 		}
3323 
3324 		return;
3325 	}
3326 
3327 	/* Report Target Port Groups */
3328 	if ((cdb0 == SCMD_MAINTENANCE_IN) &&
3329 	    ((cdb1 & 0x1F) == 0x0A)) {
3330 		stmf_scsilib_handle_report_tpgs(task, initial_dbuf);
3331 		return;
3332 	}
3333 
3334 	/* Report Identifying Information */
3335 	if ((cdb0 == SCMD_MAINTENANCE_IN) &&
3336 	    ((cdb1 & 0x1F) == 0x05)) {
3337 		sbd_handle_identifying_info(task, initial_dbuf);
3338 		return;
3339 	}
3340 
3341 	if (cdb0 == SCMD_START_STOP) {			/* Start stop */
3342 		task->task_cmd_xfer_length = 0;
3343 		if (task->task_cdb[4] & 0xFC) {
3344 			stmf_scsilib_send_status(task, STATUS_CHECK,
3345 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3346 			return;
3347 		}
3348 		if (task->task_cdb[4] & 2) {
3349 			stmf_scsilib_send_status(task, STATUS_CHECK,
3350 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3351 		} else {
3352 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3353 		}
3354 		return;
3355 
3356 	}
3357 
3358 	if ((cdb0 == SCMD_MODE_SENSE) || (cdb0 == SCMD_MODE_SENSE_G1)) {
3359 		uint8_t *p;
3360 		p = kmem_zalloc(512, KM_SLEEP);
3361 		sbd_handle_mode_sense(task, initial_dbuf, p);
3362 		kmem_free(p, 512);
3363 		return;
3364 	}
3365 
3366 	if ((cdb0 == SCMD_MODE_SELECT) || (cdb0 == SCMD_MODE_SELECT_G1)) {
3367 		sbd_handle_mode_select(task, initial_dbuf);
3368 		return;
3369 	}
3370 
3371 	if ((cdb0 == SCMD_UNMAP) && (sl->sl_flags & SL_UNMAP_ENABLED)) {
3372 		sbd_handle_unmap(task, initial_dbuf);
3373 		return;
3374 	}
3375 
3376 	if ((cdb0 == SCMD_WRITE_SAME_G4) || (cdb0 == SCMD_WRITE_SAME_G1)) {
3377 		sbd_handle_write_same(task, initial_dbuf);
3378 		return;
3379 	}
3380 
3381 	if (cdb0 == SCMD_TEST_UNIT_READY) {	/* Test unit ready */
3382 		task->task_cmd_xfer_length = 0;
3383 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3384 		return;
3385 	}
3386 
3387 	if (cdb0 == SCMD_READ_CAPACITY) {		/* Read Capacity */
3388 		sbd_handle_read_capacity(task, initial_dbuf);
3389 		return;
3390 	}
3391 
3392 	if (cdb0 == SCMD_SVC_ACTION_IN_G4) { /* Read Capacity or read long */
3393 		if (cdb1 == SSVC_ACTION_READ_CAPACITY_G4) {
3394 			sbd_handle_read_capacity(task, initial_dbuf);
3395 			return;
3396 		/*
3397 		 * } else if (cdb1 == SSVC_ACTION_READ_LONG_G4) {
3398 		 * 	sbd_handle_read(task, initial_dbuf);
3399 		 * 	return;
3400 		 */
3401 		}
3402 	}
3403 
3404 	/*
3405 	 * if (cdb0 == SCMD_SVC_ACTION_OUT_G4) {
3406 	 *	if (cdb1 == SSVC_ACTION_WRITE_LONG_G4) {
3407 	 *		 sbd_handle_write(task, initial_dbuf);
3408 	 * 		return;
3409 	 *	}
3410 	 * }
3411 	 */
3412 
3413 	if (cdb0 == SCMD_VERIFY) {
3414 		/*
3415 		 * Something more likely needs to be done here.
3416 		 */
3417 		task->task_cmd_xfer_length = 0;
3418 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3419 		return;
3420 	}
3421 
3422 	if (cdb0 == SCMD_SYNCHRONIZE_CACHE ||
3423 	    cdb0 == SCMD_SYNCHRONIZE_CACHE_G4) {
3424 		sbd_handle_sync_cache(task, initial_dbuf);
3425 		return;
3426 	}
3427 
3428 	/*
3429 	 * Write and Verify use the same path as write, but don't clutter the
3430 	 * performance path above with checking for write_verify opcodes.  We
3431 	 * rely on zfs's integrity checks for the "Verify" part of Write &
3432 	 * Verify.  (Even if we did a read to "verify" we'd merely be reading
3433 	 * cache, not actual media.)
3434 	 * Therefore we
3435 	 *   a) only support this if sbd_is_zvol, and
3436 	 *   b) run the IO through the normal write path with a forced
3437 	 *	sbd_flush_data_cache at the end.
3438 	 */
3439 
3440 	if ((sl->sl_flags & SL_ZFS_META) && (
3441 	    cdb0 == SCMD_WRITE_VERIFY ||
3442 	    cdb0 == SCMD_WRITE_VERIFY_G4 ||
3443 	    cdb0 == SCMD_WRITE_VERIFY_G5)) {
3444 		sbd_handle_write(task, initial_dbuf);
3445 		return;
3446 	}
3447 
3448 	stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_INVALID_OPCODE);
3449 }
3450 
3451 void
3452 sbd_dbuf_xfer_done(struct scsi_task *task, struct stmf_data_buf *dbuf)
3453 {
3454 	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3455 
3456 	if (dbuf->db_flags & DB_LU_DATA_BUF) {
3457 		/*
3458 		 * Buffers passed in from the LU always complete
3459 		 * even if the task is no longer active.
3460 		 */
3461 		ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
3462 		ASSERT(scmd);
3463 		switch (scmd->cmd_type) {
3464 		case (SBD_CMD_SCSI_READ):
3465 			sbd_handle_sgl_read_xfer_completion(task, scmd, dbuf);
3466 			break;
3467 		case (SBD_CMD_SCSI_WRITE):
3468 			sbd_handle_sgl_write_xfer_completion(task, scmd, dbuf);
3469 			break;
3470 		default:
3471 			cmn_err(CE_PANIC, "Unknown cmd type, task = %p",
3472 			    (void *)task);
3473 			break;
3474 		}
3475 		return;
3476 	}
3477 
3478 	if ((scmd == NULL) || ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0))
3479 		return;
3480 
3481 	switch (scmd->cmd_type) {
3482 	case (SBD_CMD_SCSI_READ):
3483 		sbd_handle_read_xfer_completion(task, scmd, dbuf);
3484 		break;
3485 
3486 	case (SBD_CMD_SCSI_WRITE):
3487 		if ((task->task_cdb[0] == SCMD_WRITE_SAME_G1) ||
3488 		    (task->task_cdb[0] == SCMD_WRITE_SAME_G4)) {
3489 			sbd_handle_write_same_xfer_completion(task, scmd, dbuf,
3490 			    1);
3491 		} else {
3492 			sbd_handle_write_xfer_completion(task, scmd, dbuf, 1);
3493 		}
3494 		break;
3495 
3496 	case (SBD_CMD_SMALL_READ):
3497 		sbd_handle_short_read_xfer_completion(task, scmd, dbuf);
3498 		break;
3499 
3500 	case (SBD_CMD_SMALL_WRITE):
3501 		sbd_handle_short_write_xfer_completion(task, dbuf);
3502 		break;
3503 
3504 	default:
3505 		cmn_err(CE_PANIC, "Unknown cmd type, task = %p", (void *)task);
3506 		break;
3507 	}
3508 }
3509 
3510 /* ARGSUSED */
3511 void
3512 sbd_send_status_done(struct scsi_task *task)
3513 {
3514 	cmn_err(CE_PANIC,
3515 	    "sbd_send_status_done: this should not have been called");
3516 }
3517 
3518 void
3519 sbd_task_free(struct scsi_task *task)
3520 {
3521 	if (task->task_lu_private) {
3522 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3523 		if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
3524 			cmn_err(CE_PANIC, "cmd is active, task = %p",
3525 			    (void *)task);
3526 		}
3527 		kmem_free(scmd, sizeof (sbd_cmd_t));
3528 	}
3529 }
3530 
3531 /*
3532  * Aborts are synchronus w.r.t. I/O AND
3533  * All the I/O which SBD does is synchronous AND
3534  * Everything within a task is single threaded.
3535  *   IT MEANS
3536  * If this function is called, we are doing nothing with this task
3537  * inside of sbd module.
3538  */
3539 /* ARGSUSED */
3540 stmf_status_t
3541 sbd_abort(struct stmf_lu *lu, int abort_cmd, void *arg, uint32_t flags)
3542 {
3543 	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3544 	scsi_task_t *task;
3545 
3546 	if (abort_cmd == STMF_LU_RESET_STATE) {
3547 		return (sbd_lu_reset_state(lu));
3548 	}
3549 
3550 	if (abort_cmd == STMF_LU_ITL_HANDLE_REMOVED) {
3551 		sbd_check_and_clear_scsi2_reservation(sl, (sbd_it_data_t *)arg);
3552 		sbd_remove_it_handle(sl, (sbd_it_data_t *)arg);
3553 		return (STMF_SUCCESS);
3554 	}
3555 
3556 	ASSERT(abort_cmd == STMF_LU_ABORT_TASK);
3557 	task = (scsi_task_t *)arg;
3558 	if (task->task_lu_private) {
3559 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3560 
3561 		if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
3562 			if (scmd->flags & SBD_SCSI_CMD_TRANS_DATA) {
3563 				kmem_free(scmd->trans_data,
3564 				    scmd->trans_data_len);
3565 				scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
3566 			}
3567 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
3568 			return (STMF_ABORT_SUCCESS);
3569 		}
3570 	}
3571 
3572 	return (STMF_NOT_FOUND);
3573 }
3574 
3575 /*
3576  * This function is called during task clean-up if the
3577  * DB_LU_FLAG is set on the dbuf. This should only be called for
3578  * abort processing after sbd_abort has been called for the task.
3579  */
3580 void
3581 sbd_dbuf_free(struct scsi_task *task, struct stmf_data_buf *dbuf)
3582 {
3583 	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3584 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
3585 
3586 	ASSERT(dbuf->db_lu_private);
3587 	ASSERT(scmd && scmd->nbufs > 0);
3588 	ASSERT((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0);
3589 	ASSERT(dbuf->db_flags & DB_LU_DATA_BUF);
3590 	ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
3591 	ASSERT((curthread->t_flag & T_INTR_THREAD) == 0);
3592 
3593 	if (scmd->cmd_type == SBD_CMD_SCSI_READ) {
3594 		sbd_zvol_rele_read_bufs(sl, dbuf);
3595 	} else if (scmd->cmd_type == SBD_CMD_SCSI_WRITE) {
3596 		sbd_zvol_rele_write_bufs_abort(sl, dbuf);
3597 	} else {
3598 		cmn_err(CE_PANIC, "Unknown cmd type %d, task = %p",
3599 		    scmd->cmd_type, (void *)task);
3600 	}
3601 	if (--scmd->nbufs == 0)
3602 		rw_exit(&sl->sl_access_state_lock);
3603 	stmf_teardown_dbuf(task, dbuf);
3604 	stmf_free(dbuf);
3605 }
3606 
3607 /* ARGSUSED */
3608 void
3609 sbd_ctl(struct stmf_lu *lu, int cmd, void *arg)
3610 {
3611 	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3612 	stmf_change_status_t st;
3613 
3614 	ASSERT((cmd == STMF_CMD_LU_ONLINE) ||
3615 	    (cmd == STMF_CMD_LU_OFFLINE) ||
3616 	    (cmd == STMF_ACK_LU_ONLINE_COMPLETE) ||
3617 	    (cmd == STMF_ACK_LU_OFFLINE_COMPLETE));
3618 
3619 	st.st_completion_status = STMF_SUCCESS;
3620 	st.st_additional_info = NULL;
3621 
3622 	switch (cmd) {
3623 	case STMF_CMD_LU_ONLINE:
3624 		if (sl->sl_state == STMF_STATE_ONLINE)
3625 			st.st_completion_status = STMF_ALREADY;
3626 		else if (sl->sl_state != STMF_STATE_OFFLINE)
3627 			st.st_completion_status = STMF_FAILURE;
3628 		if (st.st_completion_status == STMF_SUCCESS) {
3629 			sl->sl_state = STMF_STATE_ONLINE;
3630 			sl->sl_state_not_acked = 1;
3631 		}
3632 		(void) stmf_ctl(STMF_CMD_LU_ONLINE_COMPLETE, lu, &st);
3633 		break;
3634 
3635 	case STMF_CMD_LU_OFFLINE:
3636 		if (sl->sl_state == STMF_STATE_OFFLINE)
3637 			st.st_completion_status = STMF_ALREADY;
3638 		else if (sl->sl_state != STMF_STATE_ONLINE)
3639 			st.st_completion_status = STMF_FAILURE;
3640 		if (st.st_completion_status == STMF_SUCCESS) {
3641 			sl->sl_flags &= ~(SL_MEDIUM_REMOVAL_PREVENTED |
3642 			    SL_LU_HAS_SCSI2_RESERVATION);
3643 			sl->sl_state = STMF_STATE_OFFLINE;
3644 			sl->sl_state_not_acked = 1;
3645 			sbd_pgr_reset(sl);
3646 		}
3647 		(void) stmf_ctl(STMF_CMD_LU_OFFLINE_COMPLETE, lu, &st);
3648 		break;
3649 
3650 	case STMF_ACK_LU_ONLINE_COMPLETE:
3651 		/* Fallthrough */
3652 	case STMF_ACK_LU_OFFLINE_COMPLETE:
3653 		sl->sl_state_not_acked = 0;
3654 		break;
3655 
3656 	}
3657 }
3658 
3659 /* ARGSUSED */
3660 stmf_status_t
3661 sbd_info(uint32_t cmd, stmf_lu_t *lu, void *arg, uint8_t *buf,
3662     uint32_t *bufsizep)
3663 {
3664 	return (STMF_NOT_SUPPORTED);
3665 }
3666 
3667 stmf_status_t
3668 sbd_lu_reset_state(stmf_lu_t *lu)
3669 {
3670 	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3671 
3672 	mutex_enter(&sl->sl_lock);
3673 	if (sl->sl_flags & SL_SAVED_WRITE_CACHE_DISABLE) {
3674 		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
3675 		mutex_exit(&sl->sl_lock);
3676 		if (sl->sl_access_state == SBD_LU_ACTIVE) {
3677 			(void) sbd_wcd_set(1, sl);
3678 		}
3679 	} else {
3680 		sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
3681 		mutex_exit(&sl->sl_lock);
3682 		if (sl->sl_access_state == SBD_LU_ACTIVE) {
3683 			(void) sbd_wcd_set(0, sl);
3684 		}
3685 	}
3686 	sbd_pgr_reset(sl);
3687 	sbd_check_and_clear_scsi2_reservation(sl, NULL);
3688 	if (stmf_deregister_all_lu_itl_handles(lu) != STMF_SUCCESS) {
3689 		return (STMF_FAILURE);
3690 	}
3691 	return (STMF_SUCCESS);
3692 }
3693 
3694 sbd_status_t
3695 sbd_flush_data_cache(sbd_lu_t *sl, int fsync_done)
3696 {
3697 	int r = 0;
3698 	int ret;
3699 
3700 	if (fsync_done)
3701 		goto over_fsync;
3702 	if ((sl->sl_data_vtype == VREG) || (sl->sl_data_vtype == VBLK)) {
3703 		if (VOP_FSYNC(sl->sl_data_vp, FSYNC, kcred, NULL))
3704 			return (SBD_FAILURE);
3705 	}
3706 over_fsync:
3707 	if (((sl->sl_data_vtype == VCHR) || (sl->sl_data_vtype == VBLK)) &&
3708 	    ((sl->sl_flags & SL_NO_DATA_DKIOFLUSH) == 0)) {
3709 		ret = VOP_IOCTL(sl->sl_data_vp, DKIOCFLUSHWRITECACHE, NULL,
3710 		    FKIOCTL, kcred, &r, NULL);
3711 		if ((ret == ENOTTY) || (ret == ENOTSUP)) {
3712 			mutex_enter(&sl->sl_lock);
3713 			sl->sl_flags |= SL_NO_DATA_DKIOFLUSH;
3714 			mutex_exit(&sl->sl_lock);
3715 		} else if (ret != 0) {
3716 			return (SBD_FAILURE);
3717 		}
3718 	}
3719 
3720 	return (SBD_SUCCESS);
3721 }
3722 
3723 /* ARGSUSED */
3724 static void
3725 sbd_handle_sync_cache(struct scsi_task *task,
3726     struct stmf_data_buf *initial_dbuf)
3727 {
3728 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
3729 	uint64_t	lba, laddr;
3730 	sbd_status_t	sret;
3731 	uint32_t	len;
3732 	int		is_g4 = 0;
3733 	int		immed;
3734 
3735 	task->task_cmd_xfer_length = 0;
3736 	/*
3737 	 * Determine if this is a 10 or 16 byte CDB
3738 	 */
3739 
3740 	if (task->task_cdb[0] == SCMD_SYNCHRONIZE_CACHE_G4)
3741 		is_g4 = 1;
3742 
3743 	/*
3744 	 * Determine other requested parameters
3745 	 *
3746 	 * We don't have a non-volatile cache, so don't care about SYNC_NV.
3747 	 * Do not support the IMMED bit.
3748 	 */
3749 
3750 	immed = (task->task_cdb[1] & 0x02);
3751 
3752 	if (immed) {
3753 		stmf_scsilib_send_status(task, STATUS_CHECK,
3754 		    STMF_SAA_INVALID_FIELD_IN_CDB);
3755 		return;
3756 	}
3757 
3758 	/*
3759 	 * Check to be sure we're not being asked to sync an LBA
3760 	 * that is out of range.  While checking, verify reserved fields.
3761 	 */
3762 
3763 	if (is_g4) {
3764 		if ((task->task_cdb[1] & 0xf9) || task->task_cdb[14] ||
3765 		    task->task_cdb[15]) {
3766 			stmf_scsilib_send_status(task, STATUS_CHECK,
3767 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3768 			return;
3769 		}
3770 
3771 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
3772 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
3773 	} else {
3774 		if ((task->task_cdb[1] & 0xf9) || task->task_cdb[6] ||
3775 		    task->task_cdb[9]) {
3776 			stmf_scsilib_send_status(task, STATUS_CHECK,
3777 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3778 			return;
3779 		}
3780 
3781 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
3782 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
3783 	}
3784 
3785 	laddr = lba << sl->sl_data_blocksize_shift;
3786 	len <<= sl->sl_data_blocksize_shift;
3787 
3788 	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
3789 		stmf_scsilib_send_status(task, STATUS_CHECK,
3790 		    STMF_SAA_LBA_OUT_OF_RANGE);
3791 		return;
3792 	}
3793 
3794 	sret = sbd_flush_data_cache(sl, 0);
3795 	if (sret != SBD_SUCCESS) {
3796 		stmf_scsilib_send_status(task, STATUS_CHECK,
3797 		    STMF_SAA_WRITE_ERROR);
3798 		return;
3799 	}
3800 
3801 	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3802 }
3803