1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24 * Copyright (c) 2013 by Delphix. All rights reserved.
25 */
26
27#include <sys/conf.h>
28#include <sys/file.h>
29#include <sys/ddi.h>
30#include <sys/sunddi.h>
31#include <sys/modctl.h>
32#include <sys/scsi/scsi.h>
33#include <sys/scsi/impl/scsi_reset_notify.h>
34#include <sys/scsi/generic/mode.h>
35#include <sys/disp.h>
36#include <sys/byteorder.h>
37#include <sys/atomic.h>
38#include <sys/sdt.h>
39#include <sys/dkio.h>
40
41#include <sys/stmf.h>
42#include <sys/lpif.h>
43#include <sys/portif.h>
44#include <sys/stmf_ioctl.h>
45#include <sys/stmf_sbd_ioctl.h>
46
47#include "stmf_sbd.h"
48#include "sbd_impl.h"
49
50#define	SCSI2_CONFLICT_FREE_CMDS(cdb)	( \
51	/* ----------------------- */                                      \
52	/* Refer Both		   */                                      \
53	/* SPC-2 (rev 20) Table 10 */                                      \
54	/* SPC-3 (rev 23) Table 31 */                                      \
55	/* ----------------------- */                                      \
56	((cdb[0]) == SCMD_INQUIRY)					|| \
57	((cdb[0]) == SCMD_LOG_SENSE_G1)					|| \
58	((cdb[0]) == SCMD_RELEASE)					|| \
59	((cdb[0]) == SCMD_RELEASE_G1)					|| \
60	((cdb[0]) == SCMD_REPORT_LUNS)					|| \
61	((cdb[0]) == SCMD_REQUEST_SENSE)				|| \
62	/* PREVENT ALLOW MEDIUM REMOVAL with prevent == 0 */               \
63	((((cdb[0]) == SCMD_DOORLOCK) && (((cdb[4]) & 0x3) == 0)))	|| \
64	/* SERVICE ACTION IN with READ MEDIA SERIAL NUMBER (0x01) */       \
65	(((cdb[0]) == SCMD_SVC_ACTION_IN_G5) && (                          \
66	    ((cdb[1]) & 0x1F) == 0x01))					|| \
67	/* MAINTENANCE IN with service actions REPORT ALIASES (0x0Bh) */   \
68	/* REPORT DEVICE IDENTIFIER (0x05)  REPORT PRIORITY (0x0Eh) */     \
69	/* REPORT TARGET PORT GROUPS (0x0A) REPORT TIMESTAMP (0x0F) */     \
70	(((cdb[0]) == SCMD_MAINTENANCE_IN) && (                            \
71	    (((cdb[1]) & 0x1F) == 0x0B) ||                                 \
72	    (((cdb[1]) & 0x1F) == 0x05) ||                                 \
73	    (((cdb[1]) & 0x1F) == 0x0E) ||                                 \
74	    (((cdb[1]) & 0x1F) == 0x0A) ||                                 \
75	    (((cdb[1]) & 0x1F) == 0x0F)))				|| \
76	/* ----------------------- */                                      \
77	/* SBC-3 (rev 17) Table 3  */                                      \
78	/* ----------------------- */                                      \
79	/* READ CAPACITY(10) */                                            \
80	((cdb[0]) == SCMD_READ_CAPACITY)				|| \
81	/* READ CAPACITY(16) */                                            \
82	(((cdb[0]) == SCMD_SVC_ACTION_IN_G4) && (                          \
83	    ((cdb[1]) & 0x1F) == 0x10))					|| \
84	/* START STOP UNIT with START bit 0 and POWER CONDITION 0  */      \
85	(((cdb[0]) == SCMD_START_STOP) && (                                \
86	    (((cdb[4]) & 0xF0) == 0) && (((cdb[4]) & 0x01) == 0))))
87/* End of SCSI2_CONFLICT_FREE_CMDS */
88
89stmf_status_t sbd_lu_reset_state(stmf_lu_t *lu);
90static void sbd_handle_sync_cache(struct scsi_task *task,
91    struct stmf_data_buf *initial_dbuf);
92void sbd_handle_read_xfer_completion(struct scsi_task *task,
93    sbd_cmd_t *scmd, struct stmf_data_buf *dbuf);
94void sbd_handle_short_write_xfer_completion(scsi_task_t *task,
95    stmf_data_buf_t *dbuf);
96void sbd_handle_short_write_transfers(scsi_task_t *task,
97    stmf_data_buf_t *dbuf, uint32_t cdb_xfer_size);
98void sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf,
99    uint32_t buflen);
100void sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf);
101void sbd_handle_identifying_info(scsi_task_t *task, stmf_data_buf_t *dbuf);
102
103static void sbd_handle_unmap_xfer(scsi_task_t *task, uint8_t *buf,
104    uint32_t buflen);
105static void sbd_handle_unmap(scsi_task_t *task, stmf_data_buf_t *dbuf);
106
107extern void sbd_pgr_initialize_it(scsi_task_t *, sbd_it_data_t *);
108extern int sbd_pgr_reservation_conflict(scsi_task_t *);
109extern void sbd_pgr_reset(sbd_lu_t *);
110extern void sbd_pgr_remove_it_handle(sbd_lu_t *, sbd_it_data_t *);
111extern void sbd_handle_pgr_in_cmd(scsi_task_t *, stmf_data_buf_t *);
112extern void sbd_handle_pgr_out_cmd(scsi_task_t *, stmf_data_buf_t *);
113extern void sbd_handle_pgr_out_data(scsi_task_t *, stmf_data_buf_t *);
114void sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
115    int first_xfer);
116static void sbd_handle_write_same(scsi_task_t *task,
117    struct stmf_data_buf *initial_dbuf);
118static void sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
119    struct stmf_data_buf *dbuf, uint8_t dbuf_reusable);
120static void sbd_handle_write_same_xfer_completion(struct scsi_task *task,
121    sbd_cmd_t *scmd, struct stmf_data_buf *dbuf, uint8_t dbuf_reusable);
122/*
123 * IMPORTANT NOTE:
124 * =================
125 * The whole world here is based on the assumption that everything within
126 * a scsi task executes in a single threaded manner, even the aborts.
127 * Dont ever change that. There wont be any performance gain but there
128 * will be tons of race conditions.
129 */
130
131void
132sbd_do_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
133					struct stmf_data_buf *dbuf)
134{
135	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
136	uint64_t laddr;
137	uint32_t len, buflen, iolen;
138	int ndx;
139	int bufs_to_take;
140
141	/* Lets try not to hog all the buffers the port has. */
142	bufs_to_take = ((task->task_max_nbufs > 2) &&
143	    (task->task_cmd_xfer_length < (32 * 1024))) ? 2 :
144	    task->task_max_nbufs;
145
146	len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size : scmd->len;
147	laddr = scmd->addr + scmd->current_ro;
148
149	for (buflen = 0, ndx = 0; (buflen < len) &&
150	    (ndx < dbuf->db_sglist_length); ndx++) {
151		iolen = min(len - buflen, dbuf->db_sglist[ndx].seg_length);
152		if (iolen == 0)
153			break;
154		if (sbd_data_read(sl, task, laddr, (uint64_t)iolen,
155		    dbuf->db_sglist[ndx].seg_addr) != STMF_SUCCESS) {
156			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
157			/* Do not need to do xfer anymore, just complete it */
158			dbuf->db_data_size = 0;
159			dbuf->db_xfer_status = STMF_SUCCESS;
160			sbd_handle_read_xfer_completion(task, scmd, dbuf);
161			return;
162		}
163		buflen += iolen;
164		laddr += (uint64_t)iolen;
165	}
166	dbuf->db_relative_offset = scmd->current_ro;
167	dbuf->db_data_size = buflen;
168	dbuf->db_flags = DB_DIRECTION_TO_RPORT;
169	(void) stmf_xfer_data(task, dbuf, 0);
170	scmd->len -= buflen;
171	scmd->current_ro += buflen;
172	if (scmd->len && (scmd->nbufs < bufs_to_take)) {
173		uint32_t maxsize, minsize, old_minsize;
174
175		maxsize = (scmd->len > (128*1024)) ? 128*1024 : scmd->len;
176		minsize = maxsize >> 2;
177		do {
178			/*
179			 * A bad port implementation can keep on failing the
180			 * the request but keep on sending us a false
181			 * minsize.
182			 */
183			old_minsize = minsize;
184			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
185		} while ((dbuf == NULL) && (old_minsize > minsize) &&
186		    (minsize >= 512));
187		if (dbuf == NULL) {
188			return;
189		}
190		scmd->nbufs++;
191		sbd_do_read_xfer(task, scmd, dbuf);
192	}
193}
194
195/*
196 * sbd_zcopy: Bail-out switch for reduced copy path.
197 *
198 * 0 - read & write off
199 * 1 - read & write on
200 * 2 - only read on
201 * 4 - only write on
202 */
203int sbd_zcopy = 1;	/* enable zcopy read & write path */
204uint32_t sbd_max_xfer_len = 0;		/* Valid if non-zero */
205uint32_t sbd_1st_xfer_len = 0;		/* Valid if non-zero */
206uint32_t sbd_copy_threshold = 0;		/* Valid if non-zero */
207
208static void
209sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
210{
211	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
212	sbd_zvol_io_t *zvio;
213	int ret, final_xfer;
214	uint64_t offset;
215	uint32_t xfer_len, max_len, first_len;
216	stmf_status_t xstat;
217	stmf_data_buf_t *dbuf;
218	uint_t nblks;
219	uint64_t blksize = sl->sl_blksize;
220	size_t db_private_sz;
221	uintptr_t pad;
222
223	ASSERT(rw_read_held(&sl->sl_access_state_lock));
224	ASSERT((sl->sl_flags & SL_MEDIA_LOADED) != 0);
225
226	/*
227	 * Calculate the limits on xfer_len to the minimum of :
228	 *    - task limit
229	 *    - lun limit
230	 *    - sbd global limit if set
231	 *    - first xfer limit if set
232	 *
233	 * First, protect against silly over-ride value
234	 */
235	if (sbd_max_xfer_len && ((sbd_max_xfer_len % DEV_BSIZE) != 0)) {
236		cmn_err(CE_WARN, "sbd_max_xfer_len invalid %d, resetting\n",
237		    sbd_max_xfer_len);
238		sbd_max_xfer_len = 0;
239	}
240	if (sbd_1st_xfer_len && ((sbd_1st_xfer_len % DEV_BSIZE) != 0)) {
241		cmn_err(CE_WARN, "sbd_1st_xfer_len invalid %d, resetting\n",
242		    sbd_1st_xfer_len);
243		sbd_1st_xfer_len = 0;
244	}
245
246	max_len = MIN(task->task_max_xfer_len, sl->sl_max_xfer_len);
247	if (sbd_max_xfer_len)
248		max_len = MIN(max_len, sbd_max_xfer_len);
249	/*
250	 * Special case the first xfer if hints are set.
251	 */
252	if (first_xfer && (sbd_1st_xfer_len || task->task_1st_xfer_len)) {
253		/* global over-ride has precedence */
254		if (sbd_1st_xfer_len)
255			first_len = sbd_1st_xfer_len;
256		else
257			first_len = task->task_1st_xfer_len;
258	} else {
259		first_len = 0;
260	}
261
262	while (scmd->len && scmd->nbufs < task->task_max_nbufs) {
263
264		xfer_len = MIN(max_len, scmd->len);
265		if (first_len) {
266			xfer_len = MIN(xfer_len, first_len);
267			first_len = 0;
268		}
269		if (scmd->len == xfer_len) {
270			final_xfer = 1;
271		} else {
272			/*
273			 * Attempt to end xfer on a block boundary.
274			 * The only way this does not happen is if the
275			 * xfer_len is small enough to stay contained
276			 * within the same block.
277			 */
278			uint64_t xfer_offset, xfer_aligned_end;
279
280			final_xfer = 0;
281			xfer_offset = scmd->addr + scmd->current_ro;
282			xfer_aligned_end =
283			    P2ALIGN(xfer_offset+xfer_len, blksize);
284			if (xfer_aligned_end > xfer_offset)
285				xfer_len = xfer_aligned_end - xfer_offset;
286		}
287		/*
288		 * Allocate object to track the read and reserve
289		 * enough space for scatter/gather list.
290		 */
291		offset = scmd->addr + scmd->current_ro;
292		nblks = sbd_zvol_numsegs(sl, offset, xfer_len);
293
294		db_private_sz = sizeof (*zvio) + sizeof (uintptr_t) /* PAD */ +
295		    (nblks * sizeof (stmf_sglist_ent_t));
296		dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, db_private_sz,
297		    AF_DONTZERO);
298		/*
299		 * Setup the dbuf
300		 *
301		 * XXX Framework does not handle variable length sglists
302		 * properly, so setup db_lu_private and db_port_private
303		 * fields here. db_stmf_private is properly set for
304		 * calls to stmf_free.
305		 */
306		if (dbuf->db_port_private == NULL) {
307			/*
308			 * XXX Framework assigns space to PP after db_sglist[0]
309			 */
310			cmn_err(CE_PANIC, "db_port_private == NULL");
311		}
312		pad = (uintptr_t)&dbuf->db_sglist[nblks];
313		dbuf->db_lu_private = (void *)P2ROUNDUP(pad, sizeof (pad));
314		dbuf->db_port_private = NULL;
315		dbuf->db_buf_size = xfer_len;
316		dbuf->db_data_size = xfer_len;
317		dbuf->db_relative_offset = scmd->current_ro;
318		dbuf->db_sglist_length = (uint16_t)nblks;
319		dbuf->db_xfer_status = 0;
320		dbuf->db_handle = 0;
321
322		dbuf->db_flags = (DB_DONT_CACHE | DB_DONT_REUSE |
323		    DB_DIRECTION_TO_RPORT | DB_LU_DATA_BUF);
324		if (final_xfer)
325			dbuf->db_flags |= DB_SEND_STATUS_GOOD;
326
327		zvio = dbuf->db_lu_private;
328		/* Need absolute offset for zvol access */
329		zvio->zvio_offset = offset;
330		zvio->zvio_flags = ZVIO_SYNC;
331
332		/*
333		 * Accounting for start of read.
334		 * Note there is no buffer address for the probe yet.
335		 */
336		DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
337		    uint8_t *, NULL, uint64_t, xfer_len,
338		    uint64_t, offset, scsi_task_t *, task);
339
340		ret = sbd_zvol_alloc_read_bufs(sl, dbuf);
341
342		DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
343		    uint8_t *, NULL, uint64_t, xfer_len,
344		    uint64_t, offset, int, ret, scsi_task_t *, task);
345
346		if (ret != 0) {
347			/*
348			 * Read failure from the backend.
349			 */
350			stmf_free(dbuf);
351			if (scmd->nbufs == 0) {
352				/* nothing queued, just finish */
353				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
354				stmf_scsilib_send_status(task, STATUS_CHECK,
355				    STMF_SAA_READ_ERROR);
356				rw_exit(&sl->sl_access_state_lock);
357			} else {
358				/* process failure when other dbufs finish */
359				scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
360			}
361			return;
362		}
363
364
365		/*
366		 * Allow PP to do setup
367		 */
368		xstat = stmf_setup_dbuf(task, dbuf, 0);
369		if (xstat != STMF_SUCCESS) {
370			/*
371			 * This could happen if the driver cannot get the
372			 * DDI resources it needs for this request.
373			 * If other dbufs are queued, try again when the next
374			 * one completes, otherwise give up.
375			 */
376			sbd_zvol_rele_read_bufs(sl, dbuf);
377			stmf_free(dbuf);
378			if (scmd->nbufs > 0) {
379				/* completion of previous dbuf will retry */
380				return;
381			}
382			/*
383			 * Done with this command.
384			 */
385			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
386			if (first_xfer)
387				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
388			else
389				stmf_scsilib_send_status(task, STATUS_CHECK,
390				    STMF_SAA_READ_ERROR);
391			rw_exit(&sl->sl_access_state_lock);
392			return;
393		}
394		/*
395		 * dbuf is now queued on task
396		 */
397		scmd->nbufs++;
398
399		/* XXX leave this in for FW? */
400		DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
401		    struct stmf_data_buf *, dbuf, uint64_t, offset,
402		    uint32_t, xfer_len);
403		/*
404		 * Do not pass STMF_IOF_LU_DONE so that the zvol
405		 * state can be released in the completion callback.
406		 */
407		xstat = stmf_xfer_data(task, dbuf, 0);
408		switch (xstat) {
409		case STMF_SUCCESS:
410			break;
411		case STMF_BUSY:
412			/*
413			 * The dbuf is queued on the task, but unknown
414			 * to the PP, thus no completion will occur.
415			 */
416			sbd_zvol_rele_read_bufs(sl, dbuf);
417			stmf_teardown_dbuf(task, dbuf);
418			stmf_free(dbuf);
419			scmd->nbufs--;
420			if (scmd->nbufs > 0) {
421				/* completion of previous dbuf will retry */
422				return;
423			}
424			/*
425			 * Done with this command.
426			 */
427			rw_exit(&sl->sl_access_state_lock);
428			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
429			if (first_xfer)
430				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
431			else
432				stmf_scsilib_send_status(task, STATUS_CHECK,
433				    STMF_SAA_READ_ERROR);
434			return;
435		case STMF_ABORTED:
436			/*
437			 * Completion from task_done will cleanup
438			 */
439			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
440			return;
441		}
442		/*
443		 * Update the xfer progress.
444		 */
445		ASSERT(scmd->len >= xfer_len);
446		scmd->len -= xfer_len;
447		scmd->current_ro += xfer_len;
448	}
449}
450
451void
452sbd_handle_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
453				struct stmf_data_buf *dbuf)
454{
455	if (dbuf->db_xfer_status != STMF_SUCCESS) {
456		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
457		    dbuf->db_xfer_status, NULL);
458		return;
459	}
460	task->task_nbytes_transferred += dbuf->db_data_size;
461	if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
462		stmf_free_dbuf(task, dbuf);
463		scmd->nbufs--;
464		if (scmd->nbufs)
465			return;	/* wait for all buffers to complete */
466		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
467		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL)
468			stmf_scsilib_send_status(task, STATUS_CHECK,
469			    STMF_SAA_READ_ERROR);
470		else
471			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
472		return;
473	}
474	if (dbuf->db_flags & DB_DONT_REUSE) {
475		/* allocate new dbuf */
476		uint32_t maxsize, minsize, old_minsize;
477		stmf_free_dbuf(task, dbuf);
478
479		maxsize = (scmd->len > (128*1024)) ? 128*1024 : scmd->len;
480		minsize = maxsize >> 2;
481		do {
482			old_minsize = minsize;
483			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
484		} while ((dbuf == NULL) && (old_minsize > minsize) &&
485		    (minsize >= 512));
486		if (dbuf == NULL) {
487			scmd->nbufs --;
488			if (scmd->nbufs == 0) {
489				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
490				    STMF_ALLOC_FAILURE, NULL);
491			}
492			return;
493		}
494	}
495	sbd_do_read_xfer(task, scmd, dbuf);
496}
497
498/*
499 * This routine must release the DMU resources and free the dbuf
500 * in all cases.  If this is the final dbuf of the task, then drop
501 * the reader lock on the LU state. If there are no errors and more
502 * work to do, then queue more xfer operations.
503 */
504void
505sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
506				struct stmf_data_buf *dbuf)
507{
508	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
509	stmf_status_t xfer_status;
510	uint32_t data_size;
511	int scmd_err;
512
513	ASSERT(dbuf->db_lu_private);
514	ASSERT(scmd->cmd_type == SBD_CMD_SCSI_READ);
515
516	scmd->nbufs--;	/* account for this dbuf */
517	/*
518	 * Release the DMU resources.
519	 */
520	sbd_zvol_rele_read_bufs(sl, dbuf);
521	/*
522	 * Release the dbuf after retrieving needed fields.
523	 */
524	xfer_status = dbuf->db_xfer_status;
525	data_size = dbuf->db_data_size;
526	stmf_teardown_dbuf(task, dbuf);
527	stmf_free(dbuf);
528	/*
529	 * Release the state lock if this is the last completion.
530	 * If this is the last dbuf on task and all data has been
531	 * transferred or an error encountered, then no more dbufs
532	 * will be queued.
533	 */
534	scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
535	    (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
536	    (xfer_status != STMF_SUCCESS));
537	if (scmd->nbufs == 0 && (scmd->len == 0 || scmd_err)) {
538		/* all DMU state has been released */
539		rw_exit(&sl->sl_access_state_lock);
540	}
541
542	/*
543	 * If there have been no errors, either complete the task
544	 * or issue more data xfer operations.
545	 */
546	if (!scmd_err) {
547		/*
548		 * This chunk completed successfully
549		 */
550		task->task_nbytes_transferred += data_size;
551		if (scmd->nbufs == 0 && scmd->len == 0) {
552			/*
553			 * This command completed successfully
554			 *
555			 * Status was sent along with data, so no status
556			 * completion will occur. Tell stmf we are done.
557			 */
558			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
559			stmf_task_lu_done(task);
560			return;
561		}
562		/*
563		 * Start more xfers
564		 */
565		sbd_do_sgl_read_xfer(task, scmd, 0);
566		return;
567	}
568	/*
569	 * Sort out the failure
570	 */
571	if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
572		/*
573		 * If a previous error occurred, leave the command active
574		 * and wait for the last completion to send the status check.
575		 */
576		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
577			if (scmd->nbufs == 0) {
578				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
579				stmf_scsilib_send_status(task, STATUS_CHECK,
580				    STMF_SAA_READ_ERROR);
581			}
582			return;
583		}
584		/*
585		 * Must have been a failure on current dbuf
586		 */
587		ASSERT(xfer_status != STMF_SUCCESS);
588		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
589		stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
590	}
591}
592
593void
594sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
595				struct stmf_data_buf *dbuf)
596{
597	sbd_zvol_io_t *zvio = dbuf->db_lu_private;
598	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
599	int ret;
600	int scmd_err, scmd_xfer_done;
601	stmf_status_t xfer_status = dbuf->db_xfer_status;
602	uint32_t data_size = dbuf->db_data_size;
603
604	ASSERT(zvio);
605
606	/*
607	 * Allow PP to free up resources before releasing the write bufs
608	 * as writing to the backend could take some time.
609	 */
610	stmf_teardown_dbuf(task, dbuf);
611
612	scmd->nbufs--;	/* account for this dbuf */
613	/*
614	 * All data was queued and this is the last completion,
615	 * but there could still be an error.
616	 */
617	scmd_xfer_done = (scmd->len == 0 && scmd->nbufs == 0);
618	scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
619	    (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
620	    (xfer_status != STMF_SUCCESS));
621
622	DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
623	    uint8_t *, NULL, uint64_t, data_size,
624	    uint64_t, zvio->zvio_offset, scsi_task_t *, task);
625
626	if (scmd_err) {
627		/* just return the write buffers */
628		sbd_zvol_rele_write_bufs_abort(sl, dbuf);
629		ret = 0;
630	} else {
631		if (scmd_xfer_done)
632			zvio->zvio_flags = ZVIO_COMMIT;
633		else
634			zvio->zvio_flags = 0;
635		/* write the data */
636		ret = sbd_zvol_rele_write_bufs(sl, dbuf);
637	}
638
639	DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
640	    uint8_t *, NULL, uint64_t, data_size,
641	    uint64_t, zvio->zvio_offset, int, ret,  scsi_task_t *, task);
642
643	if (ret != 0) {
644		/* update the error flag */
645		scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
646		scmd_err = 1;
647	}
648
649	/* Release the dbuf */
650	stmf_free(dbuf);
651
652	/*
653	 * Release the state lock if this is the last completion.
654	 * If this is the last dbuf on task and all data has been
655	 * transferred or an error encountered, then no more dbufs
656	 * will be queued.
657	 */
658	if (scmd->nbufs == 0 && (scmd->len == 0 || scmd_err)) {
659		/* all DMU state has been released */
660		rw_exit(&sl->sl_access_state_lock);
661	}
662	/*
663	 * If there have been no errors, either complete the task
664	 * or issue more data xfer operations.
665	 */
666	if (!scmd_err) {
667		/* This chunk completed successfully */
668		task->task_nbytes_transferred += data_size;
669		if (scmd_xfer_done) {
670			/* This command completed successfully */
671			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
672			if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
673			    (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
674				stmf_scsilib_send_status(task, STATUS_CHECK,
675				    STMF_SAA_WRITE_ERROR);
676			} else {
677				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
678			}
679			return;
680		}
681		/*
682		 * Start more xfers
683		 */
684		sbd_do_sgl_write_xfer(task, scmd, 0);
685		return;
686	}
687	/*
688	 * Sort out the failure
689	 */
690	if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
691		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
692			if (scmd->nbufs == 0) {
693				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
694				stmf_scsilib_send_status(task, STATUS_CHECK,
695				    STMF_SAA_WRITE_ERROR);
696			}
697			/*
698			 * Leave the command active until last dbuf completes.
699			 */
700			return;
701		}
702		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
703		ASSERT(xfer_status != STMF_SUCCESS);
704		stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
705	}
706}
707
708/*
709 * Handle a copy operation using the zvol interface.
710 *
711 * Similar to the sbd_data_read/write path, except it goes directly through
712 * the zvol interfaces. It can pass a port provider sglist in the
713 * form of uio which is lost through the vn_rdwr path.
714 *
715 * Returns:
716 *	STMF_SUCCESS - request handled
717 *	STMF_FAILURE - request not handled, caller must deal with error
718 */
719static stmf_status_t
720sbd_copy_rdwr(scsi_task_t *task, uint64_t laddr, stmf_data_buf_t *dbuf,
721    int cmd, int commit)
722{
723	sbd_lu_t		*sl = task->task_lu->lu_provider_private;
724	struct uio		uio;
725	struct iovec		*iov, *tiov, iov1[8];
726	uint32_t		len, resid;
727	int			ret, i, iovcnt, flags;
728	boolean_t		is_read;
729
730	ASSERT(cmd == SBD_CMD_SCSI_READ || cmd == SBD_CMD_SCSI_WRITE);
731
732	is_read = (cmd == SBD_CMD_SCSI_READ) ? B_TRUE : B_FALSE;
733	iovcnt = dbuf->db_sglist_length;
734	/* use the stack for small iovecs */
735	if (iovcnt > 8) {
736		iov = kmem_alloc(iovcnt * sizeof (*iov), KM_SLEEP);
737	} else {
738		iov = &iov1[0];
739	}
740
741	/* Convert dbuf sglist to iovec format */
742	len = dbuf->db_data_size;
743	resid = len;
744	tiov = iov;
745	for (i = 0; i < iovcnt; i++) {
746		tiov->iov_base = (caddr_t)dbuf->db_sglist[i].seg_addr;
747		tiov->iov_len = MIN(resid, dbuf->db_sglist[i].seg_length);
748		resid -= tiov->iov_len;
749		tiov++;
750	}
751	if (resid != 0) {
752		cmn_err(CE_WARN, "inconsistant sglist rem %d", resid);
753		if (iov != &iov1[0])
754			kmem_free(iov, iovcnt * sizeof (*iov));
755		return (STMF_FAILURE);
756	}
757	/* Setup the uio struct */
758	uio.uio_iov = iov;
759	uio.uio_iovcnt = iovcnt;
760	uio.uio_loffset = laddr;
761	uio.uio_segflg = (short)UIO_SYSSPACE;
762	uio.uio_resid = (uint64_t)len;
763	uio.uio_llimit = RLIM64_INFINITY;
764
765	if (is_read == B_TRUE) {
766		uio.uio_fmode = FREAD;
767		uio.uio_extflg = UIO_COPY_CACHED;
768		DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
769		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr,
770		    scsi_task_t *, task);
771
772		/* Fetch the data */
773		ret = sbd_zvol_copy_read(sl, &uio);
774
775		DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
776		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
777		    scsi_task_t *, task);
778	} else {
779		uio.uio_fmode = FWRITE;
780		uio.uio_extflg = UIO_COPY_DEFAULT;
781		DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
782		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr,
783		    scsi_task_t *, task);
784
785		flags = (commit) ? ZVIO_COMMIT : 0;
786		/* Write the data */
787		ret = sbd_zvol_copy_write(sl, &uio, flags);
788
789		DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
790		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
791		    scsi_task_t *, task);
792	}
793
794	if (iov != &iov1[0])
795		kmem_free(iov, iovcnt * sizeof (*iov));
796	if (ret != 0) {
797		/* Backend I/O error */
798		return (STMF_FAILURE);
799	}
800	return (STMF_SUCCESS);
801}
802
803void
804sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
805{
806	uint64_t lba, laddr;
807	uint32_t len;
808	uint8_t op = task->task_cdb[0];
809	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
810	sbd_cmd_t *scmd;
811	stmf_data_buf_t *dbuf;
812	int fast_path;
813
814	if (op == SCMD_READ) {
815		lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
816		len = (uint32_t)task->task_cdb[4];
817
818		if (len == 0) {
819			len = 256;
820		}
821	} else if (op == SCMD_READ_G1) {
822		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
823		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
824	} else if (op == SCMD_READ_G5) {
825		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
826		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
827	} else if (op == SCMD_READ_G4) {
828		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
829		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
830	} else {
831		stmf_scsilib_send_status(task, STATUS_CHECK,
832		    STMF_SAA_INVALID_OPCODE);
833		return;
834	}
835
836	laddr = lba << sl->sl_data_blocksize_shift;
837	len <<= sl->sl_data_blocksize_shift;
838
839	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
840		stmf_scsilib_send_status(task, STATUS_CHECK,
841		    STMF_SAA_LBA_OUT_OF_RANGE);
842		return;
843	}
844
845	task->task_cmd_xfer_length = len;
846	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
847		task->task_expected_xfer_length = len;
848	}
849
850	if (len != task->task_expected_xfer_length) {
851		fast_path = 0;
852		len = (len > task->task_expected_xfer_length) ?
853		    task->task_expected_xfer_length : len;
854	} else {
855		fast_path = 1;
856	}
857
858	if (len == 0) {
859		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
860		return;
861	}
862
863	/*
864	 * Determine if this read can directly use DMU buffers.
865	 */
866	if (sbd_zcopy & (2|1) &&		/* Debug switch */
867	    initial_dbuf == NULL &&		/* No PP buffer passed in */
868	    sl->sl_flags & SL_CALL_ZVOL &&	/* zvol backing store */
869	    (task->task_additional_flags &
870	    TASK_AF_ACCEPT_LU_DBUF))		/* PP allows it */
871	{
872		/*
873		 * Reduced copy path
874		 */
875		uint32_t copy_threshold, minsize;
876		int ret;
877
878		/*
879		 * The sl_access_state_lock will be held shared
880		 * for the entire request and released when all
881		 * dbufs have completed.
882		 */
883		rw_enter(&sl->sl_access_state_lock, RW_READER);
884		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
885			rw_exit(&sl->sl_access_state_lock);
886			stmf_scsilib_send_status(task, STATUS_CHECK,
887			    STMF_SAA_READ_ERROR);
888			return;
889		}
890
891		/*
892		 * Check if setup is more expensive than copying the data.
893		 *
894		 * Use the global over-ride sbd_zcopy_threshold if set.
895		 */
896		copy_threshold = (sbd_copy_threshold > 0) ?
897		    sbd_copy_threshold : task->task_copy_threshold;
898		minsize = len;
899		if (len < copy_threshold &&
900		    (dbuf = stmf_alloc_dbuf(task, len, &minsize, 0)) != 0) {
901
902			ret = sbd_copy_rdwr(task, laddr, dbuf,
903			    SBD_CMD_SCSI_READ, 0);
904			/* done with the backend */
905			rw_exit(&sl->sl_access_state_lock);
906			if (ret != 0) {
907				/* backend error */
908				stmf_scsilib_send_status(task, STATUS_CHECK,
909				    STMF_SAA_READ_ERROR);
910			} else {
911				/* send along good data */
912				dbuf->db_relative_offset = 0;
913				dbuf->db_data_size = len;
914				dbuf->db_flags = DB_SEND_STATUS_GOOD |
915				    DB_DIRECTION_TO_RPORT;
916				/* XXX keep for FW? */
917				DTRACE_PROBE4(sbd__xfer,
918				    struct scsi_task *, task,
919				    struct stmf_data_buf *, dbuf,
920				    uint64_t, laddr, uint32_t, len);
921				(void) stmf_xfer_data(task, dbuf,
922				    STMF_IOF_LU_DONE);
923			}
924			return;
925		}
926
927		/* committed to reduced copy */
928		if (task->task_lu_private) {
929			scmd = (sbd_cmd_t *)task->task_lu_private;
930		} else {
931			scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t),
932			    KM_SLEEP);
933			task->task_lu_private = scmd;
934		}
935		/*
936		 * Setup scmd to track read progress.
937		 */
938		scmd->flags = SBD_SCSI_CMD_ACTIVE;
939		scmd->cmd_type = SBD_CMD_SCSI_READ;
940		scmd->nbufs = 0;
941		scmd->addr = laddr;
942		scmd->len = len;
943		scmd->current_ro = 0;
944
945		/*
946		 * Kick-off the read.
947		 */
948		sbd_do_sgl_read_xfer(task, scmd, 1);
949		return;
950	}
951
952	if (initial_dbuf == NULL) {
953		uint32_t maxsize, minsize, old_minsize;
954
955		maxsize = (len > (128*1024)) ? 128*1024 : len;
956		minsize = maxsize >> 2;
957		do {
958			old_minsize = minsize;
959			initial_dbuf = stmf_alloc_dbuf(task, maxsize,
960			    &minsize, 0);
961		} while ((initial_dbuf == NULL) && (old_minsize > minsize) &&
962		    (minsize >= 512));
963		if (initial_dbuf == NULL) {
964			stmf_scsilib_send_status(task, STATUS_QFULL, 0);
965			return;
966		}
967	}
968	dbuf = initial_dbuf;
969
970	if ((dbuf->db_buf_size >= len) && fast_path &&
971	    (dbuf->db_sglist_length == 1)) {
972		if (sbd_data_read(sl, task, laddr, (uint64_t)len,
973		    dbuf->db_sglist[0].seg_addr) == STMF_SUCCESS) {
974			dbuf->db_relative_offset = 0;
975			dbuf->db_data_size = len;
976			dbuf->db_flags = DB_SEND_STATUS_GOOD |
977			    DB_DIRECTION_TO_RPORT;
978			/* XXX keep for FW? */
979			DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
980			    struct stmf_data_buf *, dbuf,
981			    uint64_t, laddr, uint32_t, len);
982			(void) stmf_xfer_data(task, dbuf, STMF_IOF_LU_DONE);
983		} else {
984			stmf_scsilib_send_status(task, STATUS_CHECK,
985			    STMF_SAA_READ_ERROR);
986		}
987		return;
988	}
989
990	if (task->task_lu_private) {
991		scmd = (sbd_cmd_t *)task->task_lu_private;
992	} else {
993		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
994		task->task_lu_private = scmd;
995	}
996	scmd->flags = SBD_SCSI_CMD_ACTIVE;
997	scmd->cmd_type = SBD_CMD_SCSI_READ;
998	scmd->nbufs = 1;
999	scmd->addr = laddr;
1000	scmd->len = len;
1001	scmd->current_ro = 0;
1002
1003	sbd_do_read_xfer(task, scmd, dbuf);
1004}
1005
1006void
1007sbd_do_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
1008    struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
1009{
1010	uint32_t len;
1011	int bufs_to_take;
1012
1013	if (scmd->len == 0) {
1014		goto DO_WRITE_XFER_DONE;
1015	}
1016
1017	/* Lets try not to hog all the buffers the port has. */
1018	bufs_to_take = ((task->task_max_nbufs > 2) &&
1019	    (task->task_cmd_xfer_length < (32 * 1024))) ? 2 :
1020	    task->task_max_nbufs;
1021
1022	if ((dbuf != NULL) &&
1023	    ((dbuf->db_flags & DB_DONT_REUSE) || (dbuf_reusable == 0))) {
1024		/* free current dbuf and allocate a new one */
1025		stmf_free_dbuf(task, dbuf);
1026		dbuf = NULL;
1027	}
1028	if (scmd->nbufs >= bufs_to_take) {
1029		goto DO_WRITE_XFER_DONE;
1030	}
1031	if (dbuf == NULL) {
1032		uint32_t maxsize, minsize, old_minsize;
1033
1034		maxsize = (scmd->len > (128*1024)) ? 128*1024 :
1035		    scmd->len;
1036		minsize = maxsize >> 2;
1037		do {
1038			old_minsize = minsize;
1039			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
1040		} while ((dbuf == NULL) && (old_minsize > minsize) &&
1041		    (minsize >= 512));
1042		if (dbuf == NULL) {
1043			if (scmd->nbufs == 0) {
1044				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1045				    STMF_ALLOC_FAILURE, NULL);
1046			}
1047			return;
1048		}
1049	}
1050
1051	len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size :
1052	    scmd->len;
1053
1054	dbuf->db_relative_offset = scmd->current_ro;
1055	dbuf->db_data_size = len;
1056	dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
1057	(void) stmf_xfer_data(task, dbuf, 0);
1058	scmd->nbufs++; /* outstanding port xfers and bufs used */
1059	scmd->len -= len;
1060	scmd->current_ro += len;
1061
1062	if ((scmd->len != 0) && (scmd->nbufs < bufs_to_take)) {
1063		sbd_do_write_xfer(task, scmd, NULL, 0);
1064	}
1065	return;
1066
1067DO_WRITE_XFER_DONE:
1068	if (dbuf != NULL) {
1069		stmf_free_dbuf(task, dbuf);
1070	}
1071}
1072
1073void
1074sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
1075{
1076	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1077	sbd_zvol_io_t *zvio;
1078	int ret;
1079	uint32_t xfer_len, max_len, first_len;
1080	stmf_status_t xstat;
1081	stmf_data_buf_t *dbuf;
1082	uint_t nblks;
1083	uint64_t blksize = sl->sl_blksize;
1084	uint64_t offset;
1085	size_t db_private_sz;
1086	uintptr_t pad;
1087
1088	ASSERT(rw_read_held(&sl->sl_access_state_lock));
1089	ASSERT((sl->sl_flags & SL_MEDIA_LOADED) != 0);
1090
1091	/*
1092	 * Calculate the limits on xfer_len to the minimum of :
1093	 *    - task limit
1094	 *    - lun limit
1095	 *    - sbd global limit if set
1096	 *    - first xfer limit if set
1097	 *
1098	 * First, protect against silly over-ride value
1099	 */
1100	if (sbd_max_xfer_len && ((sbd_max_xfer_len % DEV_BSIZE) != 0)) {
1101		cmn_err(CE_WARN, "sbd_max_xfer_len invalid %d, resetting\n",
1102		    sbd_max_xfer_len);
1103		sbd_max_xfer_len = 0;
1104	}
1105	if (sbd_1st_xfer_len && ((sbd_1st_xfer_len % DEV_BSIZE) != 0)) {
1106		cmn_err(CE_WARN, "sbd_1st_xfer_len invalid %d, resetting\n",
1107		    sbd_1st_xfer_len);
1108		sbd_1st_xfer_len = 0;
1109	}
1110
1111	max_len = MIN(task->task_max_xfer_len, sl->sl_max_xfer_len);
1112	if (sbd_max_xfer_len)
1113		max_len = MIN(max_len, sbd_max_xfer_len);
1114	/*
1115	 * Special case the first xfer if hints are set.
1116	 */
1117	if (first_xfer && (sbd_1st_xfer_len || task->task_1st_xfer_len)) {
1118		/* global over-ride has precedence */
1119		if (sbd_1st_xfer_len)
1120			first_len = sbd_1st_xfer_len;
1121		else
1122			first_len = task->task_1st_xfer_len;
1123	} else {
1124		first_len = 0;
1125	}
1126
1127
1128	while (scmd->len && scmd->nbufs < task->task_max_nbufs) {
1129
1130		xfer_len = MIN(max_len, scmd->len);
1131		if (first_len) {
1132			xfer_len = MIN(xfer_len, first_len);
1133			first_len = 0;
1134		}
1135		if (xfer_len < scmd->len) {
1136			/*
1137			 * Attempt to end xfer on a block boundary.
1138			 * The only way this does not happen is if the
1139			 * xfer_len is small enough to stay contained
1140			 * within the same block.
1141			 */
1142			uint64_t xfer_offset, xfer_aligned_end;
1143
1144			xfer_offset = scmd->addr + scmd->current_ro;
1145			xfer_aligned_end =
1146			    P2ALIGN(xfer_offset+xfer_len, blksize);
1147			if (xfer_aligned_end > xfer_offset)
1148				xfer_len = xfer_aligned_end - xfer_offset;
1149		}
1150		/*
1151		 * Allocate object to track the write and reserve
1152		 * enough space for scatter/gather list.
1153		 */
1154		offset = scmd->addr + scmd->current_ro;
1155		nblks = sbd_zvol_numsegs(sl, offset, xfer_len);
1156		db_private_sz = sizeof (*zvio) + sizeof (uintptr_t) /* PAD */ +
1157		    (nblks * sizeof (stmf_sglist_ent_t));
1158		dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, db_private_sz,
1159		    AF_DONTZERO);
1160
1161		/*
1162		 * Setup the dbuf
1163		 *
1164		 * XXX Framework does not handle variable length sglists
1165		 * properly, so setup db_lu_private and db_port_private
1166		 * fields here. db_stmf_private is properly set for
1167		 * calls to stmf_free.
1168		 */
1169		if (dbuf->db_port_private == NULL) {
1170			/*
1171			 * XXX Framework assigns space to PP after db_sglist[0]
1172			 */
1173			cmn_err(CE_PANIC, "db_port_private == NULL");
1174		}
1175		pad = (uintptr_t)&dbuf->db_sglist[nblks];
1176		dbuf->db_lu_private = (void *)P2ROUNDUP(pad, sizeof (pad));
1177		dbuf->db_port_private = NULL;
1178		dbuf->db_buf_size = xfer_len;
1179		dbuf->db_data_size = xfer_len;
1180		dbuf->db_relative_offset = scmd->current_ro;
1181		dbuf->db_sglist_length = (uint16_t)nblks;
1182		dbuf->db_xfer_status = 0;
1183		dbuf->db_handle = 0;
1184		dbuf->db_flags = (DB_DONT_CACHE | DB_DONT_REUSE |
1185		    DB_DIRECTION_FROM_RPORT | DB_LU_DATA_BUF);
1186
1187		zvio = dbuf->db_lu_private;
1188		zvio->zvio_offset = offset;
1189
1190		/* get the buffers */
1191		ret = sbd_zvol_alloc_write_bufs(sl, dbuf);
1192		if (ret != 0) {
1193			/*
1194			 * Could not allocate buffers from the backend;
1195			 * treat it like an IO error.
1196			 */
1197			stmf_free(dbuf);
1198			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1199			if (scmd->nbufs == 0) {
1200				/*
1201				 * Nothing queued, so no completions coming
1202				 */
1203				stmf_scsilib_send_status(task, STATUS_CHECK,
1204				    STMF_SAA_WRITE_ERROR);
1205				rw_exit(&sl->sl_access_state_lock);
1206			}
1207			/*
1208			 * Completions of previous buffers will cleanup.
1209			 */
1210			return;
1211		}
1212
1213		/*
1214		 * Allow PP to do setup
1215		 */
1216		xstat = stmf_setup_dbuf(task, dbuf, 0);
1217		if (xstat != STMF_SUCCESS) {
1218			/*
1219			 * This could happen if the driver cannot get the
1220			 * DDI resources it needs for this request.
1221			 * If other dbufs are queued, try again when the next
1222			 * one completes, otherwise give up.
1223			 */
1224			sbd_zvol_rele_write_bufs_abort(sl, dbuf);
1225			stmf_free(dbuf);
1226			if (scmd->nbufs > 0) {
1227				/* completion of previous dbuf will retry */
1228				return;
1229			}
1230			/*
1231			 * Done with this command.
1232			 */
1233			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1234			if (first_xfer)
1235				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1236			else
1237				stmf_scsilib_send_status(task, STATUS_CHECK,
1238				    STMF_SAA_WRITE_ERROR);
1239			rw_exit(&sl->sl_access_state_lock);
1240			return;
1241		}
1242
1243		/*
1244		 * dbuf is now queued on task
1245		 */
1246		scmd->nbufs++;
1247
1248		xstat = stmf_xfer_data(task, dbuf, 0);
1249		switch (xstat) {
1250		case STMF_SUCCESS:
1251			break;
1252		case STMF_BUSY:
1253			/*
1254			 * The dbuf is queued on the task, but unknown
1255			 * to the PP, thus no completion will occur.
1256			 */
1257			sbd_zvol_rele_write_bufs_abort(sl, dbuf);
1258			stmf_teardown_dbuf(task, dbuf);
1259			stmf_free(dbuf);
1260			scmd->nbufs--;
1261			if (scmd->nbufs > 0) {
1262				/* completion of previous dbuf will retry */
1263				return;
1264			}
1265			/*
1266			 * Done with this command.
1267			 */
1268			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1269			if (first_xfer)
1270				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1271			else
1272				stmf_scsilib_send_status(task, STATUS_CHECK,
1273				    STMF_SAA_WRITE_ERROR);
1274			rw_exit(&sl->sl_access_state_lock);
1275			return;
1276		case STMF_ABORTED:
1277			/*
1278			 * Completion code will cleanup.
1279			 */
1280			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1281			return;
1282		}
1283		/*
1284		 * Update the xfer progress.
1285		 */
1286		scmd->len -= xfer_len;
1287		scmd->current_ro += xfer_len;
1288	}
1289}
1290
1291void
1292sbd_handle_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
1293    struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
1294{
1295	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1296	uint64_t laddr;
1297	uint32_t buflen, iolen;
1298	int ndx;
1299
1300	if (scmd->nbufs > 0) {
1301		/*
1302		 * Decrement the count to indicate the port xfer
1303		 * into the dbuf has completed even though the buf is
1304		 * still in use here in the LU provider.
1305		 */
1306		scmd->nbufs--;
1307	}
1308
1309	if (dbuf->db_xfer_status != STMF_SUCCESS) {
1310		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1311		    dbuf->db_xfer_status, NULL);
1312		return;
1313	}
1314
1315	if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1316		goto WRITE_XFER_DONE;
1317	}
1318
1319	if (scmd->len != 0) {
1320		/*
1321		 * Initiate the next port xfer to occur in parallel
1322		 * with writing this buf.
1323		 */
1324		sbd_do_write_xfer(task, scmd, NULL, 0);
1325	}
1326
1327	laddr = scmd->addr + dbuf->db_relative_offset;
1328
1329	/*
1330	 * If this is going to a zvol, use the direct call to
1331	 * sbd_zvol_copy_{read,write}. The direct call interface is
1332	 * restricted to PPs that accept sglists, but that is not required.
1333	 */
1334	if (sl->sl_flags & SL_CALL_ZVOL &&
1335	    (task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF) &&
1336	    (sbd_zcopy & (4|1))) {
1337		int commit;
1338
1339		commit = (scmd->len == 0 && scmd->nbufs == 0);
1340		if (sbd_copy_rdwr(task, laddr, dbuf, SBD_CMD_SCSI_WRITE,
1341		    commit) != STMF_SUCCESS)
1342			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1343		buflen = dbuf->db_data_size;
1344	} else {
1345		for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
1346		    (ndx < dbuf->db_sglist_length); ndx++) {
1347			iolen = min(dbuf->db_data_size - buflen,
1348			    dbuf->db_sglist[ndx].seg_length);
1349			if (iolen == 0)
1350				break;
1351			if (sbd_data_write(sl, task, laddr, (uint64_t)iolen,
1352			    dbuf->db_sglist[ndx].seg_addr) != STMF_SUCCESS) {
1353				scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1354				break;
1355			}
1356			buflen += iolen;
1357			laddr += (uint64_t)iolen;
1358		}
1359	}
1360	task->task_nbytes_transferred += buflen;
1361WRITE_XFER_DONE:
1362	if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1363		stmf_free_dbuf(task, dbuf);
1364		if (scmd->nbufs)
1365			return;	/* wait for all buffers to complete */
1366		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1367		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1368			stmf_scsilib_send_status(task, STATUS_CHECK,
1369			    STMF_SAA_WRITE_ERROR);
1370		} else {
1371			/*
1372			 * If SYNC_WRITE flag is on then we need to flush
1373			 * cache before sending status.
1374			 * Note: this may be a no-op because of how
1375			 * SL_WRITEBACK_CACHE_DISABLE and
1376			 * SL_FLUSH_ON_DISABLED_WRITECACHE are set, but not
1377			 * worth code complexity of checking those in this code
1378			 * path, SBD_SCSI_CMD_SYNC_WRITE is rarely set.
1379			 */
1380			if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
1381			    (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
1382				stmf_scsilib_send_status(task, STATUS_CHECK,
1383				    STMF_SAA_WRITE_ERROR);
1384			} else {
1385				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1386			}
1387		}
1388		return;
1389	}
1390	sbd_do_write_xfer(task, scmd, dbuf, dbuf_reusable);
1391}
1392
1393/*
1394 * Return true if copy avoidance is beneficial.
1395 */
1396static int
1397sbd_zcopy_write_useful(scsi_task_t *task, uint64_t laddr, uint32_t len,
1398    uint64_t blksize)
1399{
1400	/*
1401	 * If there is a global copy threshold over-ride, use it.
1402	 * Otherwise use the PP value with the caveat that at least
1403	 * 1/2 the data must avoid being copied to be useful.
1404	 */
1405	if (sbd_copy_threshold > 0) {
1406		return (len >= sbd_copy_threshold);
1407	} else {
1408		uint64_t no_copy_span;
1409
1410		/* sub-blocksize writes always copy */
1411		if (len < task->task_copy_threshold || len < blksize)
1412			return (0);
1413		/*
1414		 * Calculate amount of data that will avoid the copy path.
1415		 * The calculation is only valid if len >= blksize.
1416		 */
1417		no_copy_span = P2ALIGN(laddr+len, blksize) -
1418		    P2ROUNDUP(laddr, blksize);
1419		return (no_copy_span >= len/2);
1420	}
1421}
1422
1423void
1424sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
1425{
1426	uint64_t lba, laddr;
1427	uint32_t len;
1428	uint8_t op = task->task_cdb[0], do_immediate_data = 0;
1429	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1430	sbd_cmd_t *scmd;
1431	stmf_data_buf_t *dbuf;
1432	uint8_t	sync_wr_flag = 0;
1433
1434	if (sl->sl_flags & SL_WRITE_PROTECTED) {
1435		stmf_scsilib_send_status(task, STATUS_CHECK,
1436		    STMF_SAA_WRITE_PROTECTED);
1437		return;
1438	}
1439	if (op == SCMD_WRITE) {
1440		lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
1441		len = (uint32_t)task->task_cdb[4];
1442
1443		if (len == 0) {
1444			len = 256;
1445		}
1446	} else if (op == SCMD_WRITE_G1) {
1447		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1448		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
1449	} else if (op == SCMD_WRITE_G5) {
1450		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1451		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
1452	} else if (op == SCMD_WRITE_G4) {
1453		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
1454		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1455	} else if (op == SCMD_WRITE_VERIFY) {
1456		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1457		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
1458		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1459	} else if (op == SCMD_WRITE_VERIFY_G5) {
1460		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1461		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
1462		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1463	} else if (op == SCMD_WRITE_VERIFY_G4) {
1464		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
1465		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1466		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1467	} else {
1468		stmf_scsilib_send_status(task, STATUS_CHECK,
1469		    STMF_SAA_INVALID_OPCODE);
1470		return;
1471	}
1472
1473	laddr = lba << sl->sl_data_blocksize_shift;
1474	len <<= sl->sl_data_blocksize_shift;
1475
1476	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
1477		stmf_scsilib_send_status(task, STATUS_CHECK,
1478		    STMF_SAA_LBA_OUT_OF_RANGE);
1479		return;
1480	}
1481
1482	task->task_cmd_xfer_length = len;
1483	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1484		task->task_expected_xfer_length = len;
1485	}
1486
1487	len = (len > task->task_expected_xfer_length) ?
1488	    task->task_expected_xfer_length : len;
1489
1490	if (len == 0) {
1491		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1492		return;
1493	}
1494
1495	if (sbd_zcopy & (4|1) &&		/* Debug switch */
1496	    initial_dbuf == NULL &&		/* No PP buf passed in */
1497	    sl->sl_flags & SL_CALL_ZVOL &&	/* zvol backing store */
1498	    (task->task_additional_flags &
1499	    TASK_AF_ACCEPT_LU_DBUF) &&		/* PP allows it */
1500	    sbd_zcopy_write_useful(task, laddr, len, sl->sl_blksize)) {
1501
1502		/*
1503		 * XXX Note that disallowing initial_dbuf will eliminate
1504		 * iSCSI from participating. For small writes, that is
1505		 * probably ok. For large writes, it may be best to just
1506		 * copy the data from the initial dbuf and use zcopy for
1507		 * the rest.
1508		 */
1509		rw_enter(&sl->sl_access_state_lock, RW_READER);
1510		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
1511			rw_exit(&sl->sl_access_state_lock);
1512			stmf_scsilib_send_status(task, STATUS_CHECK,
1513			    STMF_SAA_READ_ERROR);
1514			return;
1515		}
1516		/*
1517		 * Setup scmd to track the write progress.
1518		 */
1519		if (task->task_lu_private) {
1520			scmd = (sbd_cmd_t *)task->task_lu_private;
1521		} else {
1522			scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t),
1523			    KM_SLEEP);
1524			task->task_lu_private = scmd;
1525		}
1526		scmd->flags = SBD_SCSI_CMD_ACTIVE | sync_wr_flag;
1527		scmd->cmd_type = SBD_CMD_SCSI_WRITE;
1528		scmd->nbufs = 0;
1529		scmd->addr = laddr;
1530		scmd->len = len;
1531		scmd->current_ro = 0;
1532		sbd_do_sgl_write_xfer(task, scmd, 1);
1533		return;
1534	}
1535
1536	if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) {
1537		if (initial_dbuf->db_data_size > len) {
1538			if (initial_dbuf->db_data_size >
1539			    task->task_expected_xfer_length) {
1540				/* protocol error */
1541				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1542				    STMF_INVALID_ARG, NULL);
1543				return;
1544			}
1545			initial_dbuf->db_data_size = len;
1546		}
1547		do_immediate_data = 1;
1548	}
1549	dbuf = initial_dbuf;
1550
1551	if (task->task_lu_private) {
1552		scmd = (sbd_cmd_t *)task->task_lu_private;
1553	} else {
1554		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
1555		task->task_lu_private = scmd;
1556	}
1557	scmd->flags = SBD_SCSI_CMD_ACTIVE | sync_wr_flag;
1558	scmd->cmd_type = SBD_CMD_SCSI_WRITE;
1559	scmd->nbufs = 0;
1560	scmd->addr = laddr;
1561	scmd->len = len;
1562	scmd->current_ro = 0;
1563
1564	if (do_immediate_data) {
1565		/*
1566		 * Account for data passed in this write command
1567		 */
1568		(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
1569		scmd->len -= dbuf->db_data_size;
1570		scmd->current_ro += dbuf->db_data_size;
1571		dbuf->db_xfer_status = STMF_SUCCESS;
1572		sbd_handle_write_xfer_completion(task, scmd, dbuf, 0);
1573	} else {
1574		sbd_do_write_xfer(task, scmd, dbuf, 0);
1575	}
1576}
1577
1578/*
1579 * Utility routine to handle small non performance data transfers to the
1580 * initiators. dbuf is an initial data buf (if any), 'p' points to a data
1581 * buffer which is source of data for transfer, cdb_xfer_size is the
1582 * transfer size based on CDB, cmd_xfer_size is the actual amount of data
1583 * which this command would transfer (the size of data pointed to by 'p').
1584 */
1585void
1586sbd_handle_short_read_transfers(scsi_task_t *task, stmf_data_buf_t *dbuf,
1587    uint8_t *p, uint32_t cdb_xfer_size, uint32_t cmd_xfer_size)
1588{
1589	uint32_t bufsize, ndx;
1590	sbd_cmd_t *scmd;
1591
1592	cmd_xfer_size = min(cmd_xfer_size, cdb_xfer_size);
1593
1594	task->task_cmd_xfer_length = cmd_xfer_size;
1595	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1596		task->task_expected_xfer_length = cmd_xfer_size;
1597	} else {
1598		cmd_xfer_size = min(cmd_xfer_size,
1599		    task->task_expected_xfer_length);
1600	}
1601
1602	if (cmd_xfer_size == 0) {
1603		stmf_scsilib_send_status(task, STATUS_CHECK,
1604		    STMF_SAA_INVALID_FIELD_IN_CDB);
1605		return;
1606	}
1607	if (dbuf == NULL) {
1608		uint32_t minsize = cmd_xfer_size;
1609
1610		dbuf = stmf_alloc_dbuf(task, cmd_xfer_size, &minsize, 0);
1611	}
1612	if (dbuf == NULL) {
1613		stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1614		return;
1615	}
1616
1617	for (bufsize = 0, ndx = 0; bufsize < cmd_xfer_size; ndx++) {
1618		uint8_t *d;
1619		uint32_t s;
1620
1621		d = dbuf->db_sglist[ndx].seg_addr;
1622		s = min((cmd_xfer_size - bufsize),
1623		    dbuf->db_sglist[ndx].seg_length);
1624		bcopy(p+bufsize, d, s);
1625		bufsize += s;
1626	}
1627	dbuf->db_relative_offset = 0;
1628	dbuf->db_data_size = cmd_xfer_size;
1629	dbuf->db_flags = DB_DIRECTION_TO_RPORT;
1630
1631	if (task->task_lu_private == NULL) {
1632		task->task_lu_private =
1633		    kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
1634	}
1635	scmd = (sbd_cmd_t *)task->task_lu_private;
1636
1637	scmd->cmd_type = SBD_CMD_SMALL_READ;
1638	scmd->flags = SBD_SCSI_CMD_ACTIVE;
1639	(void) stmf_xfer_data(task, dbuf, 0);
1640}
1641
1642void
1643sbd_handle_short_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
1644				struct stmf_data_buf *dbuf)
1645{
1646	if (dbuf->db_xfer_status != STMF_SUCCESS) {
1647		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1648		    dbuf->db_xfer_status, NULL);
1649		return;
1650	}
1651	task->task_nbytes_transferred = dbuf->db_data_size;
1652	scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1653	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1654}
1655
1656void
1657sbd_handle_short_write_transfers(scsi_task_t *task,
1658    stmf_data_buf_t *dbuf, uint32_t cdb_xfer_size)
1659{
1660	sbd_cmd_t *scmd;
1661
1662	task->task_cmd_xfer_length = cdb_xfer_size;
1663	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1664		task->task_expected_xfer_length = cdb_xfer_size;
1665	} else {
1666		cdb_xfer_size = min(cdb_xfer_size,
1667		    task->task_expected_xfer_length);
1668	}
1669
1670	if (cdb_xfer_size == 0) {
1671		stmf_scsilib_send_status(task, STATUS_CHECK,
1672		    STMF_SAA_INVALID_FIELD_IN_CDB);
1673		return;
1674	}
1675	if (task->task_lu_private == NULL) {
1676		task->task_lu_private = kmem_zalloc(sizeof (sbd_cmd_t),
1677		    KM_SLEEP);
1678	} else {
1679		bzero(task->task_lu_private, sizeof (sbd_cmd_t));
1680	}
1681	scmd = (sbd_cmd_t *)task->task_lu_private;
1682	scmd->cmd_type = SBD_CMD_SMALL_WRITE;
1683	scmd->flags = SBD_SCSI_CMD_ACTIVE;
1684	scmd->len = cdb_xfer_size;
1685	if (dbuf == NULL) {
1686		uint32_t minsize = cdb_xfer_size;
1687
1688		dbuf = stmf_alloc_dbuf(task, cdb_xfer_size, &minsize, 0);
1689		if (dbuf == NULL) {
1690			stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1691			    STMF_ALLOC_FAILURE, NULL);
1692			return;
1693		}
1694		dbuf->db_data_size = cdb_xfer_size;
1695		dbuf->db_relative_offset = 0;
1696		dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
1697		(void) stmf_xfer_data(task, dbuf, 0);
1698	} else {
1699		if (dbuf->db_data_size < cdb_xfer_size) {
1700			stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1701			    STMF_ABORTED, NULL);
1702			return;
1703		}
1704		dbuf->db_data_size = cdb_xfer_size;
1705		sbd_handle_short_write_xfer_completion(task, dbuf);
1706	}
1707}
1708
1709void
1710sbd_handle_short_write_xfer_completion(scsi_task_t *task,
1711    stmf_data_buf_t *dbuf)
1712{
1713	sbd_cmd_t *scmd;
1714	stmf_status_t st_ret;
1715	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1716
1717	/*
1718	 * For now lets assume we will get only one sglist element
1719	 * for short writes. If that ever changes, we should allocate
1720	 * a local buffer and copy all the sg elements to one linear space.
1721	 */
1722	if ((dbuf->db_xfer_status != STMF_SUCCESS) ||
1723	    (dbuf->db_sglist_length > 1)) {
1724		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1725		    dbuf->db_xfer_status, NULL);
1726		return;
1727	}
1728
1729	task->task_nbytes_transferred = dbuf->db_data_size;
1730	scmd = (sbd_cmd_t *)task->task_lu_private;
1731	scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1732
1733	/* Lets find out who to call */
1734	switch (task->task_cdb[0]) {
1735	case SCMD_MODE_SELECT:
1736	case SCMD_MODE_SELECT_G1:
1737		if (sl->sl_access_state == SBD_LU_STANDBY) {
1738			st_ret = stmf_proxy_scsi_cmd(task, dbuf);
1739			if (st_ret != STMF_SUCCESS) {
1740				stmf_scsilib_send_status(task, STATUS_CHECK,
1741				    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
1742			}
1743		} else {
1744			sbd_handle_mode_select_xfer(task,
1745			    dbuf->db_sglist[0].seg_addr, dbuf->db_data_size);
1746		}
1747		break;
1748	case SCMD_UNMAP:
1749		sbd_handle_unmap_xfer(task,
1750		    dbuf->db_sglist[0].seg_addr, dbuf->db_data_size);
1751		break;
1752	case SCMD_PERSISTENT_RESERVE_OUT:
1753		if (sl->sl_access_state == SBD_LU_STANDBY) {
1754			st_ret = stmf_proxy_scsi_cmd(task, dbuf);
1755			if (st_ret != STMF_SUCCESS) {
1756				stmf_scsilib_send_status(task, STATUS_CHECK,
1757				    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
1758			}
1759		} else {
1760			sbd_handle_pgr_out_data(task, dbuf);
1761		}
1762		break;
1763	default:
1764		/* This should never happen */
1765		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1766		    STMF_ABORTED, NULL);
1767	}
1768}
1769
1770void
1771sbd_handle_read_capacity(struct scsi_task *task,
1772    struct stmf_data_buf *initial_dbuf)
1773{
1774	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1775	uint32_t cdb_len;
1776	uint8_t p[32];
1777	uint64_t s;
1778	uint16_t blksize;
1779
1780	s = sl->sl_lu_size >> sl->sl_data_blocksize_shift;
1781	s--;
1782	blksize = ((uint16_t)1) << sl->sl_data_blocksize_shift;
1783
1784	switch (task->task_cdb[0]) {
1785	case SCMD_READ_CAPACITY:
1786		if (s & 0xffffffff00000000ull) {
1787			p[0] = p[1] = p[2] = p[3] = 0xFF;
1788		} else {
1789			p[0] = (s >> 24) & 0xff;
1790			p[1] = (s >> 16) & 0xff;
1791			p[2] = (s >> 8) & 0xff;
1792			p[3] = s & 0xff;
1793		}
1794		p[4] = 0; p[5] = 0;
1795		p[6] = (blksize >> 8) & 0xff;
1796		p[7] = blksize & 0xff;
1797		sbd_handle_short_read_transfers(task, initial_dbuf, p, 8, 8);
1798		break;
1799
1800	case SCMD_SVC_ACTION_IN_G4:
1801		cdb_len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1802		bzero(p, 32);
1803		p[0] = (s >> 56) & 0xff;
1804		p[1] = (s >> 48) & 0xff;
1805		p[2] = (s >> 40) & 0xff;
1806		p[3] = (s >> 32) & 0xff;
1807		p[4] = (s >> 24) & 0xff;
1808		p[5] = (s >> 16) & 0xff;
1809		p[6] = (s >> 8) & 0xff;
1810		p[7] = s & 0xff;
1811		p[10] = (blksize >> 8) & 0xff;
1812		p[11] = blksize & 0xff;
1813		if (sl->sl_flags & SL_UNMAP_ENABLED) {
1814			p[14] = 0x80;
1815		}
1816		sbd_handle_short_read_transfers(task, initial_dbuf, p,
1817		    cdb_len, 32);
1818		break;
1819	}
1820}
1821
1822void
1823sbd_calc_geometry(uint64_t s, uint16_t blksize, uint8_t *nsectors,
1824    uint8_t *nheads, uint32_t *ncyl)
1825{
1826	if (s < (4ull * 1024ull * 1024ull * 1024ull)) {
1827		*nsectors = 32;
1828		*nheads = 8;
1829	} else {
1830		*nsectors = 254;
1831		*nheads = 254;
1832	}
1833	*ncyl = s / ((uint64_t)blksize * (uint64_t)(*nsectors) *
1834	    (uint64_t)(*nheads));
1835}
1836
1837void
1838sbd_handle_mode_sense(struct scsi_task *task,
1839    struct stmf_data_buf *initial_dbuf, uint8_t *buf)
1840{
1841	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1842	uint32_t cmd_size, n;
1843	uint8_t *cdb;
1844	uint32_t ncyl;
1845	uint8_t nsectors, nheads;
1846	uint8_t page, ctrl, header_size, pc_valid;
1847	uint16_t nbytes;
1848	uint8_t *p;
1849	uint64_t s = sl->sl_lu_size;
1850	uint32_t dev_spec_param_offset;
1851
1852	p = buf;	/* buf is assumed to be zeroed out and large enough */
1853	n = 0;
1854	cdb = &task->task_cdb[0];
1855	page = cdb[2] & 0x3F;
1856	ctrl = (cdb[2] >> 6) & 3;
1857	cmd_size = (cdb[0] == SCMD_MODE_SENSE) ? cdb[4] :
1858	    READ_SCSI16(&cdb[7], uint32_t);
1859
1860	if (cdb[0] == SCMD_MODE_SENSE) {
1861		header_size = 4;
1862		dev_spec_param_offset = 2;
1863	} else {
1864		header_size = 8;
1865		dev_spec_param_offset = 3;
1866	}
1867
1868	/* Now validate the command */
1869	if ((cdb[2] == 0) || (page == MODEPAGE_ALLPAGES) || (page == 0x08) ||
1870	    (page == 0x0A) || (page == 0x03) || (page == 0x04)) {
1871		pc_valid = 1;
1872	} else {
1873		pc_valid = 0;
1874	}
1875	if ((cmd_size < header_size) || (pc_valid == 0)) {
1876		stmf_scsilib_send_status(task, STATUS_CHECK,
1877		    STMF_SAA_INVALID_FIELD_IN_CDB);
1878		return;
1879	}
1880
1881	/* We will update the length in the mode header at the end */
1882
1883	/* Block dev device specific param in mode param header has wp bit */
1884	if (sl->sl_flags & SL_WRITE_PROTECTED) {
1885		p[n + dev_spec_param_offset] = BIT_7;
1886	}
1887	n += header_size;
1888	/* We are not going to return any block descriptor */
1889
1890	nbytes = ((uint16_t)1) << sl->sl_data_blocksize_shift;
1891	sbd_calc_geometry(s, nbytes, &nsectors, &nheads, &ncyl);
1892
1893	if ((page == 0x03) || (page == MODEPAGE_ALLPAGES)) {
1894		p[n] = 0x03;
1895		p[n+1] = 0x16;
1896		if (ctrl != 1) {
1897			p[n + 11] = nsectors;
1898			p[n + 12] = nbytes >> 8;
1899			p[n + 13] = nbytes & 0xff;
1900			p[n + 20] = 0x80;
1901		}
1902		n += 24;
1903	}
1904	if ((page == 0x04) || (page == MODEPAGE_ALLPAGES)) {
1905		p[n] = 0x04;
1906		p[n + 1] = 0x16;
1907		if (ctrl != 1) {
1908			p[n + 2] = ncyl >> 16;
1909			p[n + 3] = ncyl >> 8;
1910			p[n + 4] = ncyl & 0xff;
1911			p[n + 5] = nheads;
1912			p[n + 20] = 0x15;
1913			p[n + 21] = 0x18;
1914		}
1915		n += 24;
1916	}
1917	if ((page == MODEPAGE_CACHING) || (page == MODEPAGE_ALLPAGES)) {
1918		struct mode_caching *mode_caching_page;
1919
1920		mode_caching_page = (struct mode_caching *)&p[n];
1921
1922		mode_caching_page->mode_page.code = MODEPAGE_CACHING;
1923		mode_caching_page->mode_page.ps = 1; /* A saveable page */
1924		mode_caching_page->mode_page.length = 0x12;
1925
1926		switch (ctrl) {
1927		case (0):
1928			/* Current */
1929			if ((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) == 0) {
1930				mode_caching_page->wce = 1;
1931			}
1932			break;
1933
1934		case (1):
1935			/* Changeable */
1936			if ((sl->sl_flags &
1937			    SL_WRITEBACK_CACHE_SET_UNSUPPORTED) == 0) {
1938				mode_caching_page->wce = 1;
1939			}
1940			break;
1941
1942		default:
1943			if ((sl->sl_flags &
1944			    SL_SAVED_WRITE_CACHE_DISABLE) == 0) {
1945				mode_caching_page->wce = 1;
1946			}
1947			break;
1948		}
1949		n += (sizeof (struct mode_page) +
1950		    mode_caching_page->mode_page.length);
1951	}
1952	if ((page == MODEPAGE_CTRL_MODE) || (page == MODEPAGE_ALLPAGES)) {
1953		struct mode_control_scsi3 *mode_control_page;
1954
1955		mode_control_page = (struct mode_control_scsi3 *)&p[n];
1956
1957		mode_control_page->mode_page.code = MODEPAGE_CTRL_MODE;
1958		mode_control_page->mode_page.length =
1959		    PAGELENGTH_MODE_CONTROL_SCSI3;
1960		if (ctrl != 1) {
1961			/* If not looking for changeable values, report this. */
1962			mode_control_page->que_mod = CTRL_QMOD_UNRESTRICT;
1963		}
1964		n += (sizeof (struct mode_page) +
1965		    mode_control_page->mode_page.length);
1966	}
1967
1968	if (cdb[0] == SCMD_MODE_SENSE) {
1969		if (n > 255) {
1970			stmf_scsilib_send_status(task, STATUS_CHECK,
1971			    STMF_SAA_INVALID_FIELD_IN_CDB);
1972			return;
1973		}
1974		/*
1975		 * Mode parameter header length doesn't include the number
1976		 * of bytes in the length field, so adjust the count.
1977		 * Byte count minus header length field size.
1978		 */
1979		buf[0] = (n - 1) & 0xff;
1980	} else {
1981		/* Byte count minus header length field size. */
1982		buf[1] = (n - 2) & 0xff;
1983		buf[0] = ((n - 2) >> 8) & 0xff;
1984	}
1985
1986	sbd_handle_short_read_transfers(task, initial_dbuf, buf,
1987	    cmd_size, n);
1988}
1989
1990void
1991sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf)
1992{
1993	uint32_t cmd_xfer_len;
1994
1995	if (task->task_cdb[0] == SCMD_MODE_SELECT) {
1996		cmd_xfer_len = (uint32_t)task->task_cdb[4];
1997	} else {
1998		cmd_xfer_len = READ_SCSI16(&task->task_cdb[7], uint32_t);
1999	}
2000
2001	if ((task->task_cdb[1] & 0xFE) != 0x10) {
2002		stmf_scsilib_send_status(task, STATUS_CHECK,
2003		    STMF_SAA_INVALID_FIELD_IN_CDB);
2004		return;
2005	}
2006
2007	if (cmd_xfer_len == 0) {
2008		/* zero byte mode selects are allowed */
2009		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2010		return;
2011	}
2012
2013	sbd_handle_short_write_transfers(task, dbuf, cmd_xfer_len);
2014}
2015
2016void
2017sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf, uint32_t buflen)
2018{
2019	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2020	sbd_it_data_t *it;
2021	int hdr_len, bd_len;
2022	sbd_status_t sret;
2023	int i;
2024
2025	if (task->task_cdb[0] == SCMD_MODE_SELECT) {
2026		hdr_len = 4;
2027	} else {
2028		hdr_len = 8;
2029	}
2030
2031	if (buflen < hdr_len)
2032		goto mode_sel_param_len_err;
2033
2034	bd_len = hdr_len == 4 ? buf[3] : READ_SCSI16(&buf[6], int);
2035
2036	if (buflen < (hdr_len + bd_len + 2))
2037		goto mode_sel_param_len_err;
2038
2039	buf += hdr_len + bd_len;
2040	buflen -= hdr_len + bd_len;
2041
2042	if ((buf[0] != 8) || (buflen != ((uint32_t)buf[1] + 2))) {
2043		goto mode_sel_param_len_err;
2044	}
2045
2046	if (buf[2] & 0xFB) {
2047		goto mode_sel_param_field_err;
2048	}
2049
2050	for (i = 3; i < (buf[1] + 2); i++) {
2051		if (buf[i]) {
2052			goto mode_sel_param_field_err;
2053		}
2054	}
2055
2056	sret = SBD_SUCCESS;
2057
2058	/* All good. Lets handle the write cache change, if any */
2059	if (buf[2] & BIT_2) {
2060		sret = sbd_wcd_set(0, sl);
2061	} else {
2062		sret = sbd_wcd_set(1, sl);
2063	}
2064
2065	if (sret != SBD_SUCCESS) {
2066		stmf_scsilib_send_status(task, STATUS_CHECK,
2067		    STMF_SAA_WRITE_ERROR);
2068		return;
2069	}
2070
2071	/* set on the device passed, now set the flags */
2072	mutex_enter(&sl->sl_lock);
2073	if (buf[2] & BIT_2) {
2074		sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
2075	} else {
2076		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
2077	}
2078
2079	for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
2080		if (it == task->task_lu_itl_handle)
2081			continue;
2082		it->sbd_it_ua_conditions |= SBD_UA_MODE_PARAMETERS_CHANGED;
2083	}
2084
2085	if (task->task_cdb[1] & 1) {
2086		if (buf[2] & BIT_2) {
2087			sl->sl_flags &= ~SL_SAVED_WRITE_CACHE_DISABLE;
2088		} else {
2089			sl->sl_flags |= SL_SAVED_WRITE_CACHE_DISABLE;
2090		}
2091		mutex_exit(&sl->sl_lock);
2092		sret = sbd_write_lu_info(sl);
2093	} else {
2094		mutex_exit(&sl->sl_lock);
2095	}
2096	if (sret == SBD_SUCCESS) {
2097		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2098	} else {
2099		stmf_scsilib_send_status(task, STATUS_CHECK,
2100		    STMF_SAA_WRITE_ERROR);
2101	}
2102	return;
2103
2104mode_sel_param_len_err:
2105	stmf_scsilib_send_status(task, STATUS_CHECK,
2106	    STMF_SAA_PARAM_LIST_LENGTH_ERROR);
2107	return;
2108mode_sel_param_field_err:
2109	stmf_scsilib_send_status(task, STATUS_CHECK,
2110	    STMF_SAA_INVALID_FIELD_IN_PARAM_LIST);
2111}
2112
2113/*
2114 * Command support added from SPC-4 r24
2115 * Supports info type 0, 2, 127
2116 */
2117void
2118sbd_handle_identifying_info(struct scsi_task *task,
2119    stmf_data_buf_t *initial_dbuf)
2120{
2121	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2122	uint8_t *cdb;
2123	uint32_t cmd_size;
2124	uint32_t param_len;
2125	uint32_t xfer_size;
2126	uint8_t info_type;
2127	uint8_t *buf, *p;
2128
2129	cdb = &task->task_cdb[0];
2130	cmd_size = READ_SCSI32(&cdb[6], uint32_t);
2131	info_type = cdb[10]>>1;
2132
2133	/* Validate the command */
2134	if (cmd_size < 4) {
2135		stmf_scsilib_send_status(task, STATUS_CHECK,
2136		    STMF_SAA_INVALID_FIELD_IN_CDB);
2137		return;
2138	}
2139
2140	p = buf = kmem_zalloc(260, KM_SLEEP);
2141
2142	switch (info_type) {
2143		case 0:
2144			/*
2145			 * No value is supplied but this info type
2146			 * is mandatory.
2147			 */
2148			xfer_size = 4;
2149			break;
2150		case 2:
2151			mutex_enter(&sl->sl_lock);
2152			param_len = strlcpy((char *)(p+4), sl->sl_alias, 256);
2153			mutex_exit(&sl->sl_lock);
2154			/* text info must be null terminated */
2155			if (++param_len > 256)
2156				param_len = 256;
2157			SCSI_WRITE16(p+2, param_len);
2158			xfer_size = param_len + 4;
2159			break;
2160		case 127:
2161			/* 0 and 2 descriptor supported */
2162			SCSI_WRITE16(p+2, 8); /* set param length */
2163			p += 8;
2164			*p = 4; /* set type to 2 (7 hi bits) */
2165			p += 2;
2166			SCSI_WRITE16(p, 256); /* 256 max length */
2167			xfer_size = 12;
2168			break;
2169		default:
2170			stmf_scsilib_send_status(task, STATUS_CHECK,
2171			    STMF_SAA_INVALID_FIELD_IN_CDB);
2172			kmem_free(buf, 260);
2173			return;
2174	}
2175	sbd_handle_short_read_transfers(task, initial_dbuf, buf,
2176	    cmd_size, xfer_size);
2177	kmem_free(buf, 260);
2178}
2179
2180/*
2181 * This function parse through a string, passed to it as a pointer to a string,
2182 * by adjusting the pointer to the first non-space character and returns
2183 * the count/length of the first bunch of non-space characters. Multiple
2184 * Management URLs are stored as a space delimited string in sl_mgmt_url
2185 * field of sbd_lu_t. This function is used to retrieve one url at a time.
2186 *
2187 * i/p : pointer to pointer to a url string
2188 * o/p : Adjust the pointer to the url to the first non white character
2189 *       and returns the length of the URL
2190 */
2191uint16_t
2192sbd_parse_mgmt_url(char **url_addr) {
2193	uint16_t url_length = 0;
2194	char *url;
2195	url = *url_addr;
2196
2197	while (*url != '\0') {
2198		if (*url == ' ' || *url == '\t' || *url == '\n') {
2199			(*url_addr)++;
2200			url = *url_addr;
2201		} else {
2202			break;
2203		}
2204	}
2205
2206	while (*url != '\0') {
2207		if (*url == ' ' || *url == '\t' ||
2208		    *url == '\n' || *url == '\0') {
2209			break;
2210		}
2211		url++;
2212		url_length++;
2213	}
2214	return (url_length);
2215}
2216
2217/* Try to make this the size of a kmem allocation cache. */
2218static uint_t sbd_write_same_optimal_chunk = 128 * 1024;
2219
2220static sbd_status_t
2221sbd_write_same_data(struct scsi_task *task, sbd_cmd_t *scmd)
2222{
2223	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2224	uint64_t addr, len, sz_done;
2225	uint32_t big_buf_size, xfer_size, off;
2226	uint8_t *big_buf;
2227	sbd_status_t ret;
2228
2229	if (task->task_cdb[0] == SCMD_WRITE_SAME_G1) {
2230		addr = READ_SCSI32(&task->task_cdb[2], uint64_t);
2231		len = READ_SCSI16(&task->task_cdb[7], uint64_t);
2232	} else {
2233		addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
2234		len = READ_SCSI32(&task->task_cdb[10], uint64_t);
2235	}
2236	addr <<= sl->sl_data_blocksize_shift;
2237	len <<= sl->sl_data_blocksize_shift;
2238
2239	/*
2240	 * Reminders:
2241	 *    "len" is total size of what we wish to "write same".
2242	 *
2243	 *    xfer_size will be scmd->trans_data_len, which is the length
2244	 *    of the pattern we wish to replicate over "len".  We replicate
2245	 *    "xfer_size" of pattern over "len".
2246	 *
2247	 *    big_buf_size is set to an ideal actual-write size for an output
2248	 *    operation.  It may be the same as "len".  If it's not, it should
2249	 *    be an exact multiple of "xfer_size" so we don't get pattern
2250	 *    breakage until the very end of "len".
2251	 */
2252	big_buf_size = len > sbd_write_same_optimal_chunk ?
2253	    sbd_write_same_optimal_chunk : (uint32_t)len;
2254	xfer_size = scmd->trans_data_len;
2255
2256	/*
2257	 * All transfers should be an integral multiple of the sector size.
2258	 */
2259	ASSERT((big_buf_size % xfer_size) == 0);
2260
2261	/*
2262	 * Don't sleep for the allocation, and don't make the system
2263	 * reclaim memory.  Trade higher I/Os if in a low-memory situation.
2264	 */
2265	big_buf = kmem_alloc(big_buf_size, KM_NOSLEEP | KM_NORMALPRI);
2266
2267	if (big_buf == NULL) {
2268		/*
2269		 * Just send it in terms of of the transmitted data.  This
2270		 * will be very slow.
2271		 */
2272		DTRACE_PROBE1(write__same__low__memory, uint64_t, big_buf_size);
2273		big_buf = scmd->trans_data;
2274		big_buf_size = scmd->trans_data_len;
2275	} else {
2276		/*
2277		 * We already ASSERT()ed big_buf_size is an integral multiple
2278		 * of xfer_size.
2279		 */
2280		for (off = 0; off < big_buf_size; off += xfer_size)
2281			bcopy(scmd->trans_data, big_buf + off, xfer_size);
2282	}
2283
2284	/* Do the actual I/O.  Recycle xfer_size now to be write size. */
2285	DTRACE_PROBE1(write__same__io__begin, uint64_t, len);
2286	for (sz_done = 0; sz_done < len; sz_done += (uint64_t)xfer_size) {
2287		xfer_size = ((big_buf_size + sz_done) <= len) ? big_buf_size :
2288		    len - sz_done;
2289		ret = sbd_data_write(sl, task, addr + sz_done,
2290		    (uint64_t)xfer_size, big_buf);
2291		if (ret != SBD_SUCCESS)
2292			break;
2293	}
2294	DTRACE_PROBE2(write__same__io__end, uint64_t, len, uint64_t, sz_done);
2295
2296	if (big_buf != scmd->trans_data)
2297		kmem_free(big_buf, big_buf_size);
2298
2299	return (ret);
2300}
2301
2302static void
2303sbd_handle_write_same_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
2304    struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
2305{
2306	uint64_t laddr;
2307	uint32_t buflen, iolen;
2308	int ndx, ret;
2309
2310	if (dbuf->db_xfer_status != STMF_SUCCESS) {
2311		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2312		    dbuf->db_xfer_status, NULL);
2313		return;
2314	}
2315
2316	if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2317		goto write_same_xfer_done;
2318	}
2319
2320	if (scmd->len != 0) {
2321		/*
2322		 * Initiate the next port xfer to occur in parallel
2323		 * with writing this buf.
2324		 */
2325		sbd_do_write_same_xfer(task, scmd, NULL, 0);
2326	}
2327
2328	laddr = dbuf->db_relative_offset;
2329
2330	for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
2331	    (ndx < dbuf->db_sglist_length); ndx++) {
2332		iolen = min(dbuf->db_data_size - buflen,
2333		    dbuf->db_sglist[ndx].seg_length);
2334		if (iolen == 0)
2335			break;
2336		bcopy(dbuf->db_sglist[ndx].seg_addr, &scmd->trans_data[laddr],
2337		    iolen);
2338		buflen += iolen;
2339		laddr += (uint64_t)iolen;
2340	}
2341	task->task_nbytes_transferred += buflen;
2342
2343write_same_xfer_done:
2344	if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2345		stmf_free_dbuf(task, dbuf);
2346		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
2347		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2348			stmf_scsilib_send_status(task, STATUS_CHECK,
2349			    STMF_SAA_WRITE_ERROR);
2350		} else {
2351			ret = sbd_write_same_data(task, scmd);
2352			if (ret != SBD_SUCCESS) {
2353				stmf_scsilib_send_status(task, STATUS_CHECK,
2354				    STMF_SAA_WRITE_ERROR);
2355			} else {
2356				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2357			}
2358		}
2359		/*
2360		 * Only way we should get here is via handle_write_same(),
2361		 * and that should make the following assertion always pass.
2362		 */
2363		ASSERT((scmd->flags & SBD_SCSI_CMD_TRANS_DATA) &&
2364		    scmd->trans_data != NULL);
2365		kmem_free(scmd->trans_data, scmd->trans_data_len);
2366		scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
2367		return;
2368	}
2369	sbd_do_write_same_xfer(task, scmd, dbuf, dbuf_reusable);
2370}
2371
2372static void
2373sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
2374    struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
2375{
2376	uint32_t len;
2377
2378	if (scmd->len == 0) {
2379		if (dbuf != NULL)
2380			stmf_free_dbuf(task, dbuf);
2381		return;
2382	}
2383
2384	if ((dbuf != NULL) &&
2385	    ((dbuf->db_flags & DB_DONT_REUSE) || (dbuf_reusable == 0))) {
2386		/* free current dbuf and allocate a new one */
2387		stmf_free_dbuf(task, dbuf);
2388		dbuf = NULL;
2389	}
2390	if (dbuf == NULL) {
2391		uint32_t maxsize, minsize, old_minsize;
2392
2393		maxsize = (scmd->len > (128*1024)) ? 128*1024 :
2394		    scmd->len;
2395		minsize = maxsize >> 2;
2396		do {
2397			old_minsize = minsize;
2398			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
2399		} while ((dbuf == NULL) && (old_minsize > minsize) &&
2400		    (minsize >= 512));
2401		if (dbuf == NULL) {
2402			if (scmd->nbufs == 0) {
2403				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2404				    STMF_ALLOC_FAILURE, NULL);
2405			}
2406			return;
2407		}
2408	}
2409
2410	len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size :
2411	    scmd->len;
2412
2413	dbuf->db_relative_offset = scmd->current_ro;
2414	dbuf->db_data_size = len;
2415	dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
2416	(void) stmf_xfer_data(task, dbuf, 0);
2417	scmd->nbufs++; /* outstanding port xfers and bufs used */
2418	scmd->len -= len;
2419	scmd->current_ro += len;
2420}
2421
2422static void
2423sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf)
2424{
2425	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2426	uint64_t addr, len;
2427	sbd_cmd_t *scmd;
2428	stmf_data_buf_t *dbuf;
2429	uint8_t unmap;
2430	uint8_t do_immediate_data = 0;
2431
2432	task->task_cmd_xfer_length = 0;
2433	if (task->task_additional_flags &
2434	    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2435		task->task_expected_xfer_length = 0;
2436	}
2437	if (sl->sl_flags & SL_WRITE_PROTECTED) {
2438		stmf_scsilib_send_status(task, STATUS_CHECK,
2439		    STMF_SAA_WRITE_PROTECTED);
2440		return;
2441	}
2442	if (task->task_cdb[1] & 0xF7) {
2443		stmf_scsilib_send_status(task, STATUS_CHECK,
2444		    STMF_SAA_INVALID_FIELD_IN_CDB);
2445		return;
2446	}
2447	unmap = task->task_cdb[1] & 0x08;
2448	if (unmap && ((sl->sl_flags & SL_UNMAP_ENABLED) == 0)) {
2449		stmf_scsilib_send_status(task, STATUS_CHECK,
2450		    STMF_SAA_INVALID_FIELD_IN_CDB);
2451		return;
2452	}
2453	if (task->task_cdb[0] == SCMD_WRITE_SAME_G1) {
2454		addr = READ_SCSI32(&task->task_cdb[2], uint64_t);
2455		len = READ_SCSI16(&task->task_cdb[7], uint64_t);
2456	} else {
2457		addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
2458		len = READ_SCSI32(&task->task_cdb[10], uint64_t);
2459	}
2460	if (len == 0) {
2461		stmf_scsilib_send_status(task, STATUS_CHECK,
2462		    STMF_SAA_INVALID_FIELD_IN_CDB);
2463		return;
2464	}
2465	addr <<= sl->sl_data_blocksize_shift;
2466	len <<= sl->sl_data_blocksize_shift;
2467
2468	/* Check if the command is for the unmap function */
2469	if (unmap) {
2470		if (sbd_unmap(sl, addr, len) != 0) {
2471			stmf_scsilib_send_status(task, STATUS_CHECK,
2472			    STMF_SAA_LBA_OUT_OF_RANGE);
2473		} else {
2474			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2475		}
2476		return;
2477	}
2478
2479	/* Write same function */
2480
2481	task->task_cmd_xfer_length = 1 << sl->sl_data_blocksize_shift;
2482	if (task->task_additional_flags &
2483	    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2484		task->task_expected_xfer_length = task->task_cmd_xfer_length;
2485	}
2486	if ((addr + len) > sl->sl_lu_size) {
2487		stmf_scsilib_send_status(task, STATUS_CHECK,
2488		    STMF_SAA_LBA_OUT_OF_RANGE);
2489		return;
2490	}
2491
2492	/* For rest of this I/O the transfer length is 1 block */
2493	len = ((uint64_t)1) << sl->sl_data_blocksize_shift;
2494
2495	/* Some basic checks */
2496	if ((len == 0) || (len != task->task_expected_xfer_length)) {
2497		stmf_scsilib_send_status(task, STATUS_CHECK,
2498		    STMF_SAA_INVALID_FIELD_IN_CDB);
2499		return;
2500	}
2501
2502
2503	if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) {
2504		if (initial_dbuf->db_data_size > len) {
2505			if (initial_dbuf->db_data_size >
2506			    task->task_expected_xfer_length) {
2507				/* protocol error */
2508				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2509				    STMF_INVALID_ARG, NULL);
2510				return;
2511			}
2512			initial_dbuf->db_data_size = (uint32_t)len;
2513		}
2514		do_immediate_data = 1;
2515	}
2516	dbuf = initial_dbuf;
2517
2518	if (task->task_lu_private) {
2519		scmd = (sbd_cmd_t *)task->task_lu_private;
2520	} else {
2521		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
2522		task->task_lu_private = scmd;
2523	}
2524	scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_TRANS_DATA;
2525	scmd->cmd_type = SBD_CMD_SCSI_WRITE;
2526	scmd->nbufs = 0;
2527	scmd->len = (uint32_t)len;
2528	scmd->trans_data_len = (uint32_t)len;
2529	scmd->trans_data = kmem_alloc((size_t)len, KM_SLEEP);
2530	scmd->current_ro = 0;
2531
2532	if (do_immediate_data) {
2533		/*
2534		 * Account for data passed in this write command
2535		 */
2536		(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
2537		scmd->len -= dbuf->db_data_size;
2538		scmd->current_ro += dbuf->db_data_size;
2539		dbuf->db_xfer_status = STMF_SUCCESS;
2540		sbd_handle_write_same_xfer_completion(task, scmd, dbuf, 0);
2541	} else {
2542		sbd_do_write_same_xfer(task, scmd, dbuf, 0);
2543	}
2544}
2545
2546static void
2547sbd_handle_unmap(scsi_task_t *task, stmf_data_buf_t *dbuf)
2548{
2549	uint32_t cmd_xfer_len;
2550
2551	cmd_xfer_len = READ_SCSI16(&task->task_cdb[7], uint32_t);
2552
2553	if (task->task_cdb[1] & 1) {
2554		stmf_scsilib_send_status(task, STATUS_CHECK,
2555		    STMF_SAA_INVALID_FIELD_IN_CDB);
2556		return;
2557	}
2558
2559	if (cmd_xfer_len == 0) {
2560		task->task_cmd_xfer_length = 0;
2561		if (task->task_additional_flags &
2562		    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2563			task->task_expected_xfer_length = 0;
2564		}
2565		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2566		return;
2567	}
2568
2569	sbd_handle_short_write_transfers(task, dbuf, cmd_xfer_len);
2570}
2571
2572static void
2573sbd_handle_unmap_xfer(scsi_task_t *task, uint8_t *buf, uint32_t buflen)
2574{
2575	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2576	uint32_t ulen, dlen, num_desc;
2577	uint64_t addr, len;
2578	uint8_t *p;
2579	int ret;
2580
2581	if (buflen < 24) {
2582		stmf_scsilib_send_status(task, STATUS_CHECK,
2583		    STMF_SAA_INVALID_FIELD_IN_CDB);
2584		return;
2585	}
2586	ulen = READ_SCSI16(buf, uint32_t);
2587	dlen = READ_SCSI16(buf + 2, uint32_t);
2588	num_desc = dlen >> 4;
2589	if (((ulen + 2) != buflen) || ((dlen + 8) != buflen) || (dlen & 0xf) ||
2590	    (num_desc == 0)) {
2591		stmf_scsilib_send_status(task, STATUS_CHECK,
2592		    STMF_SAA_INVALID_FIELD_IN_CDB);
2593		return;
2594	}
2595
2596	for (p = buf + 8; num_desc; num_desc--, p += 16) {
2597		addr = READ_SCSI64(p, uint64_t);
2598		addr <<= sl->sl_data_blocksize_shift;
2599		len = READ_SCSI32(p+8, uint64_t);
2600		len <<= sl->sl_data_blocksize_shift;
2601		ret = sbd_unmap(sl, addr, len);
2602		if (ret != 0) {
2603			stmf_scsilib_send_status(task, STATUS_CHECK,
2604			    STMF_SAA_LBA_OUT_OF_RANGE);
2605			return;
2606		}
2607	}
2608
2609unmap_done:
2610	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2611}
2612
2613void
2614sbd_handle_inquiry(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
2615{
2616	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2617	uint8_t *cdbp = (uint8_t *)&task->task_cdb[0];
2618	uint8_t *p;
2619	uint8_t byte0;
2620	uint8_t page_length;
2621	uint16_t bsize = 512;
2622	uint16_t cmd_size;
2623	uint32_t xfer_size = 4;
2624	uint32_t mgmt_url_size = 0;
2625	uint8_t exp;
2626	uint64_t s;
2627	char *mgmt_url = NULL;
2628
2629
2630	byte0 = DTYPE_DIRECT;
2631	/*
2632	 * Basic protocol checks.
2633	 */
2634
2635	if ((((cdbp[1] & 1) == 0) && cdbp[2]) || cdbp[5]) {
2636		stmf_scsilib_send_status(task, STATUS_CHECK,
2637		    STMF_SAA_INVALID_FIELD_IN_CDB);
2638		return;
2639	}
2640
2641	/*
2642	 * Zero byte allocation length is not an error.  Just
2643	 * return success.
2644	 */
2645
2646	cmd_size = (((uint16_t)cdbp[3]) << 8) | cdbp[4];
2647
2648	if (cmd_size == 0) {
2649		task->task_cmd_xfer_length = 0;
2650		if (task->task_additional_flags &
2651		    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2652			task->task_expected_xfer_length = 0;
2653		}
2654		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2655		return;
2656	}
2657
2658	/*
2659	 * Standard inquiry
2660	 */
2661
2662	if ((cdbp[1] & 1) == 0) {
2663		int	i;
2664		struct scsi_inquiry *inq;
2665
2666		p = (uint8_t *)kmem_zalloc(bsize, KM_SLEEP);
2667		inq = (struct scsi_inquiry *)p;
2668
2669		page_length = 69;
2670		xfer_size = page_length + 5;
2671
2672		inq->inq_dtype = DTYPE_DIRECT;
2673		inq->inq_ansi = 5;	/* SPC-3 */
2674		inq->inq_hisup = 1;
2675		inq->inq_rdf = 2;	/* Response data format for SPC-3 */
2676		inq->inq_len = page_length;
2677
2678		inq->inq_tpgs = TPGS_FAILOVER_IMPLICIT;
2679		inq->inq_cmdque = 1;
2680
2681		if (sl->sl_flags & SL_VID_VALID) {
2682			bcopy(sl->sl_vendor_id, inq->inq_vid, 8);
2683		} else {
2684			bcopy(sbd_vendor_id, inq->inq_vid, 8);
2685		}
2686
2687		if (sl->sl_flags & SL_PID_VALID) {
2688			bcopy(sl->sl_product_id, inq->inq_pid, 16);
2689		} else {
2690			bcopy(sbd_product_id, inq->inq_pid, 16);
2691		}
2692
2693		if (sl->sl_flags & SL_REV_VALID) {
2694			bcopy(sl->sl_revision, inq->inq_revision, 4);
2695		} else {
2696			bcopy(sbd_revision, inq->inq_revision, 4);
2697		}
2698
2699		/* Adding Version Descriptors */
2700		i = 0;
2701		/* SAM-3 no version */
2702		inq->inq_vd[i].inq_vd_msb = 0x00;
2703		inq->inq_vd[i].inq_vd_lsb = 0x60;
2704		i++;
2705
2706		/* transport */
2707		switch (task->task_lport->lport_id->protocol_id) {
2708		case PROTOCOL_FIBRE_CHANNEL:
2709			inq->inq_vd[i].inq_vd_msb = 0x09;
2710			inq->inq_vd[i].inq_vd_lsb = 0x00;
2711			i++;
2712			break;
2713
2714		case PROTOCOL_PARALLEL_SCSI:
2715		case PROTOCOL_SSA:
2716		case PROTOCOL_IEEE_1394:
2717			/* Currently no claims of conformance */
2718			break;
2719
2720		case PROTOCOL_SRP:
2721			inq->inq_vd[i].inq_vd_msb = 0x09;
2722			inq->inq_vd[i].inq_vd_lsb = 0x40;
2723			i++;
2724			break;
2725
2726		case PROTOCOL_iSCSI:
2727			inq->inq_vd[i].inq_vd_msb = 0x09;
2728			inq->inq_vd[i].inq_vd_lsb = 0x60;
2729			i++;
2730			break;
2731
2732		case PROTOCOL_SAS:
2733		case PROTOCOL_ADT:
2734		case PROTOCOL_ATAPI:
2735		default:
2736			/* Currently no claims of conformance */
2737			break;
2738		}
2739
2740		/* SPC-3 no version */
2741		inq->inq_vd[i].inq_vd_msb = 0x03;
2742		inq->inq_vd[i].inq_vd_lsb = 0x00;
2743		i++;
2744
2745		/* SBC-2 no version */
2746		inq->inq_vd[i].inq_vd_msb = 0x03;
2747		inq->inq_vd[i].inq_vd_lsb = 0x20;
2748
2749		sbd_handle_short_read_transfers(task, initial_dbuf, p, cmd_size,
2750		    min(cmd_size, xfer_size));
2751		kmem_free(p, bsize);
2752
2753		return;
2754	}
2755
2756	rw_enter(&sbd_global_prop_lock, RW_READER);
2757	if (sl->sl_mgmt_url) {
2758		mgmt_url_size = strlen(sl->sl_mgmt_url);
2759		mgmt_url = sl->sl_mgmt_url;
2760	} else if (sbd_mgmt_url) {
2761		mgmt_url_size = strlen(sbd_mgmt_url);
2762		mgmt_url = sbd_mgmt_url;
2763	}
2764
2765	/*
2766	 * EVPD handling
2767	 */
2768
2769	/* Default 512 bytes may not be enough, increase bsize if necessary */
2770	if (cdbp[2] == 0x83 || cdbp[2] == 0x85) {
2771		if (bsize <  cmd_size)
2772			bsize = cmd_size;
2773	}
2774	p = (uint8_t *)kmem_zalloc(bsize, KM_SLEEP);
2775
2776	switch (cdbp[2]) {
2777	case 0x00:
2778		page_length = 4 + (mgmt_url_size ? 1 : 0);
2779		if (sl->sl_flags & SL_UNMAP_ENABLED)
2780			page_length += 2;
2781
2782		p[0] = byte0;
2783		p[3] = page_length;
2784		/* Supported VPD pages in ascending order */
2785		{
2786			uint8_t i = 5;
2787
2788			p[i++] = 0x80;
2789			p[i++] = 0x83;
2790			if (mgmt_url_size != 0)
2791				p[i++] = 0x85;
2792			p[i++] = 0x86;
2793			if (sl->sl_flags & SL_UNMAP_ENABLED) {
2794				p[i++] = 0xb0;
2795				p[i++] = 0xb2;
2796			}
2797		}
2798		xfer_size = page_length + 4;
2799		break;
2800
2801	case 0x80:
2802		if (sl->sl_serial_no_size) {
2803			page_length = sl->sl_serial_no_size;
2804			bcopy(sl->sl_serial_no, p + 4, sl->sl_serial_no_size);
2805		} else {
2806			/* if no serial num is specified set 4 spaces */
2807			page_length = 4;
2808			bcopy("    ", p + 4, 4);
2809		}
2810		p[0] = byte0;
2811		p[1] = 0x80;
2812		p[3] = page_length;
2813		xfer_size = page_length + 4;
2814		break;
2815
2816	case 0x83:
2817		xfer_size = stmf_scsilib_prepare_vpd_page83(task, p,
2818		    bsize, byte0, STMF_VPD_LU_ID|STMF_VPD_TARGET_ID|
2819		    STMF_VPD_TP_GROUP|STMF_VPD_RELATIVE_TP_ID);
2820		break;
2821
2822	case 0x85:
2823		if (mgmt_url_size == 0) {
2824			stmf_scsilib_send_status(task, STATUS_CHECK,
2825			    STMF_SAA_INVALID_FIELD_IN_CDB);
2826			goto err_done;
2827		}
2828		{
2829			uint16_t idx, newidx, sz, url_size;
2830			char *url;
2831
2832			p[0] = byte0;
2833			p[1] = 0x85;
2834
2835			idx = 4;
2836			url = mgmt_url;
2837			url_size = sbd_parse_mgmt_url(&url);
2838			/* Creating Network Service Descriptors */
2839			while (url_size != 0) {
2840				/* Null terminated and 4 Byte aligned */
2841				sz = url_size + 1;
2842				sz += (sz % 4) ? 4 - (sz % 4) : 0;
2843				newidx = idx + sz + 4;
2844
2845				if (newidx < bsize) {
2846					/*
2847					 * SPC-3r23 : Table 320  (Sec 7.6.5)
2848					 * (Network service descriptor format
2849					 *
2850					 * Note: Hard coding service type as
2851					 * "Storage Configuration Service".
2852					 */
2853					p[idx] = 1;
2854					SCSI_WRITE16(p + idx + 2, sz);
2855					bcopy(url, p + idx + 4, url_size);
2856					xfer_size = newidx + 4;
2857				}
2858				idx = newidx;
2859
2860				/* skip to next mgmt url if any */
2861				url += url_size;
2862				url_size = sbd_parse_mgmt_url(&url);
2863			}
2864
2865			/* Total descriptor length */
2866			SCSI_WRITE16(p + 2, idx - 4);
2867			break;
2868		}
2869
2870	case 0x86:
2871		page_length = 0x3c;
2872
2873		p[0] = byte0;
2874		p[1] = 0x86;		/* Page 86 response */
2875		p[3] = page_length;
2876
2877		/*
2878		 * Bits 0, 1, and 2 will need to be updated
2879		 * to reflect the queue tag handling if/when
2880		 * that is implemented.  For now, we're going
2881		 * to claim support only for Simple TA.
2882		 */
2883		p[5] = 1;
2884		xfer_size = page_length + 4;
2885		break;
2886
2887	case 0xb0:
2888		if ((sl->sl_flags & SL_UNMAP_ENABLED) == 0) {
2889			stmf_scsilib_send_status(task, STATUS_CHECK,
2890			    STMF_SAA_INVALID_FIELD_IN_CDB);
2891			goto err_done;
2892		}
2893		page_length = 0x3c;
2894		p[0] = byte0;
2895		p[1] = 0xb0;
2896		p[3] = page_length;
2897		p[20] = p[21] = p[22] = p[23] = 0xFF;
2898		p[24] = p[25] = p[26] = p[27] = 0xFF;
2899		xfer_size = page_length + 4;
2900		break;
2901
2902	case 0xb2:
2903		if ((sl->sl_flags & SL_UNMAP_ENABLED) == 0) {
2904			stmf_scsilib_send_status(task, STATUS_CHECK,
2905			    STMF_SAA_INVALID_FIELD_IN_CDB);
2906			goto err_done;
2907		}
2908		page_length = 4;
2909		p[0] = byte0;
2910		p[1] = 0xb2;
2911		p[3] = page_length;
2912
2913		exp = (uint8_t)sl->sl_data_blocksize_shift;
2914		s = sl->sl_lu_size >> sl->sl_data_blocksize_shift;
2915		while (s & ((uint64_t)0xFFFFFFFF80000000ull)) {
2916			s >>= 1;
2917			exp++;
2918		}
2919		p[4] = exp;
2920		p[5] = 0xc0;
2921		xfer_size = page_length + 4;
2922		break;
2923
2924	default:
2925		stmf_scsilib_send_status(task, STATUS_CHECK,
2926		    STMF_SAA_INVALID_FIELD_IN_CDB);
2927		goto err_done;
2928	}
2929
2930	sbd_handle_short_read_transfers(task, initial_dbuf, p, cmd_size,
2931	    min(cmd_size, xfer_size));
2932err_done:
2933	kmem_free(p, bsize);
2934	rw_exit(&sbd_global_prop_lock);
2935}
2936
2937stmf_status_t
2938sbd_task_alloc(struct scsi_task *task)
2939{
2940	if ((task->task_lu_private =
2941	    kmem_alloc(sizeof (sbd_cmd_t), KM_NOSLEEP)) != NULL) {
2942		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
2943		scmd->flags = 0;
2944		return (STMF_SUCCESS);
2945	}
2946	return (STMF_ALLOC_FAILURE);
2947}
2948
2949void
2950sbd_remove_it_handle(sbd_lu_t *sl, sbd_it_data_t *it)
2951{
2952	sbd_it_data_t **ppit;
2953
2954	sbd_pgr_remove_it_handle(sl, it);
2955	mutex_enter(&sl->sl_lock);
2956	for (ppit = &sl->sl_it_list; *ppit != NULL;
2957	    ppit = &((*ppit)->sbd_it_next)) {
2958		if ((*ppit) == it) {
2959			*ppit = it->sbd_it_next;
2960			break;
2961		}
2962	}
2963	mutex_exit(&sl->sl_lock);
2964
2965	DTRACE_PROBE2(itl__nexus__end, stmf_lu_t *, sl->sl_lu,
2966	    sbd_it_data_t *, it);
2967
2968	kmem_free(it, sizeof (*it));
2969}
2970
2971void
2972sbd_check_and_clear_scsi2_reservation(sbd_lu_t *sl, sbd_it_data_t *it)
2973{
2974	mutex_enter(&sl->sl_lock);
2975	if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) == 0) {
2976		/* If we dont have any reservations, just get out. */
2977		mutex_exit(&sl->sl_lock);
2978		return;
2979	}
2980
2981	if (it == NULL) {
2982		/* Find the I_T nexus which is holding the reservation. */
2983		for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
2984			if (it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) {
2985				ASSERT(it->sbd_it_session_id ==
2986				    sl->sl_rs_owner_session_id);
2987				break;
2988			}
2989		}
2990		ASSERT(it != NULL);
2991	} else {
2992		/*
2993		 * We were passed an I_T nexus. If this nexus does not hold
2994		 * the reservation, do nothing. This is why this function is
2995		 * called "check_and_clear".
2996		 */
2997		if ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0) {
2998			mutex_exit(&sl->sl_lock);
2999			return;
3000		}
3001	}
3002	it->sbd_it_flags &= ~SBD_IT_HAS_SCSI2_RESERVATION;
3003	sl->sl_flags &= ~SL_LU_HAS_SCSI2_RESERVATION;
3004	mutex_exit(&sl->sl_lock);
3005}
3006
3007
3008
3009void
3010sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
3011{
3012	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
3013	sbd_it_data_t *it;
3014	uint8_t cdb0, cdb1;
3015	stmf_status_t st_ret;
3016
3017	if ((it = task->task_lu_itl_handle) == NULL) {
3018		mutex_enter(&sl->sl_lock);
3019		for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
3020			if (it->sbd_it_session_id ==
3021			    task->task_session->ss_session_id) {
3022				mutex_exit(&sl->sl_lock);
3023				stmf_scsilib_send_status(task, STATUS_BUSY, 0);
3024				return;
3025			}
3026		}
3027		it = (sbd_it_data_t *)kmem_zalloc(sizeof (*it), KM_NOSLEEP);
3028		if (it == NULL) {
3029			mutex_exit(&sl->sl_lock);
3030			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
3031			return;
3032		}
3033		it->sbd_it_session_id = task->task_session->ss_session_id;
3034		bcopy(task->task_lun_no, it->sbd_it_lun, 8);
3035		it->sbd_it_next = sl->sl_it_list;
3036		sl->sl_it_list = it;
3037		mutex_exit(&sl->sl_lock);
3038
3039		DTRACE_PROBE1(itl__nexus__start, scsi_task *, task);
3040
3041		sbd_pgr_initialize_it(task, it);
3042		if (stmf_register_itl_handle(task->task_lu, task->task_lun_no,
3043		    task->task_session, it->sbd_it_session_id, it)
3044		    != STMF_SUCCESS) {
3045			sbd_remove_it_handle(sl, it);
3046			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
3047			return;
3048		}
3049		task->task_lu_itl_handle = it;
3050		if (sl->sl_access_state != SBD_LU_STANDBY) {
3051			it->sbd_it_ua_conditions = SBD_UA_POR;
3052		}
3053	} else if (it->sbd_it_flags & SBD_IT_PGR_CHECK_FLAG) {
3054		mutex_enter(&sl->sl_lock);
3055		it->sbd_it_flags &= ~SBD_IT_PGR_CHECK_FLAG;
3056		mutex_exit(&sl->sl_lock);
3057		sbd_pgr_initialize_it(task, it);
3058	}
3059
3060	if (task->task_mgmt_function) {
3061		stmf_scsilib_handle_task_mgmt(task);
3062		return;
3063	}
3064
3065	/*
3066	 * if we're transitioning between access
3067	 * states, return NOT READY
3068	 */
3069	if (sl->sl_access_state == SBD_LU_TRANSITION_TO_STANDBY ||
3070	    sl->sl_access_state == SBD_LU_TRANSITION_TO_ACTIVE) {
3071		stmf_scsilib_send_status(task, STATUS_CHECK,
3072		    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
3073		return;
3074	}
3075
3076	/* Checking ua conditions as per SAM3R14 5.3.2 specified order */
3077	if ((it->sbd_it_ua_conditions) && (task->task_cdb[0] != SCMD_INQUIRY)) {
3078		uint32_t saa = 0;
3079
3080		mutex_enter(&sl->sl_lock);
3081		if (it->sbd_it_ua_conditions & SBD_UA_POR) {
3082			it->sbd_it_ua_conditions &= ~SBD_UA_POR;
3083			saa = STMF_SAA_POR;
3084		}
3085		mutex_exit(&sl->sl_lock);
3086		if (saa) {
3087			stmf_scsilib_send_status(task, STATUS_CHECK, saa);
3088			return;
3089		}
3090	}
3091
3092	/* Reservation conflict checks */
3093	if (sl->sl_access_state == SBD_LU_ACTIVE) {
3094		if (SBD_PGR_RSVD(sl->sl_pgr)) {
3095			if (sbd_pgr_reservation_conflict(task)) {
3096				stmf_scsilib_send_status(task,
3097				    STATUS_RESERVATION_CONFLICT, 0);
3098				return;
3099			}
3100		} else if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) &&
3101		    ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0)) {
3102			if (!(SCSI2_CONFLICT_FREE_CMDS(task->task_cdb))) {
3103				stmf_scsilib_send_status(task,
3104				    STATUS_RESERVATION_CONFLICT, 0);
3105				return;
3106			}
3107		}
3108	}
3109
3110	/* Rest of the ua conndition checks */
3111	if ((it->sbd_it_ua_conditions) && (task->task_cdb[0] != SCMD_INQUIRY)) {
3112		uint32_t saa = 0;
3113
3114		mutex_enter(&sl->sl_lock);
3115		if (it->sbd_it_ua_conditions & SBD_UA_CAPACITY_CHANGED) {
3116			it->sbd_it_ua_conditions &= ~SBD_UA_CAPACITY_CHANGED;
3117			if ((task->task_cdb[0] == SCMD_READ_CAPACITY) ||
3118			    ((task->task_cdb[0] == SCMD_SVC_ACTION_IN_G4) &&
3119			    (task->task_cdb[1] ==
3120			    SSVC_ACTION_READ_CAPACITY_G4))) {
3121				saa = 0;
3122			} else {
3123				saa = STMF_SAA_CAPACITY_DATA_HAS_CHANGED;
3124			}
3125		} else if (it->sbd_it_ua_conditions &
3126		    SBD_UA_MODE_PARAMETERS_CHANGED) {
3127			it->sbd_it_ua_conditions &=
3128			    ~SBD_UA_MODE_PARAMETERS_CHANGED;
3129			saa = STMF_SAA_MODE_PARAMETERS_CHANGED;
3130		} else if (it->sbd_it_ua_conditions &
3131		    SBD_UA_ASYMMETRIC_ACCESS_CHANGED) {
3132			it->sbd_it_ua_conditions &=
3133			    ~SBD_UA_ASYMMETRIC_ACCESS_CHANGED;
3134			saa = STMF_SAA_ASYMMETRIC_ACCESS_CHANGED;
3135		} else if (it->sbd_it_ua_conditions &
3136		    SBD_UA_ACCESS_STATE_TRANSITION) {
3137			it->sbd_it_ua_conditions &=
3138			    ~SBD_UA_ACCESS_STATE_TRANSITION;
3139			saa = STMF_SAA_LU_NO_ACCESS_TRANSITION;
3140		} else {
3141			it->sbd_it_ua_conditions = 0;
3142			saa = 0;
3143		}
3144		mutex_exit(&sl->sl_lock);
3145		if (saa) {
3146			stmf_scsilib_send_status(task, STATUS_CHECK, saa);
3147			return;
3148		}
3149	}
3150
3151	cdb0 = task->task_cdb[0];
3152	cdb1 = task->task_cdb[1];
3153
3154	if (sl->sl_access_state == SBD_LU_STANDBY) {
3155		if (cdb0 != SCMD_INQUIRY &&
3156		    cdb0 != SCMD_MODE_SENSE &&
3157		    cdb0 != SCMD_MODE_SENSE_G1 &&
3158		    cdb0 != SCMD_MODE_SELECT &&
3159		    cdb0 != SCMD_MODE_SELECT_G1 &&
3160		    cdb0 != SCMD_RESERVE &&
3161		    cdb0 != SCMD_RELEASE &&
3162		    cdb0 != SCMD_PERSISTENT_RESERVE_OUT &&
3163		    cdb0 != SCMD_PERSISTENT_RESERVE_IN &&
3164		    cdb0 != SCMD_REQUEST_SENSE &&
3165		    cdb0 != SCMD_READ_CAPACITY &&
3166		    cdb0 != SCMD_TEST_UNIT_READY &&
3167		    cdb0 != SCMD_START_STOP &&
3168		    cdb0 != SCMD_READ &&
3169		    cdb0 != SCMD_READ_G1 &&
3170		    cdb0 != SCMD_READ_G4 &&
3171		    cdb0 != SCMD_READ_G5 &&
3172		    !(cdb0 == SCMD_SVC_ACTION_IN_G4 &&
3173		    cdb1 == SSVC_ACTION_READ_CAPACITY_G4) &&
3174		    !(cdb0 == SCMD_MAINTENANCE_IN &&
3175		    (cdb1 & 0x1F) == 0x05) &&
3176		    !(cdb0 == SCMD_MAINTENANCE_IN &&
3177		    (cdb1 & 0x1F) == 0x0A)) {
3178			stmf_scsilib_send_status(task, STATUS_CHECK,
3179			    STMF_SAA_LU_NO_ACCESS_STANDBY);
3180			return;
3181		}
3182
3183		/*
3184		 * is this a short write?
3185		 * if so, we'll need to wait until we have the buffer
3186		 * before proxying the command
3187		 */
3188		switch (cdb0) {
3189			case SCMD_MODE_SELECT:
3190			case SCMD_MODE_SELECT_G1:
3191			case SCMD_PERSISTENT_RESERVE_OUT:
3192				break;
3193			default:
3194				st_ret = stmf_proxy_scsi_cmd(task,
3195				    initial_dbuf);
3196				if (st_ret != STMF_SUCCESS) {
3197					stmf_scsilib_send_status(task,
3198					    STATUS_CHECK,
3199					    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
3200				}
3201				return;
3202		}
3203	}
3204
3205	cdb0 = task->task_cdb[0] & 0x1F;
3206
3207	if ((cdb0 == SCMD_READ) || (cdb0 == SCMD_WRITE)) {
3208		if (task->task_additional_flags & TASK_AF_PORT_LOAD_HIGH) {
3209			stmf_scsilib_send_status(task, STATUS_QFULL, 0);
3210			return;
3211		}
3212		if (cdb0 == SCMD_READ) {
3213			sbd_handle_read(task, initial_dbuf);
3214			return;
3215		}
3216		sbd_handle_write(task, initial_dbuf);
3217		return;
3218	}
3219
3220	cdb0 = task->task_cdb[0];
3221	cdb1 = task->task_cdb[1];
3222
3223	if (cdb0 == SCMD_INQUIRY) {		/* Inquiry */
3224		sbd_handle_inquiry(task, initial_dbuf);
3225		return;
3226	}
3227
3228	if (cdb0  == SCMD_PERSISTENT_RESERVE_OUT) {
3229		sbd_handle_pgr_out_cmd(task, initial_dbuf);
3230		return;
3231	}
3232
3233	if (cdb0  == SCMD_PERSISTENT_RESERVE_IN) {
3234		sbd_handle_pgr_in_cmd(task, initial_dbuf);
3235		return;
3236	}
3237
3238	if (cdb0 == SCMD_RELEASE) {
3239		if (cdb1) {
3240			stmf_scsilib_send_status(task, STATUS_CHECK,
3241			    STMF_SAA_INVALID_FIELD_IN_CDB);
3242			return;
3243		}
3244
3245		mutex_enter(&sl->sl_lock);
3246		if (sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) {
3247			/* If not owner don't release it, just return good */
3248			if (it->sbd_it_session_id !=
3249			    sl->sl_rs_owner_session_id) {
3250				mutex_exit(&sl->sl_lock);
3251				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3252				return;
3253			}
3254		}
3255		sl->sl_flags &= ~SL_LU_HAS_SCSI2_RESERVATION;
3256		it->sbd_it_flags &= ~SBD_IT_HAS_SCSI2_RESERVATION;
3257		mutex_exit(&sl->sl_lock);
3258		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3259		return;
3260	}
3261
3262	if (cdb0 == SCMD_RESERVE) {
3263		if (cdb1) {
3264			stmf_scsilib_send_status(task, STATUS_CHECK,
3265			    STMF_SAA_INVALID_FIELD_IN_CDB);
3266			return;
3267		}
3268
3269		mutex_enter(&sl->sl_lock);
3270		if (sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) {
3271			/* If not owner, return conflict status */
3272			if (it->sbd_it_session_id !=
3273			    sl->sl_rs_owner_session_id) {
3274				mutex_exit(&sl->sl_lock);
3275				stmf_scsilib_send_status(task,
3276				    STATUS_RESERVATION_CONFLICT, 0);
3277				return;
3278			}
3279		}
3280		sl->sl_flags |= SL_LU_HAS_SCSI2_RESERVATION;
3281		it->sbd_it_flags |= SBD_IT_HAS_SCSI2_RESERVATION;
3282		sl->sl_rs_owner_session_id = it->sbd_it_session_id;
3283		mutex_exit(&sl->sl_lock);
3284		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3285		return;
3286	}
3287
3288	if (cdb0 == SCMD_REQUEST_SENSE) {
3289		/*
3290		 * LU provider needs to store unretrieved sense data
3291		 * (e.g. after power-on/reset).  For now, we'll just
3292		 * return good status with no sense.
3293		 */
3294
3295		if ((cdb1 & ~1) || task->task_cdb[2] || task->task_cdb[3] ||
3296		    task->task_cdb[5]) {
3297			stmf_scsilib_send_status(task, STATUS_CHECK,
3298			    STMF_SAA_INVALID_FIELD_IN_CDB);
3299		} else {
3300			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3301		}
3302
3303		return;
3304	}
3305
3306	/* Report Target Port Groups */
3307	if ((cdb0 == SCMD_MAINTENANCE_IN) &&
3308	    ((cdb1 & 0x1F) == 0x0A)) {
3309		stmf_scsilib_handle_report_tpgs(task, initial_dbuf);
3310		return;
3311	}
3312
3313	/* Report Identifying Information */
3314	if ((cdb0 == SCMD_MAINTENANCE_IN) &&
3315	    ((cdb1 & 0x1F) == 0x05)) {
3316		sbd_handle_identifying_info(task, initial_dbuf);
3317		return;
3318	}
3319
3320	if (cdb0 == SCMD_START_STOP) {			/* Start stop */
3321		task->task_cmd_xfer_length = 0;
3322		if (task->task_cdb[4] & 0xFC) {
3323			stmf_scsilib_send_status(task, STATUS_CHECK,
3324			    STMF_SAA_INVALID_FIELD_IN_CDB);
3325			return;
3326		}
3327		if (task->task_cdb[4] & 2) {
3328			stmf_scsilib_send_status(task, STATUS_CHECK,
3329			    STMF_SAA_INVALID_FIELD_IN_CDB);
3330		} else {
3331			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3332		}
3333		return;
3334
3335	}
3336
3337	if ((cdb0 == SCMD_MODE_SENSE) || (cdb0 == SCMD_MODE_SENSE_G1)) {
3338		uint8_t *p;
3339		p = kmem_zalloc(512, KM_SLEEP);
3340		sbd_handle_mode_sense(task, initial_dbuf, p);
3341		kmem_free(p, 512);
3342		return;
3343	}
3344
3345	if ((cdb0 == SCMD_MODE_SELECT) || (cdb0 == SCMD_MODE_SELECT_G1)) {
3346		sbd_handle_mode_select(task, initial_dbuf);
3347		return;
3348	}
3349
3350	if ((cdb0 == SCMD_UNMAP) && (sl->sl_flags & SL_UNMAP_ENABLED)) {
3351		sbd_handle_unmap(task, initial_dbuf);
3352		return;
3353	}
3354
3355	if ((cdb0 == SCMD_WRITE_SAME_G4) || (cdb0 == SCMD_WRITE_SAME_G1)) {
3356		sbd_handle_write_same(task, initial_dbuf);
3357		return;
3358	}
3359
3360	if (cdb0 == SCMD_TEST_UNIT_READY) {	/* Test unit ready */
3361		task->task_cmd_xfer_length = 0;
3362		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3363		return;
3364	}
3365
3366	if (cdb0 == SCMD_READ_CAPACITY) {		/* Read Capacity */
3367		sbd_handle_read_capacity(task, initial_dbuf);
3368		return;
3369	}
3370
3371	if (cdb0 == SCMD_SVC_ACTION_IN_G4) { /* Read Capacity or read long */
3372		if (cdb1 == SSVC_ACTION_READ_CAPACITY_G4) {
3373			sbd_handle_read_capacity(task, initial_dbuf);
3374			return;
3375		/*
3376		 * } else if (cdb1 == SSVC_ACTION_READ_LONG_G4) {
3377		 * 	sbd_handle_read(task, initial_dbuf);
3378		 * 	return;
3379		 */
3380		}
3381	}
3382
3383	/*
3384	 * if (cdb0 == SCMD_SVC_ACTION_OUT_G4) {
3385	 *	if (cdb1 == SSVC_ACTION_WRITE_LONG_G4) {
3386	 *		 sbd_handle_write(task, initial_dbuf);
3387	 * 		return;
3388	 *	}
3389	 * }
3390	 */
3391
3392	if (cdb0 == SCMD_VERIFY) {
3393		/*
3394		 * Something more likely needs to be done here.
3395		 */
3396		task->task_cmd_xfer_length = 0;
3397		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3398		return;
3399	}
3400
3401	if (cdb0 == SCMD_SYNCHRONIZE_CACHE ||
3402	    cdb0 == SCMD_SYNCHRONIZE_CACHE_G4) {
3403		sbd_handle_sync_cache(task, initial_dbuf);
3404		return;
3405	}
3406
3407	/*
3408	 * Write and Verify use the same path as write, but don't clutter the
3409	 * performance path above with checking for write_verify opcodes.  We
3410	 * rely on zfs's integrity checks for the "Verify" part of Write &
3411	 * Verify.  (Even if we did a read to "verify" we'd merely be reading
3412	 * cache, not actual media.)
3413	 * Therefore we
3414	 *   a) only support this if sbd_is_zvol, and
3415	 *   b) run the IO through the normal write path with a forced
3416	 *	sbd_flush_data_cache at the end.
3417	 */
3418
3419	if ((sl->sl_flags & SL_ZFS_META) && (
3420	    cdb0 == SCMD_WRITE_VERIFY ||
3421	    cdb0 == SCMD_WRITE_VERIFY_G4 ||
3422	    cdb0 == SCMD_WRITE_VERIFY_G5)) {
3423		sbd_handle_write(task, initial_dbuf);
3424		return;
3425	}
3426
3427	stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_INVALID_OPCODE);
3428}
3429
3430void
3431sbd_dbuf_xfer_done(struct scsi_task *task, struct stmf_data_buf *dbuf)
3432{
3433	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3434
3435	if (dbuf->db_flags & DB_LU_DATA_BUF) {
3436		/*
3437		 * Buffers passed in from the LU always complete
3438		 * even if the task is no longer active.
3439		 */
3440		ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
3441		ASSERT(scmd);
3442		switch (scmd->cmd_type) {
3443		case (SBD_CMD_SCSI_READ):
3444			sbd_handle_sgl_read_xfer_completion(task, scmd, dbuf);
3445			break;
3446		case (SBD_CMD_SCSI_WRITE):
3447			sbd_handle_sgl_write_xfer_completion(task, scmd, dbuf);
3448			break;
3449		default:
3450			cmn_err(CE_PANIC, "Unknown cmd type, task = %p",
3451			    (void *)task);
3452			break;
3453		}
3454		return;
3455	}
3456
3457	if ((scmd == NULL) || ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0))
3458		return;
3459
3460	switch (scmd->cmd_type) {
3461	case (SBD_CMD_SCSI_READ):
3462		sbd_handle_read_xfer_completion(task, scmd, dbuf);
3463		break;
3464
3465	case (SBD_CMD_SCSI_WRITE):
3466		if ((task->task_cdb[0] == SCMD_WRITE_SAME_G1) ||
3467		    (task->task_cdb[0] == SCMD_WRITE_SAME_G4)) {
3468			sbd_handle_write_same_xfer_completion(task, scmd, dbuf,
3469			    1);
3470		} else {
3471			sbd_handle_write_xfer_completion(task, scmd, dbuf, 1);
3472		}
3473		break;
3474
3475	case (SBD_CMD_SMALL_READ):
3476		sbd_handle_short_read_xfer_completion(task, scmd, dbuf);
3477		break;
3478
3479	case (SBD_CMD_SMALL_WRITE):
3480		sbd_handle_short_write_xfer_completion(task, dbuf);
3481		break;
3482
3483	default:
3484		cmn_err(CE_PANIC, "Unknown cmd type, task = %p", (void *)task);
3485		break;
3486	}
3487}
3488
3489/* ARGSUSED */
3490void
3491sbd_send_status_done(struct scsi_task *task)
3492{
3493	cmn_err(CE_PANIC,
3494	    "sbd_send_status_done: this should not have been called");
3495}
3496
3497void
3498sbd_task_free(struct scsi_task *task)
3499{
3500	if (task->task_lu_private) {
3501		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3502		if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
3503			cmn_err(CE_PANIC, "cmd is active, task = %p",
3504			    (void *)task);
3505		}
3506		kmem_free(scmd, sizeof (sbd_cmd_t));
3507	}
3508}
3509
3510/*
3511 * Aborts are synchronus w.r.t. I/O AND
3512 * All the I/O which SBD does is synchronous AND
3513 * Everything within a task is single threaded.
3514 *   IT MEANS
3515 * If this function is called, we are doing nothing with this task
3516 * inside of sbd module.
3517 */
3518/* ARGSUSED */
3519stmf_status_t
3520sbd_abort(struct stmf_lu *lu, int abort_cmd, void *arg, uint32_t flags)
3521{
3522	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3523	scsi_task_t *task;
3524
3525	if (abort_cmd == STMF_LU_RESET_STATE) {
3526		return (sbd_lu_reset_state(lu));
3527	}
3528
3529	if (abort_cmd == STMF_LU_ITL_HANDLE_REMOVED) {
3530		sbd_check_and_clear_scsi2_reservation(sl, (sbd_it_data_t *)arg);
3531		sbd_remove_it_handle(sl, (sbd_it_data_t *)arg);
3532		return (STMF_SUCCESS);
3533	}
3534
3535	ASSERT(abort_cmd == STMF_LU_ABORT_TASK);
3536	task = (scsi_task_t *)arg;
3537	if (task->task_lu_private) {
3538		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3539
3540		if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
3541			if (scmd->flags & SBD_SCSI_CMD_TRANS_DATA) {
3542				kmem_free(scmd->trans_data,
3543				    scmd->trans_data_len);
3544				scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
3545			}
3546			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
3547			return (STMF_ABORT_SUCCESS);
3548		}
3549	}
3550
3551	return (STMF_NOT_FOUND);
3552}
3553
3554/*
3555 * This function is called during task clean-up if the
3556 * DB_LU_FLAG is set on the dbuf. This should only be called for
3557 * abort processing after sbd_abort has been called for the task.
3558 */
3559void
3560sbd_dbuf_free(struct scsi_task *task, struct stmf_data_buf *dbuf)
3561{
3562	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3563	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
3564
3565	ASSERT(dbuf->db_lu_private);
3566	ASSERT(scmd && scmd->nbufs > 0);
3567	ASSERT((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0);
3568	ASSERT(dbuf->db_flags & DB_LU_DATA_BUF);
3569	ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
3570	ASSERT((curthread->t_flag & T_INTR_THREAD) == 0);
3571
3572	if (scmd->cmd_type == SBD_CMD_SCSI_READ) {
3573		sbd_zvol_rele_read_bufs(sl, dbuf);
3574	} else if (scmd->cmd_type == SBD_CMD_SCSI_WRITE) {
3575		sbd_zvol_rele_write_bufs_abort(sl, dbuf);
3576	} else {
3577		cmn_err(CE_PANIC, "Unknown cmd type %d, task = %p",
3578		    scmd->cmd_type, (void *)task);
3579	}
3580	if (--scmd->nbufs == 0)
3581		rw_exit(&sl->sl_access_state_lock);
3582	stmf_teardown_dbuf(task, dbuf);
3583	stmf_free(dbuf);
3584}
3585
3586/* ARGSUSED */
3587void
3588sbd_ctl(struct stmf_lu *lu, int cmd, void *arg)
3589{
3590	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3591	stmf_change_status_t st;
3592
3593	ASSERT((cmd == STMF_CMD_LU_ONLINE) ||
3594	    (cmd == STMF_CMD_LU_OFFLINE) ||
3595	    (cmd == STMF_ACK_LU_ONLINE_COMPLETE) ||
3596	    (cmd == STMF_ACK_LU_OFFLINE_COMPLETE));
3597
3598	st.st_completion_status = STMF_SUCCESS;
3599	st.st_additional_info = NULL;
3600
3601	switch (cmd) {
3602	case STMF_CMD_LU_ONLINE:
3603		if (sl->sl_state == STMF_STATE_ONLINE)
3604			st.st_completion_status = STMF_ALREADY;
3605		else if (sl->sl_state != STMF_STATE_OFFLINE)
3606			st.st_completion_status = STMF_FAILURE;
3607		if (st.st_completion_status == STMF_SUCCESS) {
3608			sl->sl_state = STMF_STATE_ONLINE;
3609			sl->sl_state_not_acked = 1;
3610		}
3611		(void) stmf_ctl(STMF_CMD_LU_ONLINE_COMPLETE, lu, &st);
3612		break;
3613
3614	case STMF_CMD_LU_OFFLINE:
3615		if (sl->sl_state == STMF_STATE_OFFLINE)
3616			st.st_completion_status = STMF_ALREADY;
3617		else if (sl->sl_state != STMF_STATE_ONLINE)
3618			st.st_completion_status = STMF_FAILURE;
3619		if (st.st_completion_status == STMF_SUCCESS) {
3620			sl->sl_flags &= ~(SL_MEDIUM_REMOVAL_PREVENTED |
3621			    SL_LU_HAS_SCSI2_RESERVATION);
3622			sl->sl_state = STMF_STATE_OFFLINE;
3623			sl->sl_state_not_acked = 1;
3624			sbd_pgr_reset(sl);
3625		}
3626		(void) stmf_ctl(STMF_CMD_LU_OFFLINE_COMPLETE, lu, &st);
3627		break;
3628
3629	case STMF_ACK_LU_ONLINE_COMPLETE:
3630		/* Fallthrough */
3631	case STMF_ACK_LU_OFFLINE_COMPLETE:
3632		sl->sl_state_not_acked = 0;
3633		break;
3634
3635	}
3636}
3637
3638/* ARGSUSED */
3639stmf_status_t
3640sbd_info(uint32_t cmd, stmf_lu_t *lu, void *arg, uint8_t *buf,
3641    uint32_t *bufsizep)
3642{
3643	return (STMF_NOT_SUPPORTED);
3644}
3645
3646stmf_status_t
3647sbd_lu_reset_state(stmf_lu_t *lu)
3648{
3649	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3650
3651	mutex_enter(&sl->sl_lock);
3652	if (sl->sl_flags & SL_SAVED_WRITE_CACHE_DISABLE) {
3653		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
3654		mutex_exit(&sl->sl_lock);
3655		if (sl->sl_access_state == SBD_LU_ACTIVE) {
3656			(void) sbd_wcd_set(1, sl);
3657		}
3658	} else {
3659		sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
3660		mutex_exit(&sl->sl_lock);
3661		if (sl->sl_access_state == SBD_LU_ACTIVE) {
3662			(void) sbd_wcd_set(0, sl);
3663		}
3664	}
3665	sbd_pgr_reset(sl);
3666	sbd_check_and_clear_scsi2_reservation(sl, NULL);
3667	if (stmf_deregister_all_lu_itl_handles(lu) != STMF_SUCCESS) {
3668		return (STMF_FAILURE);
3669	}
3670	return (STMF_SUCCESS);
3671}
3672
3673sbd_status_t
3674sbd_flush_data_cache(sbd_lu_t *sl, int fsync_done)
3675{
3676	int r = 0;
3677	int ret;
3678
3679	if (fsync_done)
3680		goto over_fsync;
3681	if ((sl->sl_data_vtype == VREG) || (sl->sl_data_vtype == VBLK)) {
3682		if (VOP_FSYNC(sl->sl_data_vp, FSYNC, kcred, NULL))
3683			return (SBD_FAILURE);
3684	}
3685over_fsync:
3686	if (((sl->sl_data_vtype == VCHR) || (sl->sl_data_vtype == VBLK)) &&
3687	    ((sl->sl_flags & SL_NO_DATA_DKIOFLUSH) == 0)) {
3688		ret = VOP_IOCTL(sl->sl_data_vp, DKIOCFLUSHWRITECACHE, NULL,
3689		    FKIOCTL, kcred, &r, NULL);
3690		if ((ret == ENOTTY) || (ret == ENOTSUP)) {
3691			mutex_enter(&sl->sl_lock);
3692			sl->sl_flags |= SL_NO_DATA_DKIOFLUSH;
3693			mutex_exit(&sl->sl_lock);
3694		} else if (ret != 0) {
3695			return (SBD_FAILURE);
3696		}
3697	}
3698
3699	return (SBD_SUCCESS);
3700}
3701
3702/* ARGSUSED */
3703static void
3704sbd_handle_sync_cache(struct scsi_task *task,
3705    struct stmf_data_buf *initial_dbuf)
3706{
3707	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
3708	uint64_t	lba, laddr;
3709	sbd_status_t	sret;
3710	uint32_t	len;
3711	int		is_g4 = 0;
3712	int		immed;
3713
3714	task->task_cmd_xfer_length = 0;
3715	/*
3716	 * Determine if this is a 10 or 16 byte CDB
3717	 */
3718
3719	if (task->task_cdb[0] == SCMD_SYNCHRONIZE_CACHE_G4)
3720		is_g4 = 1;
3721
3722	/*
3723	 * Determine other requested parameters
3724	 *
3725	 * We don't have a non-volatile cache, so don't care about SYNC_NV.
3726	 * Do not support the IMMED bit.
3727	 */
3728
3729	immed = (task->task_cdb[1] & 0x02);
3730
3731	if (immed) {
3732		stmf_scsilib_send_status(task, STATUS_CHECK,
3733		    STMF_SAA_INVALID_FIELD_IN_CDB);
3734		return;
3735	}
3736
3737	/*
3738	 * Check to be sure we're not being asked to sync an LBA
3739	 * that is out of range.  While checking, verify reserved fields.
3740	 */
3741
3742	if (is_g4) {
3743		if ((task->task_cdb[1] & 0xf9) || task->task_cdb[14] ||
3744		    task->task_cdb[15]) {
3745			stmf_scsilib_send_status(task, STATUS_CHECK,
3746			    STMF_SAA_INVALID_FIELD_IN_CDB);
3747			return;
3748		}
3749
3750		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
3751		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
3752	} else {
3753		if ((task->task_cdb[1] & 0xf9) || task->task_cdb[6] ||
3754		    task->task_cdb[9]) {
3755			stmf_scsilib_send_status(task, STATUS_CHECK,
3756			    STMF_SAA_INVALID_FIELD_IN_CDB);
3757			return;
3758		}
3759
3760		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
3761		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
3762	}
3763
3764	laddr = lba << sl->sl_data_blocksize_shift;
3765	len <<= sl->sl_data_blocksize_shift;
3766
3767	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
3768		stmf_scsilib_send_status(task, STATUS_CHECK,
3769		    STMF_SAA_LBA_OUT_OF_RANGE);
3770		return;
3771	}
3772
3773	sret = sbd_flush_data_cache(sl, 0);
3774	if (sret != SBD_SUCCESS) {
3775		stmf_scsilib_send_status(task, STATUS_CHECK,
3776		    STMF_SAA_WRITE_ERROR);
3777		return;
3778	}
3779
3780	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3781}
3782