1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24/*
25 * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
26 * Copyright (c) 2016 by Delphix. All rights reserved.
27 */
28
29/*
30 * Multiplexed I/O SCSI vHCI implementation
31 */
32
33#include <sys/conf.h>
34#include <sys/file.h>
35#include <sys/ddi.h>
36#include <sys/sunddi.h>
37#include <sys/scsi/scsi.h>
38#include <sys/scsi/impl/scsi_reset_notify.h>
39#include <sys/scsi/impl/services.h>
40#include <sys/sunmdi.h>
41#include <sys/mdi_impldefs.h>
42#include <sys/scsi/adapters/scsi_vhci.h>
43#include <sys/disp.h>
44#include <sys/byteorder.h>
45
/* Provided by the SCSA framework / scsi_confsubr. */
extern uintptr_t scsi_callback_id;
extern ddi_dma_attr_t scsi_alloc_attr;

#ifdef	DEBUG
/* Debug verbosity level consumed by the VHCI_DEBUG() macro. */
int	vhci_debug = VHCI_DEBUG_DEFAULT_VAL;
#endif

/* retry for the vhci_do_prout command when a not ready is returned */
int vhci_prout_not_ready_retry = 180;

/*
 * These values are defined to support the internal retry of
 * SCSI packets for better sense code handling.
 */
#define	VHCI_CMD_CMPLT	0
#define	VHCI_CMD_RETRY	1
#define	VHCI_CMD_ERROR	-1

#define	PROPFLAGS (DDI_PROP_DONTPASS | DDI_PROP_NOTPROM)
#define	VHCI_SCSI_PERR		0x47
#define	VHCI_PGR_ILLEGALOP	-2
#define	VHCI_NUM_UPDATE_TASKQ	8
/* changed to 132 to accommodate HDS */

/*
 * Version Macros
 */
#define	VHCI_NAME_VERSION	"SCSI VHCI Driver"
char		vhci_version_name[] = VHCI_NAME_VERSION;

/* One-shot latch used by vhci_scsi_tgt_init() once the init wait expires. */
int		vhci_first_time = 0;
/* Absolute lbolt deadline for tgt_init's wait for a first path (see below). */
clock_t		vhci_to_ticks = 0;
int		vhci_init_wait_timeout = VHCI_INIT_WAIT_TIMEOUT;
kcondvar_t	vhci_cv;
kmutex_t	vhci_global_mutex;
void		*vhci_softstate = NULL; /* for soft state */

/*
 * Flag to delay the retry of the reserve command
 * (value presumably in microseconds -- TODO confirm against users)
 */
int		vhci_reserve_delay = 100000;
static int	vhci_path_quiesce_timeout = 60;
/* All-zero PGR key used for comparison/clearing; never written. */
static uchar_t	zero_key[MHIOC_RESV_KEY_SIZE];

/* uscsi delay for a TRAN_BUSY */
static int vhci_uscsi_delay = 100000;
static int vhci_uscsi_retry_count = 180;
/* uscsi_restart_sense timeout id in case it needs to get canceled */
static timeout_id_t vhci_restart_timeid = 0;

static int	vhci_bus_config_debug = 0;

/*
 * Bidirectional map of 'target-port' to port id <pid> for support of
 * iostat(1M) '-Xx' and '-Yx' output.
 */
static kmutex_t		vhci_targetmap_mutex;
static uint_t		vhci_targetmap_pid = 1;
static mod_hash_t	*vhci_targetmap_bypid;	/* <pid> -> 'target-port' */
static mod_hash_t	*vhci_targetmap_byport;	/* 'target-port' -> <pid> */
106
107/*
108 * functions exported by scsi_vhci struct cb_ops
109 */
110static int vhci_open(dev_t *, int, int, cred_t *);
111static int vhci_close(dev_t, int, int, cred_t *);
112static int vhci_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
113
114/*
115 * functions exported by scsi_vhci struct dev_ops
116 */
117static int vhci_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
118static int vhci_attach(dev_info_t *, ddi_attach_cmd_t);
119static int vhci_detach(dev_info_t *, ddi_detach_cmd_t);
120
121/*
122 * functions exported by scsi_vhci scsi_hba_tran_t transport table
123 */
124static int vhci_scsi_tgt_init(dev_info_t *, dev_info_t *,
125    scsi_hba_tran_t *, struct scsi_device *);
126static void vhci_scsi_tgt_free(dev_info_t *, dev_info_t *, scsi_hba_tran_t *,
127    struct scsi_device *);
128static int vhci_pgr_register_start(scsi_vhci_lun_t *, struct scsi_pkt *);
129static int vhci_scsi_start(struct scsi_address *, struct scsi_pkt *);
130static int vhci_scsi_abort(struct scsi_address *, struct scsi_pkt *);
131static int vhci_scsi_reset(struct scsi_address *, int);
132static int vhci_scsi_reset_target(struct scsi_address *, int level,
133    uint8_t select_path);
134static int vhci_scsi_reset_bus(struct scsi_address *);
135static int vhci_scsi_getcap(struct scsi_address *, char *, int);
136static int vhci_scsi_setcap(struct scsi_address *, char *, int, int);
137static int vhci_commoncap(struct scsi_address *, char *, int, int, int);
138static int vhci_pHCI_cap(struct scsi_address *ap, char *cap, int val, int whom,
139    mdi_pathinfo_t *pip);
140static struct scsi_pkt *vhci_scsi_init_pkt(struct scsi_address *,
141    struct scsi_pkt *, struct buf *, int, int, int, int, int (*)(), caddr_t);
142static void vhci_scsi_destroy_pkt(struct scsi_address *, struct scsi_pkt *);
143static void vhci_scsi_dmafree(struct scsi_address *, struct scsi_pkt *);
144static void vhci_scsi_sync_pkt(struct scsi_address *, struct scsi_pkt *);
145static int vhci_scsi_reset_notify(struct scsi_address *, int, void (*)(caddr_t),
146    caddr_t);
147static int vhci_scsi_get_bus_addr(struct scsi_device *, char *, int);
148static int vhci_scsi_get_name(struct scsi_device *, char *, int);
149static int vhci_scsi_bus_power(dev_info_t *, void *, pm_bus_power_op_t,
150    void *, void *);
151static int vhci_scsi_bus_config(dev_info_t *, uint_t, ddi_bus_config_op_t,
152    void *, dev_info_t **);
153static int vhci_scsi_bus_unconfig(dev_info_t *, uint_t, ddi_bus_config_op_t,
154    void *);
155static struct scsi_failover_ops *vhci_dev_fo(dev_info_t *, struct scsi_device *,
156    void **, char **);
157
158/*
159 * functions registered with the mpxio framework via mdi_vhci_ops_t
160 */
161static int vhci_pathinfo_init(dev_info_t *, mdi_pathinfo_t *, int);
162static int vhci_pathinfo_uninit(dev_info_t *, mdi_pathinfo_t *, int);
163static int vhci_pathinfo_state_change(dev_info_t *, mdi_pathinfo_t *,
164    mdi_pathinfo_state_t, uint32_t, int);
165static int vhci_pathinfo_online(dev_info_t *, mdi_pathinfo_t *, int);
166static int vhci_pathinfo_offline(dev_info_t *, mdi_pathinfo_t *, int);
167static int vhci_failover(dev_info_t *, dev_info_t *, int);
168static void vhci_client_attached(dev_info_t *);
169static int vhci_is_dev_supported(dev_info_t *, dev_info_t *, void *);
170
171static int vhci_ctl(dev_t, int, intptr_t, int, cred_t *, int *);
172static int vhci_devctl(dev_t, int, intptr_t, int, cred_t *, int *);
173static int vhci_ioc_get_phci_path(sv_iocdata_t *, caddr_t, int, caddr_t);
174static int vhci_ioc_get_client_path(sv_iocdata_t *, caddr_t, int, caddr_t);
175static int vhci_ioc_get_paddr(sv_iocdata_t *, caddr_t, int, caddr_t);
176static int vhci_ioc_send_client_path(caddr_t, sv_iocdata_t *, int, caddr_t);
177static void vhci_ioc_devi_to_path(dev_info_t *, caddr_t);
178static int vhci_get_phci_path_list(dev_info_t *, sv_path_info_t *, uint_t);
179static int vhci_get_client_path_list(dev_info_t *, sv_path_info_t *, uint_t);
180static int vhci_get_iocdata(const void *, sv_iocdata_t *, int, caddr_t);
181static int vhci_get_iocswitchdata(const void *, sv_switch_to_cntlr_iocdata_t *,
182    int, caddr_t);
183static int vhci_ioc_alloc_pathinfo(sv_path_info_t **, sv_path_info_t **,
184    uint_t, sv_iocdata_t *, int, caddr_t);
185static void vhci_ioc_free_pathinfo(sv_path_info_t *, sv_path_info_t *, uint_t);
186static int vhci_ioc_send_pathinfo(sv_path_info_t *, sv_path_info_t *, uint_t,
187    sv_iocdata_t *, int, caddr_t);
188static int vhci_handle_ext_fo(struct scsi_pkt *, int);
189static int vhci_efo_watch_cb(caddr_t, struct scsi_watch_result *);
190static int vhci_quiesce_lun(struct scsi_vhci_lun *);
191static int vhci_pgr_validate_and_register(scsi_vhci_priv_t *);
192static void vhci_dispatch_scsi_start(void *);
193static void vhci_efo_done(void *);
194static void vhci_initiate_auto_failback(void *);
195static void vhci_update_pHCI_pkt(struct vhci_pkt *, struct scsi_pkt *);
196static int vhci_update_pathinfo(struct scsi_device *, mdi_pathinfo_t *,
197    struct scsi_failover_ops *, scsi_vhci_lun_t *, struct scsi_vhci *);
198static void vhci_kstat_create_pathinfo(mdi_pathinfo_t *);
199static int vhci_quiesce_paths(dev_info_t *, dev_info_t *,
200    scsi_vhci_lun_t *, char *, char *);
201
202static char *vhci_devnm_to_guid(char *);
203static int vhci_bind_transport(struct scsi_address *, struct vhci_pkt *,
204    int, int (*func)(caddr_t));
205static void vhci_intr(struct scsi_pkt *);
206static int vhci_do_prout(scsi_vhci_priv_t *);
207static void vhci_run_cmd(void *);
208static int vhci_do_prin(struct vhci_pkt **);
209static struct scsi_pkt *vhci_create_retry_pkt(struct vhci_pkt *);
210static struct vhci_pkt *vhci_sync_retry_pkt(struct vhci_pkt *);
211static struct scsi_vhci_lun *vhci_lun_lookup(dev_info_t *);
212static struct scsi_vhci_lun *vhci_lun_lookup_alloc(dev_info_t *, char *, int *);
213static void vhci_lun_free(struct scsi_vhci_lun *dvlp, struct scsi_device *sd);
214static int vhci_recovery_reset(scsi_vhci_lun_t *, struct scsi_address *,
215    uint8_t, uint8_t);
216void vhci_update_pathstates(void *);
217
218#ifdef DEBUG
219static void vhci_print_prin_keys(vhci_prin_readkeys_t *, int);
220static void vhci_print_cdb(dev_info_t *dip, uint_t level,
221    char *title, uchar_t *cdb);
222static void vhci_clean_print(dev_info_t *dev, uint_t level,
223    char *title, uchar_t *data, int len);
224#endif
225static void vhci_print_prout_keys(scsi_vhci_lun_t *, char *);
226static void vhci_uscsi_iodone(struct scsi_pkt *pkt);
227static void vhci_invalidate_mpapi_lu(struct scsi_vhci *, scsi_vhci_lun_t *);
228
229/*
230 * MP-API related functions
231 */
232extern int vhci_mpapi_init(struct scsi_vhci *);
233extern void vhci_mpapi_add_dev_prod(struct scsi_vhci *, char *);
234extern int vhci_mpapi_ctl(dev_t, int, intptr_t, int, cred_t *, int *);
235extern void vhci_update_mpapi_data(struct scsi_vhci *,
236    scsi_vhci_lun_t *, mdi_pathinfo_t *);
237extern void* vhci_get_mpapi_item(struct scsi_vhci *, mpapi_list_header_t *,
238    uint8_t, void*);
239extern void vhci_mpapi_set_path_state(dev_info_t *, mdi_pathinfo_t *, int);
240extern int vhci_mpapi_update_tpg_acc_state_for_lu(struct scsi_vhci *,
241    scsi_vhci_lun_t *);
242
243#define	VHCI_DMA_MAX_XFER_CAP	INT_MAX
244
245#define	VHCI_MAX_PGR_RETRIES	3
246
247/*
248 * Macros for the device-type mpxio options
249 */
250#define	LOAD_BALANCE_OPTIONS		"load-balance-options"
251#define	LOGICAL_BLOCK_REGION_SIZE	"region-size"
252#define	MPXIO_OPTIONS_LIST		"device-type-mpxio-options-list"
253#define	DEVICE_TYPE_STR			"device-type"
254#define	isdigit(ch)			((ch) >= '0' && (ch) <= '9')
255
/*
 * Character device entry points.  Only open/close/ioctl are supported;
 * all data-path operations go through the SCSA transport, not cb_ops.
 */
static struct cb_ops vhci_cb_ops = {
	vhci_open,			/* open */
	vhci_close,			/* close */
	nodev,				/* strategy */
	nodev,				/* print */
	nodev,				/* dump */
	nodev,				/* read */
	nodev,				/* write */
	vhci_ioctl,			/* ioctl */
	nodev,				/* devmap */
	nodev,				/* mmap */
	nodev,				/* segmap */
	nochpoll,			/* chpoll */
	ddi_prop_op,			/* cb_prop_op */
	0,				/* streamtab */
	D_NEW | D_MP,			/* cb_flag */
	CB_REV,				/* rev */
	nodev,				/* aread */
	nodev				/* awrite */
};
276
/*
 * Device operations vector registered with the DDI framework.
 */
static struct dev_ops vhci_ops = {
	DEVO_REV,
	0,
	vhci_getinfo,
	nulldev,		/* identify */
	nulldev,		/* probe */
	vhci_attach,		/* attach and detach are mandatory */
	vhci_detach,
	nodev,			/* reset */
	&vhci_cb_ops,		/* cb_ops */
	NULL,			/* bus_ops */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};
291
extern struct mod_ops mod_driverops;

/* Loadable-module linkage: this module is a device driver. */
static struct modldrv modldrv = {
	&mod_driverops,
	vhci_version_name,	/* module name */
	&vhci_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};
305
/*
 * Callbacks registered with the mpxio (mdi) framework via
 * mdi_vhci_register() in vhci_attach().
 */
static mdi_vhci_ops_t vhci_opinfo = {
	MDI_VHCI_OPS_REV,
	vhci_pathinfo_init,		/* Pathinfo node init callback */
	vhci_pathinfo_uninit,		/* Pathinfo uninit callback */
	vhci_pathinfo_state_change,	/* Pathinfo node state change */
	vhci_failover,			/* failover callback */
	vhci_client_attached,		/* client attached callback	*/
	vhci_is_dev_supported		/* is device supported by mdi */
};
315
316/*
317 * The scsi_failover table defines an ordered set of 'fops' modules supported
318 * by scsi_vhci.  Currently, initialize this table from the 'ddi-forceload'
319 * property specified in scsi_vhci.conf.
320 */
321static struct scsi_failover {
322	ddi_modhandle_t			sf_mod;
323	struct scsi_failover_ops	*sf_sfo;
324} *scsi_failover_table;
325static uint_t	scsi_nfailover;
326
/*
 * Module load entry point: initialize soft state, SCSA HBA framework
 * support, global locks and the target-port map hashes, then install
 * the module.  On mod_install() failure everything is torn down in
 * reverse order of initialization.
 */
int
_init(void)
{
	int	rval;

	/*
	 * Allocate soft state and prepare to do ddi_soft_state_zalloc()
	 * before registering with the transport first.
	 */
	if ((rval = ddi_soft_state_init(&vhci_softstate,
	    sizeof (struct scsi_vhci), 1)) != 0) {
		VHCI_DEBUG(1, (CE_NOTE, NULL,
		    "!_init:soft state init failed\n"));
		return (rval);
	}

	if ((rval = scsi_hba_init(&modlinkage)) != 0) {
		VHCI_DEBUG(1, (CE_NOTE, NULL,
		    "!_init: scsi hba init failed\n"));
		ddi_soft_state_fini(&vhci_softstate);
		return (rval);
	}

	mutex_init(&vhci_global_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&vhci_cv, NULL, CV_DRIVER, NULL);

	/* iostat(1M) target-port map: see vhci_targetmap_* declarations */
	mutex_init(&vhci_targetmap_mutex, NULL, MUTEX_DRIVER, NULL);
	vhci_targetmap_byport = mod_hash_create_strhash(
	    "vhci_targetmap_byport", 256, mod_hash_null_valdtor);
	vhci_targetmap_bypid = mod_hash_create_idhash(
	    "vhci_targetmap_bypid", 256, mod_hash_null_valdtor);

	if ((rval = mod_install(&modlinkage)) != 0) {
		VHCI_DEBUG(1, (CE_NOTE, NULL, "!_init: mod_install failed\n"));
		/* unwind in reverse order of setup */
		if (vhci_targetmap_bypid)
			mod_hash_destroy_idhash(vhci_targetmap_bypid);
		if (vhci_targetmap_byport)
			mod_hash_destroy_strhash(vhci_targetmap_byport);
		mutex_destroy(&vhci_targetmap_mutex);
		cv_destroy(&vhci_cv);
		mutex_destroy(&vhci_global_mutex);
		scsi_hba_fini(&modlinkage);
		ddi_soft_state_fini(&vhci_softstate);
	}
	return (rval);
}
373
374
375/*
376 * the system is done with us as a driver, so clean up
377 */
378int
379_fini(void)
380{
381	int rval;
382
383	/*
384	 * don't start cleaning up until we know that the module remove
385	 * has worked  -- if this works, then we know that each instance
386	 * has successfully been DDI_DETACHed
387	 */
388	if ((rval = mod_remove(&modlinkage)) != 0) {
389		VHCI_DEBUG(4, (CE_NOTE, NULL, "!_fini: mod_remove failed\n"));
390		return (rval);
391	}
392
393	if (vhci_targetmap_bypid)
394		mod_hash_destroy_idhash(vhci_targetmap_bypid);
395	if (vhci_targetmap_byport)
396		mod_hash_destroy_strhash(vhci_targetmap_byport);
397	mutex_destroy(&vhci_targetmap_mutex);
398	cv_destroy(&vhci_cv);
399	mutex_destroy(&vhci_global_mutex);
400	scsi_hba_fini(&modlinkage);
401	ddi_soft_state_fini(&vhci_softstate);
402
403	return (rval);
404}
405
/*
 * Report module information via the standard modinfo(8) mechanism.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
411
412/*
413 * Lookup scsi_failover by "short name" of failover module.
414 */
415struct scsi_failover_ops *
416vhci_failover_ops_by_name(char *name)
417{
418	struct scsi_failover	*sf;
419
420	for (sf = scsi_failover_table; sf->sf_mod; sf++) {
421		if (sf->sf_sfo == NULL)
422			continue;
423		if (strcmp(sf->sf_sfo->sfo_name, name) == 0)
424			return (sf->sf_sfo);
425	}
426	return (NULL);
427}
428
429/*
430 * Load all scsi_failover_ops 'fops' modules.
431 */
432static void
433vhci_failover_modopen(struct scsi_vhci *vhci)
434{
435	char			**module;
436	int			i;
437	struct scsi_failover	*sf;
438	char			**dt;
439	int			e;
440
441	if (scsi_failover_table)
442		return;
443
444	/* Get the list of modules from scsi_vhci.conf */
445	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY,
446	    vhci->vhci_dip, DDI_PROP_DONTPASS, "ddi-forceload",
447	    &module, &scsi_nfailover) != DDI_PROP_SUCCESS) {
448		cmn_err(CE_WARN, "scsi_vhci: "
449		    "scsi_vhci.conf is missing 'ddi-forceload'");
450		return;
451	}
452	if (scsi_nfailover == 0) {
453		cmn_err(CE_WARN, "scsi_vhci: "
454		    "scsi_vhci.conf has empty 'ddi-forceload'");
455		ddi_prop_free(module);
456		return;
457	}
458
459	/* allocate failover table based on number of modules */
460	scsi_failover_table = (struct scsi_failover *)
461	    kmem_zalloc(sizeof (struct scsi_failover) * (scsi_nfailover + 1),
462	    KM_SLEEP);
463
464	/* loop over modules specified in scsi_vhci.conf and open each module */
465	for (i = 0, sf = scsi_failover_table; i < scsi_nfailover; i++) {
466		if (module[i] == NULL)
467			continue;
468
469		sf->sf_mod = ddi_modopen(module[i], KRTLD_MODE_FIRST, &e);
470		if (sf->sf_mod == NULL) {
471			/*
472			 * A module returns EEXIST if other software is
473			 * supporting the intended function: for example
474			 * the scsi_vhci_f_sum_emc module returns EEXIST
475			 * from _init if EMC powerpath software is installed.
476			 */
477			if (e != EEXIST)
478				cmn_err(CE_WARN, "scsi_vhci: unable to open "
479				    "module '%s', error %d", module[i], e);
480			continue;
481		}
482		sf->sf_sfo = ddi_modsym(sf->sf_mod,
483		    "scsi_vhci_failover_ops", &e);
484		if (sf->sf_sfo == NULL) {
485			cmn_err(CE_WARN, "scsi_vhci: "
486			    "unable to import 'scsi_failover_ops' from '%s', "
487			    "error %d", module[i], e);
488			(void) ddi_modclose(sf->sf_mod);
489			sf->sf_mod = NULL;
490			continue;
491		}
492
493		/* register vid/pid of devices supported with mpapi */
494		for (dt = sf->sf_sfo->sfo_devices; *dt; dt++)
495			vhci_mpapi_add_dev_prod(vhci, *dt);
496		sf++;
497	}
498
499	/* verify that at least the "well-known" modules were there */
500	if (vhci_failover_ops_by_name(SFO_NAME_SYM) == NULL)
501		cmn_err(CE_WARN, "scsi_vhci: well-known module \""
502		    SFO_NAME_SYM "\" not defined in scsi_vhci.conf's "
503		    "'ddi-forceload'");
504	if (vhci_failover_ops_by_name(SFO_NAME_TPGS) == NULL)
505		cmn_err(CE_WARN, "scsi_vhci: well-known module \""
506		    SFO_NAME_TPGS "\" not defined in scsi_vhci.conf's "
507		    "'ddi-forceload'");
508
509	/* call sfo_init for modules that need it */
510	for (sf = scsi_failover_table; sf->sf_mod; sf++) {
511		if (sf->sf_sfo && sf->sf_sfo->sfo_init)
512			sf->sf_sfo->sfo_init();
513	}
514
515	ddi_prop_free(module);
516}
517
518/*
519 * unload all loaded scsi_failover_ops modules
520 */
521static void
522vhci_failover_modclose()
523{
524	struct scsi_failover	*sf;
525
526	for (sf = scsi_failover_table; sf->sf_mod; sf++) {
527		if ((sf->sf_mod == NULL) || (sf->sf_sfo == NULL))
528			continue;
529		(void) ddi_modclose(sf->sf_mod);
530		sf->sf_mod = NULL;
531		sf->sf_sfo = NULL;
532	}
533
534	if (scsi_failover_table && scsi_nfailover)
535		kmem_free(scsi_failover_table,
536		    sizeof (struct scsi_failover) * (scsi_nfailover + 1));
537	scsi_failover_table = NULL;
538	scsi_nfailover = 0;
539}
540
541/* ARGSUSED */
542static int
543vhci_open(dev_t *devp, int flag, int otype, cred_t *credp)
544{
545	struct scsi_vhci	*vhci;
546
547	if (otype != OTYP_CHR) {
548		return (EINVAL);
549	}
550
551	vhci = ddi_get_soft_state(vhci_softstate, MINOR2INST(getminor(*devp)));
552	if (vhci == NULL) {
553		VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_open: failed ENXIO\n"));
554		return (ENXIO);
555	}
556
557	mutex_enter(&vhci->vhci_mutex);
558	if ((flag & FEXCL) && (vhci->vhci_state & VHCI_STATE_OPEN)) {
559		mutex_exit(&vhci->vhci_mutex);
560		vhci_log(CE_NOTE, vhci->vhci_dip,
561		    "!vhci%d: Already open\n", getminor(*devp));
562		return (EBUSY);
563	}
564
565	vhci->vhci_state |= VHCI_STATE_OPEN;
566	mutex_exit(&vhci->vhci_mutex);
567	return (0);
568}
569
570
571/* ARGSUSED */
572static int
573vhci_close(dev_t dev, int flag, int otype, cred_t *credp)
574{
575	struct scsi_vhci	*vhci;
576
577	if (otype != OTYP_CHR) {
578		return (EINVAL);
579	}
580
581	vhci = ddi_get_soft_state(vhci_softstate, MINOR2INST(getminor(dev)));
582	if (vhci == NULL) {
583		VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_close: failed ENXIO\n"));
584		return (ENXIO);
585	}
586
587	mutex_enter(&vhci->vhci_mutex);
588	vhci->vhci_state &= ~VHCI_STATE_OPEN;
589	mutex_exit(&vhci->vhci_mutex);
590
591	return (0);
592}
593
594/* ARGSUSED */
595static int
596vhci_ioctl(dev_t dev, int cmd, intptr_t data, int mode,
597    cred_t *credp, int *rval)
598{
599	if (IS_DEVCTL(cmd)) {
600		return (vhci_devctl(dev, cmd, data, mode, credp, rval));
601	} else if (cmd == MP_CMD) {
602		return (vhci_mpapi_ctl(dev, cmd, data, mode, credp, rval));
603	} else {
604		return (vhci_ctl(dev, cmd, data, mode, credp, rval));
605	}
606}
607
608/*
609 * attach the module
610 */
611static int
612vhci_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
613{
614	int			rval = DDI_FAILURE;
615	int			scsi_hba_attached = 0;
616	int			vhci_attached = 0;
617	int			mutex_initted = 0;
618	int			instance;
619	struct scsi_vhci	*vhci;
620	scsi_hba_tran_t		*tran;
621	char			cache_name_buf[64];
622	char			*data;
623
624	VHCI_DEBUG(4, (CE_NOTE, NULL, "vhci_attach: cmd=0x%x\n", cmd));
625
626	instance = ddi_get_instance(dip);
627
628	switch (cmd) {
629	case DDI_ATTACH:
630		break;
631
632	case DDI_RESUME:
633	case DDI_PM_RESUME:
634		VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_attach: resume not yet"
635		    "implemented\n"));
636		return (rval);
637
638	default:
639		VHCI_DEBUG(1, (CE_NOTE, NULL,
640		    "!vhci_attach: unknown ddi command\n"));
641		return (rval);
642	}
643
644	/*
645	 * Allocate vhci data structure.
646	 */
647	if (ddi_soft_state_zalloc(vhci_softstate, instance) != DDI_SUCCESS) {
648		VHCI_DEBUG(1, (CE_NOTE, dip, "!vhci_attach:"
649		    "soft state alloc failed\n"));
650		return (DDI_FAILURE);
651	}
652
653	if ((vhci = ddi_get_soft_state(vhci_softstate, instance)) == NULL) {
654		VHCI_DEBUG(1, (CE_NOTE, dip, "!vhci_attach:"
655		    "bad soft state\n"));
656		ddi_soft_state_free(vhci_softstate, instance);
657		return (DDI_FAILURE);
658	}
659
660	/* Allocate packet cache */
661	(void) snprintf(cache_name_buf, sizeof (cache_name_buf),
662	    "vhci%d_cache", instance);
663
664	mutex_init(&vhci->vhci_mutex, NULL, MUTEX_DRIVER, NULL);
665	mutex_initted++;
666
667	/*
668	 * Allocate a transport structure
669	 */
670	tran = scsi_hba_tran_alloc(dip, SCSI_HBA_CANSLEEP);
671	ASSERT(tran != NULL);
672
673	vhci->vhci_tran		= tran;
674	vhci->vhci_dip		= dip;
675	vhci->vhci_instance	= instance;
676
677	tran->tran_hba_private	= vhci;
678	tran->tran_tgt_init	= vhci_scsi_tgt_init;
679	tran->tran_tgt_probe	= NULL;
680	tran->tran_tgt_free	= vhci_scsi_tgt_free;
681
682	tran->tran_start	= vhci_scsi_start;
683	tran->tran_abort	= vhci_scsi_abort;
684	tran->tran_reset	= vhci_scsi_reset;
685	tran->tran_getcap	= vhci_scsi_getcap;
686	tran->tran_setcap	= vhci_scsi_setcap;
687	tran->tran_init_pkt	= vhci_scsi_init_pkt;
688	tran->tran_destroy_pkt	= vhci_scsi_destroy_pkt;
689	tran->tran_dmafree	= vhci_scsi_dmafree;
690	tran->tran_sync_pkt	= vhci_scsi_sync_pkt;
691	tran->tran_reset_notify = vhci_scsi_reset_notify;
692
693	tran->tran_get_bus_addr	= vhci_scsi_get_bus_addr;
694	tran->tran_get_name	= vhci_scsi_get_name;
695	tran->tran_bus_reset	= NULL;
696	tran->tran_quiesce	= NULL;
697	tran->tran_unquiesce	= NULL;
698
699	/*
700	 * register event notification routines with scsa
701	 */
702	tran->tran_get_eventcookie = NULL;
703	tran->tran_add_eventcall = NULL;
704	tran->tran_remove_eventcall = NULL;
705	tran->tran_post_event	= NULL;
706
707	tran->tran_bus_power	= vhci_scsi_bus_power;
708
709	tran->tran_bus_config	= vhci_scsi_bus_config;
710	tran->tran_bus_unconfig	= vhci_scsi_bus_unconfig;
711
712	/*
713	 * Attach this instance with the mpxio framework
714	 */
715	if (mdi_vhci_register(MDI_HCI_CLASS_SCSI, dip, &vhci_opinfo, 0)
716	    != MDI_SUCCESS) {
717		VHCI_DEBUG(1, (CE_NOTE, dip, "!vhci_attach:"
718		    "mdi_vhci_register failed\n"));
719		goto attach_fail;
720	}
721	vhci_attached++;
722
723	/*
724	 * Attach this instance of the hba.
725	 *
726	 * Regarding dma attributes: Since scsi_vhci is a virtual scsi HBA
727	 * driver, it has nothing to do with DMA. However, when calling
728	 * scsi_hba_attach_setup() we need to pass something valid in the
729	 * dma attributes parameter. So we just use scsi_alloc_attr.
730	 * SCSA itself seems to care only for dma_attr_minxfer and
731	 * dma_attr_burstsizes fields of dma attributes structure.
732	 * It expects those fileds to be non-zero.
733	 */
734	if (scsi_hba_attach_setup(dip, &scsi_alloc_attr, tran,
735	    SCSI_HBA_ADDR_COMPLEX) != DDI_SUCCESS) {
736		VHCI_DEBUG(1, (CE_NOTE, dip, "!vhci_attach:"
737		    "hba attach failed\n"));
738		goto attach_fail;
739	}
740	scsi_hba_attached++;
741
742	if (ddi_create_minor_node(dip, "devctl", S_IFCHR,
743	    INST2DEVCTL(instance), DDI_NT_SCSI_NEXUS, 0) != DDI_SUCCESS) {
744		VHCI_DEBUG(1, (CE_NOTE, dip, "!vhci_attach:"
745		    " ddi_create_minor_node failed\n"));
746		goto attach_fail;
747	}
748
749	/*
750	 * Set pm-want-child-notification property for
751	 * power management of the phci and client
752	 */
753	if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
754	    "pm-want-child-notification?", NULL, 0) != DDI_PROP_SUCCESS) {
755		cmn_err(CE_WARN,
756		    "%s%d fail to create pm-want-child-notification? prop",
757		    ddi_driver_name(dip), ddi_get_instance(dip));
758		goto attach_fail;
759	}
760
761	vhci->vhci_taskq = taskq_create("vhci_taskq", 1, MINCLSYSPRI, 1, 4, 0);
762	vhci->vhci_update_pathstates_taskq =
763	    taskq_create("vhci_update_pathstates", VHCI_NUM_UPDATE_TASKQ,
764	    MINCLSYSPRI, 1, 4, 0);
765	ASSERT(vhci->vhci_taskq);
766	ASSERT(vhci->vhci_update_pathstates_taskq);
767
768	/*
769	 * Set appropriate configuration flags based on options set in
770	 * conf file.
771	 */
772	vhci->vhci_conf_flags = 0;
773	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip, PROPFLAGS,
774	    "auto-failback", &data) == DDI_SUCCESS) {
775		if (strcmp(data, "enable") == 0)
776			vhci->vhci_conf_flags |= VHCI_CONF_FLAGS_AUTO_FAILBACK;
777		ddi_prop_free(data);
778	}
779
780	if (!(vhci->vhci_conf_flags & VHCI_CONF_FLAGS_AUTO_FAILBACK))
781		vhci_log(CE_NOTE, dip, "!Auto-failback capability "
782		    "disabled through scsi_vhci.conf file.");
783
784	/*
785	 * Allocate an mpapi private structure
786	 */
787	vhci->mp_priv = kmem_zalloc(sizeof (mpapi_priv_t), KM_SLEEP);
788	if (vhci_mpapi_init(vhci) != 0) {
789		VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_attach: "
790		    "vhci_mpapi_init() failed"));
791	}
792
793	vhci_failover_modopen(vhci);		/* load failover modules */
794
795	ddi_report_dev(dip);
796	return (DDI_SUCCESS);
797
798attach_fail:
799	if (vhci_attached)
800		(void) mdi_vhci_unregister(dip, 0);
801
802	if (scsi_hba_attached)
803		(void) scsi_hba_detach(dip);
804
805	if (vhci->vhci_tran)
806		scsi_hba_tran_free(vhci->vhci_tran);
807
808	if (mutex_initted) {
809		mutex_destroy(&vhci->vhci_mutex);
810	}
811
812	ddi_soft_state_free(vhci_softstate, instance);
813	return (DDI_FAILURE);
814}
815
816
817/*ARGSUSED*/
818static int
819vhci_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
820{
821	int			instance = ddi_get_instance(dip);
822	scsi_hba_tran_t		*tran;
823	struct scsi_vhci	*vhci;
824
825	VHCI_DEBUG(4, (CE_NOTE, NULL, "vhci_detach: cmd=0x%x\n", cmd));
826
827	if ((tran = ddi_get_driver_private(dip)) == NULL)
828		return (DDI_FAILURE);
829
830	vhci = TRAN2HBAPRIVATE(tran);
831	if (!vhci) {
832		return (DDI_FAILURE);
833	}
834
835	switch (cmd) {
836	case DDI_DETACH:
837		break;
838
839	case DDI_SUSPEND:
840	case DDI_PM_SUSPEND:
841		VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_detach: suspend/pm not yet"
842		    "implemented\n"));
843		return (DDI_FAILURE);
844
845	default:
846		VHCI_DEBUG(1, (CE_NOTE, NULL,
847		    "!vhci_detach: unknown ddi command\n"));
848		return (DDI_FAILURE);
849	}
850
851	(void) mdi_vhci_unregister(dip, 0);
852	(void) scsi_hba_detach(dip);
853	scsi_hba_tran_free(tran);
854
855	if (ddi_prop_remove(DDI_DEV_T_NONE, dip,
856	    "pm-want-child-notification?") != DDI_PROP_SUCCESS) {
857		cmn_err(CE_WARN,
858		    "%s%d unable to remove prop pm-want_child_notification?",
859		    ddi_driver_name(dip), ddi_get_instance(dip));
860	}
861	if (vhci_restart_timeid != 0) {
862		(void) untimeout(vhci_restart_timeid);
863	}
864	vhci_restart_timeid = 0;
865
866	mutex_destroy(&vhci->vhci_mutex);
867	vhci->vhci_dip = NULL;
868	vhci->vhci_tran = NULL;
869	taskq_destroy(vhci->vhci_taskq);
870	taskq_destroy(vhci->vhci_update_pathstates_taskq);
871	ddi_remove_minor_node(dip, NULL);
872	ddi_soft_state_free(vhci_softstate, instance);
873
874	vhci_failover_modclose();		/* unload failover modules */
875	return (DDI_SUCCESS);
876}
877
878/*
879 * vhci_getinfo()
880 * Given the device number, return the devinfo pointer or the
881 * instance number.
882 * Note: always succeed DDI_INFO_DEVT2INSTANCE, even before attach.
883 */
884
885/*ARGSUSED*/
886static int
887vhci_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
888{
889	struct scsi_vhci	*vhcip;
890	int			instance = MINOR2INST(getminor((dev_t)arg));
891
892	switch (cmd) {
893	case DDI_INFO_DEVT2DEVINFO:
894		vhcip = ddi_get_soft_state(vhci_softstate, instance);
895		if (vhcip != NULL)
896			*result = vhcip->vhci_dip;
897		else {
898			*result = NULL;
899			return (DDI_FAILURE);
900		}
901		break;
902
903	case DDI_INFO_DEVT2INSTANCE:
904		*result = (void *)(uintptr_t)instance;
905		break;
906
907	default:
908		return (DDI_FAILURE);
909	}
910
911	return (DDI_SUCCESS);
912}
913
914/*ARGSUSED*/
915static int
916vhci_scsi_tgt_init(dev_info_t *hba_dip, dev_info_t *tgt_dip,
917    scsi_hba_tran_t *hba_tran, struct scsi_device *sd)
918{
919	char			*guid;
920	scsi_vhci_lun_t		*vlun;
921	struct scsi_vhci	*vhci;
922	clock_t			from_ticks;
923	mdi_pathinfo_t		*pip;
924	int			rval;
925
926	ASSERT(hba_dip != NULL);
927	ASSERT(tgt_dip != NULL);
928
929	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, tgt_dip, PROPFLAGS,
930	    MDI_CLIENT_GUID_PROP, &guid) != DDI_SUCCESS) {
931		/*
932		 * This must be the .conf node without GUID property.
933		 * The node under fp already inserts a delay, so we
934		 * just return from here. We rely on this delay to have
935		 * all dips be posted to the ndi hotplug thread's newdev
936		 * list. This is necessary for the deferred attach
937		 * mechanism to work and opens() done soon after boot to
938		 * succeed.
939		 */
940		VHCI_DEBUG(4, (CE_WARN, hba_dip, "tgt_init: lun guid "
941		    "property failed"));
942		return (DDI_NOT_WELL_FORMED);
943	}
944
945	if (ndi_dev_is_persistent_node(tgt_dip) == 0) {
946		/*
947		 * This must be .conf node with the GUID property. We don't
948		 * merge property by ndi_merge_node() here  because the
949		 * devi_addr_buf of .conf node is "" always according the
950		 * implementation of vhci_scsi_get_name_bus_addr().
951		 */
952		ddi_set_name_addr(tgt_dip, NULL);
953		return (DDI_FAILURE);
954	}
955
956	vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(hba_dip));
957	ASSERT(vhci != NULL);
958
959	VHCI_DEBUG(4, (CE_NOTE, hba_dip,
960	    "!tgt_init: called for %s (instance %d)\n",
961	    ddi_driver_name(tgt_dip), ddi_get_instance(tgt_dip)));
962
963	vlun = vhci_lun_lookup(tgt_dip);
964
965	mutex_enter(&vhci_global_mutex);
966
967	from_ticks = ddi_get_lbolt();
968	if (vhci_to_ticks == 0) {
969		vhci_to_ticks = from_ticks +
970		    drv_usectohz(vhci_init_wait_timeout);
971	}
972
973#if DEBUG
974	if (vlun) {
975		VHCI_DEBUG(1, (CE_WARN, hba_dip, "tgt_init: "
976		    "vhci_scsi_tgt_init: guid %s : found vlun 0x%p "
977		    "from_ticks %lx to_ticks %lx",
978		    guid, (void *)vlun, from_ticks, vhci_to_ticks));
979	} else {
980		VHCI_DEBUG(1, (CE_WARN, hba_dip, "tgt_init: "
981		    "vhci_scsi_tgt_init: guid %s : vlun not found "
982		    "from_ticks %lx to_ticks %lx", guid, from_ticks,
983		    vhci_to_ticks));
984	}
985#endif
986
987	rval = mdi_select_path(tgt_dip, NULL,
988	    (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH), NULL, &pip);
989	if (rval == MDI_SUCCESS) {
990		mdi_rele_path(pip);
991	}
992
993	/*
994	 * Wait for the following conditions :
995	 *	1. no vlun available yet
996	 *	2. no path established
997	 *	3. timer did not expire
998	 */
999	while ((vlun == NULL) || (mdi_client_get_path_count(tgt_dip) == 0) ||
1000	    (rval != MDI_SUCCESS)) {
1001		if (vlun && vlun->svl_not_supported) {
1002			VHCI_DEBUG(1, (CE_WARN, hba_dip, "tgt_init: "
1003			    "vlun 0x%p lun guid %s not supported!",
1004			    (void *)vlun, guid));
1005			mutex_exit(&vhci_global_mutex);
1006			ddi_prop_free(guid);
1007			return (DDI_NOT_WELL_FORMED);
1008		}
1009		if ((vhci_first_time == 0) && (from_ticks >= vhci_to_ticks)) {
1010			vhci_first_time = 1;
1011		}
1012		if (vhci_first_time == 1) {
1013			VHCI_DEBUG(1, (CE_WARN, hba_dip, "vhci_scsi_tgt_init: "
1014			    "no wait for %s. from_tick %lx, to_tick %lx",
1015			    guid, from_ticks, vhci_to_ticks));
1016			mutex_exit(&vhci_global_mutex);
1017			ddi_prop_free(guid);
1018			return (DDI_NOT_WELL_FORMED);
1019		}
1020
1021		if (cv_timedwait(&vhci_cv,
1022		    &vhci_global_mutex, vhci_to_ticks) == -1) {
1023			/* Timed out */
1024#ifdef DEBUG
1025			if (vlun == NULL) {
1026				VHCI_DEBUG(1, (CE_WARN, hba_dip,
1027				    "tgt_init: no vlun for %s!", guid));
1028			} else if (mdi_client_get_path_count(tgt_dip) == 0) {
1029				VHCI_DEBUG(1, (CE_WARN, hba_dip,
1030				    "tgt_init: client path count is "
1031				    "zero for %s!", guid));
1032			} else {
1033				VHCI_DEBUG(1, (CE_WARN, hba_dip,
1034				    "tgt_init: client path not "
1035				    "available yet for %s!", guid));
1036			}
1037#endif /* DEBUG */
1038			mutex_exit(&vhci_global_mutex);
1039			ddi_prop_free(guid);
1040			return (DDI_NOT_WELL_FORMED);
1041		}
1042		vlun = vhci_lun_lookup(tgt_dip);
1043		rval = mdi_select_path(tgt_dip, NULL,
1044		    (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH),
1045		    NULL, &pip);
1046		if (rval == MDI_SUCCESS) {
1047			mdi_rele_path(pip);
1048		}
1049		from_ticks = ddi_get_lbolt();
1050	}
1051	mutex_exit(&vhci_global_mutex);
1052
1053	ASSERT(vlun != NULL);
1054	ddi_prop_free(guid);
1055
1056	scsi_device_hba_private_set(sd, vlun);
1057
1058	return (DDI_SUCCESS);
1059}
1060
1061/*ARGSUSED*/
1062static void
1063vhci_scsi_tgt_free(dev_info_t *hba_dip, dev_info_t *tgt_dip,
1064    scsi_hba_tran_t *hba_tran, struct scsi_device *sd)
1065{
1066	struct scsi_vhci_lun *dvlp;
1067	ASSERT(mdi_client_get_path_count(tgt_dip) <= 0);
1068	dvlp = (struct scsi_vhci_lun *)scsi_device_hba_private_get(sd);
1069	ASSERT(dvlp != NULL);
1070
1071	vhci_lun_free(dvlp, sd);
1072}
1073
1074/*
1075 * a PGR register command has started; copy the info we need
1076 */
1077int
1078vhci_pgr_register_start(scsi_vhci_lun_t *vlun, struct scsi_pkt *pkt)
1079{
1080	struct vhci_pkt		*vpkt = TGTPKT2VHCIPKT(pkt);
1081	void			*addr;
1082
1083	if (!vpkt->vpkt_tgt_init_bp)
1084		return (TRAN_BADPKT);
1085
1086	addr = bp_mapin_common(vpkt->vpkt_tgt_init_bp,
1087	    (vpkt->vpkt_flags & CFLAG_NOWAIT) ? VM_NOSLEEP : VM_SLEEP);
1088	if (addr == NULL)
1089		return (TRAN_BUSY);
1090
1091	mutex_enter(&vlun->svl_mutex);
1092
1093	vhci_print_prout_keys(vlun, "v_pgr_reg_start: before bcopy:");
1094
1095	bcopy(addr, &vlun->svl_prout, sizeof (vhci_prout_t) -
1096	    (2 * MHIOC_RESV_KEY_SIZE * sizeof (char)));
1097	bcopy(pkt->pkt_cdbp, vlun->svl_cdb, sizeof (vlun->svl_cdb));
1098
1099	vhci_print_prout_keys(vlun, "v_pgr_reg_start: after bcopy:");
1100
1101	vlun->svl_time = pkt->pkt_time;
1102	vlun->svl_bcount = vpkt->vpkt_tgt_init_bp->b_bcount;
1103	vlun->svl_first_path = vpkt->vpkt_path;
1104	mutex_exit(&vlun->svl_mutex);
1105	return (0);
1106}
1107
1108/*
1109 * Function name : vhci_scsi_start()
1110 *
1111 * Return Values : TRAN_FATAL_ERROR	- vhci has been shutdown
1112 *					  or other fatal failure
1113 *					  preventing packet transportation
1114 *		   TRAN_BUSY		- request queue is full
1115 *		   TRAN_ACCEPT		- pkt has been submitted to phci
1116 *					  (or is held in the waitQ)
1117 * Description	 : Implements SCSA's tran_start() entry point for
1118 *		   packet transport
1119 *
1120 */
static int
vhci_scsi_start(struct scsi_address *ap, struct scsi_pkt *pkt)
{
	int			rval = TRAN_ACCEPT;
	int			instance, held;
	struct scsi_vhci	*vhci = ADDR2VHCI(ap);
	struct scsi_vhci_lun	*vlun = ADDR2VLUN(ap);
	struct vhci_pkt		*vpkt = TGTPKT2VHCIPKT(pkt);
	int			flags = 0;
	scsi_vhci_priv_t	*svp, *svp_resrv;
	dev_info_t		*cdip;
	client_lb_t		lbp;
	int			restore_lbp = 0;
	/* set if pkt is SCSI-II RESERVE cmd */
	int			pkt_reserve_cmd = 0;
	int			reserve_failed = 0;
	int			resrv_instance = 0;
	mdi_pathinfo_t		*pip;
	struct scsi_pkt		*rel_pkt;

	ASSERT(vhci != NULL);
	ASSERT(vpkt != NULL);
	ASSERT(vpkt->vpkt_state != VHCI_PKT_ISSUED);
	cdip = ADDR2DIP(ap);

	/*
	 * Block IOs if LUN is held or QUIESCED for IOs.
	 */
	if ((VHCI_LUN_IS_HELD(vlun)) ||
	    ((vlun->svl_flags & VLUN_QUIESCED_FLG) == VLUN_QUIESCED_FLG)) {
		return (TRAN_BUSY);
	}

	/*
	 * vhci_lun needs to be quiesced before SCSI-II RESERVE command
	 * can be issued.  This may require a cv_timedwait, which is
	 * dangerous to perform in an interrupt context.  So if this
	 * is a RESERVE command a taskq is dispatched to service it.
	 * This taskq shall again call vhci_scsi_start, but we shall be
	 * sure its not in an interrupt context.
	 */
	if ((pkt->pkt_cdbp[0] == SCMD_RESERVE) ||
	    (pkt->pkt_cdbp[0] == SCMD_RESERVE_G1)) {
		if (!(vpkt->vpkt_state & VHCI_PKT_THRU_TASKQ)) {
			if (taskq_dispatch(vhci->vhci_taskq,
			    vhci_dispatch_scsi_start, (void *) vpkt,
			    KM_NOSLEEP) != TASKQID_INVALID) {
				return (TRAN_ACCEPT);
			} else {
				return (TRAN_BUSY);
			}
		}

		/*
		 * Here we ensure that simultaneous SCSI-II RESERVE cmds don't
		 * get serviced for a lun.
		 */
		VHCI_HOLD_LUN(vlun, VH_NOSLEEP, held);
		if (!held) {
			return (TRAN_BUSY);
		} else if ((vlun->svl_flags & VLUN_QUIESCED_FLG) ==
		    VLUN_QUIESCED_FLG) {
			VHCI_RELEASE_LUN(vlun);
			return (TRAN_BUSY);
		}

		/*
		 * To ensure that no IOs occur for this LUN for the duration
		 * of this pkt set the VLUN_QUIESCED_FLG.
		 * In case this routine needs to exit on error make sure that
		 * this flag is cleared.
		 */
		vlun->svl_flags |= VLUN_QUIESCED_FLG;
		pkt_reserve_cmd = 1;

		/*
		 * if this is a SCSI-II RESERVE command, set load balancing
		 * policy to be ALTERNATE PATH to ensure that all subsequent
		 * IOs are routed on the same path.  This is because if commands
		 * are routed across multiple paths then IOs on paths other than
		 * the one on which the RESERVE was executed will get a
		 * RESERVATION CONFLICT
		 */
		lbp = mdi_get_lb_policy(cdip);
		if (lbp != LOAD_BALANCE_NONE) {
			if (vhci_quiesce_lun(vlun) != 1) {
				vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
				VHCI_RELEASE_LUN(vlun);
				return (TRAN_FATAL_ERROR);
			}
			vlun->svl_lb_policy_save = lbp;
			if (mdi_set_lb_policy(cdip, LOAD_BALANCE_NONE) !=
			    MDI_SUCCESS) {
				vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
				VHCI_RELEASE_LUN(vlun);
				return (TRAN_FATAL_ERROR);
			}
			restore_lbp = 1;
		}

		VHCI_DEBUG(2, (CE_NOTE, vhci->vhci_dip,
		    "!vhci_scsi_start: sending SCSI-2 RESERVE, vlun 0x%p, "
		    "svl_resrv_pip 0x%p, svl_flags: %x, lb_policy %x",
		    (void *)vlun, (void *)vlun->svl_resrv_pip, vlun->svl_flags,
		    mdi_get_lb_policy(cdip)));

		/*
		 * See comments for VLUN_RESERVE_ACTIVE_FLG in scsi_vhci.h
		 * To narrow this window where a reserve command may be sent
		 * down an inactive path the path states first need to be
		 * updated.  Before calling vhci_update_pathstates reset
		 * VLUN_RESERVE_ACTIVE_FLG, just in case it was already set
		 * for this lun.  This shall prevent an unnecessary reset
		 * from being sent out.  Also remember currently reserved path
		 * just for a case the new reservation will go to another path.
		 */
		if (vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
			resrv_instance = mdi_pi_get_path_instance(
			    vlun->svl_resrv_pip);
		}
		vlun->svl_flags &= ~VLUN_RESERVE_ACTIVE_FLG;
		vhci_update_pathstates((void *)vlun);
	}

	/* instance number is used only in the debug messages below */
	instance = ddi_get_instance(vhci->vhci_dip);

	/*
	 * If the command is PRIN with action of zero, then the cmd
	 * is reading PR keys which requires filtering on completion.
	 * Data cache sync must be guaranteed.
	 */
	if ((pkt->pkt_cdbp[0] == SCMD_PRIN) && (pkt->pkt_cdbp[1] == 0) &&
	    (vpkt->vpkt_org_vpkt == NULL)) {
		vpkt->vpkt_tgt_init_pkt_flags |= PKT_CONSISTENT;
	}

	/*
	 * Do not defer bind for PKT_DMA_PARTIAL
	 */
	if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {

		/* This is a non pkt_dma_partial case */
		if ((rval = vhci_bind_transport(
		    ap, vpkt, vpkt->vpkt_tgt_init_pkt_flags, NULL_FUNC))
		    != TRAN_ACCEPT) {
			VHCI_DEBUG(6, (CE_WARN, vhci->vhci_dip,
			    "!vhci%d %x: failed to bind transport: "
			    "vlun 0x%p pkt_reserved %x restore_lbp %x,"
			    "lbp %x", instance, rval, (void *)vlun,
			    pkt_reserve_cmd, restore_lbp, lbp));
			if (restore_lbp)
				(void) mdi_set_lb_policy(cdip, lbp);
			if (pkt_reserve_cmd)
				vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
			return (rval);
		}
		VHCI_DEBUG(8, (CE_NOTE, NULL,
		    "vhci_scsi_start: v_b_t called 0x%p\n", (void *)vpkt));
	}
	ASSERT(vpkt->vpkt_hba_pkt != NULL);
	ASSERT(vpkt->vpkt_path != NULL);

	/*
	 * This is the chance to adjust the pHCI's pkt and other information
	 * from target driver's pkt.
	 */
	VHCI_DEBUG(8, (CE_NOTE, vhci->vhci_dip, "vhci_scsi_start vpkt %p\n",
	    (void *)vpkt));
	vhci_update_pHCI_pkt(vpkt, pkt);

	/*
	 * A RESERVE must go down the path the reservation lives on;
	 * flag a mismatch so the pkt is failed and cleaned up below.
	 */
	if (vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
		if (vpkt->vpkt_path != vlun->svl_resrv_pip) {
			VHCI_DEBUG(1, (CE_WARN, vhci->vhci_dip,
			    "!vhci_bind: reserve flag set for vlun 0x%p, but, "
			    "pktpath 0x%p resrv path 0x%p differ. lb_policy %x",
			    (void *)vlun, (void *)vpkt->vpkt_path,
			    (void *)vlun->svl_resrv_pip,
			    mdi_get_lb_policy(cdip)));
			reserve_failed = 1;
		}
	}

	svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(vpkt->vpkt_path);
	if (svp == NULL || reserve_failed) {
		if (pkt_reserve_cmd) {
			VHCI_DEBUG(6, (CE_WARN, vhci->vhci_dip,
			    "!vhci_bind returned null svp vlun 0x%p",
			    (void *)vlun));
			vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
			if (restore_lbp)
				(void) mdi_set_lb_policy(cdip, lbp);
		}
/*
 * Common failure exit: undo the transport binding (unless the target
 * driver owns it via PKT_DMA_PARTIAL) and release the PGR semaphore
 * taken for PROUT register commands, then fail the pkt with TRAN_BUSY.
 */
pkt_cleanup:
		if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
			scsi_destroy_pkt(vpkt->vpkt_hba_pkt);
			vpkt->vpkt_hba_pkt = NULL;
			if (vpkt->vpkt_path) {
				mdi_rele_path(vpkt->vpkt_path);
				vpkt->vpkt_path = NULL;
			}
		}
		if ((pkt->pkt_cdbp[0] == SCMD_PROUT) &&
		    (((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_REGISTER) ||
		    ((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_R_AND_IGNORE))) {
			sema_v(&vlun->svl_pgr_sema);
		}
		return (TRAN_BUSY);
	}

	if ((resrv_instance != 0) && (resrv_instance !=
	    mdi_pi_get_path_instance(vpkt->vpkt_path))) {
		/*
		 * This is an attempt to reserve vpkt->vpkt_path.  But the
		 * previously reserved path referred by resrv_instance might
		 * still be reserved.  Hence we will send a release command
		 * there in order to avoid a reservation conflict.
		 */
		VHCI_DEBUG(1, (CE_NOTE, vhci->vhci_dip, "!vhci_scsi_start: "
		    "conflicting reservation on another path, vlun 0x%p, "
		    "reserved instance %d, new instance: %d, pip: 0x%p",
		    (void *)vlun, resrv_instance,
		    mdi_pi_get_path_instance(vpkt->vpkt_path),
		    (void *)vpkt->vpkt_path));

		/*
		 * In rare cases, the path referred by resrv_instance could
		 * disappear in the meantime. Calling mdi_select_path() below
		 * is an attempt to find out if the path still exists. It also
		 * ensures that the path will be held when the release is sent.
		 */
		rval = mdi_select_path(cdip, NULL, MDI_SELECT_PATH_INSTANCE,
		    (void *)(intptr_t)resrv_instance, &pip);

		if ((rval == MDI_SUCCESS) && (pip != NULL)) {
			svp_resrv = (scsi_vhci_priv_t *)
			    mdi_pi_get_vhci_private(pip);
			rel_pkt = scsi_init_pkt(&svp_resrv->svp_psd->sd_address,
			    NULL, NULL, CDB_GROUP0,
			    sizeof (struct scsi_arq_status), 0, 0, SLEEP_FUNC,
			    NULL);

			if (rel_pkt == NULL) {
				char	*p_path;

				/*
				 * This is very unlikely.
				 * scsi_init_pkt(SLEEP_FUNC) does not fail
				 * because of resources. But in theory it could
				 * fail for some other reason. There is not an
				 * easy way how to recover though. Log a warning
				 * and return.
				 */
				p_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
				vhci_log(CE_WARN, vhci->vhci_dip, "!Sending "
				    "RELEASE(6) to %s failed, a potential "
				    "reservation conflict ahead.",
				    ddi_pathname(mdi_pi_get_phci(pip), p_path));
				kmem_free(p_path, MAXPATHLEN);

				if (restore_lbp)
					(void) mdi_set_lb_policy(cdip, lbp);

				/* no need to check pkt_reserve_cmd here */
				vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
				return (TRAN_FATAL_ERROR);
			}

			rel_pkt->pkt_cdbp[0] = SCMD_RELEASE;
			rel_pkt->pkt_time = 60;

			/*
			 * Ignore the return value.  If it will fail
			 * then most likely it is no longer reserved
			 * anyway.
			 */
			(void) vhci_do_scsi_cmd(rel_pkt);
			VHCI_DEBUG(1, (CE_NOTE, NULL,
			    "!vhci_scsi_start: path 0x%p, issued SCSI-2"
			    " RELEASE\n", (void *)pip));
			scsi_destroy_pkt(rel_pkt);
			mdi_rele_path(pip);
		}
	}

	VHCI_INCR_PATH_CMDCOUNT(svp);

	/*
	 * Ensure that no other IOs raced ahead, while a RESERVE cmd was
	 * QUIESCING the same lun.
	 */
	if ((!pkt_reserve_cmd) &&
	    ((vlun->svl_flags & VLUN_QUIESCED_FLG) == VLUN_QUIESCED_FLG)) {
		VHCI_DECR_PATH_CMDCOUNT(svp);
		goto pkt_cleanup;
	}

	if ((pkt->pkt_cdbp[0] == SCMD_PRIN) ||
	    (pkt->pkt_cdbp[0] == SCMD_PROUT)) {
		/*
		 * currently this thread only handles running PGR
		 * commands, so don't bother creating it unless
		 * something interesting is going to happen (like
		 * either a PGR out, or a PGR in with enough space
		 * to hold the keys that are getting returned)
		 */
		mutex_enter(&vlun->svl_mutex);
		if (((vlun->svl_flags & VLUN_TASK_D_ALIVE_FLG) == 0) &&
		    (pkt->pkt_cdbp[0] == SCMD_PROUT)) {
			vlun->svl_taskq = taskq_create("vlun_pgr_task_daemon",
			    1, MINCLSYSPRI, 1, 4, 0);
			vlun->svl_flags |= VLUN_TASK_D_ALIVE_FLG;
		}
		mutex_exit(&vlun->svl_mutex);
		if ((pkt->pkt_cdbp[0] == SCMD_PROUT) &&
		    (((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_REGISTER) ||
		    ((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_R_AND_IGNORE))) {
			/* NOTE: assignment in condition is intentional */
			if (rval = vhci_pgr_register_start(vlun, pkt)) {
				/* an error */
				sema_v(&vlun->svl_pgr_sema);
				return (rval);
			}
		}
	}

	/*
	 * SCSI-II RESERVE cmd is not expected in polled mode.
	 * If this changes it needs to be handled for the polled scenario.
	 */
	flags = vpkt->vpkt_hba_pkt->pkt_flags;

	/*
	 * Set the path_instance *before* sending the scsi_pkt down the path
	 * to mpxio's pHCI so that additional path abstractions at a pHCI
	 * level (like maybe iSCSI at some point in the future) can update
	 * the path_instance.
	 */
	if (scsi_pkt_allocated_correctly(vpkt->vpkt_hba_pkt))
		vpkt->vpkt_hba_pkt->pkt_path_instance =
		    mdi_pi_get_path_instance(vpkt->vpkt_path);

	/* Hand the pkt to the selected pHCI. */
	rval = scsi_transport(vpkt->vpkt_hba_pkt);
	if (rval == TRAN_ACCEPT) {
		if (flags & FLAG_NOINTR) {
			/*
			 * Polled command completed inline: copy the pHCI
			 * pkt's completion state back into the target
			 * driver's pkt and complete it here.
			 */
			struct scsi_pkt *tpkt = vpkt->vpkt_tgt_pkt;
			struct scsi_pkt *pkt = vpkt->vpkt_hba_pkt;

			ASSERT(tpkt != NULL);
			*(tpkt->pkt_scbp) = *(pkt->pkt_scbp);
			tpkt->pkt_resid = pkt->pkt_resid;
			tpkt->pkt_state = pkt->pkt_state;
			tpkt->pkt_statistics = pkt->pkt_statistics;
			tpkt->pkt_reason = pkt->pkt_reason;

			if ((*(pkt->pkt_scbp) == STATUS_CHECK) &&
			    (pkt->pkt_state & STATE_ARQ_DONE)) {
				bcopy(pkt->pkt_scbp, tpkt->pkt_scbp,
				    vpkt->vpkt_tgt_init_scblen);
			}

			VHCI_DECR_PATH_CMDCOUNT(svp);
			if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
				scsi_destroy_pkt(vpkt->vpkt_hba_pkt);
				vpkt->vpkt_hba_pkt = NULL;
				if (vpkt->vpkt_path) {
					mdi_rele_path(vpkt->vpkt_path);
					vpkt->vpkt_path = NULL;
				}
			}
			/*
			 * This path will not automatically retry pkts
			 * internally, therefore, vpkt_org_vpkt should
			 * never be set.
			 */
			ASSERT(vpkt->vpkt_org_vpkt == NULL);
			scsi_hba_pkt_comp(tpkt);
		}
		return (rval);
	} else if ((pkt->pkt_cdbp[0] == SCMD_PROUT) &&
	    (((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_REGISTER) ||
	    ((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_R_AND_IGNORE))) {
		/* the command exited with bad status */
		sema_v(&vlun->svl_pgr_sema);
	} else if (vpkt->vpkt_tgt_pkt->pkt_cdbp[0] == SCMD_PRIN) {
		/* the command exited with bad status */
		sema_v(&vlun->svl_pgr_sema);
	} else if (pkt_reserve_cmd) {
		VHCI_DEBUG(6, (CE_WARN, vhci->vhci_dip,
		    "!vhci_scsi_start: reserve failed vlun 0x%p",
		    (void *)vlun));
		vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
		if (restore_lbp)
			(void) mdi_set_lb_policy(cdip, lbp);
	}

	ASSERT(vpkt->vpkt_hba_pkt != NULL);
	VHCI_DECR_PATH_CMDCOUNT(svp);

	/* Do not destroy phci packet information for PKT_DMA_PARTIAL */
	if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
		scsi_destroy_pkt(vpkt->vpkt_hba_pkt);
		vpkt->vpkt_hba_pkt = NULL;
		if (vpkt->vpkt_path) {
			MDI_PI_ERRSTAT(vpkt->vpkt_path, MDI_PI_TRANSERR);
			mdi_rele_path(vpkt->vpkt_path);
			vpkt->vpkt_path = NULL;
		}
	}
	return (TRAN_BUSY);
}
1530
1531/*
1532 * Function name : vhci_scsi_reset()
1533 *
1534 * Return Values : 0 - reset failed
1535 *		   1 - reset succeeded
1536 */
1537
1538/* ARGSUSED */
1539static int
1540vhci_scsi_reset(struct scsi_address *ap, int level)
1541{
1542	int rval = 0;
1543
1544	cmn_err(CE_WARN, "!vhci_scsi_reset 0x%x", level);
1545	if ((level == RESET_TARGET) || (level == RESET_LUN)) {
1546		return (vhci_scsi_reset_target(ap, level, TRUE));
1547	} else if (level == RESET_ALL) {
1548		return (vhci_scsi_reset_bus(ap));
1549	}
1550
1551	return (rval);
1552}
1553
1554/*
1555 * vhci_recovery_reset:
1556 *	Issues reset to the device
1557 * Input:
1558 *	vlun - vhci lun pointer of the device
1559 *	ap - address of the device
1560 *	select_path:
1561 *		If select_path is FALSE, then the address specified in ap is
1562 *		the path on which reset will be issued.
1563 *		If select_path is TRUE, then path is obtained by calling
1564 *		mdi_select_path.
1565 *
1566 *	recovery_depth:
1567 *		Caller can specify the level of reset.
1568 *		VHCI_DEPTH_LUN -
1569 *			Issues LUN RESET if device supports lun reset.
1570 *		VHCI_DEPTH_TARGET -
1571 *			If Lun Reset fails or the device does not support
1572 *			Lun Reset, issues TARGET RESET
1573 *		VHCI_DEPTH_ALL -
1574 *			If Lun Reset fails or the device does not support
1575 *			Lun Reset, issues TARGET RESET.
1576 *			If TARGET RESET does not succeed, issues Bus Reset.
1577 */
1578
1579static int
1580vhci_recovery_reset(scsi_vhci_lun_t *vlun, struct scsi_address *ap,
1581    uint8_t select_path, uint8_t recovery_depth)
1582{
1583	int	ret = 0;
1584
1585	ASSERT(ap != NULL);
1586
1587	if (vlun && vlun->svl_support_lun_reset == 1) {
1588		ret = vhci_scsi_reset_target(ap, RESET_LUN,
1589		    select_path);
1590	}
1591
1592	recovery_depth--;
1593
1594	if ((ret == 0) && recovery_depth) {
1595		ret = vhci_scsi_reset_target(ap, RESET_TARGET,
1596		    select_path);
1597		recovery_depth--;
1598	}
1599
1600	if ((ret == 0) && recovery_depth) {
1601		(void) scsi_reset(ap, RESET_ALL);
1602	}
1603
1604	return (ret);
1605}
1606
1607/*
1608 * Note: The scsi_address passed to this routine could be the scsi_address
1609 * for the virtual device or the physical device. No assumptions should be
1610 * made in this routine about the contents of the ap structure.
1611 * Further, note that the child dip would be the dip of the ssd node regardless
1612 * of the scsi_address passed in.
1613 */
static int
vhci_scsi_reset_target(struct scsi_address *ap, int level, uint8_t select_path)
{
	dev_info_t		*vdip, *cdip;
	mdi_pathinfo_t		*pip = NULL;
	mdi_pathinfo_t		*npip = NULL;
	int			rval = -1;
	scsi_vhci_priv_t	*svp = NULL;
	struct scsi_address	*pap = NULL;
	scsi_hba_tran_t		*hba = NULL;
	int			sps;
	struct scsi_vhci	*vhci = NULL;

	/*
	 * When the caller already holds a specific path, issue the reset
	 * directly on the address passed in; no path selection is done.
	 */
	if (select_path != TRUE) {
		ASSERT(ap != NULL);
		if (level == RESET_LUN) {
			hba = ap->a_hba_tran;
			ASSERT(hba != NULL);
			return (hba->tran_reset(ap, RESET_LUN));
		}
		return (scsi_reset(ap, level));
	}

	cdip = ADDR2DIP(ap);
	ASSERT(cdip != NULL);
	vdip = ddi_get_parent(cdip);
	ASSERT(vdip != NULL);
	vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(vdip));
	ASSERT(vhci != NULL);

	/* Start from any online path; fail (0) if none is available. */
	rval = mdi_select_path(cdip, NULL, MDI_SELECT_ONLINE_PATH, NULL, &pip);
	if ((rval != MDI_SUCCESS) || (pip == NULL)) {
		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_scsi_reset_target: "
		    "Unable to get a path, dip 0x%p", (void *)cdip));
		return (0);
	}
again:
	svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
	if (svp == NULL) {
		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_scsi_reset_target: "
		    "priv is NULL, pip 0x%p", (void *)pip));
		mdi_rele_path(pip);
		return (0);
	}

	if (svp->svp_psd == NULL) {
		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_scsi_reset_target: "
		    "psd is NULL, pip 0x%p, svp 0x%p",
		    (void *)pip, (void *)svp));
		mdi_rele_path(pip);
		return (0);
	}

	/* Issue the reset on the pHCI address behind the selected path. */
	pap = &svp->svp_psd->sd_address;
	hba = pap->a_hba_tran;

	ASSERT(pap != NULL);
	ASSERT(hba != NULL);

	if (hba->tran_reset != NULL) {
		if (hba->tran_reset(pap, level) == 0) {
			vhci_log(CE_WARN, vdip, "!%s%d: "
			    "path %s, reset %d failed",
			    ddi_driver_name(cdip), ddi_get_instance(cdip),
			    mdi_pi_spathname(pip), level);

			/*
			 * Select next path and issue the reset, repeat
			 * until all paths are exhausted
			 */
			sps = mdi_select_path(cdip, NULL,
			    MDI_SELECT_ONLINE_PATH, pip, &npip);
			if ((sps != MDI_SUCCESS) || (npip == NULL)) {
				mdi_rele_path(pip);
				return (0);
			}
			mdi_rele_path(pip);
			pip = npip;
			goto again;
		}
		mdi_rele_path(pip);
		/* Reset succeeded; notify registered reset listeners. */
		mutex_enter(&vhci->vhci_mutex);
		scsi_hba_reset_notify_callback(&vhci->vhci_mutex,
		    &vhci->vhci_reset_notify_listf);
		mutex_exit(&vhci->vhci_mutex);
		VHCI_DEBUG(6, (CE_NOTE, NULL, "!vhci_scsi_reset_target: "
		    "reset %d sent down pip:%p for cdip:%p\n", level,
		    (void *)pip, (void *)cdip));
		return (1);
	}
	mdi_rele_path(pip);
	return (0);
}
1707
1708
/* ARGSUSED */
static int
vhci_scsi_reset_bus(struct scsi_address *ap)
{
	/* A bus reset of the virtual HBA is a no-op; report success. */
	return (1);
}
1715
1716
1717/*
1718 * called by vhci_getcap and vhci_setcap to get and set (respectively)
1719 * SCSI capabilities
1720 */
1721/* ARGSUSED */
1722static int
1723vhci_commoncap(struct scsi_address *ap, char *cap,
1724    int val, int tgtonly, int doset)
1725{
1726	struct scsi_vhci		*vhci = ADDR2VHCI(ap);
1727	struct scsi_vhci_lun		*vlun = ADDR2VLUN(ap);
1728	int			cidx;
1729	int			rval = 0;
1730
1731	if (cap == (char *)0) {
1732		VHCI_DEBUG(3, (CE_WARN, vhci->vhci_dip,
1733		    "!vhci_commoncap: invalid arg"));
1734		return (rval);
1735	}
1736
1737	if (vlun == NULL) {
1738		VHCI_DEBUG(3, (CE_WARN, vhci->vhci_dip,
1739		    "!vhci_commoncap: vlun is null"));
1740		return (rval);
1741	}
1742
1743	if ((cidx = scsi_hba_lookup_capstr(cap)) == -1) {
1744		return (UNDEFINED);
1745	}
1746
1747	/*
1748	 * Process setcap request.
1749	 */
1750	if (doset) {
1751		/*
1752		 * At present, we can only set binary (0/1) values
1753		 */
1754		switch (cidx) {
1755		case SCSI_CAP_ARQ:
1756			if (val == 0) {
1757				rval = 0;
1758			} else {
1759				rval = 1;
1760			}
1761			break;
1762
1763		case SCSI_CAP_LUN_RESET:
1764			if (tgtonly == 0) {
1765				VHCI_DEBUG(1, (CE_WARN, vhci->vhci_dip,
1766				    "scsi_vhci_setcap: "
1767				    "Returning error since whom = 0"));
1768				rval = -1;
1769				break;
1770			}
1771			/*
1772			 * Set the capability accordingly.
1773			 */
1774			mutex_enter(&vlun->svl_mutex);
1775			vlun->svl_support_lun_reset = val;
1776			rval = val;
1777			mutex_exit(&vlun->svl_mutex);
1778			break;
1779
1780		case SCSI_CAP_SECTOR_SIZE:
1781			mutex_enter(&vlun->svl_mutex);
1782			vlun->svl_sector_size = val;
1783			vlun->svl_setcap_done = 1;
1784			mutex_exit(&vlun->svl_mutex);
1785			(void) vhci_pHCI_cap(ap, cap, val, tgtonly, NULL);
1786
1787			/* Always return success */
1788			rval = 1;
1789			break;
1790
1791		default:
1792			VHCI_DEBUG(6, (CE_WARN, vhci->vhci_dip,
1793			    "!vhci_setcap: unsupported %d", cidx));
1794			rval = UNDEFINED;
1795			break;
1796		}
1797
1798		VHCI_DEBUG(6, (CE_NOTE, vhci->vhci_dip,
1799		    "!set cap: cap=%s, val/tgtonly/doset/rval = "
1800		    "0x%x/0x%x/0x%x/%d\n",
1801		    cap, val, tgtonly, doset, rval));
1802
1803	} else {
1804		/*
1805		 * Process getcap request.
1806		 */
1807		switch (cidx) {
1808		case SCSI_CAP_DMA_MAX:
1809			/*
1810			 * For X86 this capability is caught in scsi_ifgetcap().
1811			 * XXX Should this be getting the value from the pHCI?
1812			 */
1813			rval = (int)VHCI_DMA_MAX_XFER_CAP;
1814			break;
1815
1816		case SCSI_CAP_INITIATOR_ID:
1817			rval = 0x00;
1818			break;
1819
1820		case SCSI_CAP_ARQ:
1821		case SCSI_CAP_RESET_NOTIFICATION:
1822		case SCSI_CAP_TAGGED_QING:
1823			rval = 1;
1824			break;
1825
1826		case SCSI_CAP_SCSI_VERSION:
1827			rval = 3;
1828			break;
1829
1830		case SCSI_CAP_INTERCONNECT_TYPE:
1831			rval = INTERCONNECT_FABRIC;
1832			break;
1833
1834		case SCSI_CAP_LUN_RESET:
1835			/*
1836			 * scsi_vhci will always return success for LUN reset.
1837			 * When request for doing LUN reset comes
1838			 * through scsi_reset entry point, at that time attempt
1839			 * will be made to do reset through all the possible
1840			 * paths.
1841			 */
1842			mutex_enter(&vlun->svl_mutex);
1843			rval = vlun->svl_support_lun_reset;
1844			mutex_exit(&vlun->svl_mutex);
1845			VHCI_DEBUG(4, (CE_WARN, vhci->vhci_dip,
1846			    "scsi_vhci_getcap:"
1847			    "Getting the Lun reset capability %d", rval));
1848			break;
1849
1850		case SCSI_CAP_SECTOR_SIZE:
1851			mutex_enter(&vlun->svl_mutex);
1852			rval = vlun->svl_sector_size;
1853			mutex_exit(&vlun->svl_mutex);
1854			break;
1855
1856		case SCSI_CAP_CDB_LEN:
1857			rval = VHCI_SCSI_CDB_SIZE;
1858			break;
1859
1860		case SCSI_CAP_DMA_MAX_ARCH:
1861			/*
1862			 * For X86 this capability is caught in scsi_ifgetcap().
1863			 * XXX Should this be getting the value from the pHCI?
1864			 */
1865			rval = 0;
1866			break;
1867
1868		default:
1869			VHCI_DEBUG(6, (CE_WARN, vhci->vhci_dip,
1870			    "!vhci_getcap: unsupported %d", cidx));
1871			rval = UNDEFINED;
1872			break;
1873		}
1874
1875		VHCI_DEBUG(6, (CE_NOTE, vhci->vhci_dip,
1876		    "!get cap: cap=%s, val/tgtonly/doset/rval = "
1877		    "0x%x/0x%x/0x%x/%d\n",
1878		    cap, val, tgtonly, doset, rval));
1879	}
1880	return (rval);
1881}
1882
1883
1884/*
1885 * Function name : vhci_scsi_getcap()
1886 *
1887 */
static int
vhci_scsi_getcap(struct scsi_address *ap, char *cap, int whom)
{
	/* Query a capability: doset == 0 selects the getcap path. */
	return (vhci_commoncap(ap, cap, 0, whom, 0));
}
1893
static int
vhci_scsi_setcap(struct scsi_address *ap, char *cap, int value, int whom)
{
	/* Set a capability: doset == 1 selects the setcap path. */
	return (vhci_commoncap(ap, cap, value, whom, 1));
}
1899
1900/*
1901 * Function name : vhci_scsi_abort()
1902 */
/* ARGSUSED */
static int
vhci_scsi_abort(struct scsi_address *ap, struct scsi_pkt *pkt)
{
	/* Abort is not supported by scsi_vhci; always report failure. */
	return (0);
}
1909
1910/*
1911 * Function name : vhci_scsi_init_pkt
1912 *
1913 * Return Values : pointer to scsi_pkt, or NULL
1914 */
/* ARGSUSED */
static struct scsi_pkt *
vhci_scsi_init_pkt(struct scsi_address *ap, struct scsi_pkt *pkt,
    struct buf *bp, int cmdlen, int statuslen, int tgtlen,
    int flags, int (*callback)(caddr_t), caddr_t arg)
{
	struct scsi_vhci	*vhci = ADDR2VHCI(ap);
	struct vhci_pkt		*vpkt;
	int			rval;
	int			newpkt = 0;
	struct scsi_pkt		*pktp;


	if (pkt == NULL) {
		/*
		 * Fresh allocation.  CDBs larger than VHCI_SCSI_CDB_SIZE are
		 * accepted only for the special OSD case (exact size and
		 * flags match required).
		 */
		if (cmdlen > VHCI_SCSI_CDB_SIZE) {
			if ((cmdlen != VHCI_SCSI_OSD_CDB_SIZE) ||
			    ((flags & VHCI_SCSI_OSD_PKT_FLAGS) !=
			    VHCI_SCSI_OSD_PKT_FLAGS)) {
				VHCI_DEBUG(1, (CE_NOTE, NULL,
				    "!init pkt: cdb size not supported\n"));
				return (NULL);
			}
		}

		pktp = scsi_hba_pkt_alloc(vhci->vhci_dip,
		    ap, cmdlen, statuslen, tgtlen, sizeof (*vpkt), callback,
		    arg);

		if (pktp == NULL) {
			return (NULL);
		}

		/* Get the vhci's private structure */
		vpkt = (struct vhci_pkt *)(pktp->pkt_ha_private);
		ASSERT(vpkt);

		/* Save the target driver's packet */
		vpkt->vpkt_tgt_pkt = pktp;

		/*
		 * Save pkt_tgt_init_pkt fields if deferred binding
		 * is needed or for other purposes.
		 */
		vpkt->vpkt_tgt_init_pkt_flags = flags;
		vpkt->vpkt_flags = (callback == NULL_FUNC) ? CFLAG_NOWAIT : 0;
		vpkt->vpkt_state = VHCI_PKT_IDLE;
		vpkt->vpkt_tgt_init_cdblen = cmdlen;
		vpkt->vpkt_tgt_init_scblen = statuslen;
		newpkt = 1;
	} else { /* pkt not NULL */
		vpkt = pkt->pkt_ha_private;
	}

	VHCI_DEBUG(8, (CE_NOTE, NULL, "vhci_scsi_init_pkt "
	    "vpkt %p flags %x\n", (void *)vpkt, flags));

	/* Clear any stale error flags */
	if (bp) {
		bioerror(bp, 0);
	}

	vpkt->vpkt_tgt_init_bp = bp;

	if (flags & PKT_DMA_PARTIAL) {

		/*
		 * Immediate binding is needed.
		 * Target driver may not set this flag in next invocation.
		 * vhci has to remember this flag was set during first
		 * invocation of vhci_scsi_init_pkt.
		 */
		vpkt->vpkt_flags |= CFLAG_DMA_PARTIAL;
	}

	if (vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) {

		/*
		 * Re-initialize some of the target driver packet state
		 * information.
		 */
		vpkt->vpkt_tgt_pkt->pkt_state = 0;
		vpkt->vpkt_tgt_pkt->pkt_statistics = 0;
		vpkt->vpkt_tgt_pkt->pkt_reason = 0;

		/*
		 * Binding a vpkt->vpkt_path for this IO at init_time.
		 * If an IO error happens later, target driver will clear
		 * this vpkt->vpkt_path binding before re-init IO again.
		 */
		VHCI_DEBUG(8, (CE_NOTE, NULL,
		    "vhci_scsi_init_pkt: calling v_b_t %p, newpkt %d\n",
		    (void *)vpkt, newpkt));
		if (pkt && vpkt->vpkt_hba_pkt) {
			VHCI_DEBUG(4, (CE_NOTE, NULL,
			    "v_s_i_p calling update_pHCI_pkt resid %ld\n",
			    pkt->pkt_resid));
			vhci_update_pHCI_pkt(vpkt, pkt);
		}
		/*
		 * Honor the caller's allocation policy: only SLEEP_FUNC
		 * is allowed to block inside the bind.
		 */
		if (callback == SLEEP_FUNC) {
			rval = vhci_bind_transport(
			    ap, vpkt, flags, callback);
		} else {
			rval = vhci_bind_transport(
			    ap, vpkt, flags, NULL_FUNC);
		}
		VHCI_DEBUG(8, (CE_NOTE, NULL,
		    "vhci_scsi_init_pkt: v_b_t called 0x%p rval 0x%x\n",
		    (void *)vpkt, rval));
		if (bp) {
			if (rval == TRAN_FATAL_ERROR) {
				/*
				 * No paths available. Could not bind
				 * any pHCI. Setting EFAULT as a way
				 * to indicate no DMA is mapped.
				 */
				bioerror(bp, EFAULT);
			} else {
				/*
				 * Do not indicate any pHCI errors to
				 * target driver otherwise.
				 */
				bioerror(bp, 0);
			}
		}
		if (rval != TRAN_ACCEPT) {
			VHCI_DEBUG(8, (CE_NOTE, NULL,
			    "vhci_scsi_init_pkt: "
			    "v_b_t failed 0x%p newpkt %x\n",
			    (void *)vpkt, newpkt));
			/* Only free a pkt this call allocated. */
			if (newpkt) {
				scsi_hba_pkt_free(ap,
				    vpkt->vpkt_tgt_pkt);
			}
			return (NULL);
		}
		ASSERT(vpkt->vpkt_hba_pkt != NULL);
		ASSERT(vpkt->vpkt_path != NULL);

		/* Update the resid for the target driver */
		vpkt->vpkt_tgt_pkt->pkt_resid =
		    vpkt->vpkt_hba_pkt->pkt_resid;
	}

	return (vpkt->vpkt_tgt_pkt);
}
2060
2061/*
2062 * Function name : vhci_scsi_destroy_pkt
2063 *
2064 * Return Values : none
2065 */
2066static void
2067vhci_scsi_destroy_pkt(struct scsi_address *ap, struct scsi_pkt *pkt)
2068{
2069	struct vhci_pkt		*vpkt = (struct vhci_pkt *)pkt->pkt_ha_private;
2070
2071	VHCI_DEBUG(8, (CE_NOTE, NULL,
2072	    "vhci_scsi_destroy_pkt: vpkt 0x%p\n", (void *)vpkt));
2073
2074	vpkt->vpkt_tgt_init_pkt_flags = 0;
2075	if (vpkt->vpkt_hba_pkt) {
2076		scsi_destroy_pkt(vpkt->vpkt_hba_pkt);
2077		vpkt->vpkt_hba_pkt = NULL;
2078	}
2079	if (vpkt->vpkt_path) {
2080		mdi_rele_path(vpkt->vpkt_path);
2081		vpkt->vpkt_path = NULL;
2082	}
2083
2084	ASSERT(vpkt->vpkt_state != VHCI_PKT_ISSUED);
2085	scsi_hba_pkt_free(ap, vpkt->vpkt_tgt_pkt);
2086}
2087
2088/*
2089 * Function name : vhci_scsi_dmafree()
2090 *
2091 * Return Values : none
2092 */
2093/*ARGSUSED*/
2094static void
2095vhci_scsi_dmafree(struct scsi_address *ap, struct scsi_pkt *pkt)
2096{
2097	struct vhci_pkt	*vpkt = (struct vhci_pkt *)pkt->pkt_ha_private;
2098
2099	VHCI_DEBUG(6, (CE_NOTE, NULL,
2100	    "vhci_scsi_dmafree: vpkt 0x%p\n", (void *)vpkt));
2101
2102	ASSERT(vpkt != NULL);
2103	if (vpkt->vpkt_hba_pkt) {
2104		scsi_destroy_pkt(vpkt->vpkt_hba_pkt);
2105		vpkt->vpkt_hba_pkt = NULL;
2106	}
2107	if (vpkt->vpkt_path) {
2108		mdi_rele_path(vpkt->vpkt_path);
2109		vpkt->vpkt_path = NULL;
2110	}
2111}
2112
2113/*
2114 * Function name : vhci_scsi_sync_pkt()
2115 *
2116 * Return Values : none
2117 */
2118/*ARGSUSED*/
2119static void
2120vhci_scsi_sync_pkt(struct scsi_address *ap, struct scsi_pkt *pkt)
2121{
2122	struct vhci_pkt	*vpkt = (struct vhci_pkt *)pkt->pkt_ha_private;
2123
2124	ASSERT(vpkt != NULL);
2125	if (vpkt->vpkt_hba_pkt) {
2126		scsi_sync_pkt(vpkt->vpkt_hba_pkt);
2127	}
2128}
2129
2130/*
2131 * routine for reset notification setup, to register or cancel.
2132 */
2133static int
2134vhci_scsi_reset_notify(struct scsi_address *ap, int flag,
2135    void (*callback)(caddr_t), caddr_t arg)
2136{
2137	struct scsi_vhci *vhci = ADDR2VHCI(ap);
2138	return (scsi_hba_reset_notify_setup(ap, flag, callback, arg,
2139	    &vhci->vhci_mutex, &vhci->vhci_reset_notify_listf));
2140}
2141
2142static int
2143vhci_scsi_get_name_bus_addr(struct scsi_device *sd,
2144    char *name, int len, int bus_addr)
2145{
2146	dev_info_t		*cdip;
2147	char			*guid;
2148	scsi_vhci_lun_t		*vlun;
2149
2150	ASSERT(sd != NULL);
2151	ASSERT(name != NULL);
2152
2153	*name = 0;
2154	cdip = sd->sd_dev;
2155
2156	ASSERT(cdip != NULL);
2157
2158	if (mdi_component_is_client(cdip, NULL) != MDI_SUCCESS)
2159		return (1);
2160
2161	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, PROPFLAGS,
2162	    MDI_CLIENT_GUID_PROP, &guid) != DDI_SUCCESS)
2163		return (1);
2164
2165	/*
2166	 * Message is "sd# at scsi_vhci0: unit-address <guid>: <bus_addr>".
2167	 *	<guid>		bus_addr argument == 0
2168	 *	<bus_addr>	bus_addr argument != 0
2169	 * Since the <guid> is already provided with unit-address, we just
2170	 * provide failover module in <bus_addr> to keep output shorter.
2171	 */
2172	vlun = ADDR2VLUN(&sd->sd_address);
2173	if (bus_addr == 0) {
2174		/* report the guid:  */
2175		(void) snprintf(name, len, "g%s", guid);
2176	} else if (vlun && vlun->svl_fops_name) {
2177		/* report the name of the failover module */
2178		(void) snprintf(name, len, "%s", vlun->svl_fops_name);
2179	}
2180
2181	ddi_prop_free(guid);
2182	return (1);
2183}
2184
/* Report the failover-module name as the device's bus address. */
static int
vhci_scsi_get_bus_addr(struct scsi_device *sd, char *name, int len)
{
	int	rv;

	rv = vhci_scsi_get_name_bus_addr(sd, name, len, 1);
	return (rv);
}
2190
/* Report the GUID-based unit address as the device's name. */
static int
vhci_scsi_get_name(struct scsi_device *sd, char *name, int len)
{
	int	rv;

	rv = vhci_scsi_get_name_bus_addr(sd, name, len, 0);
	return (rv);
}
2196
2197/*
2198 * Return a pointer to the guid part of the devnm.
2199 * devnm format is "nodename@busaddr", busaddr format is "gGUID".
2200 */
2201static char *
2202vhci_devnm_to_guid(char *devnm)
2203{
2204	char *cp = devnm;
2205
2206	if (devnm == NULL)
2207		return (NULL);
2208
2209	while (*cp != '\0' && *cp != '@')
2210		cp++;
2211	if (*cp == '@' && *(cp + 1) == 'g')
2212		return (cp + 2);
2213	return (NULL);
2214}
2215
/*
 * Function name : vhci_bind_transport
 *
 * Select a pathinfo node for this command and bind the target-layer
 * request (vpkt) to a pHCI scsi_pkt on that path.  Handles PGR
 * serialization, sticky paths for PKT_PARTIAL_DMA, explicit
 * path-instance selection, active reservations, and kicking off a
 * failover when no usable path exists.
 *
 * Return Values : TRAN_ACCEPT on success (vpkt->vpkt_hba_pkt and
 *	vpkt->vpkt_path are valid), TRAN_BUSY for transient conditions,
 *	TRAN_FATAL_ERROR when no path can ever be bound.
 */
static int
vhci_bind_transport(struct scsi_address *ap, struct vhci_pkt *vpkt, int flags,
    int (*func)(caddr_t))
{
	struct scsi_vhci	*vhci = ADDR2VHCI(ap);
	dev_info_t		*cdip = ADDR2DIP(ap);
	mdi_pathinfo_t		*pip = NULL;
	mdi_pathinfo_t		*npip = NULL;
	scsi_vhci_priv_t	*svp = NULL;
	struct scsi_device	*psd = NULL;
	struct scsi_address	*address = NULL;
	struct scsi_pkt		*pkt = NULL;
	int			rval = -1;
	int			pgr_sema_held = 0;
	int			held;
	int			mps_flag = MDI_SELECT_ONLINE_PATH;
	struct scsi_vhci_lun	*vlun;
	int			path_instance = 0;

	vlun = ADDR2VLUN(ap);
	ASSERT(vlun != 0);

	/*
	 * For PROUT REGISTER (or REGISTER AND IGNORE EXISTING KEY),
	 * serialize against other PGR commands on this LUN via
	 * svl_pgr_sema, and try to re-use the path the registration
	 * sequence started on (svl_first_path) if it is still
	 * selectable.
	 */
	if ((vpkt->vpkt_tgt_pkt->pkt_cdbp[0] == SCMD_PROUT) &&
	    (((vpkt->vpkt_tgt_pkt->pkt_cdbp[1] & 0x1f) ==
	    VHCI_PROUT_REGISTER) ||
	    ((vpkt->vpkt_tgt_pkt->pkt_cdbp[1] & 0x1f) ==
	    VHCI_PROUT_R_AND_IGNORE))) {
		if (!sema_tryp(&vlun->svl_pgr_sema))
			return (TRAN_BUSY);
		pgr_sema_held = 1;
		if (vlun->svl_first_path != NULL) {
			/*
			 * Walk all online/standby paths looking for
			 * svl_first_path; use it if it is still there.
			 */
			rval = mdi_select_path(cdip, NULL,
			    MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH,
			    NULL, &pip);
			if ((rval != MDI_SUCCESS) || (pip == NULL)) {
				VHCI_DEBUG(4, (CE_NOTE, NULL,
				    "vhci_bind_transport: path select fail\n"));
			} else {
				npip = pip;
				do {
					if (npip == vlun->svl_first_path) {
						VHCI_DEBUG(4, (CE_NOTE, NULL,
						    "vhci_bind_transport: "
						    "valid first path 0x%p\n",
						    (void *)
						    vlun->svl_first_path));
						pip = vlun->svl_first_path;
						goto bind_path;
					}
					pip = npip;
					rval = mdi_select_path(cdip, NULL,
					    MDI_SELECT_ONLINE_PATH |
					    MDI_SELECT_STANDBY_PATH,
					    pip, &npip);
					mdi_rele_path(pip);
				} while ((rval == MDI_SUCCESS) &&
				    (npip != NULL));
			}
		}

		/* First path no longer selectable; forget it. */
		if (vlun->svl_first_path) {
			VHCI_DEBUG(4, (CE_NOTE, NULL,
			    "vhci_bind_transport: invalid first path 0x%p\n",
			    (void *)vlun->svl_first_path));
			vlun->svl_first_path = NULL;
		}
	} else if (vpkt->vpkt_tgt_pkt->pkt_cdbp[0] == SCMD_PRIN) {
		/*
		 * PRIN is serialized by the same semaphore.
		 * NOTE(review): when VHCI_PKT_THRU_TASKQ is set the
		 * semaphore is not taken here yet pgr_sema_held is
		 * still set; presumably the taskq dispatch path
		 * acquired it earlier — confirm against the taskq
		 * code before changing this.
		 */
		if ((vpkt->vpkt_state & VHCI_PKT_THRU_TASKQ) == 0) {
			if (!sema_tryp(&vlun->svl_pgr_sema))
				return (TRAN_BUSY);
		}
		pgr_sema_held = 1;
	}

	/*
	 * If the path is already bound for PKT_PARTIAL_DMA case,
	 * try to use the same path.
	 */
	if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) && vpkt->vpkt_path) {
		VHCI_DEBUG(4, (CE_NOTE, NULL,
		    "vhci_bind_transport: PKT_PARTIAL_DMA "
		    "vpkt 0x%p, path 0x%p\n",
		    (void *)vpkt, (void *)vpkt->vpkt_path));
		pip = vpkt->vpkt_path;
		goto bind_path;
	}

	/*
	 * Get path_instance. Non-zero with FLAG_PKT_PATH_INSTANCE set
	 * indicates that mdi_select_path should be called to select a
	 * specific instance.
	 *
	 * NB: Condition pkt_path_instance reference on proper allocation.
	 */
	if ((vpkt->vpkt_tgt_pkt->pkt_flags & FLAG_PKT_PATH_INSTANCE) &&
	    scsi_pkt_allocated_correctly(vpkt->vpkt_tgt_pkt)) {
		path_instance = vpkt->vpkt_tgt_pkt->pkt_path_instance;
	}

	/*
	 * If reservation is active bind the transport directly to the pip
	 * with the reservation.
	 */
	if (vpkt->vpkt_hba_pkt == NULL) {
		if (vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
			if (MDI_PI_IS_ONLINE(vlun->svl_resrv_pip)) {
				pip = vlun->svl_resrv_pip;
				mdi_hold_path(pip);
				vlun->svl_waiting_for_activepath = 0;
				rval = MDI_SUCCESS;
				goto bind_path;
			} else {
				/* Reserved path is gone; caller must retry. */
				if (pgr_sema_held) {
					sema_v(&vlun->svl_pgr_sema);
				}
				return (TRAN_BUSY);
			}
		}
try_again:
		rval = mdi_select_path(cdip, vpkt->vpkt_tgt_init_bp,
		    path_instance ? MDI_SELECT_PATH_INSTANCE : 0,
		    (void *)(intptr_t)path_instance, &pip);
		if (rval == MDI_BUSY) {
			if (pgr_sema_held) {
				sema_v(&vlun->svl_pgr_sema);
			}
			return (TRAN_BUSY);
		} else if (rval == MDI_DEVI_ONLINING) {
			/*
			 * if we are here then we are in the midst of
			 * an attach/probe of the client device.
			 * We attempt to bind to ONLINE path if available,
			 * else it is OK to bind to a STANDBY path (instead
			 * of triggering a failover) because IO associated
			 * with attach/probe (eg. INQUIRY, block 0 read)
			 * are completed by targets even on passive paths
			 * If no ONLINE paths available, it is important
			 * to set svl_waiting_for_activepath for two
			 * reasons: (1) avoid sense analysis in the
			 * "external failure detection" codepath in
			 * vhci_intr().  Failure to do so will result in
			 * infinite loop (unless an ONLINE path becomes
			 * available at some point) (2) avoid
			 * unnecessary failover (see "---Waiting For Active
			 * Path---" comment below).
			 */
			VHCI_DEBUG(1, (CE_NOTE, NULL, "!%p in onlining "
			    "state\n", (void *)cdip));
			pip = NULL;
			rval = mdi_select_path(cdip, vpkt->vpkt_tgt_init_bp,
			    mps_flag, NULL, &pip);
			if ((rval != MDI_SUCCESS) || (pip == NULL)) {
				if (vlun->svl_waiting_for_activepath == 0) {
					vlun->svl_waiting_for_activepath = 1;
					vlun->svl_wfa_time = gethrtime();
				}
				mps_flag |= MDI_SELECT_STANDBY_PATH;
				rval = mdi_select_path(cdip,
				    vpkt->vpkt_tgt_init_bp,
				    mps_flag, NULL, &pip);
				if ((rval != MDI_SUCCESS) || (pip == NULL)) {
					if (pgr_sema_held) {
						sema_v(&vlun->svl_pgr_sema);
					}
					return (TRAN_FATAL_ERROR);
				}
				goto bind_path;
			}
		} else if ((rval == MDI_FAILURE) ||
		    ((rval == MDI_NOPATH) && (path_instance))) {
			if (pgr_sema_held) {
				sema_v(&vlun->svl_pgr_sema);
			}
			return (TRAN_FATAL_ERROR);
		}

		if ((pip == NULL) || (rval == MDI_NOPATH)) {
			while (vlun->svl_waiting_for_activepath) {
				/*
				 * ---Waiting For Active Path---
				 * This device was discovered across a
				 * passive path; lets wait for a little
				 * bit, hopefully an active path will
				 * show up obviating the need for a
				 * failover
				 */
				if ((gethrtime() - vlun->svl_wfa_time) >=
				    (60 * NANOSEC)) {
					vlun->svl_waiting_for_activepath = 0;
				} else {
					drv_usecwait(1000);
					if (vlun->svl_waiting_for_activepath
					    == 0) {
						/*
						 * an active path has come
						 * online!
						 */
						goto try_again;
					}
				}
			}
			VHCI_HOLD_LUN(vlun, VH_NOSLEEP, held);
			if (!held) {
				VHCI_DEBUG(4, (CE_NOTE, NULL,
				    "!Lun not held\n"));
				if (pgr_sema_held) {
					sema_v(&vlun->svl_pgr_sema);
				}
				return (TRAN_BUSY);
			}
			/*
			 * now that the LUN is stable, one last check
			 * to make sure no other changes sneaked in
			 * (like a path coming online or a
			 * failover initiated by another thread)
			 */
			pip = NULL;
			rval = mdi_select_path(cdip, vpkt->vpkt_tgt_init_bp,
			    0, NULL, &pip);
			if (pip != NULL) {
				VHCI_RELEASE_LUN(vlun);
				vlun->svl_waiting_for_activepath = 0;
				goto bind_path;
			}

			/*
			 * Check if there is an ONLINE path OR a STANDBY path
			 * available. If none is available, do not attempt
			 * to do a failover, just return a fatal error at this
			 * point.
			 */
			npip = NULL;
			rval = mdi_select_path(cdip, NULL,
			    (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH),
			    NULL, &npip);
			if ((npip == NULL) || (rval != MDI_SUCCESS)) {
				/*
				 * No paths available, just return FATAL error.
				 */
				VHCI_RELEASE_LUN(vlun);
				if (pgr_sema_held) {
					sema_v(&vlun->svl_pgr_sema);
				}
				return (TRAN_FATAL_ERROR);
			}
			mdi_rele_path(npip);
			if (!(vpkt->vpkt_state & VHCI_PKT_IN_FAILOVER)) {
				VHCI_DEBUG(1, (CE_NOTE, NULL, "!invoking "
				    "mdi_failover\n"));
				rval = mdi_failover(vhci->vhci_dip, cdip,
				    MDI_FAILOVER_ASYNC);
			} else {
				rval = vlun->svl_failover_status;
			}
			if (rval == MDI_FAILURE) {
				VHCI_RELEASE_LUN(vlun);
				if (pgr_sema_held) {
					sema_v(&vlun->svl_pgr_sema);
				}
				return (TRAN_FATAL_ERROR);
			} else if (rval == MDI_BUSY) {
				VHCI_RELEASE_LUN(vlun);
				if (pgr_sema_held) {
					sema_v(&vlun->svl_pgr_sema);
				}
				return (TRAN_BUSY);
			} else {
				/*
				 * Failover is in progress (async); the
				 * caller must retry once it completes.
				 * NOTE(review): this branch returns
				 * TRAN_BUSY without VHCI_RELEASE_LUN,
				 * unlike the two branches above —
				 * confirm the hold is intentionally
				 * retained across the failover.
				 */
				if (pgr_sema_held) {
					sema_v(&vlun->svl_pgr_sema);
				}
				vpkt->vpkt_state |= VHCI_PKT_IN_FAILOVER;
				return (TRAN_BUSY);
			}
		}
		vlun->svl_waiting_for_activepath = 0;
bind_path:
		/* Record the chosen path and its per-path private data. */
		vpkt->vpkt_path = pip;
		svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
		ASSERT(svp != NULL);

		psd = svp->svp_psd;
		ASSERT(psd != NULL);
		address = &psd->sd_address;
	} else {
		pkt = vpkt->vpkt_hba_pkt;
		address = &pkt->pkt_address;
	}

	/* Verify match of specified path_instance and selected path_instance */
	ASSERT((path_instance == 0) ||
	    (path_instance == mdi_pi_get_path_instance(vpkt->vpkt_path)));

	/*
	 * For PKT_PARTIAL_DMA case, call pHCI's scsi_init_pkt whenever
	 * target driver calls vhci_scsi_init_pkt.
	 */
	if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) &&
	    vpkt->vpkt_path && vpkt->vpkt_hba_pkt) {
		VHCI_DEBUG(4, (CE_NOTE, NULL,
		    "vhci_bind_transport: PKT_PARTIAL_DMA "
		    "vpkt 0x%p, path 0x%p hba_pkt 0x%p\n",
		    (void *)vpkt, (void *)vpkt->vpkt_path, (void *)pkt));
		pkt = vpkt->vpkt_hba_pkt;
		address = &pkt->pkt_address;
	}

	if (pkt == NULL || (vpkt->vpkt_flags & CFLAG_DMA_PARTIAL)) {
		pkt = scsi_init_pkt(address, pkt,
		    vpkt->vpkt_tgt_init_bp, vpkt->vpkt_tgt_init_cdblen,
		    vpkt->vpkt_tgt_init_scblen, 0, flags, func, NULL);

		if (pkt == NULL) {
			VHCI_DEBUG(4, (CE_NOTE, NULL,
			    "!bind transport: 0x%p 0x%p 0x%p\n",
			    (void *)vhci, (void *)psd, (void *)vpkt));
			if ((vpkt->vpkt_hba_pkt == NULL) && vpkt->vpkt_path) {
				MDI_PI_ERRSTAT(vpkt->vpkt_path,
				    MDI_PI_TRANSERR);
				mdi_rele_path(vpkt->vpkt_path);
				vpkt->vpkt_path = NULL;
			}
			if (pgr_sema_held) {
				sema_v(&vlun->svl_pgr_sema);
			}
			/*
			 * Consider it a fatal error if b_error is
			 * set as a result of DMA binding failure
			 * vs. a condition of being temporarily out of
			 * some resource
			 */
			if (vpkt->vpkt_tgt_init_bp == NULL ||
			    geterror(vpkt->vpkt_tgt_init_bp))
				return (TRAN_FATAL_ERROR);
			else
				return (TRAN_BUSY);
		}
	}

	/* Cross-link the pHCI packet and the vhci packet. */
	pkt->pkt_private = vpkt;
	vpkt->vpkt_hba_pkt = pkt;
	return (TRAN_ACCEPT);
}
2558
2559
2560/*PRINTFLIKE3*/
2561void
2562vhci_log(int level, dev_info_t *dip, const char *fmt, ...)
2563{
2564	char		buf[256];
2565	va_list		ap;
2566
2567	va_start(ap, fmt);
2568	(void) vsprintf(buf, fmt, ap);
2569	va_end(ap);
2570
2571	scsi_log(dip, "scsi_vhci", level, buf);
2572}
2573
/*
 * do a PGR out with the information we've saved away
 *
 * Issues the PERSISTENT RESERVE OUT command saved on the LUN
 * (svl_cdb / svl_prout buffer) synchronously (FLAG_NOINTR) on the path
 * identified by svp, retrying on UNIT ATTENTION and NOT READY sense.
 *
 * Return Values:
 *	1			- command completed successfully
 *	0			- command failed (retries exhausted or
 *				  pkt allocation failure)
 *	VHCI_PGR_ILLEGALOP	- target returned ILLEGAL REQUEST sense
 *
 * NOTE(review): the state-changed branch below assigns TRAN_BUSY into
 * rval, which leaks into the 0/1 return domain; callers (e.g.
 * vhci_run_cmd) compare against 1 so it behaves as a failure — confirm
 * that is intended.
 */
static int
vhci_do_prout(scsi_vhci_priv_t *svp)
{

	struct scsi_pkt			*new_pkt;
	struct buf			*bp;
	scsi_vhci_lun_t			*vlun = svp->svp_svl;
	int				rval, retry, nr_retry, ua_retry;
	uint8_t				*sns, skey;

	/* Build a write buf pointing at the saved PROUT parameter data. */
	bp = getrbuf(KM_SLEEP);
	bp->b_flags = B_WRITE;
	bp->b_resid = 0;
	bp->b_un.b_addr = (caddr_t)&vlun->svl_prout;
	bp->b_bcount = vlun->svl_bcount;

	VHCI_INCR_PATH_CMDCOUNT(svp);

	new_pkt = scsi_init_pkt(&svp->svp_psd->sd_address, NULL, bp,
	    CDB_GROUP1, sizeof (struct scsi_arq_status), 0, 0,
	    SLEEP_FUNC, NULL);
	if (new_pkt == NULL) {
		VHCI_DECR_PATH_CMDCOUNT(svp);
		freerbuf(bp);
		cmn_err(CE_WARN, "!vhci_do_prout: scsi_init_pkt failed");
		return (0);
	}
	/*
	 * Re-read the saved buffer address/length and CDB under
	 * svl_mutex so a concurrent update of the saved PROUT state is
	 * not torn.
	 */
	mutex_enter(&vlun->svl_mutex);
	bp->b_un.b_addr = (caddr_t)&vlun->svl_prout;
	bp->b_bcount = vlun->svl_bcount;
	bcopy(vlun->svl_cdb, new_pkt->pkt_cdbp,
	    sizeof (vlun->svl_cdb));
	new_pkt->pkt_time = vlun->svl_time;
	mutex_exit(&vlun->svl_mutex);
	/* Run the command synchronously (polled completion). */
	new_pkt->pkt_flags = FLAG_NOINTR;

	/* Separate retry budgets for NOT READY vs. other retryable sense. */
	ua_retry = nr_retry = retry = 0;
again:
	rval = vhci_do_scsi_cmd(new_pkt);
	if (rval != 1) {
		if ((new_pkt->pkt_reason == CMD_CMPLT) &&
		    (SCBP_C(new_pkt) == STATUS_CHECK) &&
		    (new_pkt->pkt_state & STATE_ARQ_DONE)) {
			/* Extract auto-request-sense data. */
			sns = (uint8_t *)
			    &(((struct scsi_arq_status *)(uintptr_t)
			    (new_pkt->pkt_scbp))->sts_sensedata);
			skey = scsi_sense_key(sns);
			if ((skey == KEY_UNIT_ATTENTION) ||
			    (skey == KEY_NOT_READY)) {
				int max_retry;
				struct scsi_failover_ops *fops;
				fops = vlun->svl_fops;
				/* Let the failover module classify the sense. */
				rval = fops->sfo_analyze_sense(svp->svp_psd,
				    sns, vlun->svl_fops_ctpriv);
				if (rval == SCSI_SENSE_NOT_READY) {
					max_retry = vhci_prout_not_ready_retry;
					retry = nr_retry++;
					/* Back off one second between tries. */
					delay(1 * drv_usectohz(1000000));
				} else {
					/* chk for state change and update */
					if (rval == SCSI_SENSE_STATE_CHANGED) {
						int held;
						VHCI_HOLD_LUN(vlun,
						    VH_NOSLEEP, held);
						if (!held) {
							rval = TRAN_BUSY;
						} else {
							/* chk for alua first */
							vhci_update_pathstates(
							    (void *)vlun);
						}
					}
					retry = ua_retry++;
					max_retry = VHCI_MAX_PGR_RETRIES;
				}
				if (retry < max_retry) {
					VHCI_DEBUG(4, (CE_WARN, NULL,
					    "!vhci_do_prout retry 0x%x "
					    "(0x%x 0x%x 0x%x)",
					    SCBP_C(new_pkt),
					    new_pkt->pkt_cdbp[0],
					    new_pkt->pkt_cdbp[1],
					    new_pkt->pkt_cdbp[2]));
					goto again;
				}
				/* Retries exhausted: report failure. */
				rval = 0;
				VHCI_DEBUG(4, (CE_WARN, NULL,
				    "!vhci_do_prout 0x%x "
				    "(0x%x 0x%x 0x%x)",
				    SCBP_C(new_pkt),
				    new_pkt->pkt_cdbp[0],
				    new_pkt->pkt_cdbp[1],
				    new_pkt->pkt_cdbp[2]));
			} else if (skey == KEY_ILLEGAL_REQUEST)
				rval = VHCI_PGR_ILLEGALOP;
		}
	} else {
		rval = 1;
	}
	scsi_destroy_pkt(new_pkt);
	VHCI_DECR_PATH_CMDCOUNT(svp);
	freerbuf(bp);
	return (rval);
}
2679
/*
 * Taskq handler run after a PROUT REGISTER completes on the issuing
 * path: replicate the registration (as REGISTER AND IGNORE EXISTING
 * KEY) on every other online/standby path so all initiator ports carry
 * the key, then complete the original target packet.
 *
 *	arg - the pHCI scsi_pkt of the completed PROUT command
 *
 * NOTE(review): the early "goto done" paths below jump to code that
 * dereferences vlun (svl_first_path, svl_pgr_sema); on the
 * "svp == NULL" path vlun has not been assigned yet — confirm whether
 * that path is reachable, as it would dereference an uninitialized
 * pointer.
 */
static void
vhci_run_cmd(void *arg)
{
	struct scsi_pkt		*pkt = (struct scsi_pkt *)arg;
	struct scsi_pkt		*tpkt;
	scsi_vhci_priv_t	*svp;
	mdi_pathinfo_t		*pip, *npip;
	scsi_vhci_lun_t		*vlun;
	dev_info_t		*cdip;
	scsi_vhci_priv_t	*nsvp;
	int			fail = 0;
	int			rval;
	struct vhci_pkt		*vpkt;
	uchar_t			cdb_1;
	vhci_prout_t		*prout;

	vpkt = (struct vhci_pkt *)pkt->pkt_private;
	tpkt = vpkt->vpkt_tgt_pkt;
	pip = vpkt->vpkt_path;
	svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
	if (svp == NULL) {
		tpkt->pkt_reason = CMD_TRAN_ERR;
		tpkt->pkt_statistics = STAT_ABORTED;
		goto done;
	}
	vlun = svp->svp_svl;
	prout = &vlun->svl_prout;
	/* The original register must have succeeded for replication. */
	if (SCBP_C(pkt) != STATUS_GOOD)
		fail++;
	cdip = vlun->svl_dip;
	pip = npip = NULL;
	rval = mdi_select_path(cdip, NULL,
	    MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH, NULL, &npip);
	if ((rval != MDI_SUCCESS) || (npip == NULL)) {
		VHCI_DEBUG(4, (CE_NOTE, NULL,
		    "vhci_run_cmd: no path! 0x%p\n", (void *)svp));
		tpkt->pkt_reason = CMD_TRAN_ERR;
		tpkt->pkt_statistics = STAT_ABORTED;
		goto done;
	}

	/*
	 * Temporarily rewrite the saved CDB's service action to
	 * REGISTER AND IGNORE EXISTING KEY for the replication pass;
	 * the original byte is restored below.
	 */
	cdb_1 = vlun->svl_cdb[1];
	vlun->svl_cdb[1] &= 0xe0;
	vlun->svl_cdb[1] |= VHCI_PROUT_R_AND_IGNORE;

	/* Walk every other path and repeat the registration there. */
	do {
		nsvp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(npip);
		if (nsvp == NULL) {
			VHCI_DEBUG(4, (CE_NOTE, NULL,
			    "vhci_run_cmd: no "
			    "client priv! 0x%p offlined?\n",
			    (void *)npip));
			goto next_path;
		}
		if (vlun->svl_first_path == npip) {
			/* Skip the path the register already ran on. */
			goto next_path;
		} else {
			if (vhci_do_prout(nsvp) != 1)
				fail++;
		}
next_path:
		pip = npip;
		rval = mdi_select_path(cdip, NULL,
		    MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH,
		    pip, &npip);
		mdi_rele_path(pip);
	} while ((rval == MDI_SUCCESS) && (npip != NULL));

	/* Restore the original service-action byte. */
	vlun->svl_cdb[1] = cdb_1;

	if (fail) {
		VHCI_DEBUG(4, (CE_WARN, NULL, "%s%d: key registration failed, "
		    "couldn't be replicated on all paths",
		    ddi_driver_name(cdip), ddi_get_instance(cdip)));
		vhci_print_prout_keys(vlun, "vhci_run_cmd: ");

		if (SCBP_C(pkt) != STATUS_GOOD) {
			tpkt->pkt_reason = CMD_TRAN_ERR;
			tpkt->pkt_statistics = STAT_ABORTED;
		}
	} else {
		/* All paths registered: commit the keys as active. */
		vlun->svl_pgr_active = 1;
		vhci_print_prout_keys(vlun, "vhci_run_cmd: before bcopy:");

		bcopy((const void *)prout->service_key,
		    (void *)prout->active_service_key, MHIOC_RESV_KEY_SIZE);
		bcopy((const void *)prout->res_key,
		    (void *)prout->active_res_key, MHIOC_RESV_KEY_SIZE);

		vhci_print_prout_keys(vlun, "vhci_run_cmd: after bcopy:");
	}
done:
	if (SCBP_C(pkt) == STATUS_GOOD)
		vlun->svl_first_path = NULL;

	if (svp)
		VHCI_DECR_PATH_CMDCOUNT(svp);

	/* Without PARTIAL_DMA the pHCI pkt and path hold are released now. */
	if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
		scsi_destroy_pkt(pkt);
		vpkt->vpkt_hba_pkt = NULL;
		if (vpkt->vpkt_path) {
			mdi_rele_path(vpkt->vpkt_path);
			vpkt->vpkt_path = NULL;
		}
	}

	sema_v(&vlun->svl_pgr_sema);
	/*
	 * The PROUT commands are not included in the automatic retry
	 * mechanism, therefore, vpkt_org_vpkt should never be set here.
	 */
	ASSERT(vpkt->vpkt_org_vpkt == NULL);
	scsi_hba_pkt_comp(tpkt);
}
2795
2796/*
2797 * Get the keys registered with this target.  Since we will have
2798 * registered the same key with multiple initiators, strip out
2799 * any duplicate keys.
2800 *
2801 * The pointers which will be used to filter the registered keys from
2802 * the device will be stored in filter_prin and filter_pkt.  If the
2803 * allocation length of the buffer was sufficient for the number of
2804 * parameter data bytes available to be returned by the device then the
2805 * key filtering will use the keylist returned from the original
2806 * request.  If the allocation length of the buffer was not sufficient,
2807 * then the filtering will use the keylist returned from the request
2808 * that is resent below.
2809 *
2810 * If the device returns an additional length field that is greater than
2811 * the allocation length of the buffer, then allocate a new buffer which
2812 * can accommodate the number of parameter data bytes available to be
2813 * returned.  Resend the scsi PRIN command, filter out the duplicate
2814 * keys and return as many of the unique keys found that was originally
2815 * requested and set the additional length field equal to the data bytes
2816 * of unique reservation keys available to be returned.
2817 *
2818 * If the device returns an additional length field that is less than or
2819 * equal to the allocation length of the buffer, then all the available
2820 * keys registered were returned by the device.  Filter out the
2821 * duplicate keys and return all of the unique keys found and set the
2822 * additional length field equal to the data bytes of the reservation
2823 * keys to be returned.
2824 */
2825
2826#define	VHCI_PRIN_HEADER_SZ (sizeof (prin->length) + sizeof (prin->generation))
2827
2828static int
2829vhci_do_prin(struct vhci_pkt **intr_vpkt)
2830{
2831	scsi_vhci_priv_t *svp;
2832	struct vhci_pkt *vpkt = *intr_vpkt;
2833	vhci_prin_readkeys_t *prin;
2834	scsi_vhci_lun_t *vlun;
2835	struct scsi_vhci *vhci = ADDR2VHCI(&vpkt->vpkt_tgt_pkt->pkt_address);
2836
2837	struct buf		*new_bp = NULL;
2838	struct scsi_pkt		*new_pkt = NULL;
2839	struct vhci_pkt		*new_vpkt = NULL;
2840	uint32_t		needed_length;
2841	int			rval = VHCI_CMD_CMPLT;
2842	uint32_t		prin_length = 0;
2843	uint32_t		svl_prin_length = 0;
2844
2845	ASSERT(vpkt->vpkt_path);
2846	svp = mdi_pi_get_vhci_private(vpkt->vpkt_path);
2847	ASSERT(svp);
2848	vlun = svp->svp_svl;
2849	ASSERT(vlun);
2850
2851	/*
2852	 * If the caller only asked for an amount of data that would not
2853	 * be enough to include any key data it is likely that they will
2854	 * send the next command with a buffer size based on the information
2855	 * from this header. Doing recovery on this would be a duplication
2856	 * of efforts.
2857	 */
2858	if (vpkt->vpkt_tgt_init_bp->b_bcount <= VHCI_PRIN_HEADER_SZ) {
2859		rval = VHCI_CMD_CMPLT;
2860		goto exit;
2861	}
2862
2863	if (vpkt->vpkt_org_vpkt == NULL) {
2864		/*
2865		 * Can fail as sleep is not allowed.
2866		 */
2867		prin = (vhci_prin_readkeys_t *)
2868		    bp_mapin_common(vpkt->vpkt_tgt_init_bp, VM_NOSLEEP);
2869	} else {
2870		/*
2871		 * The retry buf doesn't need to be mapped in.
2872		 */
2873		prin = (vhci_prin_readkeys_t *)
2874		    vpkt->vpkt_tgt_init_bp->b_un.b_daddr;
2875	}
2876
2877	if (prin == NULL) {
2878		VHCI_DEBUG(5, (CE_WARN, NULL,
2879		    "vhci_do_prin: bp_mapin_common failed."));
2880		rval = VHCI_CMD_ERROR;
2881		goto fail;
2882	}
2883
2884	prin_length = BE_32(prin->length);
2885
2886	/*
2887	 * According to SPC-3r22, sec 4.3.4.6: "If the amount of
2888	 * information to be transferred exceeds the maximum value
2889	 * that the ALLOCATION LENGTH field is capable of specifying,
2890	 * the device server shall...terminate the command with CHECK
2891	 * CONDITION status".  The ALLOCATION LENGTH field of the
2892	 * PERSISTENT RESERVE IN command is 2 bytes. We should never
2893	 * get here with an ADDITIONAL LENGTH greater than 0xFFFF
2894	 * so if we do, then it is an error!
2895	 */
2896
2897
2898	if ((prin_length + VHCI_PRIN_HEADER_SZ) > 0xFFFF) {
2899		VHCI_DEBUG(5, (CE_NOTE, NULL,
2900		    "vhci_do_prin: Device returned invalid "
2901		    "length 0x%x\n", prin_length));
2902		rval = VHCI_CMD_ERROR;
2903		goto fail;
2904	}
2905	needed_length = prin_length + VHCI_PRIN_HEADER_SZ;
2906
2907	/*
2908	 * If prin->length is greater than the byte count allocated in the
2909	 * original buffer, then resend the request with enough buffer
2910	 * allocated to get all of the available registered keys.
2911	 */
2912	if ((vpkt->vpkt_tgt_init_bp->b_bcount < needed_length) &&
2913	    (vpkt->vpkt_org_vpkt == NULL)) {
2914
2915		new_pkt = vhci_create_retry_pkt(vpkt);
2916		if (new_pkt == NULL) {
2917			rval = VHCI_CMD_ERROR;
2918			goto fail;
2919		}
2920		new_vpkt = TGTPKT2VHCIPKT(new_pkt);
2921
2922		/*
2923		 * This is the buf with buffer pointer
2924		 * where the prin readkeys will be
2925		 * returned from the device
2926		 */
2927		new_bp = scsi_alloc_consistent_buf(&svp->svp_psd->sd_address,
2928		    NULL, needed_length, B_READ, NULL_FUNC, NULL);
2929		if ((new_bp == NULL) || (new_bp->b_un.b_addr == NULL)) {
2930			if (new_bp) {
2931				scsi_free_consistent_buf(new_bp);
2932			}
2933			vhci_scsi_destroy_pkt(&new_pkt->pkt_address, new_pkt);
2934			rval = VHCI_CMD_ERROR;
2935			goto fail;
2936		}
2937		new_bp->b_bcount = needed_length;
2938		new_pkt->pkt_cdbp[7] = (uchar_t)(needed_length >> 8);
2939		new_pkt->pkt_cdbp[8] = (uchar_t)needed_length;
2940
2941		rval = VHCI_CMD_RETRY;
2942
2943		new_vpkt->vpkt_tgt_init_bp = new_bp;
2944	}
2945
2946	if (rval == VHCI_CMD_RETRY) {
2947
2948		/*
		 * There were more keys than the original request asked for.
2950		 */
2951		mdi_pathinfo_t *path_holder = vpkt->vpkt_path;
2952
2953		/*
2954		 * Release the old path because it does not matter which path
2955		 * this command is sent down.  This allows the normal bind
2956		 * transport mechanism to be used.
2957		 */
2958		if (vpkt->vpkt_path != NULL) {
2959			mdi_rele_path(vpkt->vpkt_path);
2960			vpkt->vpkt_path = NULL;
2961		}
2962
2963		/*
2964		 * Dispatch the retry command
2965		 */
2966		if (taskq_dispatch(vhci->vhci_taskq, vhci_dispatch_scsi_start,
2967		    (void *) new_vpkt, KM_NOSLEEP) == TASKQID_INVALID) {
2968			if (path_holder) {
2969				vpkt->vpkt_path = path_holder;
2970				mdi_hold_path(path_holder);
2971			}
2972			scsi_free_consistent_buf(new_bp);
2973			vhci_scsi_destroy_pkt(&new_pkt->pkt_address, new_pkt);
2974			rval = VHCI_CMD_ERROR;
2975			goto fail;
2976		}
2977
2978		/*
2979		 * If we return VHCI_CMD_RETRY, that means the caller
2980		 * is going to bail and wait for the reissued command
2981		 * to complete.  In that case, we need to decrement
2982		 * the path command count right now.  In any other
2983		 * case, it'll be decremented by the caller.
2984		 */
2985		VHCI_DECR_PATH_CMDCOUNT(svp);
2986		goto exit;
2987
2988	}
2989
2990	if (rval == VHCI_CMD_CMPLT) {
2991		/*
2992		 * The original request got all of the keys or the recovery
2993		 * packet returns.
2994		 */
2995		int new;
2996		int old;
2997		int num_keys = prin_length / MHIOC_RESV_KEY_SIZE;
2998
2999		VHCI_DEBUG(4, (CE_NOTE, NULL, "vhci_do_prin: %d keys read\n",
3000		    num_keys));
3001
3002#ifdef DEBUG
3003		VHCI_DEBUG(5, (CE_NOTE, NULL, "vhci_do_prin: from storage\n"));
3004		if (vhci_debug == 5)
3005			vhci_print_prin_keys(prin, num_keys);
3006		VHCI_DEBUG(5, (CE_NOTE, NULL,
3007		    "vhci_do_prin: MPxIO old keys:\n"));
3008		if (vhci_debug == 5)
3009			vhci_print_prin_keys(&vlun->svl_prin, num_keys);
3010#endif
3011
3012		/*
3013		 * Filter out all duplicate keys returned from the device
3014		 * We know that we use a different key for every host, so we
3015		 * can simply strip out duplicates. Otherwise we would need to
3016		 * do more bookkeeping to figure out which keys to strip out.
3017		 */
3018
3019		new = 0;
3020
3021		/*
3022		 * If we got at least 1 key copy it.
3023		 */
3024		if (num_keys > 0) {
3025			vlun->svl_prin.keylist[0] = prin->keylist[0];
3026			new++;
3027		}
3028
3029		/*
3030		 * find next unique key.
3031		 */
3032		for (old = 1; old < num_keys; old++) {
3033			int j;
3034			int match = 0;
3035
3036			if (new >= VHCI_NUM_RESV_KEYS)
3037				break;
3038			for (j = 0; j < new; j++) {
3039				if (bcmp(&prin->keylist[old],
3040				    &vlun->svl_prin.keylist[j],
3041				    sizeof (mhioc_resv_key_t)) == 0) {
3042					match = 1;
3043					break;
3044				}
3045			}
3046			if (!match) {
3047				vlun->svl_prin.keylist[new] =
3048				    prin->keylist[old];
3049				new++;
3050			}
3051		}
3052
3053		/* Stored Big Endian */
3054		vlun->svl_prin.generation = prin->generation;
3055		svl_prin_length = new * sizeof (mhioc_resv_key_t);
3056		/* Stored Big Endian */
3057		vlun->svl_prin.length = BE_32(svl_prin_length);
3058		svl_prin_length += VHCI_PRIN_HEADER_SZ;
3059
3060		/*
3061		 * If we arrived at this point after issuing a retry, make sure
3062		 * that we put everything back the way it originally was so
3063		 * that the target driver can complete the command correctly.
3064		 */
3065		if (vpkt->vpkt_org_vpkt != NULL) {
3066			new_bp = vpkt->vpkt_tgt_init_bp;
3067
3068			scsi_free_consistent_buf(new_bp);
3069
3070			vpkt = vhci_sync_retry_pkt(vpkt);
3071			*intr_vpkt = vpkt;
3072
3073			/*
3074			 * Make sure the original buffer is mapped into kernel
3075			 * space before we try to copy the filtered keys into
3076			 * it.
3077			 */
3078			prin = (vhci_prin_readkeys_t *)bp_mapin_common(
3079			    vpkt->vpkt_tgt_init_bp, VM_NOSLEEP);
3080		}
3081
3082		/*
3083		 * Now copy the desired number of prin keys into the original
3084		 * target buffer.
3085		 */
3086		if (svl_prin_length <= vpkt->vpkt_tgt_init_bp->b_bcount) {
3087			/*
3088			 * It is safe to return all of the available unique
3089			 * keys
3090			 */
3091			bcopy(&vlun->svl_prin, prin, svl_prin_length);
3092		} else {
3093			/*
3094			 * Not all of the available keys were requested by the
3095			 * original command.
3096			 */
3097			bcopy(&vlun->svl_prin, prin,
3098			    vpkt->vpkt_tgt_init_bp->b_bcount);
3099		}
3100#ifdef DEBUG
3101		VHCI_DEBUG(5, (CE_NOTE, NULL,
3102		    "vhci_do_prin: To Application:\n"));
3103		if (vhci_debug == 5)
3104			vhci_print_prin_keys(prin, new);
3105		VHCI_DEBUG(5, (CE_NOTE, NULL,
3106		    "vhci_do_prin: MPxIO new keys:\n"));
3107		if (vhci_debug == 5)
3108			vhci_print_prin_keys(&vlun->svl_prin, new);
3109#endif
3110	}
3111fail:
3112	if (rval == VHCI_CMD_ERROR) {
3113		/*
3114		 * If we arrived at this point after issuing a
3115		 * retry, make sure that we put everything back
3116		 * the way it originally was so that ssd can
3117		 * complete the command correctly.
3118		 */
3119
3120		if (vpkt->vpkt_org_vpkt != NULL) {
3121			new_bp = vpkt->vpkt_tgt_init_bp;
3122			if (new_bp != NULL) {
3123				scsi_free_consistent_buf(new_bp);
3124			}
3125
3126			new_vpkt = vpkt;
3127			vpkt = vpkt->vpkt_org_vpkt;
3128
3129			vhci_scsi_destroy_pkt(&svp->svp_psd->sd_address,
3130			    new_vpkt->vpkt_tgt_pkt);
3131		}
3132
3133		/*
3134		 * Mark this command completion as having an error so that
3135		 * ssd will retry the command.
3136		 */
3137
3138		vpkt->vpkt_tgt_pkt->pkt_reason = CMD_ABORTED;
3139		vpkt->vpkt_tgt_pkt->pkt_statistics |= STAT_ABORTED;
3140
3141		rval = VHCI_CMD_CMPLT;
3142	}
3143exit:
3144	/*
3145	 * Make sure that the semaphore is only released once.
3146	 */
3147	if (rval == VHCI_CMD_CMPLT) {
3148		sema_v(&vlun->svl_pgr_sema);
3149	}
3150
3151	return (rval);
3152}
3153
3154static void
3155vhci_intr(struct scsi_pkt *pkt)
3156{
3157	struct vhci_pkt		*vpkt = (struct vhci_pkt *)pkt->pkt_private;
3158	struct scsi_pkt		*tpkt;
3159	scsi_vhci_priv_t	*svp;
3160	scsi_vhci_lun_t		*vlun;
3161	int			rval, held;
3162	struct scsi_failover_ops	*fops;
3163	uint8_t			*sns, skey, asc, ascq;
3164	mdi_pathinfo_t		*lpath;
3165	static char		*timeout_err = "Command Timeout";
3166	static char		*parity_err = "Parity Error";
3167	char			*err_str = NULL;
3168	dev_info_t		*vdip, *cdip;
3169	char			*cpath;
3170
3171	ASSERT(vpkt != NULL);
3172	tpkt = vpkt->vpkt_tgt_pkt;
3173	ASSERT(tpkt != NULL);
3174	svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(vpkt->vpkt_path);
3175	ASSERT(svp != NULL);
3176	vlun = svp->svp_svl;
3177	ASSERT(vlun != NULL);
3178	lpath = vpkt->vpkt_path;
3179
3180	/*
3181	 * sync up the target driver's pkt with the pkt that
3182	 * we actually used
3183	 */
3184	*(tpkt->pkt_scbp) = *(pkt->pkt_scbp);
3185	tpkt->pkt_resid = pkt->pkt_resid;
3186	tpkt->pkt_state = pkt->pkt_state;
3187	tpkt->pkt_statistics = pkt->pkt_statistics;
3188	tpkt->pkt_reason = pkt->pkt_reason;
3189
3190	/* Return path_instance information back to the target driver. */
3191	if (scsi_pkt_allocated_correctly(tpkt)) {
3192		if (scsi_pkt_allocated_correctly(pkt)) {
3193			/*
3194			 * If both packets were correctly allocated,
3195			 * return path returned by pHCI.
3196			 */
3197			tpkt->pkt_path_instance = pkt->pkt_path_instance;
3198		} else {
3199			/* Otherwise return path of pHCI we used */
3200			tpkt->pkt_path_instance =
3201			    mdi_pi_get_path_instance(lpath);
3202		}
3203	}
3204
3205	if (pkt->pkt_cdbp[0] == SCMD_PROUT &&
3206	    ((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_REGISTER) ||
3207	    ((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_R_AND_IGNORE)) {
3208		if ((SCBP_C(pkt) != STATUS_GOOD) ||
3209		    (pkt->pkt_reason != CMD_CMPLT)) {
3210			sema_v(&vlun->svl_pgr_sema);
3211		}
3212	} else if (pkt->pkt_cdbp[0] == SCMD_PRIN) {
3213		if (pkt->pkt_reason != CMD_CMPLT ||
3214		    (SCBP_C(pkt) != STATUS_GOOD)) {
3215			sema_v(&vlun->svl_pgr_sema);
3216		}
3217	}
3218
3219	switch (pkt->pkt_reason) {
3220	case CMD_CMPLT:
3221		/*
3222		 * cmd completed successfully, check for scsi errors
3223		 */
3224		switch (*(pkt->pkt_scbp)) {
3225		case STATUS_CHECK:
3226			if (pkt->pkt_state & STATE_ARQ_DONE) {
3227				sns = (uint8_t *)
3228				    &(((struct scsi_arq_status *)(uintptr_t)
3229				    (pkt->pkt_scbp))->sts_sensedata);
3230				skey = scsi_sense_key(sns);
3231				asc = scsi_sense_asc(sns);
3232				ascq = scsi_sense_ascq(sns);
3233				fops = vlun->svl_fops;
3234				ASSERT(fops != NULL);
3235				VHCI_DEBUG(4, (CE_NOTE, NULL, "vhci_intr: "
3236				    "Received sns key %x  esc %x  escq %x\n",
3237				    skey, asc, ascq));
3238
3239				if (vlun->svl_waiting_for_activepath == 1) {
3240					/*
3241					 * if we are here it means we are
3242					 * in the midst of a probe/attach
3243					 * through a passive path; this
3244					 * case is exempt from sense analysis
3245					 * for detection of ext. failover
3246					 * because that would unnecessarily
3247					 * increase attach time.
3248					 */
3249					bcopy(pkt->pkt_scbp, tpkt->pkt_scbp,
3250					    vpkt->vpkt_tgt_init_scblen);
3251					break;
3252				}
3253				if (asc == VHCI_SCSI_PERR) {
3254					/*
3255					 * parity error
3256					 */
3257					err_str = parity_err;
3258					bcopy(pkt->pkt_scbp, tpkt->pkt_scbp,
3259					    vpkt->vpkt_tgt_init_scblen);
3260					break;
3261				}
3262				rval = fops->sfo_analyze_sense(svp->svp_psd,
3263				    sns, vlun->svl_fops_ctpriv);
3264				if ((rval == SCSI_SENSE_NOFAILOVER) ||
3265				    (rval == SCSI_SENSE_UNKNOWN) ||
3266				    (rval == SCSI_SENSE_NOT_READY)) {
3267					bcopy(pkt->pkt_scbp, tpkt->pkt_scbp,
3268					    vpkt->vpkt_tgt_init_scblen);
3269					break;
3270				} else if (rval == SCSI_SENSE_STATE_CHANGED) {
3271					struct scsi_vhci	*vhci;
3272					vhci = ADDR2VHCI(&tpkt->pkt_address);
3273					VHCI_HOLD_LUN(vlun, VH_NOSLEEP, held);
3274					if (!held) {
3275						/*
3276						 * looks like some other thread
3277						 * has already detected this
3278						 * condition
3279						 */
3280						tpkt->pkt_state &=
3281						    ~STATE_ARQ_DONE;
3282						*(tpkt->pkt_scbp) =
3283						    STATUS_BUSY;
3284						break;
3285					}
3286					(void) taskq_dispatch(
3287					    vhci->vhci_update_pathstates_taskq,
3288					    vhci_update_pathstates,
3289					    (void *)vlun, KM_SLEEP);
3290				} else {
3291					/*
3292					 * externally initiated failover
3293					 * has occurred or is in progress
3294					 */
3295					VHCI_HOLD_LUN(vlun, VH_NOSLEEP, held);
3296					if (!held) {
3297						/*
3298						 * looks like some other thread
3299						 * has already detected this
3300						 * condition
3301						 */
3302						tpkt->pkt_state &=
3303						    ~STATE_ARQ_DONE;
3304						*(tpkt->pkt_scbp) =
3305						    STATUS_BUSY;
3306						break;
3307					} else {
3308						rval = vhci_handle_ext_fo
3309						    (pkt, rval);
3310						if (rval == BUSY_RETURN) {
3311							tpkt->pkt_state &=
3312							    ~STATE_ARQ_DONE;
3313							*(tpkt->pkt_scbp) =
3314							    STATUS_BUSY;
3315							break;
3316						}
3317						bcopy(pkt->pkt_scbp,
3318						    tpkt->pkt_scbp,
3319						    vpkt->vpkt_tgt_init_scblen);
3320						break;
3321					}
3322				}
3323			}
3324			break;
3325
3326		/*
3327		 * If this is a good SCSI-II RELEASE cmd completion then restore
3328		 * the load balancing policy and reset VLUN_RESERVE_ACTIVE_FLG.
3329		 * If this is a good SCSI-II RESERVE cmd completion then set
3330		 * VLUN_RESERVE_ACTIVE_FLG.
3331		 */
3332		case STATUS_GOOD:
3333			if ((pkt->pkt_cdbp[0] == SCMD_RELEASE) ||
3334			    (pkt->pkt_cdbp[0] == SCMD_RELEASE_G1)) {
3335				(void) mdi_set_lb_policy(vlun->svl_dip,
3336				    vlun->svl_lb_policy_save);
3337				vlun->svl_flags &= ~VLUN_RESERVE_ACTIVE_FLG;
3338				VHCI_DEBUG(1, (CE_WARN, NULL,
3339				    "!vhci_intr: vlun 0x%p release path 0x%p",
3340				    (void *)vlun, (void *)vpkt->vpkt_path));
3341			}
3342
3343			if ((pkt->pkt_cdbp[0] == SCMD_RESERVE) ||
3344			    (pkt->pkt_cdbp[0] == SCMD_RESERVE_G1)) {
3345				vlun->svl_flags |= VLUN_RESERVE_ACTIVE_FLG;
3346				vlun->svl_resrv_pip = vpkt->vpkt_path;
3347				VHCI_DEBUG(1, (CE_WARN, NULL,
3348				    "!vhci_intr: vlun 0x%p reserved path 0x%p",
3349				    (void *)vlun, (void *)vpkt->vpkt_path));
3350			}
3351			break;
3352
3353		case STATUS_RESERVATION_CONFLICT:
3354			VHCI_DEBUG(1, (CE_WARN, NULL,
3355			    "!vhci_intr: vlun 0x%p "
3356			    "reserve conflict on path 0x%p",
3357			    (void *)vlun, (void *)vpkt->vpkt_path));
3358			/* FALLTHROUGH */
3359		default:
3360			break;
3361		}
3362
3363		/*
3364		 * Update I/O completion statistics for the path
3365		 */
3366		mdi_pi_kstat_iosupdate(vpkt->vpkt_path, vpkt->vpkt_tgt_init_bp);
3367
3368		/*
3369		 * Command completed successfully, release the dma binding and
3370		 * destroy the transport side of the packet.
3371		 */
3372		if ((pkt->pkt_cdbp[0] == SCMD_PROUT) &&
3373		    (((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_REGISTER) ||
3374		    ((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_R_AND_IGNORE))) {
3375			if (SCBP_C(pkt) == STATUS_GOOD) {
3376				ASSERT(vlun->svl_taskq);
3377				svp->svp_last_pkt_reason = pkt->pkt_reason;
3378				(void) taskq_dispatch(vlun->svl_taskq,
3379				    vhci_run_cmd, pkt, KM_SLEEP);
3380				return;
3381			}
3382		}
3383		if ((SCBP_C(pkt) == STATUS_GOOD) &&
3384		    (pkt->pkt_cdbp[0] == SCMD_PRIN) && vpkt->vpkt_tgt_init_bp) {
3385			/*
3386			 * If the action (value in byte 1 of the cdb) is zero,
3387			 * we're reading keys, and that's the only condition
3388			 * where we need to be concerned with filtering keys
3389			 * and potential retries.  Otherwise, we simply signal
3390			 * the semaphore and move on.
3391			 */
3392			if (pkt->pkt_cdbp[1] == 0) {
3393				/*
3394				 * If this is the completion of an internal
3395				 * retry then we need to make sure that the
3396				 * pkt and tpkt pointers are readjusted so
3397				 * the calls to scsi_destroy_pkt and pkt_comp
3398				 * below work * correctly.
3399				 */
3400				if (vpkt->vpkt_org_vpkt != NULL) {
3401					pkt = vpkt->vpkt_org_vpkt->vpkt_hba_pkt;
3402					tpkt = vpkt->vpkt_org_vpkt->
3403					    vpkt_tgt_pkt;
3404
3405					/*
3406					 * If this command was issued through
3407					 * the taskq then we need to clear
3408					 * this flag for proper processing in
3409					 * the case of a retry from the target
3410					 * driver.
3411					 */
3412					vpkt->vpkt_state &=
3413					    ~VHCI_PKT_THRU_TASKQ;
3414				}
3415
3416				/*
3417				 * if vhci_do_prin returns VHCI_CMD_CMPLT then
3418				 * vpkt will contain the address of the
3419				 * original vpkt
3420				 */
3421				if (vhci_do_prin(&vpkt) == VHCI_CMD_RETRY) {
3422					/*
3423					 * The command has been resent to get
3424					 * all the keys from the device.  Don't
3425					 * complete the command with ssd until
3426					 * the retry completes.
3427					 */
3428					return;
3429				}
3430			} else {
3431				sema_v(&vlun->svl_pgr_sema);
3432			}
3433		}
3434
3435		break;
3436
3437	case CMD_TIMEOUT:
3438		if ((pkt->pkt_statistics &
3439		    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) == 0) {
3440
3441			VHCI_DEBUG(1, (CE_NOTE, NULL,
3442			    "!scsi vhci timeout invoked\n"));
3443
3444			(void) vhci_recovery_reset(vlun, &pkt->pkt_address,
3445			    FALSE, VHCI_DEPTH_ALL);
3446		}
3447		MDI_PI_ERRSTAT(lpath, MDI_PI_TRANSERR);
3448		tpkt->pkt_statistics |= STAT_ABORTED;
3449		err_str = timeout_err;
3450		break;
3451
3452	case CMD_TRAN_ERR:
3453		/*
3454		 * This status is returned if the transport has sent the cmd
3455		 * down the link to the target and then some error occurs.
3456		 * In case of SCSI-II RESERVE cmd, we don't know if the
3457		 * reservation been accepted by the target or not, so we need
3458		 * to clear the reservation.
3459		 */
3460		if ((pkt->pkt_cdbp[0] == SCMD_RESERVE) ||
3461		    (pkt->pkt_cdbp[0] == SCMD_RESERVE_G1)) {
3462			VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_intr received"
3463			    " cmd_tran_err for scsi-2 reserve cmd\n"));
3464			if (!vhci_recovery_reset(vlun, &pkt->pkt_address,
3465			    TRUE, VHCI_DEPTH_TARGET)) {
3466				VHCI_DEBUG(1, (CE_WARN, NULL,
3467				    "!vhci_intr cmd_tran_err reset failed!"));
3468			}
3469		}
3470		break;
3471
3472	case CMD_DEV_GONE:
3473		/*
3474		 * If this is the last path then report CMD_DEV_GONE to the
3475		 * target driver, otherwise report BUSY to triggger retry.
3476		 */
3477		if (vlun->svl_dip &&
3478		    (mdi_client_get_path_count(vlun->svl_dip) <= 1)) {
3479			struct scsi_vhci	*vhci;
3480			vhci = ADDR2VHCI(&tpkt->pkt_address);
3481			VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_intr received "
3482			    "cmd_dev_gone on last path\n"));
3483			(void) vhci_invalidate_mpapi_lu(vhci, vlun);
3484			break;
3485		}
3486
3487		/* Report CMD_CMPLT-with-BUSY to cause retry. */
3488		VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_intr received "
3489		    "cmd_dev_gone\n"));
3490		tpkt->pkt_reason = CMD_CMPLT;
3491		tpkt->pkt_state = STATE_GOT_BUS |
3492		    STATE_GOT_TARGET | STATE_SENT_CMD |
3493		    STATE_GOT_STATUS;
3494		*(tpkt->pkt_scbp) = STATUS_BUSY;
3495		break;
3496
3497	default:
3498		break;
3499	}
3500
3501	/*
3502	 * SCSI-II RESERVE cmd has been serviced by the lower layers clear
3503	 * the flag so the lun is not QUIESCED any longer.
3504	 * Also clear the VHCI_PKT_THRU_TASKQ flag, to ensure that if this pkt
3505	 * is retried, a taskq shall again be dispatched to service it.  Else
3506	 * it may lead to a system hang if the retry is within interrupt
3507	 * context.
3508	 */
3509	if ((pkt->pkt_cdbp[0] == SCMD_RESERVE) ||
3510	    (pkt->pkt_cdbp[0] == SCMD_RESERVE_G1)) {
3511		vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
3512		vpkt->vpkt_state &= ~VHCI_PKT_THRU_TASKQ;
3513	}
3514
3515	/*
3516	 * vpkt_org_vpkt should always be NULL here if the retry command
3517	 * has been successfully processed.  If vpkt_org_vpkt != NULL at
3518	 * this point, it is an error so restore the original vpkt and
3519	 * return an error to the target driver so it can retry the
3520	 * command as appropriate.
3521	 */
3522	if (vpkt->vpkt_org_vpkt != NULL) {
3523		struct vhci_pkt *new_vpkt = vpkt;
3524		vpkt = vpkt->vpkt_org_vpkt;
3525
3526		vhci_scsi_destroy_pkt(&svp->svp_psd->sd_address,
3527		    new_vpkt->vpkt_tgt_pkt);
3528
3529		/*
3530		 * Mark this command completion as having an error so that
3531		 * ssd will retry the command.
3532		 */
3533		vpkt->vpkt_tgt_pkt->pkt_reason = CMD_ABORTED;
3534		vpkt->vpkt_tgt_pkt->pkt_statistics |= STAT_ABORTED;
3535
3536		pkt = vpkt->vpkt_hba_pkt;
3537		tpkt = vpkt->vpkt_tgt_pkt;
3538	}
3539
3540	if ((err_str != NULL) && (pkt->pkt_reason !=
3541	    svp->svp_last_pkt_reason)) {
3542		cdip = vlun->svl_dip;
3543		vdip = ddi_get_parent(cdip);
3544		cpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3545		vhci_log(CE_WARN, vdip, "!%s (%s%d): %s on path %s",
3546		    ddi_pathname(cdip, cpath), ddi_driver_name(cdip),
3547		    ddi_get_instance(cdip), err_str,
3548		    mdi_pi_spathname(vpkt->vpkt_path));
3549		kmem_free(cpath, MAXPATHLEN);
3550	}
3551	svp->svp_last_pkt_reason = pkt->pkt_reason;
3552	VHCI_DECR_PATH_CMDCOUNT(svp);
3553
3554	/*
3555	 * For PARTIAL_DMA, vhci should not free the path.
3556	 * Target driver will call into vhci_scsi_dmafree or
3557	 * destroy pkt to release this path.
3558	 */
3559	if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
3560		scsi_destroy_pkt(pkt);
3561		vpkt->vpkt_hba_pkt = NULL;
3562		if (vpkt->vpkt_path) {
3563			mdi_rele_path(vpkt->vpkt_path);
3564			vpkt->vpkt_path = NULL;
3565		}
3566	}
3567
3568	scsi_hba_pkt_comp(tpkt);
3569}
3570
3571/*
3572 * two possibilities: (1) failover has completed
3573 * or (2) is in progress; update our path states for
3574 * the former case; for the latter case,
3575 * initiate a scsi_watch request to
3576 * determine when failover completes - vlun is HELD
3577 * until failover completes; BUSY is returned to upper
3578 * layer in both the cases
3579 */
3580static int
3581vhci_handle_ext_fo(struct scsi_pkt *pkt, int fostat)
3582{
3583	struct vhci_pkt		*vpkt = (struct vhci_pkt *)pkt->pkt_private;
3584	struct scsi_pkt		*tpkt;
3585	scsi_vhci_priv_t	*svp;
3586	scsi_vhci_lun_t		*vlun;
3587	struct scsi_vhci	*vhci;
3588	scsi_vhci_swarg_t	*swarg;
3589	char			*path;
3590
3591	ASSERT(vpkt != NULL);
3592	tpkt = vpkt->vpkt_tgt_pkt;
3593	ASSERT(tpkt != NULL);
3594	svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(vpkt->vpkt_path);
3595	ASSERT(svp != NULL);
3596	vlun = svp->svp_svl;
3597	ASSERT(vlun != NULL);
3598	ASSERT(VHCI_LUN_IS_HELD(vlun));
3599
3600	vhci = ADDR2VHCI(&tpkt->pkt_address);
3601
3602	if (fostat == SCSI_SENSE_INACTIVE) {
3603		VHCI_DEBUG(1, (CE_NOTE, NULL, "!Failover "
3604		    "detected for %s; updating path states...\n",
3605		    vlun->svl_lun_wwn));
3606		/*
3607		 * set the vlun flag to indicate to the task that the target
3608		 * port group needs updating
3609		 */
3610		vlun->svl_flags |= VLUN_UPDATE_TPG;
3611		(void) taskq_dispatch(vhci->vhci_update_pathstates_taskq,
3612		    vhci_update_pathstates, (void *)vlun, KM_SLEEP);
3613	} else {
3614		path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3615		vhci_log(CE_NOTE, ddi_get_parent(vlun->svl_dip),
3616		    "!%s (%s%d): Waiting for externally initiated failover "
3617		    "to complete", ddi_pathname(vlun->svl_dip, path),
3618		    ddi_driver_name(vlun->svl_dip),
3619		    ddi_get_instance(vlun->svl_dip));
3620		kmem_free(path, MAXPATHLEN);
3621		swarg = kmem_alloc(sizeof (*swarg), KM_NOSLEEP);
3622		if (swarg == NULL) {
3623			VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_handle_ext_fo: "
3624			    "request packet allocation for %s failed....\n",
3625			    vlun->svl_lun_wwn));
3626			VHCI_RELEASE_LUN(vlun);
3627			return (PKT_RETURN);
3628		}
3629		swarg->svs_svp = svp;
3630		swarg->svs_tos = gethrtime();
3631		swarg->svs_pi = vpkt->vpkt_path;
3632		swarg->svs_release_lun = 0;
3633		swarg->svs_done = 0;
3634		/*
3635		 * place a hold on the path...we don't want it to
3636		 * vanish while scsi_watch is in progress
3637		 */
3638		mdi_hold_path(vpkt->vpkt_path);
3639		svp->svp_sw_token = scsi_watch_request_submit(svp->svp_psd,
3640		    VHCI_FOWATCH_INTERVAL, SENSE_LENGTH, vhci_efo_watch_cb,
3641		    (caddr_t)swarg);
3642	}
3643	return (BUSY_RETURN);
3644}
3645
3646/*
3647 * vhci_efo_watch_cb:
3648 *	Callback from scsi_watch request to check the failover status.
3649 *	Completion is either due to successful failover or timeout.
3650 *	Upon successful completion, vhci_update_path_states is called.
3651 *	For timeout condition, vhci_efo_done is called.
3652 *	Always returns 0 to scsi_watch to keep retrying till vhci_efo_done
3653 *	terminates this request properly in a separate thread.
3654 */
3655
static int
vhci_efo_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
{
	struct scsi_status		*statusp = resultp->statusp;
	uint8_t				*sensep = (uint8_t *)resultp->sensep;
	struct scsi_pkt			*pkt = resultp->pkt;
	scsi_vhci_swarg_t		*swarg;
	scsi_vhci_priv_t		*svp;
	scsi_vhci_lun_t			*vlun;
	struct scsi_vhci		*vhci;
	dev_info_t			*vdip;
	int				rval, updt_paths;

	swarg = (scsi_vhci_swarg_t *)(uintptr_t)arg;
	svp = swarg->svs_svp;
	if (swarg->svs_done) {
		/*
		 * Already completed failover or timedout.
		 * Waiting for vhci_efo_done to terminate this scsi_watch.
		 */
		return (0);
	}

	ASSERT(svp != NULL);
	vlun = svp->svp_svl;
	ASSERT(vlun != NULL);
	ASSERT(VHCI_LUN_IS_HELD(vlun));
	vlun->svl_efo_update_path = 0;
	vdip = ddi_get_parent(vlun->svl_dip);
	vhci = ddi_get_soft_state(vhci_softstate,
	    ddi_get_instance(vdip));

	updt_paths = 0;

	/*
	 * Transport-level failure: keep polling until the EFO timeout
	 * (measured from svs_tos, the time the watch was submitted).
	 */
	if (pkt->pkt_reason != CMD_CMPLT) {
		if ((gethrtime() - swarg->svs_tos) >= VHCI_EXTFO_TIMEOUT) {
			swarg->svs_release_lun = 1;
			goto done;
		}
		return (0);
	}
	if (*((unsigned char *)statusp) == STATUS_CHECK) {
		rval = vlun->svl_fops->sfo_analyze_sense(svp->svp_psd, sensep,
		    vlun->svl_fops_ctpriv);
		switch (rval) {
			/*
			 * Only update path states in case path is definitely
			 * inactive, or no failover occurred.  For all other
			 * check conditions continue pinging.  An unexpected
			 * check condition shouldn't cause pinging to complete
			 * prematurely.
			 */
			case SCSI_SENSE_INACTIVE:
			case SCSI_SENSE_NOFAILOVER:
				updt_paths = 1;
				break;
			default:
				if ((gethrtime() - swarg->svs_tos)
				    >= VHCI_EXTFO_TIMEOUT) {
					swarg->svs_release_lun = 1;
					goto done;
				}
				return (0);
		}
	} else if (*((unsigned char *)statusp) ==
	    STATUS_RESERVATION_CONFLICT) {
		/* Reservation conflict also triggers a path-state update. */
		updt_paths = 1;
	} else if ((*((unsigned char *)statusp)) &
	    (STATUS_BUSY | STATUS_QFULL)) {
		/*
		 * NOTE(review): bitwise test — relies on the bit patterns of
		 * STATUS_BUSY/STATUS_QFULL; keep polling while device is busy.
		 */
		return (0);
	}
	if ((*((unsigned char *)statusp) == STATUS_GOOD) ||
	    (updt_paths == 1)) {
		/*
		 * we got here because we had detected an
		 * externally initiated failover; things
		 * have settled down now, so let's
		 * start up a task to update the
		 * path states and target port group
		 */
		vlun->svl_efo_update_path = 1;
		swarg->svs_done = 1;
		vlun->svl_swarg = swarg;
		vlun->svl_flags |= VLUN_UPDATE_TPG;
		(void) taskq_dispatch(vhci->vhci_update_pathstates_taskq,
		    vhci_update_pathstates, (void *)vlun,
		    KM_SLEEP);
		return (0);
	}
	if ((gethrtime() - swarg->svs_tos) >= VHCI_EXTFO_TIMEOUT) {
		swarg->svs_release_lun = 1;
		goto done;
	}
	return (0);
done:
	/* Timed out: hand cleanup (and possible LUN release) to a taskq. */
	swarg->svs_done = 1;
	(void) taskq_dispatch(vhci->vhci_taskq,
	    vhci_efo_done, (void *)swarg, KM_SLEEP);
	return (0);
}
3756
3757/*
3758 * vhci_efo_done:
 *	Cleanly terminates scsi_watch and frees up resources.
3760 *	Called as taskq function in vhci_efo_watch_cb for EFO timeout condition
3761 *	or by vhci_update_path_states invoked during external initiated
3762 *	failover completion.
3763 */
3764static void
3765vhci_efo_done(void *arg)
3766{
3767	scsi_vhci_lun_t			*vlun;
3768	scsi_vhci_swarg_t		*swarg = (scsi_vhci_swarg_t *)arg;
3769	scsi_vhci_priv_t		*svp = swarg->svs_svp;
3770	ASSERT(svp);
3771
3772	vlun = svp->svp_svl;
3773	ASSERT(vlun);
3774
3775	/* Wait for clean termination of scsi_watch */
3776	(void) scsi_watch_request_terminate(svp->svp_sw_token,
3777	    SCSI_WATCH_TERMINATE_ALL_WAIT);
3778	svp->svp_sw_token = NULL;
3779
3780	/* release path and freeup resources to indicate failover completion */
3781	mdi_rele_path(swarg->svs_pi);
3782	if (swarg->svs_release_lun) {
3783		VHCI_RELEASE_LUN(vlun);
3784	}
3785	kmem_free((void *)swarg, sizeof (*swarg));
3786}
3787
3788/*
3789 * Update the path states
3790 * vlun should be HELD when this is invoked.
3791 * Calls vhci_efo_done to cleanup resources allocated for EFO.
3792 */
3793void
3794vhci_update_pathstates(void *arg)
3795{
3796	mdi_pathinfo_t			*pip, *npip;
3797	dev_info_t			*dip;
3798	struct scsi_failover_ops	*fo;
3799	struct scsi_vhci_priv		*svp;
3800	struct scsi_device		*psd;
3801	struct scsi_path_opinfo		opinfo;
3802	char				*pclass, *tptr;
3803	struct scsi_vhci_lun		*vlun = (struct scsi_vhci_lun *)arg;
3804	int				sps; /* mdi_select_path() status */
3805	char				*cpath;
3806	struct scsi_vhci		*vhci;
3807	struct scsi_pkt			*pkt;
3808	struct buf			*bp;
3809	struct scsi_vhci_priv		*svp_conflict = NULL;
3810
3811	ASSERT(VHCI_LUN_IS_HELD(vlun));
3812	dip  = vlun->svl_dip;
3813	pip = npip = NULL;
3814
3815	vhci = ddi_get_soft_state(vhci_softstate,
3816	    ddi_get_instance(ddi_get_parent(dip)));
3817
3818	sps = mdi_select_path(dip, NULL, (MDI_SELECT_ONLINE_PATH |
3819	    MDI_SELECT_STANDBY_PATH | MDI_SELECT_NO_PREFERRED), NULL, &npip);
3820	if ((npip == NULL) || (sps != MDI_SUCCESS)) {
3821		goto done;
3822	}
3823
3824	fo = vlun->svl_fops;
3825	do {
3826		pip = npip;
3827		svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
3828		psd = svp->svp_psd;
3829		if (fo->sfo_path_get_opinfo(psd, &opinfo,
3830		    vlun->svl_fops_ctpriv) != 0) {
3831			sps = mdi_select_path(dip, NULL,
3832			    (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH |
3833			    MDI_SELECT_NO_PREFERRED), pip, &npip);
3834			mdi_rele_path(pip);
3835			continue;
3836		}
3837
3838		if (mdi_prop_lookup_string(pip, "path-class", &pclass) !=
3839		    MDI_SUCCESS) {
3840			VHCI_DEBUG(1, (CE_NOTE, NULL,
3841			    "!vhci_update_pathstates: prop lookup failed for "
3842			    "path 0x%p\n", (void *)pip));
3843			sps = mdi_select_path(dip, NULL,
3844			    (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH |
3845			    MDI_SELECT_NO_PREFERRED), pip, &npip);
3846			mdi_rele_path(pip);
3847			continue;
3848		}
3849
3850		/*
3851		 * Need to update the "path-class" property
3852		 * value in the device tree if different
3853		 * from the existing value.
3854		 */
3855		if (strcmp(pclass, opinfo.opinfo_path_attr) != 0) {
3856			(void) mdi_prop_update_string(pip, "path-class",
3857			    opinfo.opinfo_path_attr);
3858		}
3859
3860		/*
3861		 * Only change the state if needed. i.e. Don't call
3862		 * mdi_pi_set_state to ONLINE a path if its already
3863		 * ONLINE. Same for STANDBY paths.
3864		 */
3865
3866		if ((opinfo.opinfo_path_state == SCSI_PATH_ACTIVE ||
3867		    opinfo.opinfo_path_state == SCSI_PATH_ACTIVE_NONOPT)) {
3868			if (!(MDI_PI_IS_ONLINE(pip))) {
3869				VHCI_DEBUG(1, (CE_NOTE, NULL,
3870				    "!vhci_update_pathstates: marking path"
3871				    " 0x%p as ONLINE\n", (void *)pip));
3872				cpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3873				vhci_log(CE_NOTE, ddi_get_parent(dip), "!%s "
3874				    "(%s%d): path %s "
3875				    "is now ONLINE because of "
3876				    "an externally initiated failover",
3877				    ddi_pathname(dip, cpath),
3878				    ddi_driver_name(dip),
3879				    ddi_get_instance(dip),
3880				    mdi_pi_spathname(pip));
3881				kmem_free(cpath, MAXPATHLEN);
3882				mdi_pi_set_state(pip,
3883				    MDI_PATHINFO_STATE_ONLINE);
3884				mdi_pi_set_preferred(pip,
3885				    opinfo.opinfo_preferred);
3886				tptr = kmem_alloc(strlen
3887				    (opinfo.opinfo_path_attr) + 1, KM_SLEEP);
3888				(void) strlcpy(tptr, opinfo.opinfo_path_attr,
3889				    (strlen(opinfo.opinfo_path_attr) + 1));
3890				mutex_enter(&vlun->svl_mutex);
3891				if (vlun->svl_active_pclass != NULL) {
3892					kmem_free(vlun->svl_active_pclass,
3893					    strlen(vlun->svl_active_pclass) +
3894					    1);
3895				}
3896				vlun->svl_active_pclass = tptr;
3897				if (vlun->svl_waiting_for_activepath) {
3898					vlun->svl_waiting_for_activepath = 0;
3899				}
3900				mutex_exit(&vlun->svl_mutex);
3901			} else if (MDI_PI_IS_ONLINE(pip)) {
3902				if (strcmp(pclass, opinfo.opinfo_path_attr)
3903				    != 0) {
3904					mdi_pi_set_preferred(pip,
3905					    opinfo.opinfo_preferred);
3906					mutex_enter(&vlun->svl_mutex);
3907					if (vlun->svl_active_pclass == NULL ||
3908					    strcmp(opinfo.opinfo_path_attr,
3909					    vlun->svl_active_pclass) != 0) {
3910						mutex_exit(&vlun->svl_mutex);
3911						tptr = kmem_alloc(strlen
3912						    (opinfo.opinfo_path_attr) +
3913						    1, KM_SLEEP);
3914						(void) strlcpy(tptr,
3915						    opinfo.opinfo_path_attr,
3916						    (strlen
3917						    (opinfo.opinfo_path_attr)
3918						    + 1));
3919						mutex_enter(&vlun->svl_mutex);
3920					} else {
3921						/*
3922						 * No need to update
3923						 * svl_active_pclass
3924						 */
3925						tptr = NULL;
3926						mutex_exit(&vlun->svl_mutex);
3927					}
3928					if (tptr) {
3929						if (vlun->svl_active_pclass
3930						    != NULL) {
3931							kmem_free(vlun->
3932							    svl_active_pclass,
3933							    strlen(vlun->
3934							    svl_active_pclass)
3935							    + 1);
3936						}
3937						vlun->svl_active_pclass = tptr;
3938						mutex_exit(&vlun->svl_mutex);
3939					}
3940				}
3941			}
3942
3943			/* Check for Reservation Conflict */
3944			bp = scsi_alloc_consistent_buf(
3945			    &svp->svp_psd->sd_address, (struct buf *)NULL,
3946			    DEV_BSIZE, B_READ, NULL, NULL);
3947			if (!bp) {
3948				VHCI_DEBUG(1, (CE_NOTE, NULL,
3949				    "!vhci_update_pathstates: No resources "
3950				    "(buf)\n"));
3951				mdi_rele_path(pip);
3952				goto done;
3953			}
3954			pkt = scsi_init_pkt(&svp->svp_psd->sd_address, NULL, bp,
3955			    CDB_GROUP1, sizeof (struct scsi_arq_status), 0,
3956			    PKT_CONSISTENT, NULL, NULL);
3957			if (pkt) {
3958				(void) scsi_setup_cdb((union scsi_cdb *)
3959				    (uintptr_t)pkt->pkt_cdbp, SCMD_READ, 1, 1,
3960				    0);
3961				pkt->pkt_time = 3 * 30;
3962				pkt->pkt_flags = FLAG_NOINTR;
3963				pkt->pkt_path_instance =
3964				    mdi_pi_get_path_instance(pip);
3965
3966				if ((scsi_transport(pkt) == TRAN_ACCEPT) &&
3967				    (pkt->pkt_reason == CMD_CMPLT) &&
3968				    (SCBP_C(pkt) ==
3969				    STATUS_RESERVATION_CONFLICT)) {
3970					VHCI_DEBUG(1, (CE_NOTE, NULL,
3971					    "!vhci_update_pathstates: reserv. "
3972					    "conflict to be resolved on 0x%p\n",
3973					    (void *)pip));
3974					svp_conflict = svp;
3975				}
3976				scsi_destroy_pkt(pkt);
3977			}
3978			scsi_free_consistent_buf(bp);
3979		} else if ((opinfo.opinfo_path_state == SCSI_PATH_INACTIVE) &&
3980		    !(MDI_PI_IS_STANDBY(pip))) {
3981			VHCI_DEBUG(1, (CE_NOTE, NULL,
3982			    "!vhci_update_pathstates: marking path"
3983			    " 0x%p as STANDBY\n", (void *)pip));
3984			cpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3985			vhci_log(CE_NOTE, ddi_get_parent(dip), "!%s "
3986			    "(%s%d): path %s "
3987			    "is now STANDBY because of "
3988			    "an externally initiated failover",
3989			    ddi_pathname(dip, cpath),
3990			    ddi_driver_name(dip),
3991			    ddi_get_instance(dip),
3992			    mdi_pi_spathname(pip));
3993			kmem_free(cpath, MAXPATHLEN);
3994			mdi_pi_set_state(pip,
3995			    MDI_PATHINFO_STATE_STANDBY);
3996			mdi_pi_set_preferred(pip,
3997			    opinfo.opinfo_preferred);
3998			mutex_enter(&vlun->svl_mutex);
3999			if (vlun->svl_active_pclass != NULL) {
4000				if (strcmp(vlun->svl_active_pclass,
4001				    opinfo.opinfo_path_attr) == 0) {
4002					kmem_free(vlun->
4003					    svl_active_pclass,
4004					    strlen(vlun->
4005					    svl_active_pclass) + 1);
4006					vlun->svl_active_pclass = NULL;
4007				}
4008			}
4009			mutex_exit(&vlun->svl_mutex);
4010		}
4011		(void) mdi_prop_free(pclass);
4012		sps = mdi_select_path(dip, NULL,
4013		    (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH |
4014		    MDI_SELECT_NO_PREFERRED), pip, &npip);
4015		mdi_rele_path(pip);
4016
4017	} while ((npip != NULL) && (sps == MDI_SUCCESS));
4018
4019	/*
4020	 * Check to see if this vlun has an active SCSI-II RESERVE.  If so
4021	 * clear the reservation by sending a reset, so the host doesn't
4022	 * receive a reservation conflict.  The reset has to be sent via a
4023	 * working path.  Let's use a path referred to by svp_conflict as it
4024	 * should be working.
4025	 * Reset VLUN_RESERVE_ACTIVE_FLG for this vlun.  Also notify ssd
4026	 * of the reset, explicitly.
4027	 */
4028	if (vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
4029		if (svp_conflict && (vlun->svl_xlf_capable == 0)) {
4030			VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_update_pathstates:"
4031			    " sending recovery reset on 0x%p, path_state: %x",
4032			    svp_conflict->svp_psd->sd_private,
4033			    mdi_pi_get_state((mdi_pathinfo_t *)
4034			    svp_conflict->svp_psd->sd_private)));
4035
4036			(void) vhci_recovery_reset(vlun,
4037			    &svp_conflict->svp_psd->sd_address, FALSE,
4038			    VHCI_DEPTH_TARGET);
4039		}
4040		vlun->svl_flags &= ~VLUN_RESERVE_ACTIVE_FLG;
4041		mutex_enter(&vhci->vhci_mutex);
4042		scsi_hba_reset_notify_callback(&vhci->vhci_mutex,
4043		    &vhci->vhci_reset_notify_listf);
4044		mutex_exit(&vhci->vhci_mutex);
4045	}
4046	if (vlun->svl_flags & VLUN_UPDATE_TPG) {
4047		/*
4048		 * Update the AccessState of related MP-API TPGs
4049		 */
4050		(void) vhci_mpapi_update_tpg_acc_state_for_lu(vhci, vlun);
4051		vlun->svl_flags &= ~VLUN_UPDATE_TPG;
4052	}
4053done:
4054	if (vlun->svl_efo_update_path) {
4055		vlun->svl_efo_update_path = 0;
4056		vhci_efo_done(vlun->svl_swarg);
4057		vlun->svl_swarg = 0;
4058	}
4059	VHCI_RELEASE_LUN(vlun);
4060}
4061
/* ARGSUSED */
/*
 * mdi_vhci_ops path init entry point.
 *
 * Called by the mpxio framework when pathinfo node 'pip' is being
 * initialized under vHCI 'vdip'.  Allocates the per-path soft state
 * (scsi_vhci_priv_t) and a pathinfo-oriented scsi_device(9S), wires
 * them together and to the pathinfo node, and invokes the pHCI's
 * tran_tgt_init(9E) entry point if it has one.
 *
 * Returns MDI_SUCCESS on success.  On failure all partial allocations
 * are undone and MDI_FAILURE (or the tran_tgt_init(9E) result) is
 * returned.
 */
static int
vhci_pathinfo_init(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags)
{
	scsi_hba_tran_t		*hba = NULL;
	struct scsi_device	*psd = NULL;
	scsi_vhci_lun_t		*vlun = NULL;
	dev_info_t		*pdip = NULL;
	dev_info_t		*tgt_dip;
	struct scsi_vhci	*vhci;
	char			*guid;
	scsi_vhci_priv_t	*svp = NULL;
	int			rval = MDI_FAILURE;
	int			vlun_alloced = 0;

	ASSERT(vdip != NULL);
	ASSERT(pip != NULL);

	vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(vdip));
	ASSERT(vhci != NULL);

	pdip = mdi_pi_get_phci(pip);
	ASSERT(pdip != NULL);

	hba = ddi_get_driver_private(pdip);
	ASSERT(hba != NULL);

	tgt_dip = mdi_pi_get_client(pip);
	ASSERT(tgt_dip != NULL);

	/* The client node's GUID identifies the LUN this path leads to. */
	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, tgt_dip, PROPFLAGS,
	    MDI_CLIENT_GUID_PROP, &guid) != DDI_SUCCESS) {
		VHCI_DEBUG(1, (CE_WARN, NULL,
		    "vhci_pathinfo_init: lun guid property failed"));
		goto failure;
	}

	/* Look up the LUN by GUID; vlun_alloced is set if newly created. */
	vlun = vhci_lun_lookup_alloc(tgt_dip, guid, &vlun_alloced);
	ddi_prop_free(guid);

	vlun->svl_dip = tgt_dip;

	svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
	svp->svp_svl = vlun;

	/*
	 * Initialize svl_lb_policy_save only for newly allocated vlun. Writing
	 * to svl_lb_policy_save later could accidentally overwrite saved lb
	 * policy.
	 */
	if (vlun_alloced) {
		vlun->svl_lb_policy_save = mdi_get_lb_policy(tgt_dip);
	}

	mutex_init(&svp->svp_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&svp->svp_cv, NULL, CV_DRIVER, NULL);

	psd = kmem_zalloc(sizeof (*psd), KM_SLEEP);
	mutex_init(&psd->sd_mutex, NULL, MUTEX_DRIVER, NULL);

	if (hba->tran_hba_flags & SCSI_HBA_ADDR_COMPLEX) {
		/*
		 * For a SCSI_HBA_ADDR_COMPLEX transport we store a pointer to
		 * scsi_device in the scsi_address structure.  This allows an
		 * an HBA driver to find its scsi_device(9S) and
		 * per-scsi_device(9S) HBA private data given a
		 * scsi_address(9S) by using scsi_address_device(9F) and
		 * scsi_device_hba_private_get(9F)).
		 */
		psd->sd_address.a.a_sd = psd;
	} else if (hba->tran_hba_flags & SCSI_HBA_TRAN_CLONE) {
		/*
		 * Clone transport structure if requested, so
		 * Self enumerating HBAs always need to use cloning
		 */
		scsi_hba_tran_t	*clone =
		    kmem_alloc(sizeof (scsi_hba_tran_t), KM_SLEEP);
		bcopy(hba, clone, sizeof (scsi_hba_tran_t));
		hba = clone;
		hba->tran_sd = psd;
	} else {
		/*
		 * SPI pHCI unit-address. If we ever need to support this
		 * we could set a.spi.a_target/a.spi.a_lun based on pathinfo
		 * node unit-address properties.  For now we fail...
		 */
		goto failure;
	}

	psd->sd_dev = tgt_dip;
	psd->sd_address.a_hba_tran = hba;

	/*
	 * Mark scsi_device as being associated with a pathinfo node. For
	 * a scsi_device structure associated with a devinfo node,
	 * scsi_ctlops_initchild sets this field to NULL.
	 */
	psd->sd_pathinfo = pip;

	/*
	 * LEGACY: sd_private: set for older mpxio-capable pHCI drivers with
	 * too much scsi_vhci/mdi/ndi knowledge. Remove this code when all
	 * mpxio-capable pHCI drivers use SCSA enumeration services (or at
	 * least have been changed to use sd_pathinfo instead).
	 */
	psd->sd_private = (caddr_t)pip;

	/* See scsi_hba.c for info on sd_tran_safe kludge */
	psd->sd_tran_safe = hba;

	svp->svp_psd = psd;
	mdi_pi_set_vhci_private(pip, (caddr_t)svp);

	/*
	 * call hba's target init entry point if it exists
	 */
	if (hba->tran_tgt_init != NULL) {
		psd->sd_tran_tgt_free_done = 0;
		if ((rval = (*hba->tran_tgt_init)(pdip, tgt_dip,
		    hba, psd)) != DDI_SUCCESS) {
			VHCI_DEBUG(1, (CE_WARN, pdip,
			    "!vhci_pathinfo_init: tran_tgt_init failed for "
			    "path=0x%p rval=%x", (void *)pip, rval));
			goto failure;
		}
	}

	svp->svp_new_path = 1;

	VHCI_DEBUG(4, (CE_NOTE, NULL, "!vhci_pathinfo_init: path:%p\n",
	    (void *)pip));
	return (MDI_SUCCESS);

failure:
	/* Tear down, in reverse order, whatever was set up above. */
	if (psd) {
		mutex_destroy(&psd->sd_mutex);
		kmem_free(psd, sizeof (*psd));
	}
	if (svp) {
		mdi_pi_set_vhci_private(pip, NULL);
		mutex_destroy(&svp->svp_mutex);
		cv_destroy(&svp->svp_cv);
		kmem_free(svp, sizeof (*svp));
	}
	/* 'hba' points at the clone (not the pHCI's tran) in this case. */
	if (hba && (hba->tran_hba_flags & SCSI_HBA_TRAN_CLONE))
		kmem_free(hba, sizeof (scsi_hba_tran_t));

	/* Only free the vlun if this call created it. */
	if (vlun_alloced)
		vhci_lun_free(vlun, NULL);

	return (rval);
}
4214
/* ARGSUSED */
/*
 * mdi_vhci_ops path uninit entry point: undo vhci_pathinfo_init().
 *
 * Marks the path UNINIT for MP-API, calls the pHCI's tran_tgt_free(9E)
 * (unless it has already run), then frees the pathinfo-oriented
 * scsi_device (including its cached inquiry data and any cloned
 * scsi_hba_tran) and the per-path soft state.
 *
 * Returns MDI_SUCCESS, including when the path was already freed.
 */
static int
vhci_pathinfo_uninit(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags)
{
	scsi_hba_tran_t		*hba = NULL;
	struct scsi_device	*psd = NULL;
	dev_info_t		*pdip = NULL;
	dev_info_t		*cdip = NULL;
	scsi_vhci_priv_t	*svp = NULL;

	ASSERT(vdip != NULL);
	ASSERT(pip != NULL);

	pdip = mdi_pi_get_phci(pip);
	ASSERT(pdip != NULL);

	cdip = mdi_pi_get_client(pip);
	ASSERT(cdip != NULL);

	hba = ddi_get_driver_private(pdip);
	ASSERT(hba != NULL);

	vhci_mpapi_set_path_state(vdip, pip, MP_DRVR_PATH_STATE_UNINIT);
	svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
	if (svp == NULL) {
		/* path already freed. Nothing to do. */
		return (MDI_SUCCESS);
	}

	psd = svp->svp_psd;
	ASSERT(psd != NULL);

	if (hba->tran_hba_flags & SCSI_HBA_ADDR_COMPLEX) {
		/* Verify plumbing */
		ASSERT(psd->sd_address.a_hba_tran == hba);
		ASSERT(psd->sd_address.a.a_sd == psd);
	} else if (hba->tran_hba_flags & SCSI_HBA_TRAN_CLONE) {
		/* Switch to cloned scsi_hba_tran(9S) structure */
		hba = psd->sd_address.a_hba_tran;
		ASSERT(hba->tran_hba_flags & SCSI_HBA_TRAN_CLONE);
		ASSERT(hba->tran_sd == psd);
	}

	/* sd_tran_tgt_free_done guards against calling tran_tgt_free twice */
	if ((hba->tran_tgt_free != NULL) && !psd->sd_tran_tgt_free_done) {
		(*hba->tran_tgt_free) (pdip, cdip, hba, psd);
		psd->sd_tran_tgt_free_done = 1;
	}
	mutex_destroy(&psd->sd_mutex);
	/* For the clone case 'hba' is the per-path copy allocated at init */
	if (hba->tran_hba_flags & SCSI_HBA_TRAN_CLONE) {
		kmem_free(hba, sizeof (*hba));
	}

	mdi_pi_set_vhci_private(pip, NULL);

	/*
	 * Free the pathinfo related scsi_device inquiry data. Note that this
	 * matches what happens for scsi_hba.c devinfo case at uninitchild time.
	 */
	if (psd->sd_inq)
		kmem_free((caddr_t)psd->sd_inq, sizeof (struct scsi_inquiry));
	kmem_free((caddr_t)psd, sizeof (*psd));

	mutex_destroy(&svp->svp_mutex);
	cv_destroy(&svp->svp_cv);
	kmem_free((caddr_t)svp, sizeof (*svp));

	VHCI_DEBUG(4, (CE_NOTE, NULL, "!vhci_pathinfo_uninit: path=0x%p\n",
	    (void *)pip));
	return (MDI_SUCCESS);
}
4285
/* ARGSUSED */
/*
 * mdi_vhci_ops path state-change entry point.
 *
 * For MDI_EXT_STATE_CHANGE notifications (pHCI driver enable/disable
 * operations) this implements the before/after hooks: the LUN is held
 * before the change and released after; on disable, an outstanding
 * SCSI-II reservation on the affected path is cleared via a recovery
 * reset; on enable, auto-failback may be dispatched, PGR registrations
 * are revalidated, and reset-notify callbacks fire so target drivers
 * can reinstate reservations.  For plain state changes it dispatches
 * to vhci_pathinfo_online()/vhci_pathinfo_offline().
 *
 * Returns MDI_SUCCESS or MDI_FAILURE.
 */
static int
vhci_pathinfo_state_change(dev_info_t *vdip, mdi_pathinfo_t *pip,
    mdi_pathinfo_state_t state, uint32_t ext_state, int flags)
{
	int			rval = MDI_SUCCESS;
	scsi_vhci_priv_t	*svp;
	scsi_vhci_lun_t		*vlun;
	int			held;
	/* operation code is encoded in bits 8-11 of flags */
	int			op = (flags & 0xf00) >> 8;
	struct scsi_vhci	*vhci;

	vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(vdip));

	if (flags & MDI_EXT_STATE_CHANGE) {
		/*
		 * We do not want to issue any commands down the path in case
		 * sync flag is set. Lower layers might not be ready to accept
		 * any I/O commands.
		 */
		if (op == DRIVER_DISABLE)
			return (MDI_SUCCESS);

		svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
		if (svp == NULL) {
			return (MDI_FAILURE);
		}
		vlun = svp->svp_svl;

		if (flags & MDI_BEFORE_STATE_CHANGE) {
			/*
			 * Hold the LUN.
			 */
			VHCI_HOLD_LUN(vlun, VH_SLEEP, held);
			if (flags & MDI_DISABLE_OP)  {
				/*
				 * Issue scsi reset if it happens to be
				 * reserved path.
				 */
				if (vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
					/*
					 * if reservation pending on
					 * this path, don't mark the
					 * path busy
					 */
					if (op == DRIVER_DISABLE_TRANSIENT) {
						VHCI_DEBUG(1, (CE_NOTE, NULL,
						    "!vhci_pathinfo"
						    "_state_change (pip:%p): "
						    " reservation: fail busy\n",
						    (void *)pip));
						return (MDI_FAILURE);
					}
					if (pip == vlun->svl_resrv_pip) {
						/*
						 * Clear the reservation on the
						 * path being disabled so no
						 * conflict survives it.
						 */
						if (vhci_recovery_reset(
						    svp->svp_svl,
						    &svp->svp_psd->sd_address,
						    TRUE,
						    VHCI_DEPTH_TARGET) == 0) {
							VHCI_DEBUG(1,
							    (CE_NOTE, NULL,
							    "!vhci_pathinfo"
							    "_state_change "
							    " (pip:%p): "
							    "reset failed, "
							    "give up!\n",
							    (void *)pip));
						}
						vlun->svl_flags &=
						    ~VLUN_RESERVE_ACTIVE_FLG;
					}
				}
			} else if (flags & MDI_ENABLE_OP)  {
				if (((vhci->vhci_conf_flags &
				    VHCI_CONF_FLAGS_AUTO_FAILBACK) ==
				    VHCI_CONF_FLAGS_AUTO_FAILBACK) &&
				    MDI_PI_IS_USER_DISABLE(pip) &&
				    MDI_PI_IS_STANDBY(pip)) {
					struct scsi_failover_ops	*fo;
					char *best_pclass, *pclass = NULL;
					int  best_class, rv;
					/*
					 * Failback if enabling a standby path
					 * and it is the primary class or
					 * preferred class
					 */
					best_class = mdi_pi_get_preferred(pip);
					if (best_class == 0) {
						/*
						 * if not preferred - compare
						 * path-class with class
						 */
						fo = vlun->svl_fops;
						(void) fo->sfo_pathclass_next(
						    NULL, &best_pclass,
						    vlun->svl_fops_ctpriv);
						pclass = NULL;
						rv = mdi_prop_lookup_string(pip,
						    "path-class", &pclass);
						if (rv != MDI_SUCCESS ||
						    pclass == NULL) {
							vhci_log(CE_NOTE, vdip,
							    "!path-class "
							    " lookup "
							    "failed. rv: %d"
							    "class: %p", rv,
							    (void *)pclass);
						} else if (strncmp(pclass,
						    best_pclass,
						    strlen(best_pclass)) == 0) {
							best_class = 1;
						}
						if (rv == MDI_SUCCESS &&
						    pclass != NULL) {
							rv = mdi_prop_free(
							    pclass);
							if (rv !=
							    DDI_PROP_SUCCESS) {
								vhci_log(
								    CE_NOTE,
								    vdip,
								    "!path-"
								    "class"
								    " free"
								    " failed"
								    " rv: %d"
								    " class: "
								    "%p",
								    rv,
								    (void *)
								    pclass);
							}
						}
					}
					if (best_class == 1) {
						VHCI_DEBUG(1, (CE_NOTE, NULL,
						    "preferred path: %p "
						    "USER_DISABLE->USER_ENABLE "
						    "transition for lun %s\n",
						    (void *)pip,
						    vlun->svl_lun_wwn));
						(void) taskq_dispatch(
						    vhci->vhci_taskq,
						    vhci_initiate_auto_failback,
						    (void *) vlun, KM_SLEEP);
					}
				}
				/*
				 * if PGR is active, revalidate key and
				 * register on this path also, if key is
				 * still valid
				 */
				sema_p(&vlun->svl_pgr_sema);
				if (vlun->svl_pgr_active)
					(void)
					    vhci_pgr_validate_and_register(svp);
				sema_v(&vlun->svl_pgr_sema);
				/*
				 * Inform target driver about any
				 * reservations to be reinstated if target
				 * has dropped reservation during the busy
				 * period.
				 */
				mutex_enter(&vhci->vhci_mutex);
				scsi_hba_reset_notify_callback(
				    &vhci->vhci_mutex,
				    &vhci->vhci_reset_notify_listf);
				mutex_exit(&vhci->vhci_mutex);
			}
		}
		if (flags & MDI_AFTER_STATE_CHANGE) {
			if (flags & MDI_ENABLE_OP)  {
				/* Wake anyone waiting for a usable path. */
				mutex_enter(&vhci_global_mutex);
				cv_broadcast(&vhci_cv);
				mutex_exit(&vhci_global_mutex);
			}
			if (vlun->svl_setcap_done) {
				/* Re-propagate the saved capability setting */
				(void) vhci_pHCI_cap(&svp->svp_psd->sd_address,
				    "sector-size", vlun->svl_sector_size,
				    1, pip);
			}

			/*
			 * Release the LUN
			 */
			VHCI_RELEASE_LUN(vlun);

			/*
			 * Path transition is complete.
			 * Run callback to indicate target driver to
			 * retry to prevent IO starvation.
			 */
			if (scsi_callback_id != 0) {
				ddi_run_callback(&scsi_callback_id);
			}
		}
	} else {
		switch (state) {
		case MDI_PATHINFO_STATE_ONLINE:
			rval = vhci_pathinfo_online(vdip, pip, flags);
			break;

		case MDI_PATHINFO_STATE_OFFLINE:
			rval = vhci_pathinfo_offline(vdip, pip, flags);
			break;

		default:
			break;
		}
		/*
		 * Path transition is complete.
		 * Run callback to indicate target driver to
		 * retry to prevent IO starvation.
		 */
		if ((rval == MDI_SUCCESS) && (scsi_callback_id != 0)) {
			ddi_run_callback(&scsi_callback_id);
		}
		return (rval);
	}

	return (MDI_SUCCESS);
}
4508
4509/*
4510 * Parse the mpxio load balancing options. The datanameptr
4511 * will point to a string containing the load-balance-options value.
4512 * The load-balance-options value will be a property that
4513 * defines the load-balance algorithm and any arguments to that
4514 * algorithm.
4515 * For example:
4516 * device-type-mpxio-options-list=
4517 * "device-type=SUN    SENA", "load-balance-options=logical-block-options"
4518 * "device-type=SUN     SE6920", "round-robin-options";
4519 * logical-block-options="load-balance=logical-block", "region-size=15";
4520 * round-robin-options="load-balance=round-robin";
4521 *
4522 * If the load-balance is not defined the load balance algorithm will
4523 * default to the global setting. There will be default values assigned
4524 * to the arguments (region-size=18) and if an argument is one
4525 * that is not known, it will be ignored.
4526 */
4527static void
4528vhci_parse_mpxio_lb_options(dev_info_t *dip, dev_info_t *cdip,
4529    caddr_t datanameptr)
4530{
4531	char			*dataptr, *next_entry;
4532	caddr_t			config_list	= NULL;
4533	int			config_list_len = 0, list_len = 0;
4534	int			region_size = -1;
4535	client_lb_t		load_balance;
4536
4537	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, datanameptr,
4538	    (caddr_t)&config_list, &config_list_len) != DDI_PROP_SUCCESS) {
4539		return;
4540	}
4541
4542	list_len = config_list_len;
4543	next_entry = config_list;
4544	while (config_list_len > 0) {
4545		dataptr = next_entry;
4546
4547		if (strncmp(mdi_load_balance, dataptr,
4548		    strlen(mdi_load_balance)) == 0) {
4549			/* get the load-balance scheme */
4550			dataptr += strlen(mdi_load_balance) + 1;
4551			if (strcmp(dataptr, LOAD_BALANCE_PROP_RR) == 0) {
4552				(void) mdi_set_lb_policy(cdip, LOAD_BALANCE_RR);
4553				load_balance = LOAD_BALANCE_RR;
4554			} else if (strcmp(dataptr,
4555			    LOAD_BALANCE_PROP_LBA) == 0) {
4556				(void) mdi_set_lb_policy(cdip,
4557				    LOAD_BALANCE_LBA);
4558				load_balance = LOAD_BALANCE_LBA;
4559			} else if (strcmp(dataptr,
4560			    LOAD_BALANCE_PROP_NONE) == 0) {
4561				(void) mdi_set_lb_policy(cdip,
4562				    LOAD_BALANCE_NONE);
4563				load_balance = LOAD_BALANCE_NONE;
4564			}
4565		} else if (strncmp(dataptr, LOGICAL_BLOCK_REGION_SIZE,
4566		    strlen(LOGICAL_BLOCK_REGION_SIZE)) == 0) {
4567			int	i = 0;
4568			char	*ptr;
4569			char	*tmp;
4570
4571			tmp = dataptr + (strlen(LOGICAL_BLOCK_REGION_SIZE) + 1);
4572			/* check for numeric value */
4573			for (ptr = tmp; i < strlen(tmp); i++, ptr++) {
4574				if (!isdigit(*ptr)) {
4575					cmn_err(CE_WARN,
4576					    "Illegal region size: %s."
4577					    " Setting to default value: %d",
4578					    tmp,
4579					    LOAD_BALANCE_DEFAULT_REGION_SIZE);
4580					region_size =
4581					    LOAD_BALANCE_DEFAULT_REGION_SIZE;
4582					break;
4583				}
4584			}
4585			if (i >= strlen(tmp)) {
4586				region_size = stoi(&tmp);
4587			}
4588			(void) mdi_set_lb_region_size(cdip, region_size);
4589		}
4590		config_list_len -= (strlen(next_entry) + 1);
4591		next_entry += strlen(next_entry) + 1;
4592	}
4593#ifdef DEBUG
4594	if ((region_size >= 0) && (load_balance != LOAD_BALANCE_LBA)) {
4595		VHCI_DEBUG(1, (CE_NOTE, dip,
4596		    "!vhci_parse_mpxio_lb_options: region-size: %d"
4597		    "only valid for load-balance=logical-block\n",
4598		    region_size));
4599	}
4600#endif
4601	if ((region_size == -1) && (load_balance == LOAD_BALANCE_LBA)) {
4602		VHCI_DEBUG(1, (CE_NOTE, dip,
4603		    "!vhci_parse_mpxio_lb_options: No region-size"
4604		    " defined load-balance=logical-block."
4605		    " Default to: %d\n", LOAD_BALANCE_DEFAULT_REGION_SIZE));
4606		(void) mdi_set_lb_region_size(cdip,
4607		    LOAD_BALANCE_DEFAULT_REGION_SIZE);
4608	}
4609	if (list_len > 0) {
4610		kmem_free(config_list, list_len);
4611	}
4612}
4613
4614/*
4615 * Parse the device-type-mpxio-options-list looking for the key of
4616 * "load-balance-options". If found, parse the load balancing options.
4617 * Check the comment of the vhci_get_device_type_mpxio_options()
4618 * for the device-type-mpxio-options-list.
4619 */
4620static void
4621vhci_parse_mpxio_options(dev_info_t *dip, dev_info_t *cdip,
4622    caddr_t datanameptr, int list_len)
4623{
4624	char		*dataptr;
4625	int		len;
4626
4627	/*
4628	 * get the data list
4629	 */
4630	dataptr = datanameptr;
4631	len = 0;
4632	while (len < list_len &&
4633	    strncmp(dataptr, DEVICE_TYPE_STR, strlen(DEVICE_TYPE_STR))
4634	    != 0) {
4635		if (strncmp(dataptr, LOAD_BALANCE_OPTIONS,
4636		    strlen(LOAD_BALANCE_OPTIONS)) == 0) {
4637			len += strlen(LOAD_BALANCE_OPTIONS) + 1;
4638			dataptr += strlen(LOAD_BALANCE_OPTIONS) + 1;
4639			vhci_parse_mpxio_lb_options(dip, cdip, dataptr);
4640		}
4641		len += strlen(dataptr) + 1;
4642		dataptr += strlen(dataptr) + 1;
4643	}
4644}
4645
4646/*
 * Check the inquiry string returned from the device with the device-type
4648 * Check for the existence of the device-type-mpxio-options-list and
4649 * if found parse the list checking for a match with the device-type
4650 * value and the inquiry string returned from the device. If a match
4651 * is found, parse the mpxio options list. The format of the
4652 * device-type-mpxio-options-list is:
4653 * device-type-mpxio-options-list=
4654 * "device-type=SUN    SENA", "load-balance-options=logical-block-options"
4655 * "device-type=SUN     SE6920", "round-robin-options";
4656 * logical-block-options="load-balance=logical-block", "region-size=15";
4657 * round-robin-options="load-balance=round-robin";
4658 */
void
vhci_get_device_type_mpxio_options(dev_info_t *dip, dev_info_t *cdip,
    struct scsi_device *devp)
{

	caddr_t			config_list	= NULL;
	caddr_t			vidptr, datanameptr;
	int			vidlen, dupletlen = 0;
	int			config_list_len = 0, len;
	struct scsi_inquiry	*inq = devp->sd_inq;

	/*
	 * look up the device-type-mpxio-options-list and walk thru
	 * the list compare the vendor ids of the earlier inquiry command and
	 * with those vids in the list if there is a match, lookup
	 * the mpxio-options value
	 */
	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    MPXIO_OPTIONS_LIST,
	    (caddr_t)&config_list, &config_list_len) == DDI_PROP_SUCCESS) {

		/*
		 * Compare vids in each duplet - if it matches,
		 * parse the mpxio options list.
		 */
		for (len = config_list_len, vidptr = config_list; len > 0;
		    len -= dupletlen) {

			dupletlen = 0;

			/* Each duplet begins with a "device-type=" entry. */
			if (strlen(vidptr) != 0 &&
			    strncmp(vidptr, DEVICE_TYPE_STR,
			    strlen(DEVICE_TYPE_STR)) == 0) {
				/* point to next duplet */
				datanameptr = vidptr + strlen(vidptr) + 1;
				/* add len of this duplet */
				dupletlen += strlen(vidptr) + 1;
				/* get to device type */
				vidptr += strlen(DEVICE_TYPE_STR) + 1;
				vidlen = strlen(vidptr);
				/* prefix-match against the inquiry vendor id */
				if ((vidlen != 0) &&
				    bcmp(inq->inq_vid, vidptr, vidlen) == 0) {
					vhci_parse_mpxio_options(dip, cdip,
					    datanameptr, len - dupletlen);
					break;
				}
				/* get to next duplet */
				vidptr += strlen(vidptr) + 1;
			}
			/* get to the next device-type */
			while (len - dupletlen > 0 &&
			    strlen(vidptr) != 0 &&
			    strncmp(vidptr, DEVICE_TYPE_STR,
			    strlen(DEVICE_TYPE_STR)) != 0) {
				dupletlen += strlen(vidptr) + 1;
				vidptr += strlen(vidptr) + 1;
			}
		}
		if (config_list_len > 0) {
			kmem_free(config_list, config_list_len);
		}
	}
}
4722
/*
 * Refresh mdi pathinfo state for 'pip' from the failover module's view
 * of the path (fo->sfo_path_get_opinfo): sets the path ONLINE/STANDBY,
 * updates the "path-class" property and preferred bit, and keeps the
 * vlun's notion of its active path class (svl_active_pclass) in sync.
 * May fire reset-notify callbacks so the target driver re-reserves on
 * a better path, or dispatch auto-failback for an OFFLINE->STANDBY
 * transition of the preferred class.
 *
 * Returns MDI_SUCCESS, or MDI_FAILURE if opinfo could not be obtained.
 */
static int
vhci_update_pathinfo(struct scsi_device *psd,  mdi_pathinfo_t *pip,
    struct scsi_failover_ops *fo, scsi_vhci_lun_t *vlun,
    struct scsi_vhci *vhci)
{
	struct scsi_path_opinfo		opinfo;
	char				*pclass, *best_pclass;
	char				*resrv_pclass = NULL;
	int				force_rereserve = 0;
	int				update_pathinfo_done = 0;

	if (fo->sfo_path_get_opinfo(psd, &opinfo, vlun->svl_fops_ctpriv) != 0) {
		VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_update_pathinfo: "
		    "Failed to get operation info for path:%p\n", (void *)pip));
		return (MDI_FAILURE);
	}
	/* set the xlf capable flag in the vlun for future use */
	vlun->svl_xlf_capable = opinfo.opinfo_xlf_capable;
	(void) mdi_prop_update_string(pip, "path-class",
	    opinfo.opinfo_path_attr);

	pclass = opinfo.opinfo_path_attr;
	if (opinfo.opinfo_path_state == SCSI_PATH_ACTIVE) {
		mutex_enter(&vlun->svl_mutex);
		if (vlun->svl_active_pclass != NULL) {
			if (strcmp(vlun->svl_active_pclass, pclass) != 0) {
				mutex_exit(&vlun->svl_mutex);
				/*
				 * Externally initiated failover has happened;
				 * force the path state to be STANDBY/ONLINE,
				 * next IO will trigger failover and thus
				 * sync-up the pathstates.  Reason we don't
				 * sync-up immediately by invoking
				 * vhci_update_pathstates() is because it
				 * needs a VHCI_HOLD_LUN() and we don't
				 * want to block here.
				 *
				 * Further, if the device is an ALUA device,
				 * then failure to exactly match 'pclass' and
				 * 'svl_active_pclass'(as is the case here)
				 * indicates that the currently active path
				 * is a 'non-optimized' path - which means
				 * that 'svl_active_pclass' needs to be
				 * replaced with opinfo.opinfo_path_state
				 * value.
				 */

				if (SCSI_FAILOVER_IS_TPGS(vlun->svl_fops)) {
					char	*tptr;

					/*
					 * The device is ALUA compliant. The
					 * state need to be changed to online
					 * rather than standby state which is
					 * done typically for a asymmetric
					 * device that is non ALUA compliant.
					 */
					mdi_pi_set_state(pip,
					    MDI_PATHINFO_STATE_ONLINE);
					/* replace svl_active_pclass in place */
					tptr = kmem_alloc(strlen
					    (opinfo.opinfo_path_attr) + 1,
					    KM_SLEEP);
					(void) strlcpy(tptr,
					    opinfo.opinfo_path_attr,
					    (strlen(opinfo.opinfo_path_attr)
					    + 1));
					mutex_enter(&vlun->svl_mutex);
					kmem_free(vlun->svl_active_pclass,
					    strlen(vlun->svl_active_pclass) +
					    1);
					vlun->svl_active_pclass = tptr;
					mutex_exit(&vlun->svl_mutex);
				} else {
					/*
					 * Non ALUA device case.
					 */
					mdi_pi_set_state(pip,
					    MDI_PATHINFO_STATE_STANDBY);
				}
				vlun->svl_fo_support = opinfo.opinfo_mode;
				mdi_pi_set_preferred(pip,
				    opinfo.opinfo_preferred);
				update_pathinfo_done = 1;
			}

			/*
			 * Find out a class of currently reserved path if there
			 * is any.
			 */
			if ((vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) &&
			    mdi_prop_lookup_string(vlun->svl_resrv_pip,
			    "path-class", &resrv_pclass) != MDI_SUCCESS) {
				VHCI_DEBUG(1, (CE_NOTE, NULL,
				    "!vhci_update_pathinfo: prop lookup "
				    "failed for path 0x%p\n",
				    (void *)vlun->svl_resrv_pip));
				/*
				 * Something is wrong with the reserved path.
				 * We can't do much with that right here. Just
				 * force re-reservation to another path.
				 */
				force_rereserve = 1;
			}

			/*
			 * NOTE(review): resrv_pclass obtained from
			 * mdi_prop_lookup_string() above does not appear to
			 * be released with mdi_prop_free() on this path --
			 * confirm whether that is a leak.
			 */
			(void) fo->sfo_pathclass_next(NULL, &best_pclass,
			    vlun->svl_fops_ctpriv);
			if ((force_rereserve == 1) || ((resrv_pclass != NULL) &&
			    (strcmp(pclass, best_pclass) == 0) &&
			    (strcmp(resrv_pclass, best_pclass) != 0))) {
				/*
				 * Inform target driver that a reservation
				 * should be reinstated because the reserved
				 * path is not the most preferred one.
				 */
				mutex_enter(&vhci->vhci_mutex);
				scsi_hba_reset_notify_callback(
				    &vhci->vhci_mutex,
				    &vhci->vhci_reset_notify_listf);
				mutex_exit(&vhci->vhci_mutex);
			}

			if (update_pathinfo_done == 1) {
				return (MDI_SUCCESS);
			}
		} else {
			char	*tptr;

			/*
			 * lets release the mutex before we try to
			 * allocate since the potential to sleep is
			 * possible.
			 */
			mutex_exit(&vlun->svl_mutex);
			tptr = kmem_alloc(strlen(pclass) + 1, KM_SLEEP);
			(void) strlcpy(tptr, pclass, (strlen(pclass) + 1));
			mutex_enter(&vlun->svl_mutex);
			vlun->svl_active_pclass = tptr;
		}
		mutex_exit(&vlun->svl_mutex);
		mdi_pi_set_state(pip, MDI_PATHINFO_STATE_ONLINE);
		vlun->svl_waiting_for_activepath = 0;
	} else if (opinfo.opinfo_path_state == SCSI_PATH_ACTIVE_NONOPT) {
		/* Non-optimized active path: record class if none yet. */
		mutex_enter(&vlun->svl_mutex);
		if (vlun->svl_active_pclass == NULL) {
			char	*tptr;

			/* drop the mutex for the sleeping allocation */
			mutex_exit(&vlun->svl_mutex);
			tptr = kmem_alloc(strlen(pclass) + 1, KM_SLEEP);
			(void) strlcpy(tptr, pclass, (strlen(pclass) + 1));
			mutex_enter(&vlun->svl_mutex);
			vlun->svl_active_pclass = tptr;
		}
		mutex_exit(&vlun->svl_mutex);
		mdi_pi_set_state(pip, MDI_PATHINFO_STATE_ONLINE);
		vlun->svl_waiting_for_activepath = 0;
	} else if (opinfo.opinfo_path_state == SCSI_PATH_INACTIVE) {
		mutex_enter(&vlun->svl_mutex);
		if (vlun->svl_active_pclass != NULL) {
			if (strcmp(vlun->svl_active_pclass, pclass) == 0) {
				mutex_exit(&vlun->svl_mutex);
				/*
				 * externally initiated failover has happened;
				 * force state to ONLINE (see comment above)
				 */
				mdi_pi_set_state(pip,
				    MDI_PATHINFO_STATE_ONLINE);
				vlun->svl_fo_support = opinfo.opinfo_mode;
				mdi_pi_set_preferred(pip,
				    opinfo.opinfo_preferred);
				return (MDI_SUCCESS);
			}
		}
		mutex_exit(&vlun->svl_mutex);
		mdi_pi_set_state(pip, MDI_PATHINFO_STATE_STANDBY);

		/*
		 * Initiate auto-failback, if enabled, for path if path-state
		 * is transitioning from OFFLINE->STANDBY and pathclass is the
		 * preferred pathclass for this storage.
		 * NOTE: In case where opinfo_path_state is SCSI_PATH_ACTIVE
		 * (above), where the pi state is set to STANDBY, we don't
		 * initiate auto-failback as the next IO shall take care of.
		 * this. See comment above.
		 */
		(void) fo->sfo_pathclass_next(NULL, &best_pclass,
		    vlun->svl_fops_ctpriv);
		if (((vhci->vhci_conf_flags & VHCI_CONF_FLAGS_AUTO_FAILBACK) ==
		    VHCI_CONF_FLAGS_AUTO_FAILBACK) &&
		    (strcmp(pclass, best_pclass) == 0) &&
		    ((MDI_PI_OLD_STATE(pip) == MDI_PATHINFO_STATE_OFFLINE) ||
		    (MDI_PI_OLD_STATE(pip) == MDI_PATHINFO_STATE_INIT))) {
			VHCI_DEBUG(1, (CE_NOTE, NULL, "%s pathclass path: %p"
			    " OFFLINE->STANDBY transition for lun %s\n",
			    best_pclass, (void *)pip, vlun->svl_lun_wwn));
			(void) taskq_dispatch(vhci->vhci_taskq,
			    vhci_initiate_auto_failback, (void *) vlun,
			    KM_SLEEP);
		}
	}
	vlun->svl_fo_support = opinfo.opinfo_mode;
	mdi_pi_set_preferred(pip, opinfo.opinfo_preferred);

	VHCI_DEBUG(8, (CE_NOTE, NULL, "vhci_update_pathinfo: opinfo_rev = %x,"
	    " opinfo_path_state = %x opinfo_preferred = %x, opinfo_mode = %x\n",
	    opinfo.opinfo_rev, opinfo.opinfo_path_state,
	    opinfo.opinfo_preferred, opinfo.opinfo_mode));

	return (MDI_SUCCESS);
}
4932
4933/*
 * Form the kstat name and call mdi_pi_kstat_create()
4935 */
/*
 * Create the per-path I/O kstat for a pathinfo node.
 *
 * Builds a ks_name of the form <client><inst>.t<pid>.<pHCI><inst>
 * (see the targetmap block comment below for how <pid> is derived
 * from 'target-port') and hands it to mdi_pi_kstat_create().
 * Silently returns if kstats already exist for this path or if the
 * client/pHCI instance numbers are not yet assigned.
 */
void
vhci_kstat_create_pathinfo(mdi_pathinfo_t *pip)
{
	dev_info_t	*tgt_dip;
	dev_info_t	*pdip;
	char		*guid;
	char		*target_port, *target_port_dup;
	char		ks_name[KSTAT_STRLEN];
	uint_t		pid;
	int		by_id;
	mod_hash_val_t	hv;


	/* return if we have already allocated kstats */
	if (mdi_pi_kstat_exists(pip))
		return;

	/*
	 * We need instance numbers to create a kstat name, return if we don't
	 * have instance numbers assigned yet.
	 */
	tgt_dip = mdi_pi_get_client(pip);
	pdip = mdi_pi_get_phci(pip);
	if ((ddi_get_instance(tgt_dip) == -1) || (ddi_get_instance(pdip) == -1))
		return;

	/*
	 * A path oriented kstat has a ks_name of the form:
	 *
	 * <client-driver><instance>.t<pid>.<pHCI-driver><instance>
	 *
	 * We maintain a bidirectional 'target-port' to <pid> map,
	 * called targetmap. All pathinfo nodes with the same
	 * 'target-port' map to the same <pid>. The iostat(1M) code,
	 * when parsing a path oriented kstat name, uses the <pid> as
	 * a SCSI_VHCI_GET_TARGET_LONGNAME ioctl argument in order
	 * to get the 'target-port'. For KSTAT_FLAG_PERSISTENT kstats,
	 * this ioctl needs to translate a <pid> to a 'target-port'
	 * even after all pathinfo nodes associated with the
	 * 'target-port' have been destroyed. This is needed to support
	 * consistent first-iteration activity-since-boot iostat(1M)
	 * output. Because of this requirement, the mapping can't be
	 * based on pathinfo information in a devinfo snapshot.
	 */

	/* determine 'target-port' */
	if (mdi_prop_lookup_string(pip,
	    SCSI_ADDR_PROP_TARGET_PORT, &target_port) == MDI_SUCCESS) {
		target_port_dup = i_ddi_strdup(target_port, KM_SLEEP);
		(void) mdi_prop_free(target_port);
		by_id = 1;
	} else {
		/*
		 * If the pHCI did not set up 'target-port' on this
		 * pathinfo node, assume that our client is the only
		 * one with paths to the device by using the guid
		 * value as the 'target-port'. Since no other client
		 * will have the same guid, no other client will use
		 * the same <pid>.  NOTE: a client with an instance
		 * number always has a guid.
		 */
		(void) ddi_prop_lookup_string(DDI_DEV_T_ANY, tgt_dip,
		    PROPFLAGS, MDI_CLIENT_GUID_PROP, &guid);
		target_port_dup = i_ddi_strdup(guid, KM_SLEEP);
		ddi_prop_free(guid);

		/*
		 * For this type of mapping we don't want the
		 * <id> -> 'target-port' mapping to be made.  This
		 * will cause the SCSI_VHCI_GET_TARGET_LONGNAME ioctl
		 * to fail, and the iostat(1M) long '-n' output will
		 * still use the <pid>.  We do this because we just
		 * made up the 'target-port' using the guid, and we
		 * don't want to expose that fact in iostat output.
		 */
		by_id = 0;
	}

	/* find/establish <pid> given 'target-port' */
	mutex_enter(&vhci_targetmap_mutex);
	if (mod_hash_find(vhci_targetmap_byport,
	    (mod_hash_key_t)target_port_dup, &hv) == 0) {
		pid = (int)(intptr_t)hv;	/* mapping exists */
	} else {
		pid = vhci_targetmap_pid++;	/* new mapping */

		(void) mod_hash_insert(vhci_targetmap_byport,
		    (mod_hash_key_t)target_port_dup,
		    (mod_hash_val_t)(intptr_t)pid);
		if (by_id) {
			(void) mod_hash_insert(vhci_targetmap_bypid,
			    (mod_hash_key_t)(uintptr_t)pid,
			    (mod_hash_val_t)(uintptr_t)target_port_dup);
		}
		target_port_dup = NULL;		/* owned by hash */
	}
	mutex_exit(&vhci_targetmap_mutex);

	/* form kstat name */
	(void) snprintf(ks_name, KSTAT_STRLEN, "%s%d.t%d.%s%d",
	    ddi_driver_name(tgt_dip), ddi_get_instance(tgt_dip),
	    pid, ddi_driver_name(pdip), ddi_get_instance(pdip));

	VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_path_online: path:%p "
	    "kstat %s: pid %x <-> port %s\n", (void *)pip,
	    ks_name, pid, target_port_dup));
	/* free the dup unless ownership was transferred to the hash above */
	if (target_port_dup)
		kmem_free(target_port_dup, strlen(target_port_dup) + 1);

	/* call mdi to create kstats with the name we built */
	(void) mdi_pi_kstat_create(pip, ks_name);
}
5048
5049/* ARGSUSED */
5050static int
5051vhci_pathinfo_online(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags)
5052{
5053	scsi_hba_tran_t			*hba = NULL;
5054	struct scsi_device		*psd = NULL;
5055	scsi_vhci_lun_t			*vlun = NULL;
5056	dev_info_t			*pdip = NULL;
5057	dev_info_t			*cdip;
5058	dev_info_t			*tgt_dip;
5059	struct scsi_vhci		*vhci;
5060	char				*guid;
5061	struct scsi_failover_ops	*sfo;
5062	scsi_vhci_priv_t		*svp = NULL;
5063	struct scsi_address		*ap;
5064	struct scsi_pkt			*pkt;
5065	int				rval = MDI_FAILURE;
5066	mpapi_item_list_t		*list_ptr;
5067	mpapi_lu_data_t			*ld;
5068
5069	ASSERT(vdip != NULL);
5070	ASSERT(pip != NULL);
5071
5072	vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(vdip));
5073	ASSERT(vhci != NULL);
5074
5075	pdip = mdi_pi_get_phci(pip);
5076	hba = ddi_get_driver_private(pdip);
5077	ASSERT(hba != NULL);
5078
5079	svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
5080	ASSERT(svp != NULL);
5081
5082	cdip = mdi_pi_get_client(pip);
5083	ASSERT(cdip != NULL);
5084	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, PROPFLAGS,
5085	    MDI_CLIENT_GUID_PROP, &guid) != DDI_SUCCESS) {
5086		VHCI_DEBUG(1, (CE_WARN, NULL, "vhci_path_online: lun guid "
5087		    "property failed"));
5088		goto failure;
5089	}
5090
5091	vlun = vhci_lun_lookup(cdip);
5092	ASSERT(vlun != NULL);
5093
5094	ddi_prop_free(guid);
5095
5096	vlun->svl_dip = mdi_pi_get_client(pip);
5097	ASSERT(vlun->svl_dip != NULL);
5098
5099	psd = svp->svp_psd;
5100	ASSERT(psd != NULL);
5101
5102	ap = &psd->sd_address;
5103
5104	/*
5105	 * Get inquiry data into pathinfo related scsi_device structure.
5106	 * Free sq_inq when pathinfo related scsi_device structure is destroyed
5107	 * by vhci_pathinfo_uninit(). In other words, vhci maintains its own
5108	 * copy of scsi_device and scsi_inquiry data on a per-path basis.
5109	 */
5110	if (scsi_probe(psd, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
5111		VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_pathinfo_online: "
5112		    "scsi_probe failed path:%p rval:%x\n", (void *)pip, rval));
5113		rval = MDI_FAILURE;
5114		goto failure;
5115	}
5116
5117	/*
5118	 * See if we have a failover module to support the device.
5119	 *
5120	 * We re-probe to determine the failover ops for each path. This
5121	 * is done in case there are any path-specific side-effects associated
5122	 * with the sfo_device_probe implementation.
5123	 *
5124	 * Give the first successfull sfo_device_probe the opportunity to
5125	 * establish 'ctpriv', vlun/client private data. The ctpriv will
5126	 * then be passed into the failover module on all other sfo_device_*()
5127	 * operations (and must be freed by sfo_device_unprobe implementation).
5128	 *
5129	 * NOTE: While sfo_device_probe is done once per path,
5130	 * sfo_device_unprobe only occurs once - when the vlun is destroyed.
5131	 *
5132	 * NOTE: We don't currently support per-path fops private data
5133	 * mechanism.
5134	 */
5135	sfo = vhci_dev_fo(vdip, psd,
5136	    &vlun->svl_fops_ctpriv, &vlun->svl_fops_name);
5137
5138	/* check path configuration result with current vlun state */
5139	if (((sfo && vlun->svl_fops) && (sfo != vlun->svl_fops)) ||
5140	    (sfo && vlun->svl_not_supported) ||
5141	    ((sfo == NULL) && vlun->svl_fops)) {
5142		/* Getting different results for different paths. */
5143		VHCI_DEBUG(1, (CE_NOTE, vhci->vhci_dip,
5144		    "!vhci_pathinfo_online: dev (path 0x%p) contradiction\n",
5145		    (void *)pip));
5146		cmn_err(CE_WARN, "scsi_vhci: failover contradiction: "
5147		    "'%s'.vs.'%s': path %s\n",
5148		    vlun->svl_fops ? vlun->svl_fops->sfo_name : "NULL",
5149		    sfo ? sfo->sfo_name : "NULL", mdi_pi_pathname(pip));
5150		vlun->svl_not_supported = 1;
5151		rval = MDI_NOT_SUPPORTED;
5152		goto done;
5153	} else if (sfo == NULL) {
5154		/* No failover module - device not supported under vHCI.  */
5155		VHCI_DEBUG(1, (CE_NOTE, vhci->vhci_dip,
5156		    "!vhci_pathinfo_online: dev (path 0x%p) not "
5157		    "supported\n", (void *)pip));
5158
5159		/* XXX does this contradict vhci_is_dev_supported ? */
5160		vlun->svl_not_supported = 1;
5161		rval = MDI_NOT_SUPPORTED;
5162		goto done;
5163	}
5164
5165	/* failover supported for device - save failover_ops in vlun */
5166	vlun->svl_fops = sfo;
5167	ASSERT(vlun->svl_fops_name != NULL);
5168
5169	/*
5170	 * Obtain the device-type based mpxio options as specified in
5171	 * scsi_vhci.conf file.
5172	 *
5173	 * NOTE: currently, the end result is a call to
5174	 * mdi_set_lb_region_size().
5175	 */
5176	tgt_dip = psd->sd_dev;
5177	ASSERT(tgt_dip != NULL);
5178	vhci_get_device_type_mpxio_options(vdip, tgt_dip, psd);
5179
5180	/*
5181	 * if PGR is active, revalidate key and register on this path also,
5182	 * if key is still valid
5183	 */
5184	sema_p(&vlun->svl_pgr_sema);
5185	if (vlun->svl_pgr_active) {
5186		rval = vhci_pgr_validate_and_register(svp);
5187		if (rval != 1) {
5188			rval = MDI_FAILURE;
5189			sema_v(&vlun->svl_pgr_sema);
5190			goto failure;
5191		}
5192	}
5193	sema_v(&vlun->svl_pgr_sema);
5194
5195	if (svp->svp_new_path) {
5196		/*
5197		 * Last chance to perform any cleanup operations on this
5198		 * new path before making this path completely online.
5199		 */
5200		svp->svp_new_path = 0;
5201
5202		/*
5203		 * If scsi_vhci knows the lun is alread RESERVE'd,
5204		 * then skip the issue of RELEASE on new path.
5205		 */
5206		if ((vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) == 0) {
5207			/*
5208			 * Issue SCSI-2 RELEASE only for the first time on
5209			 * a new path just in case the host rebooted and
5210			 * a reservation is still pending on this path.
5211			 * IBM Shark storage does not clear RESERVE upon
5212			 * host reboot.
5213			 */
5214			pkt = scsi_init_pkt(ap, NULL, NULL, CDB_GROUP0,
5215			    sizeof (struct scsi_arq_status), 0, 0,
5216			    SLEEP_FUNC, NULL);
5217			if (pkt == NULL) {
5218				VHCI_DEBUG(1, (CE_NOTE, NULL,
5219				    "!vhci_pathinfo_online: "
5220				    "Release init_pkt failed :%p\n",
5221				    (void *)pip));
5222				rval = MDI_FAILURE;
5223				goto failure;
5224			}
5225			pkt->pkt_cdbp[0] = SCMD_RELEASE;
5226			pkt->pkt_time = 60;
5227
5228			VHCI_DEBUG(1, (CE_NOTE, NULL,
5229			    "!vhci_path_online: path:%p "
5230			    "Issued SCSI-2 RELEASE\n", (void *)pip));
5231
5232			/* Ignore the return value */
5233			(void) vhci_do_scsi_cmd(pkt);
5234			scsi_destroy_pkt(pkt);
5235		}
5236	}
5237
5238	rval = vhci_update_pathinfo(psd, pip, sfo, vlun, vhci);
5239	if (rval == MDI_FAILURE) {
5240		goto failure;
5241	}
5242
5243	/* Initialize MP-API data */
5244	vhci_update_mpapi_data(vhci, vlun, pip);
5245
5246	/*
5247	 * MP-API also needs the Inquiry data to be maintained in the
5248	 * mp_vendor_prop_t structure, so find the lun and update its
5249	 * structure with this data.
5250	 */
5251	list_ptr = (mpapi_item_list_t *)vhci_get_mpapi_item(vhci, NULL,
5252	    MP_OBJECT_TYPE_MULTIPATH_LU, (void *)vlun);
5253	ld = (mpapi_lu_data_t *)list_ptr->item->idata;
5254	if (ld != NULL) {
5255		bcopy(psd->sd_inq->inq_vid, ld->prop.prodInfo.vendor, 8);
5256		bcopy(psd->sd_inq->inq_pid, ld->prop.prodInfo.product, 16);
5257		bcopy(psd->sd_inq->inq_revision, ld->prop.prodInfo.revision, 4);
5258	} else {
5259		VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_pathinfo_online: "
5260		    "mpapi_lu_data_t is NULL"));
5261	}
5262
5263	/* create kstats for path */
5264	vhci_kstat_create_pathinfo(pip);
5265
5266done:
5267	mutex_enter(&vhci_global_mutex);
5268	cv_broadcast(&vhci_cv);
5269	mutex_exit(&vhci_global_mutex);
5270
5271	if (vlun->svl_setcap_done) {
5272		(void) vhci_pHCI_cap(ap, "sector-size",
5273		    vlun->svl_sector_size, 1, pip);
5274	}
5275
5276	VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_path_online: path:%p\n",
5277	    (void *)pip));
5278
5279failure:
5280	return (rval);
5281}
5282
5283/*
5284 * path offline handler.  Release all bindings that will not be
5285 * released by the normal packet transport/completion code path.
5286 * Since we don't (presently) keep any bindings alive outside of
5287 * the in-transport packets (which will be released on completion)
5288 * there is not much to do here.
5289 */
5290/* ARGSUSED */
5291static int
5292vhci_pathinfo_offline(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags)
5293{
5294	scsi_hba_tran_t		*hba = NULL;
5295	struct scsi_device	*psd = NULL;
5296	dev_info_t		*pdip = NULL;
5297	dev_info_t		*cdip = NULL;
5298	scsi_vhci_priv_t	*svp = NULL;
5299
5300	ASSERT(vdip != NULL);
5301	ASSERT(pip != NULL);
5302
5303	pdip = mdi_pi_get_phci(pip);
5304	ASSERT(pdip != NULL);
5305	if (pdip == NULL) {
5306		VHCI_DEBUG(1, (CE_WARN, vdip, "Invalid path 0x%p: NULL "
5307		    "phci dip", (void *)pip));
5308		return (MDI_FAILURE);
5309	}
5310
5311	cdip = mdi_pi_get_client(pip);
5312	ASSERT(cdip != NULL);
5313	if (cdip == NULL) {
5314		VHCI_DEBUG(1, (CE_WARN, vdip, "Invalid path 0x%p: NULL "
5315		    "client dip", (void *)pip));
5316		return (MDI_FAILURE);
5317	}
5318
5319	hba = ddi_get_driver_private(pdip);
5320	ASSERT(hba != NULL);
5321
5322	svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
5323	if (svp == NULL) {
5324		/*
5325		 * mdi_pathinfo node in INIT state can have vHCI private
5326		 * information set to null
5327		 */
5328		VHCI_DEBUG(1, (CE_NOTE, vdip, "!vhci_pathinfo_offline: "
5329		    "svp is NULL for pip 0x%p\n", (void *)pip));
5330		return (MDI_SUCCESS);
5331	}
5332
5333	psd = svp->svp_psd;
5334	ASSERT(psd != NULL);
5335
5336	mutex_enter(&svp->svp_mutex);
5337
5338	VHCI_DEBUG(1, (CE_NOTE, vdip, "!vhci_pathinfo_offline: "
5339	    "%d cmds pending on path: 0x%p\n", svp->svp_cmds, (void *)pip));
5340	while (svp->svp_cmds != 0) {
5341		if (cv_reltimedwait(&svp->svp_cv, &svp->svp_mutex,
5342		    drv_usectohz(vhci_path_quiesce_timeout * 1000000),
5343		    TR_CLOCK_TICK) == -1) {
5344			/*
5345			 * The timeout time reached without the condition
5346			 * being signaled.
5347			 */
5348			VHCI_DEBUG(1, (CE_NOTE, vdip, "!vhci_pathinfo_offline: "
5349			    "Timeout reached on path 0x%p without the cond\n",
5350			    (void *)pip));
5351			VHCI_DEBUG(1, (CE_NOTE, vdip, "!vhci_pathinfo_offline: "
5352			    "%d cmds still pending on path: 0x%p\n",
5353			    svp->svp_cmds, (void *)pip));
5354			break;
5355		}
5356	}
5357	mutex_exit(&svp->svp_mutex);
5358
5359	/*
5360	 * Check to see if this vlun has an active SCSI-II RESERVE. And this
5361	 * is the pip for the path that has been reserved.
5362	 * If so clear the reservation by sending a reset, so the host will not
5363	 * get a reservation conflict.  Reset the flag VLUN_RESERVE_ACTIVE_FLG
5364	 * for this lun.  Also a reset notify is sent to the target driver
5365	 * just in case the POR check condition is cleared by some other layer
5366	 * in the stack.
5367	 */
5368	if (svp->svp_svl->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
5369		if (pip == svp->svp_svl->svl_resrv_pip) {
5370			if (vhci_recovery_reset(svp->svp_svl,
5371			    &svp->svp_psd->sd_address, TRUE,
5372			    VHCI_DEPTH_TARGET) == 0) {
5373				VHCI_DEBUG(1, (CE_NOTE, NULL,
5374				    "!vhci_pathinfo_offline (pip:%p):"
5375				    "reset failed, retrying\n", (void *)pip));
5376				delay(1 * drv_usectohz(1000000));
5377				if (vhci_recovery_reset(svp->svp_svl,
5378				    &svp->svp_psd->sd_address, TRUE,
5379				    VHCI_DEPTH_TARGET) == 0) {
5380					VHCI_DEBUG(1, (CE_NOTE, NULL,
5381					    "!vhci_pathinfo_offline "
5382					    "(pip:%p): reset failed, "
5383					    "giving up!\n", (void *)pip));
5384				}
5385			}
5386			svp->svp_svl->svl_flags &= ~VLUN_RESERVE_ACTIVE_FLG;
5387		}
5388	}
5389
5390	mdi_pi_set_state(pip, MDI_PATHINFO_STATE_OFFLINE);
5391	vhci_mpapi_set_path_state(vdip, pip, MP_DRVR_PATH_STATE_REMOVED);
5392
5393	VHCI_DEBUG(1, (CE_NOTE, NULL,
5394	    "!vhci_pathinfo_offline: offlined path 0x%p\n", (void *)pip));
5395	return (MDI_SUCCESS);
5396}
5397
5398
5399/*
5400 * routine for SCSI VHCI IOCTL implementation.
5401 */
5402/* ARGSUSED */
5403static int
5404vhci_ctl(dev_t dev, int cmd, intptr_t data, int mode, cred_t *credp, int *rval)
5405{
5406	struct scsi_vhci		*vhci;
5407	dev_info_t			*vdip;
5408	mdi_pathinfo_t			*pip;
5409	int				instance, held;
5410	int				retval = 0;
5411	caddr_t				phci_path = NULL, client_path = NULL;
5412	caddr_t				paddr = NULL;
5413	sv_iocdata_t			ioc;
5414	sv_iocdata_t			*pioc = &ioc;
5415	sv_switch_to_cntlr_iocdata_t	iocsc;
5416	sv_switch_to_cntlr_iocdata_t	*piocsc = &iocsc;
5417	caddr_t				s;
5418	scsi_vhci_lun_t			*vlun;
5419	struct scsi_failover_ops	*fo;
5420	char				*pclass;
5421
5422	/* Check for validity of vhci structure */
5423	vhci = ddi_get_soft_state(vhci_softstate, MINOR2INST(getminor(dev)));
5424	if (vhci == NULL) {
5425		return (ENXIO);
5426	}
5427
5428	mutex_enter(&vhci->vhci_mutex);
5429	if ((vhci->vhci_state & VHCI_STATE_OPEN) == 0) {
5430		mutex_exit(&vhci->vhci_mutex);
5431		return (ENXIO);
5432	}
5433	mutex_exit(&vhci->vhci_mutex);
5434
5435	/* Get the vhci dip */
5436	vdip = vhci->vhci_dip;
5437	ASSERT(vdip != NULL);
5438	instance = ddi_get_instance(vdip);
5439
5440	/* Allocate memory for getting parameters from userland */
5441	phci_path	= kmem_zalloc(MAXPATHLEN, KM_SLEEP);
5442	client_path	= kmem_zalloc(MAXPATHLEN, KM_SLEEP);
5443	paddr		= kmem_zalloc(MAXNAMELEN, KM_SLEEP);
5444
5445	/*
5446	 * Set a local variable indicating the ioctl name. Used for
5447	 * printing debug strings.
5448	 */
5449	switch (cmd) {
5450	case SCSI_VHCI_GET_CLIENT_MULTIPATH_INFO:
5451		s = "GET_CLIENT_MULTIPATH_INFO";
5452		break;
5453
5454	case SCSI_VHCI_GET_PHCI_MULTIPATH_INFO:
5455		s = "GET_PHCI_MULTIPATH_INFO";
5456		break;
5457
5458	case SCSI_VHCI_GET_CLIENT_NAME:
5459		s = "GET_CLIENT_NAME";
5460		break;
5461
5462	case SCSI_VHCI_PATH_ONLINE:
5463		s = "PATH_ONLINE";
5464		break;
5465
5466	case SCSI_VHCI_PATH_OFFLINE:
5467		s = "PATH_OFFLINE";
5468		break;
5469
5470	case SCSI_VHCI_PATH_STANDBY:
5471		s = "PATH_STANDBY";
5472		break;
5473
5474	case SCSI_VHCI_PATH_TEST:
5475		s = "PATH_TEST";
5476		break;
5477
5478	case SCSI_VHCI_SWITCH_TO_CNTLR:
5479		s = "SWITCH_TO_CNTLR";
5480		break;
5481	case SCSI_VHCI_PATH_DISABLE:
5482		s = "PATH_DISABLE";
5483		break;
5484	case SCSI_VHCI_PATH_ENABLE:
5485		s = "PATH_ENABLE";
5486		break;
5487
5488	case SCSI_VHCI_GET_TARGET_LONGNAME:
5489		s = "GET_TARGET_LONGNAME";
5490		break;
5491
5492#ifdef	DEBUG
5493	case SCSI_VHCI_CONFIGURE_PHCI:
5494		s = "CONFIGURE_PHCI";
5495		break;
5496
5497	case SCSI_VHCI_UNCONFIGURE_PHCI:
5498		s = "UNCONFIGURE_PHCI";
5499		break;
5500#endif
5501
5502	default:
5503		s = "Unknown";
5504		vhci_log(CE_NOTE, vdip,
5505		    "!vhci%d: ioctl %x (unsupported ioctl)", instance, cmd);
5506		retval = ENOTSUP;
5507		break;
5508	}
5509	if (retval != 0) {
5510		goto end;
5511	}
5512
5513	VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci%d: ioctl <%s>", instance, s));
5514
5515	/*
5516	 * Get IOCTL parameters from userland
5517	 */
5518	switch (cmd) {
5519	case SCSI_VHCI_GET_CLIENT_MULTIPATH_INFO:
5520	case SCSI_VHCI_GET_PHCI_MULTIPATH_INFO:
5521	case SCSI_VHCI_GET_CLIENT_NAME:
5522	case SCSI_VHCI_PATH_ONLINE:
5523	case SCSI_VHCI_PATH_OFFLINE:
5524	case SCSI_VHCI_PATH_STANDBY:
5525	case SCSI_VHCI_PATH_TEST:
5526	case SCSI_VHCI_PATH_DISABLE:
5527	case SCSI_VHCI_PATH_ENABLE:
5528	case SCSI_VHCI_GET_TARGET_LONGNAME:
5529#ifdef	DEBUG
5530	case SCSI_VHCI_CONFIGURE_PHCI:
5531	case SCSI_VHCI_UNCONFIGURE_PHCI:
5532#endif
5533		retval = vhci_get_iocdata((const void *)data, pioc, mode, s);
5534		break;
5535
5536	case SCSI_VHCI_SWITCH_TO_CNTLR:
5537		retval = vhci_get_iocswitchdata((const void *)data, piocsc,
5538		    mode, s);
5539		break;
5540	}
5541	if (retval != 0) {
5542		goto end;
5543	}
5544
5545
5546	/*
5547	 * Process the IOCTL
5548	 */
5549	switch (cmd) {
5550	case SCSI_VHCI_GET_CLIENT_MULTIPATH_INFO:
5551	{
5552		uint_t		num_paths;	/* Num paths to client dev */
5553		sv_path_info_t	*upibuf = NULL;	/* To keep userland values */
5554		sv_path_info_t	*kpibuf = NULL; /* Kernel data for ioctls */
5555		dev_info_t	*cdip;		/* Client device dip */
5556
5557		if (pioc->ret_elem == NULL) {
5558			retval = EINVAL;
5559			break;
5560		}
5561
5562		/* Get client device path from user land */
5563		if (vhci_ioc_get_client_path(pioc, client_path, mode, s)) {
5564			retval = EFAULT;
5565			break;
5566		}
5567
5568		VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5569		    "client <%s>", s, client_path));
5570
5571		/* Get number of paths to this client device */
5572		if ((cdip = mdi_client_path2devinfo(vdip, client_path))
5573		    == NULL) {
5574			retval = ENXIO;
5575			VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5576			    "client dip doesn't exist. invalid path <%s>",
5577			    s, client_path));
5578			break;
5579		}
5580		num_paths = mdi_client_get_path_count(cdip);
5581
5582		if (ddi_copyout(&num_paths, pioc->ret_elem,
5583		    sizeof (num_paths), mode)) {
5584			VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5585			    "num_paths copyout failed", s));
5586			retval = EFAULT;
5587			break;
5588		}
5589
5590		/* If  user just wanted num_paths, then return */
5591		if (pioc->buf_elem == 0 || pioc->ret_buf == NULL ||
5592		    num_paths == 0) {
5593			break;
5594		}
5595
5596		/* Set num_paths to value as much as can be sent to userland */
5597		if (num_paths > pioc->buf_elem) {
5598			num_paths = pioc->buf_elem;
5599		}
5600
5601		/* Allocate memory and get userland pointers */
5602		if (vhci_ioc_alloc_pathinfo(&upibuf, &kpibuf, num_paths,
5603		    pioc, mode, s) != 0) {
5604			retval = EFAULT;
5605			break;
5606		}
5607		ASSERT(upibuf != NULL);
5608		ASSERT(kpibuf != NULL);
5609
5610		/*
5611		 * Get the path information and send it to userland.
5612		 */
5613		if (vhci_get_client_path_list(cdip, kpibuf, num_paths)
5614		    != MDI_SUCCESS) {
5615			retval = ENXIO;
5616			vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5617			break;
5618		}
5619
5620		if (vhci_ioc_send_pathinfo(upibuf, kpibuf, num_paths,
5621		    pioc, mode, s)) {
5622			retval = EFAULT;
5623			vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5624			break;
5625		}
5626
5627		/* Free the memory allocated for path information */
5628		vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5629		break;
5630	}
5631
5632	case SCSI_VHCI_GET_PHCI_MULTIPATH_INFO:
5633	{
5634		uint_t		num_paths;	/* Num paths to client dev */
5635		sv_path_info_t	*upibuf = NULL;	/* To keep userland values */
5636		sv_path_info_t	*kpibuf = NULL; /* Kernel data for ioctls */
5637		dev_info_t	*pdip;		/* PHCI device dip */
5638
5639		if (pioc->ret_elem == NULL) {
5640			retval = EINVAL;
5641			break;
5642		}
5643
5644		/* Get PHCI device path from user land */
5645		if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s)) {
5646			retval = EFAULT;
5647			break;
5648		}
5649
5650		VHCI_DEBUG(6, (CE_WARN, vdip,
5651		    "!vhci_ioctl: ioctl <%s> phci <%s>", s, phci_path));
5652
5653		/* Get number of devices associated with this PHCI device */
5654		if ((pdip = mdi_phci_path2devinfo(vdip, phci_path)) == NULL) {
5655			VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5656			    "phci dip doesn't exist. invalid path <%s>",
5657			    s, phci_path));
5658			retval = ENXIO;
5659			break;
5660		}
5661
5662		num_paths = mdi_phci_get_path_count(pdip);
5663
5664		if (ddi_copyout(&num_paths, pioc->ret_elem,
5665		    sizeof (num_paths), mode)) {
5666			VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5667			    "num_paths copyout failed", s));
5668			retval = EFAULT;
5669			break;
5670		}
5671
5672		/* If  user just wanted num_paths, then return */
5673		if (pioc->buf_elem == 0 || pioc->ret_buf == NULL ||
5674		    num_paths == 0) {
5675			break;
5676		}
5677
5678		/* Set num_paths to value as much as can be sent to userland */
5679		if (num_paths > pioc->buf_elem) {
5680			num_paths = pioc->buf_elem;
5681		}
5682
5683		/* Allocate memory and get userland pointers */
5684		if (vhci_ioc_alloc_pathinfo(&upibuf, &kpibuf, num_paths,
5685		    pioc, mode, s) != 0) {
5686			retval = EFAULT;
5687			break;
5688		}
5689		ASSERT(upibuf != NULL);
5690		ASSERT(kpibuf != NULL);
5691
5692		/*
5693		 * Get the path information and send it to userland.
5694		 */
5695		if (vhci_get_phci_path_list(pdip, kpibuf, num_paths)
5696		    != MDI_SUCCESS) {
5697			retval = ENXIO;
5698			vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5699			break;
5700		}
5701
5702		if (vhci_ioc_send_pathinfo(upibuf, kpibuf, num_paths,
5703		    pioc, mode, s)) {
5704			retval = EFAULT;
5705			vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5706			break;
5707		}
5708
5709		/* Free the memory allocated for path information */
5710		vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5711		break;
5712	}
5713
5714	case SCSI_VHCI_GET_CLIENT_NAME:
5715	{
5716		dev_info_t		*cdip, *pdip;
5717
5718		/* Get PHCI path and device address from user land */
5719		if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s) ||
5720		    vhci_ioc_get_paddr(pioc, paddr, mode, s)) {
5721			retval = EFAULT;
5722			break;
5723		}
5724
5725		VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5726		    "phci <%s>, paddr <%s>", s, phci_path, paddr));
5727
5728		/* Get the PHCI dip */
5729		if ((pdip = mdi_phci_path2devinfo(vdip, phci_path)) == NULL) {
5730			VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5731			    "phci dip doesn't exist. invalid path <%s>",
5732			    s, phci_path));
5733			retval = ENXIO;
5734			break;
5735		}
5736
5737		if ((pip = mdi_pi_find(pdip, NULL, paddr)) == NULL) {
5738			VHCI_DEBUG(1, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5739			    "pathinfo doesn't exist. invalid device addr", s));
5740			retval = ENXIO;
5741			break;
5742		}
5743
5744		/* Get the client device pathname and send to userland */
5745		cdip = mdi_pi_get_client(pip);
5746		vhci_ioc_devi_to_path(cdip, client_path);
5747
5748		VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5749		    "client <%s>", s, client_path));
5750
5751		if (vhci_ioc_send_client_path(client_path, pioc, mode, s)) {
5752			retval = EFAULT;
5753			break;
5754		}
5755		break;
5756	}
5757
5758	case SCSI_VHCI_PATH_ONLINE:
5759	case SCSI_VHCI_PATH_OFFLINE:
5760	case SCSI_VHCI_PATH_STANDBY:
5761	case SCSI_VHCI_PATH_TEST:
5762	{
5763		dev_info_t		*pdip;	/* PHCI dip */
5764
5765		/* Get PHCI path and device address from user land */
5766		if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s) ||
5767		    vhci_ioc_get_paddr(pioc, paddr, mode, s)) {
5768			retval = EFAULT;
5769			break;
5770		}
5771
5772		VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5773		    "phci <%s>, paddr <%s>", s, phci_path, paddr));
5774
5775		/* Get the PHCI dip */
5776		if ((pdip = mdi_phci_path2devinfo(vdip, phci_path)) == NULL) {
5777			VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5778			    "phci dip doesn't exist. invalid path <%s>",
5779			    s, phci_path));
5780			retval = ENXIO;
5781			break;
5782		}
5783
5784		if ((pip = mdi_pi_find(pdip, NULL, paddr)) == NULL) {
5785			VHCI_DEBUG(1, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5786			    "pathinfo doesn't exist. invalid device addr", s));
5787			retval = ENXIO;
5788			break;
5789		}
5790
5791		VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5792		    "Calling MDI function to change device state", s));
5793
5794		switch (cmd) {
5795		case SCSI_VHCI_PATH_ONLINE:
5796			retval = mdi_pi_online(pip, 0);
5797			break;
5798
5799		case SCSI_VHCI_PATH_OFFLINE:
5800			retval = mdi_pi_offline(pip, 0);
5801			break;
5802
5803		case SCSI_VHCI_PATH_STANDBY:
5804			retval = mdi_pi_standby(pip, 0);
5805			break;
5806
5807		case SCSI_VHCI_PATH_TEST:
5808			break;
5809		}
5810		break;
5811	}
5812
5813	case SCSI_VHCI_SWITCH_TO_CNTLR:
5814	{
5815		dev_info_t *cdip;
5816		struct scsi_device *devp;
5817
5818		/* Get the client device pathname */
5819		if (ddi_copyin(piocsc->client, client_path,
5820		    MAXPATHLEN, mode)) {
5821			VHCI_DEBUG(2, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5822			    "client_path copyin failed", s));
5823			retval = EFAULT;
5824			break;
5825		}
5826
5827		/* Get the path class to which user wants to switch */
5828		if (ddi_copyin(piocsc->class, paddr, MAXNAMELEN, mode)) {
5829			VHCI_DEBUG(2, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5830			    "controller_class copyin failed", s));
5831			retval = EFAULT;
5832			break;
5833		}
5834
5835		/* Perform validity checks */
5836		if ((cdip = mdi_client_path2devinfo(vdip,
5837		    client_path)) == NULL) {
5838			VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5839			    "client dip doesn't exist. invalid path <%s>",
5840			    s, client_path));
5841			retval = ENXIO;
5842			break;
5843		}
5844
5845		VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: Calling MDI func "
5846		    "to switch controller"));
5847		VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: client <%s> "
5848		    "class <%s>", client_path, paddr));
5849
5850		if (strcmp(paddr, PCLASS_PRIMARY) &&
5851		    strcmp(paddr, PCLASS_SECONDARY)) {
5852			VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5853			    "invalid path class <%s>", s, paddr));
5854			retval = ENXIO;
5855			break;
5856		}
5857
5858		devp = ddi_get_driver_private(cdip);
5859		if (devp == NULL) {
5860			VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5861			    "invalid scsi device <%s>", s, client_path));
5862			retval = ENXIO;
5863			break;
5864		}
5865		vlun = ADDR2VLUN(&devp->sd_address);
5866		ASSERT(vlun);
5867
5868		/*
5869		 * Checking to see if device has only one pclass, PRIMARY.
5870		 * If so this device doesn't support failovers.  Assumed
5871		 * that the devices with one pclass is PRIMARY, as thats the
5872		 * case today.  If this is not true and in future other
5873		 * symmetric devices are supported with other pclass, this
5874		 * IOCTL shall have to be overhauled anyways as now the only
5875		 * arguments it accepts are PRIMARY and SECONDARY.
5876		 */
5877		fo = vlun->svl_fops;
5878		if (fo->sfo_pathclass_next(PCLASS_PRIMARY, &pclass,
5879		    vlun->svl_fops_ctpriv)) {
5880			retval = ENOTSUP;
5881			break;
5882		}
5883
5884		VHCI_HOLD_LUN(vlun, VH_SLEEP, held);
5885		mutex_enter(&vlun->svl_mutex);
5886		if (vlun->svl_active_pclass != NULL) {
5887			if (strcmp(vlun->svl_active_pclass, paddr) == 0) {
5888				mutex_exit(&vlun->svl_mutex);
5889				retval = EALREADY;
5890				VHCI_RELEASE_LUN(vlun);
5891				break;
5892			}
5893		}
5894		mutex_exit(&vlun->svl_mutex);
5895		/* Call mdi function to cause  a switch over */
5896		retval = mdi_failover(vdip, cdip, MDI_FAILOVER_SYNC);
5897		if (retval == MDI_SUCCESS) {
5898			retval = 0;
5899		} else if (retval == MDI_BUSY) {
5900			retval = EBUSY;
5901		} else {
5902			retval = EIO;
5903		}
5904		VHCI_RELEASE_LUN(vlun);
5905		break;
5906	}
5907
5908	case SCSI_VHCI_PATH_ENABLE:
5909	case SCSI_VHCI_PATH_DISABLE:
5910	{
5911		dev_info_t	*cdip, *pdip;
5912
5913		/*
5914		 * Get client device path from user land
5915		 */
5916		if (vhci_ioc_get_client_path(pioc, client_path, mode, s)) {
5917			retval = EFAULT;
5918			break;
5919		}
5920
5921		/*
5922		 * Get Phci device path from user land
5923		 */
5924		if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s)) {
5925			retval = EFAULT;
5926			break;
5927		}
5928
5929		/*
5930		 * Get the devinfo for the Phci.
5931		 */
5932		if ((pdip = mdi_phci_path2devinfo(vdip, phci_path)) == NULL) {
5933			VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5934			    "phci dip doesn't exist. invalid path <%s>",
5935			    s, phci_path));
5936			retval = ENXIO;
5937			break;
5938		}
5939
5940		/*
5941		 * If the client path is set to /scsi_vhci then we need
5942		 * to do the operation on all the clients so set cdip to NULL.
5943		 * Else, try to get the client dip.
5944		 */
5945		if (strcmp(client_path, "/scsi_vhci") == 0) {
5946			cdip = NULL;
5947		} else {
5948			if ((cdip = mdi_client_path2devinfo(vdip,
5949			    client_path)) == NULL) {
5950				retval = ENXIO;
5951				VHCI_DEBUG(1, (CE_WARN, NULL,
5952				    "!vhci_ioctl: ioctl <%s> client dip "
5953				    "doesn't exist. invalid path <%s>",
5954				    s, client_path));
5955				break;
5956			}
5957		}
5958
5959		if (cmd == SCSI_VHCI_PATH_ENABLE)
5960			retval = mdi_pi_enable(cdip, pdip, USER_DISABLE);
5961		else
5962			retval = mdi_pi_disable(cdip, pdip, USER_DISABLE);
5963
5964		break;
5965	}
5966
5967	case SCSI_VHCI_GET_TARGET_LONGNAME:
5968	{
5969		uint_t		pid = pioc->buf_elem;
5970		char		*target_port;
5971		mod_hash_val_t	hv;
5972
5973		/* targetmap lookup of 'target-port' by <pid> */
5974		if (mod_hash_find(vhci_targetmap_bypid,
5975		    (mod_hash_key_t)(uintptr_t)pid, &hv) != 0) {
5976			/*
5977			 * NOTE: failure to find the mapping is OK for guid
5978			 * based 'target-port' values.
5979			 */
5980			VHCI_DEBUG(3, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5981			    "targetport mapping doesn't exist: pid %d",
5982			    s, pid));
5983			retval = ENXIO;
5984			break;
5985		}
5986
5987		/* copyout 'target-port' result */
5988		target_port = (char *)hv;
5989		if (copyoutstr(target_port, pioc->addr, MAXNAMELEN, NULL)) {
5990			VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5991			    "targetport copyout failed: len: %d",
5992			    s, (int)strlen(target_port)));
5993			retval = EFAULT;
5994		}
5995		break;
5996	}
5997
5998#ifdef	DEBUG
5999	case SCSI_VHCI_CONFIGURE_PHCI:
6000	{
6001		dev_info_t		*pdip;
6002
6003		/* Get PHCI path and device address from user land */
6004		if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s)) {
6005			retval = EFAULT;
6006			break;
6007		}
6008
6009		VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
6010		    "phci <%s>", s, phci_path));
6011
6012		/* Get the PHCI dip */
6013		if ((pdip = e_ddi_hold_devi_by_path(phci_path, 0)) == NULL) {
6014			VHCI_DEBUG(3, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
6015			    "phci dip doesn't exist. invalid path <%s>",
6016			    s, phci_path));
6017			retval = ENXIO;
6018			break;
6019		}
6020
6021		if (ndi_devi_config(pdip,
6022		    NDI_DEVFS_CLEAN | NDI_DEVI_PERSIST) != NDI_SUCCESS) {
6023			retval = EIO;
6024		}
6025
6026		ddi_release_devi(pdip);
6027		break;
6028	}
6029
6030	case SCSI_VHCI_UNCONFIGURE_PHCI:
6031	{
6032		dev_info_t		*pdip;
6033
6034		/* Get PHCI path and device address from user land */
6035		if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s)) {
6036			retval = EFAULT;
6037			break;
6038		}
6039
6040		VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
6041		    "phci <%s>", s, phci_path));
6042
6043		/* Get the PHCI dip */
6044		if ((pdip = e_ddi_hold_devi_by_path(phci_path, 0)) == NULL) {
6045			VHCI_DEBUG(3, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
6046			    "phci dip doesn't exist. invalid path <%s>",
6047			    s, phci_path));
6048			retval = ENXIO;
6049			break;
6050		}
6051
6052		if (ndi_devi_unconfig(pdip,
6053		    NDI_DEVI_REMOVE | NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
6054			retval = EBUSY;
6055		}
6056
6057		ddi_release_devi(pdip);
6058		break;
6059	}
6060#endif
6061	}
6062
6063end:
6064	/* Free the memory allocated above */
6065	if (phci_path != NULL) {
6066		kmem_free(phci_path, MAXPATHLEN);
6067	}
6068	if (client_path != NULL) {
6069		kmem_free(client_path, MAXPATHLEN);
6070	}
6071	if (paddr != NULL) {
6072		kmem_free(paddr, MAXNAMELEN);
6073	}
6074	return (retval);
6075}
6076
6077/*
6078 * devctl IOCTL support for client device DR
6079 */
6080/* ARGSUSED */
6081int
6082vhci_devctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
6083    int *rvalp)
6084{
6085	dev_info_t *self;
6086	dev_info_t *child;
6087	scsi_hba_tran_t *hba;
6088	struct devctl_iocdata *dcp;
6089	struct scsi_vhci *vhci;
6090	int rv = 0;
6091	int retval = 0;
6092	scsi_vhci_priv_t *svp;
6093	mdi_pathinfo_t  *pip;
6094
6095	if ((vhci = ddi_get_soft_state(vhci_softstate,
6096	    MINOR2INST(getminor(dev)))) == NULL)
6097		return (ENXIO);
6098
6099	/*
6100	 * check if :devctl minor device has been opened
6101	 */
6102	mutex_enter(&vhci->vhci_mutex);
6103	if ((vhci->vhci_state & VHCI_STATE_OPEN) == 0) {
6104		mutex_exit(&vhci->vhci_mutex);
6105		return (ENXIO);
6106	}
6107	mutex_exit(&vhci->vhci_mutex);
6108
6109	self = vhci->vhci_dip;
6110	hba = ddi_get_driver_private(self);
6111	if (hba == NULL)
6112		return (ENXIO);
6113
6114	/*
6115	 * We can use the generic implementation for these ioctls
6116	 */
6117	switch (cmd) {
6118	case DEVCTL_DEVICE_GETSTATE:
6119	case DEVCTL_DEVICE_ONLINE:
6120	case DEVCTL_DEVICE_OFFLINE:
6121	case DEVCTL_DEVICE_REMOVE:
6122	case DEVCTL_BUS_GETSTATE:
6123		return (ndi_devctl_ioctl(self, cmd, arg, mode, 0));
6124	}
6125
6126	/*
6127	 * read devctl ioctl data
6128	 */
6129	if (ndi_dc_allochdl((void *)arg, &dcp) != NDI_SUCCESS)
6130		return (EFAULT);
6131
6132	switch (cmd) {
6133
6134	case DEVCTL_DEVICE_RESET:
6135		/*
6136		 * lookup and hold child device
6137		 */
6138		if ((child = ndi_devi_find(self, ndi_dc_getname(dcp),
6139		    ndi_dc_getaddr(dcp))) == NULL) {
6140			rv = ENXIO;
6141			break;
6142		}
6143		retval = mdi_select_path(child, NULL,
6144		    (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH),
6145		    NULL, &pip);
6146		if ((retval != MDI_SUCCESS) || (pip == NULL)) {
6147			VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioctl:"
6148			    "Unable to get a path, dip 0x%p", (void *)child));
6149			rv = ENXIO;
6150			break;
6151		}
6152		svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
6153		if (vhci_recovery_reset(svp->svp_svl,
6154		    &svp->svp_psd->sd_address, TRUE,
6155		    VHCI_DEPTH_TARGET) == 0) {
6156			VHCI_DEBUG(1, (CE_NOTE, NULL,
6157			    "!vhci_ioctl(pip:%p): "
6158			    "reset failed\n", (void *)pip));
6159			rv = ENXIO;
6160		}
6161		mdi_rele_path(pip);
6162		break;
6163
6164	case DEVCTL_BUS_QUIESCE:
6165	case DEVCTL_BUS_UNQUIESCE:
6166	case DEVCTL_BUS_RESET:
6167	case DEVCTL_BUS_RESETALL:
6168#ifdef	DEBUG
6169	case DEVCTL_BUS_CONFIGURE:
6170	case DEVCTL_BUS_UNCONFIGURE:
6171#endif
6172		rv = ENOTSUP;
6173		break;
6174
6175	default:
6176		rv = ENOTTY;
6177	} /* end of outer switch */
6178
6179	ndi_dc_freehdl(dcp);
6180	return (rv);
6181}
6182
6183/*
6184 * Routine to get the PHCI pathname from ioctl structures in userland
6185 */
6186/* ARGSUSED */
6187static int
6188vhci_ioc_get_phci_path(sv_iocdata_t *pioc, caddr_t phci_path,
6189    int mode, caddr_t s)
6190{
6191	int retval = 0;
6192
6193	if (ddi_copyin(pioc->phci, phci_path, MAXPATHLEN, mode)) {
6194		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioc_get_phci: ioctl <%s> "
6195		    "phci_path copyin failed", s));
6196		retval = EFAULT;
6197	}
6198	return (retval);
6199
6200}
6201
6202
6203/*
6204 * Routine to get the Client device pathname from ioctl structures in userland
6205 */
6206/* ARGSUSED */
6207static int
6208vhci_ioc_get_client_path(sv_iocdata_t *pioc, caddr_t client_path,
6209    int mode, caddr_t s)
6210{
6211	int retval = 0;
6212
6213	if (ddi_copyin(pioc->client, client_path, MAXPATHLEN, mode)) {
6214		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioc_get_client: "
6215		    "ioctl <%s> client_path copyin failed", s));
6216		retval = EFAULT;
6217	}
6218	return (retval);
6219}
6220
6221
6222/*
6223 * Routine to get physical device address from ioctl structure in userland
6224 */
6225/* ARGSUSED */
6226static int
6227vhci_ioc_get_paddr(sv_iocdata_t *pioc, caddr_t paddr, int mode, caddr_t s)
6228{
6229	int retval = 0;
6230
6231	if (ddi_copyin(pioc->addr, paddr, MAXNAMELEN, mode)) {
6232		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioc_get_paddr: "
6233		    "ioctl <%s> device addr copyin failed", s));
6234		retval = EFAULT;
6235	}
6236	return (retval);
6237}
6238
6239
6240/*
6241 * Routine to send client device pathname to userland.
6242 */
6243/* ARGSUSED */
6244static int
6245vhci_ioc_send_client_path(caddr_t client_path, sv_iocdata_t *pioc,
6246    int mode, caddr_t s)
6247{
6248	int retval = 0;
6249
6250	if (ddi_copyout(client_path, pioc->client, MAXPATHLEN, mode)) {
6251		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioc_send_client: "
6252		    "ioctl <%s> client_path copyout failed", s));
6253		retval = EFAULT;
6254	}
6255	return (retval);
6256}
6257
6258
6259/*
6260 * Routine to translated dev_info pointer (dip) to device pathname.
6261 */
6262static void
6263vhci_ioc_devi_to_path(dev_info_t *dip, caddr_t path)
6264{
6265	(void) ddi_pathname(dip, path);
6266}
6267
6268
6269/*
6270 * vhci_get_phci_path_list:
6271 *		get information about devices associated with a
6272 *		given PHCI device.
6273 *
6274 * Return Values:
6275 *		path information elements
6276 */
6277int
6278vhci_get_phci_path_list(dev_info_t *pdip, sv_path_info_t *pibuf,
6279    uint_t num_elems)
6280{
6281	uint_t			count, done;
6282	mdi_pathinfo_t		*pip;
6283	sv_path_info_t		*ret_pip;
6284	int			status;
6285	size_t			prop_size;
6286	int			circular;
6287
6288	/*
6289	 * Get the PHCI structure and retrieve the path information
6290	 * from the GUID hash table.
6291	 */
6292
6293	ret_pip = pibuf;
6294	count = 0;
6295
6296	ndi_devi_enter(pdip, &circular);
6297
6298	done = (count >= num_elems);
6299	pip = mdi_get_next_client_path(pdip, NULL);
6300	while (pip && !done) {
6301		mdi_pi_lock(pip);
6302		(void) ddi_pathname(mdi_pi_get_phci(pip),
6303		    ret_pip->device.ret_phci);
6304		(void) strcpy(ret_pip->ret_addr, mdi_pi_get_addr(pip));
6305		(void) mdi_pi_get_state2(pip, &ret_pip->ret_state,
6306		    &ret_pip->ret_ext_state);
6307
6308		status = mdi_prop_size(pip, &prop_size);
6309		if (status == MDI_SUCCESS && ret_pip->ret_prop.ret_buf_size) {
6310			*ret_pip->ret_prop.ret_buf_size = (uint_t)prop_size;
6311		}
6312
6313#ifdef DEBUG
6314		if (status != MDI_SUCCESS) {
6315			VHCI_DEBUG(2, (CE_WARN, NULL,
6316			    "!vhci_get_phci_path_list: "
6317			    "phci <%s>, prop size failure 0x%x",
6318			    ret_pip->device.ret_phci, status));
6319		}
6320#endif /* DEBUG */
6321
6322
6323		if (status == MDI_SUCCESS && ret_pip->ret_prop.buf &&
6324		    prop_size && ret_pip->ret_prop.buf_size >= prop_size) {
6325			status = mdi_prop_pack(pip,
6326			    &ret_pip->ret_prop.buf,
6327			    ret_pip->ret_prop.buf_size);
6328
6329#ifdef DEBUG
6330			if (status != MDI_SUCCESS) {
6331				VHCI_DEBUG(2, (CE_WARN, NULL,
6332				    "!vhci_get_phci_path_list: "
6333				    "phci <%s>, prop pack failure 0x%x",
6334				    ret_pip->device.ret_phci, status));
6335			}