1145e0143Sdh /*
2145e0143Sdh  * CDDL HEADER START
3145e0143Sdh  *
4145e0143Sdh  * The contents of this file are subject to the terms of the
5145e0143Sdh  * Common Development and Distribution License (the "License").
6145e0143Sdh  * You may not use this file except in compliance with the License.
7145e0143Sdh  *
8145e0143Sdh  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9145e0143Sdh  * or http://www.opensolaris.org/os/licensing.
10145e0143Sdh  * See the License for the specific language governing permissions
11145e0143Sdh  * and limitations under the License.
12145e0143Sdh  *
13145e0143Sdh  * When distributing Covered Code, include this CDDL HEADER in each
14145e0143Sdh  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15145e0143Sdh  * If applicable, add the following below this CDDL HEADER, with the
16145e0143Sdh  * fields enclosed by brackets "[]" replaced with your own identifying
17145e0143Sdh  * information: Portions Copyright [yyyy] [name of copyright owner]
18145e0143Sdh  *
19145e0143Sdh  * CDDL HEADER END
20658280b6SDavid Hollister  */
21658280b6SDavid Hollister /*
22658280b6SDavid Hollister  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
23145e0143Sdh  */
24145e0143Sdh 
25145e0143Sdh /*
26145e0143Sdh  * PM8001 device state recovery routines
27145e0143Sdh  */
28145e0143Sdh 
29145e0143Sdh #include <sys/scsi/adapters/pmcs/pmcs.h>
30145e0143Sdh 
31145e0143Sdh /*
32145e0143Sdh  * SAS Topology Configuration
33145e0143Sdh  */
346745c559SJesse Butler static void pmcs_ds_operational(pmcs_phy_t *pptr, pmcs_xscsi_t *tgt);
35145e0143Sdh static void pmcs_handle_ds_recovery_error(pmcs_phy_t *phyp,
36601c90f1SSrikanth, Ramana     pmcs_xscsi_t *tgt, pmcs_hw_t *pwp, const char *func_name,
37145e0143Sdh     char *reason_string);
38145e0143Sdh 
39145e0143Sdh /*
40145e0143Sdh  * Get device state.  Called with statlock and PHY lock held.
41145e0143Sdh  */
42145e0143Sdh static int
pmcs_get_dev_state(pmcs_hw_t * pwp,pmcs_phy_t * phyp,pmcs_xscsi_t * xp,uint8_t * ds)43145e0143Sdh pmcs_get_dev_state(pmcs_hw_t *pwp, pmcs_phy_t *phyp, pmcs_xscsi_t *xp,
44145e0143Sdh     uint8_t *ds)
45145e0143Sdh {
46145e0143Sdh 	uint32_t htag, *ptr, msg[PMCS_MSG_SIZE];
47145e0143Sdh 	int result;
48145e0143Sdh 	struct pmcwork *pwrk;
49145e0143Sdh 
50145e0143Sdh 	pmcs_prt(pwp, PMCS_PRT_DEBUG3, phyp, xp, "%s: tgt(0x%p)", __func__,
51145e0143Sdh 	    (void *)xp);
52145e0143Sdh 
53145e0143Sdh 	if (xp != NULL) {
54145e0143Sdh 		ASSERT(mutex_owned(&xp->statlock));
55145e0143Sdh 	}
56f96f3b56SSrikanth, Ramana 
57f96f3b56SSrikanth, Ramana 	if (phyp == NULL) {
58f96f3b56SSrikanth, Ramana 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, NULL, xp,
59f96f3b56SSrikanth, Ramana 		    "%s: PHY is NULL", __func__);
60f96f3b56SSrikanth, Ramana 		return (-1);
61f96f3b56SSrikanth, Ramana 	}
62145e0143Sdh 	ASSERT(mutex_owned(&phyp->phy_lock));
63145e0143Sdh 
64145e0143Sdh 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, phyp);
65145e0143Sdh 	if (pwrk == NULL) {
66145e0143Sdh 		pmcs_prt(pwp, PMCS_PRT_ERR, phyp, xp, pmcs_nowrk, __func__);
67145e0143Sdh 		return (-1);
68145e0143Sdh 	}
69145e0143Sdh 	pwrk->arg = msg;
70145e0143Sdh 	pwrk->dtype = phyp->dtype;
71145e0143Sdh 
72145e0143Sdh 	if (phyp->valid_device_id == 0) {
73145e0143Sdh 		pmcs_pwork(pwp, pwrk);
74145e0143Sdh 		pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, xp,
75145e0143Sdh 		    "%s: Invalid DeviceID", __func__);
76145e0143Sdh 		return (-1);
77145e0143Sdh 	}
78145e0143Sdh 	htag = pwrk->htag;
79145e0143Sdh 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
80145e0143Sdh 	    PMCIN_GET_DEVICE_STATE));
81145e0143Sdh 	msg[1] = LE_32(pwrk->htag);
82145e0143Sdh 	msg[2] = LE_32(phyp->device_id);
83601c90f1SSrikanth, Ramana 	CLEAN_MESSAGE(msg, 3);
84145e0143Sdh 
85145e0143Sdh 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
86145e0143Sdh 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
87145e0143Sdh 	if (ptr == NULL) {
88145e0143Sdh 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
89145e0143Sdh 		pmcs_pwork(pwp, pwrk);
90145e0143Sdh 		pmcs_prt(pwp, PMCS_PRT_ERR, phyp, xp, pmcs_nomsg, __func__);
91145e0143Sdh 		return (-1);
92145e0143Sdh 	}
93145e0143Sdh 	COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE);
94145e0143Sdh 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
95145e0143Sdh 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
96145e0143Sdh 
97145e0143Sdh 	if (xp != NULL) {
98145e0143Sdh 		mutex_exit(&xp->statlock);
99145e0143Sdh 	}
100145e0143Sdh 	pmcs_unlock_phy(phyp);
101145e0143Sdh 	WAIT_FOR(pwrk, 1000, result);
102145e0143Sdh 	pmcs_pwork(pwp, pwrk);
1033be32c0fSJesse Butler 	pmcs_lock_phy(phyp);
104145e0143Sdh 
105145e0143Sdh 	if (xp != NULL) {
106145e0143Sdh 		mutex_enter(&xp->statlock);
107145e0143Sdh 	}
108145e0143Sdh 
109145e0143Sdh 	if (result) {
110145e0143Sdh 		pmcs_timed_out(pwp, htag, __func__);
111145e0143Sdh 		pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, xp,
112145e0143Sdh 		    "%s: cmd timed out, returning", __func__);
113145e0143Sdh 		return (-1);
114145e0143Sdh 	}
115145e0143Sdh 	if (LE_32(msg[2]) == 0) {
116145e0143Sdh 		*ds = (uint8_t)(LE_32(msg[4]));
117145e0143Sdh 		if (xp == NULL) {
118145e0143Sdh 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
119145e0143Sdh 			    "%s: retrieved_ds=0x%x", __func__, *ds);
120145e0143Sdh 		} else if (*ds !=  xp->dev_state) {
121145e0143Sdh 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
122145e0143Sdh 			    "%s: retrieved_ds=0x%x, target_ds=0x%x", __func__,
123145e0143Sdh 			    *ds, xp->dev_state);
124145e0143Sdh 		}
125145e0143Sdh 		return (0);
126145e0143Sdh 	} else {
127145e0143Sdh 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
128145e0143Sdh 		    "%s: cmd failed Status(0x%x), returning ", __func__,
129145e0143Sdh 		    LE_32(msg[2]));
130145e0143Sdh 		return (-1);
131145e0143Sdh 	}
132145e0143Sdh }
133145e0143Sdh 
134145e0143Sdh /*
135145e0143Sdh  * Set device state.  Called with target's statlock and PHY lock held.
136145e0143Sdh  */
137145e0143Sdh static int
pmcs_set_dev_state(pmcs_hw_t * pwp,pmcs_phy_t * phyp,pmcs_xscsi_t * xp,uint8_t ds)138145e0143Sdh pmcs_set_dev_state(pmcs_hw_t *pwp, pmcs_phy_t *phyp, pmcs_xscsi_t *xp,
139145e0143Sdh     uint8_t ds)
140145e0143Sdh {
141145e0143Sdh 	uint32_t htag, *ptr, msg[PMCS_MSG_SIZE];
142145e0143Sdh 	int result;
143145e0143Sdh 	uint8_t pds, nds;
144145e0143Sdh 	struct pmcwork *pwrk;
145145e0143Sdh 
146145e0143Sdh 	pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
147145e0143Sdh 	    "%s: ds: 0x%x tgt: 0x%p phy: 0x%p", __func__, ds, (void *)xp,
148145e0143Sdh 	    (void *)phyp);
149145e0143Sdh 
150145e0143Sdh 	if (phyp == NULL) {
151145e0143Sdh 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, NULL, xp,
152145e0143Sdh 		    "%s: PHY is NULL", __func__);
153145e0143Sdh 		return (-1);
154145e0143Sdh 	}
155145e0143Sdh 
156145e0143Sdh 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, phyp);
157145e0143Sdh 	if (pwrk == NULL) {
158145e0143Sdh 		pmcs_prt(pwp, PMCS_PRT_ERR, phyp, xp, pmcs_nowrk, __func__);
159145e0143Sdh 		return (-1);
160145e0143Sdh 	}
161145e0143Sdh 	if (phyp->valid_device_id == 0) {
162145e0143Sdh 		pmcs_pwork(pwp, pwrk);
163145e0143Sdh 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
164145e0143Sdh 		    "%s: Invalid DeviceID", __func__);
165145e0143Sdh 		return (-1);
166145e0143Sdh 	}
167145e0143Sdh 	pwrk->arg = msg;
168145e0143Sdh 	pwrk->dtype = phyp->dtype;
169145e0143Sdh 	htag = pwrk->htag;
170145e0143Sdh 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
171145e0143Sdh 	    PMCIN_SET_DEVICE_STATE));
172145e0143Sdh 	msg[1] = LE_32(pwrk->htag);
173145e0143Sdh 	msg[2] = LE_32(phyp->device_id);
174145e0143Sdh 	msg[3] = LE_32(ds);
175601c90f1SSrikanth, Ramana 	CLEAN_MESSAGE(msg, 4);
176145e0143Sdh 
177145e0143Sdh 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
178145e0143Sdh 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
179145e0143Sdh 	if (ptr == NULL) {
180145e0143Sdh 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
181145e0143Sdh 		pmcs_pwork(pwp, pwrk);
182145e0143Sdh 		pmcs_prt(pwp, PMCS_PRT_ERR, phyp, xp, pmcs_nomsg, __func__);
183145e0143Sdh 		return (-1);
184145e0143Sdh 	}
185145e0143Sdh 	COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE);
186145e0143Sdh 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
187145e0143Sdh 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
188145e0143Sdh 
189145e0143Sdh 	if (xp != NULL) {
190145e0143Sdh 		mutex_exit(&xp->statlock);
191145e0143Sdh 	}
192145e0143Sdh 	pmcs_unlock_phy(phyp);
193145e0143Sdh 	WAIT_FOR(pwrk, 1000, result);
194145e0143Sdh 	pmcs_pwork(pwp, pwrk);
1953be32c0fSJesse Butler 	pmcs_lock_phy(phyp);
196145e0143Sdh 	if (xp != NULL) {
197145e0143Sdh 		mutex_enter(&xp->statlock);
198145e0143Sdh 	}
199145e0143Sdh 
200145e0143Sdh 	if (result) {
201145e0143Sdh 		pmcs_timed_out(pwp, htag, __func__);
202145e0143Sdh 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
203145e0143Sdh 		    "%s: cmd timed out, returning", __func__);
204145e0143Sdh 		return (-1);
205145e0143Sdh 	}
206145e0143Sdh 	if (LE_32(msg[2]) == 0) {
207145e0143Sdh 		pds = (uint8_t)(LE_32(msg[4]) >> 4);
208145e0143Sdh 		nds = (uint8_t)(LE_32(msg[4]) & 0x0000000f);
209145e0143Sdh 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
210145e0143Sdh 		    "%s: previous_ds=0x%x, new_ds=0x%x", __func__, pds, nds);
211145e0143Sdh 		if (xp != NULL) {
212145e0143Sdh 			xp->dev_state = nds;
213145e0143Sdh 		}
214145e0143Sdh 		return (0);
215145e0143Sdh 	} else {
216145e0143Sdh 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
217145e0143Sdh 		    "%s: cmd failed Status(0x%x), returning ", __func__,
218145e0143Sdh 		    LE_32(msg[2]));
219145e0143Sdh 		return (-1);
220145e0143Sdh 	}
221145e0143Sdh }
222145e0143Sdh 
2236745c559SJesse Butler static void
pmcs_ds_operational(pmcs_phy_t * pptr,pmcs_xscsi_t * tgt)2246745c559SJesse Butler pmcs_ds_operational(pmcs_phy_t *pptr, pmcs_xscsi_t *tgt)
2256745c559SJesse Butler {
2266745c559SJesse Butler 	pmcs_hw_t	*pwp;
2276745c559SJesse Butler 
2286745c559SJesse Butler 	ASSERT(pptr);
2296745c559SJesse Butler 	pwp = pptr->pwp;
2306745c559SJesse Butler 
2316745c559SJesse Butler 	if (tgt != NULL) {
2326745c559SJesse Butler 		tgt->recover_wait = 0;
2336745c559SJesse Butler 	}
2346745c559SJesse Butler 	pptr->ds_recovery_retries = 0;
2356745c559SJesse Butler 
2366745c559SJesse Butler 	if ((pptr->ds_prev_good_recoveries == 0) ||
2376745c559SJesse Butler 	    (ddi_get_lbolt() - pptr->last_good_recovery >
2386745c559SJesse Butler 	    drv_usectohz(PMCS_MAX_DS_RECOVERY_TIME))) {
2396745c559SJesse Butler 		pptr->last_good_recovery = ddi_get_lbolt();
2406745c559SJesse Butler 		pptr->ds_prev_good_recoveries = 1;
2416745c559SJesse Butler 	} else if (ddi_get_lbolt() < pptr->last_good_recovery +
2426745c559SJesse Butler 	    drv_usectohz(PMCS_MAX_DS_RECOVERY_TIME)) {
2436745c559SJesse Butler 		pptr->ds_prev_good_recoveries++;
2446745c559SJesse Butler 	} else {
245601c90f1SSrikanth, Ramana 		pmcs_handle_ds_recovery_error(pptr, tgt, pwp, __func__,
246601c90f1SSrikanth, Ramana 		    "Max recovery attempts reached. Declaring PHY dead");
2476745c559SJesse Butler 	}
2486745c559SJesse Butler 
2496745c559SJesse Butler 	/* Don't bother to run the work queues if the PHY is dead */
2506745c559SJesse Butler 	if (!pptr->dead) {
2516745c559SJesse Butler 		SCHEDULE_WORK(pwp, PMCS_WORK_RUN_QUEUES);
2526745c559SJesse Butler 		(void) ddi_taskq_dispatch(pwp->tq, pmcs_worker,
2536745c559SJesse Butler 		    pwp, DDI_NOSLEEP);
2546745c559SJesse Butler 	}
2556745c559SJesse Butler }
2566745c559SJesse Butler 
257145e0143Sdh void
pmcs_dev_state_recovery(pmcs_hw_t * pwp,pmcs_phy_t * phyp)258145e0143Sdh pmcs_dev_state_recovery(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
259145e0143Sdh {
260b0e5d1e5SSrikanth, Ramana 	boolean_t reschedule = B_FALSE;
261145e0143Sdh 	uint8_t	ds, tgt_dev_state;
262145e0143Sdh 	int rc;
263145e0143Sdh 	pmcs_xscsi_t *tgt;
264145e0143Sdh 	pmcs_phy_t *pptr, *pnext, *pchild;
265145e0143Sdh 
266145e0143Sdh 	/*
267145e0143Sdh 	 * First time, check to see if we're already performing recovery
268145e0143Sdh 	 */
269145e0143Sdh 	if (phyp == NULL) {
270145e0143Sdh 		mutex_enter(&pwp->lock);
271145e0143Sdh 		if (pwp->ds_err_recovering) {
272145e0143Sdh 			mutex_exit(&pwp->lock);
273145e0143Sdh 			SCHEDULE_WORK(pwp, PMCS_WORK_DS_ERR_RECOVERY);
274145e0143Sdh 			return;
275145e0143Sdh 		}
276145e0143Sdh 
277145e0143Sdh 		pwp->ds_err_recovering = 1;
278145e0143Sdh 		pptr = pwp->root_phys;
279145e0143Sdh 		mutex_exit(&pwp->lock);
280145e0143Sdh 	} else {
281145e0143Sdh 		pptr = phyp;
282145e0143Sdh 	}
283145e0143Sdh 
284145e0143Sdh 	while (pptr) {
285145e0143Sdh 		/*
286145e0143Sdh 		 * Since ds_err_recovering is set, we can be assured these
287145e0143Sdh 		 * PHYs won't disappear on us while we do this.
288145e0143Sdh 		 */
289145e0143Sdh 		pmcs_lock_phy(pptr);
290145e0143Sdh 		pchild = pptr->children;
291145e0143Sdh 		pnext = pptr->sibling;
292145e0143Sdh 		pmcs_unlock_phy(pptr);
293145e0143Sdh 
294145e0143Sdh 		if (pchild) {
295145e0143Sdh 			pmcs_dev_state_recovery(pwp, pchild);
296145e0143Sdh 		}
297145e0143Sdh 
298145e0143Sdh 		tgt = NULL;
299145e0143Sdh 		pmcs_lock_phy(pptr);
300145e0143Sdh 
301601c90f1SSrikanth, Ramana 		if (pptr->dead || !pptr->valid_device_id) {
302601c90f1SSrikanth, Ramana 			goto next_phy;
303601c90f1SSrikanth, Ramana 		}
304601c90f1SSrikanth, Ramana 
305601c90f1SSrikanth, Ramana 		if (pptr->iport && (pptr->iport->ua_state != UA_ACTIVE)) {
306601c90f1SSrikanth, Ramana 			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, pptr->target,
307601c90f1SSrikanth, Ramana 			    "%s: No DS recovery on PHY %s, iport not active",
308601c90f1SSrikanth, Ramana 			    __func__, pptr->path);
309145e0143Sdh 			goto next_phy;
310145e0143Sdh 		}
311145e0143Sdh 
312145e0143Sdh 		tgt = pptr->target;
313145e0143Sdh 
314145e0143Sdh 		if (tgt != NULL) {
315145e0143Sdh 			mutex_enter(&tgt->statlock);
316145e0143Sdh 			if (tgt->recover_wait == 0) {
317145e0143Sdh 				goto next_phy;
318145e0143Sdh 			}
319145e0143Sdh 			tgt_dev_state = tgt->dev_state;
320145e0143Sdh 		} else {
321145e0143Sdh 			tgt_dev_state = PMCS_DEVICE_STATE_NOT_AVAILABLE;
322145e0143Sdh 		}
323145e0143Sdh 
324145e0143Sdh 		if (pptr->prev_recovery) {
325145e0143Sdh 			if (ddi_get_lbolt() - pptr->prev_recovery <
326145e0143Sdh 			    drv_usectohz(PMCS_DS_RECOVERY_INTERVAL)) {
327145e0143Sdh 				pmcs_prt(pwp, PMCS_PRT_DEBUG2, pptr, tgt,
328145e0143Sdh 				    "%s: DS recovery on PHY %s "
329145e0143Sdh 				    "re-invoked too soon. Skipping...",
330145e0143Sdh 				    __func__, pptr->path);
331b0e5d1e5SSrikanth, Ramana 				if ((tgt) && (tgt->recover_wait)) {
332b0e5d1e5SSrikanth, Ramana 					reschedule = B_TRUE;
333b0e5d1e5SSrikanth, Ramana 				}
334145e0143Sdh 				goto next_phy;
335145e0143Sdh 			}
336145e0143Sdh 		}
337145e0143Sdh 		pptr->prev_recovery = ddi_get_lbolt();
338145e0143Sdh 
339145e0143Sdh 		/*
340145e0143Sdh 		 * Step 1: Put the device into the IN_RECOVERY state
341145e0143Sdh 		 */
342145e0143Sdh 		rc = pmcs_get_dev_state(pwp, pptr, tgt, &ds);
343145e0143Sdh 		if (rc != 0) {
344145e0143Sdh 			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
345145e0143Sdh 			    "%s: pmcs_get_dev_state on PHY %s "
346145e0143Sdh 			    "failed (rc=%d)",
347145e0143Sdh 			    __func__, pptr->path, rc);
348145e0143Sdh 
349145e0143Sdh 			pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
350601c90f1SSrikanth, Ramana 			    __func__, "pmcs_get_dev_state");
351145e0143Sdh 
352145e0143Sdh 			goto next_phy;
353145e0143Sdh 		}
354145e0143Sdh 
3556745c559SJesse Butler 		/* If the chip says it's operational, we're done */
3566745c559SJesse Butler 		if (ds == PMCS_DEVICE_STATE_OPERATIONAL) {
3576745c559SJesse Butler 			pmcs_ds_operational(pptr, tgt);
3586745c559SJesse Butler 			goto next_phy;
3596745c559SJesse Butler 		}
3606745c559SJesse Butler 
361145e0143Sdh 		if ((tgt_dev_state == ds) &&
362145e0143Sdh 		    (ds == PMCS_DEVICE_STATE_IN_RECOVERY)) {
363145e0143Sdh 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt,
364145e0143Sdh 			    "%s: Target 0x%p already IN_RECOVERY", __func__,
365145e0143Sdh 			    (void *)tgt);
366145e0143Sdh 		} else {
367145e0143Sdh 			if (tgt != NULL) {
368145e0143Sdh 				tgt->dev_state = ds;
369145e0143Sdh 			}
370145e0143Sdh 			tgt_dev_state = ds;
371145e0143Sdh 			ds = PMCS_DEVICE_STATE_IN_RECOVERY;
372145e0143Sdh 			rc = pmcs_send_err_recovery_cmd(pwp, ds, pptr, tgt);
373145e0143Sdh 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt,
374145e0143Sdh 			    "%s: pmcs_send_err_recovery_cmd "
375145e0143Sdh 			    "result(%d) tgt(0x%p) ds(0x%x) tgt->ds(0x%x)",
376145e0143Sdh 			    __func__, rc, (void *)tgt, ds, tgt_dev_state);
377145e0143Sdh 
378145e0143Sdh 			if (rc) {
379145e0143Sdh 				pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
380145e0143Sdh 				    "%s: pmcs_send_err_recovery_cmd to PHY %s "
381145e0143Sdh 				    "failed (rc=%d)",
382145e0143Sdh 				    __func__, pptr->path, rc);
383145e0143Sdh 
384145e0143Sdh 				pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
385601c90f1SSrikanth, Ramana 				    __func__, "pmcs_send_err_recovery_cmd");
386145e0143Sdh 
387145e0143Sdh 				goto next_phy;
388145e0143Sdh 			}
389145e0143Sdh 		}
390145e0143Sdh 
391145e0143Sdh 		/*
3926745c559SJesse Butler 		 * Step 2: Perform a hard reset on the PHY.
393145e0143Sdh 		 */
394601c90f1SSrikanth, Ramana 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt,
395601c90f1SSrikanth, Ramana 		    "%s: Issue HARD_RESET to PHY %s", __func__,
396601c90f1SSrikanth, Ramana 		    pptr->path);
397601c90f1SSrikanth, Ramana 		/*
398601c90f1SSrikanth, Ramana 		 * Must release statlock here because pmcs_reset_phy
399601c90f1SSrikanth, Ramana 		 * will drop and reacquire the PHY lock.
400601c90f1SSrikanth, Ramana 		 */
401601c90f1SSrikanth, Ramana 		if (tgt != NULL) {
402601c90f1SSrikanth, Ramana 			mutex_exit(&tgt->statlock);
403601c90f1SSrikanth, Ramana 		}
404601c90f1SSrikanth, Ramana 		rc = pmcs_reset_phy(pwp, pptr, PMCS_PHYOP_HARD_RESET);
405601c90f1SSrikanth, Ramana 		if (tgt != NULL) {
406601c90f1SSrikanth, Ramana 			mutex_enter(&tgt->statlock);
407601c90f1SSrikanth, Ramana 		}
408601c90f1SSrikanth, Ramana 		if (rc) {
409601c90f1SSrikanth, Ramana 			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
410601c90f1SSrikanth, Ramana 			    "%s: HARD_RESET to PHY %s failed (rc=%d)",
411601c90f1SSrikanth, Ramana 			    __func__, pptr->path, rc);
412145e0143Sdh 
413601c90f1SSrikanth, Ramana 			pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
414601c90f1SSrikanth, Ramana 			    __func__, "HARD_RESET");
415145e0143Sdh 
416601c90f1SSrikanth, Ramana 			goto next_phy;
417145e0143Sdh 		}
418145e0143Sdh 
419145e0143Sdh 		/*
420145e0143Sdh 		 * Step 3: Abort all I/Os to the device
421145e0143Sdh 		 */
422145e0143Sdh 		if (pptr->abort_all_start) {
423145e0143Sdh 			while (pptr->abort_all_start) {
424145e0143Sdh 				pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
425145e0143Sdh 				    "%s: Waiting for outstanding ABORT_ALL on "
426145e0143Sdh 				    "PHY 0x%p", __func__, (void *)pptr);
427145e0143Sdh 				cv_wait(&pptr->abort_all_cv, &pptr->phy_lock);
428145e0143Sdh 			}
429145e0143Sdh 		} else {
430145e0143Sdh 			if (tgt != NULL) {
431145e0143Sdh 				mutex_exit(&tgt->statlock);
432145e0143Sdh 			}
433145e0143Sdh 			rc = pmcs_abort(pwp, pptr, pptr->device_id, 1, 1);
434145e0143Sdh 			if (tgt != NULL) {
435145e0143Sdh 				mutex_enter(&tgt->statlock);
436145e0143Sdh 			}
437145e0143Sdh 			if (rc != 0) {
438145e0143Sdh 				pptr->abort_pending = 1;
439145e0143Sdh 				pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
440145e0143Sdh 				    "%s: pmcs_abort to PHY %s failed (rc=%d)",
441145e0143Sdh 				    __func__, pptr->path, rc);
442145e0143Sdh 
443145e0143Sdh 				pmcs_handle_ds_recovery_error(pptr, tgt,
444601c90f1SSrikanth, Ramana 				    pwp, __func__, "pmcs_abort");
445145e0143Sdh 
446145e0143Sdh 				goto next_phy;
447145e0143Sdh 			}
448145e0143Sdh 		}
449145e0143Sdh 
450145e0143Sdh 		/*
451145e0143Sdh 		 * Step 4: Set the device back to OPERATIONAL state
452145e0143Sdh 		 */
453145e0143Sdh 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt,
454145e0143Sdh 		    "%s: Set PHY/tgt 0x%p/0x%p to OPERATIONAL state",
455145e0143Sdh 		    __func__, (void *)pptr, (void *)tgt);
456145e0143Sdh 		rc = pmcs_set_dev_state(pwp, pptr, tgt,
457145e0143Sdh 		    PMCS_DEVICE_STATE_OPERATIONAL);
458145e0143Sdh 		if (rc == 0) {
4596745c559SJesse Butler 			pmcs_ds_operational(pptr, tgt);
460145e0143Sdh 		} else {
461145e0143Sdh 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt,
462145e0143Sdh 			    "%s: Failed to SET tgt 0x%p to OPERATIONAL state",
463145e0143Sdh 			    __func__, (void *)tgt);
464145e0143Sdh 
465145e0143Sdh 			pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
466601c90f1SSrikanth, Ramana 			    __func__, "SET tgt to OPERATIONAL state");
467145e0143Sdh 
468145e0143Sdh 			goto next_phy;
469145e0143Sdh 		}
470145e0143Sdh 
471145e0143Sdh next_phy:
472145e0143Sdh 		if (tgt) {
473145e0143Sdh 			mutex_exit(&tgt->statlock);
474145e0143Sdh 		}
475145e0143Sdh 		pmcs_unlock_phy(pptr);
476145e0143Sdh 		pptr = pnext;
477145e0143Sdh 	}
478145e0143Sdh 
479145e0143Sdh 	/*
480145e0143Sdh 	 * Only clear ds_err_recovering if we're exiting for good and not
481145e0143Sdh 	 * just unwinding from recursion
482145e0143Sdh 	 */
483145e0143Sdh 	if (phyp == NULL) {
484145e0143Sdh 		mutex_enter(&pwp->lock);
485145e0143Sdh 		pwp->ds_err_recovering = 0;
486145e0143Sdh 		mutex_exit(&pwp->lock);
487145e0143Sdh 	}
488b0e5d1e5SSrikanth, Ramana 
489b0e5d1e5SSrikanth, Ramana 	if (reschedule) {
490b0e5d1e5SSrikanth, Ramana 		SCHEDULE_WORK(pwp, PMCS_WORK_DS_ERR_RECOVERY);
491b0e5d1e5SSrikanth, Ramana 	}
492145e0143Sdh }
493145e0143Sdh 
494145e0143Sdh /*
495145e0143Sdh  * Called with target's statlock held (if target is non-NULL) and PHY lock held.
496145e0143Sdh  */
497145e0143Sdh int
pmcs_send_err_recovery_cmd(pmcs_hw_t * pwp,uint8_t dev_state,pmcs_phy_t * phyp,pmcs_xscsi_t * tgt)498145e0143Sdh pmcs_send_err_recovery_cmd(pmcs_hw_t *pwp, uint8_t dev_state, pmcs_phy_t *phyp,
499145e0143Sdh     pmcs_xscsi_t *tgt)
500145e0143Sdh {
501145e0143Sdh 	int rc = -1;
502145e0143Sdh 	uint8_t tgt_dev_state = PMCS_DEVICE_STATE_NOT_AVAILABLE;
503145e0143Sdh 
504145e0143Sdh 	if (tgt != NULL) {
505145e0143Sdh 		ASSERT(mutex_owned(&tgt->statlock));
506145e0143Sdh 		if (tgt->recovering) {
507145e0143Sdh 			return (0);
508145e0143Sdh 		}
509145e0143Sdh 
510145e0143Sdh 		tgt->recovering = 1;
511145e0143Sdh 		tgt_dev_state = tgt->dev_state;
512145e0143Sdh 	}
513145e0143Sdh 
514145e0143Sdh 	if (phyp == NULL) {
515145e0143Sdh 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, NULL, tgt,
516145e0143Sdh 		    "%s: PHY is NULL", __func__);
517145e0143Sdh 		return (-1);
518145e0143Sdh 	}
519145e0143Sdh 
520145e0143Sdh 	ASSERT(mutex_owned(&phyp->phy_lock));
521145e0143Sdh 
522145e0143Sdh 	pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt,
523145e0143Sdh 	    "%s: ds: 0x%x, tgt ds(0x%x)", __func__, dev_state, tgt_dev_state);
524145e0143Sdh 
525145e0143Sdh 	switch (dev_state) {
526145e0143Sdh 	case PMCS_DEVICE_STATE_IN_RECOVERY:
527145e0143Sdh 		if (tgt_dev_state == PMCS_DEVICE_STATE_IN_RECOVERY) {
528145e0143Sdh 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt,
529145e0143Sdh 			    "%s: Target 0x%p already IN_RECOVERY", __func__,
530145e0143Sdh 			    (void *)tgt);
531145e0143Sdh 			rc = 0;	/* This is not an error */
532145e0143Sdh 			goto no_action;
533145e0143Sdh 		}
534145e0143Sdh 
535145e0143Sdh 		rc = pmcs_set_dev_state(pwp, phyp, tgt,
536145e0143Sdh 		    PMCS_DEVICE_STATE_IN_RECOVERY);
537145e0143Sdh 		if (rc != 0) {
538145e0143Sdh 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt,
539145e0143Sdh 			    "%s(1): Failed to set tgt(0x%p) to IN_RECOVERY",
540145e0143Sdh 			    __func__, (void *)tgt);
541145e0143Sdh 		}
542145e0143Sdh 
543145e0143Sdh 		break;
544145e0143Sdh 
545145e0143Sdh 	case PMCS_DEVICE_STATE_OPERATIONAL:
546145e0143Sdh 		if (tgt_dev_state != PMCS_DEVICE_STATE_IN_RECOVERY) {
547145e0143Sdh 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt,
548145e0143Sdh 			    "%s: Target 0x%p not ready to go OPERATIONAL",
549145e0143Sdh 			    __func__, (void *)tgt);
550145e0143Sdh 			goto no_action;
551145e0143Sdh 		}
552145e0143Sdh 
553145e0143Sdh 		rc = pmcs_set_dev_state(pwp, phyp, tgt,
554145e0143Sdh 		    PMCS_DEVICE_STATE_OPERATIONAL);
555145e0143Sdh 		if (tgt != NULL) {
556145e0143Sdh 			tgt->reset_success = 1;
557145e0143Sdh 		}
558145e0143Sdh 		if (rc != 0) {
559145e0143Sdh 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt,
560145e0143Sdh 			    "%s(2): Failed to SET tgt(0x%p) to OPERATIONAL",
561145e0143Sdh 			    __func__, (void *)tgt);
562145e0143Sdh 			if (tgt != NULL) {
563145e0143Sdh 				tgt->reset_success = 0;
564145e0143Sdh 			}
565145e0143Sdh 		}
566145e0143Sdh 
567145e0143Sdh 		break;
568145e0143Sdh 
569145e0143Sdh 	case PMCS_DEVICE_STATE_NON_OPERATIONAL:
570145e0143Sdh 		PHY_CHANGED(pwp, phyp);
571145e0143Sdh 		RESTART_DISCOVERY(pwp);
572145e0143Sdh 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt,
573145e0143Sdh 		    "%s: Device at %s is non-operational",
574145e0143Sdh 		    __func__, phyp->path);
575145e0143Sdh 		if (tgt != NULL) {
576145e0143Sdh 			tgt->dev_state = PMCS_DEVICE_STATE_NON_OPERATIONAL;
577145e0143Sdh 		}
578145e0143Sdh 		rc = 0;
579145e0143Sdh 
580145e0143Sdh 		break;
581145e0143Sdh 
582145e0143Sdh 	default:
583145e0143Sdh 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt,
584145e0143Sdh 		    "%s: Invalid state requested (%d)", __func__,
585145e0143Sdh 		    dev_state);
586145e0143Sdh 		break;
587145e0143Sdh 
588145e0143Sdh 	}
589145e0143Sdh 
590145e0143Sdh no_action:
591145e0143Sdh 	if (tgt != NULL) {
592145e0143Sdh 		tgt->recovering = 0;
593145e0143Sdh 	}
594145e0143Sdh 	return (rc);
595145e0143Sdh }
596145e0143Sdh 
597145e0143Sdh /*
598145e0143Sdh  * Start ssp event recovery. We have to schedule recovery operation because
599145e0143Sdh  * it involves sending multiple commands to device and we should not do it
600145e0143Sdh  * in the interrupt context.
601145e0143Sdh  * If it is failure of a recovery command, let the recovery thread deal with it.
602225bf905SJesse Butler  * Called with the work lock held.
603145e0143Sdh  */
604145e0143Sdh void
pmcs_start_ssp_event_recovery(pmcs_hw_t * pwp,pmcwork_t * pwrk,uint32_t * iomb,size_t amt)605145e0143Sdh pmcs_start_ssp_event_recovery(pmcs_hw_t *pwp, pmcwork_t *pwrk, uint32_t *iomb,
606145e0143Sdh     size_t amt)
607145e0143Sdh {
608145e0143Sdh 	pmcs_xscsi_t *tgt = pwrk->xp;
609145e0143Sdh 	uint32_t event = LE_32(iomb[2]);
610145e0143Sdh 	pmcs_phy_t *pptr = pwrk->phy;
611658280b6SDavid Hollister 	pmcs_cb_t callback;
612145e0143Sdh 	uint32_t tag;
613145e0143Sdh 
614145e0143Sdh 	if (tgt != NULL) {
615145e0143Sdh 		mutex_enter(&tgt->statlock);
616145e0143Sdh 		if (!tgt->assigned) {
617145e0143Sdh 			if (pptr) {
618145e0143Sdh 				pmcs_dec_phy_ref_count(pptr);
619145e0143Sdh 			}
620145e0143Sdh 			pptr = NULL;
621145e0143Sdh 			pwrk->phy = NULL;
622145e0143Sdh 		}
623145e0143Sdh 		mutex_exit(&tgt->statlock);
624145e0143Sdh 	}
62556976565SDavid Hollister 
626145e0143Sdh 	if (pptr == NULL) {
627145e0143Sdh 		/*
628145e0143Sdh 		 * No target, need to run RE-DISCOVERY here.
629145e0143Sdh 		 */
630145e0143Sdh 		if (pwrk->state != PMCS_WORK_STATE_TIMED_OUT) {
631145e0143Sdh 			pwrk->state = PMCS_WORK_STATE_INTR;
632145e0143Sdh 		}
633145e0143Sdh 		/*
634145e0143Sdh 		 * Although we cannot mark phy to force abort nor mark phy
635145e0143Sdh 		 * as changed, killing of a target would take care of aborting
636145e0143Sdh 		 * commands for the device.
637145e0143Sdh 		 */
638145e0143Sdh 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
639145e0143Sdh 		    "%s: No valid target for event processing. Reconfigure.",
640145e0143Sdh 		    __func__);
641145e0143Sdh 		pmcs_pwork(pwp, pwrk);
642145e0143Sdh 		RESTART_DISCOVERY(pwp);
643145e0143Sdh 		return;
644145e0143Sdh 	} else {
645225bf905SJesse Butler 		/* We have a phy pointer, we'll need to lock it */
646225bf905SJesse Butler 		mutex_exit(&pwrk->lock);
647145e0143Sdh 		pmcs_lock_phy(pptr);
648225bf905SJesse Butler 		mutex_enter(&pwrk->lock);
6493be32c0fSJesse Butler 		if (tgt != NULL) {
65056976565SDavid Hollister 			mutex_enter(&tgt->statlock);
65156976565SDavid Hollister 		}
652145e0143Sdh 		if (event == PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS) {
6533be32c0fSJesse Butler 			if ((tgt != NULL) && (tgt->dev_state !=
6543be32c0fSJesse Butler 			    PMCS_DEVICE_STATE_NON_OPERATIONAL)) {
655145e0143Sdh 				pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
656145e0143Sdh 				    "%s: Device at %s is non-operational",
657145e0143Sdh 				    __func__, pptr->path);
658145e0143Sdh 				tgt->dev_state =
659145e0143Sdh 				    PMCS_DEVICE_STATE_NON_OPERATIONAL;
660145e0143Sdh 			}
661145e0143Sdh 			pptr->abort_pending = 1;
6623be32c0fSJesse Butler 			if (tgt != NULL) {
66356976565SDavid Hollister 				mutex_exit(&tgt->statlock);
66456976565SDavid Hollister 			}
665145e0143Sdh 			mutex_exit(&pwrk->lock);
666225bf905SJesse Butler 			pmcs_unlock_phy(pptr);
667145e0143Sdh 			SCHEDULE_WORK(pwp, PMCS_WORK_ABORT_HANDLE);
668145e0143Sdh 			RESTART_DISCOVERY(pwp);
669145e0143Sdh 			return;
670145e0143Sdh 		}
671145e0143Sdh 
672145e0143Sdh 		/*
673145e0143Sdh 		 * If this command is run in WAIT mode, it is a failing recovery
674145e0143Sdh 		 * command. If so, just wake up recovery thread waiting for
675145e0143Sdh 		 * command completion.
676145e0143Sdh 		 */
677145e0143Sdh 		tag = PMCS_TAG_TYPE(pwrk->htag);
678145e0143Sdh 		if (tag == PMCS_TAG_TYPE_WAIT) {
679145e0143Sdh 			pwrk->htag |= PMCS_TAG_DONE;
680145e0143Sdh 			if (pwrk->arg && amt) {
681145e0143Sdh 				(void) memcpy(pwrk->arg, iomb, amt);
682145e0143Sdh 			}
683145e0143Sdh 			cv_signal(&pwrk->sleep_cv);
6843be32c0fSJesse Butler 			if (tgt != NULL) {
68556976565SDavid Hollister 				mutex_exit(&tgt->statlock);
68656976565SDavid Hollister 			}
687225bf905SJesse Butler 			mutex_exit(&pwrk->lock);
688145e0143Sdh 			pmcs_unlock_phy(pptr);
689145e0143Sdh 			return;
690145e0143Sdh 		}
691145e0143Sdh 
6923be32c0fSJesse Butler 		if (tgt == NULL) {
69356976565SDavid Hollister 			pmcs_prt(pwp, PMCS_PRT_DEBUG1, pptr, NULL,
69456976565SDavid Hollister 			    "%s: Not scheduling SSP event recovery for NULL tgt"
69556976565SDavid Hollister 			    " pwrk(%p) tag(0x%x)", __func__, (void *)pwrk,
69656976565SDavid Hollister 			    pwrk->htag);
697225bf905SJesse Butler 			mutex_exit(&pwrk->lock);
698225bf905SJesse Butler 			pmcs_unlock_phy(pptr);
69956976565SDavid Hollister 			return;
70056976565SDavid Hollister 		}
70156976565SDavid Hollister 
702658280b6SDavid Hollister 		/*
703658280b6SDavid Hollister 		 * If the SSP event was an OPEN_RETRY_TIMEOUT, we don't want
704658280b6SDavid Hollister 		 * to go through the recovery (abort/LU reset) process.
705658280b6SDavid Hollister 		 * Simply complete the command and return it as STATUS_BUSY.
706658280b6SDavid Hollister 		 * This will cause the target driver to simply retry.
707658280b6SDavid Hollister 		 */
708658280b6SDavid Hollister 		if (event == PMCOUT_STATUS_IO_XFER_OPEN_RETRY_TIMEOUT) {
709658280b6SDavid Hollister 			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
710658280b6SDavid Hollister 			    "%s: Got OPEN_RETRY_TIMEOUT event (htag 0x%08x)",
711658280b6SDavid Hollister 			    __func__, pwrk->htag);
712658280b6SDavid Hollister 
713658280b6SDavid Hollister 			mutex_exit(&tgt->statlock);
714225bf905SJesse Butler 			/* Note: work remains locked for the callback */
715658280b6SDavid Hollister 			pmcs_unlock_phy(pptr);
716658280b6SDavid Hollister 			pwrk->ssp_event = event;
717658280b6SDavid Hollister 			callback = (pmcs_cb_t)pwrk->ptr;
718658280b6SDavid Hollister 			(*callback)(pwp, pwrk, iomb);
719658280b6SDavid Hollister 			return;
720658280b6SDavid Hollister 		}
721658280b6SDavid Hollister 
722145e0143Sdh 		/*
723145e0143Sdh 		 * To recover from primary failures,
724145e0143Sdh 		 * we need to schedule handling events recovery.
725145e0143Sdh 		 */
726145e0143Sdh 		tgt->event_recovery = 1;
727145e0143Sdh 		mutex_exit(&tgt->statlock);
728145e0143Sdh 		pwrk->ssp_event = event;
729225bf905SJesse Butler 		mutex_exit(&pwrk->lock);
730225bf905SJesse Butler 		pmcs_unlock_phy(pptr);
731145e0143Sdh 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
732145e0143Sdh 		    "%s: Scheduling SSP event recovery for tgt(0x%p) "
733145e0143Sdh 		    "pwrk(%p) tag(0x%x)", __func__, (void *)tgt, (void *)pwrk,
734145e0143Sdh 		    pwrk->htag);
735145e0143Sdh 		SCHEDULE_WORK(pwp, PMCS_WORK_SSP_EVT_RECOVERY);
736145e0143Sdh 	}
737145e0143Sdh 
738145e0143Sdh 	/* Work cannot be completed until event recovery is completed. */
739145e0143Sdh }
740145e0143Sdh 
741145e0143Sdh /*
742145e0143Sdh  * SSP target event recovery
743*219ebc8eSSrikanth Suravajhala  * phy->lock should be held upon entry.
744*219ebc8eSSrikanth Suravajhala  * pwrk->lock should be held upon entry and gets released by this routine.
745*219ebc8eSSrikanth Suravajhala  * tgt->statlock should not be held.
746145e0143Sdh  */
747145e0143Sdh void
pmcs_tgt_event_recovery(pmcs_hw_t * pwp,pmcwork_t * pwrk)748145e0143Sdh pmcs_tgt_event_recovery(pmcs_hw_t *pwp, pmcwork_t *pwrk)
749145e0143Sdh {
750145e0143Sdh 	pmcs_phy_t *pptr = pwrk->phy;
751145e0143Sdh 	pmcs_cmd_t *sp = pwrk->arg;
752145e0143Sdh 	pmcs_lun_t *lun = sp->cmd_lun;
753145e0143Sdh 	pmcs_xscsi_t *tgt = pwrk->xp;
754145e0143Sdh 	uint32_t event;
755145e0143Sdh 	uint32_t htag;
756145e0143Sdh 	uint32_t status;
757145e0143Sdh 	int rv;
758145e0143Sdh 
759145e0143Sdh 	ASSERT(pwrk->arg != NULL);
760145e0143Sdh 	ASSERT(pwrk->xp != NULL);
761145e0143Sdh 	pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
762145e0143Sdh 	    "%s: event recovery for target 0x%p", __func__, (void *)pwrk->xp);
763145e0143Sdh 	htag = pwrk->htag;
764145e0143Sdh 	event = pwrk->ssp_event;
765145e0143Sdh 	pwrk->ssp_event = 0xffffffff;
766658280b6SDavid Hollister 
767*219ebc8eSSrikanth Suravajhala 	mutex_exit(&pwrk->lock);
768*219ebc8eSSrikanth Suravajhala 
769145e0143Sdh 	if (event == PMCOUT_STATUS_XFER_ERR_BREAK ||
770145e0143Sdh 	    event == PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY ||
771145e0143Sdh 	    event == PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT) {
772145e0143Sdh 		/* Command may be still pending on device */
773145e0143Sdh 		rv = pmcs_ssp_tmf(pwp, pptr, SAS_QUERY_TASK, htag,
774145e0143Sdh 		    lun->lun_num, &status);
775145e0143Sdh 		if (rv != 0) {
776145e0143Sdh 			goto out;
777145e0143Sdh 		}
778145e0143Sdh 		if (status == SAS_RSP_TMF_COMPLETE) {
779145e0143Sdh 			/* Command NOT pending on a device */
780145e0143Sdh 			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
781145e0143Sdh 			    "%s: No pending command for tgt 0x%p",
782145e0143Sdh 			    __func__, (void *)tgt);
783145e0143Sdh 			/* Nothing more to do, just abort it on chip */
784145e0143Sdh 			htag = 0;
785145e0143Sdh 		}
786145e0143Sdh 	}
787145e0143Sdh 	/*
788145e0143Sdh 	 * All other events left the command pending in the host
789145e0143Sdh 	 * Send abort task and abort it on the chip
790145e0143Sdh 	 */
791145e0143Sdh 	if (htag != 0) {
792145e0143Sdh 		if (pmcs_ssp_tmf(pwp, pptr, SAS_ABORT_TASK, htag,
793145e0143Sdh 		    lun->lun_num, &status))
794145e0143Sdh 			goto out;
795145e0143Sdh 	}
796*219ebc8eSSrikanth Suravajhala 	(void) pmcs_abort(pwp, pptr, htag, 0, 1);
797145e0143Sdh 	/*
798145e0143Sdh 	 * Abort either took care of work completion, or put device in
799145e0143Sdh 	 * a recovery state
800145e0143Sdh 	 */
801145e0143Sdh 	return;
802145e0143Sdh out:
803145e0143Sdh 	/* Abort failed, do full device recovery */
804*219ebc8eSSrikanth Suravajhala 	mutex_enter(&pwrk->lock);
805*219ebc8eSSrikanth Suravajhala 	tgt = pwrk->xp;
806*219ebc8eSSrikanth Suravajhala 	mutex_exit(&pwrk->lock);
807*219ebc8eSSrikanth Suravajhala 	if (tgt != NULL) {
808*219ebc8eSSrikanth Suravajhala 		mutex_enter(&tgt->statlock);
809*219ebc8eSSrikanth Suravajhala 		pmcs_start_dev_state_recovery(tgt, pptr);
810*219ebc8eSSrikanth Suravajhala 		mutex_exit(&tgt->statlock);
811145e0143Sdh 	}
812145e0143Sdh }
813145e0143Sdh 
814145e0143Sdh /*
815145e0143Sdh  * SSP event recovery task.
816145e0143Sdh  */
817145e0143Sdh void
pmcs_ssp_event_recovery(pmcs_hw_t * pwp)818145e0143Sdh pmcs_ssp_event_recovery(pmcs_hw_t *pwp)
819145e0143Sdh {
820145e0143Sdh 	int idx;
821145e0143Sdh 	pmcs_xscsi_t *tgt;
822145e0143Sdh 	pmcs_cmd_t *cp;
823145e0143Sdh 	pmcwork_t *pwrk;
824145e0143Sdh 	pmcs_phy_t *pphy;
825145e0143Sdh 	int er_flag;
826145e0143Sdh 	uint32_t idxpwrk;
827145e0143Sdh 
828145e0143Sdh restart:
829145e0143Sdh 	for (idx = 0; idx < pwp->max_dev; idx++) {
830145e0143Sdh 		mutex_enter(&pwp->lock);
831145e0143Sdh 		tgt = pwp->targets[idx];
832145e0143Sdh 		mutex_exit(&pwp->lock);
833601c90f1SSrikanth, Ramana 		if (tgt == NULL) {
834601c90f1SSrikanth, Ramana 			continue;
835601c90f1SSrikanth, Ramana 		}
836601c90f1SSrikanth, Ramana 
837601c90f1SSrikanth, Ramana 		mutex_enter(&tgt->statlock);
838601c90f1SSrikanth, Ramana 		if (!tgt->assigned) {
839145e0143Sdh 			mutex_exit(&tgt->statlock);
840601c90f1SSrikanth, Ramana 			continue;
841601c90f1SSrikanth, Ramana 		}
842601c90f1SSrikanth, Ramana 		pphy = tgt->phy;
843601c90f1SSrikanth, Ramana 		er_flag = tgt->event_recovery;
844601c90f1SSrikanth, Ramana 		mutex_exit(&tgt->statlock);
845601c90f1SSrikanth, Ramana 
846601c90f1SSrikanth, Ramana 		if ((pphy == NULL) || (er_flag == 0)) {
847601c90f1SSrikanth, Ramana 			continue;
848601c90f1SSrikanth, Ramana 		}
849601c90f1SSrikanth, Ramana 
850601c90f1SSrikanth, Ramana 		pmcs_lock_phy(pphy);
851601c90f1SSrikanth, Ramana 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pphy, tgt,
852601c90f1SSrikanth, Ramana 		    "%s: found target(0x%p)", __func__, (void *) tgt);
853601c90f1SSrikanth, Ramana 
854601c90f1SSrikanth, Ramana 		/* Check what cmd expects recovery */
855601c90f1SSrikanth, Ramana 		mutex_enter(&tgt->aqlock);
856601c90f1SSrikanth, Ramana 		STAILQ_FOREACH(cp, &tgt->aq, cmd_next) {
857601c90f1SSrikanth, Ramana 			idxpwrk = PMCS_TAG_INDEX(cp->cmd_tag);
858601c90f1SSrikanth, Ramana 			pwrk = &pwp->work[idxpwrk];
859*219ebc8eSSrikanth Suravajhala 			mutex_enter(&pwrk->lock);
860601c90f1SSrikanth, Ramana 			if (pwrk->htag != cp->cmd_tag) {
861601c90f1SSrikanth, Ramana 				/*
862601c90f1SSrikanth, Ramana 				 * aq may contain TMF commands, so we
863601c90f1SSrikanth, Ramana 				 * may not find work structure with htag
864601c90f1SSrikanth, Ramana 				 */
865*219ebc8eSSrikanth Suravajhala 				mutex_exit(&pwrk->lock);
866*219ebc8eSSrikanth Suravajhala 				continue;
867601c90f1SSrikanth, Ramana 			}
868*219ebc8eSSrikanth Suravajhala 			if (!PMCS_COMMAND_DONE(pwrk) &&
869*219ebc8eSSrikanth Suravajhala 			    (pwrk->ssp_event != 0) &&
870601c90f1SSrikanth, Ramana 			    (pwrk->ssp_event != PMCS_REC_EVENT)) {
871145e0143Sdh 				pmcs_prt(pwp, PMCS_PRT_DEBUG, pphy, tgt,
872601c90f1SSrikanth, Ramana 				    "%s: pwrk(%p) htag(0x%x)",
873601c90f1SSrikanth, Ramana 				    __func__, (void *) pwrk, cp->cmd_tag);
874145e0143Sdh 				mutex_exit(&tgt->aqlock);
875601c90f1SSrikanth, Ramana 				/*
876*219ebc8eSSrikanth Suravajhala 				 * pwrk->lock gets dropped in
877*219ebc8eSSrikanth Suravajhala 				 * pmcs_tgt_event_recovery()
878601c90f1SSrikanth, Ramana 				 */
879*219ebc8eSSrikanth Suravajhala 				pmcs_tgt_event_recovery(pwp, pwrk);
880145e0143Sdh 				pmcs_unlock_phy(pphy);
881*219ebc8eSSrikanth Suravajhala 				/* All bets are off on tgt/aq now, restart */
882601c90f1SSrikanth, Ramana 				goto restart;
883145e0143Sdh 			}
884*219ebc8eSSrikanth Suravajhala 			mutex_exit(&pwrk->lock);
885145e0143Sdh 		}
886601c90f1SSrikanth, Ramana 		mutex_exit(&tgt->aqlock);
887*219ebc8eSSrikanth Suravajhala 		mutex_enter(&tgt->statlock);
888601c90f1SSrikanth, Ramana 		tgt->event_recovery = 0;
889601c90f1SSrikanth, Ramana 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pphy, tgt,
890601c90f1SSrikanth, Ramana 		    "%s: end of SSP event recovery for target(0x%p)",
891601c90f1SSrikanth, Ramana 		    __func__, (void *) tgt);
892601c90f1SSrikanth, Ramana 		mutex_exit(&tgt->statlock);
893601c90f1SSrikanth, Ramana 		pmcs_unlock_phy(pphy);
894145e0143Sdh 	}
895145e0143Sdh 	pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
896145e0143Sdh 	    "%s: end of SSP event recovery for pwp(0x%p)", __func__,
897145e0143Sdh 	    (void *) pwp);
898145e0143Sdh }
899145e0143Sdh 
900145e0143Sdh void
pmcs_start_dev_state_recovery(pmcs_xscsi_t * xp,pmcs_phy_t * phyp)901145e0143Sdh pmcs_start_dev_state_recovery(pmcs_xscsi_t *xp, pmcs_phy_t *phyp)
902145e0143Sdh {
903145e0143Sdh 	ASSERT(mutex_owned(&xp->statlock));
904145e0143Sdh 	ASSERT(xp->pwp != NULL);
905145e0143Sdh 
906145e0143Sdh 	if (xp->recover_wait == 0) {
907145e0143Sdh 		pmcs_prt(xp->pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
908145e0143Sdh 		    "%s: Start ds_recovery for tgt 0x%p/PHY 0x%p (%s)",
909145e0143Sdh 		    __func__, (void *)xp, (void *)phyp, phyp->path);
910145e0143Sdh 		xp->recover_wait = 1;
911145e0143Sdh 
912145e0143Sdh 		/*
913145e0143Sdh 		 * Rather than waiting for the watchdog timer, we'll
914145e0143Sdh 		 * kick it right now.
915145e0143Sdh 		 */
916145e0143Sdh 		SCHEDULE_WORK(xp->pwp, PMCS_WORK_DS_ERR_RECOVERY);
917145e0143Sdh 		(void) ddi_taskq_dispatch(xp->pwp->tq, pmcs_worker, xp->pwp,
918145e0143Sdh 		    DDI_NOSLEEP);
919145e0143Sdh 	}
920145e0143Sdh }
921145e0143Sdh 
922145e0143Sdh /*
923145e0143Sdh  * Increment the phy ds error retry count.
924145e0143Sdh  * If too many retries, mark phy dead and restart discovery;
925145e0143Sdh  * otherwise schedule ds recovery.
926145e0143Sdh  */
927145e0143Sdh static void
pmcs_handle_ds_recovery_error(pmcs_phy_t * phyp,pmcs_xscsi_t * tgt,pmcs_hw_t * pwp,const char * func_name,char * reason_string)928145e0143Sdh pmcs_handle_ds_recovery_error(pmcs_phy_t *phyp, pmcs_xscsi_t *tgt,
929601c90f1SSrikanth, Ramana     pmcs_hw_t *pwp, const char *func_name, char *reason_string)
930145e0143Sdh {
931145e0143Sdh 	ASSERT(mutex_owned(&phyp->phy_lock));
932145e0143Sdh 	ASSERT((tgt == NULL) || mutex_owned(&tgt->statlock));
933145e0143Sdh 
934145e0143Sdh 	phyp->ds_recovery_retries++;
935145e0143Sdh 
936145e0143Sdh 	if (phyp->ds_recovery_retries > PMCS_MAX_DS_RECOVERY_RETRIES) {
937145e0143Sdh 		pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, tgt,
938145e0143Sdh 		    "%s: retry limit reached after %s to PHY %s failed",
939145e0143Sdh 		    func_name, reason_string, phyp->path);
940145e0143Sdh 		if (tgt != NULL) {
941145e0143Sdh 			tgt->recover_wait = 0;
942145e0143Sdh 		}
943601c90f1SSrikanth, Ramana 		/*
944601c90f1SSrikanth, Ramana 		 * Mark the PHY as dead and it and its parent as changed,
945601c90f1SSrikanth, Ramana 		 * then restart discovery
946601c90f1SSrikanth, Ramana 		 */
947145e0143Sdh 		phyp->dead = 1;
948601c90f1SSrikanth, Ramana 		PHY_CHANGED(pwp, phyp);
949601c90f1SSrikanth, Ramana 		if (phyp->parent)
950601c90f1SSrikanth, Ramana 			PHY_CHANGED(pwp, phyp->parent);
951145e0143Sdh 		RESTART_DISCOVERY(pwp);
952145e0143Sdh 	} else if ((phyp->ds_prev_good_recoveries >
953145e0143Sdh 	    PMCS_MAX_DS_RECOVERY_RETRIES) &&
954145e0143Sdh 	    (phyp->last_good_recovery + drv_usectohz(PMCS_MAX_DS_RECOVERY_TIME)
955145e0143Sdh 	    < ddi_get_lbolt())) {
956145e0143Sdh 		pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, tgt, "%s: max number of "
957145e0143Sdh 		    "successful recoveries reached, declaring PHY %s dead",
958145e0143Sdh 		    __func__, phyp->path);
959145e0143Sdh 		if (tgt != NULL) {
960145e0143Sdh 			tgt->recover_wait = 0;
961145e0143Sdh 		}
962601c90f1SSrikanth, Ramana 		/*
963601c90f1SSrikanth, Ramana 		 * Mark the PHY as dead and its parent as changed,
964601c90f1SSrikanth, Ramana 		 * then restart discovery
965601c90f1SSrikanth, Ramana 		 */
966145e0143Sdh 		phyp->dead = 1;
967601c90f1SSrikanth, Ramana 		PHY_CHANGED(pwp, phyp);
968601c90f1SSrikanth, Ramana 		if (phyp->parent)
969601c90f1SSrikanth, Ramana 			PHY_CHANGED(pwp, phyp->parent);
970145e0143Sdh 		RESTART_DISCOVERY(pwp);
971145e0143Sdh 	} else {
972145e0143Sdh 		SCHEDULE_WORK(pwp, PMCS_WORK_DS_ERR_RECOVERY);
973145e0143Sdh 	}
974145e0143Sdh }
975