/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */
24145e0143Sdh
/*
 * PM8001 device state recovery routines
 */
28145e0143Sdh
29145e0143Sdh #include <sys/scsi/adapters/pmcs/pmcs.h>
30145e0143Sdh
31145e0143Sdh /*
32145e0143Sdh * SAS Topology Configuration
33145e0143Sdh */
346745c559SJesse Butler static void pmcs_ds_operational(pmcs_phy_t *pptr, pmcs_xscsi_t *tgt);
35145e0143Sdh static void pmcs_handle_ds_recovery_error(pmcs_phy_t *phyp,
36601c90f1SSrikanth, Ramana pmcs_xscsi_t *tgt, pmcs_hw_t *pwp, const char *func_name,
37145e0143Sdh char *reason_string);
38145e0143Sdh
39145e0143Sdh /*
40145e0143Sdh * Get device state. Called with statlock and PHY lock held.
41145e0143Sdh */
42145e0143Sdh static int
pmcs_get_dev_state(pmcs_hw_t * pwp,pmcs_phy_t * phyp,pmcs_xscsi_t * xp,uint8_t * ds)43145e0143Sdh pmcs_get_dev_state(pmcs_hw_t *pwp, pmcs_phy_t *phyp, pmcs_xscsi_t *xp,
44145e0143Sdh uint8_t *ds)
45145e0143Sdh {
46145e0143Sdh uint32_t htag, *ptr, msg[PMCS_MSG_SIZE];
47145e0143Sdh int result;
48145e0143Sdh struct pmcwork *pwrk;
49145e0143Sdh
50145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG3, phyp, xp, "%s: tgt(0x%p)", __func__,
51145e0143Sdh (void *)xp);
52145e0143Sdh
53145e0143Sdh if (xp != NULL) {
54145e0143Sdh ASSERT(mutex_owned(&xp->statlock));
55145e0143Sdh }
56f96f3b56SSrikanth, Ramana
57f96f3b56SSrikanth, Ramana if (phyp == NULL) {
58f96f3b56SSrikanth, Ramana pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, NULL, xp,
59f96f3b56SSrikanth, Ramana "%s: PHY is NULL", __func__);
60f96f3b56SSrikanth, Ramana return (-1);
61f96f3b56SSrikanth, Ramana }
62145e0143Sdh ASSERT(mutex_owned(&phyp->phy_lock));
63145e0143Sdh
64145e0143Sdh pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, phyp);
65145e0143Sdh if (pwrk == NULL) {
66145e0143Sdh pmcs_prt(pwp, PMCS_PRT_ERR, phyp, xp, pmcs_nowrk, __func__);
67145e0143Sdh return (-1);
68145e0143Sdh }
69145e0143Sdh pwrk->arg = msg;
70145e0143Sdh pwrk->dtype = phyp->dtype;
71145e0143Sdh
72145e0143Sdh if (phyp->valid_device_id == 0) {
73145e0143Sdh pmcs_pwork(pwp, pwrk);
74145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, xp,
75145e0143Sdh "%s: Invalid DeviceID", __func__);
76145e0143Sdh return (-1);
77145e0143Sdh }
78145e0143Sdh htag = pwrk->htag;
79145e0143Sdh msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
80145e0143Sdh PMCIN_GET_DEVICE_STATE));
81145e0143Sdh msg[1] = LE_32(pwrk->htag);
82145e0143Sdh msg[2] = LE_32(phyp->device_id);
83601c90f1SSrikanth, Ramana CLEAN_MESSAGE(msg, 3);
84145e0143Sdh
85145e0143Sdh mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
86145e0143Sdh ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
87145e0143Sdh if (ptr == NULL) {
88145e0143Sdh mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
89145e0143Sdh pmcs_pwork(pwp, pwrk);
90145e0143Sdh pmcs_prt(pwp, PMCS_PRT_ERR, phyp, xp, pmcs_nomsg, __func__);
91145e0143Sdh return (-1);
92145e0143Sdh }
93145e0143Sdh COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE);
94145e0143Sdh pwrk->state = PMCS_WORK_STATE_ONCHIP;
95145e0143Sdh INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
96145e0143Sdh
97145e0143Sdh if (xp != NULL) {
98145e0143Sdh mutex_exit(&xp->statlock);
99145e0143Sdh }
100145e0143Sdh pmcs_unlock_phy(phyp);
101145e0143Sdh WAIT_FOR(pwrk, 1000, result);
102145e0143Sdh pmcs_pwork(pwp, pwrk);
1033be32c0fSJesse Butler pmcs_lock_phy(phyp);
104145e0143Sdh
105145e0143Sdh if (xp != NULL) {
106145e0143Sdh mutex_enter(&xp->statlock);
107145e0143Sdh }
108145e0143Sdh
109145e0143Sdh if (result) {
110145e0143Sdh pmcs_timed_out(pwp, htag, __func__);
111145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, xp,
112145e0143Sdh "%s: cmd timed out, returning", __func__);
113145e0143Sdh return (-1);
114145e0143Sdh }
115145e0143Sdh if (LE_32(msg[2]) == 0) {
116145e0143Sdh *ds = (uint8_t)(LE_32(msg[4]));
117145e0143Sdh if (xp == NULL) {
118145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
119145e0143Sdh "%s: retrieved_ds=0x%x", __func__, *ds);
120145e0143Sdh } else if (*ds != xp->dev_state) {
121145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
122145e0143Sdh "%s: retrieved_ds=0x%x, target_ds=0x%x", __func__,
123145e0143Sdh *ds, xp->dev_state);
124145e0143Sdh }
125145e0143Sdh return (0);
126145e0143Sdh } else {
127145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
128145e0143Sdh "%s: cmd failed Status(0x%x), returning ", __func__,
129145e0143Sdh LE_32(msg[2]));
130145e0143Sdh return (-1);
131145e0143Sdh }
132145e0143Sdh }
133145e0143Sdh
134145e0143Sdh /*
135145e0143Sdh * Set device state. Called with target's statlock and PHY lock held.
136145e0143Sdh */
137145e0143Sdh static int
pmcs_set_dev_state(pmcs_hw_t * pwp,pmcs_phy_t * phyp,pmcs_xscsi_t * xp,uint8_t ds)138145e0143Sdh pmcs_set_dev_state(pmcs_hw_t *pwp, pmcs_phy_t *phyp, pmcs_xscsi_t *xp,
139145e0143Sdh uint8_t ds)
140145e0143Sdh {
141145e0143Sdh uint32_t htag, *ptr, msg[PMCS_MSG_SIZE];
142145e0143Sdh int result;
143145e0143Sdh uint8_t pds, nds;
144145e0143Sdh struct pmcwork *pwrk;
145145e0143Sdh
146145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
147145e0143Sdh "%s: ds: 0x%x tgt: 0x%p phy: 0x%p", __func__, ds, (void *)xp,
148145e0143Sdh (void *)phyp);
149145e0143Sdh
150145e0143Sdh if (phyp == NULL) {
151145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, NULL, xp,
152145e0143Sdh "%s: PHY is NULL", __func__);
153145e0143Sdh return (-1);
154145e0143Sdh }
155145e0143Sdh
156145e0143Sdh pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, phyp);
157145e0143Sdh if (pwrk == NULL) {
158145e0143Sdh pmcs_prt(pwp, PMCS_PRT_ERR, phyp, xp, pmcs_nowrk, __func__);
159145e0143Sdh return (-1);
160145e0143Sdh }
161145e0143Sdh if (phyp->valid_device_id == 0) {
162145e0143Sdh pmcs_pwork(pwp, pwrk);
163145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
164145e0143Sdh "%s: Invalid DeviceID", __func__);
165145e0143Sdh return (-1);
166145e0143Sdh }
167145e0143Sdh pwrk->arg = msg;
168145e0143Sdh pwrk->dtype = phyp->dtype;
169145e0143Sdh htag = pwrk->htag;
170145e0143Sdh msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
171145e0143Sdh PMCIN_SET_DEVICE_STATE));
172145e0143Sdh msg[1] = LE_32(pwrk->htag);
173145e0143Sdh msg[2] = LE_32(phyp->device_id);
174145e0143Sdh msg[3] = LE_32(ds);
175601c90f1SSrikanth, Ramana CLEAN_MESSAGE(msg, 4);
176145e0143Sdh
177145e0143Sdh mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
178145e0143Sdh ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
179145e0143Sdh if (ptr == NULL) {
180145e0143Sdh mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
181145e0143Sdh pmcs_pwork(pwp, pwrk);
182145e0143Sdh pmcs_prt(pwp, PMCS_PRT_ERR, phyp, xp, pmcs_nomsg, __func__);
183145e0143Sdh return (-1);
184145e0143Sdh }
185145e0143Sdh COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE);
186145e0143Sdh pwrk->state = PMCS_WORK_STATE_ONCHIP;
187145e0143Sdh INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
188145e0143Sdh
189145e0143Sdh if (xp != NULL) {
190145e0143Sdh mutex_exit(&xp->statlock);
191145e0143Sdh }
192145e0143Sdh pmcs_unlock_phy(phyp);
193145e0143Sdh WAIT_FOR(pwrk, 1000, result);
194145e0143Sdh pmcs_pwork(pwp, pwrk);
1953be32c0fSJesse Butler pmcs_lock_phy(phyp);
196145e0143Sdh if (xp != NULL) {
197145e0143Sdh mutex_enter(&xp->statlock);
198145e0143Sdh }
199145e0143Sdh
200145e0143Sdh if (result) {
201145e0143Sdh pmcs_timed_out(pwp, htag, __func__);
202145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
203145e0143Sdh "%s: cmd timed out, returning", __func__);
204145e0143Sdh return (-1);
205145e0143Sdh }
206145e0143Sdh if (LE_32(msg[2]) == 0) {
207145e0143Sdh pds = (uint8_t)(LE_32(msg[4]) >> 4);
208145e0143Sdh nds = (uint8_t)(LE_32(msg[4]) & 0x0000000f);
209145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
210145e0143Sdh "%s: previous_ds=0x%x, new_ds=0x%x", __func__, pds, nds);
211145e0143Sdh if (xp != NULL) {
212145e0143Sdh xp->dev_state = nds;
213145e0143Sdh }
214145e0143Sdh return (0);
215145e0143Sdh } else {
216145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
217145e0143Sdh "%s: cmd failed Status(0x%x), returning ", __func__,
218145e0143Sdh LE_32(msg[2]));
219145e0143Sdh return (-1);
220145e0143Sdh }
221145e0143Sdh }
222145e0143Sdh
2236745c559SJesse Butler static void
pmcs_ds_operational(pmcs_phy_t * pptr,pmcs_xscsi_t * tgt)2246745c559SJesse Butler pmcs_ds_operational(pmcs_phy_t *pptr, pmcs_xscsi_t *tgt)
2256745c559SJesse Butler {
2266745c559SJesse Butler pmcs_hw_t *pwp;
2276745c559SJesse Butler
2286745c559SJesse Butler ASSERT(pptr);
2296745c559SJesse Butler pwp = pptr->pwp;
2306745c559SJesse Butler
2316745c559SJesse Butler if (tgt != NULL) {
2326745c559SJesse Butler tgt->recover_wait = 0;
2336745c559SJesse Butler }
2346745c559SJesse Butler pptr->ds_recovery_retries = 0;
2356745c559SJesse Butler
2366745c559SJesse Butler if ((pptr->ds_prev_good_recoveries == 0) ||
2376745c559SJesse Butler (ddi_get_lbolt() - pptr->last_good_recovery >
2386745c559SJesse Butler drv_usectohz(PMCS_MAX_DS_RECOVERY_TIME))) {
2396745c559SJesse Butler pptr->last_good_recovery = ddi_get_lbolt();
2406745c559SJesse Butler pptr->ds_prev_good_recoveries = 1;
2416745c559SJesse Butler } else if (ddi_get_lbolt() < pptr->last_good_recovery +
2426745c559SJesse Butler drv_usectohz(PMCS_MAX_DS_RECOVERY_TIME)) {
2436745c559SJesse Butler pptr->ds_prev_good_recoveries++;
2446745c559SJesse Butler } else {
245601c90f1SSrikanth, Ramana pmcs_handle_ds_recovery_error(pptr, tgt, pwp, __func__,
246601c90f1SSrikanth, Ramana "Max recovery attempts reached. Declaring PHY dead");
2476745c559SJesse Butler }
2486745c559SJesse Butler
2496745c559SJesse Butler /* Don't bother to run the work queues if the PHY is dead */
2506745c559SJesse Butler if (!pptr->dead) {
2516745c559SJesse Butler SCHEDULE_WORK(pwp, PMCS_WORK_RUN_QUEUES);
2526745c559SJesse Butler (void) ddi_taskq_dispatch(pwp->tq, pmcs_worker,
2536745c559SJesse Butler pwp, DDI_NOSLEEP);
2546745c559SJesse Butler }
2556745c559SJesse Butler }
2566745c559SJesse Butler
257145e0143Sdh void
pmcs_dev_state_recovery(pmcs_hw_t * pwp,pmcs_phy_t * phyp)258145e0143Sdh pmcs_dev_state_recovery(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
259145e0143Sdh {
260b0e5d1e5SSrikanth, Ramana boolean_t reschedule = B_FALSE;
261145e0143Sdh uint8_t ds, tgt_dev_state;
262145e0143Sdh int rc;
263145e0143Sdh pmcs_xscsi_t *tgt;
264145e0143Sdh pmcs_phy_t *pptr, *pnext, *pchild;
265145e0143Sdh
266145e0143Sdh /*
267145e0143Sdh * First time, check to see if we're already performing recovery
268145e0143Sdh */
269145e0143Sdh if (phyp == NULL) {
270145e0143Sdh mutex_enter(&pwp->lock);
271145e0143Sdh if (pwp->ds_err_recovering) {
272145e0143Sdh mutex_exit(&pwp->lock);
273145e0143Sdh SCHEDULE_WORK(pwp, PMCS_WORK_DS_ERR_RECOVERY);
274145e0143Sdh return;
275145e0143Sdh }
276145e0143Sdh
277145e0143Sdh pwp->ds_err_recovering = 1;
278145e0143Sdh pptr = pwp->root_phys;
279145e0143Sdh mutex_exit(&pwp->lock);
280145e0143Sdh } else {
281145e0143Sdh pptr = phyp;
282145e0143Sdh }
283145e0143Sdh
284145e0143Sdh while (pptr) {
285145e0143Sdh /*
286145e0143Sdh * Since ds_err_recovering is set, we can be assured these
287145e0143Sdh * PHYs won't disappear on us while we do this.
288145e0143Sdh */
289145e0143Sdh pmcs_lock_phy(pptr);
290145e0143Sdh pchild = pptr->children;
291145e0143Sdh pnext = pptr->sibling;
292145e0143Sdh pmcs_unlock_phy(pptr);
293145e0143Sdh
294145e0143Sdh if (pchild) {
295145e0143Sdh pmcs_dev_state_recovery(pwp, pchild);
296145e0143Sdh }
297145e0143Sdh
298145e0143Sdh tgt = NULL;
299145e0143Sdh pmcs_lock_phy(pptr);
300145e0143Sdh
301601c90f1SSrikanth, Ramana if (pptr->dead || !pptr->valid_device_id) {
302601c90f1SSrikanth, Ramana goto next_phy;
303601c90f1SSrikanth, Ramana }
304601c90f1SSrikanth, Ramana
305601c90f1SSrikanth, Ramana if (pptr->iport && (pptr->iport->ua_state != UA_ACTIVE)) {
306601c90f1SSrikanth, Ramana pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, pptr->target,
307601c90f1SSrikanth, Ramana "%s: No DS recovery on PHY %s, iport not active",
308601c90f1SSrikanth, Ramana __func__, pptr->path);
309145e0143Sdh goto next_phy;
310145e0143Sdh }
311145e0143Sdh
312145e0143Sdh tgt = pptr->target;
313145e0143Sdh
314145e0143Sdh if (tgt != NULL) {
315145e0143Sdh mutex_enter(&tgt->statlock);
316145e0143Sdh if (tgt->recover_wait == 0) {
317145e0143Sdh goto next_phy;
318145e0143Sdh }
319145e0143Sdh tgt_dev_state = tgt->dev_state;
320145e0143Sdh } else {
321145e0143Sdh tgt_dev_state = PMCS_DEVICE_STATE_NOT_AVAILABLE;
322145e0143Sdh }
323145e0143Sdh
324145e0143Sdh if (pptr->prev_recovery) {
325145e0143Sdh if (ddi_get_lbolt() - pptr->prev_recovery <
326145e0143Sdh drv_usectohz(PMCS_DS_RECOVERY_INTERVAL)) {
327145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG2, pptr, tgt,
328145e0143Sdh "%s: DS recovery on PHY %s "
329145e0143Sdh "re-invoked too soon. Skipping...",
330145e0143Sdh __func__, pptr->path);
331b0e5d1e5SSrikanth, Ramana if ((tgt) && (tgt->recover_wait)) {
332b0e5d1e5SSrikanth, Ramana reschedule = B_TRUE;
333b0e5d1e5SSrikanth, Ramana }
334145e0143Sdh goto next_phy;
335145e0143Sdh }
336145e0143Sdh }
337145e0143Sdh pptr->prev_recovery = ddi_get_lbolt();
338145e0143Sdh
339145e0143Sdh /*
340145e0143Sdh * Step 1: Put the device into the IN_RECOVERY state
341145e0143Sdh */
342145e0143Sdh rc = pmcs_get_dev_state(pwp, pptr, tgt, &ds);
343145e0143Sdh if (rc != 0) {
344145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
345145e0143Sdh "%s: pmcs_get_dev_state on PHY %s "
346145e0143Sdh "failed (rc=%d)",
347145e0143Sdh __func__, pptr->path, rc);
348145e0143Sdh
349145e0143Sdh pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
350601c90f1SSrikanth, Ramana __func__, "pmcs_get_dev_state");
351145e0143Sdh
352145e0143Sdh goto next_phy;
353145e0143Sdh }
354145e0143Sdh
3556745c559SJesse Butler /* If the chip says it's operational, we're done */
3566745c559SJesse Butler if (ds == PMCS_DEVICE_STATE_OPERATIONAL) {
3576745c559SJesse Butler pmcs_ds_operational(pptr, tgt);
3586745c559SJesse Butler goto next_phy;
3596745c559SJesse Butler }
3606745c559SJesse Butler
361145e0143Sdh if ((tgt_dev_state == ds) &&
362145e0143Sdh (ds == PMCS_DEVICE_STATE_IN_RECOVERY)) {
363145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt,
364145e0143Sdh "%s: Target 0x%p already IN_RECOVERY", __func__,
365145e0143Sdh (void *)tgt);
366145e0143Sdh } else {
367145e0143Sdh if (tgt != NULL) {
368145e0143Sdh tgt->dev_state = ds;
369145e0143Sdh }
370145e0143Sdh tgt_dev_state = ds;
371145e0143Sdh ds = PMCS_DEVICE_STATE_IN_RECOVERY;
372145e0143Sdh rc = pmcs_send_err_recovery_cmd(pwp, ds, pptr, tgt);
373145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt,
374145e0143Sdh "%s: pmcs_send_err_recovery_cmd "
375145e0143Sdh "result(%d) tgt(0x%p) ds(0x%x) tgt->ds(0x%x)",
376145e0143Sdh __func__, rc, (void *)tgt, ds, tgt_dev_state);
377145e0143Sdh
378145e0143Sdh if (rc) {
379145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
380145e0143Sdh "%s: pmcs_send_err_recovery_cmd to PHY %s "
381145e0143Sdh "failed (rc=%d)",
382145e0143Sdh __func__, pptr->path, rc);
383145e0143Sdh
384145e0143Sdh pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
385601c90f1SSrikanth, Ramana __func__, "pmcs_send_err_recovery_cmd");
386145e0143Sdh
387145e0143Sdh goto next_phy;
388145e0143Sdh }
389145e0143Sdh }
390145e0143Sdh
391145e0143Sdh /*
3926745c559SJesse Butler * Step 2: Perform a hard reset on the PHY.
393145e0143Sdh */
394601c90f1SSrikanth, Ramana pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt,
395601c90f1SSrikanth, Ramana "%s: Issue HARD_RESET to PHY %s", __func__,
396601c90f1SSrikanth, Ramana pptr->path);
397601c90f1SSrikanth, Ramana /*
398601c90f1SSrikanth, Ramana * Must release statlock here because pmcs_reset_phy
399601c90f1SSrikanth, Ramana * will drop and reacquire the PHY lock.
400601c90f1SSrikanth, Ramana */
401601c90f1SSrikanth, Ramana if (tgt != NULL) {
402601c90f1SSrikanth, Ramana mutex_exit(&tgt->statlock);
403601c90f1SSrikanth, Ramana }
404601c90f1SSrikanth, Ramana rc = pmcs_reset_phy(pwp, pptr, PMCS_PHYOP_HARD_RESET);
405601c90f1SSrikanth, Ramana if (tgt != NULL) {
406601c90f1SSrikanth, Ramana mutex_enter(&tgt->statlock);
407601c90f1SSrikanth, Ramana }
408601c90f1SSrikanth, Ramana if (rc) {
409601c90f1SSrikanth, Ramana pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
410601c90f1SSrikanth, Ramana "%s: HARD_RESET to PHY %s failed (rc=%d)",
411601c90f1SSrikanth, Ramana __func__, pptr->path, rc);
412145e0143Sdh
413601c90f1SSrikanth, Ramana pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
414601c90f1SSrikanth, Ramana __func__, "HARD_RESET");
415145e0143Sdh
416601c90f1SSrikanth, Ramana goto next_phy;
417145e0143Sdh }
418145e0143Sdh
419145e0143Sdh /*
420145e0143Sdh * Step 3: Abort all I/Os to the device
421145e0143Sdh */
422145e0143Sdh if (pptr->abort_all_start) {
423145e0143Sdh while (pptr->abort_all_start) {
424145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
425145e0143Sdh "%s: Waiting for outstanding ABORT_ALL on "
426145e0143Sdh "PHY 0x%p", __func__, (void *)pptr);
427145e0143Sdh cv_wait(&pptr->abort_all_cv, &pptr->phy_lock);
428145e0143Sdh }
429145e0143Sdh } else {
430145e0143Sdh if (tgt != NULL) {
431145e0143Sdh mutex_exit(&tgt->statlock);
432145e0143Sdh }
433145e0143Sdh rc = pmcs_abort(pwp, pptr, pptr->device_id, 1, 1);
434145e0143Sdh if (tgt != NULL) {
435145e0143Sdh mutex_enter(&tgt->statlock);
436145e0143Sdh }
437145e0143Sdh if (rc != 0) {
438145e0143Sdh pptr->abort_pending = 1;
439145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
440145e0143Sdh "%s: pmcs_abort to PHY %s failed (rc=%d)",
441145e0143Sdh __func__, pptr->path, rc);
442145e0143Sdh
443145e0143Sdh pmcs_handle_ds_recovery_error(pptr, tgt,
444601c90f1SSrikanth, Ramana pwp, __func__, "pmcs_abort");
445145e0143Sdh
446145e0143Sdh goto next_phy;
447145e0143Sdh }
448145e0143Sdh }
449145e0143Sdh
450145e0143Sdh /*
451145e0143Sdh * Step 4: Set the device back to OPERATIONAL state
452145e0143Sdh */
453145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt,
454145e0143Sdh "%s: Set PHY/tgt 0x%p/0x%p to OPERATIONAL state",
455145e0143Sdh __func__, (void *)pptr, (void *)tgt);
456145e0143Sdh rc = pmcs_set_dev_state(pwp, pptr, tgt,
457145e0143Sdh PMCS_DEVICE_STATE_OPERATIONAL);
458145e0143Sdh if (rc == 0) {
4596745c559SJesse Butler pmcs_ds_operational(pptr, tgt);
460145e0143Sdh } else {
461145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt,
462145e0143Sdh "%s: Failed to SET tgt 0x%p to OPERATIONAL state",
463145e0143Sdh __func__, (void *)tgt);
464145e0143Sdh
465145e0143Sdh pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
466601c90f1SSrikanth, Ramana __func__, "SET tgt to OPERATIONAL state");
467145e0143Sdh
468145e0143Sdh goto next_phy;
469145e0143Sdh }
470145e0143Sdh
471145e0143Sdh next_phy:
472145e0143Sdh if (tgt) {
473145e0143Sdh mutex_exit(&tgt->statlock);
474145e0143Sdh }
475145e0143Sdh pmcs_unlock_phy(pptr);
476145e0143Sdh pptr = pnext;
477145e0143Sdh }
478145e0143Sdh
479145e0143Sdh /*
480145e0143Sdh * Only clear ds_err_recovering if we're exiting for good and not
481145e0143Sdh * just unwinding from recursion
482145e0143Sdh */
483145e0143Sdh if (phyp == NULL) {
484145e0143Sdh mutex_enter(&pwp->lock);
485145e0143Sdh pwp->ds_err_recovering = 0;
486145e0143Sdh mutex_exit(&pwp->lock);
487145e0143Sdh }
488b0e5d1e5SSrikanth, Ramana
489b0e5d1e5SSrikanth, Ramana if (reschedule) {
490b0e5d1e5SSrikanth, Ramana SCHEDULE_WORK(pwp, PMCS_WORK_DS_ERR_RECOVERY);
491b0e5d1e5SSrikanth, Ramana }
492145e0143Sdh }
493145e0143Sdh
494145e0143Sdh /*
495145e0143Sdh * Called with target's statlock held (if target is non-NULL) and PHY lock held.
496145e0143Sdh */
497145e0143Sdh int
pmcs_send_err_recovery_cmd(pmcs_hw_t * pwp,uint8_t dev_state,pmcs_phy_t * phyp,pmcs_xscsi_t * tgt)498145e0143Sdh pmcs_send_err_recovery_cmd(pmcs_hw_t *pwp, uint8_t dev_state, pmcs_phy_t *phyp,
499145e0143Sdh pmcs_xscsi_t *tgt)
500145e0143Sdh {
501145e0143Sdh int rc = -1;
502145e0143Sdh uint8_t tgt_dev_state = PMCS_DEVICE_STATE_NOT_AVAILABLE;
503145e0143Sdh
504145e0143Sdh if (tgt != NULL) {
505145e0143Sdh ASSERT(mutex_owned(&tgt->statlock));
506145e0143Sdh if (tgt->recovering) {
507145e0143Sdh return (0);
508145e0143Sdh }
509145e0143Sdh
510145e0143Sdh tgt->recovering = 1;
511145e0143Sdh tgt_dev_state = tgt->dev_state;
512145e0143Sdh }
513145e0143Sdh
514145e0143Sdh if (phyp == NULL) {
515145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, NULL, tgt,
516145e0143Sdh "%s: PHY is NULL", __func__);
517145e0143Sdh return (-1);
518145e0143Sdh }
519145e0143Sdh
520145e0143Sdh ASSERT(mutex_owned(&phyp->phy_lock));
521145e0143Sdh
522145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt,
523145e0143Sdh "%s: ds: 0x%x, tgt ds(0x%x)", __func__, dev_state, tgt_dev_state);
524145e0143Sdh
525145e0143Sdh switch (dev_state) {
526145e0143Sdh case PMCS_DEVICE_STATE_IN_RECOVERY:
527145e0143Sdh if (tgt_dev_state == PMCS_DEVICE_STATE_IN_RECOVERY) {
528145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt,
529145e0143Sdh "%s: Target 0x%p already IN_RECOVERY", __func__,
530145e0143Sdh (void *)tgt);
531145e0143Sdh rc = 0; /* This is not an error */
532145e0143Sdh goto no_action;
533145e0143Sdh }
534145e0143Sdh
535145e0143Sdh rc = pmcs_set_dev_state(pwp, phyp, tgt,
536145e0143Sdh PMCS_DEVICE_STATE_IN_RECOVERY);
537145e0143Sdh if (rc != 0) {
538145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt,
539145e0143Sdh "%s(1): Failed to set tgt(0x%p) to IN_RECOVERY",
540145e0143Sdh __func__, (void *)tgt);
541145e0143Sdh }
542145e0143Sdh
543145e0143Sdh break;
544145e0143Sdh
545145e0143Sdh case PMCS_DEVICE_STATE_OPERATIONAL:
546145e0143Sdh if (tgt_dev_state != PMCS_DEVICE_STATE_IN_RECOVERY) {
547145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt,
548145e0143Sdh "%s: Target 0x%p not ready to go OPERATIONAL",
549145e0143Sdh __func__, (void *)tgt);
550145e0143Sdh goto no_action;
551145e0143Sdh }
552145e0143Sdh
553145e0143Sdh rc = pmcs_set_dev_state(pwp, phyp, tgt,
554145e0143Sdh PMCS_DEVICE_STATE_OPERATIONAL);
555145e0143Sdh if (tgt != NULL) {
556145e0143Sdh tgt->reset_success = 1;
557145e0143Sdh }
558145e0143Sdh if (rc != 0) {
559145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt,
560145e0143Sdh "%s(2): Failed to SET tgt(0x%p) to OPERATIONAL",
561145e0143Sdh __func__, (void *)tgt);
562145e0143Sdh if (tgt != NULL) {
563145e0143Sdh tgt->reset_success = 0;
564145e0143Sdh }
565145e0143Sdh }
566145e0143Sdh
567145e0143Sdh break;
568145e0143Sdh
569145e0143Sdh case PMCS_DEVICE_STATE_NON_OPERATIONAL:
570145e0143Sdh PHY_CHANGED(pwp, phyp);
571145e0143Sdh RESTART_DISCOVERY(pwp);
572145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt,
573145e0143Sdh "%s: Device at %s is non-operational",
574145e0143Sdh __func__, phyp->path);
575145e0143Sdh if (tgt != NULL) {
576145e0143Sdh tgt->dev_state = PMCS_DEVICE_STATE_NON_OPERATIONAL;
577145e0143Sdh }
578145e0143Sdh rc = 0;
579145e0143Sdh
580145e0143Sdh break;
581145e0143Sdh
582145e0143Sdh default:
583145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt,
584145e0143Sdh "%s: Invalid state requested (%d)", __func__,
585145e0143Sdh dev_state);
586145e0143Sdh break;
587145e0143Sdh
588145e0143Sdh }
589145e0143Sdh
590145e0143Sdh no_action:
591145e0143Sdh if (tgt != NULL) {
592145e0143Sdh tgt->recovering = 0;
593145e0143Sdh }
594145e0143Sdh return (rc);
595145e0143Sdh }
596145e0143Sdh
597145e0143Sdh /*
598145e0143Sdh * Start ssp event recovery. We have to schedule recovery operation because
599145e0143Sdh * it involves sending multiple commands to device and we should not do it
600145e0143Sdh * in the interrupt context.
601145e0143Sdh * If it is failure of a recovery command, let the recovery thread deal with it.
602225bf905SJesse Butler * Called with the work lock held.
603145e0143Sdh */
604145e0143Sdh void
pmcs_start_ssp_event_recovery(pmcs_hw_t * pwp,pmcwork_t * pwrk,uint32_t * iomb,size_t amt)605145e0143Sdh pmcs_start_ssp_event_recovery(pmcs_hw_t *pwp, pmcwork_t *pwrk, uint32_t *iomb,
606145e0143Sdh size_t amt)
607145e0143Sdh {
608145e0143Sdh pmcs_xscsi_t *tgt = pwrk->xp;
609145e0143Sdh uint32_t event = LE_32(iomb[2]);
610145e0143Sdh pmcs_phy_t *pptr = pwrk->phy;
611658280b6SDavid Hollister pmcs_cb_t callback;
612145e0143Sdh uint32_t tag;
613145e0143Sdh
614145e0143Sdh if (tgt != NULL) {
615145e0143Sdh mutex_enter(&tgt->statlock);
616145e0143Sdh if (!tgt->assigned) {
617145e0143Sdh if (pptr) {
618145e0143Sdh pmcs_dec_phy_ref_count(pptr);
619145e0143Sdh }
620145e0143Sdh pptr = NULL;
621145e0143Sdh pwrk->phy = NULL;
622145e0143Sdh }
623145e0143Sdh mutex_exit(&tgt->statlock);
624145e0143Sdh }
62556976565SDavid Hollister
626145e0143Sdh if (pptr == NULL) {
627145e0143Sdh /*
628145e0143Sdh * No target, need to run RE-DISCOVERY here.
629145e0143Sdh */
630145e0143Sdh if (pwrk->state != PMCS_WORK_STATE_TIMED_OUT) {
631145e0143Sdh pwrk->state = PMCS_WORK_STATE_INTR;
632145e0143Sdh }
633145e0143Sdh /*
634145e0143Sdh * Although we cannot mark phy to force abort nor mark phy
635145e0143Sdh * as changed, killing of a target would take care of aborting
636145e0143Sdh * commands for the device.
637145e0143Sdh */
638145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
639145e0143Sdh "%s: No valid target for event processing. Reconfigure.",
640145e0143Sdh __func__);
641145e0143Sdh pmcs_pwork(pwp, pwrk);
642145e0143Sdh RESTART_DISCOVERY(pwp);
643145e0143Sdh return;
644145e0143Sdh } else {
645225bf905SJesse Butler /* We have a phy pointer, we'll need to lock it */
646225bf905SJesse Butler mutex_exit(&pwrk->lock);
647145e0143Sdh pmcs_lock_phy(pptr);
648225bf905SJesse Butler mutex_enter(&pwrk->lock);
6493be32c0fSJesse Butler if (tgt != NULL) {
65056976565SDavid Hollister mutex_enter(&tgt->statlock);
65156976565SDavid Hollister }
652145e0143Sdh if (event == PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS) {
6533be32c0fSJesse Butler if ((tgt != NULL) && (tgt->dev_state !=
6543be32c0fSJesse Butler PMCS_DEVICE_STATE_NON_OPERATIONAL)) {
655145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
656145e0143Sdh "%s: Device at %s is non-operational",
657145e0143Sdh __func__, pptr->path);
658145e0143Sdh tgt->dev_state =
659145e0143Sdh PMCS_DEVICE_STATE_NON_OPERATIONAL;
660145e0143Sdh }
661145e0143Sdh pptr->abort_pending = 1;
6623be32c0fSJesse Butler if (tgt != NULL) {
66356976565SDavid Hollister mutex_exit(&tgt->statlock);
66456976565SDavid Hollister }
665145e0143Sdh mutex_exit(&pwrk->lock);
666225bf905SJesse Butler pmcs_unlock_phy(pptr);
667145e0143Sdh SCHEDULE_WORK(pwp, PMCS_WORK_ABORT_HANDLE);
668145e0143Sdh RESTART_DISCOVERY(pwp);
669145e0143Sdh return;
670145e0143Sdh }
671145e0143Sdh
672145e0143Sdh /*
673145e0143Sdh * If this command is run in WAIT mode, it is a failing recovery
674145e0143Sdh * command. If so, just wake up recovery thread waiting for
675145e0143Sdh * command completion.
676145e0143Sdh */
677145e0143Sdh tag = PMCS_TAG_TYPE(pwrk->htag);
678145e0143Sdh if (tag == PMCS_TAG_TYPE_WAIT) {
679145e0143Sdh pwrk->htag |= PMCS_TAG_DONE;
680145e0143Sdh if (pwrk->arg && amt) {
681145e0143Sdh (void) memcpy(pwrk->arg, iomb, amt);
682145e0143Sdh }
683145e0143Sdh cv_signal(&pwrk->sleep_cv);
6843be32c0fSJesse Butler if (tgt != NULL) {
68556976565SDavid Hollister mutex_exit(&tgt->statlock);
68656976565SDavid Hollister }
687225bf905SJesse Butler mutex_exit(&pwrk->lock);
688145e0143Sdh pmcs_unlock_phy(pptr);
689145e0143Sdh return;
690145e0143Sdh }
691145e0143Sdh
6923be32c0fSJesse Butler if (tgt == NULL) {
69356976565SDavid Hollister pmcs_prt(pwp, PMCS_PRT_DEBUG1, pptr, NULL,
69456976565SDavid Hollister "%s: Not scheduling SSP event recovery for NULL tgt"
69556976565SDavid Hollister " pwrk(%p) tag(0x%x)", __func__, (void *)pwrk,
69656976565SDavid Hollister pwrk->htag);
697225bf905SJesse Butler mutex_exit(&pwrk->lock);
698225bf905SJesse Butler pmcs_unlock_phy(pptr);
69956976565SDavid Hollister return;
70056976565SDavid Hollister }
70156976565SDavid Hollister
702658280b6SDavid Hollister /*
703658280b6SDavid Hollister * If the SSP event was an OPEN_RETRY_TIMEOUT, we don't want
704658280b6SDavid Hollister * to go through the recovery (abort/LU reset) process.
705658280b6SDavid Hollister * Simply complete the command and return it as STATUS_BUSY.
706658280b6SDavid Hollister * This will cause the target driver to simply retry.
707658280b6SDavid Hollister */
708658280b6SDavid Hollister if (event == PMCOUT_STATUS_IO_XFER_OPEN_RETRY_TIMEOUT) {
709658280b6SDavid Hollister pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
710658280b6SDavid Hollister "%s: Got OPEN_RETRY_TIMEOUT event (htag 0x%08x)",
711658280b6SDavid Hollister __func__, pwrk->htag);
712658280b6SDavid Hollister
713658280b6SDavid Hollister mutex_exit(&tgt->statlock);
714225bf905SJesse Butler /* Note: work remains locked for the callback */
715658280b6SDavid Hollister pmcs_unlock_phy(pptr);
716658280b6SDavid Hollister pwrk->ssp_event = event;
717658280b6SDavid Hollister callback = (pmcs_cb_t)pwrk->ptr;
718658280b6SDavid Hollister (*callback)(pwp, pwrk, iomb);
719658280b6SDavid Hollister return;
720658280b6SDavid Hollister }
721658280b6SDavid Hollister
722145e0143Sdh /*
723145e0143Sdh * To recover from primary failures,
724145e0143Sdh * we need to schedule handling events recovery.
725145e0143Sdh */
726145e0143Sdh tgt->event_recovery = 1;
727145e0143Sdh mutex_exit(&tgt->statlock);
728145e0143Sdh pwrk->ssp_event = event;
729225bf905SJesse Butler mutex_exit(&pwrk->lock);
730225bf905SJesse Butler pmcs_unlock_phy(pptr);
731145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
732145e0143Sdh "%s: Scheduling SSP event recovery for tgt(0x%p) "
733145e0143Sdh "pwrk(%p) tag(0x%x)", __func__, (void *)tgt, (void *)pwrk,
734145e0143Sdh pwrk->htag);
735145e0143Sdh SCHEDULE_WORK(pwp, PMCS_WORK_SSP_EVT_RECOVERY);
736145e0143Sdh }
737145e0143Sdh
738145e0143Sdh /* Work cannot be completed until event recovery is completed. */
739145e0143Sdh }
740145e0143Sdh
741145e0143Sdh /*
742145e0143Sdh * SSP target event recovery
743*219ebc8eSSrikanth Suravajhala * phy->lock should be held upon entry.
744*219ebc8eSSrikanth Suravajhala * pwrk->lock should be held upon entry and gets released by this routine.
745*219ebc8eSSrikanth Suravajhala * tgt->statlock should not be held.
746145e0143Sdh */
747145e0143Sdh void
pmcs_tgt_event_recovery(pmcs_hw_t * pwp,pmcwork_t * pwrk)748145e0143Sdh pmcs_tgt_event_recovery(pmcs_hw_t *pwp, pmcwork_t *pwrk)
749145e0143Sdh {
750145e0143Sdh pmcs_phy_t *pptr = pwrk->phy;
751145e0143Sdh pmcs_cmd_t *sp = pwrk->arg;
752145e0143Sdh pmcs_lun_t *lun = sp->cmd_lun;
753145e0143Sdh pmcs_xscsi_t *tgt = pwrk->xp;
754145e0143Sdh uint32_t event;
755145e0143Sdh uint32_t htag;
756145e0143Sdh uint32_t status;
757145e0143Sdh int rv;
758145e0143Sdh
759145e0143Sdh ASSERT(pwrk->arg != NULL);
760145e0143Sdh ASSERT(pwrk->xp != NULL);
761145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
762145e0143Sdh "%s: event recovery for target 0x%p", __func__, (void *)pwrk->xp);
763145e0143Sdh htag = pwrk->htag;
764145e0143Sdh event = pwrk->ssp_event;
765145e0143Sdh pwrk->ssp_event = 0xffffffff;
766658280b6SDavid Hollister
767*219ebc8eSSrikanth Suravajhala mutex_exit(&pwrk->lock);
768*219ebc8eSSrikanth Suravajhala
769145e0143Sdh if (event == PMCOUT_STATUS_XFER_ERR_BREAK ||
770145e0143Sdh event == PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY ||
771145e0143Sdh event == PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT) {
772145e0143Sdh /* Command may be still pending on device */
773145e0143Sdh rv = pmcs_ssp_tmf(pwp, pptr, SAS_QUERY_TASK, htag,
774145e0143Sdh lun->lun_num, &status);
775145e0143Sdh if (rv != 0) {
776145e0143Sdh goto out;
777145e0143Sdh }
778145e0143Sdh if (status == SAS_RSP_TMF_COMPLETE) {
779145e0143Sdh /* Command NOT pending on a device */
780145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
781145e0143Sdh "%s: No pending command for tgt 0x%p",
782145e0143Sdh __func__, (void *)tgt);
783145e0143Sdh /* Nothing more to do, just abort it on chip */
784145e0143Sdh htag = 0;
785145e0143Sdh }
786145e0143Sdh }
787145e0143Sdh /*
788145e0143Sdh * All other events left the command pending in the host
789145e0143Sdh * Send abort task and abort it on the chip
790145e0143Sdh */
791145e0143Sdh if (htag != 0) {
792145e0143Sdh if (pmcs_ssp_tmf(pwp, pptr, SAS_ABORT_TASK, htag,
793145e0143Sdh lun->lun_num, &status))
794145e0143Sdh goto out;
795145e0143Sdh }
796*219ebc8eSSrikanth Suravajhala (void) pmcs_abort(pwp, pptr, htag, 0, 1);
797145e0143Sdh /*
798145e0143Sdh * Abort either took care of work completion, or put device in
799145e0143Sdh * a recovery state
800145e0143Sdh */
801145e0143Sdh return;
802145e0143Sdh out:
803145e0143Sdh /* Abort failed, do full device recovery */
804*219ebc8eSSrikanth Suravajhala mutex_enter(&pwrk->lock);
805*219ebc8eSSrikanth Suravajhala tgt = pwrk->xp;
806*219ebc8eSSrikanth Suravajhala mutex_exit(&pwrk->lock);
807*219ebc8eSSrikanth Suravajhala if (tgt != NULL) {
808*219ebc8eSSrikanth Suravajhala mutex_enter(&tgt->statlock);
809*219ebc8eSSrikanth Suravajhala pmcs_start_dev_state_recovery(tgt, pptr);
810*219ebc8eSSrikanth Suravajhala mutex_exit(&tgt->statlock);
811145e0143Sdh }
812145e0143Sdh }
813145e0143Sdh
814145e0143Sdh /*
815145e0143Sdh * SSP event recovery task.
816145e0143Sdh */
817145e0143Sdh void
pmcs_ssp_event_recovery(pmcs_hw_t * pwp)818145e0143Sdh pmcs_ssp_event_recovery(pmcs_hw_t *pwp)
819145e0143Sdh {
820145e0143Sdh int idx;
821145e0143Sdh pmcs_xscsi_t *tgt;
822145e0143Sdh pmcs_cmd_t *cp;
823145e0143Sdh pmcwork_t *pwrk;
824145e0143Sdh pmcs_phy_t *pphy;
825145e0143Sdh int er_flag;
826145e0143Sdh uint32_t idxpwrk;
827145e0143Sdh
828145e0143Sdh restart:
829145e0143Sdh for (idx = 0; idx < pwp->max_dev; idx++) {
830145e0143Sdh mutex_enter(&pwp->lock);
831145e0143Sdh tgt = pwp->targets[idx];
832145e0143Sdh mutex_exit(&pwp->lock);
833601c90f1SSrikanth, Ramana if (tgt == NULL) {
834601c90f1SSrikanth, Ramana continue;
835601c90f1SSrikanth, Ramana }
836601c90f1SSrikanth, Ramana
837601c90f1SSrikanth, Ramana mutex_enter(&tgt->statlock);
838601c90f1SSrikanth, Ramana if (!tgt->assigned) {
839145e0143Sdh mutex_exit(&tgt->statlock);
840601c90f1SSrikanth, Ramana continue;
841601c90f1SSrikanth, Ramana }
842601c90f1SSrikanth, Ramana pphy = tgt->phy;
843601c90f1SSrikanth, Ramana er_flag = tgt->event_recovery;
844601c90f1SSrikanth, Ramana mutex_exit(&tgt->statlock);
845601c90f1SSrikanth, Ramana
846601c90f1SSrikanth, Ramana if ((pphy == NULL) || (er_flag == 0)) {
847601c90f1SSrikanth, Ramana continue;
848601c90f1SSrikanth, Ramana }
849601c90f1SSrikanth, Ramana
850601c90f1SSrikanth, Ramana pmcs_lock_phy(pphy);
851601c90f1SSrikanth, Ramana pmcs_prt(pwp, PMCS_PRT_DEBUG, pphy, tgt,
852601c90f1SSrikanth, Ramana "%s: found target(0x%p)", __func__, (void *) tgt);
853601c90f1SSrikanth, Ramana
854601c90f1SSrikanth, Ramana /* Check what cmd expects recovery */
855601c90f1SSrikanth, Ramana mutex_enter(&tgt->aqlock);
856601c90f1SSrikanth, Ramana STAILQ_FOREACH(cp, &tgt->aq, cmd_next) {
857601c90f1SSrikanth, Ramana idxpwrk = PMCS_TAG_INDEX(cp->cmd_tag);
858601c90f1SSrikanth, Ramana pwrk = &pwp->work[idxpwrk];
859*219ebc8eSSrikanth Suravajhala mutex_enter(&pwrk->lock);
860601c90f1SSrikanth, Ramana if (pwrk->htag != cp->cmd_tag) {
861601c90f1SSrikanth, Ramana /*
862601c90f1SSrikanth, Ramana * aq may contain TMF commands, so we
863601c90f1SSrikanth, Ramana * may not find work structure with htag
864601c90f1SSrikanth, Ramana */
865*219ebc8eSSrikanth Suravajhala mutex_exit(&pwrk->lock);
866*219ebc8eSSrikanth Suravajhala continue;
867601c90f1SSrikanth, Ramana }
868*219ebc8eSSrikanth Suravajhala if (!PMCS_COMMAND_DONE(pwrk) &&
869*219ebc8eSSrikanth Suravajhala (pwrk->ssp_event != 0) &&
870601c90f1SSrikanth, Ramana (pwrk->ssp_event != PMCS_REC_EVENT)) {
871145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG, pphy, tgt,
872601c90f1SSrikanth, Ramana "%s: pwrk(%p) htag(0x%x)",
873601c90f1SSrikanth, Ramana __func__, (void *) pwrk, cp->cmd_tag);
874145e0143Sdh mutex_exit(&tgt->aqlock);
875601c90f1SSrikanth, Ramana /*
876*219ebc8eSSrikanth Suravajhala * pwrk->lock gets dropped in
877*219ebc8eSSrikanth Suravajhala * pmcs_tgt_event_recovery()
878601c90f1SSrikanth, Ramana */
879*219ebc8eSSrikanth Suravajhala pmcs_tgt_event_recovery(pwp, pwrk);
880145e0143Sdh pmcs_unlock_phy(pphy);
881*219ebc8eSSrikanth Suravajhala /* All bets are off on tgt/aq now, restart */
882601c90f1SSrikanth, Ramana goto restart;
883145e0143Sdh }
884*219ebc8eSSrikanth Suravajhala mutex_exit(&pwrk->lock);
885145e0143Sdh }
886601c90f1SSrikanth, Ramana mutex_exit(&tgt->aqlock);
887*219ebc8eSSrikanth Suravajhala mutex_enter(&tgt->statlock);
888601c90f1SSrikanth, Ramana tgt->event_recovery = 0;
889601c90f1SSrikanth, Ramana pmcs_prt(pwp, PMCS_PRT_DEBUG, pphy, tgt,
890601c90f1SSrikanth, Ramana "%s: end of SSP event recovery for target(0x%p)",
891601c90f1SSrikanth, Ramana __func__, (void *) tgt);
892601c90f1SSrikanth, Ramana mutex_exit(&tgt->statlock);
893601c90f1SSrikanth, Ramana pmcs_unlock_phy(pphy);
894145e0143Sdh }
895145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
896145e0143Sdh "%s: end of SSP event recovery for pwp(0x%p)", __func__,
897145e0143Sdh (void *) pwp);
898145e0143Sdh }
899145e0143Sdh
900145e0143Sdh void
pmcs_start_dev_state_recovery(pmcs_xscsi_t * xp,pmcs_phy_t * phyp)901145e0143Sdh pmcs_start_dev_state_recovery(pmcs_xscsi_t *xp, pmcs_phy_t *phyp)
902145e0143Sdh {
903145e0143Sdh ASSERT(mutex_owned(&xp->statlock));
904145e0143Sdh ASSERT(xp->pwp != NULL);
905145e0143Sdh
906145e0143Sdh if (xp->recover_wait == 0) {
907145e0143Sdh pmcs_prt(xp->pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
908145e0143Sdh "%s: Start ds_recovery for tgt 0x%p/PHY 0x%p (%s)",
909145e0143Sdh __func__, (void *)xp, (void *)phyp, phyp->path);
910145e0143Sdh xp->recover_wait = 1;
911145e0143Sdh
912145e0143Sdh /*
913145e0143Sdh * Rather than waiting for the watchdog timer, we'll
914145e0143Sdh * kick it right now.
915145e0143Sdh */
916145e0143Sdh SCHEDULE_WORK(xp->pwp, PMCS_WORK_DS_ERR_RECOVERY);
917145e0143Sdh (void) ddi_taskq_dispatch(xp->pwp->tq, pmcs_worker, xp->pwp,
918145e0143Sdh DDI_NOSLEEP);
919145e0143Sdh }
920145e0143Sdh }
921145e0143Sdh
922145e0143Sdh /*
923145e0143Sdh * Increment the phy ds error retry count.
924145e0143Sdh * If too many retries, mark phy dead and restart discovery;
925145e0143Sdh * otherwise schedule ds recovery.
926145e0143Sdh */
927145e0143Sdh static void
pmcs_handle_ds_recovery_error(pmcs_phy_t * phyp,pmcs_xscsi_t * tgt,pmcs_hw_t * pwp,const char * func_name,char * reason_string)928145e0143Sdh pmcs_handle_ds_recovery_error(pmcs_phy_t *phyp, pmcs_xscsi_t *tgt,
929601c90f1SSrikanth, Ramana pmcs_hw_t *pwp, const char *func_name, char *reason_string)
930145e0143Sdh {
931145e0143Sdh ASSERT(mutex_owned(&phyp->phy_lock));
932145e0143Sdh ASSERT((tgt == NULL) || mutex_owned(&tgt->statlock));
933145e0143Sdh
934145e0143Sdh phyp->ds_recovery_retries++;
935145e0143Sdh
936145e0143Sdh if (phyp->ds_recovery_retries > PMCS_MAX_DS_RECOVERY_RETRIES) {
937145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, tgt,
938145e0143Sdh "%s: retry limit reached after %s to PHY %s failed",
939145e0143Sdh func_name, reason_string, phyp->path);
940145e0143Sdh if (tgt != NULL) {
941145e0143Sdh tgt->recover_wait = 0;
942145e0143Sdh }
943601c90f1SSrikanth, Ramana /*
944601c90f1SSrikanth, Ramana * Mark the PHY as dead and it and its parent as changed,
945601c90f1SSrikanth, Ramana * then restart discovery
946601c90f1SSrikanth, Ramana */
947145e0143Sdh phyp->dead = 1;
948601c90f1SSrikanth, Ramana PHY_CHANGED(pwp, phyp);
949601c90f1SSrikanth, Ramana if (phyp->parent)
950601c90f1SSrikanth, Ramana PHY_CHANGED(pwp, phyp->parent);
951145e0143Sdh RESTART_DISCOVERY(pwp);
952145e0143Sdh } else if ((phyp->ds_prev_good_recoveries >
953145e0143Sdh PMCS_MAX_DS_RECOVERY_RETRIES) &&
954145e0143Sdh (phyp->last_good_recovery + drv_usectohz(PMCS_MAX_DS_RECOVERY_TIME)
955145e0143Sdh < ddi_get_lbolt())) {
956145e0143Sdh pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, tgt, "%s: max number of "
957145e0143Sdh "successful recoveries reached, declaring PHY %s dead",
958145e0143Sdh __func__, phyp->path);
959145e0143Sdh if (tgt != NULL) {
960145e0143Sdh tgt->recover_wait = 0;
961145e0143Sdh }
962601c90f1SSrikanth, Ramana /*
963601c90f1SSrikanth, Ramana * Mark the PHY as dead and its parent as changed,
964601c90f1SSrikanth, Ramana * then restart discovery
965601c90f1SSrikanth, Ramana */
966145e0143Sdh phyp->dead = 1;
967601c90f1SSrikanth, Ramana PHY_CHANGED(pwp, phyp);
968601c90f1SSrikanth, Ramana if (phyp->parent)
969601c90f1SSrikanth, Ramana PHY_CHANGED(pwp, phyp->parent);
970145e0143Sdh RESTART_DISCOVERY(pwp);
971145e0143Sdh } else {
972145e0143Sdh SCHEDULE_WORK(pwp, PMCS_WORK_DS_ERR_RECOVERY);
973145e0143Sdh }
974145e0143Sdh }
975