xref: /illumos-gate/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_subr.c (revision b18a19c275d2531444fcd2f66664cbe3c6897f6a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  *
21  *
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * This file contains various support routines.
28  */
29 
30 #include <sys/scsi/adapters/pmcs/pmcs.h>
31 
32 /*
33  * Local static data
34  */
35 static int tgtmap_usec = MICROSEC;
36 
37 /*
38  * SAS Topology Configuration
39  */
40 static void pmcs_new_tport(pmcs_hw_t *, pmcs_phy_t *);
41 static void pmcs_configure_expander(pmcs_hw_t *, pmcs_phy_t *, pmcs_iport_t *);
42 
43 static boolean_t pmcs_check_expanders(pmcs_hw_t *, pmcs_phy_t *);
44 static void pmcs_check_expander(pmcs_hw_t *, pmcs_phy_t *);
45 static void pmcs_clear_expander(pmcs_hw_t *, pmcs_phy_t *, int);
46 
47 static int pmcs_expander_get_nphy(pmcs_hw_t *, pmcs_phy_t *);
48 static int pmcs_expander_content_discover(pmcs_hw_t *, pmcs_phy_t *,
49     pmcs_phy_t *);
50 
51 static int pmcs_smp_function_result(pmcs_hw_t *, smp_response_frame_t *);
52 static boolean_t pmcs_validate_devid(pmcs_phy_t *, pmcs_phy_t *, uint32_t);
53 static void pmcs_clear_phys(pmcs_hw_t *, pmcs_phy_t *);
54 static int pmcs_configure_new_devices(pmcs_hw_t *, pmcs_phy_t *);
55 static boolean_t pmcs_report_observations(pmcs_hw_t *);
56 static boolean_t pmcs_report_iport_observations(pmcs_hw_t *, pmcs_iport_t *,
57     pmcs_phy_t *);
58 static pmcs_phy_t *pmcs_find_phy_needing_work(pmcs_hw_t *, pmcs_phy_t *);
59 static int pmcs_kill_devices(pmcs_hw_t *, pmcs_phy_t *);
60 static void pmcs_lock_phy_impl(pmcs_phy_t *, int);
61 static void pmcs_unlock_phy_impl(pmcs_phy_t *, int);
62 static pmcs_phy_t *pmcs_clone_phy(pmcs_phy_t *);
63 static boolean_t pmcs_configure_phy(pmcs_hw_t *, pmcs_phy_t *);
64 static void pmcs_reap_dead_phy(pmcs_phy_t *);
65 static pmcs_iport_t *pmcs_get_iport_by_ua(pmcs_hw_t *, char *);
66 static boolean_t pmcs_phy_target_match(pmcs_phy_t *);
67 static void pmcs_handle_ds_recovery_error(pmcs_phy_t *phyp,
68     pmcs_xscsi_t *tgt, pmcs_hw_t *pwp, const char *func_name, int line,
69     char *reason_string);
70 
71 /*
72  * Often used strings
73  */
74 const char pmcs_nowrk[] = "%s: unable to get work structure";
75 const char pmcs_nomsg[] = "%s: unable to get Inbound Message entry";
76 const char pmcs_timeo[] = "!%s: command timed out";
77 
78 extern const ddi_dma_attr_t pmcs_dattr;
79 
80 /*
81  * Some Initial setup steps.
82  */
83 
84 int
85 pmcs_setup(pmcs_hw_t *pwp)
86 {
87 	uint32_t barval = pwp->mpibar;
88 	uint32_t i, scratch, regbar, regoff, barbar, baroff;
89 	uint32_t new_ioq_depth, ferr = 0;
90 
91 	/*
92 	 * Check current state. If we're not at READY state,
93 	 * we can't go further.
94 	 */
95 	scratch = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1);
96 	if ((scratch & PMCS_MSGU_AAP_STATE_MASK) == PMCS_MSGU_AAP_STATE_ERROR) {
97 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: AAP Error State (0x%x)",
98 		    __func__, pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1) &
99 		    PMCS_MSGU_AAP_ERROR_MASK);
100 		pmcs_fm_ereport(pwp, DDI_FM_DEVICE_INVAL_STATE);
101 		ddi_fm_service_impact(pwp->dip, DDI_SERVICE_LOST);
102 		return (-1);
103 	}
104 	if ((scratch & PMCS_MSGU_AAP_STATE_MASK) != PMCS_MSGU_AAP_STATE_READY) {
105 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
106 		    "%s: AAP unit not ready (state 0x%x)",
107 		    __func__, scratch & PMCS_MSGU_AAP_STATE_MASK);
108 		pmcs_fm_ereport(pwp, DDI_FM_DEVICE_INVAL_STATE);
109 		ddi_fm_service_impact(pwp->dip, DDI_SERVICE_LOST);
110 		return (-1);
111 	}
112 
113 	/*
114 	 * Read the offset from the Message Unit scratchpad 0 register.
115 	 * This allows us to read the MPI Configuration table.
116 	 *
117 	 * Check its signature for validity.
118 	 */
119 	baroff = barval;
120 	barbar = barval >> PMCS_MSGU_MPI_BAR_SHIFT;
121 	baroff &= PMCS_MSGU_MPI_OFFSET_MASK;
122 
123 	regoff = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH0);
124 	regbar = regoff >> PMCS_MSGU_MPI_BAR_SHIFT;
125 	regoff &= PMCS_MSGU_MPI_OFFSET_MASK;
126 
127 	if (regoff > baroff) {
128 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: bad MPI Table Length "
129 		    "(register offset=0x%08x, passed offset=0x%08x)", __func__,
130 		    regoff, baroff);
131 		return (-1);
132 	}
133 	if (regbar != barbar) {
134 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: bad MPI BAR (register "
135 		    "BAROFF=0x%08x, passed BAROFF=0x%08x)", __func__,
136 		    regbar, barbar);
137 		return (-1);
138 	}
139 	pwp->mpi_offset = regoff;
140 	if (pmcs_rd_mpi_tbl(pwp, PMCS_MPI_AS) != PMCS_SIGNATURE) {
141 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
142 		    "%s: Bad MPI Configuration Table Signature 0x%x", __func__,
143 		    pmcs_rd_mpi_tbl(pwp, PMCS_MPI_AS));
144 		return (-1);
145 	}
146 
147 	if (pmcs_rd_mpi_tbl(pwp, PMCS_MPI_IR) != PMCS_MPI_REVISION1) {
148 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
149 		    "%s: Bad MPI Configuration Revision 0x%x", __func__,
150 		    pmcs_rd_mpi_tbl(pwp, PMCS_MPI_IR));
151 		return (-1);
152 	}
153 
154 	/*
155 	 * Generate offsets for the General System, Inbound Queue Configuration
156 	 * and Outbound Queue configuration tables. This way the macros to
157 	 * access those tables will work correctly.
158 	 */
159 	pwp->mpi_gst_offset =
160 	    pwp->mpi_offset + pmcs_rd_mpi_tbl(pwp, PMCS_MPI_GSTO);
161 	pwp->mpi_iqc_offset =
162 	    pwp->mpi_offset + pmcs_rd_mpi_tbl(pwp, PMCS_MPI_IQCTO);
163 	pwp->mpi_oqc_offset =
164 	    pwp->mpi_offset + pmcs_rd_mpi_tbl(pwp, PMCS_MPI_OQCTO);
165 
166 	pwp->fw = pmcs_rd_mpi_tbl(pwp, PMCS_MPI_FW);
167 
168 	pwp->max_cmd = pmcs_rd_mpi_tbl(pwp, PMCS_MPI_MOIO);
169 	pwp->max_dev = pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO0) >> 16;
170 
171 	pwp->max_iq = PMCS_MNIQ(pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO1));
172 	pwp->max_oq = PMCS_MNOQ(pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO1));
173 	pwp->nphy = PMCS_NPHY(pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO1));
174 	if (pwp->max_iq <= PMCS_NIQ) {
175 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: not enough Inbound Queues "
176 		    "supported (need %d, max_oq=%d)", __func__, pwp->max_iq,
177 		    PMCS_NIQ);
178 		return (-1);
179 	}
180 	if (pwp->max_oq <= PMCS_NOQ) {
181 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: not enough Outbound Queues "
182 		    "supported (need %d, max_oq=%d)", __func__, pwp->max_oq,
183 		    PMCS_NOQ);
184 		return (-1);
185 	}
186 	if (pwp->nphy == 0) {
187 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: zero phys reported",
188 		    __func__);
189 		return (-1);
190 	}
191 	if (PMCS_HPIQ(pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO1))) {
192 		pwp->hipri_queue = (1 << PMCS_IQ_OTHER);
193 	}
194 
195 
196 	for (i = 0; i < pwp->nphy; i++) {
197 		PMCS_MPI_EVQSET(pwp, PMCS_OQ_EVENTS, i);
198 		PMCS_MPI_NCQSET(pwp, PMCS_OQ_EVENTS, i);
199 	}
200 
201 	pmcs_wr_mpi_tbl(pwp, PMCS_MPI_INFO2,
202 	    (PMCS_OQ_EVENTS << GENERAL_EVENT_OQ_SHIFT) |
203 	    (PMCS_OQ_EVENTS << DEVICE_HANDLE_REMOVED_SHIFT));
204 
205 	/*
206 	 * Verify that ioq_depth is valid (> 0 and not so high that it
207 	 * would cause us to overrun the chip with commands).
208 	 */
209 	if (pwp->ioq_depth == 0) {
210 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
211 		    "%s: I/O queue depth set to 0. Setting to %d",
212 		    __func__, PMCS_NQENTRY);
213 		pwp->ioq_depth = PMCS_NQENTRY;
214 	}
215 
216 	if (pwp->ioq_depth < PMCS_MIN_NQENTRY) {
217 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
218 		    "%s: I/O queue depth set too low (%d). Setting to %d",
219 		    __func__, pwp->ioq_depth, PMCS_MIN_NQENTRY);
220 		pwp->ioq_depth = PMCS_MIN_NQENTRY;
221 	}
222 
223 	if (pwp->ioq_depth > (pwp->max_cmd / (PMCS_IO_IQ_MASK + 1))) {
224 		new_ioq_depth = pwp->max_cmd / (PMCS_IO_IQ_MASK + 1);
225 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
226 		    "%s: I/O queue depth set too high (%d). Setting to %d",
227 		    __func__, pwp->ioq_depth, new_ioq_depth);
228 		pwp->ioq_depth = new_ioq_depth;
229 	}
230 
231 	/*
232 	 * Allocate consistent memory for OQs and IQs.
233 	 */
234 	pwp->iqp_dma_attr = pwp->oqp_dma_attr = pmcs_dattr;
235 	pwp->iqp_dma_attr.dma_attr_align =
236 	    pwp->oqp_dma_attr.dma_attr_align = PMCS_QENTRY_SIZE;
237 
238 	/*
239 	 * The Rev C chip has the ability to do PIO to or from consistent
240 	 * memory anywhere in a 64 bit address space, but the firmware is
241 	 * not presently set up to do so.
242 	 */
243 	pwp->iqp_dma_attr.dma_attr_addr_hi =
244 	    pwp->oqp_dma_attr.dma_attr_addr_hi = 0x000000FFFFFFFFFFull;
245 
246 	for (i = 0; i < PMCS_NIQ; i++) {
247 		if (pmcs_dma_setup(pwp, &pwp->iqp_dma_attr,
248 		    &pwp->iqp_acchdls[i],
249 		    &pwp->iqp_handles[i], PMCS_QENTRY_SIZE * pwp->ioq_depth,
250 		    (caddr_t *)&pwp->iqp[i], &pwp->iqaddr[i]) == B_FALSE) {
251 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
252 			    "Failed to setup DMA for iqp[%d]", i);
253 			return (-1);
254 		}
255 		bzero(pwp->iqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth);
256 	}
257 
258 	for (i = 0; i < PMCS_NOQ; i++) {
259 		if (pmcs_dma_setup(pwp, &pwp->oqp_dma_attr,
260 		    &pwp->oqp_acchdls[i],
261 		    &pwp->oqp_handles[i], PMCS_QENTRY_SIZE * pwp->ioq_depth,
262 		    (caddr_t *)&pwp->oqp[i], &pwp->oqaddr[i]) == B_FALSE) {
263 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
264 			    "Failed to setup DMA for oqp[%d]", i);
265 			return (-1);
266 		}
267 		bzero(pwp->oqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth);
268 	}
269 
270 	/*
271 	 * Install the IQ and OQ addresses (and null out the rest).
272 	 */
273 	for (i = 0; i < pwp->max_iq; i++) {
274 		pwp->iqpi_offset[i] = pmcs_rd_iqc_tbl(pwp, PMCS_IQPIOFFX(i));
275 		if (i < PMCS_NIQ) {
276 			if (i != PMCS_IQ_OTHER) {
277 				pmcs_wr_iqc_tbl(pwp, PMCS_IQC_PARMX(i),
278 				    pwp->ioq_depth | (PMCS_QENTRY_SIZE << 16));
279 			} else {
280 				pmcs_wr_iqc_tbl(pwp, PMCS_IQC_PARMX(i),
281 				    (1 << 30) | pwp->ioq_depth |
282 				    (PMCS_QENTRY_SIZE << 16));
283 			}
284 			pmcs_wr_iqc_tbl(pwp, PMCS_IQBAHX(i),
285 			    DWORD1(pwp->iqaddr[i]));
286 			pmcs_wr_iqc_tbl(pwp, PMCS_IQBALX(i),
287 			    DWORD0(pwp->iqaddr[i]));
288 			pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBAHX(i),
289 			    DWORD1(pwp->ciaddr+IQ_OFFSET(i)));
290 			pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBALX(i),
291 			    DWORD0(pwp->ciaddr+IQ_OFFSET(i)));
292 		} else {
293 			pmcs_wr_iqc_tbl(pwp, PMCS_IQC_PARMX(i), 0);
294 			pmcs_wr_iqc_tbl(pwp, PMCS_IQBAHX(i), 0);
295 			pmcs_wr_iqc_tbl(pwp, PMCS_IQBALX(i), 0);
296 			pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBAHX(i), 0);
297 			pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBALX(i), 0);
298 		}
299 	}
300 
301 	for (i = 0; i < pwp->max_oq; i++) {
302 		pwp->oqci_offset[i] = pmcs_rd_oqc_tbl(pwp, PMCS_OQCIOFFX(i));
303 		if (i < PMCS_NOQ) {
304 			pmcs_wr_oqc_tbl(pwp, PMCS_OQC_PARMX(i), pwp->ioq_depth |
305 			    (PMCS_QENTRY_SIZE << 16) | OQIEX);
306 			pmcs_wr_oqc_tbl(pwp, PMCS_OQBAHX(i),
307 			    DWORD1(pwp->oqaddr[i]));
308 			pmcs_wr_oqc_tbl(pwp, PMCS_OQBALX(i),
309 			    DWORD0(pwp->oqaddr[i]));
310 			pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBAHX(i),
311 			    DWORD1(pwp->ciaddr+OQ_OFFSET(i)));
312 			pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBALX(i),
313 			    DWORD0(pwp->ciaddr+OQ_OFFSET(i)));
314 			pmcs_wr_oqc_tbl(pwp, PMCS_OQIPARM(i),
315 			    pwp->oqvec[i] << 24);
316 			pmcs_wr_oqc_tbl(pwp, PMCS_OQDICX(i), 0);
317 		} else {
318 			pmcs_wr_oqc_tbl(pwp, PMCS_OQC_PARMX(i), 0);
319 			pmcs_wr_oqc_tbl(pwp, PMCS_OQBAHX(i), 0);
320 			pmcs_wr_oqc_tbl(pwp, PMCS_OQBALX(i), 0);
321 			pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBAHX(i), 0);
322 			pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBALX(i), 0);
323 			pmcs_wr_oqc_tbl(pwp, PMCS_OQIPARM(i), 0);
324 			pmcs_wr_oqc_tbl(pwp, PMCS_OQDICX(i), 0);
325 		}
326 	}
327 
328 	/*
329 	 * Set up logging, if defined.
330 	 */
331 	if (pwp->fwlog) {
332 		uint64_t logdma = pwp->fwaddr;
333 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_MELBAH, DWORD1(logdma));
334 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_MELBAL, DWORD0(logdma));
335 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_MELBS, PMCS_FWLOG_SIZE >> 1);
336 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_MELSEV, pwp->fwlog);
337 		logdma += (PMCS_FWLOG_SIZE >> 1);
338 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_IELBAH, DWORD1(logdma));
339 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_IELBAL, DWORD0(logdma));
340 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_IELBS, PMCS_FWLOG_SIZE >> 1);
341 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_IELSEV, pwp->fwlog);
342 	}
343 
344 	/*
345 	 * Interrupt vectors, outbound queues, and odb_auto_clear
346 	 *
347 	 * MSI/MSI-X:
348 	 * If we got 4 interrupt vectors, we'll assign one to each outbound
349 	 * queue as well as the fatal interrupt, and auto clear can be set
350 	 * for each.
351 	 *
352 	 * If we only got 2 vectors, one will be used for I/O completions
353 	 * and the other for the other two vectors.  In this case, auto_
354 	 * clear can only be set for I/Os, which is fine.  The fatal
355 	 * interrupt will be mapped to the PMCS_FATAL_INTERRUPT bit, which
356 	 * is not an interrupt vector.
357 	 *
358 	 * MSI/MSI-X/INT-X:
359 	 * If we only got 1 interrupt vector, auto_clear must be set to 0,
360 	 * and again the fatal interrupt will be mapped to the
361 	 * PMCS_FATAL_INTERRUPT bit (again, not an interrupt vector).
362 	 */
363 
364 	switch (pwp->int_type) {
365 	case PMCS_INT_MSIX:
366 	case PMCS_INT_MSI:
367 		switch (pwp->intr_cnt) {
368 		case 1:
369 			pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, PMCS_FERRIE |
370 			    (PMCS_FATAL_INTERRUPT << PMCS_FERIV_SHIFT));
371 			pwp->odb_auto_clear = 0;
372 			break;
373 		case 2:
374 			pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, PMCS_FERRIE |
375 			    (PMCS_FATAL_INTERRUPT << PMCS_FERIV_SHIFT));
376 			pwp->odb_auto_clear = (1 << PMCS_FATAL_INTERRUPT) |
377 			    (1 << PMCS_MSIX_IODONE);
378 			break;
379 		case 4:
380 			pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, PMCS_FERRIE |
381 			    (PMCS_MSIX_FATAL << PMCS_FERIV_SHIFT));
382 			pwp->odb_auto_clear = (1 << PMCS_MSIX_FATAL) |
383 			    (1 << PMCS_MSIX_GENERAL) | (1 << PMCS_MSIX_IODONE) |
384 			    (1 << PMCS_MSIX_EVENTS);
385 			break;
386 		}
387 		break;
388 
389 	case PMCS_INT_FIXED:
390 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR,
391 		    PMCS_FERRIE | (PMCS_FATAL_INTERRUPT << PMCS_FERIV_SHIFT));
392 		pwp->odb_auto_clear = 0;
393 		break;
394 	}
395 
396 	/*
397 	 * Enable Interrupt Reassertion
398 	 * Default Delay 1000us
399 	 */
400 	ferr = pmcs_rd_mpi_tbl(pwp, PMCS_MPI_FERR);
401 	if ((ferr & PMCS_MPI_IRAE) == 0) {
402 		ferr &= ~(PMCS_MPI_IRAU | PMCS_MPI_IRAD_MASK);
403 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, ferr | PMCS_MPI_IRAE);
404 	}
405 
406 	pmcs_wr_topunit(pwp, PMCS_OBDB_AUTO_CLR, pwp->odb_auto_clear);
407 	pwp->mpi_table_setup = 1;
408 	return (0);
409 }
410 
411 /*
412  * Start the Message Passing protocol with the PMC chip.
413  */
414 int
415 pmcs_start_mpi(pmcs_hw_t *pwp)
416 {
417 	int i;
418 
419 	pmcs_wr_msgunit(pwp, PMCS_MSGU_IBDB, PMCS_MSGU_IBDB_MPIINI);
420 	for (i = 0; i < 1000; i++) {
421 		if ((pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB) &
422 		    PMCS_MSGU_IBDB_MPIINI) == 0) {
423 			break;
424 		}
425 		drv_usecwait(1000);
426 	}
427 	if (pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB) & PMCS_MSGU_IBDB_MPIINI) {
428 		return (-1);
429 	}
430 	drv_usecwait(500000);
431 
432 	/*
433 	 * Check to make sure we got to INIT state.
434 	 */
435 	if (PMCS_MPI_S(pmcs_rd_gst_tbl(pwp, PMCS_GST_BASE)) !=
436 	    PMCS_MPI_STATE_INIT) {
437 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: MPI launch failed (GST 0x%x "
438 		    "DBCLR 0x%x)", __func__,
439 		    pmcs_rd_gst_tbl(pwp, PMCS_GST_BASE),
440 		    pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB_CLEAR));
441 		return (-1);
442 	}
443 	return (0);
444 }
445 
446 /*
447  * Stop the Message Passing protocol with the PMC chip.
448  */
449 int
450 pmcs_stop_mpi(pmcs_hw_t *pwp)
451 {
452 	int i;
453 
454 	for (i = 0; i < pwp->max_iq; i++) {
455 		pmcs_wr_iqc_tbl(pwp, PMCS_IQC_PARMX(i), 0);
456 		pmcs_wr_iqc_tbl(pwp, PMCS_IQBAHX(i), 0);
457 		pmcs_wr_iqc_tbl(pwp, PMCS_IQBALX(i), 0);
458 		pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBAHX(i), 0);
459 		pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBALX(i), 0);
460 	}
461 	for (i = 0; i < pwp->max_oq; i++) {
462 		pmcs_wr_oqc_tbl(pwp, PMCS_OQC_PARMX(i), 0);
463 		pmcs_wr_oqc_tbl(pwp, PMCS_OQBAHX(i), 0);
464 		pmcs_wr_oqc_tbl(pwp, PMCS_OQBALX(i), 0);
465 		pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBAHX(i), 0);
466 		pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBALX(i), 0);
467 		pmcs_wr_oqc_tbl(pwp, PMCS_OQIPARM(i), 0);
468 		pmcs_wr_oqc_tbl(pwp, PMCS_OQDICX(i), 0);
469 	}
470 	pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, 0);
471 	pmcs_wr_msgunit(pwp, PMCS_MSGU_IBDB, PMCS_MSGU_IBDB_MPICTU);
472 	for (i = 0; i < 2000; i++) {
473 		if ((pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB) &
474 		    PMCS_MSGU_IBDB_MPICTU) == 0) {
475 			break;
476 		}
477 		drv_usecwait(1000);
478 	}
479 	if (pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB) & PMCS_MSGU_IBDB_MPICTU) {
480 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: MPI stop failed", __func__);
481 		return (-1);
482 	}
483 	return (0);
484 }
485 
/*
 * Do a sequence of ECHO messages to test for MPI functionality,
 * all inbound and outbound queue functionality and interrupts.
 *
 * Each iteration sends one ECHO IOMB, rotating through all inbound and
 * outbound queues, and waits for its completion.  The measured round
 * trip times are averaged to seed the interrupt coalescing latency.
 * Returns 0 on success, -1 on any failure.
 */
int
pmcs_echo_test(pmcs_hw_t *pwp)
{
	echo_test_t fred;		/* echo payload sent to the chip */
	struct pmcwork *pwrk;
	uint32_t *msg, count;
	int iqe = 0, iqo = 0, result, rval = 0;
	int iterations;
	hrtime_t echo_start, echo_end, echo_total;

	ASSERT(pwp->max_cmd > 0);

	/*
	 * We want iterations to be max_cmd * 3 to ensure that we run the
	 * echo test enough times to iterate through every inbound queue
	 * at least twice.
	 */
	iterations = pwp->max_cmd * 3;

	echo_total = 0;
	count = 0;

	/*
	 * NOTE(review): count is not incremented in this loop body;
	 * presumably the echo completion path advances it through the
	 * fred.ptr pointer embedded in the payload — confirm against the
	 * ECHO completion handling before relying on this.
	 */
	while (count < iterations) {
		pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, NULL);
		if (pwrk == NULL) {
			pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
			rval = -1;
			break;
		}

		/* Reserve a slot on the current inbound queue. */
		mutex_enter(&pwp->iqp_lock[iqe]);
		msg = GET_IQ_ENTRY(pwp, iqe);
		if (msg == NULL) {
			mutex_exit(&pwp->iqp_lock[iqe]);
			pmcs_pwork(pwp, pwrk);
			pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
			rval = -1;
			break;
		}

		bzero(msg, PMCS_QENTRY_SIZE);

		if (iqe == PMCS_IQ_OTHER) {
			/* This is on the high priority queue */
			msg[0] = LE_32(PMCS_HIPRI(pwp, iqo, PMCIN_ECHO));
		} else {
			msg[0] = LE_32(PMCS_IOMB_IN_SAS(iqo, PMCIN_ECHO));
		}
		msg[1] = LE_32(pwrk->htag);
		/* Build a recognizable payload for the echo. */
		fred.signature = 0xdeadbeef;
		fred.count = count;
		fred.ptr = &count;
		(void) memcpy(&msg[2], &fred, sizeof (fred));
		pwrk->state = PMCS_WORK_STATE_ONCHIP;

		INC_IQ_ENTRY(pwp, iqe);

		echo_start = gethrtime();
		DTRACE_PROBE2(pmcs__echo__test__wait__start,
		    hrtime_t, echo_start, uint32_t, pwrk->htag);

		/* Rotate to the next inbound and outbound queues. */
		if (++iqe == PMCS_NIQ) {
			iqe = 0;
		}
		if (++iqo == PMCS_NOQ) {
			iqo = 0;
		}

		WAIT_FOR(pwrk, 250, result);

		echo_end = gethrtime();
		DTRACE_PROBE2(pmcs__echo__test__wait__end,
		    hrtime_t, echo_end, int, result);

		echo_total += (echo_end - echo_start);

		pmcs_pwork(pwp, pwrk);
		if (result) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG,
			    "%s: command timed out on echo test #%d",
			    __func__, count);
			rval = -1;
			break;
		}
	}

	/*
	 * The intr_threshold is adjusted by PMCS_INTR_THRESHOLD in order to
	 * remove the overhead of things like the delay in getting signaled
	 * for completion.
	 */
	if (echo_total != 0) {
		pwp->io_intr_coal.intr_latency =
		    (echo_total / iterations) / 2;
		pwp->io_intr_coal.intr_threshold =
		    PMCS_INTR_THRESHOLD(PMCS_QUANTUM_TIME_USECS * 1000 /
		    pwp->io_intr_coal.intr_latency);
	}

	return (rval);
}
591 
592 /*
593  * Start the (real) phys
594  */
595 int
596 pmcs_start_phy(pmcs_hw_t *pwp, int phynum, int linkmode, int speed)
597 {
598 	int result;
599 	uint32_t *msg;
600 	struct pmcwork *pwrk;
601 	pmcs_phy_t *pptr;
602 	sas_identify_af_t sap;
603 
604 	mutex_enter(&pwp->lock);
605 	pptr = pwp->root_phys + phynum;
606 	if (pptr == NULL) {
607 		mutex_exit(&pwp->lock);
608 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: cannot find port %d",
609 		    __func__, phynum);
610 		return (0);
611 	}
612 
613 	pmcs_lock_phy(pptr);
614 	mutex_exit(&pwp->lock);
615 
616 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
617 	if (pwrk == NULL) {
618 		pmcs_unlock_phy(pptr);
619 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
620 		return (-1);
621 	}
622 
623 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
624 	msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
625 
626 	if (msg == NULL) {
627 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
628 		pmcs_unlock_phy(pptr);
629 		pmcs_pwork(pwp, pwrk);
630 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
631 		return (-1);
632 	}
633 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_EVENTS, PMCIN_PHY_START));
634 	msg[1] = LE_32(pwrk->htag);
635 	msg[2] = LE_32(linkmode | speed | phynum);
636 	bzero(&sap, sizeof (sap));
637 	sap.device_type = SAS_IF_DTYPE_ENDPOINT;
638 	sap.ssp_ini_port = 1;
639 
640 	if (pwp->separate_ports) {
641 		pmcs_wwn2barray(pwp->sas_wwns[phynum], sap.sas_address);
642 	} else {
643 		pmcs_wwn2barray(pwp->sas_wwns[0], sap.sas_address);
644 	}
645 
646 	ASSERT(phynum < SAS2_PHYNUM_MAX);
647 	sap.phy_identifier = phynum & SAS2_PHYNUM_MASK;
648 	(void) memcpy(&msg[3], &sap, sizeof (sas_identify_af_t));
649 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
650 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
651 
652 	pptr->state.prog_min_rate = (lowbit((ulong_t)speed) - 1);
653 	pptr->state.prog_max_rate = (highbit((ulong_t)speed) - 1);
654 	pptr->state.hw_min_rate = PMCS_HW_MIN_LINK_RATE;
655 	pptr->state.hw_max_rate = PMCS_HW_MAX_LINK_RATE;
656 
657 	pmcs_unlock_phy(pptr);
658 	WAIT_FOR(pwrk, 1000, result);
659 	pmcs_pwork(pwp, pwrk);
660 
661 	if (result) {
662 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_timeo, __func__);
663 	} else {
664 		mutex_enter(&pwp->lock);
665 		pwp->phys_started |= (1 << phynum);
666 		mutex_exit(&pwp->lock);
667 	}
668 
669 	return (0);
670 }
671 
672 int
673 pmcs_start_phys(pmcs_hw_t *pwp)
674 {
675 	int i;
676 
677 	for (i = 0; i < pwp->nphy; i++) {
678 		if ((pwp->phyid_block_mask & (1 << i)) == 0) {
679 			if (pmcs_start_phy(pwp, i,
680 			    (pwp->phymode << PHY_MODE_SHIFT),
681 			    pwp->physpeed << PHY_LINK_SHIFT)) {
682 				return (-1);
683 			}
684 			if (pmcs_clear_diag_counters(pwp, i)) {
685 				pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: failed to "
686 				    "reset counters on PHY (%d)", __func__, i);
687 			}
688 		}
689 	}
690 	return (0);
691 }
692 
/*
 * Called with PHY locked
 *
 * Issue a link reset or hard reset (per 'type') to a PHY.  For a root
 * (local) PHY this uses a LOCAL_PHY_CONTROL IOMB; for a PHY below an
 * expander (level > 0) an SMP_REQUEST carrying an SMP PHY CONTROL
 * function is sent to the parent expander.  The PHY lock is dropped
 * while waiting for completion and reacquired before return.
 *
 * Returns 0 on success, ENOMEM if a work structure or queue entry could
 * not be obtained, or EIO on timeout or non-OK completion status.
 */
int
pmcs_reset_phy(pmcs_hw_t *pwp, pmcs_phy_t *pptr, uint8_t type)
{
	uint32_t *msg;
	uint32_t iomb[(PMCS_QENTRY_SIZE << 1) >> 2];
	const char *mbar;
	uint32_t amt;		/* number of message dwords to copy */
	uint32_t pdevid;	/* parent expander device id (level > 0) */
	uint32_t stsoff;	/* dword index of status in the response */
	uint32_t status;
	int result, level, phynum;
	struct pmcwork *pwrk;
	uint32_t htag;

	ASSERT(mutex_owned(&pptr->phy_lock));

	bzero(iomb, PMCS_QENTRY_SIZE);
	phynum = pptr->phynum;
	level = pptr->level;
	/* pdevid is only needed (and only used) for non-root PHYs */
	if (level > 0) {
		pdevid = pptr->parent->device_id;
	}

	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);

	if (pwrk == NULL) {
		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
		return (ENOMEM);
	}

	/* The completion path copies the response into iomb via pwrk->arg */
	pwrk->arg = iomb;

	/*
	 * If level > 0, we need to issue an SMP_REQUEST with a PHY_CONTROL
	 * function to do either a link reset or hard reset.  If level == 0,
	 * then we do a LOCAL_PHY_CONTROL IOMB to do link/hard reset to the
	 * root (local) PHY
	 */
	if (level) {
		stsoff = 2;
		iomb[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
		    PMCIN_SMP_REQUEST));
		iomb[1] = LE_32(pwrk->htag);
		iomb[2] = LE_32(pdevid);
		iomb[3] = LE_32(40 << SMP_REQUEST_LENGTH_SHIFT);
		/*
		 * Send SMP PHY CONTROL/HARD or LINK RESET
		 *
		 * The SMP frame is big-endian on the wire: 0x40 is the
		 * SMP REQUEST frame type and 0x91 the PHY CONTROL
		 * function (per SAS-2).
		 */
		iomb[4] = BE_32(0x40910000);
		iomb[5] = 0;

		if (type == PMCS_PHYOP_HARD_RESET) {
			mbar = "SMP PHY CONTROL/HARD RESET";
			iomb[6] = BE_32((phynum << 24) |
			    (PMCS_PHYOP_HARD_RESET << 16));
		} else {
			mbar = "SMP PHY CONTROL/LINK RESET";
			iomb[6] = BE_32((phynum << 24) |
			    (PMCS_PHYOP_LINK_RESET << 16));
		}
		pmcs_prt(pwp, PMCS_PRT_DEBUG,
		    "%s: sending %s to %s for phy 0x%x",
		    __func__, mbar, pptr->parent->path, pptr->phynum);
		amt = 7;
	} else {
		/*
		 * Unlike most other Outbound messages, status for
		 * a local phy operation is in DWORD 3.
		 */
		stsoff = 3;
		iomb[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
		    PMCIN_LOCAL_PHY_CONTROL));
		iomb[1] = LE_32(pwrk->htag);
		if (type == PMCS_PHYOP_LINK_RESET) {
			mbar = "LOCAL PHY LINK RESET";
			iomb[2] = LE_32((PMCS_PHYOP_LINK_RESET << 8) | phynum);
		} else {
			mbar = "LOCAL PHY HARD RESET";
			iomb[2] = LE_32((PMCS_PHYOP_HARD_RESET << 8) | phynum);
		}
		pmcs_prt(pwp, PMCS_PRT_DEBUG,
		    "%s: sending %s to %s", __func__, mbar, pptr->path);
		amt = 3;
	}

	/* Queue the message on the high-priority inbound queue. */
	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
	msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
	if (msg == NULL) {
		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
		pmcs_pwork(pwp, pwrk);
		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
		return (ENOMEM);
	}
	COPY_MESSAGE(msg, iomb, amt);
	/* Save htag: pwrk is freed before the timeout-abort path below. */
	htag = pwrk->htag;
	pwrk->state = PMCS_WORK_STATE_ONCHIP;
	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);

	/* Drop the PHY lock while waiting; reacquire afterwards. */
	pmcs_unlock_phy(pptr);
	WAIT_FOR(pwrk, 1000, result);
	pmcs_pwork(pwp, pwrk);
	pmcs_lock_phy(pptr);

	if (result) {
		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_timeo, __func__);

		/* Timed out: try to abort the outstanding SMP command. */
		if (pmcs_abort(pwp, pptr, htag, 0, 0)) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
			    "%s: Unable to issue SMP abort for htag 0x%08x",
			    __func__, htag);
		} else {
			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
			    "%s: Issuing SMP ABORT for htag 0x%08x",
			    __func__, htag);
		}
		return (EIO);
	}
	status = LE_32(iomb[stsoff]);

	if (status != PMCOUT_STATUS_OK) {
		char buf[32];
		const char *es =  pmcs_status_str(status);
		if (es == NULL) {
			(void) snprintf(buf, sizeof (buf), "Status 0x%x",
			    status);
			es = buf;
		}
		pmcs_prt(pwp, PMCS_PRT_DEBUG,
		    "%s: %s action returned %s for %s", __func__, mbar, es,
		    pptr->path);
		return (EIO);
	}

	return (0);
}
831 
832 /*
833  * Stop the (real) phys.  No PHY or softstate locks are required as this only
834  * happens during detach.
835  */
836 void
837 pmcs_stop_phy(pmcs_hw_t *pwp, int phynum)
838 {
839 	int result;
840 	pmcs_phy_t *pptr;
841 	uint32_t *msg;
842 	struct pmcwork *pwrk;
843 
844 	pptr =  pwp->root_phys + phynum;
845 	if (pptr == NULL) {
846 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
847 		    "%s: unable to find port %d", __func__, phynum);
848 		return;
849 	}
850 
851 	if (pwp->phys_started & (1 << phynum)) {
852 		pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
853 
854 		if (pwrk == NULL) {
855 			pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
856 			return;
857 		}
858 
859 		mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
860 		msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
861 
862 		if (msg == NULL) {
863 			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
864 			pmcs_pwork(pwp, pwrk);
865 			pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
866 			return;
867 		}
868 
869 		msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_EVENTS, PMCIN_PHY_STOP));
870 		msg[1] = LE_32(pwrk->htag);
871 		msg[2] = LE_32(phynum);
872 		pwrk->state = PMCS_WORK_STATE_ONCHIP;
873 		/*
874 		 * Make this unconfigured now.
875 		 */
876 		INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
877 		WAIT_FOR(pwrk, 1000, result);
878 
879 		pmcs_pwork(pwp, pwrk);
880 		if (result) {
881 			pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_timeo, __func__);
882 		}
883 
884 		pwp->phys_started &= ~(1 << phynum);
885 	}
886 
887 	pptr->configured = 0;
888 }
889 
890 /*
891  * No locks should be required as this is only called during detach
892  */
893 void
894 pmcs_stop_phys(pmcs_hw_t *pwp)
895 {
896 	int i;
897 	for (i = 0; i < pwp->nphy; i++) {
898 		if ((pwp->phyid_block_mask & (1 << i)) == 0) {
899 			pmcs_stop_phy(pwp, i);
900 		}
901 	}
902 }
903 
904 /*
905  * Run SAS_DIAG_EXECUTE with cmd and cmd_desc passed.
906  * 	ERR_CNT_RESET: return status of cmd
907  *	DIAG_REPORT_GET: return value of the counter
908  */
909 int
910 pmcs_sas_diag_execute(pmcs_hw_t *pwp, uint32_t cmd, uint32_t cmd_desc,
911     uint8_t phynum)
912 {
913 	uint32_t htag, *ptr, status, msg[PMCS_MSG_SIZE << 1];
914 	int result;
915 	struct pmcwork *pwrk;
916 
917 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, NULL);
918 	if (pwrk == NULL) {
919 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
920 		return (DDI_FAILURE);
921 	}
922 	pwrk->arg = msg;
923 	htag = pwrk->htag;
924 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_EVENTS, PMCIN_SAS_DIAG_EXECUTE));
925 	msg[1] = LE_32(htag);
926 	msg[2] = LE_32((cmd << PMCS_DIAG_CMD_SHIFT) |
927 	    (cmd_desc << PMCS_DIAG_CMD_DESC_SHIFT) | phynum);
928 
929 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
930 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
931 	if (ptr == NULL) {
932 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
933 		pmcs_pwork(pwp, pwrk);
934 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
935 		return (DDI_FAILURE);
936 	}
937 	COPY_MESSAGE(ptr, msg, 3);
938 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
939 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
940 
941 	WAIT_FOR(pwrk, 1000, result);
942 
943 	pmcs_pwork(pwp, pwrk);
944 
945 	if (result) {
946 		pmcs_timed_out(pwp, htag, __func__);
947 		return (DDI_FAILURE);
948 	}
949 
950 	status = LE_32(msg[3]);
951 
952 	/* Return for counter reset */
953 	if (cmd == PMCS_ERR_CNT_RESET)
954 		return (status);
955 
956 	/* Return for counter value */
957 	if (status) {
958 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: failed, status (0x%x)",
959 		    __func__, status);
960 		return (DDI_FAILURE);
961 	}
962 	return (LE_32(msg[4]));
963 }
964 
965 /* Get the current value of the counter for desc on phynum and return it. */
966 int
967 pmcs_get_diag_report(pmcs_hw_t *pwp, uint32_t desc, uint8_t phynum)
968 {
969 	return (pmcs_sas_diag_execute(pwp, PMCS_DIAG_REPORT_GET, desc, phynum));
970 }
971 
972 /* Clear all of the counters for phynum. Returns the status of the command. */
973 int
974 pmcs_clear_diag_counters(pmcs_hw_t *pwp, uint8_t phynum)
975 {
976 	uint32_t	cmd = PMCS_ERR_CNT_RESET;
977 	uint32_t	cmd_desc;
978 
979 	cmd_desc = PMCS_INVALID_DWORD_CNT;
980 	if (pmcs_sas_diag_execute(pwp, cmd, cmd_desc, phynum))
981 		return (DDI_FAILURE);
982 
983 	cmd_desc = PMCS_DISPARITY_ERR_CNT;
984 	if (pmcs_sas_diag_execute(pwp, cmd, cmd_desc, phynum))
985 		return (DDI_FAILURE);
986 
987 	cmd_desc = PMCS_LOST_DWORD_SYNC_CNT;
988 	if (pmcs_sas_diag_execute(pwp, cmd, cmd_desc, phynum))
989 		return (DDI_FAILURE);
990 
991 	cmd_desc = PMCS_RESET_FAILED_CNT;
992 	if (pmcs_sas_diag_execute(pwp, cmd, cmd_desc, phynum))
993 		return (DDI_FAILURE);
994 
995 	return (DDI_SUCCESS);
996 }
997 
998 /*
999  * Get firmware timestamp
1000  */
1001 int
1002 pmcs_get_time_stamp(pmcs_hw_t *pwp, uint64_t *ts)
1003 {
1004 	uint32_t htag, *ptr, msg[PMCS_MSG_SIZE << 1];
1005 	int result;
1006 	struct pmcwork *pwrk;
1007 
1008 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, NULL);
1009 	if (pwrk == NULL) {
1010 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
1011 		return (-1);
1012 	}
1013 	pwrk->arg = msg;
1014 	htag = pwrk->htag;
1015 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_EVENTS, PMCIN_GET_TIME_STAMP));
1016 	msg[1] = LE_32(pwrk->htag);
1017 
1018 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1019 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1020 	if (ptr == NULL) {
1021 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1022 		pmcs_pwork(pwp, pwrk);
1023 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
1024 		return (-1);
1025 	}
1026 	COPY_MESSAGE(ptr, msg, 2);
1027 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
1028 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1029 
1030 	WAIT_FOR(pwrk, 1000, result);
1031 
1032 	pmcs_pwork(pwp, pwrk);
1033 
1034 	if (result) {
1035 		pmcs_timed_out(pwp, htag, __func__);
1036 		return (-1);
1037 	}
1038 	*ts = LE_32(msg[2]) | (((uint64_t)LE_32(msg[3])) << 32);
1039 	return (0);
1040 }
1041 
1042 /*
1043  * Dump all pertinent registers
1044  */
1045 
1046 void
1047 pmcs_register_dump(pmcs_hw_t *pwp)
1048 {
1049 	int i;
1050 	uint32_t val;
1051 
1052 	pmcs_prt(pwp, PMCS_PRT_INFO, "pmcs%d: Register dump start",
1053 	    ddi_get_instance(pwp->dip));
1054 	pmcs_prt(pwp, PMCS_PRT_INFO,
1055 	    "OBDB (intr): 0x%08x (mask): 0x%08x (clear): 0x%08x",
1056 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_OBDB),
1057 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_OBDB_MASK),
1058 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR));
1059 	pmcs_prt(pwp, PMCS_PRT_INFO, "SCRATCH0: 0x%08x",
1060 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH0));
1061 	pmcs_prt(pwp, PMCS_PRT_INFO, "SCRATCH1: 0x%08x",
1062 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1));
1063 	pmcs_prt(pwp, PMCS_PRT_INFO, "SCRATCH2: 0x%08x",
1064 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2));
1065 	pmcs_prt(pwp, PMCS_PRT_INFO, "SCRATCH3: 0x%08x",
1066 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH3));
1067 	for (i = 0; i < PMCS_NIQ; i++) {
1068 		pmcs_prt(pwp, PMCS_PRT_INFO, "IQ %d: CI %u PI %u",
1069 		    i, pmcs_rd_iqci(pwp, i), pmcs_rd_iqpi(pwp, i));
1070 	}
1071 	for (i = 0; i < PMCS_NOQ; i++) {
1072 		pmcs_prt(pwp, PMCS_PRT_INFO, "OQ %d: CI %u PI %u",
1073 		    i, pmcs_rd_oqci(pwp, i), pmcs_rd_oqpi(pwp, i));
1074 	}
1075 	val = pmcs_rd_gst_tbl(pwp, PMCS_GST_BASE);
1076 	pmcs_prt(pwp, PMCS_PRT_INFO,
1077 	    "GST TABLE BASE: 0x%08x (STATE=0x%x QF=%d GSTLEN=%d HMI_ERR=0x%x)",
1078 	    val, PMCS_MPI_S(val), PMCS_QF(val), PMCS_GSTLEN(val) * 4,
1079 	    PMCS_HMI_ERR(val));
1080 	pmcs_prt(pwp, PMCS_PRT_INFO, "GST TABLE IQFRZ0: 0x%08x",
1081 	    pmcs_rd_gst_tbl(pwp, PMCS_GST_IQFRZ0));
1082 	pmcs_prt(pwp, PMCS_PRT_INFO, "GST TABLE IQFRZ1: 0x%08x",
1083 	    pmcs_rd_gst_tbl(pwp, PMCS_GST_IQFRZ1));
1084 	pmcs_prt(pwp, PMCS_PRT_INFO, "GST TABLE MSGU TICK: 0x%08x",
1085 	    pmcs_rd_gst_tbl(pwp, PMCS_GST_MSGU_TICK));
1086 	pmcs_prt(pwp, PMCS_PRT_INFO, "GST TABLE IOP TICK: 0x%08x",
1087 	    pmcs_rd_gst_tbl(pwp, PMCS_GST_IOP_TICK));
1088 	for (i = 0; i < pwp->nphy; i++) {
1089 		uint32_t rerrf, pinfo, started = 0, link = 0;
1090 		pinfo = pmcs_rd_gst_tbl(pwp, PMCS_GST_PHY_INFO(i));
1091 		if (pinfo & 1) {
1092 			started = 1;
1093 			link = pinfo & 2;
1094 		}
1095 		rerrf = pmcs_rd_gst_tbl(pwp, PMCS_GST_RERR_INFO(i));
1096 		pmcs_prt(pwp, PMCS_PRT_INFO,
1097 		    "GST TABLE PHY%d STARTED=%d LINK=%d RERR=0x%08x",
1098 		    i, started, link, rerrf);
1099 	}
1100 	pmcs_prt(pwp, PMCS_PRT_INFO, "pmcs%d: Register dump end",
1101 	    ddi_get_instance(pwp->dip));
1102 }
1103 
1104 /*
1105  * Handle SATA Abort and other error processing
1106  */
1107 int
1108 pmcs_abort_handler(pmcs_hw_t *pwp)
1109 {
1110 	pmcs_phy_t *pptr, *pnext, *pnext_uplevel[PMCS_MAX_XPND];
1111 	int r, level = 0;
1112 
1113 	pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s", __func__);
1114 
1115 	mutex_enter(&pwp->lock);
1116 	pptr = pwp->root_phys;
1117 	mutex_exit(&pwp->lock);
1118 
1119 	while (pptr) {
1120 		/*
1121 		 * XXX: Need to make sure this doesn't happen
1122 		 * XXX: when non-NCQ commands are running.
1123 		 */
1124 		pmcs_lock_phy(pptr);
1125 		if (pptr->need_rl_ext) {
1126 			ASSERT(pptr->dtype == SATA);
1127 			if (pmcs_acquire_scratch(pwp, B_FALSE)) {
1128 				goto next_phy;
1129 			}
1130 			r = pmcs_sata_abort_ncq(pwp, pptr);
1131 			pmcs_release_scratch(pwp);
1132 			if (r == ENOMEM) {
1133 				goto next_phy;
1134 			}
1135 			if (r) {
1136 				r = pmcs_reset_phy(pwp, pptr,
1137 				    PMCS_PHYOP_LINK_RESET);
1138 				if (r == ENOMEM) {
1139 					goto next_phy;
1140 				}
1141 				/* what if other failures happened? */
1142 				pptr->abort_pending = 1;
1143 				pptr->abort_sent = 0;
1144 			}
1145 		}
1146 		if (pptr->abort_pending == 0 || pptr->abort_sent) {
1147 			goto next_phy;
1148 		}
1149 		pptr->abort_pending = 0;
1150 		if (pmcs_abort(pwp, pptr, pptr->device_id, 1, 1) == ENOMEM) {
1151 			pptr->abort_pending = 1;
1152 			goto next_phy;
1153 		}
1154 		pptr->abort_sent = 1;
1155 
1156 next_phy:
1157 		if (pptr->children) {
1158 			pnext = pptr->children;
1159 			pnext_uplevel[level++] = pptr->sibling;
1160 		} else {
1161 			pnext = pptr->sibling;
1162 			while ((pnext == NULL) && (level > 0)) {
1163 				pnext = pnext_uplevel[--level];
1164 			}
1165 		}
1166 
1167 		pmcs_unlock_phy(pptr);
1168 		pptr = pnext;
1169 	}
1170 
1171 	return (0);
1172 }
1173 
1174 /*
1175  * Register a device (get a device handle for it).
1176  * Called with PHY lock held.
1177  */
1178 int
1179 pmcs_register_device(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
1180 {
1181 	struct pmcwork *pwrk;
1182 	int result = 0;
1183 	uint32_t *msg;
1184 	uint32_t tmp, status;
1185 	uint32_t iomb[(PMCS_QENTRY_SIZE << 1) >> 2];
1186 
1187 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1188 	msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1189 
1190 	if (msg == NULL ||
1191 	    (pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr)) == NULL) {
1192 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1193 		result = ENOMEM;
1194 		goto out;
1195 	}
1196 
1197 	pwrk->arg = iomb;
1198 	pwrk->dtype = pptr->dtype;
1199 
1200 	msg[1] = LE_32(pwrk->htag);
1201 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, PMCIN_REGISTER_DEVICE));
1202 	tmp = PMCS_DEVREG_TLR |
1203 	    (pptr->link_rate << PMCS_DEVREG_LINK_RATE_SHIFT);
1204 	if (IS_ROOT_PHY(pptr)) {
1205 		msg[2] = LE_32(pptr->portid |
1206 		    (pptr->phynum << PMCS_PHYID_SHIFT));
1207 	} else {
1208 		msg[2] = LE_32(pptr->portid);
1209 	}
1210 	if (pptr->dtype == SATA) {
1211 		if (IS_ROOT_PHY(pptr)) {
1212 			tmp |= PMCS_DEVREG_TYPE_SATA_DIRECT;
1213 		} else {
1214 			tmp |= PMCS_DEVREG_TYPE_SATA;
1215 		}
1216 	} else {
1217 		tmp |= PMCS_DEVREG_TYPE_SAS;
1218 	}
1219 	msg[3] = LE_32(tmp);
1220 	msg[4] = LE_32(PMCS_DEVREG_IT_NEXUS_TIMEOUT);
1221 	(void) memcpy(&msg[5], pptr->sas_address, 8);
1222 
1223 	CLEAN_MESSAGE(msg, 7);
1224 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
1225 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1226 
1227 	pmcs_unlock_phy(pptr);
1228 	WAIT_FOR(pwrk, 250, result);
1229 	pmcs_lock_phy(pptr);
1230 	pmcs_pwork(pwp, pwrk);
1231 
1232 	if (result) {
1233 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_timeo, __func__);
1234 		result = ETIMEDOUT;
1235 		goto out;
1236 	}
1237 	status = LE_32(iomb[2]);
1238 	tmp = LE_32(iomb[3]);
1239 	switch (status) {
1240 	case PMCS_DEVREG_OK:
1241 	case PMCS_DEVREG_DEVICE_ALREADY_REGISTERED:
1242 	case PMCS_DEVREG_PHY_ALREADY_REGISTERED:
1243 		if (pmcs_validate_devid(pwp->root_phys, pptr, tmp) == B_FALSE) {
1244 			result = EEXIST;
1245 			goto out;
1246 		} else if (status != PMCS_DEVREG_OK) {
1247 			if (tmp == 0xffffffff) {	/* F/W bug */
1248 				pmcs_prt(pwp, PMCS_PRT_INFO,
1249 				    "%s: phy %s already has bogus devid 0x%x",
1250 				    __func__, pptr->path, tmp);
1251 				result = EIO;
1252 				goto out;
1253 			} else {
1254 				pmcs_prt(pwp, PMCS_PRT_INFO,
1255 				    "%s: phy %s already has a device id 0x%x",
1256 				    __func__, pptr->path, tmp);
1257 			}
1258 		}
1259 		break;
1260 	default:
1261 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: status 0x%x when trying to "
1262 		    "register device %s", __func__, status, pptr->path);
1263 		result = EIO;
1264 		goto out;
1265 	}
1266 	pptr->device_id = tmp;
1267 	pptr->valid_device_id = 1;
1268 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "Phy %s/" SAS_ADDR_FMT
1269 	    " registered with device_id 0x%x (portid %d)", pptr->path,
1270 	    SAS_ADDR_PRT(pptr->sas_address), tmp, pptr->portid);
1271 out:
1272 	return (result);
1273 }
1274 
1275 /*
1276  * Deregister a device (remove a device handle).
1277  * Called with PHY locked.
1278  */
1279 void
1280 pmcs_deregister_device(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
1281 {
1282 	struct pmcwork *pwrk;
1283 	uint32_t msg[PMCS_MSG_SIZE], *ptr, status;
1284 	uint32_t iomb[(PMCS_QENTRY_SIZE << 1) >> 2];
1285 	int result;
1286 
1287 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
1288 	if (pwrk == NULL) {
1289 		return;
1290 	}
1291 
1292 	pwrk->arg = iomb;
1293 	pwrk->dtype = pptr->dtype;
1294 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1295 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1296 	if (ptr == NULL) {
1297 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1298 		pmcs_pwork(pwp, pwrk);
1299 		return;
1300 	}
1301 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
1302 	    PMCIN_DEREGISTER_DEVICE_HANDLE));
1303 	msg[1] = LE_32(pwrk->htag);
1304 	msg[2] = LE_32(pptr->device_id);
1305 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
1306 	COPY_MESSAGE(ptr, msg, 3);
1307 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1308 
1309 	pmcs_unlock_phy(pptr);
1310 	WAIT_FOR(pwrk, 250, result);
1311 	pmcs_pwork(pwp, pwrk);
1312 	pmcs_lock_phy(pptr);
1313 
1314 	if (result) {
1315 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_timeo, __func__);
1316 		return;
1317 	}
1318 	status = LE_32(iomb[2]);
1319 	if (status != PMCOUT_STATUS_OK) {
1320 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: status 0x%x when trying to "
1321 		    "deregister device %s", __func__, status, pptr->path);
1322 	} else {
1323 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: device %s deregistered",
1324 		    __func__, pptr->path);
1325 		pptr->valid_device_id = 0;
1326 		pptr->device_id = PMCS_INVALID_DEVICE_ID;
1327 	}
1328 }
1329 
1330 /*
1331  * Deregister all registered devices.
1332  */
1333 void
1334 pmcs_deregister_devices(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
1335 {
1336 	/*
1337 	 * Start at the maximum level and walk back to level 0.  This only
1338 	 * gets done during detach after all threads and timers have been
1339 	 * destroyed, so there's no need to hold the softstate or PHY lock.
1340 	 */
1341 	while (phyp) {
1342 		if (phyp->children) {
1343 			pmcs_deregister_devices(pwp, phyp->children);
1344 		}
1345 		if (phyp->valid_device_id) {
1346 			pmcs_deregister_device(pwp, phyp);
1347 		}
1348 		phyp = phyp->sibling;
1349 	}
1350 }
1351 
1352 /*
1353  * Perform a 'soft' reset on the PMC chip
1354  */
1355 int
1356 pmcs_soft_reset(pmcs_hw_t *pwp, boolean_t no_restart)
1357 {
1358 	uint32_t s2, sfrbits, gsm, rapchk, wapchk, wdpchk, spc, tsmode;
1359 	pmcs_phy_t *pptr;
1360 	char *msg = NULL;
1361 	int i;
1362 
1363 	/*
1364 	 * Disable interrupts
1365 	 */
1366 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_MASK, 0xffffffff);
1367 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR, 0xffffffff);
1368 
1369 	pmcs_prt(pwp, PMCS_PRT_INFO, "%s", __func__);
1370 
1371 	if (pwp->locks_initted) {
1372 		mutex_enter(&pwp->lock);
1373 	}
1374 	pwp->blocked = 1;
1375 
1376 	/*
1377 	 * Step 1
1378 	 */
1379 	s2 = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2);
1380 	if ((s2 & PMCS_MSGU_HOST_SOFT_RESET_READY) == 0) {
1381 		pmcs_wr_gsm_reg(pwp, RB6_ACCESS, RB6_NMI_SIGNATURE);
1382 		pmcs_wr_gsm_reg(pwp, RB6_ACCESS, RB6_NMI_SIGNATURE);
1383 		for (i = 0; i < 100; i++) {
1384 			s2 = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2) &
1385 			    PMCS_MSGU_HOST_SOFT_RESET_READY;
1386 			if (s2) {
1387 				break;
1388 			}
1389 			drv_usecwait(10000);
1390 		}
1391 		s2 = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2) &
1392 		    PMCS_MSGU_HOST_SOFT_RESET_READY;
1393 		if (s2 == 0) {
1394 			pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: PMCS_MSGU_HOST_"
1395 			    "SOFT_RESET_READY never came ready", __func__);
1396 			pmcs_register_dump(pwp);
1397 			if ((pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1) &
1398 			    PMCS_MSGU_CPU_SOFT_RESET_READY) == 0 ||
1399 			    (pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2) &
1400 			    PMCS_MSGU_CPU_SOFT_RESET_READY) == 0) {
1401 				pwp->state = STATE_DEAD;
1402 				pwp->blocked = 0;
1403 				if (pwp->locks_initted) {
1404 					mutex_exit(&pwp->lock);
1405 				}
1406 				return (-1);
1407 			}
1408 		}
1409 	}
1410 
1411 	/*
1412 	 * Step 2
1413 	 */
1414 	pmcs_wr_gsm_reg(pwp, NMI_EN_VPE0_IOP, 0);
1415 	drv_usecwait(10);
1416 	pmcs_wr_gsm_reg(pwp, NMI_EN_VPE0_AAP1, 0);
1417 	drv_usecwait(10);
1418 	pmcs_wr_topunit(pwp, PMCS_EVENT_INT_ENABLE, 0);
1419 	drv_usecwait(10);
1420 	pmcs_wr_topunit(pwp, PMCS_EVENT_INT_STAT,
1421 	    pmcs_rd_topunit(pwp, PMCS_EVENT_INT_STAT));
1422 	drv_usecwait(10);
1423 	pmcs_wr_topunit(pwp, PMCS_ERROR_INT_ENABLE, 0);
1424 	drv_usecwait(10);
1425 	pmcs_wr_topunit(pwp, PMCS_ERROR_INT_STAT,
1426 	    pmcs_rd_topunit(pwp, PMCS_ERROR_INT_STAT));
1427 	drv_usecwait(10);
1428 
1429 	sfrbits = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1) &
1430 	    PMCS_MSGU_AAP_SFR_PROGRESS;
1431 	sfrbits ^= PMCS_MSGU_AAP_SFR_PROGRESS;
1432 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "PMCS_MSGU_HOST_SCRATCH0 %08x -> %08x",
1433 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_HOST_SCRATCH0), HST_SFT_RESET_SIG);
1434 	pmcs_wr_msgunit(pwp, PMCS_MSGU_HOST_SCRATCH0, HST_SFT_RESET_SIG);
1435 
1436 	/*
1437 	 * Step 3
1438 	 */
1439 	gsm = pmcs_rd_gsm_reg(pwp, GSM_CFG_AND_RESET);
1440 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "GSM %08x -> %08x", gsm,
1441 	    gsm & ~PMCS_SOFT_RESET_BITS);
1442 	pmcs_wr_gsm_reg(pwp, GSM_CFG_AND_RESET, gsm & ~PMCS_SOFT_RESET_BITS);
1443 
1444 	/*
1445 	 * Step 4
1446 	 */
1447 	rapchk = pmcs_rd_gsm_reg(pwp, READ_ADR_PARITY_CHK_EN);
1448 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "READ_ADR_PARITY_CHK_EN %08x -> %08x",
1449 	    rapchk, 0);
1450 	pmcs_wr_gsm_reg(pwp, READ_ADR_PARITY_CHK_EN, 0);
1451 	wapchk = pmcs_rd_gsm_reg(pwp, WRITE_ADR_PARITY_CHK_EN);
1452 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "WRITE_ADR_PARITY_CHK_EN %08x -> %08x",
1453 	    wapchk, 0);
1454 	pmcs_wr_gsm_reg(pwp, WRITE_ADR_PARITY_CHK_EN, 0);
1455 	wdpchk = pmcs_rd_gsm_reg(pwp, WRITE_DATA_PARITY_CHK_EN);
1456 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "WRITE_DATA_PARITY_CHK_EN %08x -> %08x",
1457 	    wdpchk, 0);
1458 	pmcs_wr_gsm_reg(pwp, WRITE_DATA_PARITY_CHK_EN, 0);
1459 
1460 	/*
1461 	 * Step 5
1462 	 */
1463 	drv_usecwait(100);
1464 
1465 	/*
1466 	 * Step 5.5 (Temporary workaround for 1.07.xx Beta)
1467 	 */
1468 	tsmode = pmcs_rd_gsm_reg(pwp, PMCS_GPIO_TRISTATE_MODE_ADDR);
1469 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "GPIO TSMODE %08x -> %08x", tsmode,
1470 	    tsmode & ~(PMCS_GPIO_TSMODE_BIT0|PMCS_GPIO_TSMODE_BIT1));
1471 	pmcs_wr_gsm_reg(pwp, PMCS_GPIO_TRISTATE_MODE_ADDR,
1472 	    tsmode & ~(PMCS_GPIO_TSMODE_BIT0|PMCS_GPIO_TSMODE_BIT1));
1473 	drv_usecwait(10);
1474 
1475 	/*
1476 	 * Step 6
1477 	 */
1478 	spc = pmcs_rd_topunit(pwp, PMCS_SPC_RESET);
1479 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "SPC_RESET %08x -> %08x", spc,
1480 	    spc & ~(PCS_IOP_SS_RSTB|PCS_AAP1_SS_RSTB));
1481 	pmcs_wr_topunit(pwp, PMCS_SPC_RESET,
1482 	    spc & ~(PCS_IOP_SS_RSTB|PCS_AAP1_SS_RSTB));
1483 	drv_usecwait(10);
1484 
1485 	/*
1486 	 * Step 7
1487 	 */
1488 	spc = pmcs_rd_topunit(pwp, PMCS_SPC_RESET);
1489 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "SPC_RESET %08x -> %08x", spc,
1490 	    spc & ~(BDMA_CORE_RSTB|OSSP_RSTB));
1491 	pmcs_wr_topunit(pwp, PMCS_SPC_RESET, spc & ~(BDMA_CORE_RSTB|OSSP_RSTB));
1492 
1493 	/*
1494 	 * Step 8
1495 	 */
1496 	drv_usecwait(100);
1497 
1498 	/*
1499 	 * Step 9
1500 	 */
1501 	spc = pmcs_rd_topunit(pwp, PMCS_SPC_RESET);
1502 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "SPC_RESET %08x -> %08x", spc,
1503 	    spc | (BDMA_CORE_RSTB|OSSP_RSTB));
1504 	pmcs_wr_topunit(pwp, PMCS_SPC_RESET, spc | (BDMA_CORE_RSTB|OSSP_RSTB));
1505 
1506 	/*
1507 	 * Step 10
1508 	 */
1509 	drv_usecwait(100);
1510 
1511 	/*
1512 	 * Step 11
1513 	 */
1514 	gsm = pmcs_rd_gsm_reg(pwp, GSM_CFG_AND_RESET);
1515 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "GSM %08x -> %08x", gsm,
1516 	    gsm | PMCS_SOFT_RESET_BITS);
1517 	pmcs_wr_gsm_reg(pwp, GSM_CFG_AND_RESET, gsm | PMCS_SOFT_RESET_BITS);
1518 	drv_usecwait(10);
1519 
1520 	/*
1521 	 * Step 12
1522 	 */
1523 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "READ_ADR_PARITY_CHK_EN %08x -> %08x",
1524 	    pmcs_rd_gsm_reg(pwp, READ_ADR_PARITY_CHK_EN), rapchk);
1525 	pmcs_wr_gsm_reg(pwp, READ_ADR_PARITY_CHK_EN, rapchk);
1526 	drv_usecwait(10);
1527 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "WRITE_ADR_PARITY_CHK_EN %08x -> %08x",
1528 	    pmcs_rd_gsm_reg(pwp, WRITE_ADR_PARITY_CHK_EN), wapchk);
1529 	pmcs_wr_gsm_reg(pwp, WRITE_ADR_PARITY_CHK_EN, wapchk);
1530 	drv_usecwait(10);
1531 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "WRITE_DATA_PARITY_CHK_EN %08x -> %08x",
1532 	    pmcs_rd_gsm_reg(pwp, WRITE_DATA_PARITY_CHK_EN), wapchk);
1533 	pmcs_wr_gsm_reg(pwp, WRITE_DATA_PARITY_CHK_EN, wdpchk);
1534 	drv_usecwait(10);
1535 
1536 	/*
1537 	 * Step 13
1538 	 */
1539 	spc = pmcs_rd_topunit(pwp, PMCS_SPC_RESET);
1540 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "SPC_RESET %08x -> %08x", spc,
1541 	    spc | (PCS_IOP_SS_RSTB|PCS_AAP1_SS_RSTB));
1542 	pmcs_wr_topunit(pwp, PMCS_SPC_RESET,
1543 	    spc | (PCS_IOP_SS_RSTB|PCS_AAP1_SS_RSTB));
1544 
1545 	/*
1546 	 * Step 14
1547 	 */
1548 	drv_usecwait(100);
1549 
1550 	/*
1551 	 * Step 15
1552 	 */
1553 	for (spc = 0, i = 0; i < 1000; i++) {
1554 		drv_usecwait(1000);
1555 		spc = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1);
1556 		if ((spc & PMCS_MSGU_AAP_SFR_PROGRESS) == sfrbits) {
1557 			break;
1558 		}
1559 	}
1560 
1561 	if ((spc & PMCS_MSGU_AAP_SFR_PROGRESS) != sfrbits) {
1562 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
1563 		    "SFR didn't toggle (sfr 0x%x)", spc);
1564 		pwp->state = STATE_DEAD;
1565 		pwp->blocked = 0;
1566 		if (pwp->locks_initted) {
1567 			mutex_exit(&pwp->lock);
1568 		}
1569 		return (-1);
1570 	}
1571 
1572 	/*
1573 	 * Step 16
1574 	 */
1575 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_MASK, 0xffffffff);
1576 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR, 0xffffffff);
1577 
1578 	/*
1579 	 * Wait for up to 5 seconds for AAP state to come either ready or error.
1580 	 */
1581 	for (i = 0; i < 50; i++) {
1582 		spc = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1) &
1583 		    PMCS_MSGU_AAP_STATE_MASK;
1584 		if (spc == PMCS_MSGU_AAP_STATE_ERROR ||
1585 		    spc == PMCS_MSGU_AAP_STATE_READY) {
1586 			break;
1587 		}
1588 		drv_usecwait(100000);
1589 	}
1590 	spc = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1);
1591 	if ((spc & PMCS_MSGU_AAP_STATE_MASK) != PMCS_MSGU_AAP_STATE_READY) {
1592 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
1593 		    "soft reset failed (state 0x%x)", spc);
1594 		pwp->state = STATE_DEAD;
1595 		pwp->blocked = 0;
1596 		if (pwp->locks_initted) {
1597 			mutex_exit(&pwp->lock);
1598 		}
1599 		return (-1);
1600 	}
1601 
1602 
1603 	if (pwp->state == STATE_DEAD || pwp->state == STATE_UNPROBING ||
1604 	    pwp->state == STATE_PROBING || pwp->locks_initted == 0) {
1605 		pwp->blocked = 0;
1606 		if (pwp->locks_initted) {
1607 			mutex_exit(&pwp->lock);
1608 		}
1609 		return (0);
1610 	}
1611 
1612 	/*
1613 	 * Return at this point if we dont need to startup.
1614 	 */
1615 	if (no_restart) {
1616 		return (0);
1617 	}
1618 
1619 	ASSERT(pwp->locks_initted != 0);
1620 
1621 	/*
1622 	 * Clean up various soft state.
1623 	 */
1624 	bzero(pwp->ports, sizeof (pwp->ports));
1625 
1626 	pmcs_free_all_phys(pwp, pwp->root_phys);
1627 
1628 	for (pptr = pwp->root_phys; pptr; pptr = pptr->sibling) {
1629 		pmcs_lock_phy(pptr);
1630 		pmcs_clear_phy(pwp, pptr);
1631 		pmcs_unlock_phy(pptr);
1632 	}
1633 
1634 	if (pwp->targets) {
1635 		for (i = 0; i < pwp->max_dev; i++) {
1636 			pmcs_xscsi_t *xp = pwp->targets[i];
1637 
1638 			if (xp == NULL) {
1639 				continue;
1640 			}
1641 			mutex_enter(&xp->statlock);
1642 			pmcs_clear_xp(pwp, xp);
1643 			mutex_exit(&xp->statlock);
1644 		}
1645 	}
1646 
1647 	bzero(pwp->shadow_iqpi, sizeof (pwp->shadow_iqpi));
1648 	for (i = 0; i < PMCS_NIQ; i++) {
1649 		if (pwp->iqp[i]) {
1650 			bzero(pwp->iqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth);
1651 			pmcs_wr_iqpi(pwp, i, 0);
1652 			pmcs_wr_iqci(pwp, i, 0);
1653 		}
1654 	}
1655 	for (i = 0; i < PMCS_NOQ; i++) {
1656 		if (pwp->oqp[i]) {
1657 			bzero(pwp->oqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth);
1658 			pmcs_wr_oqpi(pwp, i, 0);
1659 			pmcs_wr_oqci(pwp, i, 0);
1660 		}
1661 
1662 	}
1663 	if (pwp->fwlogp) {
1664 		bzero(pwp->fwlogp, PMCS_FWLOG_SIZE);
1665 	}
1666 	STAILQ_INIT(&pwp->wf);
1667 	bzero(pwp->work, sizeof (pmcwork_t) * pwp->max_cmd);
1668 	for (i = 0; i < pwp->max_cmd - 1; i++) {
1669 		pmcwork_t *pwrk = &pwp->work[i];
1670 		STAILQ_INSERT_TAIL(&pwp->wf, pwrk, next);
1671 	}
1672 
1673 	/*
1674 	 * Clear out any leftover commands sitting in the work list
1675 	 */
1676 	for (i = 0; i < pwp->max_cmd; i++) {
1677 		pmcwork_t *pwrk = &pwp->work[i];
1678 		mutex_enter(&pwrk->lock);
1679 		if (pwrk->state == PMCS_WORK_STATE_ONCHIP) {
1680 			switch (PMCS_TAG_TYPE(pwrk->htag)) {
1681 			case PMCS_TAG_TYPE_WAIT:
1682 				mutex_exit(&pwrk->lock);
1683 				break;
1684 			case PMCS_TAG_TYPE_CBACK:
1685 			case PMCS_TAG_TYPE_NONE:
1686 				pmcs_pwork(pwp, pwrk);
1687 				break;
1688 			default:
1689 				break;
1690 			}
1691 		} else if (pwrk->state == PMCS_WORK_STATE_IOCOMPQ) {
1692 			pwrk->dead = 1;
1693 			mutex_exit(&pwrk->lock);
1694 		} else {
1695 			/*
1696 			 * The other states of NIL, READY and INTR
1697 			 * should not be visible outside of a lock being held.
1698 			 */
1699 			pmcs_pwork(pwp, pwrk);
1700 		}
1701 	}
1702 
1703 	/*
1704 	 * Restore Interrupt Mask
1705 	 */
1706 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_MASK, pwp->intr_mask);
1707 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR, 0xffffffff);
1708 
1709 	pwp->blocked = 0;
1710 	pwp->mpi_table_setup = 0;
1711 	mutex_exit(&pwp->lock);
1712 
1713 	/*
1714 	 * Set up MPI again.
1715 	 */
1716 	if (pmcs_setup(pwp)) {
1717 		msg = "unable to setup MPI tables again";
1718 		goto fail_restart;
1719 	}
1720 	pmcs_report_fwversion(pwp);
1721 
1722 	/*
1723 	 * Restart MPI
1724 	 */
1725 	if (pmcs_start_mpi(pwp)) {
1726 		msg = "unable to restart MPI again";
1727 		goto fail_restart;
1728 	}
1729 
1730 	mutex_enter(&pwp->lock);
1731 	pwp->blocked = 0;
1732 	SCHEDULE_WORK(pwp, PMCS_WORK_RUN_QUEUES);
1733 	mutex_exit(&pwp->lock);
1734 
1735 	/*
1736 	 * Run any completions
1737 	 */
1738 	PMCS_CQ_RUN(pwp);
1739 
1740 	/*
1741 	 * Delay
1742 	 */
1743 	drv_usecwait(1000000);
1744 	return (0);
1745 
1746 fail_restart:
1747 	mutex_enter(&pwp->lock);
1748 	pwp->state = STATE_DEAD;
1749 	mutex_exit(&pwp->lock);
1750 	pmcs_prt(pwp, PMCS_PRT_ERR, "%s: Failed: %s", __func__, msg);
1751 	return (-1);
1752 }
1753 
1754 /*
1755  * Reset a device or a logical unit.
1756  */
1757 int
1758 pmcs_reset_dev(pmcs_hw_t *pwp, pmcs_phy_t *pptr, uint64_t lun)
1759 {
1760 	int rval = 0;
1761 
1762 	if (pptr == NULL) {
1763 		return (ENXIO);
1764 	}
1765 
1766 	pmcs_lock_phy(pptr);
1767 	if (pptr->dtype == SAS) {
1768 		/*
1769 		 * Some devices do not support SAS_I_T_NEXUS_RESET as
1770 		 * it is not a mandatory (in SAM4) task management
1771 		 * function, while LOGIC_UNIT_RESET is mandatory.
1772 		 *
1773 		 * The problem here is that we need to iterate over
1774 		 * all known LUNs to emulate the semantics of
1775 		 * "RESET_TARGET".
1776 		 *
1777 		 * XXX: FIX ME
1778 		 */
1779 		if (lun == (uint64_t)-1) {
1780 			lun = 0;
1781 		}
1782 		rval = pmcs_ssp_tmf(pwp, pptr, SAS_LOGICAL_UNIT_RESET, 0, lun,
1783 		    NULL);
1784 	} else if (pptr->dtype == SATA) {
1785 		if (lun != 0ull) {
1786 			pmcs_unlock_phy(pptr);
1787 			return (EINVAL);
1788 		}
1789 		rval = pmcs_reset_phy(pwp, pptr, PMCS_PHYOP_LINK_RESET);
1790 	} else {
1791 		pmcs_unlock_phy(pptr);
1792 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
1793 		    "%s: cannot reset a SMP device yet (%s)",
1794 		    __func__, pptr->path);
1795 		return (EINVAL);
1796 	}
1797 
1798 	/*
1799 	 * Now harvest any commands killed by this action
1800 	 * by issuing an ABORT for all commands on this device.
1801 	 *
1802 	 * We do this even if the the tmf or reset fails (in case there
1803 	 * are any dead commands around to be harvested *anyway*).
1804 	 * We don't have to await for the abort to complete.
1805 	 */
1806 	if (pmcs_abort(pwp, pptr, 0, 1, 0)) {
1807 		pptr->abort_pending = 1;
1808 		SCHEDULE_WORK(pwp, PMCS_WORK_ABORT_HANDLE);
1809 	}
1810 
1811 	pmcs_unlock_phy(pptr);
1812 	return (rval);
1813 }
1814 
1815 /*
1816  * Called with PHY locked.
1817  */
1818 static int
1819 pmcs_get_device_handle(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
1820 {
1821 	if (pptr->valid_device_id == 0) {
1822 		int result = pmcs_register_device(pwp, pptr);
1823 
1824 		/*
1825 		 * If we changed while registering, punt
1826 		 */
1827 		if (pptr->changed) {
1828 			RESTART_DISCOVERY(pwp);
1829 			return (-1);
1830 		}
1831 
1832 		/*
1833 		 * If we had a failure to register, check against errors.
1834 		 * An ENOMEM error means we just retry (temp resource shortage).
1835 		 */
1836 		if (result == ENOMEM) {
1837 			PHY_CHANGED(pwp, pptr);
1838 			RESTART_DISCOVERY(pwp);
1839 			return (-1);
1840 		}
1841 
1842 		/*
1843 		 * An ETIMEDOUT error means we retry (if our counter isn't
1844 		 * exhausted)
1845 		 */
1846 		if (result == ETIMEDOUT) {
1847 			if (ddi_get_lbolt() < pptr->config_stop) {
1848 				PHY_CHANGED(pwp, pptr);
1849 				RESTART_DISCOVERY(pwp);
1850 			} else {
1851 				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
1852 				    "%s: Retries exhausted for %s, killing",
1853 				    __func__, pptr->path);
1854 				pptr->config_stop = 0;
1855 				pmcs_kill_changed(pwp, pptr, 0);
1856 			}
1857 			return (-1);
1858 		}
1859 		/*
1860 		 * Other errors or no valid device id is fatal, but don't
1861 		 * preclude a future action.
1862 		 */
1863 		if (result || pptr->valid_device_id == 0) {
1864 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: %s could not "
1865 			    "be registered", __func__,  pptr->path);
1866 			return (-1);
1867 		}
1868 	}
1869 	return (0);
1870 }
1871 
1872 int
1873 pmcs_iport_tgtmap_create(pmcs_iport_t *iport)
1874 {
1875 	ASSERT(iport);
1876 	if (iport == NULL)
1877 		return (B_FALSE);
1878 
1879 	pmcs_prt(iport->pwp, PMCS_PRT_DEBUG_MAP, "%s", __func__);
1880 
1881 	/* create target map */
1882 	if (scsi_hba_tgtmap_create(iport->dip, SCSI_TM_FULLSET, tgtmap_usec,
1883 	    2048, NULL, NULL, NULL, &iport->iss_tgtmap) != DDI_SUCCESS) {
1884 		pmcs_prt(iport->pwp, PMCS_PRT_DEBUG,
1885 		    "%s: failed to create tgtmap", __func__);
1886 		return (B_FALSE);
1887 	}
1888 	return (B_TRUE);
1889 }
1890 
1891 int
1892 pmcs_iport_tgtmap_destroy(pmcs_iport_t *iport)
1893 {
1894 	ASSERT(iport && iport->iss_tgtmap);
1895 	if ((iport == NULL) || (iport->iss_tgtmap == NULL))
1896 		return (B_FALSE);
1897 
1898 	pmcs_prt(iport->pwp, PMCS_PRT_DEBUG_MAP, "%s", __func__);
1899 
1900 	/* destroy target map */
1901 	scsi_hba_tgtmap_destroy(iport->iss_tgtmap);
1902 	return (B_TRUE);
1903 }
1904 
1905 /*
1906  * Query the phymap and populate the iport handle passed in.
1907  * Called with iport lock held.
1908  */
1909 int
1910 pmcs_iport_configure_phys(pmcs_iport_t *iport)
1911 {
1912 	pmcs_hw_t		*pwp;
1913 	pmcs_phy_t		*pptr;
1914 	sas_phymap_phys_t	*phys;
1915 	int			phynum;
1916 	int			inst;
1917 
1918 	ASSERT(iport);
1919 	ASSERT(mutex_owned(&iport->lock));
1920 	pwp = iport->pwp;
1921 	ASSERT(pwp);
1922 	inst = ddi_get_instance(iport->dip);
1923 
1924 	mutex_enter(&pwp->lock);
1925 	ASSERT(pwp->root_phys != NULL);
1926 
1927 	/*
1928 	 * Query the phymap regarding the phys in this iport and populate
1929 	 * the iport's phys list. Hereafter this list is maintained via
1930 	 * port up and down events in pmcs_intr.c
1931 	 */
1932 	ASSERT(list_is_empty(&iport->phys));
1933 	phys = sas_phymap_ua2phys(pwp->hss_phymap, iport->ua);
1934 	while ((phynum = sas_phymap_phys_next(phys)) != -1) {
1935 		/* Grab the phy pointer from root_phys */
1936 		pptr = pwp->root_phys + phynum;
1937 		ASSERT(pptr);
1938 		pmcs_lock_phy(pptr);
1939 		ASSERT(pptr->phynum == phynum);
1940 
1941 		/*
1942 		 * Set a back pointer in the phy to this iport.
1943 		 */
1944 		pptr->iport = iport;
1945 
1946 		/*
1947 		 * If this phy is the primary, set a pointer to it on our
1948 		 * iport handle, and set our portid from it.
1949 		 */
1950 		if (!pptr->subsidiary) {
1951 			iport->pptr = pptr;
1952 			iport->portid = pptr->portid;
1953 		}
1954 
1955 		/*
1956 		 * Finally, insert the phy into our list
1957 		 */
1958 		pmcs_add_phy_to_iport(iport, pptr);
1959 		pmcs_unlock_phy(pptr);
1960 
1961 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: found phy %d [0x%p] "
1962 		    "on iport%d, refcnt(%d)", __func__, phynum,
1963 		    (void *)pptr, inst, iport->refcnt);
1964 	}
1965 	mutex_exit(&pwp->lock);
1966 	sas_phymap_phys_free(phys);
1967 	RESTART_DISCOVERY(pwp);
1968 	return (DDI_SUCCESS);
1969 }
1970 
1971 /*
1972  * Return the iport that ua is associated with, or NULL.  If an iport is
1973  * returned, it will be held and the caller must release the hold.
1974  */
1975 static pmcs_iport_t *
1976 pmcs_get_iport_by_ua(pmcs_hw_t *pwp, char *ua)
1977 {
1978 	pmcs_iport_t	*iport = NULL;
1979 
1980 	rw_enter(&pwp->iports_lock, RW_READER);
1981 	for (iport = list_head(&pwp->iports);
1982 	    iport != NULL;
1983 	    iport = list_next(&pwp->iports, iport)) {
1984 		mutex_enter(&iport->lock);
1985 		if (strcmp(iport->ua, ua) == 0) {
1986 			mutex_exit(&iport->lock);
1987 			mutex_enter(&iport->refcnt_lock);
1988 			iport->refcnt++;
1989 			mutex_exit(&iport->refcnt_lock);
1990 			break;
1991 		}
1992 		mutex_exit(&iport->lock);
1993 	}
1994 	rw_exit(&pwp->iports_lock);
1995 
1996 	return (iport);
1997 }
1998 
1999 /*
2000  * Return the iport that pptr is associated with, or NULL.
2001  * If an iport is returned, there is a hold that the caller must release.
2002  */
2003 pmcs_iport_t *
2004 pmcs_get_iport_by_phy(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2005 {
2006 	pmcs_iport_t	*iport = NULL;
2007 	char		*ua;
2008 
2009 	ua = sas_phymap_lookup_ua(pwp->hss_phymap, pwp->sas_wwns[0],
2010 	    pmcs_barray2wwn(pptr->sas_address));
2011 	if (ua) {
2012 		iport = pmcs_get_iport_by_ua(pwp, ua);
2013 		if (iport) {
2014 			mutex_enter(&iport->lock);
2015 			iport->ua_state = UA_ACTIVE;
2016 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: "
2017 			    "found iport [0x%p] on ua (%s) for phy [0x%p], "
2018 			    "refcnt (%d)", __func__, (void *)iport, ua,
2019 			    (void *)pptr, iport->refcnt);
2020 			mutex_exit(&iport->lock);
2021 		}
2022 	}
2023 
2024 	return (iport);
2025 }
2026 
2027 void
2028 pmcs_rele_iport(pmcs_iport_t *iport)
2029 {
2030 	/*
2031 	 * Release a refcnt on this iport. If this is the last reference,
2032 	 * signal the potential waiter in pmcs_iport_unattach().
2033 	 */
2034 	ASSERT(iport->refcnt > 0);
2035 	mutex_enter(&iport->refcnt_lock);
2036 	iport->refcnt--;
2037 	mutex_exit(&iport->refcnt_lock);
2038 	if (iport->refcnt == 0) {
2039 		cv_signal(&iport->refcnt_cv);
2040 	}
2041 	pmcs_prt(iport->pwp, PMCS_PRT_DEBUG_CONFIG, "%s: iport [0x%p] "
2042 	    "refcnt (%d)", __func__, (void *)iport, iport->refcnt);
2043 }
2044 
/*
 * SAS phymap activation callback, invoked when a unit address (port)
 * becomes active in the phymap.  Bumps phymap_active, adds an iport for
 * the unit address to the iportmap, publishes the HBA softstate as the
 * per-ua private data, and -- if an iport node for this ua still exists
 * (e.g. held open across a port bounce) -- reconfigures it for reuse.
 */
void
pmcs_phymap_activate(void *arg, char *ua, void **privp)
{
	_NOTE(ARGUNUSED(privp));
	pmcs_hw_t	*pwp = arg;
	pmcs_iport_t	*iport = NULL;

	mutex_enter(&pwp->lock);
	if ((pwp->state == STATE_UNPROBING) || (pwp->state == STATE_DEAD)) {
		/* HBA is going away; don't activate anything */
		mutex_exit(&pwp->lock);
		return;
	}
	pwp->phymap_active++;
	mutex_exit(&pwp->lock);

	if (scsi_hba_iportmap_iport_add(pwp->hss_iportmap, ua, NULL) !=
	    DDI_SUCCESS) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, "%s: failed to add "
		    "iport handle on unit address [%s]", __func__, ua);
	} else {
		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, "%s: phymap_active count "
		    "(%d), added iport handle on unit address [%s]", __func__,
		    pwp->phymap_active, ua);
	}

	/* Set the HBA softstate as our private data for this unit address */
	*privp = (void *)pwp;

	/*
	 * We are waiting on attach for this iport node, unless it is still
	 * attached. This can happen if a consumer has an outstanding open
	 * on our iport node, but the port is down.  If this is the case, we
	 * need to configure our iport here for reuse.
	 */
	iport = pmcs_get_iport_by_ua(pwp, ua);
	if (iport) {
		mutex_enter(&iport->lock);
		if (pmcs_iport_configure_phys(iport) != DDI_SUCCESS) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: "
			    "failed to configure phys on iport [0x%p] at "
			    "unit address (%s)", __func__, (void *)iport, ua);
		}
		iport->ua_state = UA_ACTIVE;
		pmcs_smhba_add_iport_prop(iport, DATA_TYPE_INT32, PMCS_NUM_PHYS,
		    &iport->nphy);
		mutex_exit(&iport->lock);
		pmcs_rele_iport(iport);
	}

}
2095 
2096 void
2097 pmcs_phymap_deactivate(void *arg, char *ua, void *privp)
2098 {
2099 	_NOTE(ARGUNUSED(privp));
2100 	pmcs_hw_t	*pwp = arg;
2101 	pmcs_iport_t	*iport;
2102 
2103 	mutex_enter(&pwp->lock);
2104 	pwp->phymap_active--;
2105 	mutex_exit(&pwp->lock);
2106 
2107 	if (scsi_hba_iportmap_iport_remove(pwp->hss_iportmap, ua) !=
2108 	    DDI_SUCCESS) {
2109 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, "%s: failed to remove "
2110 		    "iport handle on unit address [%s]", __func__, ua);
2111 	} else {
2112 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, "%s: phymap_active "
2113 		    "count (%d), removed iport handle on unit address [%s]",
2114 		    __func__, pwp->phymap_active, ua);
2115 	}
2116 
2117 	iport = pmcs_get_iport_by_ua(pwp, ua);
2118 
2119 	if (iport == NULL) {
2120 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: failed lookup of "
2121 		    "iport handle on unit address (%s)", __func__, ua);
2122 		return;
2123 	}
2124 
2125 	mutex_enter(&iport->lock);
2126 	iport->ua_state = UA_INACTIVE;
2127 	iport->portid = PMCS_IPORT_INVALID_PORT_ID;
2128 	pmcs_remove_phy_from_iport(iport, NULL);
2129 	mutex_exit(&iport->lock);
2130 	pmcs_rele_iport(iport);
2131 }
2132 
/*
 * Top-level discovery function.
 *
 * Walks the PHY tree in several ordered passes (check changed expanders,
 * kill dead devices, clear dead expanders, null dead PHYs, configure new
 * devices) and then reports the stable observation to the SCSA tgtmaps.
 * If configuration changes underneath us at any point, discovery is
 * rescheduled rather than completed.  Runs under pwp->config_lock's
 * "configuring" flag so only one discovery pass is active at a time.
 */
void
pmcs_discover(pmcs_hw_t *pwp)
{
	pmcs_phy_t		*pptr;
	pmcs_phy_t		*root_phy;

	DTRACE_PROBE2(pmcs__discover__entry, ulong_t, pwp->work_flags,
	    boolean_t, pwp->config_changed);

	mutex_enter(&pwp->lock);

	/* Bail out if the HBA isn't fully up (or is going down). */
	if (pwp->state != STATE_RUNNING) {
		mutex_exit(&pwp->lock);
		return;
	}

	/* Ensure we have at least one phymap active */
	if (pwp->phymap_active == 0) {
		mutex_exit(&pwp->lock);
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
		    "%s: phymap inactive, exiting", __func__);
		return;
	}

	mutex_exit(&pwp->lock);

	/*
	 * If no iports have attached, but we have PHYs that are up, we
	 * are waiting for iport attach to complete.  Restart discovery.
	 */
	rw_enter(&pwp->iports_lock, RW_READER);
	if (!pwp->iports_attached) {
		rw_exit(&pwp->iports_lock);
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
		    "%s: no iports attached, retry discovery", __func__);
		SCHEDULE_WORK(pwp, PMCS_WORK_DISCOVER);
		return;
	}
	rw_exit(&pwp->iports_lock);

	/* Serialize: only one discovery/configuration pass at a time. */
	mutex_enter(&pwp->config_lock);
	if (pwp->configuring) {
		mutex_exit(&pwp->config_lock);
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
		    "%s: configuration already in progress", __func__);
		return;
	}

	/* Non-blocking scratch acquire; if busy, just retry discovery later */
	if (pmcs_acquire_scratch(pwp, B_FALSE)) {
		mutex_exit(&pwp->config_lock);
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
		    "%s: cannot allocate scratch", __func__);
		SCHEDULE_WORK(pwp, PMCS_WORK_DISCOVER);
		return;
	}

	pwp->configuring = 1;
	pwp->config_changed = B_FALSE;
	mutex_exit(&pwp->config_lock);

	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "Discovery begin");

	/*
	 * The order of the following traversals is important.
	 *
	 * The first one checks for changed expanders.
	 *
	 * The second one aborts commands for dead devices and deregisters them.
	 *
	 * The third one clears the contents of dead expanders from the tree
	 *
	 * The fourth one clears now dead devices in expanders that remain.
	 */

	/*
	 * 1. Check expanders marked changed (but not dead) to see if they still
	 * have the same number of phys and the same SAS address. Mark them,
	 * their subsidiary phys (if wide) and their descendents dead if
	 * anything has changed. Check the devices they contain to see if
	 * *they* have changed. If they've changed from type NOTHING we leave
	 * them marked changed to be configured later (picking up a new SAS
	 * address and link rate if possible). Otherwise, any change in type,
	 * SAS address or removal of target role will cause us to mark them
	 * (and their descendents) as dead (and cause any pending commands
	 * and associated devices to be removed).
	 */
	root_phy = pwp->root_phys;
	if (pmcs_check_expanders(pwp, root_phy) == B_TRUE) {
		goto out;
	}

	/*
	 * 2. Descend the tree looking for dead devices and kill them
	 * by aborting all active commands and then deregistering them.
	 */
	if (pmcs_kill_devices(pwp, root_phy)) {
		goto out;
	}

	/*
	 * 3. Check for dead expanders and remove their children from the tree.
	 * By the time we get here, the devices and commands for them have
	 * already been terminated and removed.
	 *
	 * We do this independent of the configuration count changing so we can
	 * free any dead device PHYs that were discovered while checking
	 * expanders. We ignore any subsidiary phys as pmcs_clear_expander
	 * will take care of those.
	 *
	 * NOTE: pmcs_clear_expander requires softstate lock
	 */
	mutex_enter(&pwp->lock);
	for (pptr = pwp->root_phys; pptr; pptr = pptr->sibling) {
		/*
		 * Call pmcs_clear_expander for every root PHY.  It will
		 * recurse and determine which (if any) expanders actually
		 * need to be cleared.
		 */
		pmcs_lock_phy(pptr);
		pmcs_clear_expander(pwp, pptr, 0);
		pmcs_unlock_phy(pptr);
	}
	mutex_exit(&pwp->lock);

	/*
	 * 4. Check for dead devices and nullify them. By the time we get here,
	 * the devices and commands for them have already been terminated
	 * and removed. This is different from step 2 in that this just nulls
	 * phys that are part of expanders that are still here but used to
	 * be something but are no longer something (e.g., after a pulled
	 * disk drive). Note that dead expanders had their contained phys
	 * removed from the tree- here, the expanders themselves are
	 * nullified (unless they were removed by being contained in another
	 * expander phy).
	 */
	pmcs_clear_phys(pwp, root_phy);

	/*
	 * 5. Now check for and configure new devices.
	 */
	if (pmcs_configure_new_devices(pwp, root_phy)) {
		goto restart;
	}

out:
	/* Common exit: report observations if stable, else reschedule. */
	DTRACE_PROBE2(pmcs__discover__exit, ulong_t, pwp->work_flags,
	    boolean_t, pwp->config_changed);
	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "Discovery end");

	mutex_enter(&pwp->config_lock);

	if (pwp->config_changed == B_FALSE) {
		/*
		 * Observation is stable, report what we currently see to
		 * the tgtmaps for delta processing. Start by setting
		 * BEGIN on all tgtmaps.
		 */
		mutex_exit(&pwp->config_lock);
		if (pmcs_report_observations(pwp) == B_FALSE) {
			goto restart;
		}
		mutex_enter(&pwp->config_lock);
	} else {
		/*
		 * If config_changed is TRUE, we need to reschedule
		 * discovery now.
		 */
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
		    "%s: Config has changed, will re-run discovery", __func__);
		SCHEDULE_WORK(pwp, PMCS_WORK_DISCOVER);
	}

	pmcs_release_scratch(pwp);
	pwp->configuring = 0;
	mutex_exit(&pwp->config_lock);

#ifdef DEBUG
	/*
	 * Sanity check (DEBUG only): if any PHY still needs work but no
	 * discovery is scheduled, something above dropped a rescheduling.
	 */
	pptr = pmcs_find_phy_needing_work(pwp, pwp->root_phys);
	if (pptr != NULL) {
		if (!WORK_IS_SCHEDULED(pwp, PMCS_WORK_DISCOVER)) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG,
			    "PHY %s dead=%d changed=%d configured=%d "
			    "but no work scheduled", pptr->path, pptr->dead,
			    pptr->changed, pptr->configured);
		}
		/* pmcs_find_phy_needing_work returns the PHY locked */
		pmcs_unlock_phy(pptr);
	}
#endif

	return;

restart:
	/* Clean up and restart discovery */
	pmcs_release_scratch(pwp);
	mutex_enter(&pwp->config_lock);
	pwp->configuring = 0;
	RESTART_DISCOVERY_LOCKED(pwp);
	mutex_exit(&pwp->config_lock);
}
2335 
2336 /*
2337  * Return any PHY that needs to have scheduled work done.  The PHY is returned
2338  * locked.
2339  */
2340 static pmcs_phy_t *
2341 pmcs_find_phy_needing_work(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2342 {
2343 	pmcs_phy_t *cphyp, *pnext;
2344 
2345 	while (pptr) {
2346 		pmcs_lock_phy(pptr);
2347 
2348 		if (pptr->changed || (pptr->dead && pptr->valid_device_id)) {
2349 			return (pptr);
2350 		}
2351 
2352 		pnext = pptr->sibling;
2353 
2354 		if (pptr->children) {
2355 			cphyp = pptr->children;
2356 			pmcs_unlock_phy(pptr);
2357 			cphyp = pmcs_find_phy_needing_work(pwp, cphyp);
2358 			if (cphyp) {
2359 				return (cphyp);
2360 			}
2361 		} else {
2362 			pmcs_unlock_phy(pptr);
2363 		}
2364 
2365 		pptr = pnext;
2366 	}
2367 
2368 	return (NULL);
2369 }
2370 
2371 /*
2372  * Report current observations to SCSA.
2373  */
2374 static boolean_t
2375 pmcs_report_observations(pmcs_hw_t *pwp)
2376 {
2377 	pmcs_iport_t		*iport;
2378 	scsi_hba_tgtmap_t	*tgtmap;
2379 	char			*ap;
2380 	pmcs_phy_t		*pptr;
2381 	uint64_t		wwn;
2382 
2383 	/*
2384 	 * Observation is stable, report what we currently see to the tgtmaps
2385 	 * for delta processing. Start by setting BEGIN on all tgtmaps.
2386 	 */
2387 	rw_enter(&pwp->iports_lock, RW_READER);
2388 	for (iport = list_head(&pwp->iports); iport != NULL;
2389 	    iport = list_next(&pwp->iports, iport)) {
2390 		/*
2391 		 * Unless we have at least one phy up, skip this iport.
2392 		 * Note we don't need to lock the iport for report_skip
2393 		 * since it is only used here.  We are doing the skip so that
2394 		 * the phymap and iportmap stabilization times are honored -
2395 		 * giving us the ability to recover port operation within the
2396 		 * stabilization time without unconfiguring targets using the
2397 		 * port.
2398 		 */
2399 		if (!sas_phymap_uahasphys(pwp->hss_phymap, iport->ua)) {
2400 			iport->report_skip = 1;
2401 			continue;		/* skip set_begin */
2402 		}
2403 		iport->report_skip = 0;
2404 
2405 		tgtmap = iport->iss_tgtmap;
2406 		ASSERT(tgtmap);
2407 		if (scsi_hba_tgtmap_set_begin(tgtmap) != DDI_SUCCESS) {
2408 			pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP,
2409 			    "%s: cannot set_begin tgtmap ", __func__);
2410 			rw_exit(&pwp->iports_lock);
2411 			return (B_FALSE);
2412 		}
2413 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP,
2414 		    "%s: set begin on tgtmap [0x%p]", __func__,
2415 		    (void *)tgtmap);
2416 	}
2417 	rw_exit(&pwp->iports_lock);
2418 
2419 	/*
2420 	 * Now, cycle through all levels of all phys and report
2421 	 * observations into their respective tgtmaps.
2422 	 */
2423 	pptr = pwp->root_phys;
2424 
2425 	while (pptr) {
2426 		pmcs_lock_phy(pptr);
2427 
2428 		/*
2429 		 * Skip PHYs that have nothing attached or are dead.
2430 		 */
2431 		if ((pptr->dtype == NOTHING) || pptr->dead) {
2432 			pmcs_unlock_phy(pptr);
2433 			pptr = pptr->sibling;
2434 			continue;
2435 		}
2436 
2437 		if (pptr->changed) {
2438 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
2439 			    "%s: oops, PHY %s changed; restart discovery",
2440 			    __func__, pptr->path);
2441 			pmcs_unlock_phy(pptr);
2442 			return (B_FALSE);
2443 		}
2444 
2445 		/*
2446 		 * Get the iport for this root PHY, then call the helper
2447 		 * to report observations for this iport's targets
2448 		 */
2449 		iport = pmcs_get_iport_by_phy(pwp, pptr);
2450 		if (iport == NULL) {
2451 			/* No iport for this tgt */
2452 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
2453 			    "%s: no iport for this target",
2454 			    __func__);
2455 			pmcs_unlock_phy(pptr);
2456 			pptr = pptr->sibling;
2457 			continue;
2458 		}
2459 
2460 		if (!iport->report_skip) {
2461 			if (pmcs_report_iport_observations(
2462 			    pwp, iport, pptr) == B_FALSE) {
2463 				pmcs_rele_iport(iport);
2464 				pmcs_unlock_phy(pptr);
2465 				return (B_FALSE);
2466 			}
2467 		}
2468 		pmcs_rele_iport(iport);
2469 		pmcs_unlock_phy(pptr);
2470 		pptr = pptr->sibling;
2471 	}
2472 
2473 	/*
2474 	 * The observation is complete, end sets. Note we will skip any
2475 	 * iports that are active, but have no PHYs in them (i.e. awaiting
2476 	 * unconfigure). Set to restart discovery if we find this.
2477 	 */
2478 	rw_enter(&pwp->iports_lock, RW_READER);
2479 	for (iport = list_head(&pwp->iports);
2480 	    iport != NULL;
2481 	    iport = list_next(&pwp->iports, iport)) {
2482 
2483 		if (iport->report_skip)
2484 			continue;		/* skip set_end */
2485 
2486 		tgtmap = iport->iss_tgtmap;
2487 		ASSERT(tgtmap);
2488 		if (scsi_hba_tgtmap_set_end(tgtmap, 0) != DDI_SUCCESS) {
2489 			pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP,
2490 			    "%s: cannot set_end tgtmap ", __func__);
2491 			rw_exit(&pwp->iports_lock);
2492 			return (B_FALSE);
2493 		}
2494 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP,
2495 		    "%s: set end on tgtmap [0x%p]", __func__,
2496 		    (void *)tgtmap);
2497 	}
2498 
2499 	/*
2500 	 * Now that discovery is complete, set up the necessary
2501 	 * DDI properties on each iport node.
2502 	 */
2503 	for (iport = list_head(&pwp->iports); iport != NULL;
2504 	    iport = list_next(&pwp->iports, iport)) {
2505 		/* Set up the DDI properties on each phy */
2506 		pmcs_smhba_set_phy_props(iport);
2507 
2508 		/* Set up the 'attached-port' property on the iport */
2509 		ap = kmem_zalloc(PMCS_MAX_UA_SIZE, KM_SLEEP);
2510 		mutex_enter(&iport->lock);
2511 		pptr = iport->pptr;
2512 		mutex_exit(&iport->lock);
2513 		if (pptr == NULL) {
2514 			/*
2515 			 * This iport is down, but has not been
2516 			 * removed from our list (unconfigured).
2517 			 * Set our value to '0'.
2518 			 */
2519 			(void) snprintf(ap, 1, "%s", "0");
2520 		} else {
2521 			/* Otherwise, set it to remote phy's wwn */
2522 			pmcs_lock_phy(pptr);
2523 			wwn = pmcs_barray2wwn(pptr->sas_address);
2524 			(void) scsi_wwn_to_wwnstr(wwn, 1, ap);
2525 			pmcs_unlock_phy(pptr);
2526 		}
2527 		if (ndi_prop_update_string(DDI_DEV_T_NONE, iport->dip,
2528 		    SCSI_ADDR_PROP_ATTACHED_PORT,  ap) != DDI_SUCCESS) {
2529 			pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Failed to "
2530 			    "set prop ("SCSI_ADDR_PROP_ATTACHED_PORT")",
2531 			    __func__);
2532 		}
2533 		kmem_free(ap, PMCS_MAX_UA_SIZE);
2534 	}
2535 	rw_exit(&pwp->iports_lock);
2536 
2537 	return (B_TRUE);
2538 }
2539 
/*
 * Report observations into a particular iport's target map
 *
 * Walks phyp and (for non-root PHYs) its siblings, adding each live
 * SAS/SATA/EXPANDER PHY's unit address to the iport's tgtmap, and recurses
 * into children.  Returns B_FALSE if a tgtmap add fails.
 *
 * Called with phyp (and all descendents) locked
 */
static boolean_t
pmcs_report_iport_observations(pmcs_hw_t *pwp, pmcs_iport_t *iport,
    pmcs_phy_t *phyp)
{
	pmcs_phy_t		*lphyp;
	scsi_hba_tgtmap_t	*tgtmap;
	scsi_tgtmap_tgt_type_t	tgt_type;
	char			*ua;
	uint64_t		wwn;

	tgtmap = iport->iss_tgtmap;
	ASSERT(tgtmap);

	lphyp = phyp;
	while (lphyp) {
		/* Map the PHY's dtype to the corresponding tgtmap type */
		switch (lphyp->dtype) {
		default:		/* Skip unknown PHYs. */
			/* for non-root phys, skip to sibling */
			goto next_phy;

		case SATA:
		case SAS:
			tgt_type = SCSI_TGT_SCSI_DEVICE;
			break;

		case EXPANDER:
			tgt_type = SCSI_TGT_SMP_DEVICE;
			break;
		}

		/* Dead PHYs are not reported */
		if (lphyp->dead) {
			goto next_phy;
		}

		/* Build the unit address string from the SAS address */
		wwn = pmcs_barray2wwn(lphyp->sas_address);
		ua = scsi_wwn_to_wwnstr(wwn, 1, NULL);

		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP,
		    "iport_observation: adding %s on tgtmap [0x%p] phy [0x%p]",
		    ua, (void *)tgtmap, (void*)lphyp);

		if (scsi_hba_tgtmap_set_add(tgtmap, tgt_type, ua, NULL) !=
		    DDI_SUCCESS) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP,
			    "%s: failed to add address %s", __func__, ua);
			scsi_free_wwnstr(ua);
			return (B_FALSE);
		}
		scsi_free_wwnstr(ua);

		/* Recurse into this PHY's children (expander contents) */
		if (lphyp->children) {
			if (pmcs_report_iport_observations(pwp, iport,
			    lphyp->children) == B_FALSE) {
				return (B_FALSE);
			}
		}

		/* for non-root phys, report siblings too */
next_phy:
		if (IS_ROOT_PHY(lphyp)) {
			lphyp = NULL;
		} else {
			lphyp = lphyp->sibling;
		}
	}

	return (B_TRUE);
}
2613 
/*
 * Check for and configure new devices.
 *
 * If the changed device is a SATA device, add a SATA device.
 *
 * If the changed device is a SAS device, add a SAS device.
 *
 * If the changed device is an EXPANDER device, do a REPORT
 * GENERAL SMP command to find out the number of contained phys.
 *
 * For each number of contained phys, allocate a phy, do a
 * DISCOVERY SMP command to find out what kind of device it
 * is and add it to the linked list of phys on the *next* level.
 *
 * Returns 0 on success, nonzero if discovery must be restarted
 * (e.g. an iport is not yet configured).
 *
 * NOTE: pptr passed in by the caller will be a root PHY
 */
static int
pmcs_configure_new_devices(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
{
	int rval = 0;
	pmcs_iport_t *iport;
	pmcs_phy_t *pnext, *orig_pptr = pptr, *root_phy, *pchild;

	/*
	 * First, walk through each PHY at this level
	 */
	while (pptr) {
		pmcs_lock_phy(pptr);
		/* Capture sibling before any work; it may be NULLed below */
		pnext = pptr->sibling;

		/*
		 * Set the new dtype if it has changed
		 */
		if ((pptr->pend_dtype != NEW) &&
		    (pptr->pend_dtype != pptr->dtype)) {
			pptr->dtype = pptr->pend_dtype;
		}

		/* Only unconfigured, live, changed PHYs need work */
		if (pptr->changed == 0 || pptr->dead || pptr->configured) {
			goto next_phy;
		}

		/*
		 * Confirm that this target's iport is configured
		 */
		root_phy = pmcs_get_root_phy(pptr);
		iport = pmcs_get_iport_by_phy(pwp, root_phy);
		if (iport == NULL) {
			/* No iport for this tgt, restart */
			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
			    "%s: iport not yet configured, "
			    "retry discovery", __func__);
			pnext = NULL;
			rval = -1;
			goto next_phy;
		}

		/* Dispatch on device type to the appropriate config routine */
		switch (pptr->dtype) {
		case NOTHING:
			pptr->changed = 0;
			break;
		case SATA:
		case SAS:
			pptr->iport = iport;
			pmcs_new_tport(pwp, pptr);
			break;
		case EXPANDER:
			pmcs_configure_expander(pwp, pptr, iport);
			break;
		}
		/* Drop the hold taken by pmcs_get_iport_by_phy */
		pmcs_rele_iport(iport);

		/* If the config changed under us, stop walking this level */
		mutex_enter(&pwp->config_lock);
		if (pwp->config_changed) {
			mutex_exit(&pwp->config_lock);
			pnext = NULL;
			goto next_phy;
		}
		mutex_exit(&pwp->config_lock);

next_phy:
		pmcs_unlock_phy(pptr);
		pptr = pnext;
	}

	if (rval != 0) {
		return (rval);
	}

	/*
	 * Now walk through each PHY again, recalling ourselves if they
	 * have children
	 */
	pptr = orig_pptr;
	while (pptr) {
		pmcs_lock_phy(pptr);
		pnext = pptr->sibling;
		pchild = pptr->children;
		pmcs_unlock_phy(pptr);

		if (pchild) {
			rval = pmcs_configure_new_devices(pwp, pchild);
			if (rval != 0) {
				break;
			}
		}

		pptr = pnext;
	}

	return (rval);
}
2726 
2727 /*
2728  * Set all phys and descendent phys as changed if changed == B_TRUE, otherwise
2729  * mark them all as not changed.
2730  *
2731  * Called with parent PHY locked.
2732  */
2733 void
2734 pmcs_set_changed(pmcs_hw_t *pwp, pmcs_phy_t *parent, boolean_t changed,
2735     int level)
2736 {
2737 	pmcs_phy_t *pptr;
2738 
2739 	if (level == 0) {
2740 		if (changed) {
2741 			PHY_CHANGED(pwp, parent);
2742 		} else {
2743 			parent->changed = 0;
2744 		}
2745 		if (parent->dtype == EXPANDER && parent->level) {
2746 			parent->width = 1;
2747 		}
2748 		if (parent->children) {
2749 			pmcs_set_changed(pwp, parent->children, changed,
2750 			    level + 1);
2751 		}
2752 	} else {
2753 		pptr = parent;
2754 		while (pptr) {
2755 			if (changed) {
2756 				PHY_CHANGED(pwp, pptr);
2757 			} else {
2758 				pptr->changed = 0;
2759 			}
2760 			if (pptr->dtype == EXPANDER && pptr->level) {
2761 				pptr->width = 1;
2762 			}
2763 			if (pptr->children) {
2764 				pmcs_set_changed(pwp, pptr->children, changed,
2765 				    level + 1);
2766 			}
2767 			pptr = pptr->sibling;
2768 		}
2769 	}
2770 }
2771 
2772 /*
2773  * Take the passed phy mark it and its descendants as dead.
2774  * Fire up reconfiguration to abort commands and bury it.
2775  *
2776  * Called with the parent PHY locked.
2777  */
2778 void
2779 pmcs_kill_changed(pmcs_hw_t *pwp, pmcs_phy_t *parent, int level)
2780 {
2781 	pmcs_phy_t *pptr = parent;
2782 
2783 	while (pptr) {
2784 		pptr->link_rate = 0;
2785 		pptr->abort_sent = 0;
2786 		pptr->abort_pending = 1;
2787 		SCHEDULE_WORK(pwp, PMCS_WORK_ABORT_HANDLE);
2788 		pptr->need_rl_ext = 0;
2789 
2790 		if (pptr->dead == 0) {
2791 			PHY_CHANGED(pwp, pptr);
2792 			RESTART_DISCOVERY(pwp);
2793 		}
2794 
2795 		pptr->dead = 1;
2796 
2797 		if (pptr->children) {
2798 			pmcs_kill_changed(pwp, pptr->children, level + 1);
2799 		}
2800 
2801 		/*
2802 		 * Only kill siblings at level > 0
2803 		 */
2804 		if (level == 0) {
2805 			return;
2806 		}
2807 
2808 		pptr = pptr->sibling;
2809 	}
2810 }
2811 
2812 /*
2813  * Go through every PHY and clear any that are dead (unless they're expanders)
2814  */
2815 static void
2816 pmcs_clear_phys(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2817 {
2818 	pmcs_phy_t *pnext, *phyp;
2819 
2820 	phyp = pptr;
2821 	while (phyp) {
2822 		if (IS_ROOT_PHY(phyp)) {
2823 			pmcs_lock_phy(phyp);
2824 		}
2825 
2826 		if ((phyp->dtype != EXPANDER) && phyp->dead) {
2827 			pmcs_clear_phy(pwp, phyp);
2828 		}
2829 
2830 		if (phyp->children) {
2831 			pmcs_clear_phys(pwp, phyp->children);
2832 		}
2833 
2834 		pnext = phyp->sibling;
2835 
2836 		if (IS_ROOT_PHY(phyp)) {
2837 			pmcs_unlock_phy(phyp);
2838 		}
2839 
2840 		phyp = pnext;
2841 	}
2842 }
2843 
/*
 * Clear volatile parts of a phy.  Called with PHY locked.
 *
 * Resets the per-device state (handle, counters, flags) while preserving
 * the tree linkage, identity (SAS address, path, phynum) and target
 * association.  The "keep" comments mark fields deliberately untouched.
 */
void
pmcs_clear_phy(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
{
	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: %s", __func__, pptr->path);
	ASSERT(mutex_owned(&pptr->phy_lock));
	/* keep sibling */
	/* keep children */
	/* keep parent */
	pptr->device_id = PMCS_INVALID_DEVICE_ID;
	/* keep hw_event_ack */
	pptr->ncphy = 0;
	/* keep phynum */
	pptr->width = 0;
	pptr->ds_recovery_retries = 0;
	/* keep dtype */
	pptr->config_stop = 0;
	pptr->spinup_hold = 0;
	pptr->atdt = 0;
	/* keep portid */
	pptr->link_rate = 0;
	pptr->valid_device_id = 0;
	pptr->abort_sent = 0;
	pptr->abort_pending = 0;
	pptr->need_rl_ext = 0;
	pptr->subsidiary = 0;
	pptr->configured = 0;
	/* Only mark dead if it's not a root PHY and its dtype isn't NOTHING */
	/* XXX: What about directly attached disks? */
	if (!IS_ROOT_PHY(pptr) && (pptr->dtype != NOTHING))
		pptr->dead = 1;
	pptr->changed = 0;
	/* keep SAS address */
	/* keep path */
	/* keep ref_count */
	/* Don't clear iport on root PHYs - they are handled in pmcs_intr.c */
	if (!IS_ROOT_PHY(pptr)) {
		pptr->iport = NULL;
	}
	/* keep target */
}
2887 
/*
 * Allocate softstate for this target if there isn't already one.  If there
 * is, just redo our internal configuration.  If it is actually "new", we'll
 * soon get a tran_tgt_init for it.
 *
 * Configures the PHY (obtaining a device handle), reaps any matching dead
 * PHY, and re-validates/re-assigns an existing target association.
 *
 * Called with PHY locked.
 */
static void
pmcs_new_tport(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
{
	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: phy 0x%p @ %s", __func__,
	    (void *)pptr, pptr->path);

	if (pmcs_configure_phy(pwp, pptr) == B_FALSE) {
		/*
		 * If the config failed, mark the PHY as changed.
		 */
		PHY_CHANGED(pwp, pptr);
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
		    "%s: pmcs_configure_phy failed for phy 0x%p", __func__,
		    (void *)pptr);
		return;
	}

	/* Mark PHY as no longer changed */
	pptr->changed = 0;

	/*
	 * If the PHY has no target pointer, see if there's a dead PHY that
	 * matches.
	 */
	if (pptr->target == NULL) {
		pmcs_reap_dead_phy(pptr);
	}

	/*
	 * Only assign the device if there is a target for this PHY with a
	 * matching SAS address.  If an iport is disconnected from one piece
	 * of storage and connected to another within the iport stabilization
	 * time, we can get the PHY/target mismatch situation.
	 *
	 * Otherwise, it'll get done in tran_tgt_init.
	 */
	if (pptr->target) {
		mutex_enter(&pptr->target->statlock);
		if (pmcs_phy_target_match(pptr) == B_FALSE) {
			/* WWN mismatch: drop the stale target association */
			mutex_exit(&pptr->target->statlock);
			if (!IS_ROOT_PHY(pptr)) {
				pmcs_dec_phy_ref_count(pptr);
			}
			pmcs_prt(pwp, PMCS_PRT_DEBUG,
			    "%s: Not assigning existing tgt %p for PHY %p "
			    "(WWN mismatch)", __func__, (void *)pptr->target,
			    (void *)pptr);
			pptr->target = NULL;
			return;
		}

		if (!pmcs_assign_device(pwp, pptr->target)) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
			    "%s: pmcs_assign_device failed for target 0x%p",
			    __func__, (void *)pptr->target);
		}
		mutex_exit(&pptr->target->statlock);
	}
}
2954 
2955 /*
2956  * Called with PHY lock held.
2957  */
2958 static boolean_t
2959 pmcs_configure_phy(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2960 {
2961 	char *dtype;
2962 
2963 	ASSERT(mutex_owned(&pptr->phy_lock));
2964 
2965 	/*
2966 	 * Mark this device as no longer changed.
2967 	 */
2968 	pptr->changed = 0;
2969 
2970 	/*
2971 	 * If we don't have a device handle, get one.
2972 	 */
2973 	if (pmcs_get_device_handle(pwp, pptr)) {
2974 		return (B_FALSE);
2975 	}
2976 
2977 	pptr->configured = 1;
2978 
2979 	switch (pptr->dtype) {
2980 	case SAS:
2981 		dtype = "SAS";
2982 		break;
2983 	case SATA:
2984 		dtype = "SATA";
2985 		break;
2986 	case EXPANDER:
2987 		dtype = "SMP";
2988 		break;
2989 	default:
2990 		dtype = "???";
2991 	}
2992 
2993 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "config_dev: %s dev %s "
2994 	    SAS_ADDR_FMT " dev id 0x%x lr 0x%x", dtype, pptr->path,
2995 	    SAS_ADDR_PRT(pptr->sas_address), pptr->device_id, pptr->link_rate);
2996 
2997 	return (B_TRUE);
2998 }
2999 
3000 /*
3001  * Called with PHY locked
3002  */
3003 static void
3004 pmcs_configure_expander(pmcs_hw_t *pwp, pmcs_phy_t *pptr, pmcs_iport_t *iport)
3005 {
3006 	pmcs_phy_t *ctmp, *clist = NULL, *cnext;
3007 	int result, i, nphy = 0;
3008 	boolean_t root_phy = B_FALSE;
3009 
3010 	ASSERT(iport);
3011 
3012 	/*
3013 	 * Step 1- clear our "changed" bit. If we need to retry/restart due
3014 	 * to resource shortages, we'll set it again. While we're doing
3015 	 * configuration, other events may set it again as well.  If the PHY
3016 	 * is a root PHY and is currently marked as having changed, reset the
3017 	 * config_stop timer as well.
3018 	 */
3019 	if (IS_ROOT_PHY(pptr) && pptr->changed) {
3020 		pptr->config_stop = ddi_get_lbolt() +
3021 		    drv_usectohz(PMCS_MAX_CONFIG_TIME);
3022 	}
3023 	pptr->changed = 0;
3024 
3025 	/*
3026 	 * Step 2- make sure we don't overflow
3027 	 */
3028 	if (pptr->level == PMCS_MAX_XPND-1) {
3029 		pmcs_prt(pwp, PMCS_PRT_WARN,
3030 		    "%s: SAS expansion tree too deep", __func__);
3031 		return;
3032 	}
3033 
3034 	/*
3035 	 * Step 3- Check if this expander is part of a wide phy that has
3036 	 * already been configured.
3037 	 *
3038 	 * This is known by checking this level for another EXPANDER device
3039 	 * with the same SAS address and isn't already marked as a subsidiary
3040 	 * phy and a parent whose SAS address is the same as our SAS address
3041 	 * (if there are parents).
3042 	 */
3043 	if (!IS_ROOT_PHY(pptr)) {
3044 		/*
3045 		 * No need to lock the parent here because we're in discovery
3046 		 * and the only time a PHY's children pointer can change is
3047 		 * in discovery; either in pmcs_clear_expander (which has
3048 		 * already been called) or here, down below.  Plus, trying to
3049 		 * grab the parent's lock here can cause deadlock.
3050 		 */
3051 		ctmp = pptr->parent->children;
3052 	} else {
3053 		ctmp = pwp->root_phys;
3054 		root_phy = B_TRUE;
3055 	}
3056 
3057 	while (ctmp) {
3058 		/*
3059 		 * If we've checked all PHYs up to pptr, we stop. Otherwise,
3060 		 * we'll be checking for a primary PHY with a higher PHY
3061 		 * number than pptr, which will never happen.  The primary
3062 		 * PHY on non-root expanders will ALWAYS be the lowest
3063 		 * numbered PHY.
3064 		 */
3065 		if (ctmp == pptr) {
3066 			break;
3067 		}
3068 
3069 		/*
3070 		 * If pptr and ctmp are root PHYs, just grab the mutex on
3071 		 * ctmp.  No need to lock the entire tree.  If they are not
3072 		 * root PHYs, there is no need to lock since a non-root PHY's
3073 		 * SAS address and other characteristics can only change in
3074 		 * discovery anyway.
3075 		 */
3076 		if (root_phy) {
3077 			mutex_enter(&ctmp->phy_lock);
3078 		}
3079 
3080 		if (ctmp->dtype == EXPANDER && ctmp->width &&
3081 		    memcmp(ctmp->sas_address, pptr->sas_address, 8) == 0) {
3082 			int widephy = 0;
3083 			/*
3084 			 * If these phys are not root PHYs, compare their SAS
3085 			 * addresses too.
3086 			 */
3087 			if (!root_phy) {
3088 				if (memcmp(ctmp->parent->sas_address,
3089 				    pptr->parent->sas_address, 8) == 0) {
3090 					widephy = 1;
3091 				}
3092 			} else {
3093 				widephy = 1;
3094 			}
3095 			if (widephy) {
3096 				ctmp->width++;
3097 				pptr->subsidiary = 1;
3098 				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: PHY "
3099 				    "%s part of wide PHY %s (now %d wide)",
3100 				    __func__, pptr->path, ctmp->path,
3101 				    ctmp->width);
3102 				if (root_phy) {
3103 					mutex_exit(&ctmp->phy_lock);
3104 				}
3105 				return;
3106 			}
3107 		}
3108 
3109 		cnext = ctmp->sibling;
3110 		if (root_phy) {
3111 			mutex_exit(&ctmp->phy_lock);
3112 		}
3113 		ctmp = cnext;
3114 	}
3115 
3116 	/*
3117 	 * Step 4- If we don't have a device handle, get one.  Since this
3118 	 * is the primary PHY, make sure subsidiary is cleared.
3119 	 */
3120 	pptr->subsidiary = 0;
3121 	if (pmcs_get_device_handle(pwp, pptr)) {
3122 		goto out;
3123 	}
3124 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "Config expander %s "
3125 	    SAS_ADDR_FMT " dev id 0x%x lr 0x%x", pptr->path,
3126 	    SAS_ADDR_PRT(pptr->sas_address), pptr->device_id, pptr->link_rate);
3127 
3128 	/*
3129 	 * Step 5- figure out how many phys are in this expander.
3130 	 */
3131 	nphy = pmcs_expander_get_nphy(pwp, pptr);
3132 	if (nphy <= 0) {
3133 		if (nphy == 0 && ddi_get_lbolt() < pptr->config_stop) {
3134 			PHY_CHANGED(pwp, pptr);
3135 			RESTART_DISCOVERY(pwp);
3136 		} else {
3137 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3138 			    "%s: Retries exhausted for %s, killing", __func__,
3139 			    pptr->path);
3140 			pptr->config_stop = 0;
3141 			pmcs_kill_changed(pwp, pptr, 0);
3142 		}
3143 		goto out;
3144 	}
3145 
3146 	/*
3147 	 * Step 6- Allocate a list of phys for this expander and figure out
3148 	 * what each one is.
3149 	 */
3150 	for (i = 0; i < nphy; i++) {
3151 		ctmp = kmem_cache_alloc(pwp->phy_cache, KM_SLEEP);
3152 		bzero(ctmp, sizeof (pmcs_phy_t));
3153 		ctmp->device_id = PMCS_INVALID_DEVICE_ID;
3154 		ctmp->sibling = clist;
3155 		ctmp->pend_dtype = NEW;	/* Init pending dtype */
3156 		ctmp->config_stop = ddi_get_lbolt() +
3157 		    drv_usectohz(PMCS_MAX_CONFIG_TIME);
3158 		clist = ctmp;
3159 	}
3160 
3161 	mutex_enter(&pwp->config_lock);
3162 	if (pwp->config_changed) {
3163 		RESTART_DISCOVERY_LOCKED(pwp);
3164 		mutex_exit(&pwp->config_lock);
3165 		/*
3166 		 * Clean up the newly allocated PHYs and return
3167 		 */
3168 		while (clist) {
3169 			ctmp = clist->sibling;
3170 			kmem_cache_free(pwp->phy_cache, clist);
3171 			clist = ctmp;
3172 		}
3173 		return;
3174 	}
3175 	mutex_exit(&pwp->config_lock);
3176 
3177 	/*
3178 	 * Step 7- Now fill in the rest of the static portions of the phy.
3179 	 */
3180 	for (i = 0, ctmp = clist; ctmp; ctmp = ctmp->sibling, i++) {
3181 		ctmp->parent = pptr;
3182 		ctmp->pwp = pwp;
3183 		ctmp->level = pptr->level+1;
3184 		ctmp->portid = pptr->portid;
3185 		if (ctmp->tolerates_sas2) {
3186 			ASSERT(i < SAS2_PHYNUM_MAX);
3187 			ctmp->phynum = i & SAS2_PHYNUM_MASK;
3188 		} else {
3189 			ASSERT(i < SAS_PHYNUM_MAX);
3190 			ctmp->phynum = i & SAS_PHYNUM_MASK;
3191 		}
3192 		pmcs_phy_name(pwp, ctmp, ctmp->path, sizeof (ctmp->path));
3193 		pmcs_lock_phy(ctmp);
3194 	}
3195 
3196 	/*
3197 	 * Step 8- Discover things about each phy in the expander.
3198 	 */
3199 	for (i = 0, ctmp = clist; ctmp; ctmp = ctmp->sibling, i++) {
3200 		result = pmcs_expander_content_discover(pwp, pptr, ctmp);
3201 		if (result <= 0) {
3202 			if (ddi_get_lbolt() < pptr->config_stop) {
3203 				PHY_CHANGED(pwp, pptr);
3204 				RESTART_DISCOVERY(pwp);
3205 			} else {
3206 				pptr->config_stop = 0;
3207 				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3208 				    "%s: Retries exhausted for %s, killing",
3209 				    __func__, pptr->path);
3210 				pmcs_kill_changed(pwp, pptr, 0);
3211 			}
3212 			goto out;
3213 		}
3214 
3215 		/* Set pend_dtype to dtype for 1st time initialization */
3216 		ctmp->pend_dtype = ctmp->dtype;
3217 	}
3218 
3219 	/*
3220 	 * Step 9- Install the new list on the next level. There should be
3221 	 * no children pointer on this PHY.  If there is, we'd need to know
3222 	 * how it happened (The expander suddenly got more PHYs?).
3223 	 */
3224 	ASSERT(pptr->children == NULL);
3225 	if (pptr->children != NULL) {
3226 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Already child PHYs attached "
3227 		    " to PHY %s: This should never happen", __func__,
3228 		    pptr->path);
3229 		goto out;
3230 	} else {
3231 		pptr->children = clist;
3232 	}
3233 
3234 	clist = NULL;
3235 	pptr->ncphy = nphy;
3236 	pptr->configured = 1;
3237 
3238 	/*
3239 	 * We only set width if we're greater than level 0.
3240 	 */
3241 	if (pptr->level) {
3242 		pptr->width = 1;
3243 	}
3244 
3245 	/*
3246 	 * Now tell the rest of the world about us, as an SMP node.
3247 	 */
3248 	pptr->iport = iport;
3249 	pmcs_new_tport(pwp, pptr);
3250 
3251 out:
3252 	while (clist) {
3253 		ctmp = clist->sibling;
3254 		pmcs_unlock_phy(clist);
3255 		kmem_cache_free(pwp->phy_cache, clist);
3256 		clist = ctmp;
3257 	}
3258 }
3259 
3260 /*
3261  * 2. Check expanders marked changed (but not dead) to see if they still have
3262  * the same number of phys and the same SAS address. Mark them, their subsidiary
 * phys (if wide) and their descendants dead if anything has changed. Check
 * the devices they contain to see if *they* have changed. If they've changed
 * from type NOTHING we leave them marked changed to be configured later
 * (picking up a new SAS address and link rate if possible). Otherwise, any
 * change in type, SAS address or removal of target role will cause us to
 * mark them (and their descendants) as dead and cause any pending commands
3269  * and associated devices to be removed.
3270  *
3271  * Called with PHY (pptr) locked.
3272  */
3273 
static void
pmcs_check_expander(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
{
	int nphy, result;
	/*
	 * Proposed new contents are discovered into a private "local" list
	 * of cloned PHYs; the real child PHYs are only modified after every
	 * child has been successfully (re)discovered.
	 */
	pmcs_phy_t *ctmp, *local, *local_list = NULL, *local_tail = NULL;
	/* Per-child dispositions computed in Step 6 below */
	boolean_t kill_changed, changed;

	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
	    "%s: check %s", __func__, pptr->path);

	/*
	 * Step 1: Mark phy as not changed. We will mark it changed if we need
	 * to retry.
	 */
	pptr->changed = 0;

	/*
	 * Reset the config_stop time. Although we're not actually configuring
	 * anything here, we do want some indication of when to give up trying
	 * if we can't communicate with the expander.
	 */
	pptr->config_stop = ddi_get_lbolt() +
	    drv_usectohz(PMCS_MAX_CONFIG_TIME);

	/*
	 * Step 2: Figure out how many phys are in this expander. If
	 * pmcs_expander_get_nphy returns 0 we ran out of resources,
	 * so reschedule and try later. If it returns another error,
	 * just return.
	 */
	nphy = pmcs_expander_get_nphy(pwp, pptr);
	if (nphy <= 0) {
		if ((nphy == 0) && (ddi_get_lbolt() < pptr->config_stop)) {
			/* Transient failure: mark for retry on next pass */
			PHY_CHANGED(pwp, pptr);
			RESTART_DISCOVERY(pwp);
		} else {
			/* Out of retry time (or hard error): tear it down */
			pptr->config_stop = 0;
			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
			    "%s: Retries exhausted for %s, killing", __func__,
			    pptr->path);
			pmcs_kill_changed(pwp, pptr, 0);
		}
		return;
	}

	/*
	 * Step 3: If the number of phys don't agree, kill the old sub-tree.
	 */
	if (nphy != pptr->ncphy) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
		    "%s: number of contained phys for %s changed from %d to %d",
		    __func__, pptr->path, pptr->ncphy, nphy);
		/*
		 * Force a rescan of this expander after dead contents
		 * are cleared and removed.
		 */
		pmcs_kill_changed(pwp, pptr, 0);
		return;
	}

	/*
	 * Step 4: if we're at the bottom of the stack, we're done
	 * (we can't have any levels below us)
	 */
	if (pptr->level == PMCS_MAX_XPND-1) {
		return;
	}

	/*
	 * Step 5: Discover things about each phy in this expander.  We do
	 * this by walking the current list of contained phys and doing a
	 * content discovery for it to a local phy.
	 */
	ctmp = pptr->children;
	ASSERT(ctmp);
	if (ctmp == NULL) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
		    "%s: No children attached to expander @ %s?", __func__,
		    pptr->path);
		return;
	}

	while (ctmp) {
		/*
		 * Allocate a local PHY to contain the proposed new contents
		 * and link it to the rest of the local PHYs so that they
		 * can all be freed later.
		 */
		local = pmcs_clone_phy(ctmp);

		if (local_list == NULL) {
			local_list = local;
			local_tail = local;
		} else {
			local_tail->sibling = local;
			local_tail = local;
		}

		/*
		 * Need to lock the local PHY since pmcs_expander_content_
		 * discovery may call pmcs_clear_phy on it, which expects
		 * the PHY to be locked.
		 */
		pmcs_lock_phy(local);
		result = pmcs_expander_content_discover(pwp, pptr, local);
		pmcs_unlock_phy(local);
		if (result <= 0) {
			if (ddi_get_lbolt() < pptr->config_stop) {
				PHY_CHANGED(pwp, pptr);
				RESTART_DISCOVERY(pwp);
			} else {
				pptr->config_stop = 0;
				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
				    "%s: Retries exhausted for %s, killing",
				    __func__, pptr->path);
				pmcs_kill_changed(pwp, pptr, 0);
			}

			/*
			 * Release all the local PHYs that we allocated.
			 * No real PHY state was modified, so the next pass
			 * starts over cleanly.
			 */
			pmcs_free_phys(pwp, local_list);
			return;
		}

		ctmp = ctmp->sibling;
	}

	/*
	 * Step 6: Compare the local PHY's contents to our current PHY.  If
	 * there are changes, take the appropriate action.
	 * This is done in two steps (step 5 above, and 6 here) so that if we
	 * have to bail during this process (e.g. pmcs_expander_content_discover
	 * fails), we haven't actually changed the state of any of the real
	 * PHYs.  Next time we come through here, we'll be starting over from
	 * scratch.  This keeps us from marking a changed PHY as no longer
	 * changed, but then having to bail only to come back next time and
	 * think that the PHY hadn't changed.  If this were to happen, we
	 * would fail to properly configure the device behind this PHY.
	 */
	local = local_list;
	ctmp = pptr->children;

	while (ctmp) {
		changed = B_FALSE;
		kill_changed = B_FALSE;

		/*
		 * We set local to local_list prior to this loop so that we
		 * can simply walk the local_list while we walk this list.  The
		 * two lists should be completely in sync.
		 *
		 * Clear the changed flag here.
		 */
		ctmp->changed = 0;

		if (ctmp->dtype != local->dtype) {
			if (ctmp->dtype != NOTHING) {
				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: %s "
				    "type changed from %s to %s (killing)",
				    __func__, ctmp->path, PHY_TYPE(ctmp),
				    PHY_TYPE(local));
				/*
				 * Force a rescan of this expander after dead
				 * contents are cleared and removed.
				 */
				changed = B_TRUE;
				kill_changed = B_TRUE;
			} else {
				/*
				 * NOTHING -> something: new device appeared;
				 * just mark changed so it gets configured.
				 */
				changed = B_TRUE;
				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
				    "%s: %s type changed from NOTHING to %s",
				    __func__, ctmp->path, PHY_TYPE(local));
			}

		} else if (ctmp->atdt != local->atdt) {
			/*
			 * NOTE(review): atdt appears to be the attached
			 * device type; 0 seems to mean nothing attached
			 * (hence the kill below) -- confirm against the
			 * DISCOVER response decoding.
			 */
			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: %s attached "
			    "device type changed from %d to %d (killing)",
			    __func__, ctmp->path, ctmp->atdt, local->atdt);
			/*
			 * Force a rescan of this expander after dead
			 * contents are cleared and removed.
			 */
			changed = B_TRUE;

			if (local->atdt == 0) {
				kill_changed = B_TRUE;
			}
		} else if (ctmp->link_rate != local->link_rate) {
			pmcs_prt(pwp, PMCS_PRT_INFO, "%s: %s changed speed from"
			    " %s to %s", __func__, ctmp->path,
			    pmcs_get_rate(ctmp->link_rate),
			    pmcs_get_rate(local->link_rate));
			/* If the speed changed from invalid, force rescan */
			if (!PMCS_VALID_LINK_RATE(ctmp->link_rate)) {
				changed = B_TRUE;
				RESTART_DISCOVERY(pwp);
			} else {
				/* Just update to the new link rate */
				ctmp->link_rate = local->link_rate;
			}

			if (!PMCS_VALID_LINK_RATE(local->link_rate)) {
				kill_changed = B_TRUE;
			}
		} else if (memcmp(ctmp->sas_address, local->sas_address,
		    sizeof (ctmp->sas_address)) != 0) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: SASAddr "
			    "for %s changed from " SAS_ADDR_FMT " to "
			    SAS_ADDR_FMT " (kill old tree)", __func__,
			    ctmp->path, SAS_ADDR_PRT(ctmp->sas_address),
			    SAS_ADDR_PRT(local->sas_address));
			/*
			 * Force a rescan of this expander after dead
			 * contents are cleared and removed.
			 */
			changed = B_TRUE;
		} else {
			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
			    "%s: %s looks the same (type %s)",
			    __func__, ctmp->path, PHY_TYPE(ctmp));
			/*
			 * If EXPANDER, still mark it changed so we
			 * re-evaluate its contents.  If it's not an expander,
			 * but it hasn't been configured, also mark it as
			 * changed so that it will undergo configuration.
			 */
			if (ctmp->dtype == EXPANDER) {
				changed = B_TRUE;
			} else if ((ctmp->dtype != NOTHING) &&
			    !ctmp->configured) {
				ctmp->changed = 1;
			} else {
				/* It simply hasn't changed */
				ctmp->changed = 0;
			}
		}

		/*
		 * If the PHY changed, call pmcs_kill_changed if indicated,
		 * update its contents to reflect its current state and mark it
		 * as changed.
		 */
		if (changed) {
			/*
			 * pmcs_kill_changed will mark the PHY as changed, so
			 * only do PHY_CHANGED if we did not do kill_changed.
			 */
			if (kill_changed) {
				pmcs_kill_changed(pwp, ctmp, 0);
			} else {
				/*
				 * If we're not killing the device, it's not
				 * dead.  Mark the PHY as changed.
				 */
				PHY_CHANGED(pwp, ctmp);

				if (ctmp->dead) {
					pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
					    "%s: Unmarking PHY %s dead, "
					    "restarting discovery",
					    __func__, ctmp->path);
					ctmp->dead = 0;
					RESTART_DISCOVERY(pwp);
				}
			}

			/*
			 * If the dtype of this PHY is now NOTHING, mark it as
			 * unconfigured.  Set pend_dtype to what the new dtype
			 * is.  It'll get updated at the end of the discovery
			 * process.
			 */
			if (local->dtype == NOTHING) {
				bzero(ctmp->sas_address,
				    sizeof (local->sas_address));
				ctmp->atdt = 0;
				ctmp->link_rate = 0;
				ctmp->pend_dtype = NOTHING;
				ctmp->configured = 0;
			} else {
				(void) memcpy(ctmp->sas_address,
				    local->sas_address,
				    sizeof (local->sas_address));
				ctmp->atdt = local->atdt;
				ctmp->link_rate = local->link_rate;
				ctmp->pend_dtype = local->dtype;
			}
		}

		/* The two lists are in lock-step; advance both */
		local = local->sibling;
		ctmp = ctmp->sibling;
	}

	/*
	 * If we got to here, that means we were able to see all the PHYs
	 * and we can now update all of the real PHYs with the information
	 * we got on the local PHYs.  Once that's done, free all the local
	 * PHYs.
	 */

	pmcs_free_phys(pwp, local_list);
}
3577 
3578 /*
3579  * Top level routine to check expanders.  We call pmcs_check_expander for
3580  * each expander.  Since we're not doing any configuration right now, it
3581  * doesn't matter if this is breadth-first.
3582  */
3583 static boolean_t
3584 pmcs_check_expanders(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
3585 {
3586 	pmcs_phy_t *phyp, *pnext, *pchild;
3587 	boolean_t config_changed = B_FALSE;
3588 
3589 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: %s", __func__, pptr->path);
3590 
3591 	/*
3592 	 * Check each expander at this level
3593 	 */
3594 	phyp = pptr;
3595 	while (phyp && !config_changed) {
3596 		pmcs_lock_phy(phyp);
3597 
3598 		if ((phyp->dtype == EXPANDER) && phyp->changed &&
3599 		    !phyp->dead && !phyp->subsidiary &&
3600 		    phyp->configured) {
3601 			pmcs_check_expander(pwp, phyp);
3602 		}
3603 
3604 		pnext = phyp->sibling;
3605 		pmcs_unlock_phy(phyp);
3606 
3607 		mutex_enter(&pwp->config_lock);
3608 		config_changed = pwp->config_changed;
3609 		mutex_exit(&pwp->config_lock);
3610 
3611 		phyp = pnext;
3612 	}
3613 
3614 	if (config_changed) {
3615 		return (config_changed);
3616 	}
3617 
3618 	/*
3619 	 * Now check the children
3620 	 */
3621 	phyp = pptr;
3622 	while (phyp && !config_changed) {
3623 		pmcs_lock_phy(phyp);
3624 		pnext = phyp->sibling;
3625 		pchild = phyp->children;
3626 		pmcs_unlock_phy(phyp);
3627 
3628 		if (pchild) {
3629 			(void) pmcs_check_expanders(pwp, pchild);
3630 		}
3631 
3632 		mutex_enter(&pwp->config_lock);
3633 		config_changed = pwp->config_changed;
3634 		mutex_exit(&pwp->config_lock);
3635 
3636 		phyp = pnext;
3637 	}
3638 
3639 	/*
3640 	 * We're done
3641 	 */
3642 	return (config_changed);
3643 }
3644 
3645 /*
3646  * Called with softstate and PHY locked
3647  */
static void
pmcs_clear_expander(pmcs_hw_t *pwp, pmcs_phy_t *pptr, int level)
{
	pmcs_phy_t *ctmp;

	/*
	 * "level" is the recursion depth relative to the expander on which
	 * this teardown started (0 for the first call).  Parent/subsidiary
	 * PHY locks are only taken at level 0; deeper levels are already
	 * locked by the recursion.
	 */
	ASSERT(mutex_owned(&pwp->lock));
	ASSERT(mutex_owned(&pptr->phy_lock));
	ASSERT(pptr->level < PMCS_MAX_XPND - 1);

	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: checking %s", __func__,
	    pptr->path);

	/* First pass: propagate deadness downward and recurse into */
	/* any sub-expanders. */
	ctmp = pptr->children;
	while (ctmp) {
		/*
		 * If the expander is dead, mark its children dead
		 */
		if (pptr->dead) {
			ctmp->dead = 1;
		}
		if (ctmp->dtype == EXPANDER) {
			pmcs_clear_expander(pwp, ctmp, level + 1);
		}
		ctmp = ctmp->sibling;
	}

	/*
	 * If this expander is not dead, we're done here.
	 */
	if (!pptr->dead) {
		return;
	}

	/*
	 * Now snip out the list of children below us and release them
	 */
	ctmp = pptr->children;
	while (ctmp) {
		pmcs_phy_t *nxt = ctmp->sibling;
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
		    "%s: dead PHY 0x%p (%s) (ref_count %d)", __func__,
		    (void *)ctmp, ctmp->path, ctmp->ref_count);
		/*
		 * Put this PHY on the dead PHY list for the watchdog to
		 * clean up after any outstanding work has completed.
		 */
		mutex_enter(&pwp->dead_phylist_lock);
		ctmp->dead_next = pwp->dead_phys;
		pwp->dead_phys = ctmp;
		mutex_exit(&pwp->dead_phylist_lock);
		pmcs_unlock_phy(ctmp);
		ctmp = nxt;
	}

	pptr->children = NULL;

	/*
	 * Clear subsidiary phys as well.  Getting the parent's PHY lock
	 * is only necessary if level == 0 since otherwise the parent is
	 * already locked.
	 */
	if (!IS_ROOT_PHY(pptr)) {
		if (level == 0) {
			mutex_enter(&pptr->parent->phy_lock);
		}
		ctmp = pptr->parent->children;
		if (level == 0) {
			mutex_exit(&pptr->parent->phy_lock);
		}
	} else {
		ctmp = pwp->root_phys;
	}

	/*
	 * Walk our sibling list looking for subsidiary PHYs of this wide
	 * expander (same SAS address, subsidiary flag set) and clear them.
	 */
	while (ctmp) {
		if (ctmp == pptr) {
			/* Skip ourselves; we're cleared at the end */
			ctmp = ctmp->sibling;
			continue;
		}
		/*
		 * We only need to lock subsidiary PHYs on the level 0
		 * expander.  Any children of that expander, subsidiaries or
		 * not, will already be locked.
		 */
		if (level == 0) {
			pmcs_lock_phy(ctmp);
		}
		if (ctmp->dtype != EXPANDER || ctmp->subsidiary == 0 ||
		    memcmp(ctmp->sas_address, pptr->sas_address,
		    sizeof (ctmp->sas_address)) != 0) {
			/* Not a subsidiary of this expander; move on */
			if (level == 0) {
				pmcs_unlock_phy(ctmp);
			}
			ctmp = ctmp->sibling;
			continue;
		}
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: subsidiary %s",
		    __func__, ctmp->path);
		pmcs_clear_phy(pwp, ctmp);
		if (level == 0) {
			pmcs_unlock_phy(ctmp);
		}
		ctmp = ctmp->sibling;
	}

	/* Finally, clear the primary PHY itself */
	pmcs_clear_phy(pwp, pptr);
}
3754 
3755 /*
3756  * Called with PHY locked and with scratch acquired. We return 0 if
3757  * we fail to allocate resources or notice that the configuration
3758  * count changed while we were running the command. We return
3759  * less than zero if we had an I/O error or received an unsupported
3760  * configuration. Otherwise we return the number of phys in the
3761  * expander.
3762  */
/*
 * DFM ("default failure message"): record diagnostic string y in m only if
 * no earlier message has been recorded, so fall-through case chains keep the
 * first (most specific) match.  Wrapped in do/while (0) so the expansion is
 * a single statement and is safe in unbraced if/else contexts (the original
 * bare "if" form had a dangling-else hazard); arguments are parenthesized.
 */
#define	DFM(m, y)	do { if ((m) == NULL) (m) = (y); } while (0)
3764 static int
3765 pmcs_expander_get_nphy(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
3766 {
3767 	struct pmcwork *pwrk;
3768 	char buf[64];
3769 	const uint_t rdoff = 0x100;	/* returned data offset */
3770 	smp_response_frame_t *srf;
3771 	smp_report_general_resp_t *srgr;
3772 	uint32_t msg[PMCS_MSG_SIZE], *ptr, htag, status, ival;
3773 	int result;
3774 
3775 	ival = 0x40001100;
3776 again:
3777 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
3778 	if (pwrk == NULL) {
3779 		result = 0;
3780 		goto out;
3781 	}
3782 	(void) memset(pwp->scratch, 0x77, PMCS_SCRATCH_SIZE);
3783 	pwrk->arg = pwp->scratch;
3784 	pwrk->dtype = pptr->dtype;
3785 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
3786 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
3787 	if (ptr == NULL) {
3788 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
3789 		pmcs_prt(pwp, PMCS_PRT_DEBUG2, "%s: GET_IQ_ENTRY failed",
3790 		    __func__);
3791 		pmcs_pwork(pwp, pwrk);
3792 		result = 0;
3793 		goto out;
3794 	}
3795 
3796 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, PMCIN_SMP_REQUEST));
3797 	msg[1] = LE_32(pwrk->htag);
3798 	msg[2] = LE_32(pptr->device_id);
3799 	msg[3] = LE_32((4 << SMP_REQUEST_LENGTH_SHIFT) | SMP_INDIRECT_RESPONSE);
3800 	/*
3801 	 * Send SMP REPORT GENERAL (of either SAS1.1 or SAS2 flavors).
3802 	 */
3803 	msg[4] = BE_32(ival);
3804 	msg[5] = 0;
3805 	msg[6] = 0;
3806 	msg[7] = 0;
3807 	msg[8] = 0;
3808 	msg[9] = 0;
3809 	msg[10] = 0;
3810 	msg[11] = 0;
3811 	msg[12] = LE_32(DWORD0(pwp->scratch_dma+rdoff));
3812 	msg[13] = LE_32(DWORD1(pwp->scratch_dma+rdoff));
3813 	msg[14] = LE_32(PMCS_SCRATCH_SIZE - rdoff);
3814 	msg[15] = 0;
3815 
3816 	COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE);
3817 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
3818 	htag = pwrk->htag;
3819 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
3820 
3821 	pmcs_unlock_phy(pptr);
3822 	WAIT_FOR(pwrk, 1000, result);
3823 	pmcs_lock_phy(pptr);
3824 	pmcs_pwork(pwp, pwrk);
3825 
3826 	mutex_enter(&pwp->config_lock);
3827 	if (pwp->config_changed) {
3828 		RESTART_DISCOVERY_LOCKED(pwp);
3829 		mutex_exit(&pwp->config_lock);
3830 		result = 0;
3831 		goto out;
3832 	}
3833 	mutex_exit(&pwp->config_lock);
3834 
3835 	if (result) {
3836 		pmcs_timed_out(pwp, htag, __func__);
3837 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3838 		    "%s: Issuing SMP ABORT for htag 0x%08x", __func__, htag);
3839 		if (pmcs_abort(pwp, pptr, htag, 0, 0)) {
3840 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3841 			    "%s: Unable to issue SMP ABORT for htag 0x%08x",
3842 			    __func__, htag);
3843 		} else {
3844 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3845 			    "%s: Issuing SMP ABORT for htag 0x%08x",
3846 			    __func__, htag);
3847 		}
3848 		result = 0;
3849 		goto out;
3850 	}
3851 	ptr = (void *)pwp->scratch;
3852 	status = LE_32(ptr[2]);
3853 	if (status == PMCOUT_STATUS_UNDERFLOW ||
3854 	    status == PMCOUT_STATUS_OVERFLOW) {
3855 		pmcs_prt(pwp, PMCS_PRT_DEBUG_UNDERFLOW,
3856 		    "%s: over/underflow", __func__);
3857 		status = PMCOUT_STATUS_OK;
3858 	}
3859 	srf = (smp_response_frame_t *)&((uint32_t *)pwp->scratch)[rdoff >> 2];
3860 	srgr = (smp_report_general_resp_t *)
3861 	    &((uint32_t *)pwp->scratch)[(rdoff >> 2)+1];
3862 
3863 	if (status != PMCOUT_STATUS_OK) {
3864 		char *nag = NULL;
3865 		(void) snprintf(buf, sizeof (buf),
3866 		    "%s: SMP op failed (0x%x)", __func__, status);
3867 		switch (status) {
3868 		case PMCOUT_STATUS_IO_PORT_IN_RESET:
3869 			DFM(nag, "I/O Port In Reset");
3870 			/* FALLTHROUGH */
3871 		case PMCOUT_STATUS_ERROR_HW_TIMEOUT:
3872 			DFM(nag, "Hardware Timeout");
3873 			/* FALLTHROUGH */
3874 		case PMCOUT_STATUS_ERROR_INTERNAL_SMP_RESOURCE:
3875 			DFM(nag, "Internal SMP Resource Failure");
3876 			/* FALLTHROUGH */
3877 		case PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY:
3878 			DFM(nag, "PHY Not Ready");
3879 			/* FALLTHROUGH */
3880 		case PMCOUT_STATUS_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED:
3881 			DFM(nag, "Connection Rate Not Supported");
3882 			/* FALLTHROUGH */
3883 		case PMCOUT_STATUS_IO_XFER_OPEN_RETRY_TIMEOUT:
3884 			DFM(nag, "Open Retry Timeout");
3885 			/* FALLTHROUGH */
3886 		case PMCOUT_STATUS_SMP_RESP_CONNECTION_ERROR:
3887 			DFM(nag, "Response Connection Error");
3888 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
3889 			    "%s: expander %s SMP operation failed (%s)",
3890 			    __func__, pptr->path, nag);
3891 			break;
3892 
3893 		/*
3894 		 * For the IO_DS_NON_OPERATIONAL case, we need to kick off
3895 		 * device state recovery and return 0 so that the caller
3896 		 * doesn't assume this expander is dead for good.
3897 		 */
3898 		case PMCOUT_STATUS_IO_DS_NON_OPERATIONAL: {
3899 			pmcs_xscsi_t *xp = pptr->target;
3900 
3901 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
3902 			    "%s: expander %s device state non-operational",
3903 			    __func__, pptr->path);
3904 
3905 			if (xp == NULL) {
3906 				pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
3907 				    "%s: No target to do DS recovery for PHY "
3908 				    "%p (%s), attempting PHY hard reset",
3909 				    __func__, (void *)pptr, pptr->path);
3910 				(void) pmcs_reset_phy(pwp, pptr,
3911 				    PMCS_PHYOP_HARD_RESET);
3912 				break;
3913 			}
3914 
3915 			mutex_enter(&xp->statlock);
3916 			pmcs_start_dev_state_recovery(xp, pptr);
3917 			mutex_exit(&xp->statlock);
3918 			break;
3919 		}
3920 
3921 		default:
3922 			pmcs_print_entry(pwp, PMCS_PRT_DEBUG, buf, ptr);
3923 			result = -EIO;
3924 			break;
3925 		}
3926 	} else if (srf->srf_frame_type != SMP_FRAME_TYPE_RESPONSE) {
3927 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
3928 		    "%s: bad response frame type 0x%x",
3929 		    __func__, srf->srf_frame_type);
3930 		result = -EINVAL;
3931 	} else if (srf->srf_function != SMP_FUNC_REPORT_GENERAL) {
3932 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: bad response function 0x%x",
3933 		    __func__, srf->srf_function);
3934 		result = -EINVAL;
3935 	} else if (srf->srf_result != 0) {
3936 		/*
3937 		 * Check to see if we have a value of 3 for failure and
3938 		 * whether we were using a SAS2.0 allocation length value
3939 		 * and retry without it.
3940 		 */
3941 		if (srf->srf_result == 3 && (ival & 0xff00)) {
3942 			ival &= ~0xff00;
3943 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
3944 			    "%s: err 0x%x with SAS2 request- retry with SAS1",
3945 			    __func__, srf->srf_result);
3946 			goto again;
3947 		}
3948 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: bad response 0x%x",
3949 		    __func__, srf->srf_result);
3950 		result = -EINVAL;
3951 	} else if (srgr->srgr_configuring) {
3952 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
3953 		    "%s: expander at phy %s is still configuring",
3954 		    __func__, pptr->path);
3955 		result = 0;
3956 	} else {
3957 		result = srgr->srgr_number_of_phys;
3958 		if (ival & 0xff00) {
3959 			pptr->tolerates_sas2 = 1;
3960 		}
3961 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3962 		    "%s has %d phys and %s SAS2", pptr->path, result,
3963 		    pptr->tolerates_sas2? "tolerates" : "does not tolerate");
3964 	}
3965 out:
3966 	return (result);
3967 }
3968 
3969 /*
3970  * Called with expander locked (and thus, pptr) as well as all PHYs up to
3971  * the root, and scratch acquired. Return 0 if we fail to allocate resources
3972  * or notice that the configuration changed while we were running the command.
3973  *
3974  * We return less than zero if we had an I/O error or received an
3975  * unsupported configuration.
3976  */
3977 static int
3978 pmcs_expander_content_discover(pmcs_hw_t *pwp, pmcs_phy_t *expander,
3979     pmcs_phy_t *pptr)
3980 {
3981 	struct pmcwork *pwrk;
3982 	char buf[64];
3983 	uint8_t sas_address[8];
3984 	uint8_t att_sas_address[8];
3985 	smp_response_frame_t *srf;
3986 	smp_discover_resp_t *sdr;
3987 	const uint_t rdoff = 0x100;	/* returned data offset */
3988 	uint8_t *roff;
3989 	uint32_t status, *ptr, msg[PMCS_MSG_SIZE], htag;
3990 	int result;
3991 	uint8_t	ini_support;
3992 	uint8_t	tgt_support;
3993 
3994 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, expander);
3995 	if (pwrk == NULL) {
3996 		result = 0;
3997 		goto out;
3998 	}
3999 	(void) memset(pwp->scratch, 0x77, PMCS_SCRATCH_SIZE);
4000 	pwrk->arg = pwp->scratch;
4001 	pwrk->dtype = expander->dtype;
4002 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, PMCIN_SMP_REQUEST));
4003 	msg[1] = LE_32(pwrk->htag);
4004 	msg[2] = LE_32(expander->device_id);
4005 	msg[3] = LE_32((12 << SMP_REQUEST_LENGTH_SHIFT) |
4006 	    SMP_INDIRECT_RESPONSE);
4007 	/*
4008 	 * Send SMP DISCOVER (of either SAS1.1 or SAS2 flavors).
4009 	 */
4010 	if (expander->tolerates_sas2) {
4011 		msg[4] = BE_32(0x40101B00);
4012 	} else {
4013 		msg[4] = BE_32(0x40100000);
4014 	}
4015 	msg[5] = 0;
4016 	msg[6] = BE_32((pptr->phynum << 16));
4017 	msg[7] = 0;
4018 	msg[8] = 0;
4019 	msg[9] = 0;
4020 	msg[10] = 0;
4021 	msg[11] = 0;
4022 	msg[12] = LE_32(DWORD0(pwp->scratch_dma+rdoff));
4023 	msg[13] = LE_32(DWORD1(pwp->scratch_dma+rdoff));
4024 	msg[14] = LE_32(PMCS_SCRATCH_SIZE - rdoff);
4025 	msg[15] = 0;
4026 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4027 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4028 	if (ptr == NULL) {
4029 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4030 		result = 0;
4031 		goto out;
4032 	}
4033 
4034 	COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE);
4035 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
4036 	htag = pwrk->htag;
4037 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4038 
4039 	/*
4040 	 * Drop PHY lock while waiting so other completions aren't potentially
4041 	 * blocked.
4042 	 */
4043 	pmcs_unlock_phy(expander);
4044 	WAIT_FOR(pwrk, 1000, result);
4045 	pmcs_lock_phy(expander);
4046 	pmcs_pwork(pwp, pwrk);
4047 
4048 	mutex_enter(&pwp->config_lock);
4049 	if (pwp->config_changed) {
4050 		RESTART_DISCOVERY_LOCKED(pwp);
4051 		mutex_exit(&pwp->config_lock);
4052 		result = 0;
4053 		goto out;
4054 	}
4055 	mutex_exit(&pwp->config_lock);
4056 
4057 	if (result) {
4058 		pmcs_prt(pwp, PMCS_PRT_WARN, pmcs_timeo, __func__);
4059 		if (pmcs_abort(pwp, expander, htag, 0, 0)) {
4060 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
4061 			    "%s: Unable to issue SMP ABORT for htag 0x%08x",
4062 			    __func__, htag);
4063 		} else {
4064 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
4065 			    "%s: Issuing SMP ABORT for htag 0x%08x",
4066 			    __func__, htag);
4067 		}
4068 		result = -ETIMEDOUT;
4069 		goto out;
4070 	}
4071 	ptr = (void *)pwp->scratch;
4072 	/*
4073 	 * Point roff to the DMA offset for returned data
4074 	 */
4075 	roff = pwp->scratch;
4076 	roff += rdoff;
4077 	srf = (smp_response_frame_t *)roff;
4078 	sdr = (smp_discover_resp_t *)(roff+4);
4079 	status = LE_32(ptr[2]);
4080 	if (status == PMCOUT_STATUS_UNDERFLOW ||
4081 	    status == PMCOUT_STATUS_OVERFLOW) {
4082 		pmcs_prt(pwp, PMCS_PRT_DEBUG_UNDERFLOW,
4083 		    "%s: over/underflow", __func__);
4084 		status = PMCOUT_STATUS_OK;
4085 	}
4086 	if (status != PMCOUT_STATUS_OK) {
4087 		char *nag = NULL;
4088 		(void) snprintf(buf, sizeof (buf),
4089 		    "%s: SMP op failed (0x%x)", __func__, status);
4090 		switch (status) {
4091 		case PMCOUT_STATUS_ERROR_HW_TIMEOUT:
4092 			DFM(nag, "Hardware Timeout");
4093 			/* FALLTHROUGH */
4094 		case PMCOUT_STATUS_ERROR_INTERNAL_SMP_RESOURCE:
4095 			DFM(nag, "Internal SMP Resource Failure");
4096 			/* FALLTHROUGH */
4097 		case PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY:
4098 			DFM(nag, "PHY Not Ready");
4099 			/* FALLTHROUGH */
4100 		case PMCOUT_STATUS_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED:
4101 			DFM(nag, "Connection Rate Not Supported");
4102 			/* FALLTHROUGH */
4103 		case PMCOUT_STATUS_IO_XFER_OPEN_RETRY_TIMEOUT:
4104 			DFM(nag, "Open Retry Timeout");
4105 			/* FALLTHROUGH */
4106 		case PMCOUT_STATUS_SMP_RESP_CONNECTION_ERROR:
4107 			DFM(nag, "Response Connection Error");
4108 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
4109 			    "%s: expander %s SMP operation failed (%s)",
4110 			    __func__, pptr->path, nag);
4111 			break;
4112 		default:
4113 			pmcs_print_entry(pwp, PMCS_PRT_DEBUG, buf, ptr);
4114 			result = -EIO;
4115 			break;
4116 		}
4117 		goto out;
4118 	} else if (srf->srf_frame_type != SMP_FRAME_TYPE_RESPONSE) {
4119 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4120 		    "%s: bad response frame type 0x%x",
4121 		    __func__, srf->srf_frame_type);
4122 		result = -EINVAL;
4123 		goto out;
4124 	} else if (srf->srf_function != SMP_FUNC_DISCOVER) {
4125 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: bad response function 0x%x",
4126 		    __func__, srf->srf_function);
4127 		result = -EINVAL;
4128 		goto out;
4129 	} else if (srf->srf_result != SMP_RES_FUNCTION_ACCEPTED) {
4130 		result = pmcs_smp_function_result(pwp, srf);
4131 		/* Need not fail if PHY is Vacant */
4132 		if (result != SMP_RES_PHY_VACANT) {
4133 			result = -EINVAL;
4134 			goto out;
4135 		}
4136 	}
4137 
4138 	ini_support = (sdr->sdr_attached_sata_host |
4139 	    (sdr->sdr_attached_smp_initiator << 1) |
4140 	    (sdr->sdr_attached_stp_initiator << 2) |
4141 	    (sdr->sdr_attached_ssp_initiator << 3));
4142 
4143 	tgt_support = (sdr->sdr_attached_sata_device |
4144 	    (sdr->sdr_attached_smp_target << 1) |
4145 	    (sdr->sdr_attached_stp_target << 2) |
4146 	    (sdr->sdr_attached_ssp_target << 3));
4147 
4148 	pmcs_wwn2barray(BE_64(sdr->sdr_sas_addr), sas_address);
4149 	pmcs_wwn2barray(BE_64(sdr->sdr_attached_sas_addr), att_sas_address);
4150 
4151 	switch (sdr->sdr_attached_device_type) {
4152 	case SAS_IF_DTYPE_ENDPOINT:
4153 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
4154 		    "exp_content: %s atdt=0x%x lr=%x is=%x ts=%x SAS="
4155 		    SAS_ADDR_FMT " attSAS=" SAS_ADDR_FMT " atPHY=%x",
4156 		    pptr->path,
4157 		    sdr->sdr_attached_device_type,
4158 		    sdr->sdr_negotiated_logical_link_rate,
4159 		    ini_support,
4160 		    tgt_support,
4161 		    SAS_ADDR_PRT(sas_address),
4162 		    SAS_ADDR_PRT(att_sas_address),
4163 		    sdr->sdr_attached_phy_identifier);
4164 
4165 		if (sdr->sdr_attached_sata_device ||
4166 		    sdr->sdr_attached_stp_target) {
4167 			pptr->dtype = SATA;
4168 		} else if (sdr->sdr_attached_ssp_target) {
4169 			pptr->dtype = SAS;
4170 		} else if (tgt_support || ini_support) {
4171 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: %s has "
4172 			    "tgt support=%x init support=(%x)",
4173 			    __func__, pptr->path, tgt_support, ini_support);
4174 		}
4175 		break;
4176 	case SAS_IF_DTYPE_EDGE:
4177 	case SAS_IF_DTYPE_FANOUT:
4178 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
4179 		    "exp_content: %s atdt=0x%x lr=%x is=%x ts=%x SAS="
4180 		    SAS_ADDR_FMT " attSAS=" SAS_ADDR_FMT " atPHY=%x",
4181 		    pptr->path,
4182 		    sdr->sdr_attached_device_type,
4183 		    sdr->sdr_negotiated_logical_link_rate,
4184 		    ini_support,
4185 		    tgt_support,
4186 		    SAS_ADDR_PRT(sas_address),
4187 		    SAS_ADDR_PRT(att_sas_address),
4188 		    sdr->sdr_attached_phy_identifier);
4189 		if (sdr->sdr_attached_smp_target) {
4190 			/*
4191 			 * Avoid configuring phys that just point back
4192 			 * at a parent phy
4193 			 */
4194 			if (expander->parent &&
4195 			    memcmp(expander->parent->sas_address,
4196 			    att_sas_address,
4197 			    sizeof (expander->parent->sas_address)) == 0) {
4198 				pmcs_prt(pwp, PMCS_PRT_DEBUG3,
4199 				    "%s: skipping port back to parent "
4200 				    "expander (%s)", __func__, pptr->path);
4201 				pptr->dtype = NOTHING;
4202 				break;
4203 			}
4204 			pptr->dtype = EXPANDER;
4205 
4206 		} else if (tgt_support || ini_support) {
4207 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s has "
4208 			    "tgt support=%x init support=(%x)",
4209 			    pptr->path, tgt_support, ini_support);
4210 			pptr->dtype = EXPANDER;
4211 		}
4212 		break;
4213 	default:
4214 		pptr->dtype = NOTHING;
4215 		break;
4216 	}
4217 	if (pptr->dtype != NOTHING) {
4218 		pmcs_phy_t *ctmp;
4219 
4220 		/*
4221 		 * If the attached device is a SATA device and the expander
4222 		 * is (possibly) a SAS2 compliant expander, check for whether
4223 		 * there is a NAA=5 WWN field starting at this offset and
4224 		 * use that for the SAS Address for this device.
4225 		 */
4226 		if (expander->tolerates_sas2 && pptr->dtype == SATA &&
4227 		    (roff[SAS_ATTACHED_NAME_OFFSET] >> 8) == 0x5) {
4228 			(void) memcpy(pptr->sas_address,
4229 			    &roff[SAS_ATTACHED_NAME_OFFSET], 8);
4230 		} else {
4231 			(void) memcpy(pptr->sas_address, att_sas_address, 8);
4232 		}
4233 		pptr->atdt = (sdr->sdr_attached_device_type);
4234 		/*
4235 		 * Now run up from the expander's parent up to the top to
4236 		 * make sure we only use the least common link_rate.
4237 		 */
4238 		for (ctmp = expander->parent; ctmp; ctmp = ctmp->parent) {
4239 			if (ctmp->link_rate <
4240 			    sdr->sdr_negotiated_logical_link_rate) {
4241 				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
4242 				    "%s: derating link rate from %x to %x due "
4243 				    "to %s being slower", pptr->path,
4244 				    sdr->sdr_negotiated_logical_link_rate,
4245 				    ctmp->link_rate,
4246 				    ctmp->path);
4247 				sdr->sdr_negotiated_logical_link_rate =
4248 				    ctmp->link_rate;
4249 			}
4250 		}
4251 		pptr->link_rate = sdr->sdr_negotiated_logical_link_rate;
4252 		pptr->state.prog_min_rate = sdr->sdr_prog_min_phys_link_rate;
4253 		pptr->state.hw_min_rate = sdr->sdr_hw_min_phys_link_rate;
4254 		pptr->state.prog_max_rate = sdr->sdr_prog_max_phys_link_rate;
4255 		pptr->state.hw_max_rate = sdr->sdr_hw_max_phys_link_rate;
4256 		PHY_CHANGED(pwp, pptr);
4257 	} else {
4258 		pmcs_clear_phy(pwp, pptr);
4259 	}
4260 	result = 1;
4261 out:
4262 	return (result);
4263 }
4264 
4265 /*
4266  * Get a work structure and assign it a tag with type and serial number
4267  * If a structure is returned, it is returned locked.
4268  */
pmcwork_t *
pmcs_gwork(pmcs_hw_t *pwp, uint32_t tag_type, pmcs_phy_t *phyp)
{
	pmcwork_t *p;
	uint16_t snum;
	uint32_t off;

	mutex_enter(&pwp->wfree_lock);
	p = STAILQ_FIRST(&pwp->wf);
	if (p == NULL) {
		/*
		 * If we couldn't get a work structure, it's time to bite
		 * the bullet, grab the pfree_lock and copy over all the
		 * work structures from the pending free list to the actual
		 * free list.  This shouldn't happen all that often.
		 */
		mutex_enter(&pwp->pfree_lock);
		/*
		 * Bulk-transfer the pending-free STAILQ by copying its
		 * head/tail pointers, then reinitialize it to empty.
		 */
		pwp->wf.stqh_first = pwp->pf.stqh_first;
		pwp->wf.stqh_last = pwp->pf.stqh_last;
		STAILQ_INIT(&pwp->pf);
		mutex_exit(&pwp->pfree_lock);

		p = STAILQ_FIRST(&pwp->wf);
		if (p == NULL) {
			/* Both lists were empty: no work structure available */
			mutex_exit(&pwp->wfree_lock);
			return (NULL);
		}
	}
	STAILQ_REMOVE(&pwp->wf, p, pmcwork, next);
	/* Serial number is taken under wfree_lock so it stays unique */
	snum = pwp->wserno++;
	mutex_exit(&pwp->wfree_lock);

	/* Index of this structure within the pwp->work array */
	off = p - pwp->work;

	mutex_enter(&p->lock);
	ASSERT(p->state == PMCS_WORK_STATE_NIL);
	ASSERT(p->htag == PMCS_TAG_FREE);
	/*
	 * Compose the host tag from three fields: the caller-supplied
	 * tag type, the serial number, and the array index (which lets
	 * pmcs_tag2wp() find the structure again from the tag alone).
	 */
	p->htag = (tag_type << PMCS_TAG_TYPE_SHIFT) & PMCS_TAG_TYPE_MASK;
	p->htag |= ((snum << PMCS_TAG_SERNO_SHIFT) & PMCS_TAG_SERNO_MASK);
	p->htag |= ((off << PMCS_TAG_INDEX_SHIFT) & PMCS_TAG_INDEX_MASK);
	p->start = gethrtime();
	p->state = PMCS_WORK_STATE_READY;
	p->ssp_event = 0;
	p->dead = 0;

	if (phyp) {
		/* Associate the PHY and hold a reference for the work's life */
		p->phy = phyp;
		pmcs_inc_phy_ref_count(phyp);
	}

	/* Returned with p->lock held, as documented above */
	return (p);
}
4321 
4322 /*
4323  * Called with pwrk lock held.  Returned with lock released.
4324  */
void
pmcs_pwork(pmcs_hw_t *pwp, pmcwork_t *p)
{
	ASSERT(p != NULL);
	ASSERT(mutex_owned(&p->lock));

	/*
	 * Snapshot the current fields into the last_* members so the
	 * just-completed use of this structure can still be examined
	 * (e.g. from a debugger) after it has been recycled.
	 */
	p->last_ptr = p->ptr;
	p->last_arg = p->arg;
	p->last_phy = p->phy;
	p->last_xp = p->xp;
	p->last_htag = p->htag;
	p->last_state = p->state;
	p->finish = gethrtime();

	if (p->phy) {
		/* Drop the PHY reference taken in pmcs_gwork() */
		pmcs_dec_phy_ref_count(p->phy);
	}

	/* Reset the structure to its free state */
	p->state = PMCS_WORK_STATE_NIL;
	p->htag = PMCS_TAG_FREE;
	p->xp = NULL;
	p->ptr = NULL;
	p->arg = NULL;
	p->phy = NULL;
	p->timer = 0;
	mutex_exit(&p->lock);

	/*
	 * Return the structure to the free list.  If wfree_lock is
	 * contended, avoid blocking by parking it on the pending-free
	 * list instead; pmcs_gwork() drains that list when the main
	 * free list runs dry.
	 */
	if (mutex_tryenter(&pwp->wfree_lock) == 0) {
		mutex_enter(&pwp->pfree_lock);
		STAILQ_INSERT_TAIL(&pwp->pf, p, next);
		mutex_exit(&pwp->pfree_lock);
	} else {
		STAILQ_INSERT_TAIL(&pwp->wf, p, next);
		mutex_exit(&pwp->wfree_lock);
	}
}
4361 
4362 /*
4363  * Find a work structure based upon a tag and make sure that the tag
4364  * serial number matches the work structure we've found.
4365  * If a structure is found, its lock is held upon return.
4366  */
4367 pmcwork_t *
4368 pmcs_tag2wp(pmcs_hw_t *pwp, uint32_t htag)
4369 {
4370 	pmcwork_t *p;
4371 	uint32_t idx = PMCS_TAG_INDEX(htag);
4372 
4373 	p = &pwp->work[idx];
4374 
4375 	mutex_enter(&p->lock);
4376 	if (p->htag == htag) {
4377 		return (p);
4378 	}
4379 	mutex_exit(&p->lock);
4380 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "INDEX 0x%x HTAG 0x%x got p->htag 0x%x",
4381 	    idx, htag, p->htag);
4382 	return (NULL);
4383 }
4384 
4385 /*
4386  * Issue an abort for a command or for all commands.
4387  *
4388  * Since this can be called from interrupt context,
4389  * we don't wait for completion if wait is not set.
4390  *
4391  * Called with PHY lock held.
4392  */
int
pmcs_abort(pmcs_hw_t *pwp, pmcs_phy_t *pptr, uint32_t tag, int all_cmds,
    int wait)
{
	pmcwork_t *pwrk;
	pmcs_xscsi_t *tgt;
	uint32_t msg[PMCS_MSG_SIZE], *ptr;
	int result, abt_type;
	uint32_t abt_htag, status;

	/* Only one ABORT_ALL may be outstanding per PHY at a time */
	if (pptr->abort_all_start) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: ABORT_ALL for (%s) already"
		    " in progress.", __func__, pptr->path);
		return (EBUSY);
	}

	/* Pick the abort opcode matching the device type */
	switch (pptr->dtype) {
	case SAS:
		abt_type = PMCIN_SSP_ABORT;
		break;
	case SATA:
		abt_type = PMCIN_SATA_ABORT;
		break;
	case EXPANDER:
		abt_type = PMCIN_SMP_ABORT;
		break;
	default:
		/* Nothing to abort for other device types */
		return (0);
	}

	pwrk = pmcs_gwork(pwp, wait ? PMCS_TAG_TYPE_WAIT : PMCS_TAG_TYPE_NONE,
	    pptr);

	if (pwrk == NULL) {
		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
		return (ENOMEM);
	}

	pwrk->dtype = pptr->dtype;
	if (wait) {
		/* Completion status will be written back into msg[] */
		pwrk->arg = msg;
	}
	if (pptr->valid_device_id == 0) {
		pmcs_pwork(pwp, pwrk);
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Invalid DeviceID", __func__);
		return (ENODEV);
	}
	/* Build the abort IOMB */
	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, abt_type));
	msg[1] = LE_32(pwrk->htag);
	msg[2] = LE_32(pptr->device_id);
	if (all_cmds) {
		/* Scope flag (msg[4]) = 1: abort everything on the device */
		msg[3] = 0;
		msg[4] = LE_32(1);
		pwrk->ptr = NULL;
		pptr->abort_all_start = gethrtime();
	} else {
		/* Scope flag = 0: abort the single command with this tag */
		msg[3] = LE_32(tag);
		msg[4] = 0;
		pwrk->ptr = &tag;
	}
	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
	if (ptr == NULL) {
		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
		pmcs_pwork(pwp, pwrk);
		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
		return (ENOMEM);
	}

	COPY_MESSAGE(ptr, msg, 5);
	if (all_cmds) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG,
		    "%s: aborting all commands for %s device %s. (htag=0x%x)",
		    __func__, pmcs_get_typename(pptr->dtype), pptr->path,
		    msg[1]);
	} else {
		pmcs_prt(pwp, PMCS_PRT_DEBUG,
		    "%s: aborting tag 0x%x for %s device %s. (htag=0x%x)",
		    __func__, tag, pmcs_get_typename(pptr->dtype), pptr->path,
		    msg[1]);
	}
	pwrk->state = PMCS_WORK_STATE_ONCHIP;

	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
	if (!wait) {
		/* Fire-and-forget (e.g. interrupt context): don't wait */
		mutex_exit(&pwrk->lock);
		return (0);
	}

	/*
	 * Drop the PHY lock while sleeping for completion (up to 1000 ms)
	 * so the interrupt/completion path can make progress.
	 */
	abt_htag = pwrk->htag;
	pmcs_unlock_phy(pwrk->phy);
	WAIT_FOR(pwrk, 1000, result);
	pmcs_lock_phy(pwrk->phy);

	tgt = pwrk->xp;
	pmcs_pwork(pwp, pwrk);

	if (tgt != NULL) {
		/* Wait for the target's active queue to fully drain */
		mutex_enter(&tgt->aqlock);
		if (!STAILQ_EMPTY(&tgt->aq)) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG,
			    "%s: Abort complete (result=0x%x), but "
			    "aq not empty (tgt 0x%p), waiting",
			    __func__, result, (void *)tgt);
			cv_wait(&tgt->abort_cv, &tgt->aqlock);
		}
		mutex_exit(&tgt->aqlock);
	}

	if (all_cmds) {
		/* Clear the in-progress marker and wake any waiters */
		pptr->abort_all_start = 0;
		cv_signal(&pptr->abort_all_cv);
	}

	if (result) {
		/* WAIT_FOR timed out: try device-state error recovery */
		pmcs_prt(pwp, PMCS_PRT_DEBUG,
		    "%s: Abort (htag 0x%08x) request timed out",
		    __func__, abt_htag);
		if (tgt != NULL) {
			mutex_enter(&tgt->statlock);
			if ((tgt->dev_state != PMCS_DEVICE_STATE_IN_RECOVERY) &&
			    (tgt->dev_state !=
			    PMCS_DEVICE_STATE_NON_OPERATIONAL)) {
				pmcs_prt(pwp, PMCS_PRT_DEBUG,
				    "%s: Trying DS error recovery for tgt 0x%p",
				    __func__, (void *)tgt);
				(void) pmcs_send_err_recovery_cmd(pwp,
				    PMCS_DEVICE_STATE_IN_RECOVERY, tgt);
			}
			mutex_exit(&tgt->statlock);
		}
		return (ETIMEDOUT);
	}

	status = LE_32(msg[2]);
	if (status != PMCOUT_STATUS_OK) {
		/*
		 * The only non-success status are IO_NOT_VALID &
		 * IO_ABORT_IN_PROGRESS.
		 * In case of IO_ABORT_IN_PROGRESS, the other ABORT cmd's
		 * status is of concern and this duplicate cmd status can
		 * be ignored.
		 * If IO_NOT_VALID, that's not an error per-se.
		 * For abort of single I/O complete the command anyway.
		 * If, however, we were aborting all, that is a problem
		 * as IO_NOT_VALID really means that the IO or device is
		 * not there. So, discovery process will take of the cleanup.
		 */
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: abort result 0x%x",
		    __func__, LE_32(msg[2]));
		if (all_cmds) {
			PHY_CHANGED(pwp, pptr);
			RESTART_DISCOVERY(pwp);
		} else {
			return (EINVAL);
		}

		return (0);
	}

	if (tgt != NULL) {
		/* Successful abort: restore OPERATIONAL state if needed */
		mutex_enter(&tgt->statlock);
		if (tgt->dev_state == PMCS_DEVICE_STATE_IN_RECOVERY) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG,
			    "%s: Restoring OPERATIONAL dev_state for tgt 0x%p",
			    __func__, (void *)tgt);
			(void) pmcs_send_err_recovery_cmd(pwp,
			    PMCS_DEVICE_STATE_OPERATIONAL, tgt);
		}
		mutex_exit(&tgt->statlock);
	}

	return (0);
}
4567 
4568 /*
4569  * Issue a task management function to an SSP device.
4570  *
4571  * Called with PHY lock held.
4572  * statlock CANNOT be held upon entry.
4573  */
int
pmcs_ssp_tmf(pmcs_hw_t *pwp, pmcs_phy_t *pptr, uint8_t tmf, uint32_t tag,
    uint64_t lun, uint32_t *response)
{
	int result, ds;
	uint8_t local[PMCS_QENTRY_SIZE << 1], *xd;
	sas_ssp_rsp_iu_t *rptr = (void *)local;
	/* Endian-transform vector for the SSP response IU (see
	 * pmcs_endian_transform()): 8 bytes, 1 16-bit word, 6 bytes,
	 * 2 32-bit words, terminator. */
	static const uint8_t ssp_rsp_evec[] = {
		0x58, 0x61, 0x56, 0x72, 0x00
	};
	uint32_t msg[PMCS_MSG_SIZE], *ptr, status;
	struct pmcwork *pwrk;
	pmcs_xscsi_t *xp;

	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
	if (pwrk == NULL) {
		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
		return (ENOMEM);
	}
	/*
	 * NB: We use the PMCS_OQ_GENERAL outbound queue
	 * NB: so as to not get entangled in normal I/O
	 * NB: processing.
	 */
	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
	    PMCIN_SSP_INI_TM_START));
	msg[1] = LE_32(pwrk->htag);
	msg[2] = LE_32(pptr->device_id);
	/* Only ABORT TASK and QUERY TASK address a specific command tag */
	if (tmf == SAS_ABORT_TASK || tmf == SAS_QUERY_TASK) {
		msg[3] = LE_32(tag);
	} else {
		msg[3] = 0;
	}
	msg[4] = LE_32(tmf);
	/* LUN goes out big-endian, split across two dwords */
	msg[5] = BE_32((uint32_t)lun);
	msg[6] = BE_32((uint32_t)(lun >> 32));
	msg[7] = LE_32(PMCIN_MESSAGE_REPORT);

	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
	if (ptr == NULL) {
		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
		pmcs_pwork(pwp, pwrk);
		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
		return (ENOMEM);
	}
	/*
	 * NOTE(review): msg[7] is initialized above but only 7 words
	 * (msg[0..6]) are copied here — confirm whether the count should
	 * be 8 or whether msg[7] is intentionally dropped.
	 */
	COPY_MESSAGE(ptr, msg, 7);
	pwrk->arg = msg;
	pwrk->dtype = pptr->dtype;

	/* Don't bother sending if the device state is NON-OPERATIONAL */
	xp = pptr->target;
	if (xp != NULL) {
		mutex_enter(&xp->statlock);
		if (xp->dev_state == PMCS_DEVICE_STATE_NON_OPERATIONAL) {
			mutex_exit(&xp->statlock);
			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
			pmcs_pwork(pwp, pwrk);
			pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Not sending '%s'"
			    " because DS is '%s'", __func__, pmcs_tmf2str(tmf),
			    pmcs_status_str
			    (PMCOUT_STATUS_IO_DS_NON_OPERATIONAL));
			return (EIO);
		}
		mutex_exit(&xp->statlock);
	}

	pmcs_prt(pwp, PMCS_PRT_DEBUG,
	    "%s: sending '%s' to %s (lun %llu) tag 0x%x", __func__,
	    pmcs_tmf2str(tmf), pptr->path, (unsigned long long) lun, tag);
	pwrk->state = PMCS_WORK_STATE_ONCHIP;
	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);

	pmcs_unlock_phy(pptr);
	/*
	 * This is a command sent to the target device, so it can take
	 * significant amount of time to complete when path & device is busy.
	 * Set a timeout to 20 seconds
	 */
	WAIT_FOR(pwrk, 20000, result);
	pmcs_lock_phy(pptr);
	pmcs_pwork(pwp, pwrk);

	if (result) {
		/* Timed out: kick off device-state recovery if possible */
		if (xp == NULL) {
			return (ETIMEDOUT);
		}

		mutex_enter(&xp->statlock);
		pmcs_start_dev_state_recovery(xp, pptr);
		mutex_exit(&xp->statlock);
		return (ETIMEDOUT);
	}

	status = LE_32(msg[2]);
	if (status != PMCOUT_STATUS_OK) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG,
		    "%s: status %s for TMF %s action to %s, lun %llu",
		    __func__, pmcs_status_str(status),  pmcs_tmf2str(tmf),
		    pptr->path, (unsigned long long) lun);
		/* Map the firmware status to the device state to request */
		if ((status == PMCOUT_STATUS_IO_DS_NON_OPERATIONAL) ||
		    (status == PMCOUT_STATUS_OPEN_CNX_ERROR_BREAK) ||
		    (status == PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS)) {
			ds = PMCS_DEVICE_STATE_NON_OPERATIONAL;
		} else if (status == PMCOUT_STATUS_IO_DS_IN_RECOVERY) {
			/*
			 * If the status is IN_RECOVERY, it's an indication
			 * that it's now time for us to request to have the
			 * device state set to OPERATIONAL since we're the ones
			 * that requested recovery to begin with.
			 */
			ds = PMCS_DEVICE_STATE_OPERATIONAL;
		} else {
			ds = PMCS_DEVICE_STATE_IN_RECOVERY;
		}
		if (xp != NULL) {
			mutex_enter(&xp->statlock);
			if (xp->dev_state != ds) {
				pmcs_prt(pwp, PMCS_PRT_DEBUG,
				    "%s: Sending err recovery cmd"
				    " for tgt 0x%p (status = %s)",
				    __func__, (void *)xp,
				    pmcs_status_str(status));
				(void) pmcs_send_err_recovery_cmd(pwp, ds, xp);
			}
			mutex_exit(&xp->statlock);
		}
		return (EIO);
	} else {
		/* Success at the transport level: ensure OPERATIONAL state */
		ds = PMCS_DEVICE_STATE_OPERATIONAL;
		if (xp != NULL) {
			mutex_enter(&xp->statlock);
			if (xp->dev_state != ds) {
				pmcs_prt(pwp, PMCS_PRT_DEBUG,
				    "%s: Sending err recovery cmd"
				    " for tgt 0x%p (status = %s)",
				    __func__, (void *)xp,
				    pmcs_status_str(status));
				(void) pmcs_send_err_recovery_cmd(pwp, ds, xp);
			}
			mutex_exit(&xp->statlock);
		}
	}
	/* msg[3] carries the returned response length */
	if (LE_32(msg[3]) == 0) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "TMF completed with no response");
		return (EIO);
	}
	/* Convert the raw response IU into host byte order, then parse it */
	pmcs_endian_transform(pwp, local, &msg[5], ssp_rsp_evec);
	xd = (uint8_t *)(&msg[5]);
	xd += SAS_RSP_HDR_SIZE;
	if (rptr->datapres != SAS_RSP_DATAPRES_RESPONSE_DATA) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG,
		    "%s: TMF response not RESPONSE DATA (0x%x)",
		    __func__, rptr->datapres);
		return (EIO);
	}
	if (rptr->response_data_length != 4) {
		pmcs_print_entry(pwp, PMCS_PRT_DEBUG,
		    "Bad SAS RESPONSE DATA LENGTH", msg);
		return (EIO);
	}
	(void) memcpy(&status, xd, sizeof (uint32_t));
	status = BE_32(status);
	if (response != NULL)
		*response = status;
	/*
	 * The status is actually in the low-order byte.  The upper three
	 * bytes contain additional information for the TMFs that support them.
	 * However, at this time we do not issue any of those.  In the other
	 * cases, the upper three bytes are supposed to be 0, but it appears
	 * they aren't always.  Just mask them off.
	 */
	switch (status & 0xff) {
	case SAS_RSP_TMF_COMPLETE:
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: TMF complete", __func__);
		result = 0;
		break;
	case SAS_RSP_TMF_SUCCEEDED:
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: TMF succeeded", __func__);
		result = 0;
		break;
	case SAS_RSP_INVALID_FRAME:
		pmcs_prt(pwp, PMCS_PRT_DEBUG,
		    "%s: TMF returned INVALID FRAME", __func__);
		result = EIO;
		break;
	case SAS_RSP_TMF_NOT_SUPPORTED:
		pmcs_prt(pwp, PMCS_PRT_DEBUG,
		    "%s: TMF returned TMF NOT SUPPORTED", __func__);
		result = EIO;
		break;
	case SAS_RSP_TMF_FAILED:
		pmcs_prt(pwp, PMCS_PRT_DEBUG,
		    "%s: TMF returned TMF FAILED", __func__);
		result = EIO;
		break;
	case SAS_RSP_TMF_INCORRECT_LUN:
		pmcs_prt(pwp, PMCS_PRT_DEBUG,
		    "%s: TMF returned INCORRECT LUN", __func__);
		result = EIO;
		break;
	case SAS_RSP_OVERLAPPED_OIPTTA:
		pmcs_prt(pwp, PMCS_PRT_DEBUG,
		    "%s: TMF returned OVERLAPPED INITIATOR PORT TRANSFER TAG "
		    "ATTEMPTED", __func__);
		result = EIO;
		break;
	default:
		pmcs_prt(pwp, PMCS_PRT_DEBUG,
		    "%s: TMF returned unknown code 0x%x", __func__, status);
		result = EIO;
		break;
	}
	return (result);
}
4788 
4789 /*
4790  * Called with PHY lock held and scratch acquired
4791  */
4792 int
4793 pmcs_sata_abort_ncq(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
4794 {
4795 	const char *utag_fail_fmt = "%s: untagged NCQ command failure";
4796 	const char *tag_fail_fmt = "%s: NCQ command failure (tag 0x%x)";
4797 	uint32_t msg[PMCS_QENTRY_SIZE], *ptr, result, status;
4798 	uint8_t *fp = pwp->scratch, ds;
4799 	fis_t fis;
4800 	pmcwork_t *pwrk;
4801 	pmcs_xscsi_t *tgt;
4802 
4803 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
4804 	if (pwrk == NULL) {
4805 		return (ENOMEM);
4806 	}
4807 	msg[0] = LE_32(PMCS_IOMB_IN_SAS(PMCS_OQ_IODONE,
4808 	    PMCIN_SATA_HOST_IO_START));
4809 	msg[1] = LE_32(pwrk->htag);
4810 	msg[2] = LE_32(pptr->device_id);
4811 	msg[3] = LE_32(512);
4812 	msg[4] = LE_32(SATA_PROTOCOL_PIO | PMCIN_DATADIR_2_INI);
4813 	msg[5] = LE_32((READ_LOG_EXT << 16) | (C_BIT << 8) | FIS_REG_H2DEV);
4814 	msg[6] = LE_32(0x10);
4815 	msg[8] = LE_32(1);
4816 	msg[9] = 0;
4817 	msg[10] = 0;
4818 	msg[11] = 0;
4819 	msg[12] = LE_32(DWORD0(pwp->scratch_dma));
4820 	msg[13] = LE_32(DWORD1(pwp->scratch_dma));
4821 	msg[14] = LE_32(512);
4822 	msg[15] = 0;
4823 
4824 	pwrk->arg = msg;
4825 	pwrk->dtype = pptr->dtype;
4826 
4827 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4828 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4829 	if (ptr == NULL) {
4830 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4831 		pmcs_pwork(pwp, pwrk);
4832 		return (ENOMEM);
4833 	}
4834 	COPY_MESSAGE(ptr, msg, PMCS_QENTRY_SIZE);
4835 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
4836 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4837 
4838 	pmcs_unlock_phy(pptr);
4839 	WAIT_FOR(pwrk, 250, result);
4840 	pmcs_lock_phy(pptr);
4841 	pmcs_pwork(pwp, pwrk);
4842 
4843 	if (result) {
4844 		pmcs_prt(pwp, PMCS_PRT_INFO, pmcs_timeo, __func__);
4845 		return (EIO);
4846 	}
4847 	status = LE_32(msg[2]);
4848 	if (status != PMCOUT_STATUS_OK || LE_32(msg[3])) {
4849 		tgt = pptr->target;
4850 		if (tgt == NULL) {
4851 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
4852 			    "%s: cannot find target for phy 0x%p for "
4853 			    "dev state recovery", __func__, (void *)pptr);
4854 			return (EIO);
4855 		}
4856 
4857 		mutex_enter(&tgt->statlock);
4858 
4859 		pmcs_print_entry(pwp, PMCS_PRT_DEBUG, "READ LOG EXT", msg);
4860 		if ((status == PMCOUT_STATUS_IO_DS_NON_OPERATIONAL) ||
4861 		    (status == PMCOUT_STATUS_OPEN_CNX_ERROR_BREAK) ||
4862 		    (status == PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS)) {
4863 			ds = PMCS_DEVICE_STATE_NON_OPERATIONAL;
4864 		} else {
4865 			ds = PMCS_DEVICE_STATE_IN_RECOVERY;
4866 		}
4867 		if (tgt->dev_state != ds) {
4868 			pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Trying SATA DS Error"
4869 			    " Recovery for tgt(0x%p) for status(%s)",
4870 			    __func__, (void *)tgt, pmcs_status_str(status));
4871 			(void) pmcs_send_err_recovery_cmd(pwp, ds, tgt);
4872 		}
4873 
4874 		mutex_exit(&tgt->statlock);
4875 		return (EIO);
4876 	}
4877 	fis[0] = (fp[4] << 24) | (fp[3] << 16) | (fp[2] << 8) | FIS_REG_D2H;
4878 	fis[1] = (fp[8] << 24) | (fp[7] << 16) | (fp[6] << 8) | fp[5];
4879 	fis[2] = (fp[12] << 24) | (fp[11] << 16) | (fp[10] << 8) | fp[9];
4880 	fis[3] = (fp[16] << 24) | (fp[15] << 16) | (fp[14] << 8) | fp[13];
4881 	fis[4] = 0;
4882 	if (fp[0] & 0x80) {
4883 		pmcs_prt(pwp, PMCS_PRT_DEBUG, utag_fail_fmt, __func__);
4884 	} else {
4885 		pmcs_prt(pwp, PMCS_PRT_DEBUG, tag_fail_fmt, __func__,
4886 		    fp[0] & 0x1f);
4887 	}
4888 	pmcs_fis_dump(pwp, fis);
4889 	pptr->need_rl_ext = 0;
4890 	return (0);
4891 }
4892 
4893 /*
4894  * Transform a structure from CPU to Device endian format, or
4895  * vice versa, based upon a transformation vector.
4896  *
4897  * A transformation vector is an array of bytes, each byte
4898  * of which is defined thusly:
4899  *
4900  *  bit 7: from CPU to desired endian, otherwise from desired endian
4901  *	   to CPU format
4902  *  bit 6: Big Endian, else Little Endian
4903  *  bits 5-4:
4904  *       00 Undefined
4905  *       01 One Byte quantities
4906  *       02 Two Byte quantities
4907  *       03 Four Byte quantities
4908  *
4909  *  bits 3-0:
4910  *       00 Undefined
4911  *       Number of quantities to transform
4912  *
4913  * The vector is terminated by a 0 value.
4914  */
4915 
4916 void
4917 pmcs_endian_transform(pmcs_hw_t *pwp, void *orig_out, void *orig_in,
4918     const uint8_t *xfvec)
4919 {
4920 	uint8_t c, *out = orig_out, *in = orig_in;
4921 
4922 	if (xfvec == NULL) {
4923 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: null xfvec", __func__);
4924 		return;
4925 	}
4926 	if (out == NULL) {
4927 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: null out", __func__);
4928 		return;
4929 	}
4930 	if (in == NULL) {
4931 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: null in", __func__);
4932 		return;
4933 	}
4934 	while ((c = *xfvec++) != 0) {
4935 		int nbyt = (c & 0xf);
4936 		int size = (c >> 4) & 0x3;
4937 		int bige = (c >> 4) & 0x4;
4938 
4939 		switch (size) {
4940 		case 1:
4941 		{
4942 			while (nbyt-- > 0) {
4943 				*out++ = *in++;
4944 			}
4945 			break;
4946 		}
4947 		case 2:
4948 		{
4949 			uint16_t tmp;
4950 			while (nbyt-- > 0) {
4951 				(void) memcpy(&tmp, in, sizeof (uint16_t));
4952 				if (bige) {
4953 					tmp = BE_16(tmp);
4954 				} else {
4955 					tmp = LE_16(tmp);
4956 				}
4957 				(void) memcpy(out, &tmp, sizeof (uint16_t));
4958 				out += sizeof (uint16_t);
4959 				in += sizeof (uint16_t);
4960 			}
4961 			break;
4962 		}
4963 		case 3:
4964 		{
4965 			uint32_t tmp;
4966 			while (nbyt-- > 0) {
4967 				(void) memcpy(&tmp, in, sizeof (uint32_t));
4968 				if (bige) {
4969 					tmp = BE_32(tmp);
4970 				} else {
4971 					tmp = LE_32(tmp);
4972 				}
4973 				(void) memcpy(out, &tmp, sizeof (uint32_t));
4974 				out += sizeof (uint32_t);
4975 				in += sizeof (uint32_t);
4976 			}
4977 			break;
4978 		}
4979 		default:
4980 			pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: bad size", __func__);
4981 			return;
4982 		}
4983 	}
4984 }
4985 
4986 const char *
4987 pmcs_get_rate(unsigned int linkrt)
4988 {
4989 	const char *rate;
4990 	switch (linkrt) {
4991 	case SAS_LINK_RATE_1_5GBIT:
4992 		rate = "1.5";
4993 		break;
4994 	case SAS_LINK_RATE_3GBIT:
4995 		rate = "3.0";
4996 		break;
4997 	case SAS_LINK_RATE_6GBIT:
4998 		rate = "6.0";
4999 		break;
5000 	default:
5001 		rate = "???";
5002 		break;
5003 	}
5004 	return (rate);
5005 }
5006 
5007 const char *
5008 pmcs_get_typename(pmcs_dtype_t type)
5009 {
5010 	switch (type) {
5011 	case NOTHING:
5012 		return ("NIL");
5013 	case SATA:
5014 		return ("SATA");
5015 	case SAS:
5016 		return ("SSP");
5017 	case EXPANDER:
5018 		return ("EXPANDER");
5019 	}
5020 	return ("????");
5021 }
5022 
5023 const char *
5024 pmcs_tmf2str(int tmf)
5025 {
5026 	switch (tmf) {
5027 	case SAS_ABORT_TASK:
5028 		return ("Abort Task");
5029 	case SAS_ABORT_TASK_SET:
5030 		return ("Abort Task Set");
5031 	case SAS_CLEAR_TASK_SET:
5032 		return ("Clear Task Set");
5033 	case SAS_LOGICAL_UNIT_RESET:
5034 		return ("Logical Unit Reset");
5035 	case SAS_I_T_NEXUS_RESET:
5036 		return ("I_T Nexus Reset");
5037 	case SAS_CLEAR_ACA:
5038 		return ("Clear ACA");
5039 	case SAS_QUERY_TASK:
5040 		return ("Query Task");
5041 	case SAS_QUERY_TASK_SET:
5042 		return ("Query Task Set");
5043 	case SAS_QUERY_UNIT_ATTENTION:
5044 		return ("Query Unit Attention");
5045 	default:
5046 		return ("Unknown");
5047 	}
5048 }
5049 
5050 const char *
5051 pmcs_status_str(uint32_t status)
5052 {
5053 	switch (status) {
5054 	case PMCOUT_STATUS_OK:
5055 		return ("OK");
5056 	case PMCOUT_STATUS_ABORTED:
5057 		return ("ABORTED");
5058 	case PMCOUT_STATUS_OVERFLOW:
5059 		return ("OVERFLOW");
5060 	case PMCOUT_STATUS_UNDERFLOW:
5061 		return ("UNDERFLOW");
5062 	case PMCOUT_STATUS_FAILED:
5063 		return ("FAILED");
5064 	case PMCOUT_STATUS_ABORT_RESET:
5065 		return ("ABORT_RESET");
5066 	case PMCOUT_STATUS_IO_NOT_VALID:
5067 		return ("IO_NOT_VALID");
5068 	case PMCOUT_STATUS_NO_DEVICE:
5069 		return ("NO_DEVICE");
5070 	case PMCOUT_STATUS_ILLEGAL_PARAMETER:
5071 		return ("ILLEGAL_PARAMETER");
5072 	case PMCOUT_STATUS_LINK_FAILURE:
5073 		return ("LINK_FAILURE");
5074 	case PMCOUT_STATUS_PROG_ERROR:
5075 		return ("PROG_ERROR");
5076 	case PMCOUT_STATUS_EDC_IN_ERROR:
5077 		return ("EDC_IN_ERROR");
5078 	case PMCOUT_STATUS_EDC_OUT_ERROR:
5079 		return ("EDC_OUT_ERROR");
5080 	case PMCOUT_STATUS_ERROR_HW_TIMEOUT:
5081 		return ("ERROR_HW_TIMEOUT");
5082 	case PMCOUT_STATUS_XFER_ERR_BREAK:
5083 		return ("XFER_ERR_BREAK");
5084 	case PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY:
5085 		return ("XFER_ERR_PHY_NOT_READY");
5086 	case PMCOUT_STATUS_OPEN_CNX_PROTOCOL_NOT_SUPPORTED:
5087 		return ("OPEN_CNX_PROTOCOL_NOT_SUPPORTED");
5088 	case PMCOUT_STATUS_OPEN_CNX_ERROR_ZONE_VIOLATION:
5089 		return ("OPEN_CNX_ERROR_ZONE_VIOLATION");
5090 	case PMCOUT_STATUS_OPEN_CNX_ERROR_BREAK:
5091 		return ("OPEN_CNX_ERROR_BREAK");
5092 	case PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS:
5093 		return ("OPEN_CNX_ERROR_IT_NEXUS_LOSS");
5094 	case PMCOUT_STATUS_OPENCNX_ERROR_BAD_DESTINATION:
5095 		return ("OPENCNX_ERROR_BAD_DESTINATION");
5096 	case PMCOUT_STATUS_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED:
5097 		return ("OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED");
5098 	case PMCOUT_STATUS_OPEN_CNX_ERROR_STP_RESOURCES_BUSY:
5099 		return ("OPEN_CNX_ERROR_STP_RESOURCES_BUSY");
5100 	case PMCOUT_STATUS_OPEN_CNX_ERROR_WRONG_DESTINATION:
5101 		return ("OPEN_CNX_ERROR_WRONG_DESTINATION");
5102 	case PMCOUT_STATUS_OPEN_CNX_ERROR_UNKNOWN_EROOR:
5103 		return ("OPEN_CNX_ERROR_UNKNOWN_EROOR");
5104 	case PMCOUT_STATUS_IO_XFER_ERROR_NAK_RECEIVED:
5105 		return ("IO_XFER_ERROR_NAK_RECEIVED");
5106 	case PMCOUT_STATUS_XFER_ERROR_ACK_NAK_TIMEOUT:
5107 		return ("XFER_ERROR_ACK_NAK_TIMEOUT");
5108 	case PMCOUT_STATUS_XFER_ERROR_PEER_ABORTED:
5109 		return ("XFER_ERROR_PEER_ABORTED");
5110 	case PMCOUT_STATUS_XFER_ERROR_RX_FRAME:
5111 		return ("XFER_ERROR_RX_FRAME");
5112 	case PMCOUT_STATUS_IO_XFER_ERROR_DMA:
5113 		return ("IO_XFER_ERROR_DMA");
5114 	case PMCOUT_STATUS_XFER_ERROR_CREDIT_TIMEOUT:
5115 		return ("XFER_ERROR_CREDIT_TIMEOUT");
5116 	case PMCOUT_STATUS_XFER_ERROR_SATA_LINK_TIMEOUT:
5117 		return ("XFER_ERROR_SATA_LINK_TIMEOUT");
5118 	case PMCOUT_STATUS_XFER_ERROR_SATA:
5119 		return ("XFER_ERROR_SATA");
5120 	case PMCOUT_STATUS_XFER_ERROR_REJECTED_NCQ_MODE:
5121 		return ("XFER_ERROR_REJECTED_NCQ_MODE");
5122 	case PMCOUT_STATUS_XFER_ERROR_ABORTED_DUE_TO_SRST:
5123 		return ("XFER_ERROR_ABORTED_DUE_TO_SRST");
5124 	case PMCOUT_STATUS_XFER_ERROR_ABORTED_NCQ_MODE:
5125 		return ("XFER_ERROR_ABORTED_NCQ_MODE");
5126 	case PMCOUT_STATUS_IO_XFER_OPEN_RETRY_TIMEOUT:
5127 		return ("IO_XFER_OPEN_RETRY_TIMEOUT");
5128 	case PMCOUT_STATUS_SMP_RESP_CONNECTION_ERROR:
5129 		return ("SMP_RESP_CONNECTION_ERROR");
5130 	case PMCOUT_STATUS_XFER_ERROR_UNEXPECTED_PHASE:
5131 		return ("XFER_ERROR_UNEXPECTED_PHASE");
5132 	case PMCOUT_STATUS_XFER_ERROR_RDY_OVERRUN:
5133 		return ("XFER_ERROR_RDY_OVERRUN");
5134 	case PMCOUT_STATUS_XFER_ERROR_RDY_NOT_EXPECTED:
5135 		return ("XFER_ERROR_RDY_NOT_EXPECTED");
5136 	case PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT:
5137 		return ("XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT");
5138 	case PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_BREAK_BEFORE_ACK_NACK:
5139 		return ("XFER_ERROR_CMD_ISSUE_BREAK_BEFORE_ACK_NACK");
5140 	case PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_PHY_DOWN_BEFORE_ACK_NAK:
5141 		return ("XFER_ERROR_CMD_ISSUE_PHY_DOWN_BEFORE_ACK_NAK");
5142 	case PMCOUT_STATUS_XFER_ERROR_OFFSET_MISMATCH:
5143 		return ("XFER_ERROR_OFFSET_MISMATCH");
5144 	case PMCOUT_STATUS_XFER_ERROR_ZERO_DATA_LEN:
5145 		return ("XFER_ERROR_ZERO_DATA_LEN");
5146 	case PMCOUT_STATUS_XFER_CMD_FRAME_ISSUED:
5147 		return ("XFER_CMD_FRAME_ISSUED");
5148 	case PMCOUT_STATUS_ERROR_INTERNAL_SMP_RESOURCE:
5149 		return ("ERROR_INTERNAL_SMP_RESOURCE");
5150 	case PMCOUT_STATUS_IO_PORT_IN_RESET:
5151 		return ("IO_PORT_IN_RESET");
5152 	case PMCOUT_STATUS_IO_DS_NON_OPERATIONAL:
5153 		return ("DEVICE STATE NON-OPERATIONAL");
5154 	case PMCOUT_STATUS_IO_DS_IN_RECOVERY:
5155 		return ("DEVICE STATE IN RECOVERY");
5156 	default:
5157 		return (NULL);
5158 	}
5159 }
5160 
uint64_t
pmcs_barray2wwn(uint8_t ba[8])
{
	uint64_t wwn = 0;
	int idx;

	/*
	 * Fold the 8-byte, big-endian SAS address into a 64-bit WWN,
	 * most significant byte first.
	 */
	for (idx = 0; idx < 8; idx++) {
		wwn = (wwn << 8) | ba[idx];
	}
	return (wwn);
}
5173 
void
pmcs_wwn2barray(uint64_t wwn, uint8_t ba[8])
{
	int idx;

	/*
	 * Store the 64-bit WWN into the byte array most significant
	 * byte first (big-endian), peeling one byte off per pass.
	 */
	for (idx = 7; idx >= 0; idx--) {
		ba[idx] = (uint8_t)(wwn & 0xff);
		wwn >>= 8;
	}
}
5183 
5184 void
5185 pmcs_report_fwversion(pmcs_hw_t *pwp)
5186 {
5187 	const char *fwsupport;
5188 	switch (PMCS_FW_TYPE(pwp)) {
5189 	case PMCS_FW_TYPE_RELEASED:
5190 		fwsupport = "Released";
5191 		break;
5192 	case PMCS_FW_TYPE_DEVELOPMENT:
5193 		fwsupport = "Development";
5194 		break;
5195 	case PMCS_FW_TYPE_ALPHA:
5196 		fwsupport = "Alpha";
5197 		break;
5198 	case PMCS_FW_TYPE_BETA:
5199 		fwsupport = "Beta";
5200 		break;
5201 	default:
5202 		fwsupport = "Special";
5203 		break;
5204 	}
5205 	pmcs_prt(pwp, PMCS_PRT_INFO,
5206 	    "Chip Revision: %c; F/W Revision %x.%x.%x %s", 'A' + pwp->chiprev,
5207 	    PMCS_FW_MAJOR(pwp), PMCS_FW_MINOR(pwp), PMCS_FW_MICRO(pwp),
5208 	    fwsupport);
5209 }
5210 
5211 void
5212 pmcs_phy_name(pmcs_hw_t *pwp, pmcs_phy_t *pptr, char *obuf, size_t olen)
5213 {
5214 	if (pptr->parent) {
5215 		pmcs_phy_name(pwp, pptr->parent, obuf, olen);
5216 		(void) snprintf(obuf, olen, "%s.%02x", obuf, pptr->phynum);
5217 	} else {
5218 		(void) snprintf(obuf, olen, "pp%02x", pptr->phynum);
5219 	}
5220 }
5221 
5222 /*
5223  * Implementation for pmcs_find_phy_by_devid.
5224  * If the PHY is found, it is returned locked.
5225  */
static pmcs_phy_t *
pmcs_find_phy_by_devid_impl(pmcs_phy_t *phyp, uint32_t device_id)
{
	pmcs_phy_t *match, *cphyp, *nphyp;

	/* Caller must not already hold the starting PHY's lock. */
	ASSERT(!mutex_owned(&phyp->phy_lock));

	while (phyp) {
		pmcs_lock_phy(phyp);

		/* On a match, return with the PHY lock still held. */
		if ((phyp->valid_device_id) && (phyp->device_id == device_id)) {
			return (phyp);
		}
		if (phyp->children) {
			cphyp = phyp->children;
			/*
			 * Drop this PHY's lock before descending so the
			 * recursive call can acquire child locks without
			 * holding a parent lock across the search.
			 */
			pmcs_unlock_phy(phyp);
			match = pmcs_find_phy_by_devid_impl(cphyp, device_id);
			if (match) {
				ASSERT(mutex_owned(&match->phy_lock));
				return (match);
			}
			pmcs_lock_phy(phyp);
		}

		/*
		 * Root PHY siblings are iterated by the caller
		 * (pmcs_find_phy_by_devid), so stop here for root PHYs;
		 * otherwise continue with this PHY's sibling.
		 */
		if (IS_ROOT_PHY(phyp)) {
			pmcs_unlock_phy(phyp);
			phyp = NULL;
		} else {
			/* Snapshot the sibling before dropping the lock. */
			nphyp = phyp->sibling;
			pmcs_unlock_phy(phyp);
			phyp = nphyp;
		}
	}

	return (NULL);
}
5262 
5263 /*
5264  * If the PHY is found, it is returned locked
5265  */
5266 pmcs_phy_t *
5267 pmcs_find_phy_by_devid(pmcs_hw_t *pwp, uint32_t device_id)
5268 {
5269 	pmcs_phy_t *phyp, *match = NULL;
5270 
5271 	phyp = pwp->root_phys;
5272 
5273 	while (phyp) {
5274 		match = pmcs_find_phy_by_devid_impl(phyp, device_id);
5275 		if (match) {
5276 			ASSERT(mutex_owned(&match->phy_lock));
5277 			return (match);
5278 		}
5279 		phyp = phyp->sibling;
5280 	}
5281 
5282 	return (NULL);
5283 }
5284 
5285 /*
5286  * This function is called as a sanity check to ensure that a newly registered
5287  * PHY doesn't have a device_id that exists with another registered PHY.
5288  */
5289 static boolean_t
5290 pmcs_validate_devid(pmcs_phy_t *parent, pmcs_phy_t *phyp, uint32_t device_id)
5291 {
5292 	pmcs_phy_t *pptr;
5293 	boolean_t rval;
5294 
5295 	pptr = parent;
5296 
5297 	while (pptr) {
5298 		if (pptr->valid_device_id && (pptr != phyp) &&
5299 		    (pptr->device_id == device_id)) {
5300 			pmcs_prt(pptr->pwp, PMCS_PRT_DEBUG,
5301 			    "%s: phy %s already exists as %s with "
5302 			    "device id 0x%x", __func__, phyp->path,
5303 			    pptr->path, device_id);
5304 			return (B_FALSE);
5305 		}
5306 
5307 		if (pptr->children) {
5308 			rval = pmcs_validate_devid(pptr->children, phyp,
5309 			    device_id);
5310 			if (rval == B_FALSE) {
5311 				return (rval);
5312 			}
5313 		}
5314 
5315 		pptr = pptr->sibling;
5316 	}
5317 
5318 	/* This PHY and device_id are valid */
5319 	return (B_TRUE);
5320 }
5321 
5322 /*
5323  * If the PHY is found, it is returned locked
5324  */
static pmcs_phy_t *
pmcs_find_phy_by_wwn_impl(pmcs_phy_t *phyp, uint8_t *wwn)
{
	pmcs_phy_t *matched_phy, *cphyp, *nphyp;

	/* Caller must not already hold the starting PHY's lock. */
	ASSERT(!mutex_owned(&phyp->phy_lock));

	while (phyp) {
		pmcs_lock_phy(phyp);

		/*
		 * Compare the 8-byte SAS address only on PHYs with a
		 * registered device; return a match with its lock held.
		 */
		if (phyp->valid_device_id) {
			if (memcmp(phyp->sas_address, wwn, 8) == 0) {
				return (phyp);
			}
		}

		if (phyp->children) {
			cphyp = phyp->children;
			/*
			 * Drop this PHY's lock before descending so the
			 * recursive call can acquire child locks without
			 * holding a parent lock across the search.
			 */
			pmcs_unlock_phy(phyp);
			matched_phy = pmcs_find_phy_by_wwn_impl(cphyp, wwn);
			if (matched_phy) {
				ASSERT(mutex_owned(&matched_phy->phy_lock));
				return (matched_phy);
			}
			pmcs_lock_phy(phyp);
		}

		/*
		 * Only iterate through non-root PHYs
		 */
		if (IS_ROOT_PHY(phyp)) {
			pmcs_unlock_phy(phyp);
			phyp = NULL;
		} else {
			/* Snapshot the sibling before dropping the lock. */
			nphyp = phyp->sibling;
			pmcs_unlock_phy(phyp);
			phyp = nphyp;
		}
	}

	return (NULL);
}
5367 
5368 pmcs_phy_t *
5369 pmcs_find_phy_by_wwn(pmcs_hw_t *pwp, uint64_t wwn)
5370 {
5371 	uint8_t ebstr[8];
5372 	pmcs_phy_t *pptr, *matched_phy;
5373 
5374 	pmcs_wwn2barray(wwn, ebstr);
5375 
5376 	pptr = pwp->root_phys;
5377 	while (pptr) {
5378 		matched_phy = pmcs_find_phy_by_wwn_impl(pptr, ebstr);
5379 		if (matched_phy) {
5380 			ASSERT(mutex_owned(&matched_phy->phy_lock));
5381 			return (matched_phy);
5382 		}
5383 
5384 		pptr = pptr->sibling;
5385 	}
5386 
5387 	return (NULL);
5388 }
5389 
5390 
5391 /*
5392  * pmcs_find_phy_by_sas_address
5393  *
5394  * Find a PHY that both matches "sas_addr" and is on "iport".
5395  * If a matching PHY is found, it is returned locked.
5396  */
pmcs_phy_t *
pmcs_find_phy_by_sas_address(pmcs_hw_t *pwp, pmcs_iport_t *iport,
    pmcs_phy_t *root, char *sas_addr)
{
	int ua_form = 1;
	uint64_t wwn;
	char addr[PMCS_MAX_UA_SIZE];
	pmcs_phy_t *pptr, *pnext, *pchild;

	/* A NULL root means "start from the HBA's root PHY list". */
	if (root == NULL) {
		pptr = pwp->root_phys;
	} else {
		pptr = root;
	}

	while (pptr) {
		pmcs_lock_phy(pptr);
		/*
		 * If the PHY is dead or does not have a valid device ID,
		 * skip it.
		 */
		if ((pptr->dead) || (!pptr->valid_device_id)) {
			goto next_phy;
		}

		/* Only PHYs attached to the requested iport qualify. */
		if (pptr->iport != iport) {
			goto next_phy;
		}

		/*
		 * Render this PHY's SAS address as a unit-address string
		 * and compare; a match is returned with its lock held.
		 */
		wwn = pmcs_barray2wwn(pptr->sas_address);
		(void *) scsi_wwn_to_wwnstr(wwn, ua_form, addr);
		if (strncmp(addr, sas_addr, strlen(addr)) == 0) {
			return (pptr);
		}

		if (pptr->children) {
			pchild = pptr->children;
			/*
			 * Drop this PHY's lock before recursing into the
			 * children; the recursive call returns any match
			 * already locked.
			 */
			pmcs_unlock_phy(pptr);
			pnext = pmcs_find_phy_by_sas_address(pwp, iport, pchild,
			    sas_addr);
			if (pnext) {
				return (pnext);
			}
			pmcs_lock_phy(pptr);
		}

next_phy:
		/* Snapshot the sibling before dropping this PHY's lock. */
		pnext = pptr->sibling;
		pmcs_unlock_phy(pptr);
		pptr = pnext;
	}

	return (NULL);
}
5451 
/*
 * Decode and log a SATA FIS.  Register host-to-device and
 * device-to-host FIS types get field-level decoding; anything else is
 * dumped as raw words.
 */
void
pmcs_fis_dump(pmcs_hw_t *pwp, fis_t fis)
{
	/* The FIS type lives in the low byte of the first word. */
	switch (fis[0] & 0xff) {
	case FIS_REG_H2DEV:
		pmcs_prt(pwp, PMCS_PRT_INFO, "FIS REGISTER HOST TO DEVICE: "
		    "OP=0x%02x Feature=0x%04x Count=0x%04x Device=0x%02x "
		    "LBA=%llu", BYTE2(fis[0]), BYTE3(fis[2]) << 8 |
		    BYTE3(fis[0]), WORD0(fis[3]), BYTE3(fis[1]),
		    (unsigned long long)
		    (((uint64_t)fis[2] & 0x00ffffff) << 24 |
		    ((uint64_t)fis[1] & 0x00ffffff)));
		break;
	case FIS_REG_D2H:
		pmcs_prt(pwp, PMCS_PRT_INFO, "FIS REGISTER DEVICE TO HOST: Stat"
		    "us=0x%02x Error=0x%02x Dev=0x%02x Count=0x%04x LBA=%llu",
		    BYTE2(fis[0]), BYTE3(fis[0]), BYTE3(fis[1]), WORD0(fis[3]),
		    (unsigned long long)(((uint64_t)fis[2] & 0x00ffffff) << 24 |
		    ((uint64_t)fis[1] & 0x00ffffff)));
		break;
	default:
		/* Unrecognized FIS type: dump the first seven words raw. */
		pmcs_prt(pwp, PMCS_PRT_INFO, "FIS: 0x%08x 0x%08x 0x%08x 0x%08x "
		    "0x%08x 0x%08x 0x%08x",
		    fis[0], fis[1], fis[2], fis[3], fis[4], fis[5], fis[6]);
		break;
	}
}
5479 
5480 void
5481 pmcs_print_entry(pmcs_hw_t *pwp, int level, char *msg, void *arg)
5482 {
5483 	uint32_t *mb = arg;
5484 	size_t i;
5485 
5486 	pmcs_prt(pwp, level, msg);
5487 	for (i = 0; i < (PMCS_QENTRY_SIZE / sizeof (uint32_t)); i += 4) {
5488 		pmcs_prt(pwp, level, "Offset %2lu: 0x%08x 0x%08x 0x%08"
5489 		    "x 0x%08x", i * sizeof (uint32_t), LE_32(mb[i]),
5490 		    LE_32(mb[i+1]), LE_32(mb[i+2]),
5491 		    LE_32(mb[i+3]));
5492 	}
5493 }
5494 
5495 /*
5496  * If phyp == NULL we're being called from the worker thread, in which
5497  * case we need to check all the PHYs.  In this case, the softstate lock
5498  * will be held.
5499  * If phyp is non-NULL, just issue the spinup release for the specified PHY
5500  * (which will already be locked).
5501  */
5502 void
5503 pmcs_spinup_release(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
5504 {
5505 	uint32_t *msg;
5506 	struct pmcwork *pwrk;
5507 	pmcs_phy_t *tphyp;
5508 
5509 	if (phyp != NULL) {
5510 		ASSERT(mutex_owned(&phyp->phy_lock));
5511 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
5512 		    "%s: Issuing spinup release only for PHY %s", __func__,
5513 		    phyp->path);
5514 		mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5515 		msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5516 		if (msg == NULL || (pwrk =
5517 		    pmcs_gwork(pwp, PMCS_TAG_TYPE_NONE, NULL)) == NULL) {
5518 			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5519 			SCHEDULE_WORK(pwp, PMCS_WORK_SPINUP_RELEASE);
5520 			return;
5521 		}
5522 
5523 		phyp->spinup_hold = 0;
5524 		bzero(msg, PMCS_QENTRY_SIZE);
5525 		msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
5526 		    PMCIN_LOCAL_PHY_CONTROL));
5527 		msg[1] = LE_32(pwrk->htag);
5528 		msg[2] = LE_32((0x10 << 8) | phyp->phynum);
5529 
5530 		pwrk->dtype = phyp->dtype;
5531 		pwrk->state = PMCS_WORK_STATE_ONCHIP;
5532 		mutex_exit(&pwrk->lock);
5533 		INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5534 		return;
5535 	}
5536 
5537 	ASSERT(mutex_owned(&pwp->lock));
5538 
5539 	tphyp = pwp->root_phys;
5540 	while (tphyp) {
5541 		pmcs_lock_phy(tphyp);
5542 		if (tphyp->spinup_hold == 0) {
5543 			pmcs_unlock_phy(tphyp);
5544 			tphyp = tphyp->sibling;
5545 			continue;
5546 		}
5547 
5548 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
5549 		    "%s: Issuing spinup release for PHY %s", __func__,
5550 		    phyp->path);
5551 
5552 		mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5553 		msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5554 		if (msg == NULL || (pwrk =
5555 		    pmcs_gwork(pwp, PMCS_TAG_TYPE_NONE, NULL)) == NULL) {
5556 			pmcs_unlock_phy(tphyp);
5557 			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5558 			SCHEDULE_WORK(pwp, PMCS_WORK_SPINUP_RELEASE);
5559 			break;
5560 		}
5561 
5562 		tphyp->spinup_hold = 0;
5563 		bzero(msg, PMCS_QENTRY_SIZE);
5564 		msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
5565 		    PMCIN_LOCAL_PHY_CONTROL));
5566 		msg[1] = LE_32(pwrk->htag);
5567 		msg[2] = LE_32((0x10 << 8) | tphyp->phynum);
5568 
5569 		pwrk->dtype = phyp->dtype;
5570 		pwrk->state = PMCS_WORK_STATE_ONCHIP;
5571 		mutex_exit(&pwrk->lock);
5572 		INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5573 		pmcs_unlock_phy(tphyp);
5574 
5575 		tphyp = tphyp->sibling;
5576 	}
5577 }
5578 
5579 /*
5580  * Abort commands on dead PHYs and deregister them as well as removing
5581  * the associated targets.
5582  */
static int
pmcs_kill_devices(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
{
	pmcs_phy_t *pnext, *pchild;
	boolean_t remove_device;
	int rval = 0;

	while (phyp) {
		/*
		 * Snapshot the child and sibling pointers under the PHY
		 * lock, then drop it before recursing (depth-first).
		 */
		pmcs_lock_phy(phyp);
		pchild = phyp->children;
		pnext = phyp->sibling;
		pmcs_unlock_phy(phyp);

		if (pchild) {
			rval = pmcs_kill_devices(pwp, pchild);
			if (rval) {
				/* Propagate the first failure up. */
				return (rval);
			}
		}

		/*
		 * pmcs_remove_device requires the softstate lock.
		 */
		mutex_enter(&pwp->lock);
		pmcs_lock_phy(phyp);
		/* Only dead PHYs with registered devices are torn down. */
		if (phyp->dead && phyp->valid_device_id) {
			remove_device = B_TRUE;
		} else {
			remove_device = B_FALSE;
		}

		if (remove_device) {
			pmcs_remove_device(pwp, phyp);
			/* Softstate lock is no longer needed past here. */
			mutex_exit(&pwp->lock);

			rval = pmcs_kill_device(pwp, phyp);

			if (rval) {
				pmcs_unlock_phy(phyp);
				return (rval);
			}
		} else {
			mutex_exit(&pwp->lock);
		}

		pmcs_unlock_phy(phyp);
		phyp = pnext;
	}

	return (rval);
}
5634 
5635 /*
5636  * Called with PHY locked
5637  */
int
pmcs_kill_device(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
{
	int r, result;
	uint32_t msg[PMCS_MSG_SIZE], *ptr, status;
	struct pmcwork *pwrk;
	pmcs_xscsi_t *tgt;

	pmcs_prt(pwp, PMCS_PRT_DEBUG, "kill %s device @ %s",
	    pmcs_get_typename(pptr->dtype), pptr->path);

	/*
	 * There may be an outstanding ABORT_ALL running, which we wouldn't
	 * know just by checking abort_pending.  We can, however, check
	 * abort_all_start.  If it's non-zero, there is one, and we'll just
	 * sit here and wait for it to complete.  If we don't, we'll remove
	 * the device while there are still commands pending.
	 */
	if (pptr->abort_all_start) {
		while (pptr->abort_all_start) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG,
			    "%s: Waiting for outstanding ABORT_ALL on PHY 0x%p",
			    __func__, (void *)pptr);
			/* cv_wait drops and reacquires the PHY lock. */
			cv_wait(&pptr->abort_all_cv, &pptr->phy_lock);
		}
	} else if (pptr->abort_pending) {
		/* No ABORT_ALL in flight but aborts pending: issue one. */
		r = pmcs_abort(pwp, pptr, pptr->device_id, 1, 1);

		if (r) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG,
			    "%s: ABORT_ALL returned non-zero status (%d) for "
			    "PHY 0x%p", __func__, r, (void *)pptr);
			return (r);
		}
		pptr->abort_pending = 0;
	}

	/*
	 * Now that everything is aborted from the chip's perspective (or even
	 * if it is not), flush out the wait queue.  We won't flush the active
	 * queue since it is possible that abort completions may follow after
	 * the notification that the abort all has completed.
	 */
	tgt = pptr->target;
	if (tgt) {
		mutex_enter(&tgt->statlock);
		pmcs_flush_target_queues(pwp, tgt, PMCS_TGT_WAIT_QUEUE);
		mutex_exit(&tgt->statlock);
	}

	/* Nothing registered with the chip means nothing to deregister. */
	if (pptr->valid_device_id == 0) {
		return (0);
	}

	if ((pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr)) == NULL) {
		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
		return (ENOMEM);
	}
	/* Build the DEREGISTER_DEVICE_HANDLE request. */
	pwrk->arg = msg;
	pwrk->dtype = pptr->dtype;
	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
	    PMCIN_DEREGISTER_DEVICE_HANDLE));
	msg[1] = LE_32(pwrk->htag);
	msg[2] = LE_32(pptr->device_id);

	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
	if (ptr == NULL) {
		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
		mutex_exit(&pwrk->lock);
		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
		return (ENOMEM);
	}

	COPY_MESSAGE(ptr, msg, 3);
	pwrk->state = PMCS_WORK_STATE_ONCHIP;
	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);

	/*
	 * Drop the PHY lock while waiting for completion (timeout 250 —
	 * presumably milliseconds; confirm against WAIT_FOR's definition).
	 */
	pmcs_unlock_phy(pptr);
	WAIT_FOR(pwrk, 250, result);
	pmcs_lock_phy(pptr);
	pmcs_pwork(pwp, pwrk);

	if (result) {
		return (ETIMEDOUT);
	}
	/* The completion status comes back in msg[2] via pwrk->arg. */
	status = LE_32(msg[2]);
	if (status != PMCOUT_STATUS_OK) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG,
		    "%s: status 0x%x when trying to deregister device %s",
		    __func__, status, pptr->path);
	}

	/* Invalidate the handle and force rediscovery of this PHY. */
	pptr->device_id = PMCS_INVALID_DEVICE_ID;
	PHY_CHANGED(pwp, pptr);
	RESTART_DISCOVERY(pwp);
	pptr->valid_device_id = 0;
	return (0);
}
5737 
5738 /*
5739  * Acknowledge the SAS h/w events that need acknowledgement.
5740  * This is only needed for first level PHYs.
5741  */
5742 void
5743 pmcs_ack_events(pmcs_hw_t *pwp)
5744 {
5745 	uint32_t msg[PMCS_MSG_SIZE], *ptr;
5746 	struct pmcwork *pwrk;
5747 	pmcs_phy_t *pptr;
5748 
5749 	for (pptr = pwp->root_phys; pptr; pptr = pptr->sibling) {
5750 		pmcs_lock_phy(pptr);
5751 		if (pptr->hw_event_ack == 0) {
5752 			pmcs_unlock_phy(pptr);
5753 			continue;
5754 		}
5755 		mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5756 		ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5757 
5758 		if ((ptr == NULL) || (pwrk =
5759 		    pmcs_gwork(pwp, PMCS_TAG_TYPE_NONE, NULL)) == NULL) {
5760 			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5761 			pmcs_unlock_phy(pptr);
5762 			SCHEDULE_WORK(pwp, PMCS_WORK_SAS_HW_ACK);
5763 			break;
5764 		}
5765 
5766 		msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
5767 		    PMCIN_SAW_HW_EVENT_ACK));
5768 		msg[1] = LE_32(pwrk->htag);
5769 		msg[2] = LE_32(pptr->hw_event_ack);
5770 
5771 		mutex_exit(&pwrk->lock);
5772 		pwrk->dtype = pptr->dtype;
5773 		pptr->hw_event_ack = 0;
5774 		COPY_MESSAGE(ptr, msg, 3);
5775 		INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5776 		pmcs_unlock_phy(pptr);
5777 	}
5778 }
5779 
5780 /*
5781  * Load DMA
5782  */
int
pmcs_dma_load(pmcs_hw_t *pwp, pmcs_cmd_t *sp, uint32_t *msg)
{
	ddi_dma_cookie_t *sg;
	pmcs_dmachunk_t *tc;
	pmcs_dmasgl_t *sgl, *prior;
	int seg, tsc;
	uint64_t sgl_addr;

	/*
	 * If we have no data segments, we're done.
	 */
	if (CMD2PKT(sp)->pkt_numcookies == 0) {
		return (0);
	}

	/*
	 * Get the S/G list pointer.
	 */
	sg = CMD2PKT(sp)->pkt_cookies;

	/*
	 * If we only have one dma segment, we can directly address that
	 * data within the Inbound message itself.
	 */
	if (CMD2PKT(sp)->pkt_numcookies == 1) {
		msg[12] = LE_32(DWORD0(sg->dmac_laddress));
		msg[13] = LE_32(DWORD1(sg->dmac_laddress));
		msg[14] = LE_32(sg->dmac_size);
		msg[15] = 0;
		return (0);
	}

	/*
	 * Otherwise, we'll need one or more external S/G list chunks.
	 * Get the first one and its dma address into the Inbound message.
	 */
	mutex_enter(&pwp->dma_lock);
	tc = pwp->dma_freelist;
	if (tc == NULL) {
		/* Freelist empty: ask the worker to add chunks, fail now. */
		SCHEDULE_WORK(pwp, PMCS_WORK_ADD_DMA_CHUNKS);
		mutex_exit(&pwp->dma_lock);
		pmcs_prt(pwp, PMCS_PRT_DEBUG2, "%s: out of SG lists", __func__);
		return (-1);
	}
	pwp->dma_freelist = tc->nxt;
	mutex_exit(&pwp->dma_lock);

	/*
	 * The chunk becomes the head of this command's chunk list so it
	 * can be returned to the freelist by pmcs_dma_unload later.
	 */
	tc->nxt = NULL;
	sp->cmd_clist = tc;
	sgl = tc->chunks;
	(void) memset(tc->chunks, 0, PMCS_SGL_CHUNKSZ);
	sgl_addr = tc->addr;
	/* Point the Inbound message at the external SGL chunk. */
	msg[12] = LE_32(DWORD0(sgl_addr));
	msg[13] = LE_32(DWORD1(sgl_addr));
	msg[14] = 0;
	msg[15] = LE_32(PMCS_DMASGL_EXTENSION);

	prior = sgl;
	tsc = 0;

	for (seg = 0; seg < CMD2PKT(sp)->pkt_numcookies; seg++) {
		/*
		 * If the current segment count for this chunk is one less than
		 * the number s/g lists per chunk and we have more than one seg
		 * to go, we need another chunk. Get it, and make sure that the
		 * tail end of the the previous chunk points the new chunk
		 * (if remembering an offset can be called 'pointing to').
		 *
		 * Note that we can store the offset into our command area that
		 * represents the new chunk in the length field of the part
		 * that points the PMC chip at the next chunk- the PMC chip
		 * ignores this field when the EXTENSION bit is set.
		 *
		 * This is required for dma unloads later.
		 */
		if (tsc == (PMCS_SGL_NCHUNKS - 1) &&
		    seg < (CMD2PKT(sp)->pkt_numcookies - 1)) {
			mutex_enter(&pwp->dma_lock);
			tc = pwp->dma_freelist;
			if (tc == NULL) {
				/* Undo partial load before failing. */
				SCHEDULE_WORK(pwp, PMCS_WORK_ADD_DMA_CHUNKS);
				mutex_exit(&pwp->dma_lock);
				pmcs_dma_unload(pwp, sp);
				pmcs_prt(pwp, PMCS_PRT_DEBUG2,
				    "%s: out of SG lists", __func__);
				return (-1);
			}
			pwp->dma_freelist = tc->nxt;
			/* New chunk goes at the head of the command's list. */
			tc->nxt = sp->cmd_clist;
			mutex_exit(&pwp->dma_lock);

			sp->cmd_clist = tc;
			(void) memset(tc->chunks, 0, PMCS_SGL_CHUNKSZ);
			sgl = tc->chunks;
			sgl_addr = tc->addr;
			/*
			 * Turn the last entry of the previous chunk into an
			 * extension descriptor pointing at the new chunk.
			 */
			prior[PMCS_SGL_NCHUNKS-1].sglal =
			    LE_32(DWORD0(sgl_addr));
			prior[PMCS_SGL_NCHUNKS-1].sglah =
			    LE_32(DWORD1(sgl_addr));
			prior[PMCS_SGL_NCHUNKS-1].sglen = 0;
			prior[PMCS_SGL_NCHUNKS-1].flags =
			    LE_32(PMCS_DMASGL_EXTENSION);
			prior = sgl;
			tsc = 0;
		}
		/* Copy this cookie into the current SGL entry. */
		sgl[tsc].sglal = LE_32(DWORD0(sg->dmac_laddress));
		sgl[tsc].sglah = LE_32(DWORD1(sg->dmac_laddress));
		sgl[tsc].sglen = LE_32(sg->dmac_size);
		sgl[tsc++].flags = 0;
		sg++;
	}
	return (0);
}
5897 
5898 /*
5899  * Unload DMA
5900  */
5901 void
5902 pmcs_dma_unload(pmcs_hw_t *pwp, pmcs_cmd_t *sp)
5903 {
5904 	pmcs_dmachunk_t *cp;
5905 
5906 	mutex_enter(&pwp->dma_lock);
5907 	while ((cp = sp->cmd_clist) != NULL) {
5908 		sp->cmd_clist = cp->nxt;
5909 		cp->nxt = pwp->dma_freelist;
5910 		pwp->dma_freelist = cp;
5911 	}
5912 	mutex_exit(&pwp->dma_lock);
5913 }
5914 
5915 /*
5916  * Take a chunk of consistent memory that has just been allocated and inserted
5917  * into the cip indices and prepare it for DMA chunk usage and add it to the
5918  * freelist.
5919  *
5920  * Called with dma_lock locked (except during attach when it's unnecessary)
5921  */
void
pmcs_idma_chunks(pmcs_hw_t *pwp, pmcs_dmachunk_t *dcp,
    pmcs_chunk_t *pchunk, unsigned long lim)
{
	unsigned long off, n;
	pmcs_dmachunk_t *np = dcp;
	pmcs_chunk_t *tmp_chunk;

	/* Append the backing memory chunk to the HBA-wide chunk list. */
	if (pwp->dma_chunklist == NULL) {
		pwp->dma_chunklist = pchunk;
	} else {
		tmp_chunk = pwp->dma_chunklist;
		while (tmp_chunk->next) {
			tmp_chunk = tmp_chunk->next;
		}
		tmp_chunk->next = pchunk;
	}

	/*
	 * Install offsets into chunk lists.
	 */
	for (n = 0, off = 0; off < lim; off += PMCS_SGL_CHUNKSZ, n++) {
		/* Each dmachunk covers PMCS_SGL_CHUNKSZ bytes of pchunk. */
		np->chunks = (void *)&pchunk->addrp[off];
		np->addr = pchunk->dma_addr + off;
		np->acc_handle = pchunk->acc_handle;
		np->dma_handle = pchunk->dma_handle;
		/* Advance except on the final dmachunk (np ends as tail). */
		if ((off + PMCS_SGL_CHUNKSZ) < lim) {
			np = np->nxt;
		}
	}
	/* Splice the new dcp..np run onto the front of the freelist. */
	np->nxt = pwp->dma_freelist;
	pwp->dma_freelist = dcp;
	pmcs_prt(pwp, PMCS_PRT_DEBUG2,
	    "added %lu DMA chunks ", n);
}
5957 
5958 /*
5959  * Change the value of the interrupt coalescing timer.  This is done currently
5960  * only for I/O completions.  If we're using the "auto clear" feature, it can
5961  * be turned back on when interrupt coalescing is turned off and must be
5962  * turned off when the coalescing timer is on.
5963  * NOTE: PMCS_MSIX_GENERAL and PMCS_OQ_IODONE are the same value.  As long
5964  * as that's true, we don't need to distinguish between them.
5965  */
5966 
void
pmcs_set_intr_coal_timer(pmcs_hw_t *pwp, pmcs_coal_timer_adj_t adj)
{
	if (adj == DECREASE_TIMER) {
		/* If the timer is already off, nothing to do. */
		if (pwp->io_intr_coal.timer_on == B_FALSE) {
			return;
		}

		/* Step the timer down by one granularity unit. */
		pwp->io_intr_coal.intr_coal_timer -= PMCS_COAL_TIMER_GRAN;

		if (pwp->io_intr_coal.intr_coal_timer == 0) {
			/* Disable the timer */
			pmcs_wr_topunit(pwp, PMCS_INT_COALESCING_CONTROL, 0);

			/*
			 * Re-enable doorbell auto-clear for the I/O-done
			 * vector now that coalescing is off (auto-clear and
			 * the coalescing timer are mutually exclusive).
			 */
			if (pwp->odb_auto_clear & (1 << PMCS_MSIX_IODONE)) {
				pmcs_wr_topunit(pwp, PMCS_OBDB_AUTO_CLR,
				    pwp->odb_auto_clear);
			}

			/* Reset all coalescing bookkeeping state. */
			pwp->io_intr_coal.timer_on = B_FALSE;
			pwp->io_intr_coal.max_io_completions = B_FALSE;
			pwp->io_intr_coal.num_intrs = 0;
			pwp->io_intr_coal.int_cleared = B_FALSE;
			pwp->io_intr_coal.num_io_completions = 0;

			DTRACE_PROBE1(pmcs__intr__coalesce__timer__off,
			    pmcs_io_intr_coal_t *, &pwp->io_intr_coal);
		} else {
			/* Timer still on: write the reduced value. */
			pmcs_wr_topunit(pwp, PMCS_INT_COALESCING_TIMER,
			    pwp->io_intr_coal.intr_coal_timer);
		}
	} else {
		/*
		 * If the timer isn't on yet, do the setup for it now.
		 */
		if (pwp->io_intr_coal.timer_on == B_FALSE) {
			/* If auto clear is being used, turn it off. */
			if (pwp->odb_auto_clear & (1 << PMCS_MSIX_IODONE)) {
				pmcs_wr_topunit(pwp, PMCS_OBDB_AUTO_CLR,
				    (pwp->odb_auto_clear &
				    ~(1 << PMCS_MSIX_IODONE)));
			}

			/* Enable coalescing for the I/O-done vector. */
			pmcs_wr_topunit(pwp, PMCS_INT_COALESCING_CONTROL,
			    (1 << PMCS_MSIX_IODONE));
			pwp->io_intr_coal.timer_on = B_TRUE;
			pwp->io_intr_coal.intr_coal_timer =
			    PMCS_COAL_TIMER_GRAN;

			DTRACE_PROBE1(pmcs__intr__coalesce__timer__on,
			    pmcs_io_intr_coal_t *, &pwp->io_intr_coal);
		} else {
			/* Already on: step the timer up by one unit. */
			pwp->io_intr_coal.intr_coal_timer +=
			    PMCS_COAL_TIMER_GRAN;
		}

		/* Clamp to the maximum the hardware supports. */
		if (pwp->io_intr_coal.intr_coal_timer > PMCS_MAX_COAL_TIMER) {
			pwp->io_intr_coal.intr_coal_timer = PMCS_MAX_COAL_TIMER;
		}

		pmcs_wr_topunit(pwp, PMCS_INT_COALESCING_TIMER,
		    pwp->io_intr_coal.intr_coal_timer);
	}

	/*
	 * Adjust the interrupt threshold based on the current timer value
	 */
	pwp->io_intr_coal.intr_threshold =
	    PMCS_INTR_THRESHOLD(PMCS_QUANTUM_TIME_USECS * 1000 /
	    (pwp->io_intr_coal.intr_latency +
	    (pwp->io_intr_coal.intr_coal_timer * 1000)));
}
6040 
6041 /*
6042  * Register Access functions
6043  */
/*
 * Read the inbound queue consumer index for queue "qnum" from the
 * host-memory control area, syncing the DMA buffer first so the value
 * reflects the chip's latest write.
 */
uint32_t
pmcs_rd_iqci(pmcs_hw_t *pwp, uint32_t qnum)
{
	uint32_t iqci;

	if (ddi_dma_sync(pwp->cip_handles, 0, 0, DDI_DMA_SYNC_FORKERNEL) !=
	    DDI_SUCCESS) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: ddi_dma_sync failed?",
		    __func__);
	}

	/* The index is stored little-endian at IQ_OFFSET(qnum). */
	iqci = LE_32(
	    ((uint32_t *)((void *)pwp->cip))[IQ_OFFSET(qnum) >> 2]);

	return (iqci);
}

/*
 * Read the outbound queue producer index for queue "qnum" from the
 * host-memory control area, syncing the DMA buffer first.
 */
uint32_t
pmcs_rd_oqpi(pmcs_hw_t *pwp, uint32_t qnum)
{
	uint32_t oqpi;

	if (ddi_dma_sync(pwp->cip_handles, 0, 0, DDI_DMA_SYNC_FORKERNEL) !=
	    DDI_SUCCESS) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: ddi_dma_sync failed?",
		    __func__);
	}

	/* The index is stored little-endian at OQ_OFFSET(qnum). */
	oqpi = LE_32(
	    ((uint32_t *)((void *)pwp->cip))[OQ_OFFSET(qnum) >> 2]);

	return (oqpi);
}
6077 
/*
 * Read a GSM register.  GSM space is reached through a sliding window:
 * the AXI translation register selects which GSM page the BAR maps.
 * Swap the window in under axil_lock, read, then restore the old page.
 */
uint32_t
pmcs_rd_gsm_reg(pmcs_hw_t *pwp, uint32_t off)
{
	uint32_t rv, newaxil, oldaxil;

	/* Split the offset into page-select bits and in-page offset. */
	newaxil = off & ~GSM_BASE_MASK;
	off &= GSM_BASE_MASK;
	mutex_enter(&pwp->axil_lock);
	oldaxil = ddi_get32(pwp->top_acc_handle,
	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]);
	ddi_put32(pwp->top_acc_handle,
	    &pwp->top_regs[PMCS_AXI_TRANS >> 2], newaxil);
	/* Give the window-select write time to take, then verify it. */
	drv_usecwait(10);
	if (ddi_get32(pwp->top_acc_handle,
	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]) != newaxil) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "AXIL register update failed");
	}
	rv = ddi_get32(pwp->gsm_acc_handle, &pwp->gsm_regs[off >> 2]);
	/* Restore the previous window and verify the restore as well. */
	ddi_put32(pwp->top_acc_handle,
	    &pwp->top_regs[PMCS_AXI_TRANS >> 2], oldaxil);
	drv_usecwait(10);
	if (ddi_get32(pwp->top_acc_handle,
	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]) != oldaxil) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "AXIL register restore failed");
	}
	mutex_exit(&pwp->axil_lock);
	return (rv);
}
6106 
/*
 * Write a GSM register.  Same sliding-window protocol as
 * pmcs_rd_gsm_reg: select the GSM page via the AXI translation register
 * under axil_lock, write the value, then restore the previous page.
 */
void
pmcs_wr_gsm_reg(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
{
	uint32_t newaxil, oldaxil;

	/* Split the offset into page-select bits and in-page offset. */
	newaxil = off & ~GSM_BASE_MASK;
	off &= GSM_BASE_MASK;
	mutex_enter(&pwp->axil_lock);
	oldaxil = ddi_get32(pwp->top_acc_handle,
	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]);
	ddi_put32(pwp->top_acc_handle,
	    &pwp->top_regs[PMCS_AXI_TRANS >> 2], newaxil);
	/* Give the window-select write time to take, then verify it. */
	drv_usecwait(10);
	if (ddi_get32(pwp->top_acc_handle,
	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]) != newaxil) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "AXIL register update failed");
	}
	ddi_put32(pwp->gsm_acc_handle, &pwp->gsm_regs[off >> 2], val);
	/* Restore the previous window and verify the restore as well. */
	ddi_put32(pwp->top_acc_handle,
	    &pwp->top_regs[PMCS_AXI_TRANS >> 2], oldaxil);
	drv_usecwait(10);
	if (ddi_get32(pwp->top_acc_handle,
	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]) != oldaxil) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "AXIL register restore failed");
	}
	mutex_exit(&pwp->axil_lock);
}
6134 
6135 uint32_t
6136 pmcs_rd_topunit(pmcs_hw_t *pwp, uint32_t off)
6137 {
6138 	switch (off) {
6139 	case PMCS_SPC_RESET:
6140 	case PMCS_SPC_BOOT_STRAP:
6141 	case PMCS_SPC_DEVICE_ID:
6142 	case PMCS_DEVICE_REVISION:
6143 		off = pmcs_rd_gsm_reg(pwp, off);
6144 		break;
6145 	default:
6146 		off = ddi_get32(pwp->top_acc_handle,
6147 		    &pwp->top_regs[off >> 2]);
6148 		break;
6149 	}
6150 	return (off);
6151 }
6152 
6153 void
6154 pmcs_wr_topunit(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
6155 {
6156 	switch (off) {
6157 	case PMCS_SPC_RESET:
6158 	case PMCS_DEVICE_REVISION:
6159 		pmcs_wr_gsm_reg(pwp, off, val);
6160 		break;
6161 	default:
6162 		ddi_put32(pwp->top_acc_handle, &pwp->top_regs[off >> 2], val);
6163 		break;
6164 	}
6165 }
6166 
/* Read a 32-bit register from the message unit region. */
uint32_t
pmcs_rd_msgunit(pmcs_hw_t *pwp, uint32_t off)
{
	return (ddi_get32(pwp->msg_acc_handle, &pwp->msg_regs[off >> 2]));
}

/* Read a 32-bit word from the main MPI configuration table. */
uint32_t
pmcs_rd_mpi_tbl(pmcs_hw_t *pwp, uint32_t off)
{
	return (ddi_get32(pwp->mpi_acc_handle,
	    &pwp->mpi_regs[(pwp->mpi_offset + off) >> 2]));
}

/* Read a 32-bit word from the general status table. */
uint32_t
pmcs_rd_gst_tbl(pmcs_hw_t *pwp, uint32_t off)
{
	return (ddi_get32(pwp->mpi_acc_handle,
	    &pwp->mpi_regs[(pwp->mpi_gst_offset + off) >> 2]));
}

/* Read a 32-bit word from the inbound queue configuration table. */
uint32_t
pmcs_rd_iqc_tbl(pmcs_hw_t *pwp, uint32_t off)
{
	return (ddi_get32(pwp->mpi_acc_handle,
	    &pwp->mpi_regs[(pwp->mpi_iqc_offset + off) >> 2]));
}

/* Read a 32-bit word from the outbound queue configuration table. */
uint32_t
pmcs_rd_oqc_tbl(pmcs_hw_t *pwp, uint32_t off)
{
	return (ddi_get32(pwp->mpi_acc_handle,
	    &pwp->mpi_regs[(pwp->mpi_oqc_offset + off) >> 2]));
}

/* Read the producer index register for inbound queue "qnum". */
uint32_t
pmcs_rd_iqpi(pmcs_hw_t *pwp, uint32_t qnum)
{
	return (ddi_get32(pwp->mpi_acc_handle,
	    &pwp->mpi_regs[pwp->iqpi_offset[qnum] >> 2]));
}

/* Read the consumer index register for outbound queue "qnum". */
uint32_t
pmcs_rd_oqci(pmcs_hw_t *pwp, uint32_t qnum)
{
	return (ddi_get32(pwp->mpi_acc_handle,
	    &pwp->mpi_regs[pwp->oqci_offset[qnum] >> 2]));
}

/* Write a 32-bit register in the message unit region. */
void
pmcs_wr_msgunit(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
{
	ddi_put32(pwp->msg_acc_handle, &pwp->msg_regs[off >> 2], val);
}

/* Write a 32-bit word into the main MPI configuration table. */
void
pmcs_wr_mpi_tbl(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
{
	ddi_put32(pwp->mpi_acc_handle,
	    &pwp->mpi_regs[(pwp->mpi_offset + off) >> 2], (val));
}

/* Write a 32-bit word into the general status table. */
void
pmcs_wr_gst_tbl(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
{
	ddi_put32(pwp->mpi_acc_handle,
	    &pwp->mpi_regs[(pwp->mpi_gst_offset + off) >> 2], val);
}

/* Write a 32-bit word into the inbound queue configuration table. */
void
pmcs_wr_iqc_tbl(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
{
	ddi_put32(pwp->mpi_acc_handle,
	    &pwp->mpi_regs[(pwp->mpi_iqc_offset + off) >> 2], val);
}

/* Write a 32-bit word into the outbound queue configuration table. */
void
pmcs_wr_oqc_tbl(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
{
	ddi_put32(pwp->mpi_acc_handle,
	    &pwp->mpi_regs[(pwp->mpi_oqc_offset + off) >> 2], val);
}
6248 
6249 void
6250 pmcs_wr_iqci(pmcs_hw_t *pwp, uint32_t qnum, uint32_t val)
6251 {
6252 	((uint32_t *)((void *)pwp->cip))[IQ_OFFSET(qnum) >> 2] = val;
6253 	if (ddi_dma_sync(pwp->cip_handles, 0, 0, DDI_DMA_SYNC_FORDEV) !=
6254 	    DDI_SUCCESS) {
6255 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: ddi_dma_sync failed?",
6256 		    __func__);
6257 	}
6258 }
6259 
6260 void
6261 pmcs_wr_iqpi(pmcs_hw_t *pwp, uint32_t qnum, uint32_t val)
6262 {
6263 	ddi_put32(pwp->mpi_acc_handle,
6264 	    &pwp->mpi_regs[pwp->iqpi_offset[qnum] >> 2], val);
6265 }
6266 
6267 void
6268 pmcs_wr_oqci(pmcs_hw_t *pwp, uint32_t qnum, uint32_t val)
6269 {
6270 	ddi_put32(pwp->mpi_acc_handle,
6271 	    &pwp->mpi_regs[pwp->oqci_offset[qnum] >> 2], val);
6272 }
6273 
6274 void
6275 pmcs_wr_oqpi(pmcs_hw_t *pwp, uint32_t qnum, uint32_t val)
6276 {
6277 	((uint32_t *)((void *)pwp->cip))[OQ_OFFSET(qnum) >> 2] = val;
6278 	if (ddi_dma_sync(pwp->cip_handles, 0, 0, DDI_DMA_SYNC_FORDEV) !=
6279 	    DDI_SUCCESS) {
6280 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: ddi_dma_sync failed?",
6281 		    __func__);
6282 	}
6283 }
6284 
6285 /*
6286  * Check the status value of an outbound IOMB and report anything bad
6287  */
6288 
void
pmcs_check_iomb_status(pmcs_hw_t *pwp, uint32_t *iomb)
{
	uint16_t 	opcode;
	int		offset;	/* word index of the status field, by opcode */

	if (iomb == NULL) {
		return;
	}

	/* Opcode lives in the low 12 bits of the first (little-endian) word */
	opcode = LE_32(iomb[0]) & 0xfff;

	switch (opcode) {
		/*
		 * The following have no status field, so ignore them
		 */
	case PMCOUT_ECHO:
	case PMCOUT_SAS_HW_EVENT:
	case PMCOUT_GET_DEVICE_HANDLE:
	case PMCOUT_SATA_EVENT:
	case PMCOUT_SSP_EVENT:
	case PMCOUT_DEVICE_HANDLE_ARRIVED:
	case PMCOUT_SMP_REQUEST_RECEIVED:
	case PMCOUT_GPIO:
	case PMCOUT_GPIO_EVENT:
	case PMCOUT_GET_TIME_STAMP:
	case PMCOUT_SKIP_ENTRIES:
	case PMCOUT_GET_NVMD_DATA:	/* Actually lower 16 bits of word 3 */
	case PMCOUT_SET_NVMD_DATA:	/* but ignore - we don't use these */
	case PMCOUT_DEVICE_HANDLE_REMOVED:
	case PMCOUT_SSP_REQUEST_RECEIVED:
		return;

	/* Status is in word 1 */
	case PMCOUT_GENERAL_EVENT:
		offset = 1;
		break;

	/* Status is in word 2 */
	case PMCOUT_SSP_COMPLETION:
	case PMCOUT_SMP_COMPLETION:
	case PMCOUT_DEVICE_REGISTRATION:
	case PMCOUT_DEREGISTER_DEVICE_HANDLE:
	case PMCOUT_SATA_COMPLETION:
	case PMCOUT_DEVICE_INFO:
	case PMCOUT_FW_FLASH_UPDATE:
	case PMCOUT_SSP_ABORT:
	case PMCOUT_SATA_ABORT:
	case PMCOUT_SAS_DIAG_MODE_START_END:
	case PMCOUT_SAS_HW_EVENT_ACK_ACK:
	case PMCOUT_SMP_ABORT:
	case PMCOUT_SET_DEVICE_STATE:
	case PMCOUT_GET_DEVICE_STATE:
	case PMCOUT_SET_DEVICE_INFO:
		offset = 2;
		break;

	/* Status is in word 3 */
	case PMCOUT_LOCAL_PHY_CONTROL:
	case PMCOUT_SAS_DIAG_EXECUTE:
	case PMCOUT_PORT_CONTROL:
		offset = 3;
		break;

	case PMCOUT_GET_INFO:
	case PMCOUT_GET_VPD:
	case PMCOUT_SAS_ASSISTED_DISCOVERY_EVENT:
	case PMCOUT_SATA_ASSISTED_DISCOVERY_EVENT:
	case PMCOUT_SET_VPD:
	case PMCOUT_TWI:
		pmcs_print_entry(pwp, PMCS_PRT_DEBUG,
		    "Got response for deprecated opcode", iomb);
		return;

	default:
		pmcs_print_entry(pwp, PMCS_PRT_DEBUG,
		    "Got response for unknown opcode", iomb);
		return;
	}

	/* Log (but otherwise ignore) any non-OK status */
	if (LE_32(iomb[offset]) != PMCOUT_STATUS_OK) {
		pmcs_print_entry(pwp, PMCS_PRT_DEBUG,
		    "bad status on TAG_TYPE_NONE command", iomb);
	}
}
6371 
6372 /*
6373  * Called with statlock held
6374  */
void
pmcs_clear_xp(pmcs_hw_t *pwp, pmcs_xscsi_t *xp)
{
	_NOTE(ARGUNUSED(pwp));

	ASSERT(mutex_owned(&xp->statlock));

	pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Device 0x%p is gone.", __func__,
	    (void *)xp);

	/*
	 * Clear the dip now.  This keeps pmcs_remove_device from attempting
	 * to call us on the same device while we're still flushing queues.
	 * The only side effect is we can no longer update SM-HBA properties,
	 * but this device is going away anyway, so no matter.
	 */
	xp->dip = NULL;

	/* Reset all target state; dev_gone marks the target defunct */
	xp->special_running = 0;
	xp->recovering = 0;
	xp->recover_wait = 0;
	xp->draining = 0;
	xp->new = 0;
	xp->assigned = 0;
	xp->dev_state = 0;
	xp->tagmap = 0;
	xp->dev_gone = 1;
	xp->event_recovery = 0;
	xp->dtype = NOTHING;
	xp->wq_recovery_tail = NULL;
	/* Don't clear xp->phy */
	/* Don't clear xp->actv_cnt */

	/*
	 * Flush all target queues
	 */
	pmcs_flush_target_queues(pwp, xp, PMCS_TGT_ALL_QUEUES);
}
6413 
/*
 * Decode and log the function result field of an SMP response frame.
 * Returns the raw srf_result value unchanged so the caller can act on it.
 */
static int
pmcs_smp_function_result(pmcs_hw_t *pwp, smp_response_frame_t *srf)
{
	int result = srf->srf_result;

	switch (result) {
	case SMP_RES_UNKNOWN_FUNCTION:
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: SMP DISCOVER Response "
		    "Function Result: Unknown SMP Function(0x%x)",
		    __func__, result);
		break;
	case SMP_RES_FUNCTION_FAILED:
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: SMP DISCOVER Response "
		    "Function Result: SMP Function Failed(0x%x)",
		    __func__, result);
		break;
	case SMP_RES_INVALID_REQUEST_FRAME_LENGTH:
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: SMP DISCOVER Response "
		    "Function Result: Invalid Request Frame Length(0x%x)",
		    __func__, result);
		break;
	case SMP_RES_INCOMPLETE_DESCRIPTOR_LIST:
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: SMP DISCOVER Response "
		    "Function Result: Incomplete Descriptor List(0x%x)",
		    __func__, result);
		break;
	case SMP_RES_PHY_DOES_NOT_EXIST:
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: SMP DISCOVER Response "
		    "Function Result: PHY does not exist(0x%x)",
		    __func__, result);
		break;
	case SMP_RES_PHY_VACANT:
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: SMP DISCOVER Response "
		    "Function Result: PHY Vacant(0x%x)",
		    __func__, result);
		break;
	default:
		/* Unrecognized result codes are logged with the raw value */
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: SMP DISCOVER Response "
		    "Function Result: (0x%x)",
		    __func__, result);
		break;
	}

	return (result);
}
6459 
6460 /*
6461  * Do all the repetitive stuff necessary to setup for DMA
6462  *
6463  * pwp: Used for dip
6464  * dma_attr: ddi_dma_attr_t to use for the mapping
6465  * acch: ddi_acc_handle_t to use for the mapping
6466  * dmah: ddi_dma_handle_t to use
6467  * length: Amount of memory for mapping
6468  * kvp: Pointer filled in with kernel virtual address on successful return
6469  * dma_addr: Pointer filled in with DMA address on successful return
6470  */
boolean_t
pmcs_dma_setup(pmcs_hw_t *pwp, ddi_dma_attr_t *dma_attr, ddi_acc_handle_t *acch,
    ddi_dma_handle_t *dmah, size_t length, caddr_t *kvp, uint64_t *dma_addr)
{
	dev_info_t		*dip = pwp->dip;
	ddi_dma_cookie_t	cookie;
	size_t			real_length;
	uint_t			ddma_flag = DDI_DMA_CONSISTENT;
	uint_t			ddabh_flag = DDI_DMA_CONSISTENT | DDI_DMA_RDWR;
	uint_t			cookie_cnt;
	ddi_device_acc_attr_t	mattr = {
		DDI_DEVICE_ATTR_V0,
		DDI_NEVERSWAP_ACC,
		DDI_STRICTORDER_ACC,
		DDI_DEFAULT_ACC
	};

	/* NULL the output handles so a partial failure leaves them clean */
	*acch = NULL;
	*dmah = NULL;

	if (ddi_dma_alloc_handle(dip, dma_attr, DDI_DMA_SLEEP, NULL, dmah) !=
	    DDI_SUCCESS) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "Failed to allocate DMA handle");
		return (B_FALSE);
	}

	if (ddi_dma_mem_alloc(*dmah, length, &mattr, ddma_flag, DDI_DMA_SLEEP,
	    NULL, kvp, &real_length, acch) != DDI_SUCCESS) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "Failed to allocate DMA mem");
		ddi_dma_free_handle(dmah);
		*dmah = NULL;
		return (B_FALSE);
	}

	/*
	 * NOTE(review): the failure paths below free the DMA handle before
	 * freeing the DMA memory; the conventional teardown order is
	 * ddi_dma_mem_free() then ddi_dma_free_handle() — confirm this
	 * ordering is benign.
	 */
	if (ddi_dma_addr_bind_handle(*dmah, NULL, *kvp, real_length,
	    ddabh_flag, DDI_DMA_SLEEP, NULL, &cookie, &cookie_cnt)
	    != DDI_DMA_MAPPED) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "Failed to bind DMA");
		ddi_dma_free_handle(dmah);
		ddi_dma_mem_free(acch);
		*dmah = NULL;
		*acch = NULL;
		return (B_FALSE);
	}

	/* Caller expects one contiguous segment; multiple cookies is fatal */
	if (cookie_cnt != 1) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "Multiple cookies");
		if (ddi_dma_unbind_handle(*dmah) != DDI_SUCCESS) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG, "Condition failed at "
			    "%s():%d", __func__, __LINE__);
		}
		ddi_dma_free_handle(dmah);
		ddi_dma_mem_free(acch);
		*dmah = NULL;
		*acch = NULL;
		return (B_FALSE);
	}

	*dma_addr = cookie.dmac_laddress;

	return (B_TRUE);
}
6533 
6534 /*
6535  * Flush requested queues for a particular target.  Called with statlock held
6536  */
void
pmcs_flush_target_queues(pmcs_hw_t *pwp, pmcs_xscsi_t *tgt, uint8_t queues)
{
	pmcs_cmd_t	*sp;
	pmcwork_t	*pwrk;

	ASSERT(pwp != NULL);
	ASSERT(tgt != NULL);

	pmcs_prt(pwp, PMCS_PRT_DEBUG,
	    "%s: Flushing queues (%d) for target 0x%p", __func__,
	    queues, (void *)tgt);

	/*
	 * Commands on the wait queue (or the special queue below) don't have
	 * work structures associated with them.
	 */
	if (queues & PMCS_TGT_WAIT_QUEUE) {
		mutex_enter(&tgt->wqlock);
		while ((sp = STAILQ_FIRST(&tgt->wq)) != NULL) {
			STAILQ_REMOVE(&tgt->wq, sp, pmcs_cmd, cmd_next);
			pmcs_prt(pwp, PMCS_PRT_DEBUG1,
			    "%s: Removing cmd 0x%p from wq for target 0x%p",
			    __func__, (void *)sp, (void *)tgt);
			CMD2PKT(sp)->pkt_reason = CMD_DEV_GONE;
			CMD2PKT(sp)->pkt_state = STATE_GOT_BUS;
			/*
			 * Drop wqlock while unloading DMA and moving the
			 * command to the completion queue, then re-take it
			 * and restart from the (possibly changed) list head.
			 */
			mutex_exit(&tgt->wqlock);
			pmcs_dma_unload(pwp, sp);
			mutex_enter(&pwp->cq_lock);
			STAILQ_INSERT_TAIL(&pwp->cq, sp, cmd_next);
			mutex_exit(&pwp->cq_lock);
			mutex_enter(&tgt->wqlock);
		}
		mutex_exit(&tgt->wqlock);
	}

	/*
	 * Commands on the active queue will have work structures associated
	 * with them.
	 */
	if (queues & PMCS_TGT_ACTIVE_QUEUE) {
		mutex_enter(&tgt->aqlock);
		while ((sp = STAILQ_FIRST(&tgt->aq)) != NULL) {
			STAILQ_REMOVE(&tgt->aq, sp, pmcs_cmd, cmd_next);
			pwrk = pmcs_tag2wp(pwp, sp->cmd_tag);
			/*
			 * aqlock and the caller-held statlock must both be
			 * dropped before completing the work structure, which
			 * may take other locks.
			 */
			mutex_exit(&tgt->aqlock);
			mutex_exit(&tgt->statlock);
			/*
			 * If we found a work structure, mark it as dead
			 * and complete it
			 */
			if (pwrk != NULL) {
				pwrk->dead = 1;
				CMD2PKT(sp)->pkt_reason = CMD_DEV_GONE;
				CMD2PKT(sp)->pkt_state = STATE_GOT_BUS;
				pmcs_complete_work_impl(pwp, pwrk, NULL, 0);
			}
			pmcs_prt(pwp, PMCS_PRT_DEBUG1,
			    "%s: Removing cmd 0x%p from aq for target 0x%p",
			    __func__, (void *)sp, (void *)tgt);
			pmcs_dma_unload(pwp, sp);
			mutex_enter(&pwp->cq_lock);
			STAILQ_INSERT_TAIL(&pwp->cq, sp, cmd_next);
			mutex_exit(&pwp->cq_lock);
			mutex_enter(&tgt->aqlock);
			mutex_enter(&tgt->statlock);
		}
		mutex_exit(&tgt->aqlock);
	}

	/*
	 * Special queue: no dedicated lock is taken here — presumably the
	 * caller-held statlock protects sq; verify against other sq users.
	 */
	if (queues & PMCS_TGT_SPECIAL_QUEUE) {
		while ((sp = STAILQ_FIRST(&tgt->sq)) != NULL) {
			STAILQ_REMOVE(&tgt->sq, sp, pmcs_cmd, cmd_next);
			pmcs_prt(pwp, PMCS_PRT_DEBUG1,
			    "%s: Removing cmd 0x%p from sq for target 0x%p",
			    __func__, (void *)sp, (void *)tgt);
			CMD2PKT(sp)->pkt_reason = CMD_DEV_GONE;
			CMD2PKT(sp)->pkt_state = STATE_GOT_BUS;
			pmcs_dma_unload(pwp, sp);
			mutex_enter(&pwp->cq_lock);
			STAILQ_INSERT_TAIL(&pwp->cq, sp, cmd_next);
			mutex_exit(&pwp->cq_lock);
		}
	}
}
6622 
/*
 * Complete a work structure according to its tag type.  iomb/amt, when
 * non-NULL/non-zero, describe the outbound IOMB that completed the work.
 *
 * NOTE(review): the TAG_TYPE_WAIT case drops pwrk->lock here, which
 * implies the caller enters with it held — confirm against callers.
 */
void
pmcs_complete_work_impl(pmcs_hw_t *pwp, pmcwork_t *pwrk, uint32_t *iomb,
    size_t amt)
{
	switch (PMCS_TAG_TYPE(pwrk->htag)) {
	case PMCS_TAG_TYPE_CBACK:
	{
		/* Hand off to the completion callback stored in the work */
		pmcs_cb_t callback = (pmcs_cb_t)pwrk->ptr;
		(*callback)(pwp, pwrk, iomb);
		break;
	}
	case PMCS_TAG_TYPE_WAIT:
		/* Copy the IOMB out for the waiter, then wake it */
		if (pwrk->arg && iomb && amt) {
			(void) memcpy(pwrk->arg, iomb, amt);
		}
		cv_signal(&pwrk->sleep_cv);
		mutex_exit(&pwrk->lock);
		break;
	case PMCS_TAG_TYPE_NONE:
		/* Nobody is watching; check status (DEBUG) and free the work */
#ifdef DEBUG
		pmcs_check_iomb_status(pwp, iomb);
#endif
		pmcs_pwork(pwp, pwrk);
		break;
	default:
		/*
		 * We will leak a structure here if we don't know
		 * what happened
		 */
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Unknown PMCS_TAG_TYPE (%x)",
		    __func__, PMCS_TAG_TYPE(pwrk->htag));
		break;
	}
}
6657 
6658 /*
6659  * Determine if iport still has targets. During detach(9E), if SCSA is
 * successful in its guarantee of tran_tgt_free(9E) before detach(9E),
6661  * this should always return B_FALSE.
6662  */
6663 boolean_t
6664 pmcs_iport_has_targets(pmcs_hw_t *pwp, pmcs_iport_t *iport)
6665 {
6666 	pmcs_xscsi_t *xp;
6667 	int i;
6668 
6669 	mutex_enter(&pwp->lock);
6670 
6671 	if (!pwp->targets || !pwp->max_dev) {
6672 		mutex_exit(&pwp->lock);
6673 		return (B_FALSE);
6674 	}
6675 
6676 	for (i = 0; i < pwp->max_dev; i++) {
6677 		xp = pwp->targets[i];
6678 		if ((xp == NULL) || (xp->phy == NULL) ||
6679 		    (xp->phy->iport != iport)) {
6680 			continue;
6681 		}
6682 
6683 		mutex_exit(&pwp->lock);
6684 		return (B_TRUE);
6685 	}
6686 
6687 	mutex_exit(&pwp->lock);
6688 	return (B_FALSE);
6689 }
6690 
6691 /*
6692  * Called with softstate lock held
6693  */
6694 void
6695 pmcs_destroy_target(pmcs_xscsi_t *target)
6696 {
6697 	pmcs_hw_t *pwp = target->pwp;
6698 	pmcs_iport_t *iport;
6699 
6700 	ASSERT(pwp);
6701 	ASSERT(mutex_owned(&pwp->lock));
6702 
6703 	if (!target->ua) {
6704 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
6705 		    "%s: target %p iport addres is null",
6706 		    __func__, (void *)target);
6707 	}
6708 
6709 	iport = pmcs_get_iport_by_ua(pwp, target->ua);
6710 	if (iport == NULL) {
6711 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
6712 		    "%s: no iport associated with tgt(0x%p)",
6713 		    __func__, (void *)target);
6714 		return;
6715 	}
6716 
6717 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
6718 	    "%s: free target %p", __func__, (void *)target);
6719 	if (target->ua) {
6720 		strfree(target->ua);
6721 	}
6722 
6723 	mutex_destroy(&target->wqlock);
6724 	mutex_destroy(&target->aqlock);
6725 	mutex_destroy(&target->statlock);
6726 	cv_destroy(&target->reset_cv);
6727 	cv_destroy(&target->abort_cv);
6728 	ddi_soft_state_bystr_fini(&target->lun_sstate);
6729 	ddi_soft_state_bystr_free(iport->tgt_sstate, target->unit_address);
6730 	pmcs_rele_iport(iport);
6731 }
6732 
6733 /*
6734  * Get device state.  Called with statlock and PHY lock held.
6735  */
int
pmcs_get_dev_state(pmcs_hw_t *pwp, pmcs_xscsi_t *xp, uint8_t *ds)
{
	uint32_t htag, *ptr, msg[PMCS_MSG_SIZE];
	int result;
	struct pmcwork *pwrk;
	pmcs_phy_t *phyp;

	pmcs_prt(pwp, PMCS_PRT_DEBUG3, "%s: tgt(0x%p)", __func__, (void *)xp);
	if (xp == NULL) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Target is NULL", __func__);
		return (-1);
	}

	ASSERT(mutex_owned(&xp->statlock));
	phyp = xp->phy;
	ASSERT(mutex_owned(&phyp->phy_lock));

	/* Allocate a work structure we can wait on */
	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, phyp);
	if (pwrk == NULL) {
		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
		return (-1);
	}
	/* msg doubles as the buffer the completion IOMB is copied into */
	pwrk->arg = msg;
	pwrk->dtype = phyp->dtype;

	if (phyp->valid_device_id == 0) {
		pmcs_pwork(pwp, pwrk);
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Invalid DeviceID", __func__);
		return (-1);
	}
	htag = pwrk->htag;
	/* Build the GET_DEVICE_STATE IOMB */
	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
	    PMCIN_GET_DEVICE_STATE));
	msg[1] = LE_32(pwrk->htag);
	msg[2] = LE_32(phyp->device_id);

	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
	if (ptr == NULL) {
		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
		pmcs_pwork(pwp, pwrk);
		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
		return (-1);
	}
	COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE);
	pwrk->state = PMCS_WORK_STATE_ONCHIP;
	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
	/*
	 * Drop statlock and the PHY lock while sleeping on completion, then
	 * reacquire them in the documented order (PHY lock before statlock).
	 */
	mutex_exit(&xp->statlock);
	pmcs_unlock_phy(phyp);
	WAIT_FOR(pwrk, 1000, result);
	pmcs_lock_phy(phyp);
	pmcs_pwork(pwp, pwrk);
	mutex_enter(&xp->statlock);

	if (result) {
		/* Nonzero result from WAIT_FOR means the command timed out */
		pmcs_timed_out(pwp, htag, __func__);
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: cmd timed out, returning ",
		    __func__);
		return (-1);
	}
	/* Word 2 of the response is the firmware status; 0 == success */
	if (LE_32(msg[2]) == 0) {
		/* Device state is returned in word 4 */
		*ds = (uint8_t)(LE_32(msg[4]));
		if (*ds !=  xp->dev_state) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
			    "%s: retrieved_ds=0x%x, target_ds=0x%x", __func__,
			    *ds, xp->dev_state);
		}
		return (0);
	} else {
		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
		    "%s: cmd failed Status(0x%x), returning ", __func__,
		    LE_32(msg[2]));
		return (-1);
	}
}
6812 
6813 /*
6814  * Set device state.  Called with target's statlock and PHY lock held.
6815  */
int
pmcs_set_dev_state(pmcs_hw_t *pwp, pmcs_xscsi_t *xp, uint8_t ds)
{
	uint32_t htag, *ptr, msg[PMCS_MSG_SIZE];
	int result;
	uint8_t pds, nds;
	struct pmcwork *pwrk;
	pmcs_phy_t *phyp;

	pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, "%s: ds(0x%x), tgt(0x%p)",
	    __func__, ds, (void *)xp);
	if (xp == NULL) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Target is Null", __func__);
		return (-1);
	}

	/*
	 * NOTE(review): pmcs_gwork is called with phyp before the NULL
	 * check below — presumably it tolerates a NULL phy; confirm.
	 */
	phyp = xp->phy;
	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, phyp);
	if (pwrk == NULL) {
		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
		return (-1);
	}
	if (phyp == NULL) {
		pmcs_pwork(pwp, pwrk);
		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, "%s: PHY is Null",
		    __func__);
		return (-1);
	}
	if (phyp->valid_device_id == 0) {
		pmcs_pwork(pwp, pwrk);
		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
		    "%s: Invalid DeviceID", __func__);
		return (-1);
	}
	/* msg doubles as the buffer the completion IOMB is copied into */
	pwrk->arg = msg;
	pwrk->dtype = phyp->dtype;
	htag = pwrk->htag;
	/* Build the SET_DEVICE_STATE IOMB */
	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
	    PMCIN_SET_DEVICE_STATE));
	msg[1] = LE_32(pwrk->htag);
	msg[2] = LE_32(phyp->device_id);
	msg[3] = LE_32(ds);

	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
	if (ptr == NULL) {
		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
		pmcs_pwork(pwp, pwrk);
		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
		return (-1);
	}
	COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE);
	pwrk->state = PMCS_WORK_STATE_ONCHIP;
	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);

	/*
	 * Drop statlock and the PHY lock while sleeping on completion, then
	 * reacquire them (PHY lock before statlock).
	 */
	mutex_exit(&xp->statlock);
	pmcs_unlock_phy(phyp);
	WAIT_FOR(pwrk, 1000, result);
	pmcs_lock_phy(phyp);
	pmcs_pwork(pwp, pwrk);
	mutex_enter(&xp->statlock);

	if (result) {
		/* Nonzero result from WAIT_FOR means the command timed out */
		pmcs_timed_out(pwp, htag, __func__);
		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
		    "%s: cmd timed out, returning", __func__);
		return (-1);
	}
	/* Word 2 of the response is the firmware status; 0 == success */
	if (LE_32(msg[2]) == 0) {
		/* Word 4: previous state in high nibble, new state in low */
		pds = (uint8_t)(LE_32(msg[4]) >> 4);
		nds = (uint8_t)(LE_32(msg[4]) & 0x0000000f);
		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, "%s: previous_ds=0x%x, "
		    "new_ds=0x%x", __func__, pds, nds);
		xp->dev_state = nds;
		return (0);
	} else {
		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
		    "%s: cmd failed Status(0x%x), returning ", __func__,
		    LE_32(msg[2]));
		return (-1);
	}
}
6898 
/*
 * Walk the PHY tree performing device state error recovery on any target
 * with recover_wait set.  Called with phyp == NULL from the worker (top
 * level); recurses with phyp != NULL to cover children.  Recovery is
 * serialized via pwp->ds_err_recovering.
 */
void
pmcs_dev_state_recovery(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
{
	uint8_t	ds;
	int rc;
	pmcs_xscsi_t *tgt;
	pmcs_phy_t *pptr, *pnext, *pchild;

	/*
	 * First time, check to see if we're already performing recovery
	 */
	if (phyp == NULL) {
		mutex_enter(&pwp->lock);
		if (pwp->ds_err_recovering) {
			/* Another pass is in flight; reschedule and bail */
			mutex_exit(&pwp->lock);
			SCHEDULE_WORK(pwp, PMCS_WORK_DS_ERR_RECOVERY);
			return;
		}

		pwp->ds_err_recovering = 1;
		pptr = pwp->root_phys;
		mutex_exit(&pwp->lock);
	} else {
		pptr = phyp;
	}

	while (pptr) {
		/*
		 * Since ds_err_recovering is set, we can be assured these
		 * PHYs won't disappear on us while we do this.
		 */
		pmcs_lock_phy(pptr);
		pchild = pptr->children;
		pnext = pptr->sibling;
		pmcs_unlock_phy(pptr);

		/* Depth-first: recover children before this PHY */
		if (pchild) {
			pmcs_dev_state_recovery(pwp, pchild);
		}

		tgt = NULL;
		pmcs_lock_phy(pptr);

		if (pptr->dead) {
			goto next_phy;
		}

		tgt = pptr->target;
		if (tgt == NULL || tgt->dev_gone) {
			if (pptr->dtype != NOTHING) {
				pmcs_prt(pwp, PMCS_PRT_DEBUG2,
				    "%s: no target for DS error recovery for "
				    "PHY 0x%p", __func__, (void *)pptr);
			}
			goto next_phy;
		}

		mutex_enter(&tgt->statlock);

		/* Only targets flagged for recovery are processed */
		if (tgt->recover_wait == 0) {
			goto next_phy;
		}

		/*
		 * Step 1: Put the device into the IN_RECOVERY state
		 */
		rc = pmcs_get_dev_state(pwp, tgt, &ds);
		if (rc != 0) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG,
			    "%s: pmcs_get_dev_state on PHY %s "
			    "failed (rc=%d)",
			    __func__, pptr->path, rc);

			pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
			    __func__, __LINE__, "pmcs_get_dev_state");

			goto next_phy;
		}

		if (tgt->dev_state == ds) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
			    "%s: Target 0x%p already IN_RECOVERY", __func__,
			    (void *)tgt);
		} else {
			tgt->dev_state = ds;
			ds = PMCS_DEVICE_STATE_IN_RECOVERY;
			rc = pmcs_send_err_recovery_cmd(pwp, ds, tgt);
			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
			    "%s: pmcs_send_err_recovery_cmd "
			    "result(%d) tgt(0x%p) ds(0x%x) tgt->ds(0x%x)",
			    __func__, rc, (void *)tgt, ds, tgt->dev_state);

			if (rc) {
				pmcs_prt(pwp, PMCS_PRT_DEBUG,
				    "%s: pmcs_send_err_recovery_cmd to PHY %s "
				    "failed (rc=%d)",
				    __func__, pptr->path, rc);

				pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
				    __func__, __LINE__,
				    "pmcs_send_err_recovery_cmd");

				goto next_phy;
			}
		}

		/*
		 * Step 2: Perform a hard reset on the PHY
		 */
		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
		    "%s: Issue HARD_RESET to PHY %s", __func__, pptr->path);
		/*
		 * Must release statlock here because pmcs_reset_phy will
		 * drop and reacquire the PHY lock.
		 */
		mutex_exit(&tgt->statlock);
		rc = pmcs_reset_phy(pwp, pptr, PMCS_PHYOP_HARD_RESET);
		mutex_enter(&tgt->statlock);
		if (rc) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG,
			    "%s: HARD_RESET to PHY %s failed (rc=%d)",
			    __func__, pptr->path, rc);

			pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
			    __func__, __LINE__, "HARD_RESET");

			goto next_phy;
		}

		/*
		 * Step 3: Abort all I/Os to the device
		 */
		if (pptr->abort_all_start) {
			/* An ABORT_ALL is already running; wait for it */
			while (pptr->abort_all_start) {
				pmcs_prt(pwp, PMCS_PRT_DEBUG,
				    "%s: Waiting for outstanding ABORT_ALL on "
				    "PHY 0x%p", __func__, (void *)pptr);
				cv_wait(&pptr->abort_all_cv, &pptr->phy_lock);
			}
		} else {
			mutex_exit(&tgt->statlock);
			rc = pmcs_abort(pwp, pptr, pptr->device_id, 1, 1);
			mutex_enter(&tgt->statlock);
			if (rc != 0) {
				pptr->abort_pending = 1;
				pmcs_prt(pwp, PMCS_PRT_DEBUG,
				    "%s: pmcs_abort to PHY %s failed (rc=%d)",
				    __func__, pptr->path, rc);

				pmcs_handle_ds_recovery_error(pptr, tgt,
				    pwp, __func__, __LINE__, "pmcs_abort");

				goto next_phy;
			}
		}

		/*
		 * Step 4: Set the device back to OPERATIONAL state
		 */
		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
		    "%s: Set PHY/tgt 0x%p/0x%p to OPERATIONAL state",
		    __func__, (void *)pptr, (void *)tgt);
		rc = pmcs_set_dev_state(pwp, tgt,
		    PMCS_DEVICE_STATE_OPERATIONAL);
		if (rc == 0) {
			tgt->recover_wait = 0;
			pptr->ds_recovery_retries = 0;
			/*
			 * Don't bother to run the work queues if the PHY
			 * is dead.
			 */
			if (tgt->phy && !tgt->phy->dead) {
				SCHEDULE_WORK(pwp, PMCS_WORK_RUN_QUEUES);
				(void) ddi_taskq_dispatch(pwp->tq, pmcs_worker,
				    pwp, DDI_NOSLEEP);
			}
		} else {
			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
			    "%s: Failed to SET tgt 0x%p to OPERATIONAL state",
			    __func__, (void *)tgt);

			pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
			    __func__, __LINE__, "SET tgt to OPERATIONAL state");

			goto next_phy;
		}

next_phy:
		if (tgt) {
			mutex_exit(&tgt->statlock);
		}
		pmcs_unlock_phy(pptr);
		pptr = pnext;
	}

	/*
	 * Only clear ds_err_recovering if we're exiting for good and not
	 * just unwinding from recursion
	 */
	if (phyp == NULL) {
		mutex_enter(&pwp->lock);
		pwp->ds_err_recovering = 0;
		mutex_exit(&pwp->lock);
	}
}
7104 
7105 /*
7106  * Called with target's statlock and PHY lock held.
7107  */
7108 int
7109 pmcs_send_err_recovery_cmd(pmcs_hw_t *pwp, uint8_t dev_state, pmcs_xscsi_t *tgt)
7110 {
7111 	pmcs_phy_t *pptr;
7112 	int rc = -1;
7113 
7114 	ASSERT(tgt != NULL);
7115 	ASSERT(mutex_owned(&tgt->statlock));
7116 
7117 	if (tgt->recovering) {
7118 		return (0);
7119 	}
7120 
7121 	tgt->recovering = 1;
7122 	pptr = tgt->phy;
7123 
7124 	if (pptr == NULL) {
7125 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, "%s: PHY is Null",
7126 		    __func__);
7127 		return (-1);
7128 	}
7129 
7130 	ASSERT(mutex_owned(&pptr->phy_lock));
7131 
7132 	pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, "%s: ds: 0x%x, tgt ds(0x%x)",
7133 	    __func__, dev_state, tgt->dev_state);
7134 
7135 	switch (dev_state) {
7136 	case PMCS_DEVICE_STATE_IN_RECOVERY:
7137 		if (tgt->dev_state == PMCS_DEVICE_STATE_IN_RECOVERY) {
7138 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7139 			    "%s: Target 0x%p already IN_RECOVERY", __func__,
7140 			    (void *)tgt);
7141 			rc = 0;	/* This is not an error */
7142 			goto no_action;
7143 		}
7144 
7145 		rc = pmcs_set_dev_state(pwp, tgt,
7146 		    PMCS_DEVICE_STATE_IN_RECOVERY);
7147 		if (rc != 0) {
7148 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7149 			    "%s(1): Failed to SET tgt(0x%p) to _IN_RECOVERY",
7150 			    __func__, (void *)tgt);
7151 		}
7152 
7153 		break;
7154 
7155 	case PMCS_DEVICE_STATE_OPERATIONAL:
7156 		if (tgt->dev_state != PMCS_DEVICE_STATE_IN_RECOVERY) {
7157 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7158 			    "%s: Target 0x%p not ready to go OPERATIONAL",
7159 			    __func__, (void *)tgt);
7160 			goto no_action;
7161 		}
7162 
7163 		rc = pmcs_set_dev_state(pwp, tgt,
7164 		    PMCS_DEVICE_STATE_OPERATIONAL);
7165 		tgt->reset_success = 1;
7166 		if (rc != 0) {
7167 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7168 			    "%s(2): Failed to SET tgt(0x%p) to OPERATIONAL",
7169 			    __func__, (void *)tgt);
7170 			tgt->reset_success = 0;
7171 		}
7172 
7173 		break;
7174 
7175 	case PMCS_DEVICE_STATE_NON_OPERATIONAL:
7176 		PHY_CHANGED(pwp, pptr);
7177 		RESTART_DISCOVERY(pwp);
7178 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7179 		    "%s: Device at %s is non-operational",
7180 		    __func__, pptr->path);
7181 		tgt->dev_state = PMCS_DEVICE_STATE_NON_OPERATIONAL;
7182 		rc = 0;
7183 
7184 		break;
7185 
7186 	default:
7187 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7188 		    "%s: Invalid state requested (%d)", __func__,
7189 		    dev_state);
7190 		break;
7191 
7192 	}
7193 
7194 no_action:
7195 	tgt->recovering = 0;
7196 	return (rc);
7197 }
7198 
7199 /*
7200  * pmcs_lock_phy_impl
7201  *
7202  * This function is what does the actual work for pmcs_lock_phy.  It will
7203  * lock all PHYs from phyp down in a top-down fashion.
7204  *
7205  * Locking notes:
7206  * 1. level starts from 0 for the PHY ("parent") that's passed in.  It is
7207  * not a reflection of the actual level of the PHY in the SAS topology.
7208  * 2. If parent is an expander, then parent is locked along with all its
7209  * descendents.
7210  * 3. Expander subsidiary PHYs at level 0 are not locked.  It is the
7211  * responsibility of the caller to individually lock expander subsidiary PHYs
7212  * at level 0 if necessary.
7213  * 4. Siblings at level 0 are not traversed due to the possibility that we're
7214  * locking a PHY on the dead list.  The siblings could be pointing to invalid
7215  * PHYs.  We don't lock siblings at level 0 anyway.
7216  */
static void
pmcs_lock_phy_impl(pmcs_phy_t *phyp, int level)
{
	pmcs_phy_t *tphyp;

	ASSERT((phyp->dtype == SAS) || (phyp->dtype == SATA) ||
	    (phyp->dtype == EXPANDER) || (phyp->dtype == NOTHING));

	/*
	 * Start walking the PHYs.
	 */
	tphyp = phyp;
	while (tphyp) {
		/*
		 * If we're at the top level, only lock ourselves.  For anything
		 * at level > 0, traverse children while locking everything.
		 */
		if ((level > 0) || (tphyp == phyp)) {
			pmcs_prt(tphyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
			    "%s: PHY 0x%p parent 0x%p path %s lvl %d",
			    __func__, (void *)tphyp, (void *)tphyp->parent,
			    tphyp->path, level);
			mutex_enter(&tphyp->phy_lock);

			/* Lock this PHY's subtree before moving to a sibling */
			if (tphyp->children) {
				pmcs_lock_phy_impl(tphyp->children, level + 1);
			}
		}

		/* Siblings at level 0 are never traversed (see header note) */
		if (level == 0) {
			return;
		}

		tphyp = tphyp->sibling;
	}
}
7253 
7254 /*
7255  * pmcs_lock_phy
7256  *
 * This function is responsible for locking a PHY and all its descendants
7258  */
7259 void
7260 pmcs_lock_phy(pmcs_phy_t *phyp)
7261 {
7262 #ifdef DEBUG
7263 	char *callername = NULL;
7264 	ulong_t off;
7265 
7266 	ASSERT(phyp != NULL);
7267 
7268 	callername = modgetsymname((uintptr_t)caller(), &off);
7269 
7270 	if (callername == NULL) {
7271 		pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
7272 		    "%s: PHY 0x%p path %s caller: unknown", __func__,
7273 		    (void *)phyp, phyp->path);
7274 	} else {
7275 		pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
7276 		    "%s: PHY 0x%p path %s caller: %s+%lx", __func__,
7277 		    (void *)phyp, phyp->path, callername, off);
7278 	}
7279 #else
7280 	pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
7281 	    "%s: PHY 0x%p path %s", __func__, (void *)phyp, phyp->path);
7282 #endif
7283 	pmcs_lock_phy_impl(phyp, 0);
7284 }
7285 
/*
 * pmcs_unlock_phy_impl
 *
 * Unlock all PHYs from phyp down in a bottom-up fashion.
 *
 * As with pmcs_lock_phy_impl, level is 0 for the PHY that was passed in and
 * siblings at level 0 are never touched.  Children are unlocked before their
 * parents (bottom-up), the reverse of the top-down order used when locking.
 */
static void
pmcs_unlock_phy_impl(pmcs_phy_t *phyp, int level)
{
	pmcs_phy_t *phy_next;

	ASSERT((phyp->dtype == SAS) || (phyp->dtype == SATA) ||
	    (phyp->dtype == EXPANDER) || (phyp->dtype == NOTHING));

	/*
	 * Recurse down to the bottom PHYs
	 */
	if (level == 0) {
		/* Level 0: descend only into our own children */
		if (phyp->children) {
			pmcs_unlock_phy_impl(phyp->children, level + 1);
		}
	} else {
		/* Level > 0: descend into the children of every sibling */
		phy_next = phyp;
		while (phy_next) {
			if (phy_next->children) {
				pmcs_unlock_phy_impl(phy_next->children,
				    level + 1);
			}
			phy_next = phy_next->sibling;
		}
	}

	/*
	 * Iterate through PHYs unlocking all at level > 0 as well the top PHY
	 */
	phy_next = phyp;
	while (phy_next) {
		if ((level > 0) || (phy_next == phyp)) {
			pmcs_prt(phy_next->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
			    "%s: PHY 0x%p parent 0x%p path %s lvl %d",
			    __func__, (void *)phy_next,
			    (void *)phy_next->parent, phy_next->path, level);
			mutex_exit(&phy_next->phy_lock);
		}

		/* Never walk siblings of the level-0 PHY */
		if (level == 0) {
			return;
		}

		phy_next = phy_next->sibling;
	}
}
7337 
7338 /*
7339  * pmcs_unlock_phy
7340  *
7341  * Unlock a PHY and all its descendents
7342  */
7343 void
7344 pmcs_unlock_phy(pmcs_phy_t *phyp)
7345 {
7346 #ifdef DEBUG
7347 	char *callername = NULL;
7348 	ulong_t off;
7349 
7350 	ASSERT(phyp != NULL);
7351 
7352 	callername = modgetsymname((uintptr_t)caller(), &off);
7353 
7354 	if (callername == NULL) {
7355 		pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
7356 		    "%s: PHY 0x%p path %s caller: unknown", __func__,
7357 		    (void *)phyp, phyp->path);
7358 	} else {
7359 		pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
7360 		    "%s: PHY 0x%p path %s caller: %s+%lx", __func__,
7361 		    (void *)phyp, phyp->path, callername, off);
7362 	}
7363 #else
7364 	pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
7365 	    "%s: PHY 0x%p path %s", __func__, (void *)phyp, phyp->path);
7366 #endif
7367 	pmcs_unlock_phy_impl(phyp, 0);
7368 }
7369 
7370 /*
7371  * pmcs_get_root_phy
7372  *
7373  * For a given phy pointer return its root phy.
7374  * The caller must be holding the lock on every PHY from phyp up to the root.
7375  */
7376 pmcs_phy_t *
7377 pmcs_get_root_phy(pmcs_phy_t *phyp)
7378 {
7379 	ASSERT(phyp);
7380 
7381 	while (phyp) {
7382 		if (IS_ROOT_PHY(phyp)) {
7383 			break;
7384 		}
7385 		phyp = phyp->parent;
7386 	}
7387 
7388 	return (phyp);
7389 }
7390 
7391 /*
7392  * pmcs_free_dma_chunklist
7393  *
7394  * Free DMA S/G chunk list
7395  */
7396 void
7397 pmcs_free_dma_chunklist(pmcs_hw_t *pwp)
7398 {
7399 	pmcs_chunk_t	*pchunk;
7400 
7401 	while (pwp->dma_chunklist) {
7402 		pchunk = pwp->dma_chunklist;
7403 		pwp->dma_chunklist = pwp->dma_chunklist->next;
7404 		if (pchunk->dma_handle) {
7405 			if (ddi_dma_unbind_handle(pchunk->dma_handle) !=
7406 			    DDI_SUCCESS) {
7407 				pmcs_prt(pwp, PMCS_PRT_DEBUG, "Condition failed"
7408 				    " at %s():%d", __func__, __LINE__);
7409 			}
7410 			ddi_dma_free_handle(&pchunk->dma_handle);
7411 			ddi_dma_mem_free(&pchunk->acc_handle);
7412 		}
7413 		kmem_free(pchunk, sizeof (pmcs_chunk_t));
7414 	}
7415 }
7416 
7417 
/*
 * Start ssp event recovery. We have to schedule recovery operation because
 * it involves sending multiple commands to device and we should not do it
 * in the interrupt context.
 * If it is failure of a recovery command, let the recovery thread deal with it.
 * Called with pmcwork lock held.
 *
 * pwp	- HBA softstate
 * pwrk	- work structure for the command that hit the SSP event
 * iomb	- inbound IOMB; the event status is in word 2 (little-endian)
 * amt	- number of bytes of the IOMB to copy back to a waiter (may be 0)
 */

void
pmcs_start_ssp_event_recovery(pmcs_hw_t *pwp, pmcwork_t *pwrk, uint32_t *iomb,
    size_t amt)
{
	pmcs_xscsi_t *tgt = pwrk->xp;
	uint32_t event = LE_32(iomb[2]);
	pmcs_phy_t *pptr = pwrk->phy;
	uint32_t tag;

	/*
	 * If the target is no longer assigned, drop the work structure's
	 * PHY reference and proceed as if we had no PHY at all.
	 */
	if (tgt != NULL) {
		mutex_enter(&tgt->statlock);
		if (!tgt->assigned) {
			if (pptr) {
				pmcs_dec_phy_ref_count(pptr);
			}
			pptr = NULL;
			pwrk->phy = NULL;
		}
		mutex_exit(&tgt->statlock);
	}
	if (pptr == NULL) {
		/*
		 * No target, need to run RE-DISCOVERY here.
		 */
		if (pwrk->state != PMCS_WORK_STATE_TIMED_OUT) {
			pwrk->state = PMCS_WORK_STATE_INTR;
		}
		/*
		 * Although we cannot mark phy to force abort nor mark phy
		 * as changed, killing of a target would take care of aborting
		 * commands for the device.
		 */
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: No valid target for event "
		    "processing found. Scheduling RECONFIGURE",  __func__);
		pmcs_pwork(pwp, pwrk);
		RESTART_DISCOVERY(pwp);
		return;
	} else {
		pmcs_lock_phy(pptr);
		mutex_enter(&tgt->statlock);
		if (event == PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS) {
			/*
			 * IT nexus loss: mark the device non-operational,
			 * flag the PHY for abort and restart discovery.
			 */
			if (tgt->dev_state !=
			    PMCS_DEVICE_STATE_NON_OPERATIONAL) {
				pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Device at "
				    "%s is non-operational", __func__,
				    pptr->path);
				tgt->dev_state =
				    PMCS_DEVICE_STATE_NON_OPERATIONAL;
			}
			pptr->abort_pending = 1;
			mutex_exit(&tgt->statlock);
			pmcs_unlock_phy(pptr);
			mutex_exit(&pwrk->lock);
			SCHEDULE_WORK(pwp, PMCS_WORK_ABORT_HANDLE);
			RESTART_DISCOVERY(pwp);
			return;
		}

		/*
		 * If this command is run in WAIT mode, it is a failing recovery
		 * command. If so, just wake up recovery thread waiting for
		 * command completion.
		 */
		tag = PMCS_TAG_TYPE(pwrk->htag);
		if (tag == PMCS_TAG_TYPE_WAIT) {
			pwrk->htag |= PMCS_TAG_DONE;
			if (pwrk->arg && amt) {
				/* Hand the IOMB contents back to the waiter */
				(void) memcpy(pwrk->arg, iomb, amt);
			}
			cv_signal(&pwrk->sleep_cv);
			mutex_exit(&tgt->statlock);
			pmcs_unlock_phy(pptr);
			mutex_exit(&pwrk->lock);
			return;
		}

		/*
		 * To recover from primary failures,
		 * we need to schedule handling events recovery.
		 */
		tgt->event_recovery = 1;
		mutex_exit(&tgt->statlock);
		pmcs_unlock_phy(pptr);
		pwrk->ssp_event = event;
		pmcs_prt(pwp, PMCS_PRT_DEBUG,
		    "%s: Scheduling SSP event recovery for tgt(0x%p) "
		    "pwrk(%p) tag(0x%x)", __func__, (void *)tgt, (void *)pwrk,
		    pwrk->htag);
		mutex_exit(&pwrk->lock);
		SCHEDULE_WORK(pwp, PMCS_WORK_SSP_EVT_RECOVERY);
	}

	/* Work cannot be completed until event recovery is completed. */
}
7520 
/*
 * SSP target event recovery
 * Entered with a phy lock held
 * Pwrk lock is not needed - pwrk is on the target aq and no other thread
 * will do anything with it until this thread starts the chain of recovery.
 * Statlock may be acquired and released.
 *
 * Strategy: for events where the command may still be pending on the device,
 * issue QUERY TASK; if the device says the command is gone, just abort the
 * tag on the chip.  Otherwise issue ABORT TASK, then abort on the chip.  If
 * any TMF fails, fall through to full device-state recovery.
 */

void
pmcs_tgt_event_recovery(pmcs_hw_t *pwp, pmcwork_t *pwrk)
{
	pmcs_phy_t *pptr = pwrk->phy;
	pmcs_cmd_t *sp = pwrk->arg;
	pmcs_lun_t *lun = sp->cmd_lun;
	pmcs_xscsi_t *tgt = pwrk->xp;
	uint32_t event;
	uint32_t htag;
	uint32_t status;
	uint8_t dstate;
	int rv;

	ASSERT(pwrk->arg != NULL);
	ASSERT(pwrk->xp != NULL);
	pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: event recovery for "
	    "target 0x%p", __func__, (void *)pwrk->xp);
	htag = pwrk->htag;
	event = pwrk->ssp_event;
	/* Consume the event so this work isn't re-processed */
	pwrk->ssp_event = 0xffffffff;
	if (event == PMCOUT_STATUS_XFER_ERR_BREAK ||
	    event == PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY ||
	    event == PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT) {
		/* Command may be still pending on device */
		rv = pmcs_ssp_tmf(pwp, pptr, SAS_QUERY_TASK, htag,
		    lun->lun_num, &status);
		if (rv != 0) {
			/* QUERY TASK itself failed; do device recovery */
			goto out;
		}
		if (status == SAS_RSP_TMF_COMPLETE) {
			/* Command NOT pending on a device */
			pmcs_prt(pwp, PMCS_PRT_DEBUG,
			    "%s: No pending command for tgt 0x%p",
			    __func__, (void *)tgt);
			/* Nothing more to do, just abort it on chip */
			htag = 0;
		}
	}
	/*
	 * All other events left the command pending in the host
	 * Send abort task and abort it on the chip
	 */
	if (htag != 0) {
		if (pmcs_ssp_tmf(pwp, pptr, SAS_ABORT_TASK, htag,
		    lun->lun_num, &status))
			goto out;
	}
	(void) pmcs_abort(pwp, pptr, pwrk->htag, 0, 1);
	/*
	 * Abort either took care of work completion, or put device in
	 * a recovery state
	 */
	return;
out:
	/* Abort failed, do full device recovery */
	mutex_enter(&tgt->statlock);
	if (!pmcs_get_dev_state(pwp, tgt, &dstate))
		tgt->dev_state = dstate;

	if ((tgt->dev_state != PMCS_DEVICE_STATE_IN_RECOVERY) &&
	    (tgt->dev_state != PMCS_DEVICE_STATE_NON_OPERATIONAL)) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG,
		    "%s: Setting IN_RECOVERY for tgt 0x%p",
		    __func__, (void *)tgt);
		(void) pmcs_send_err_recovery_cmd(pwp,
		    PMCS_DEVICE_STATE_IN_RECOVERY, tgt);
	}
	mutex_exit(&tgt->statlock);
}
7598 
/*
 * SSP event recovery task.
 *
 * Scan every target for the event_recovery flag; for each flagged target,
 * walk its active queue looking for commands whose work structure carries a
 * pending SSP event and run per-command recovery on them.  Because recovery
 * drops statlock, the whole scan restarts from the top after each recovery.
 */
void
pmcs_ssp_event_recovery(pmcs_hw_t *pwp)
{
	int idx;
	pmcs_xscsi_t *tgt;
	pmcs_cmd_t *cp;
	pmcwork_t *pwrk;
	pmcs_phy_t *pphy;
	int er_flag;
	uint32_t idxpwrk;

restart:
	for (idx = 0; idx < pwp->max_dev; idx++) {
		/* Snapshot the target pointer under the HBA lock */
		mutex_enter(&pwp->lock);
		tgt = pwp->targets[idx];
		mutex_exit(&pwp->lock);
		if (tgt != NULL) {
			mutex_enter(&tgt->statlock);
			if (!tgt->assigned) {
				mutex_exit(&tgt->statlock);
				continue;
			}
			pphy = tgt->phy;
			er_flag = tgt->event_recovery;
			mutex_exit(&tgt->statlock);
			if (pphy != NULL && er_flag != 0) {
				/* Lock order: PHY first, then statlock */
				pmcs_lock_phy(pphy);
				mutex_enter(&tgt->statlock);
				pmcs_prt(pwp, PMCS_PRT_DEBUG,
				    "%s: found target(0x%p)", __func__,
				    (void *) tgt);

				/* Check what cmd expects recovery */
				mutex_enter(&tgt->aqlock);
				STAILQ_FOREACH(cp, &tgt->aq, cmd_next) {
					/*
					 * Since work structure is on this
					 * target aq, and only this thread
					 * is accessing it now, we do not need
					 * to lock it
					 */
					idxpwrk = PMCS_TAG_INDEX(cp->cmd_tag);
					pwrk = &pwp->work[idxpwrk];
					if (pwrk->htag != cp->cmd_tag) {
						/*
						 * aq may contain TMF commands,
						 * so we may not find work
						 * structure with htag
						 */
						break;
					}
					if (pwrk->ssp_event != 0 &&
					    pwrk->ssp_event !=
					    PMCS_REC_EVENT) {
						pmcs_prt(pwp,
						    PMCS_PRT_DEBUG,
						    "%s: pwrk(%p) ctag(0x%x)",
						    __func__, (void *) pwrk,
						    cp->cmd_tag);
						mutex_exit(&tgt->aqlock);
						mutex_exit(&tgt->statlock);
						pmcs_tgt_event_recovery(
						    pwp, pwrk);
						/*
						 * We dropped statlock, so
						 * restart scanning from scratch
						 */
						pmcs_unlock_phy(pphy);
						goto restart;
					}
				}
				mutex_exit(&tgt->aqlock);
				tgt->event_recovery = 0;
				pmcs_prt(pwp, PMCS_PRT_DEBUG,
				    "%s: end of SSP event recovery for "
				    "target(0x%p)", __func__, (void *) tgt);
				mutex_exit(&tgt->statlock);
				pmcs_unlock_phy(pphy);
			}
		}
	}
	pmcs_prt(pwp, PMCS_PRT_DEBUG,
	    "%s: end of SSP event recovery for pwp(0x%p)", __func__,
	    (void *) pwp);
}
7687 
7688 /*ARGSUSED2*/
7689 int
7690 pmcs_phy_constructor(void *buf, void *arg, int kmflags)
7691 {
7692 	pmcs_hw_t *pwp = (pmcs_hw_t *)arg;
7693 	pmcs_phy_t *phyp = (pmcs_phy_t *)buf;
7694 
7695 	mutex_init(&phyp->phy_lock, NULL, MUTEX_DRIVER,
7696 	    DDI_INTR_PRI(pwp->intr_pri));
7697 	cv_init(&phyp->abort_all_cv, NULL, CV_DRIVER, NULL);
7698 	return (0);
7699 }
7700 
7701 /*ARGSUSED1*/
7702 void
7703 pmcs_phy_destructor(void *buf, void *arg)
7704 {
7705 	pmcs_phy_t *phyp = (pmcs_phy_t *)buf;
7706 
7707 	cv_destroy(&phyp->abort_all_cv);
7708 	mutex_destroy(&phyp->phy_lock);
7709 }
7710 
7711 /*
7712  * Free all PHYs from the kmem_cache starting at phyp as well as everything
7713  * on the dead_phys list.
7714  *
7715  * NOTE: This function does not free root PHYs as they are not allocated
7716  * from the kmem_cache.
7717  *
7718  * No PHY locks are acquired as this should only be called during DDI_DETACH
7719  * or soft reset (while pmcs interrupts are disabled).
7720  */
7721 void
7722 pmcs_free_all_phys(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
7723 {
7724 	pmcs_phy_t *tphyp, *nphyp;
7725 
7726 	if (phyp == NULL) {
7727 		return;
7728 	}
7729 
7730 	tphyp = phyp;
7731 	while (tphyp) {
7732 		nphyp = tphyp->sibling;
7733 
7734 		if (tphyp->children) {
7735 			pmcs_free_all_phys(pwp, tphyp->children);
7736 			tphyp->children = NULL;
7737 		}
7738 		if (!IS_ROOT_PHY(tphyp)) {
7739 			kmem_cache_free(pwp->phy_cache, tphyp);
7740 		}
7741 
7742 		tphyp = nphyp;
7743 	}
7744 
7745 	tphyp = pwp->dead_phys;
7746 	while (tphyp) {
7747 		nphyp = tphyp->sibling;
7748 		kmem_cache_free(pwp->phy_cache, tphyp);
7749 		tphyp = nphyp;
7750 	}
7751 	pwp->dead_phys = NULL;
7752 }
7753 
7754 /*
7755  * Free a list of PHYs linked together by the sibling pointer back to the
7756  * kmem cache from whence they came.  This function does not recurse, so the
7757  * caller must ensure there are no children.
7758  */
7759 void
7760 pmcs_free_phys(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
7761 {
7762 	pmcs_phy_t *next_phy;
7763 
7764 	while (phyp) {
7765 		next_phy = phyp->sibling;
7766 		ASSERT(!mutex_owned(&phyp->phy_lock));
7767 		kmem_cache_free(pwp->phy_cache, phyp);
7768 		phyp = next_phy;
7769 	}
7770 }
7771 
7772 /*
7773  * Make a copy of an existing PHY structure.  This is used primarily in
7774  * discovery to compare the contents of an existing PHY with what gets
7775  * reported back by an expander.
7776  *
7777  * This function must not be called from any context where sleeping is
7778  * not possible.
7779  *
7780  * The new PHY is returned unlocked.
7781  */
7782 static pmcs_phy_t *
7783 pmcs_clone_phy(pmcs_phy_t *orig_phy)
7784 {
7785 	pmcs_phy_t *local;
7786 
7787 	local = kmem_cache_alloc(orig_phy->pwp->phy_cache, KM_SLEEP);
7788 
7789 	/*
7790 	 * Go ahead and just copy everything...
7791 	 */
7792 	*local = *orig_phy;
7793 
7794 	/*
7795 	 * But the following must be set appropriately for this copy
7796 	 */
7797 	local->sibling = NULL;
7798 	local->children = NULL;
7799 	mutex_init(&local->phy_lock, NULL, MUTEX_DRIVER,
7800 	    DDI_INTR_PRI(orig_phy->pwp->intr_pri));
7801 
7802 	return (local);
7803 }
7804 
7805 int
7806 pmcs_check_acc_handle(ddi_acc_handle_t handle)
7807 {
7808 	ddi_fm_error_t de;
7809 
7810 	if (handle == NULL) {
7811 		return (DDI_FAILURE);
7812 	}
7813 	ddi_fm_acc_err_get(handle, &de, DDI_FME_VER0);
7814 	return (de.fme_status);
7815 }
7816 
7817 int
7818 pmcs_check_dma_handle(ddi_dma_handle_t handle)
7819 {
7820 	ddi_fm_error_t de;
7821 
7822 	if (handle == NULL) {
7823 		return (DDI_FAILURE);
7824 	}
7825 	ddi_fm_dma_err_get(handle, &de, DDI_FME_VER0);
7826 	return (de.fme_status);
7827 }
7828 
7829 
7830 void
7831 pmcs_fm_ereport(pmcs_hw_t *pwp, char *detail)
7832 {
7833 	uint64_t ena;
7834 	char buf[FM_MAX_CLASS];
7835 
7836 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s", DDI_FM_DEVICE, detail);
7837 	ena = fm_ena_generate(0, FM_ENA_FMT1);
7838 	if (DDI_FM_EREPORT_CAP(pwp->fm_capabilities)) {
7839 		ddi_fm_ereport_post(pwp->dip, buf, ena, DDI_NOSLEEP,
7840 		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0, NULL);
7841 	}
7842 }
7843 
7844 int
7845 pmcs_check_acc_dma_handle(pmcs_hw_t *pwp)
7846 {
7847 	pmcs_chunk_t *pchunk;
7848 	int i;
7849 
7850 	/* check all acc & dma handles allocated in attach */
7851 	if ((pmcs_check_acc_handle(pwp->pci_acc_handle) != DDI_SUCCESS) ||
7852 	    (pmcs_check_acc_handle(pwp->msg_acc_handle) != DDI_SUCCESS) ||
7853 	    (pmcs_check_acc_handle(pwp->top_acc_handle) != DDI_SUCCESS) ||
7854 	    (pmcs_check_acc_handle(pwp->mpi_acc_handle) != DDI_SUCCESS) ||
7855 	    (pmcs_check_acc_handle(pwp->gsm_acc_handle) != DDI_SUCCESS)) {
7856 		goto check_failed;
7857 	}
7858 
7859 	for (i = 0; i < PMCS_NIQ; i++) {
7860 		if ((pmcs_check_dma_handle(
7861 		    pwp->iqp_handles[i]) != DDI_SUCCESS) ||
7862 		    (pmcs_check_acc_handle(
7863 		    pwp->iqp_acchdls[i]) != DDI_SUCCESS)) {
7864 			goto check_failed;
7865 		}
7866 	}
7867 
7868 	for (i = 0; i < PMCS_NOQ; i++) {
7869 		if ((pmcs_check_dma_handle(
7870 		    pwp->oqp_handles[i]) != DDI_SUCCESS) ||
7871 		    (pmcs_check_acc_handle(
7872 		    pwp->oqp_acchdls[i]) != DDI_SUCCESS)) {
7873 			goto check_failed;
7874 		}
7875 	}
7876 
7877 	if ((pmcs_check_dma_handle(pwp->cip_handles) != DDI_SUCCESS) ||
7878 	    (pmcs_check_acc_handle(pwp->cip_acchdls) != DDI_SUCCESS)) {
7879 		goto check_failed;
7880 	}
7881 
7882 	if (pwp->fwlog &&
7883 	    ((pmcs_check_dma_handle(pwp->fwlog_hndl) != DDI_SUCCESS) ||
7884 	    (pmcs_check_acc_handle(pwp->fwlog_acchdl) != DDI_SUCCESS))) {
7885 		goto check_failed;
7886 	}
7887 
7888 	if (pwp->regdump_hndl && pwp->regdump_acchdl &&
7889 	    ((pmcs_check_dma_handle(pwp->regdump_hndl) != DDI_SUCCESS) ||
7890 	    (pmcs_check_acc_handle(pwp->regdump_acchdl)
7891 	    != DDI_SUCCESS))) {
7892 		goto check_failed;
7893 	}
7894 
7895 
7896 	pchunk = pwp->dma_chunklist;
7897 	while (pchunk) {
7898 		if ((pmcs_check_acc_handle(pchunk->acc_handle)
7899 		    != DDI_SUCCESS) ||
7900 		    (pmcs_check_dma_handle(pchunk->dma_handle)
7901 		    != DDI_SUCCESS)) {
7902 			goto check_failed;
7903 		}
7904 		pchunk = pchunk->next;
7905 	}
7906 
7907 	return (0);
7908 
7909 check_failed:
7910 
7911 	return (1);
7912 }
7913 
/*
 * pmcs_handle_dead_phys
 *
 * If the PHY has no outstanding work associated with it, remove it from
 * the dead PHY list and free it.
 *
 * If pwp->ds_err_recovering or pwp->configuring is set, don't run.
 * This keeps routines that need to submit work to the chip from having to
 * hold PHY locks to ensure that PHYs don't disappear while they do their work.
 */
void
pmcs_handle_dead_phys(pmcs_hw_t *pwp)
{
	pmcs_phy_t *phyp, *nphyp, *pphyp;

	mutex_enter(&pwp->lock);
	mutex_enter(&pwp->config_lock);

	/* Bail out if discovery or error recovery is in flight */
	if (pwp->configuring | pwp->ds_err_recovering) {
		mutex_exit(&pwp->config_lock);
		mutex_exit(&pwp->lock);
		return;
	}

	/*
	 * Check every PHY in the dead PHY list
	 */
	mutex_enter(&pwp->dead_phylist_lock);
	phyp = pwp->dead_phys;
	pphyp = NULL;	/* Set previous PHY to NULL */

	while (phyp != NULL) {
		pmcs_lock_phy(phyp);
		ASSERT(phyp->dead);

		/* Save the successor before we potentially free phyp */
		nphyp = phyp->dead_next;

		/*
		 * Check for outstanding work
		 */
		if (phyp->ref_count > 0) {
			/* Still referenced; keep it on the list */
			pmcs_unlock_phy(phyp);
			pphyp = phyp;	/* This PHY becomes "previous" */
		} else if (phyp->target) {
			/* Target still attached; keep it on the list */
			pmcs_unlock_phy(phyp);
			pmcs_prt(pwp, PMCS_PRT_DEBUG1,
			    "%s: Not freeing PHY 0x%p: target 0x%p is not free",
			    __func__, (void *)phyp, (void *)phyp->target);
			pphyp = phyp;
		} else {
			/*
			 * No outstanding work or target references. Remove it
			 * from the list and free it
			 */
			pmcs_prt(pwp, PMCS_PRT_DEBUG,
			    "%s: Freeing inactive dead PHY 0x%p @ %s "
			    "target = 0x%p", __func__, (void *)phyp,
			    phyp->path, (void *)phyp->target);
			/*
			 * If pphyp is NULL, then phyp was the head of the list,
			 * so just reset the head to nphyp. Otherwise, the
			 * previous PHY will now point to nphyp (the next PHY)
			 */
			if (pphyp == NULL) {
				pwp->dead_phys = nphyp;
			} else {
				pphyp->dead_next = nphyp;
			}
			/*
			 * If the target still points to this PHY, remove
			 * that linkage now.
			 */
			if (phyp->target) {
				mutex_enter(&phyp->target->statlock);
				if (phyp->target->phy == phyp) {
					phyp->target->phy = NULL;
				}
				mutex_exit(&phyp->target->statlock);
			}
			pmcs_unlock_phy(phyp);
			kmem_cache_free(pwp->phy_cache, phyp);
		}

		phyp = nphyp;
	}

	mutex_exit(&pwp->dead_phylist_lock);
	mutex_exit(&pwp->config_lock);
	mutex_exit(&pwp->lock);
}
8004 
/*
 * Atomically take a reference on a PHY (tracks outstanding work/users).
 */
void
pmcs_inc_phy_ref_count(pmcs_phy_t *phyp)
{
	atomic_inc_32(&phyp->ref_count);
}
8010 
/*
 * Atomically release a reference on a PHY.  The count must not already
 * be zero (that would indicate an unbalanced inc/dec pairing).
 */
void
pmcs_dec_phy_ref_count(pmcs_phy_t *phyp)
{
	ASSERT(phyp->ref_count != 0);
	atomic_dec_32(&phyp->ref_count);
}
8017 
/*
 * pmcs_reap_dead_phy
 *
 * This function is called from pmcs_new_tport when we have a PHY
 * without a target pointer.  It's possible in that case that this PHY
 * may have a "brother" on the dead_phys list.  That is, it may be the same as
 * this one but with a different root PHY number (e.g. pp05 vs. pp04).  If
 * that's the case, update the dead PHY and this new PHY.  If that's not the
 * case, we should get a tran_tgt_init on this after it's reported to SCSA.
 *
 * Called with PHY locked.
 */
static void
pmcs_reap_dead_phy(pmcs_phy_t *phyp)
{
	pmcs_hw_t *pwp = phyp->pwp;
	pmcs_phy_t *ctmp;

	ASSERT(mutex_owned(&phyp->phy_lock));

	/*
	 * Check the dead PHYs list
	 */
	mutex_enter(&pwp->dead_phylist_lock);
	ctmp = pwp->dead_phys;
	while (ctmp) {
		/* Must be the same iport and same SAS address to match */
		if ((ctmp->iport != phyp->iport) ||
		    (memcmp((void *)&ctmp->sas_address[0],
		    (void *)&phyp->sas_address[0], 8))) {
			ctmp = ctmp->dead_next;
			continue;
		}

		/*
		 * Same SAS address on same iport.  Now check to see if
		 * the PHY path is the same with the possible exception
		 * of the root PHY number.
		 * The "5" is the string length of "pp00."
		 */
		if ((strnlen(phyp->path, 5) >= 5) &&
		    (strnlen(ctmp->path, 5) >= 5)) {
			if (memcmp((void *)&phyp->path[5],
			    (void *)&ctmp->path[5],
			    strnlen(phyp->path, 32) - 5) == 0) {
				break;
			}
		}

		ctmp = ctmp->dead_next;
	}
	mutex_exit(&pwp->dead_phylist_lock);

	/*
	 * Found a match.  Remove the target linkage and drop the
	 * ref count on the old PHY.  Then, increment the ref count
	 * on the new PHY to compensate.
	 */
	if (ctmp) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
		    "%s: Found match in dead PHY list for new PHY %s",
		    __func__, phyp->path);
		if (ctmp->target) {
			/*
			 * If there is a pointer to the target in the dead
			 * PHY, and that PHY's ref_count drops to 0, we can
			 * clear the target linkage now.  If the PHY's
			 * ref_count is > 1, then there may be multiple
			 * LUNs still remaining, so leave the linkage.
			 */
			pmcs_inc_phy_ref_count(phyp);
			pmcs_dec_phy_ref_count(ctmp);
			phyp->target = ctmp->target;
			/*
			 * Update the target's linkage as well
			 */
			mutex_enter(&phyp->target->statlock);
			phyp->target->phy = phyp;
			phyp->target->dtype = phyp->dtype;
			mutex_exit(&phyp->target->statlock);

			if (ctmp->ref_count == 0) {
				ctmp->target = NULL;
			}
		}
	}
}
8104 
8105 /*
8106  * Called with iport lock held
8107  */
8108 void
8109 pmcs_add_phy_to_iport(pmcs_iport_t *iport, pmcs_phy_t *phyp)
8110 {
8111 	ASSERT(mutex_owned(&iport->lock));
8112 	ASSERT(phyp);
8113 	ASSERT(!list_link_active(&phyp->list_node));
8114 	iport->nphy++;
8115 	pmcs_smhba_add_iport_prop(iport, DATA_TYPE_INT32, PMCS_NUM_PHYS,
8116 	    &iport->nphy);
8117 	list_insert_tail(&iport->phys, phyp);
8118 	mutex_enter(&iport->refcnt_lock);
8119 	iport->refcnt++;
8120 	mutex_exit(&iport->refcnt_lock);
8121 }
8122 
8123 /*
8124  * Called with the iport lock held
8125  */
8126 void
8127 pmcs_remove_phy_from_iport(pmcs_iport_t *iport, pmcs_phy_t *phyp)
8128 {
8129 	pmcs_phy_t *pptr, *next_pptr;
8130 
8131 	ASSERT(mutex_owned(&iport->lock));
8132 
8133 	/*
8134 	 * If phyp is NULL, remove all PHYs from the iport
8135 	 */
8136 	if (phyp == NULL) {
8137 		for (pptr = list_head(&iport->phys); pptr != NULL;
8138 		    pptr = next_pptr) {
8139 			next_pptr = list_next(&iport->phys, pptr);
8140 			mutex_enter(&pptr->phy_lock);
8141 			pptr->iport = NULL;
8142 			mutex_exit(&pptr->phy_lock);
8143 			pmcs_rele_iport(iport);
8144 			list_remove(&iport->phys, pptr);
8145 		}
8146 		iport->nphy = 0;
8147 		return;
8148 	}
8149 
8150 	ASSERT(phyp);
8151 	ASSERT(iport->nphy > 0);
8152 	ASSERT(list_link_active(&phyp->list_node));
8153 	iport->nphy--;
8154 	pmcs_smhba_add_iport_prop(iport, DATA_TYPE_INT32, PMCS_NUM_PHYS,
8155 	    &iport->nphy);
8156 	list_remove(&iport->phys, phyp);
8157 	pmcs_rele_iport(iport);
8158 }
8159 
/*
 * This function checks to see if the target pointed to by phyp is still
 * correct.  This is done by comparing the target's unit address with the
 * SAS address in phyp.
 *
 * Called with PHY locked and target statlock held
 *
 * Returns B_TRUE if the addresses match, B_FALSE otherwise.
 */
static boolean_t
pmcs_phy_target_match(pmcs_phy_t *phyp)
{
	uint64_t wwn;
	char unit_address[PMCS_MAX_UA_SIZE];
	boolean_t rval = B_FALSE;

	ASSERT(phyp);
	ASSERT(phyp->target);
	ASSERT(mutex_owned(&phyp->phy_lock));
	ASSERT(mutex_owned(&phyp->target->statlock));

	/* Render the PHY's SAS address in the same form as a unit address */
	wwn = pmcs_barray2wwn(phyp->sas_address);
	(void) scsi_wwn_to_wwnstr(wwn, 1, unit_address);

	/*
	 * NOTE(review): the compare length is bounded by the target's
	 * stored unit-address length, so this is a prefix match if the
	 * stored address were ever shorter than the rendered one.
	 * Presumably both are fixed-length WWN strings — confirm.
	 */
	if (memcmp((void *)unit_address, (void *)phyp->target->unit_address,
	    strnlen(phyp->target->unit_address, PMCS_MAX_UA_SIZE)) == 0) {
		rval = B_TRUE;
	}

	return (rval);
}
8189 
8190 void
8191 pmcs_start_dev_state_recovery(pmcs_xscsi_t *xp, pmcs_phy_t *phyp)
8192 {
8193 	ASSERT(mutex_owned(&xp->statlock));
8194 	ASSERT(xp->pwp != NULL);
8195 
8196 	if (xp->recover_wait == 0) {
8197 		pmcs_prt(xp->pwp, PMCS_PRT_DEBUG_DEV_STATE,
8198 		    "%s: Start ds_recovery for tgt 0x%p/PHY 0x%p (%s)",
8199 		    __func__, (void *)xp, (void *)phyp, phyp->path);
8200 		xp->recover_wait = 1;
8201 
8202 		/*
8203 		 * Rather than waiting for the watchdog timer, we'll
8204 		 * kick it right now.
8205 		 */
8206 		SCHEDULE_WORK(xp->pwp, PMCS_WORK_DS_ERR_RECOVERY);
8207 		(void) ddi_taskq_dispatch(xp->pwp->tq, pmcs_worker, xp->pwp,
8208 		    DDI_NOSLEEP);
8209 	}
8210 }
8211 
8212 /*
8213  * Increment the phy ds error retry count.
8214  * If too many retries, mark phy dead and restart discovery;
8215  * otherwise schedule ds recovery.
8216  */
8217 static void
8218 pmcs_handle_ds_recovery_error(pmcs_phy_t *phyp, pmcs_xscsi_t *tgt,
8219     pmcs_hw_t *pwp, const char *func_name, int line, char *reason_string)
8220 {
8221 	ASSERT(mutex_owned(&phyp->phy_lock));
8222 
8223 	phyp->ds_recovery_retries++;
8224 
8225 	if (phyp->ds_recovery_retries > PMCS_MAX_DS_RECOVERY_RETRIES) {
8226 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
8227 		    "%s: retry limit reached after %s to PHY %s failed",
8228 		    func_name, reason_string, phyp->path);
8229 		tgt->recover_wait = 0;
8230 		phyp->dead = 1;
8231 		PHY_CHANGED_AT_LOCATION(pwp, phyp, func_name, line);
8232 		RESTART_DISCOVERY(pwp);
8233 	} else {
8234 		SCHEDULE_WORK(pwp, PMCS_WORK_DS_ERR_RECOVERY);
8235 	}
8236 }
8237