1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  *
21  *
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * This file contains various support routines.
28  */
29 
30 #include <sys/scsi/adapters/pmcs/pmcs.h>
31 
32 /*
33  * Local static data
34  */
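/*
 * tgtmap_usec is the stabilization window handed to
 * scsi_hba_tgtmap_create() below; MICROSEC (one second, expressed in
 * microseconds) gives target observations roughly a second to settle
 * before the map is committed.
 */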
35 static int tgtmap_usec = MICROSEC;
36 
37 /*
38  * SAS Topology Configuration
39  */
40 static void pmcs_new_tport(pmcs_hw_t *, pmcs_phy_t *);
41 static void pmcs_configure_expander(pmcs_hw_t *, pmcs_phy_t *, pmcs_iport_t *);
42 
43 static boolean_t pmcs_check_expanders(pmcs_hw_t *, pmcs_phy_t *);
44 static void pmcs_check_expander(pmcs_hw_t *, pmcs_phy_t *);
45 static void pmcs_clear_expander(pmcs_hw_t *, pmcs_phy_t *, int);
46 
47 static int pmcs_expander_get_nphy(pmcs_hw_t *, pmcs_phy_t *);
48 static int pmcs_expander_content_discover(pmcs_hw_t *, pmcs_phy_t *,
49     pmcs_phy_t *);
50 
51 static int pmcs_smp_function_result(pmcs_hw_t *, smp_response_frame_t *);
52 static boolean_t pmcs_validate_devid(pmcs_phy_t *, pmcs_phy_t *, uint32_t);
53 static void pmcs_clear_phys(pmcs_hw_t *, pmcs_phy_t *);
54 static int pmcs_configure_new_devices(pmcs_hw_t *, pmcs_phy_t *);
55 static boolean_t pmcs_report_observations(pmcs_hw_t *);
56 static boolean_t pmcs_report_iport_observations(pmcs_hw_t *, pmcs_iport_t *,
57     pmcs_phy_t *);
58 static pmcs_phy_t *pmcs_find_phy_needing_work(pmcs_hw_t *, pmcs_phy_t *);
59 static int pmcs_kill_devices(pmcs_hw_t *, pmcs_phy_t *);
60 static void pmcs_lock_phy_impl(pmcs_phy_t *, int);
61 static void pmcs_unlock_phy_impl(pmcs_phy_t *, int);
62 static pmcs_phy_t *pmcs_clone_phy(pmcs_phy_t *);
63 static boolean_t pmcs_configure_phy(pmcs_hw_t *, pmcs_phy_t *);
64 static void pmcs_reap_dead_phy(pmcs_phy_t *);
65 static pmcs_iport_t *pmcs_get_iport_by_ua(pmcs_hw_t *, char *);
66 static boolean_t pmcs_phy_target_match(pmcs_phy_t *);
67 static void pmcs_handle_ds_recovery_error(pmcs_phy_t *phyp,
68     pmcs_xscsi_t *tgt, pmcs_hw_t *pwp, const char *func_name, int line,
69     char *reason_string);
70 
71 /*
72  * Often used strings
73  */
74 const char pmcs_nowrk[] = "%s: unable to get work structure";
75 const char pmcs_nomsg[] = "%s: unable to get Inbound Message entry";
76 const char pmcs_timeo[] = "!%s: command timed out";
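
/*
 * The leading '!' in pmcs_timeo follows the cmn_err(9F) convention:
 * the message is sent only to the system log, never to the console.
 */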
77 
78 extern const ddi_dma_attr_t pmcs_dattr;
79 
80 /*
81  * Some initial setup steps.
82  */
83 
84 int
85 pmcs_setup(pmcs_hw_t *pwp)
86 {
87 	uint32_t barval = pwp->mpibar;
88 	uint32_t i, scratch, regbar, regoff, barbar, baroff;
89 	uint32_t new_ioq_depth, ferr = 0;
90 
91 	/*
92 	 * Check current state. If we're not at READY state,
93 	 * we can't go further.
94 	 */
95 	scratch = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1);
96 	if ((scratch & PMCS_MSGU_AAP_STATE_MASK) == PMCS_MSGU_AAP_STATE_ERROR) {
97 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: AAP Error State (0x%x)",
98 		    __func__, pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1) &
99 		    PMCS_MSGU_AAP_ERROR_MASK);
100 		pmcs_fm_ereport(pwp, DDI_FM_DEVICE_INVAL_STATE);
101 		ddi_fm_service_impact(pwp->dip, DDI_SERVICE_LOST);
102 		return (-1);
103 	}
104 	if ((scratch & PMCS_MSGU_AAP_STATE_MASK) != PMCS_MSGU_AAP_STATE_READY) {
105 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
106 		    "%s: AAP unit not ready (state 0x%x)",
107 		    __func__, scratch & PMCS_MSGU_AAP_STATE_MASK);
108 		pmcs_fm_ereport(pwp, DDI_FM_DEVICE_INVAL_STATE);
109 		ddi_fm_service_impact(pwp->dip, DDI_SERVICE_LOST);
110 		return (-1);
111 	}
112 
113 	/*
114 	 * Read the offset from the Message Unit scratchpad 0 register.
115 	 * This allows us to read the MPI Configuration table.
116 	 *
117 	 * Check its signature for validity.
118 	 */
119 	baroff = barval;
120 	barbar = barval >> PMCS_MSGU_MPI_BAR_SHIFT;
121 	baroff &= PMCS_MSGU_MPI_OFFSET_MASK;
122 
123 	regoff = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH0);
124 	regbar = regoff >> PMCS_MSGU_MPI_BAR_SHIFT;
125 	regoff &= PMCS_MSGU_MPI_OFFSET_MASK;
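
	/*
	 * For illustration, assuming a hypothetical SCRATCH0 value of
	 * 0x00040100: the bits above PMCS_MSGU_MPI_BAR_SHIFT select which
	 * BAR holds the MPI Configuration table, and the bits under
	 * PMCS_MSGU_MPI_OFFSET_MASK give the byte offset within that BAR.
	 * The same decode was applied above to the value passed in via
	 * pwp->mpibar so that the two can be cross-checked below.
	 */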
126 
127 	if (regoff > baroff) {
128 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: bad MPI Table Length "
129 		    "(register offset=0x%08x, passed offset=0x%08x)", __func__,
130 		    regoff, baroff);
131 		return (-1);
132 	}
133 	if (regbar != barbar) {
134 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: bad MPI BAR (register "
135 		    "BAR=0x%08x, passed BAR=0x%08x)", __func__,
136 		    regbar, barbar);
137 		return (-1);
138 	}
139 	pwp->mpi_offset = regoff;
140 	if (pmcs_rd_mpi_tbl(pwp, PMCS_MPI_AS) != PMCS_SIGNATURE) {
141 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
142 		    "%s: Bad MPI Configuration Table Signature 0x%x", __func__,
143 		    pmcs_rd_mpi_tbl(pwp, PMCS_MPI_AS));
144 		return (-1);
145 	}
146 
147 	if (pmcs_rd_mpi_tbl(pwp, PMCS_MPI_IR) != PMCS_MPI_REVISION1) {
148 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
149 		    "%s: Bad MPI Configuration Revision 0x%x", __func__,
150 		    pmcs_rd_mpi_tbl(pwp, PMCS_MPI_IR));
151 		return (-1);
152 	}
153 
154 	/*
155 	 * Generate offsets for the General System, Inbound Queue Configuration
156 	 * and Outbound Queue configuration tables. This way the macros to
157 	 * access those tables will work correctly.
158 	 */
159 	pwp->mpi_gst_offset =
160 	    pwp->mpi_offset + pmcs_rd_mpi_tbl(pwp, PMCS_MPI_GSTO);
161 	pwp->mpi_iqc_offset =
162 	    pwp->mpi_offset + pmcs_rd_mpi_tbl(pwp, PMCS_MPI_IQCTO);
163 	pwp->mpi_oqc_offset =
164 	    pwp->mpi_offset + pmcs_rd_mpi_tbl(pwp, PMCS_MPI_OQCTO);
165 
166 	pwp->fw = pmcs_rd_mpi_tbl(pwp, PMCS_MPI_FW);
167 
168 	pwp->max_cmd = pmcs_rd_mpi_tbl(pwp, PMCS_MPI_MOIO);
169 	pwp->max_dev = pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO0) >> 16;
170 
171 	pwp->max_iq = PMCS_MNIQ(pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO1));
172 	pwp->max_oq = PMCS_MNOQ(pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO1));
173 	pwp->nphy = PMCS_NPHY(pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO1));
174 	if (pwp->max_iq <= PMCS_NIQ) {
175 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: not enough Inbound Queues "
176 		    "supported (need %d, max_iq=%d)", __func__, PMCS_NIQ,
177 		    pwp->max_iq);
178 		return (-1);
179 	}
180 	if (pwp->max_oq <= PMCS_NOQ) {
181 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: not enough Outbound Queues "
182 		    "supported (need %d, max_oq=%d)", __func__, PMCS_NOQ,
183 		    pwp->max_oq);
184 		return (-1);
185 	}
186 	if (pwp->nphy == 0) {
187 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: zero phys reported",
188 		    __func__);
189 		return (-1);
190 	}
191 	if (PMCS_HPIQ(pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO1))) {
192 		pwp->hipri_queue = (1 << PMCS_IQ_OTHER);
193 	}
194 
195 
196 	for (i = 0; i < pwp->nphy; i++) {
197 		PMCS_MPI_EVQSET(pwp, PMCS_OQ_EVENTS, i);
198 		PMCS_MPI_NCQSET(pwp, PMCS_OQ_EVENTS, i);
199 	}
200 
201 	pmcs_wr_mpi_tbl(pwp, PMCS_MPI_INFO2,
202 	    (PMCS_OQ_EVENTS << GENERAL_EVENT_OQ_SHIFT) |
203 	    (PMCS_OQ_EVENTS << DEVICE_HANDLE_REMOVED_SHIFT));
204 
205 	/*
206 	 * Verify that ioq_depth is valid (> 0 and not so high that it
207 	 * would cause us to overrun the chip with commands).
208 	 */
209 	if (pwp->ioq_depth == 0) {
210 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
211 		    "%s: I/O queue depth set to 0. Setting to %d",
212 		    __func__, PMCS_NQENTRY);
213 		pwp->ioq_depth = PMCS_NQENTRY;
214 	}
215 
216 	if (pwp->ioq_depth < PMCS_MIN_NQENTRY) {
217 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
218 		    "%s: I/O queue depth set too low (%d). Setting to %d",
219 		    __func__, pwp->ioq_depth, PMCS_MIN_NQENTRY);
220 		pwp->ioq_depth = PMCS_MIN_NQENTRY;
221 	}
222 
223 	if (pwp->ioq_depth > (pwp->max_cmd / (PMCS_IO_IQ_MASK + 1))) {
224 		new_ioq_depth = pwp->max_cmd / (PMCS_IO_IQ_MASK + 1);
225 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
226 		    "%s: I/O queue depth set too high (%d). Setting to %d",
227 		    __func__, pwp->ioq_depth, new_ioq_depth);
228 		pwp->ioq_depth = new_ioq_depth;
229 	}
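
	/*
	 * For illustration, with hypothetical values of max_cmd == 1024 and
	 * PMCS_IO_IQ_MASK + 1 == 8 I/O queues, a requested ioq_depth of 256
	 * exceeds 1024 / 8 == 128 and is clamped to 128, so the queues can
	 * never hold more commands in flight than the chip advertises.
	 */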
230 
231 	/*
232 	 * Allocate consistent memory for OQs and IQs.
233 	 */
234 	pwp->iqp_dma_attr = pwp->oqp_dma_attr = pmcs_dattr;
235 	pwp->iqp_dma_attr.dma_attr_align =
236 	    pwp->oqp_dma_attr.dma_attr_align = PMCS_QENTRY_SIZE;
237 
238 	/*
239 	 * The Rev C chip has the ability to do PIO to or from consistent
240 	 * memory anywhere in a 64 bit address space, but the firmware is
241 	 * not presently set up to do so.
242 	 */
243 	pwp->iqp_dma_attr.dma_attr_addr_hi =
244 	    pwp->oqp_dma_attr.dma_attr_addr_hi = 0x000000FFFFFFFFFFull;
245 
246 	for (i = 0; i < PMCS_NIQ; i++) {
247 		if (pmcs_dma_setup(pwp, &pwp->iqp_dma_attr,
248 		    &pwp->iqp_acchdls[i],
249 		    &pwp->iqp_handles[i], PMCS_QENTRY_SIZE * pwp->ioq_depth,
250 		    (caddr_t *)&pwp->iqp[i], &pwp->iqaddr[i]) == B_FALSE) {
251 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
252 			    "Failed to setup DMA for iqp[%d]", i);
253 			return (-1);
254 		}
255 		bzero(pwp->iqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth);
256 	}
257 
258 	for (i = 0; i < PMCS_NOQ; i++) {
259 		if (pmcs_dma_setup(pwp, &pwp->oqp_dma_attr,
260 		    &pwp->oqp_acchdls[i],
261 		    &pwp->oqp_handles[i], PMCS_QENTRY_SIZE * pwp->ioq_depth,
262 		    (caddr_t *)&pwp->oqp[i], &pwp->oqaddr[i]) == B_FALSE) {
263 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
264 			    "Failed to setup DMA for oqp[%d]", i);
265 			return (-1);
266 		}
267 		bzero(pwp->oqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth);
268 	}
269 
270 	/*
271 	 * Install the IQ and OQ addresses (and null out the rest).
272 	 */
273 	for (i = 0; i < pwp->max_iq; i++) {
274 		pwp->iqpi_offset[i] = pmcs_rd_iqc_tbl(pwp, PMCS_IQPIOFFX(i));
275 		if (i < PMCS_NIQ) {
276 			if (i != PMCS_IQ_OTHER) {
277 				pmcs_wr_iqc_tbl(pwp, PMCS_IQC_PARMX(i),
278 				    pwp->ioq_depth | (PMCS_QENTRY_SIZE << 16));
279 			} else {
280 				pmcs_wr_iqc_tbl(pwp, PMCS_IQC_PARMX(i),
281 				    (1 << 30) | pwp->ioq_depth |
282 				    (PMCS_QENTRY_SIZE << 16));
283 			}
284 			pmcs_wr_iqc_tbl(pwp, PMCS_IQBAHX(i),
285 			    DWORD1(pwp->iqaddr[i]));
286 			pmcs_wr_iqc_tbl(pwp, PMCS_IQBALX(i),
287 			    DWORD0(pwp->iqaddr[i]));
288 			pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBAHX(i),
289 			    DWORD1(pwp->ciaddr+IQ_OFFSET(i)));
290 			pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBALX(i),
291 			    DWORD0(pwp->ciaddr+IQ_OFFSET(i)));
292 		} else {
293 			pmcs_wr_iqc_tbl(pwp, PMCS_IQC_PARMX(i), 0);
294 			pmcs_wr_iqc_tbl(pwp, PMCS_IQBAHX(i), 0);
295 			pmcs_wr_iqc_tbl(pwp, PMCS_IQBALX(i), 0);
296 			pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBAHX(i), 0);
297 			pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBALX(i), 0);
298 		}
299 	}
300 
301 	for (i = 0; i < pwp->max_oq; i++) {
302 		pwp->oqci_offset[i] = pmcs_rd_oqc_tbl(pwp, PMCS_OQCIOFFX(i));
303 		if (i < PMCS_NOQ) {
304 			pmcs_wr_oqc_tbl(pwp, PMCS_OQC_PARMX(i), pwp->ioq_depth |
305 			    (PMCS_QENTRY_SIZE << 16) | OQIEX);
306 			pmcs_wr_oqc_tbl(pwp, PMCS_OQBAHX(i),
307 			    DWORD1(pwp->oqaddr[i]));
308 			pmcs_wr_oqc_tbl(pwp, PMCS_OQBALX(i),
309 			    DWORD0(pwp->oqaddr[i]));
310 			pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBAHX(i),
311 			    DWORD1(pwp->ciaddr+OQ_OFFSET(i)));
312 			pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBALX(i),
313 			    DWORD0(pwp->ciaddr+OQ_OFFSET(i)));
314 			pmcs_wr_oqc_tbl(pwp, PMCS_OQIPARM(i),
315 			    pwp->oqvec[i] << 24);
316 			pmcs_wr_oqc_tbl(pwp, PMCS_OQDICX(i), 0);
317 		} else {
318 			pmcs_wr_oqc_tbl(pwp, PMCS_OQC_PARMX(i), 0);
319 			pmcs_wr_oqc_tbl(pwp, PMCS_OQBAHX(i), 0);
320 			pmcs_wr_oqc_tbl(pwp, PMCS_OQBALX(i), 0);
321 			pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBAHX(i), 0);
322 			pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBALX(i), 0);
323 			pmcs_wr_oqc_tbl(pwp, PMCS_OQIPARM(i), 0);
324 			pmcs_wr_oqc_tbl(pwp, PMCS_OQDICX(i), 0);
325 		}
326 	}
327 
328 	/*
329 	 * Set up logging, if defined.
330 	 */
331 	if (pwp->fwlog) {
332 		uint64_t logdma = pwp->fwaddr;
333 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_MELBAH, DWORD1(logdma));
334 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_MELBAL, DWORD0(logdma));
335 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_MELBS, PMCS_FWLOG_SIZE >> 1);
336 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_MELSEV, pwp->fwlog);
337 		logdma += (PMCS_FWLOG_SIZE >> 1);
338 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_IELBAH, DWORD1(logdma));
339 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_IELBAL, DWORD0(logdma));
340 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_IELBS, PMCS_FWLOG_SIZE >> 1);
341 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_IELSEV, pwp->fwlog);
342 	}
343 
344 	/*
345 	 * Interrupt vectors, outbound queues, and odb_auto_clear
346 	 *
347 	 * MSI/MSI-X:
348 	 * If we got 4 interrupt vectors, we'll assign one to each outbound
349 	 * queue as well as the fatal interrupt, and auto clear can be set
350 	 * for each.
351 	 *
352 	 * If we only got 2 vectors, one will be used for I/O completions
353 	 * and the other shared by the remaining interrupt sources.  In this
354 	 * case, auto_clear can only be set for I/Os, which is fine.  The fatal
355 	 * interrupt will be mapped to the PMCS_FATAL_INTERRUPT bit, which
356 	 * is not an interrupt vector.
357 	 *
358 	 * MSI/MSI-X/INT-X:
359 	 * If we only got 1 interrupt vector, auto_clear must be set to 0,
360 	 * and again the fatal interrupt will be mapped to the
361 	 * PMCS_FATAL_INTERRUPT bit (again, not an interrupt vector).
362 	 */
363 
364 	switch (pwp->int_type) {
365 	case PMCS_INT_MSIX:
366 	case PMCS_INT_MSI:
367 		switch (pwp->intr_cnt) {
368 		case 1:
369 			pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, PMCS_FERRIE |
370 			    (PMCS_FATAL_INTERRUPT << PMCS_FERIV_SHIFT));
371 			pwp->odb_auto_clear = 0;
372 			break;
373 		case 2:
374 			pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, PMCS_FERRIE |
375 			    (PMCS_FATAL_INTERRUPT << PMCS_FERIV_SHIFT));
376 			pwp->odb_auto_clear = (1 << PMCS_FATAL_INTERRUPT) |
377 			    (1 << PMCS_MSIX_IODONE);
378 			break;
379 		case 4:
380 			pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, PMCS_FERRIE |
381 			    (PMCS_MSIX_FATAL << PMCS_FERIV_SHIFT));
382 			pwp->odb_auto_clear = (1 << PMCS_MSIX_FATAL) |
383 			    (1 << PMCS_MSIX_GENERAL) | (1 << PMCS_MSIX_IODONE) |
384 			    (1 << PMCS_MSIX_EVENTS);
385 			break;
386 		}
387 		break;
388 
389 	case PMCS_INT_FIXED:
390 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR,
391 		    PMCS_FERRIE | (PMCS_FATAL_INTERRUPT << PMCS_FERIV_SHIFT));
392 		pwp->odb_auto_clear = 0;
393 		break;
394 	}
395 
396 	/*
397 	 * Enable Interrupt Reassertion
398 	 * Default Delay 1000us
399 	 */
400 	ferr = pmcs_rd_mpi_tbl(pwp, PMCS_MPI_FERR);
401 	if ((ferr & PMCS_MPI_IRAE) == 0) {
402 		ferr &= ~(PMCS_MPI_IRAU | PMCS_MPI_IRAD_MASK);
403 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, ferr | PMCS_MPI_IRAE);
404 	}
405 
406 	pmcs_wr_topunit(pwp, PMCS_OBDB_AUTO_CLR, pwp->odb_auto_clear);
407 	pwp->mpi_table_setup = 1;
408 	return (0);
409 }
410 
411 /*
412  * Start the Message Passing protocol with the PMC chip.
413  */
414 int
415 pmcs_start_mpi(pmcs_hw_t *pwp)
416 {
417 	int i;
418 
419 	pmcs_wr_msgunit(pwp, PMCS_MSGU_IBDB, PMCS_MSGU_IBDB_MPIINI);
420 	for (i = 0; i < 1000; i++) {
421 		if ((pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB) &
422 		    PMCS_MSGU_IBDB_MPIINI) == 0) {
423 			break;
424 		}
425 		drv_usecwait(1000);
426 	}
427 	if (pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB) & PMCS_MSGU_IBDB_MPIINI) {
428 		return (-1);
429 	}
430 	drv_usecwait(500000);
431 
432 	/*
433 	 * Check to make sure we got to INIT state.
434 	 */
435 	if (PMCS_MPI_S(pmcs_rd_gst_tbl(pwp, PMCS_GST_BASE)) !=
436 	    PMCS_MPI_STATE_INIT) {
437 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: MPI launch failed (GST 0x%x "
438 		    "DBCLR 0x%x)", __func__,
439 		    pmcs_rd_gst_tbl(pwp, PMCS_GST_BASE),
440 		    pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB_CLEAR));
441 		return (-1);
442 	}
443 	return (0);
444 }
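
/*
 * A minimal sketch of the bring-up ordering the routines in this file
 * assume; the wrapper below is hypothetical, but the callees are the
 * real functions defined here:
 *
 *	if (pmcs_setup(pwp) || pmcs_start_mpi(pwp))
 *		return (DDI_FAILURE);
 *	if (pmcs_echo_test(pwp))
 *		return (DDI_FAILURE);
 *	if (pmcs_start_phys(pwp))
 *		return (DDI_FAILURE);
 */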
445 
446 /*
447  * Stop the Message Passing protocol with the PMC chip.
448  */
449 int
450 pmcs_stop_mpi(pmcs_hw_t *pwp)
451 {
452 	int i;
453 
454 	for (i = 0; i < pwp->max_iq; i++) {
455 		pmcs_wr_iqc_tbl(pwp, PMCS_IQC_PARMX(i), 0);
456 		pmcs_wr_iqc_tbl(pwp, PMCS_IQBAHX(i), 0);
457 		pmcs_wr_iqc_tbl(pwp, PMCS_IQBALX(i), 0);
458 		pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBAHX(i), 0);
459 		pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBALX(i), 0);
460 	}
461 	for (i = 0; i < pwp->max_oq; i++) {
462 		pmcs_wr_oqc_tbl(pwp, PMCS_OQC_PARMX(i), 0);
463 		pmcs_wr_oqc_tbl(pwp, PMCS_OQBAHX(i), 0);
464 		pmcs_wr_oqc_tbl(pwp, PMCS_OQBALX(i), 0);
465 		pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBAHX(i), 0);
466 		pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBALX(i), 0);
467 		pmcs_wr_oqc_tbl(pwp, PMCS_OQIPARM(i), 0);
468 		pmcs_wr_oqc_tbl(pwp, PMCS_OQDICX(i), 0);
469 	}
470 	pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, 0);
471 	pmcs_wr_msgunit(pwp, PMCS_MSGU_IBDB, PMCS_MSGU_IBDB_MPICTU);
472 	for (i = 0; i < 2000; i++) {
473 		if ((pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB) &
474 		    PMCS_MSGU_IBDB_MPICTU) == 0) {
475 			break;
476 		}
477 		drv_usecwait(1000);
478 	}
479 	if (pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB) & PMCS_MSGU_IBDB_MPICTU) {
480 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: MPI stop failed", __func__);
481 		return (-1);
482 	}
483 	return (0);
484 }
485 
486 /*
487  * Do a sequence of ECHO messages to test for MPI functionality,
488  * all inbound and outbound queue functionality and interrupts.
489  */
490 int
491 pmcs_echo_test(pmcs_hw_t *pwp)
492 {
493 	echo_test_t fred;
494 	struct pmcwork *pwrk;
495 	uint32_t *msg, count;
496 	int iqe = 0, iqo = 0, result, rval = 0;
497 	int iterations;
498 	hrtime_t echo_start, echo_end, echo_total;
499 
500 	ASSERT(pwp->max_cmd > 0);
501 
502 	/*
503 	 * We want iterations to be max_cmd * 3 to ensure that we run the
504 	 * echo test enough times to iterate through every inbound queue
505 	 * at least twice.
506 	 */
507 	iterations = pwp->max_cmd * 3;
508 
509 	echo_total = 0;
510 	count = 0;
511 
512 	while (count < iterations) {
513 		pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, NULL);
514 		if (pwrk == NULL) {
515 			pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
516 			rval = -1;
517 			break;
518 		}
519 
520 		mutex_enter(&pwp->iqp_lock[iqe]);
521 		msg = GET_IQ_ENTRY(pwp, iqe);
522 		if (msg == NULL) {
523 			mutex_exit(&pwp->iqp_lock[iqe]);
524 			pmcs_pwork(pwp, pwrk);
525 			pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
526 			rval = -1;
527 			break;
528 		}
529 
530 		bzero(msg, PMCS_QENTRY_SIZE);
531 
532 		if (iqe == PMCS_IQ_OTHER) {
533 			/* This is on the high priority queue */
534 			msg[0] = LE_32(PMCS_HIPRI(pwp, iqo, PMCIN_ECHO));
535 		} else {
536 			msg[0] = LE_32(PMCS_IOMB_IN_SAS(iqo, PMCIN_ECHO));
537 		}
538 		msg[1] = LE_32(pwrk->htag);
539 		fred.signature = 0xdeadbeef;
540 		fred.count = count;
541 		fred.ptr = &count;
542 		(void) memcpy(&msg[2], &fred, sizeof (fred));
543 		pwrk->state = PMCS_WORK_STATE_ONCHIP;
544 
545 		INC_IQ_ENTRY(pwp, iqe);
546 
547 		echo_start = gethrtime();
548 		DTRACE_PROBE2(pmcs__echo__test__wait__start,
549 		    hrtime_t, echo_start, uint32_t, pwrk->htag);
550 
551 		if (++iqe == PMCS_NIQ) {
552 			iqe = 0;
553 		}
554 		if (++iqo == PMCS_NOQ) {
555 			iqo = 0;
556 		}
557 
558 		WAIT_FOR(pwrk, 250, result);
559 
560 		echo_end = gethrtime();
561 		DTRACE_PROBE2(pmcs__echo__test__wait__end,
562 		    hrtime_t, echo_end, int, result);
563 
564 		echo_total += (echo_end - echo_start);
565 
566 		pmcs_pwork(pwp, pwrk);
567 		if (result) {
568 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
569 			    "%s: command timed out on echo test #%d",
570 			    __func__, count);
571 			rval = -1;
572 			break;
573 		}
574 	}
575 
576 	/*
577 	 * The intr_threshold is adjusted by PMCS_INTR_THRESHOLD in order to
578 	 * remove the overhead of things like the delay in getting signaled
579 	 * for completion.
580 	 */
581 	if (echo_total != 0) {
582 		pwp->io_intr_coal.intr_latency =
583 		    (echo_total / iterations) / 2;
584 		pwp->io_intr_coal.intr_threshold =
585 		    PMCS_INTR_THRESHOLD(PMCS_QUANTUM_TIME_USECS * 1000 /
586 		    pwp->io_intr_coal.intr_latency);
587 	}
588 
589 	return (rval);
590 }
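
/*
 * For illustration, assuming a hypothetical run where echo_total came to
 * 300ms over 3000 iterations: the average round trip is 100us, so
 * intr_latency is set to half of that (50000ns), and intr_threshold is
 * derived from how many completions fit into PMCS_QUANTUM_TIME_USECS at
 * that latency.
 */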
591 
592 /*
593  * Start the (real) phys
594  */
595 int
596 pmcs_start_phy(pmcs_hw_t *pwp, int phynum, int linkmode, int speed)
597 {
598 	int result;
599 	uint32_t *msg;
600 	struct pmcwork *pwrk;
601 	pmcs_phy_t *pptr;
602 	sas_identify_af_t sap;
603 
604 	mutex_enter(&pwp->lock);
605 	pptr = pwp->root_phys + phynum;
606 	if (pptr == NULL) {
607 		mutex_exit(&pwp->lock);
608 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: cannot find port %d",
609 		    __func__, phynum);
610 		return (0);
611 	}
612 
613 	pmcs_lock_phy(pptr);
614 	mutex_exit(&pwp->lock);
615 
616 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
617 	if (pwrk == NULL) {
618 		pmcs_unlock_phy(pptr);
619 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
620 		return (-1);
621 	}
622 
623 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
624 	msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
625 
626 	if (msg == NULL) {
627 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
628 		pmcs_unlock_phy(pptr);
629 		pmcs_pwork(pwp, pwrk);
630 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
631 		return (-1);
632 	}
633 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_EVENTS, PMCIN_PHY_START));
634 	msg[1] = LE_32(pwrk->htag);
635 	msg[2] = LE_32(linkmode | speed | phynum);
636 	bzero(&sap, sizeof (sap));
637 	sap.device_type = SAS_IF_DTYPE_ENDPOINT;
638 	sap.ssp_ini_port = 1;
639 
640 	if (pwp->separate_ports) {
641 		pmcs_wwn2barray(pwp->sas_wwns[phynum], sap.sas_address);
642 	} else {
643 		pmcs_wwn2barray(pwp->sas_wwns[0], sap.sas_address);
644 	}
645 
646 	ASSERT(phynum < SAS2_PHYNUM_MAX);
647 	sap.phy_identifier = phynum & SAS2_PHYNUM_MASK;
648 	(void) memcpy(&msg[3], &sap, sizeof (sas_identify_af_t));
649 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
650 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
651 
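	/*
	 * 'speed' is a bitmask of enabled link rates, so lowbit() and
	 * highbit() (which are 1-based) recover its lowest and highest set
	 * bits; subtracting one converts them to the chip's programmed
	 * rate encoding.  The supported hardware range is pinned below.
	 */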
652 	pptr->state.prog_min_rate = (lowbit((ulong_t)speed) - 1);
653 	pptr->state.prog_max_rate = (highbit((ulong_t)speed) - 1);
654 	pptr->state.hw_min_rate = PMCS_HW_MIN_LINK_RATE;
655 	pptr->state.hw_max_rate = PMCS_HW_MAX_LINK_RATE;
656 
657 	pmcs_unlock_phy(pptr);
658 	WAIT_FOR(pwrk, 1000, result);
659 	pmcs_pwork(pwp, pwrk);
660 
661 	if (result) {
662 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_timeo, __func__);
663 	} else {
664 		mutex_enter(&pwp->lock);
665 		pwp->phys_started |= (1 << phynum);
666 		mutex_exit(&pwp->lock);
667 	}
668 
669 	return (0);
670 }
671 
672 int
673 pmcs_start_phys(pmcs_hw_t *pwp)
674 {
675 	int i;
676 
677 	for (i = 0; i < pwp->nphy; i++) {
678 		if ((pwp->phyid_block_mask & (1 << i)) == 0) {
679 			if (pmcs_start_phy(pwp, i,
680 			    (pwp->phymode << PHY_MODE_SHIFT),
681 			    pwp->physpeed << PHY_LINK_SHIFT)) {
682 				return (-1);
683 			}
684 			if (pmcs_clear_diag_counters(pwp, i)) {
685 				pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: failed to "
686 				    "reset counters on PHY (%d)", __func__, i);
687 			}
688 		}
689 	}
690 	return (0);
691 }
692 
693 /*
694  * Called with PHY locked
695  */
696 int
697 pmcs_reset_phy(pmcs_hw_t *pwp, pmcs_phy_t *pptr, uint8_t type)
698 {
699 	uint32_t *msg;
700 	uint32_t iomb[(PMCS_QENTRY_SIZE << 1) >> 2];
701 	const char *mbar;
702 	uint32_t amt;
703 	uint32_t pdevid;
704 	uint32_t stsoff;
705 	uint32_t status;
706 	int result, level, phynum;
707 	struct pmcwork *pwrk;
708 	uint32_t htag;
709 
710 	ASSERT(mutex_owned(&pptr->phy_lock));
711 
712 	bzero(iomb, PMCS_QENTRY_SIZE);
713 	phynum = pptr->phynum;
714 	level = pptr->level;
715 	if (level > 0) {
716 		pdevid = pptr->parent->device_id;
717 	}
718 
719 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
720 
721 	if (pwrk == NULL) {
722 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
723 		return (ENOMEM);
724 	}
725 
726 	pwrk->arg = iomb;
727 
728 	/*
729 	 * If level > 0, we need to issue an SMP_REQUEST with a PHY_CONTROL
730 	 * function to do either a link reset or hard reset.  If level == 0,
731 	 * then we do a LOCAL_PHY_CONTROL IOMB to do link/hard reset to the
732 	 * root (local) PHY
733 	 */
734 	if (level) {
735 		stsoff = 2;
736 		iomb[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
737 		    PMCIN_SMP_REQUEST));
738 		iomb[1] = LE_32(pwrk->htag);
739 		iomb[2] = LE_32(pdevid);
740 		iomb[3] = LE_32(40 << SMP_REQUEST_LENGTH_SHIFT);
741 		/*
742 		 * Send SMP PHY CONTROL/HARD or LINK RESET
743 		 */
744 		iomb[4] = BE_32(0x40910000);
745 		iomb[5] = 0;
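
		/*
		 * Byte-wise, 0x40910000 starts the SMP request frame:
		 * 0x40 is the SMP REQUEST frame type and 0x91 is the
		 * PHY CONTROL function; the remaining bytes are left
		 * zero here.  The target phy number and the requested
		 * reset operation are packed into iomb[6] below.
		 */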
746 
747 		if (type == PMCS_PHYOP_HARD_RESET) {
748 			mbar = "SMP PHY CONTROL/HARD RESET";
749 			iomb[6] = BE_32((phynum << 24) |
750 			    (PMCS_PHYOP_HARD_RESET << 16));
751 		} else {
752 			mbar = "SMP PHY CONTROL/LINK RESET";
753 			iomb[6] = BE_32((phynum << 24) |
754 			    (PMCS_PHYOP_LINK_RESET << 16));
755 		}
756 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
757 		    "%s: sending %s to %s for phy 0x%x",
758 		    __func__, mbar, pptr->parent->path, pptr->phynum);
759 		amt = 7;
760 	} else {
761 		/*
762 		 * Unlike most other Outbound messages, status for
763 		 * a local phy operation is in DWORD 3.
764 		 */
765 		stsoff = 3;
766 		iomb[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
767 		    PMCIN_LOCAL_PHY_CONTROL));
768 		iomb[1] = LE_32(pwrk->htag);
769 		if (type == PMCS_PHYOP_LINK_RESET) {
770 			mbar = "LOCAL PHY LINK RESET";
771 			iomb[2] = LE_32((PMCS_PHYOP_LINK_RESET << 8) | phynum);
772 		} else {
773 			mbar = "LOCAL PHY HARD RESET";
774 			iomb[2] = LE_32((PMCS_PHYOP_HARD_RESET << 8) | phynum);
775 		}
776 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
777 		    "%s: sending %s to %s", __func__, mbar, pptr->path);
778 		amt = 3;
779 	}
780 
781 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
782 	msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
783 	if (msg == NULL) {
784 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
785 		pmcs_pwork(pwp, pwrk);
786 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
787 		return (ENOMEM);
788 	}
789 	COPY_MESSAGE(msg, iomb, amt);
790 	htag = pwrk->htag;
791 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
792 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
793 
794 	pmcs_unlock_phy(pptr);
795 	WAIT_FOR(pwrk, 1000, result);
796 	pmcs_pwork(pwp, pwrk);
797 	pmcs_lock_phy(pptr);
798 
799 	if (result) {
800 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_timeo, __func__);
801 
802 		if (pmcs_abort(pwp, pptr, htag, 0, 0)) {
803 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
804 			    "%s: Unable to issue SMP abort for htag 0x%08x",
805 			    __func__, htag);
806 		} else {
807 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
808 			    "%s: Issuing SMP ABORT for htag 0x%08x",
809 			    __func__, htag);
810 		}
811 		return (EIO);
812 	}
813 	status = LE_32(iomb[stsoff]);
814 
815 	if (status != PMCOUT_STATUS_OK) {
816 		char buf[32];
817 		const char *es = pmcs_status_str(status);
818 		if (es == NULL) {
819 			(void) snprintf(buf, sizeof (buf), "Status 0x%x",
820 			    status);
821 			es = buf;
822 		}
823 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
824 		    "%s: %s action returned %s for %s", __func__, mbar, es,
825 		    pptr->path);
826 		return (EIO);
827 	}
828 
829 	return (0);
830 }
831 
832 /*
833  * Stop the (real) phys.  No PHY or softstate locks are required as this only
834  * happens during detach.
835  */
836 void
837 pmcs_stop_phy(pmcs_hw_t *pwp, int phynum)
838 {
839 	int result;
840 	pmcs_phy_t *pptr;
841 	uint32_t *msg;
842 	struct pmcwork *pwrk;
843 
844 	pptr = pwp->root_phys + phynum;
845 	if (pptr == NULL) {
846 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
847 		    "%s: unable to find port %d", __func__, phynum);
848 		return;
849 	}
850 
851 	if (pwp->phys_started & (1 << phynum)) {
852 		pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
853 
854 		if (pwrk == NULL) {
855 			pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
856 			return;
857 		}
858 
859 		mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
860 		msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
861 
862 		if (msg == NULL) {
863 			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
864 			pmcs_pwork(pwp, pwrk);
865 			pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
866 			return;
867 		}
868 
869 		msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_EVENTS, PMCIN_PHY_STOP));
870 		msg[1] = LE_32(pwrk->htag);
871 		msg[2] = LE_32(phynum);
872 		pwrk->state = PMCS_WORK_STATE_ONCHIP;
873 		/*
874 		 * Make this unconfigured now.
875 		 */
876 		INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
877 		WAIT_FOR(pwrk, 1000, result);
878 
879 		pmcs_pwork(pwp, pwrk);
880 		if (result) {
881 			pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_timeo, __func__);
882 		}
883 
884 		pwp->phys_started &= ~(1 << phynum);
885 	}
886 
887 	pptr->configured = 0;
888 }
889 
890 /*
891  * No locks should be required as this is only called during detach
892  */
893 void
894 pmcs_stop_phys(pmcs_hw_t *pwp)
895 {
896 	int i;
897 	for (i = 0; i < pwp->nphy; i++) {
898 		if ((pwp->phyid_block_mask & (1 << i)) == 0) {
899 			pmcs_stop_phy(pwp, i);
900 		}
901 	}
902 }
903 
904 /*
905  * Run SAS_DIAG_EXECUTE with cmd and cmd_desc passed.
906  * 	ERR_CNT_RESET: return status of cmd
907  *	DIAG_REPORT_GET: return value of the counter
908  */
909 int
910 pmcs_sas_diag_execute(pmcs_hw_t *pwp, uint32_t cmd, uint32_t cmd_desc,
911     uint8_t phynum)
912 {
913 	uint32_t htag, *ptr, status, msg[PMCS_MSG_SIZE << 1];
914 	int result;
915 	struct pmcwork *pwrk;
916 
917 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, NULL);
918 	if (pwrk == NULL) {
919 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
920 		return (DDI_FAILURE);
921 	}
922 	pwrk->arg = msg;
923 	htag = pwrk->htag;
924 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_EVENTS, PMCIN_SAS_DIAG_EXECUTE));
925 	msg[1] = LE_32(htag);
926 	msg[2] = LE_32((cmd << PMCS_DIAG_CMD_SHIFT) |
927 	    (cmd_desc << PMCS_DIAG_CMD_DESC_SHIFT) | phynum);
928 
929 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
930 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
931 	if (ptr == NULL) {
932 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
933 		pmcs_pwork(pwp, pwrk);
934 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
935 		return (DDI_FAILURE);
936 	}
937 	COPY_MESSAGE(ptr, msg, 3);
938 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
939 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
940 
941 	WAIT_FOR(pwrk, 1000, result);
942 
943 	pmcs_pwork(pwp, pwrk);
944 
945 	if (result) {
946 		pmcs_timed_out(pwp, htag, __func__);
947 		return (DDI_FAILURE);
948 	}
949 
950 	status = LE_32(msg[3]);
951 
952 	/* Return for counter reset */
953 	if (cmd == PMCS_ERR_CNT_RESET)
954 		return (status);
955 
956 	/* Return for counter value */
957 	if (status) {
958 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: failed, status (0x%x)",
959 		    __func__, status);
960 		return (DDI_FAILURE);
961 	}
962 	return (LE_32(msg[4]));
963 }
964 
965 /* Get the current value of the counter for desc on phynum and return it. */
966 int
967 pmcs_get_diag_report(pmcs_hw_t *pwp, uint32_t desc, uint8_t phynum)
968 {
969 	return (pmcs_sas_diag_execute(pwp, PMCS_DIAG_REPORT_GET, desc, phynum));
970 }
971 
972 /* Clear all of the counters for phynum. Returns the status of the command. */
973 int
974 pmcs_clear_diag_counters(pmcs_hw_t *pwp, uint8_t phynum)
975 {
976 	uint32_t	cmd = PMCS_ERR_CNT_RESET;
977 	uint32_t	cmd_desc;
978 
979 	cmd_desc = PMCS_INVALID_DWORD_CNT;
980 	if (pmcs_sas_diag_execute(pwp, cmd, cmd_desc, phynum))
981 		return (DDI_FAILURE);
982 
983 	cmd_desc = PMCS_DISPARITY_ERR_CNT;
984 	if (pmcs_sas_diag_execute(pwp, cmd, cmd_desc, phynum))
985 		return (DDI_FAILURE);
986 
987 	cmd_desc = PMCS_LOST_DWORD_SYNC_CNT;
988 	if (pmcs_sas_diag_execute(pwp, cmd, cmd_desc, phynum))
989 		return (DDI_FAILURE);
990 
991 	cmd_desc = PMCS_RESET_FAILED_CNT;
992 	if (pmcs_sas_diag_execute(pwp, cmd, cmd_desc, phynum))
993 		return (DDI_FAILURE);
994 
995 	return (DDI_SUCCESS);
996 }
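
/*
 * Usage sketch (the caller below is hypothetical): read the invalid
 * dword count on phy 0, then zero all of that phy's counters.
 *
 *	int cnt = pmcs_get_diag_report(pwp, PMCS_INVALID_DWORD_CNT, 0);
 *	if (cnt != DDI_FAILURE)
 *		(void) pmcs_clear_diag_counters(pwp, 0);
 */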
997 
998 /*
999  * Get firmware timestamp
1000  */
1001 int
1002 pmcs_get_time_stamp(pmcs_hw_t *pwp, uint64_t *ts)
1003 {
1004 	uint32_t htag, *ptr, msg[PMCS_MSG_SIZE << 1];
1005 	int result;
1006 	struct pmcwork *pwrk;
1007 
1008 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, NULL);
1009 	if (pwrk == NULL) {
1010 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
1011 		return (-1);
1012 	}
1013 	pwrk->arg = msg;
1014 	htag = pwrk->htag;
1015 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_EVENTS, PMCIN_GET_TIME_STAMP));
1016 	msg[1] = LE_32(pwrk->htag);
1017 
1018 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1019 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1020 	if (ptr == NULL) {
1021 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1022 		pmcs_pwork(pwp, pwrk);
1023 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
1024 		return (-1);
1025 	}
1026 	COPY_MESSAGE(ptr, msg, 2);
1027 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
1028 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1029 
1030 	WAIT_FOR(pwrk, 1000, result);
1031 
1032 	pmcs_pwork(pwp, pwrk);
1033 
1034 	if (result) {
1035 		pmcs_timed_out(pwp, htag, __func__);
1036 		return (-1);
1037 	}
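	/*
	 * The firmware returns the timestamp split across two little-endian
	 * dwords (low half in msg[2], high half in msg[3]); reassemble them
	 * into the caller's 64-bit value.
	 */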
1038 	*ts = LE_32(msg[2]) | (((uint64_t)LE_32(msg[3])) << 32);
1039 	return (0);
1040 }
1041 
1042 /*
1043  * Dump all pertinent registers
1044  */
1045 
1046 void
1047 pmcs_register_dump(pmcs_hw_t *pwp)
1048 {
1049 	int i;
1050 	uint32_t val;
1051 
1052 	pmcs_prt(pwp, PMCS_PRT_INFO, "pmcs%d: Register dump start",
1053 	    ddi_get_instance(pwp->dip));
1054 	pmcs_prt(pwp, PMCS_PRT_INFO,
1055 	    "OBDB (intr): 0x%08x (mask): 0x%08x (clear): 0x%08x",
1056 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_OBDB),
1057 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_OBDB_MASK),
1058 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR));
1059 	pmcs_prt(pwp, PMCS_PRT_INFO, "SCRATCH0: 0x%08x",
1060 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH0));
1061 	pmcs_prt(pwp, PMCS_PRT_INFO, "SCRATCH1: 0x%08x",
1062 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1));
1063 	pmcs_prt(pwp, PMCS_PRT_INFO, "SCRATCH2: 0x%08x",
1064 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2));
1065 	pmcs_prt(pwp, PMCS_PRT_INFO, "SCRATCH3: 0x%08x",
1066 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH3));
1067 	for (i = 0; i < PMCS_NIQ; i++) {
1068 		pmcs_prt(pwp, PMCS_PRT_INFO, "IQ %d: CI %u PI %u",
1069 		    i, pmcs_rd_iqci(pwp, i), pmcs_rd_iqpi(pwp, i));
1070 	}
1071 	for (i = 0; i < PMCS_NOQ; i++) {
1072 		pmcs_prt(pwp, PMCS_PRT_INFO, "OQ %d: CI %u PI %u",
1073 		    i, pmcs_rd_oqci(pwp, i), pmcs_rd_oqpi(pwp, i));
1074 	}
1075 	val = pmcs_rd_gst_tbl(pwp, PMCS_GST_BASE);
1076 	pmcs_prt(pwp, PMCS_PRT_INFO,
1077 	    "GST TABLE BASE: 0x%08x (STATE=0x%x QF=%d GSTLEN=%d HMI_ERR=0x%x)",
1078 	    val, PMCS_MPI_S(val), PMCS_QF(val), PMCS_GSTLEN(val) * 4,
1079 	    PMCS_HMI_ERR(val));
1080 	pmcs_prt(pwp, PMCS_PRT_INFO, "GST TABLE IQFRZ0: 0x%08x",
1081 	    pmcs_rd_gst_tbl(pwp, PMCS_GST_IQFRZ0));
1082 	pmcs_prt(pwp, PMCS_PRT_INFO, "GST TABLE IQFRZ1: 0x%08x",
1083 	    pmcs_rd_gst_tbl(pwp, PMCS_GST_IQFRZ1));
1084 	pmcs_prt(pwp, PMCS_PRT_INFO, "GST TABLE MSGU TICK: 0x%08x",
1085 	    pmcs_rd_gst_tbl(pwp, PMCS_GST_MSGU_TICK));
1086 	pmcs_prt(pwp, PMCS_PRT_INFO, "GST TABLE IOP TICK: 0x%08x",
1087 	    pmcs_rd_gst_tbl(pwp, PMCS_GST_IOP_TICK));
1088 	for (i = 0; i < pwp->nphy; i++) {
1089 		uint32_t rerrf, pinfo, started = 0, link = 0;
1090 		pinfo = pmcs_rd_gst_tbl(pwp, PMCS_GST_PHY_INFO(i));
1091 		if (pinfo & 1) {
1092 			started = 1;
1093 			link = pinfo & 2;
1094 		}
1095 		rerrf = pmcs_rd_gst_tbl(pwp, PMCS_GST_RERR_INFO(i));
1096 		pmcs_prt(pwp, PMCS_PRT_INFO,
1097 		    "GST TABLE PHY%d STARTED=%d LINK=%d RERR=0x%08x",
1098 		    i, started, link, rerrf);
1099 	}
1100 	pmcs_prt(pwp, PMCS_PRT_INFO, "pmcs%d: Register dump end",
1101 	    ddi_get_instance(pwp->dip));
1102 }
1103 
1104 /*
1105  * Handle SATA Abort and other error processing
1106  */
1107 int
1108 pmcs_abort_handler(pmcs_hw_t *pwp)
1109 {
1110 	pmcs_phy_t *pptr, *pnext, *pnext_uplevel[PMCS_MAX_XPND];
1111 	int r, level = 0;
1112 
1113 	pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s", __func__);
1114 
1115 	mutex_enter(&pwp->lock);
1116 	pptr = pwp->root_phys;
1117 	mutex_exit(&pwp->lock);
1118 
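	/*
	 * Walk the PHY tree iteratively: descend into children first,
	 * stashing each level's sibling pointer in pnext_uplevel[] (an
	 * explicit stack), and pop back up a level whenever a subtree
	 * is exhausted.
	 */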
1119 	while (pptr) {
1120 		/*
1121 		 * XXX: Need to make sure this doesn't happen
1122 		 * XXX: when non-NCQ commands are running.
1123 		 */
1124 		pmcs_lock_phy(pptr);
1125 		if (pptr->need_rl_ext) {
1126 			ASSERT(pptr->dtype == SATA);
1127 			if (pmcs_acquire_scratch(pwp, B_FALSE)) {
1128 				goto next_phy;
1129 			}
1130 			r = pmcs_sata_abort_ncq(pwp, pptr);
1131 			pmcs_release_scratch(pwp);
1132 			if (r == ENOMEM) {
1133 				goto next_phy;
1134 			}
1135 			if (r) {
1136 				r = pmcs_reset_phy(pwp, pptr,
1137 				    PMCS_PHYOP_LINK_RESET);
1138 				if (r == ENOMEM) {
1139 					goto next_phy;
1140 				}
1141 				/* what if other failures happened? */
1142 				pptr->abort_pending = 1;
1143 				pptr->abort_sent = 0;
1144 			}
1145 		}
1146 		if (pptr->abort_pending == 0 || pptr->abort_sent) {
1147 			goto next_phy;
1148 		}
1149 		pptr->abort_pending = 0;
1150 		if (pmcs_abort(pwp, pptr, pptr->device_id, 1, 1) == ENOMEM) {
1151 			pptr->abort_pending = 1;
1152 			goto next_phy;
1153 		}
1154 		pptr->abort_sent = 1;
1155 
1156 next_phy:
1157 		if (pptr->children) {
1158 			pnext = pptr->children;
1159 			pnext_uplevel[level++] = pptr->sibling;
1160 		} else {
1161 			pnext = pptr->sibling;
1162 			while ((pnext == NULL) && (level > 0)) {
1163 				pnext = pnext_uplevel[--level];
1164 			}
1165 		}
1166 
1167 		pmcs_unlock_phy(pptr);
1168 		pptr = pnext;
1169 	}
1170 
1171 	return (0);
1172 }
1173 
1174 /*
1175  * Register a device (get a device handle for it).
1176  * Called with PHY lock held.
1177  */
1178 int
1179 pmcs_register_device(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
1180 {
1181 	struct pmcwork *pwrk;
1182 	int result = 0;
1183 	uint32_t *msg;
1184 	uint32_t tmp, status;
1185 	uint32_t iomb[(PMCS_QENTRY_SIZE << 1) >> 2];
1186 
1187 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1188 	msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1189 
1190 	if (msg == NULL ||
1191 	    (pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr)) == NULL) {
1192 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1193 		result = ENOMEM;
1194 		goto out;
1195 	}
1196 
1197 	pwrk->arg = iomb;
1198 	pwrk->dtype = pptr->dtype;
1199 
1200 	msg[1] = LE_32(pwrk->htag);
1201 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, PMCIN_REGISTER_DEVICE));
1202 	tmp = PMCS_DEVREG_TLR |
1203 	    (pptr->link_rate << PMCS_DEVREG_LINK_RATE_SHIFT);
1204 	if (IS_ROOT_PHY(pptr)) {
1205 		msg[2] = LE_32(pptr->portid |
1206 		    (pptr->phynum << PMCS_PHYID_SHIFT));
1207 	} else {
1208 		msg[2] = LE_32(pptr->portid);
1209 	}
1210 	if (pptr->dtype == SATA) {
1211 		if (IS_ROOT_PHY(pptr)) {
1212 			tmp |= PMCS_DEVREG_TYPE_SATA_DIRECT;
1213 		} else {
1214 			tmp |= PMCS_DEVREG_TYPE_SATA;
1215 		}
1216 	} else {
1217 		tmp |= PMCS_DEVREG_TYPE_SAS;
1218 	}
1219 	msg[3] = LE_32(tmp);
1220 	msg[4] = LE_32(PMCS_DEVREG_IT_NEXUS_TIMEOUT);
1221 	(void) memcpy(&msg[5], pptr->sas_address, 8);
1222 
1223 	CLEAN_MESSAGE(msg, 7);
1224 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
1225 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1226 
1227 	pmcs_unlock_phy(pptr);
1228 	WAIT_FOR(pwrk, 250, result);
1229 	pmcs_lock_phy(pptr);
1230 	pmcs_pwork(pwp, pwrk);
1231 
1232 	if (result) {
1233 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_timeo, __func__);
1234 		result = ETIMEDOUT;
1235 		goto out;
1236 	}
1237 	status = LE_32(iomb[2]);
1238 	tmp = LE_32(iomb[3]);
1239 	switch (status) {
1240 	case PMCS_DEVREG_OK:
1241 	case PMCS_DEVREG_DEVICE_ALREADY_REGISTERED:
1242 	case PMCS_DEVREG_PHY_ALREADY_REGISTERED:
1243 		if (pmcs_validate_devid(pwp->root_phys, pptr, tmp) == B_FALSE) {
1244 			result = EEXIST;
1245 			goto out;
1246 		} else if (status != PMCS_DEVREG_OK) {
1247 			if (tmp == 0xffffffff) {	/* F/W bug */
1248 				pmcs_prt(pwp, PMCS_PRT_INFO,
1249 				    "%s: phy %s already has bogus devid 0x%x",
1250 				    __func__, pptr->path, tmp);
1251 				result = EIO;
1252 				goto out;
1253 			} else {
1254 				pmcs_prt(pwp, PMCS_PRT_INFO,
1255 				    "%s: phy %s already has a device id 0x%x",
1256 				    __func__, pptr->path, tmp);
1257 			}
1258 		}
1259 		break;
1260 	default:
1261 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: status 0x%x when trying to "
1262 		    "register device %s", __func__, status, pptr->path);
1263 		result = EIO;
1264 		goto out;
1265 	}
1266 	pptr->device_id = tmp;
1267 	pptr->valid_device_id = 1;
1268 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "Phy %s/" SAS_ADDR_FMT
1269 	    " registered with device_id 0x%x (portid %d)", pptr->path,
1270 	    SAS_ADDR_PRT(pptr->sas_address), tmp, pptr->portid);
1271 out:
1272 	return (result);
1273 }
1274 
1275 /*
1276  * Deregister a device (remove a device handle).
1277  * Called with PHY locked.
1278  */
1279 void
1280 pmcs_deregister_device(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
1281 {
1282 	struct pmcwork *pwrk;
1283 	uint32_t msg[PMCS_MSG_SIZE], *ptr, status;
1284 	uint32_t iomb[(PMCS_QENTRY_SIZE << 1) >> 2];
1285 	int result;
1286 
1287 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
1288 	if (pwrk == NULL) {
1289 		return;
1290 	}
1291 
1292 	pwrk->arg = iomb;
1293 	pwrk->dtype = pptr->dtype;
1294 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1295 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1296 	if (ptr == NULL) {
1297 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1298 		pmcs_pwork(pwp, pwrk);
1299 		return;
1300 	}
1301 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
1302 	    PMCIN_DEREGISTER_DEVICE_HANDLE));
1303 	msg[1] = LE_32(pwrk->htag);
1304 	msg[2] = LE_32(pptr->device_id);
1305 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
1306 	COPY_MESSAGE(ptr, msg, 3);
1307 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1308 
1309 	pmcs_unlock_phy(pptr);
1310 	WAIT_FOR(pwrk, 250, result);
1311 	pmcs_pwork(pwp, pwrk);
1312 	pmcs_lock_phy(pptr);
1313 
1314 	if (result) {
1315 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_timeo, __func__);
1316 		return;
1317 	}
1318 	status = LE_32(iomb[2]);
1319 	if (status != PMCOUT_STATUS_OK) {
1320 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: status 0x%x when trying to "
1321 		    "deregister device %s", __func__, status, pptr->path);
1322 	} else {
1323 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: device %s deregistered",
1324 		    __func__, pptr->path);
1325 		pptr->valid_device_id = 0;
1326 		pptr->device_id = PMCS_INVALID_DEVICE_ID;
1327 	}
1328 }
1329 
1330 /*
1331  * Deregister all registered devices.
1332  */
1333 void
1334 pmcs_deregister_devices(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
1335 {
1336 	/*
1337 	 * Start at the maximum level and walk back to level 0.  This only
1338 	 * gets done during detach after all threads and timers have been
1339 	 * destroyed, so there's no need to hold the softstate or PHY lock.
1340 	 */
1341 	while (phyp) {
1342 		if (phyp->children) {
1343 			pmcs_deregister_devices(pwp, phyp->children);
1344 		}
1345 		if (phyp->valid_device_id) {
1346 			pmcs_deregister_device(pwp, phyp);
1347 		}
1348 		phyp = phyp->sibling;
1349 	}
1350 }
1351 
1352 /*
1353  * Perform a 'soft' reset on the PMC chip
1354  */
1355 int
1356 pmcs_soft_reset(pmcs_hw_t *pwp, boolean_t no_restart)
1357 {
1358 	uint32_t s2, sfrbits, gsm, rapchk, wapchk, wdpchk, spc, tsmode;
1359 	pmcs_phy_t *pptr;
1360 	char *msg = NULL;
1361 	int i;
1362 
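	/*
	 * The numbered steps below presumably track the chip vendor's
	 * documented soft-reset sequence: quiesce the message unit, save
	 * and clear the GSM parity-check state, pulse the subsystem reset
	 * bits, restore GSM state, then wait for the AAP firmware to
	 * signal completion by toggling its SFR progress bits.
	 */
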
1363 	/*
1364 	 * Disable interrupts
1365 	 */
1366 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_MASK, 0xffffffff);
1367 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR, 0xffffffff);
1368 
1369 	pmcs_prt(pwp, PMCS_PRT_INFO, "%s", __func__);
1370 
1371 	if (pwp->locks_initted) {
1372 		mutex_enter(&pwp->lock);
1373 	}
1374 	pwp->blocked = 1;
1375 
1376 	/*
1377 	 * Step 1
1378 	 */
1379 	s2 = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2);
1380 	if ((s2 & PMCS_MSGU_HOST_SOFT_RESET_READY) == 0) {
1381 		pmcs_wr_gsm_reg(pwp, RB6_ACCESS, RB6_NMI_SIGNATURE);
1382 		pmcs_wr_gsm_reg(pwp, RB6_ACCESS, RB6_NMI_SIGNATURE);
1383 		for (i = 0; i < 100; i++) {
1384 			s2 = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2) &
1385 			    PMCS_MSGU_HOST_SOFT_RESET_READY;
1386 			if (s2) {
1387 				break;
1388 			}
1389 			drv_usecwait(10000);
1390 		}
1391 		s2 = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2) &
1392 		    PMCS_MSGU_HOST_SOFT_RESET_READY;
1393 		if (s2 == 0) {
1394 			pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: PMCS_MSGU_HOST_"
1395 			    "SOFT_RESET_READY never came ready", __func__);
1396 			pmcs_register_dump(pwp);
1397 			if ((pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1) &
1398 			    PMCS_MSGU_CPU_SOFT_RESET_READY) == 0 ||
1399 			    (pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2) &
1400 			    PMCS_MSGU_CPU_SOFT_RESET_READY) == 0) {
1401 				pwp->state = STATE_DEAD;
1402 				pwp->blocked = 0;
1403 				if (pwp->locks_initted) {
1404 					mutex_exit(&pwp->lock);
1405 				}
1406 				return (-1);
1407 			}
1408 		}
1409 	}
1410 
1411 	/*
1412 	 * Step 2
1413 	 */
1414 	pmcs_wr_gsm_reg(pwp, NMI_EN_VPE0_IOP, 0);
1415 	drv_usecwait(10);
1416 	pmcs_wr_gsm_reg(pwp, NMI_EN_VPE0_AAP1, 0);
1417 	drv_usecwait(10);
1418 	pmcs_wr_topunit(pwp, PMCS_EVENT_INT_ENABLE, 0);
1419 	drv_usecwait(10);
1420 	pmcs_wr_topunit(pwp, PMCS_EVENT_INT_STAT,
1421 	    pmcs_rd_topunit(pwp, PMCS_EVENT_INT_STAT));
1422 	drv_usecwait(10);
1423 	pmcs_wr_topunit(pwp, PMCS_ERROR_INT_ENABLE, 0);
1424 	drv_usecwait(10);
1425 	pmcs_wr_topunit(pwp, PMCS_ERROR_INT_STAT,
1426 	    pmcs_rd_topunit(pwp, PMCS_ERROR_INT_STAT));
1427 	drv_usecwait(10);
1428 
1429 	sfrbits = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1) &
1430 	    PMCS_MSGU_AAP_SFR_PROGRESS;
1431 	sfrbits ^= PMCS_MSGU_AAP_SFR_PROGRESS;
1432 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "PMCS_MSGU_HOST_SCRATCH0 %08x -> %08x",
1433 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_HOST_SCRATCH0), HST_SFT_RESET_SIG);
1434 	pmcs_wr_msgunit(pwp, PMCS_MSGU_HOST_SCRATCH0, HST_SFT_RESET_SIG);
1435 
1436 	/*
1437 	 * Step 3
1438 	 */
1439 	gsm = pmcs_rd_gsm_reg(pwp, GSM_CFG_AND_RESET);
1440 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "GSM %08x -> %08x", gsm,
1441 	    gsm & ~PMCS_SOFT_RESET_BITS);
1442 	pmcs_wr_gsm_reg(pwp, GSM_CFG_AND_RESET, gsm & ~PMCS_SOFT_RESET_BITS);
1443 
1444 	/*
1445 	 * Step 4
1446 	 */
1447 	rapchk = pmcs_rd_gsm_reg(pwp, READ_ADR_PARITY_CHK_EN);
1448 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "READ_ADR_PARITY_CHK_EN %08x -> %08x",
1449 	    rapchk, 0);
1450 	pmcs_wr_gsm_reg(pwp, READ_ADR_PARITY_CHK_EN, 0);
1451 	wapchk = pmcs_rd_gsm_reg(pwp, WRITE_ADR_PARITY_CHK_EN);
1452 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "WRITE_ADR_PARITY_CHK_EN %08x -> %08x",
1453 	    wapchk, 0);
1454 	pmcs_wr_gsm_reg(pwp, WRITE_ADR_PARITY_CHK_EN, 0);
1455 	wdpchk = pmcs_rd_gsm_reg(pwp, WRITE_DATA_PARITY_CHK_EN);
1456 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "WRITE_DATA_PARITY_CHK_EN %08x -> %08x",
1457 	    wdpchk, 0);
1458 	pmcs_wr_gsm_reg(pwp, WRITE_DATA_PARITY_CHK_EN, 0);
1459 
1460 	/*
1461 	 * Step 5
1462 	 */
1463 	drv_usecwait(100);
1464 
1465 	/*
1466 	 * Step 5.5 (Temporary workaround for 1.07.xx Beta)
1467 	 */
1468 	tsmode = pmcs_rd_gsm_reg(pwp, PMCS_GPIO_TRISTATE_MODE_ADDR);
1469 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "GPIO TSMODE %08x -> %08x", tsmode,
1470 	    tsmode & ~(PMCS_GPIO_TSMODE_BIT0|PMCS_GPIO_TSMODE_BIT1));
1471 	pmcs_wr_gsm_reg(pwp, PMCS_GPIO_TRISTATE_MODE_ADDR,
1472 	    tsmode & ~(PMCS_GPIO_TSMODE_BIT0|PMCS_GPIO_TSMODE_BIT1));
1473 	drv_usecwait(10);
1474 
1475 	/*
1476 	 * Step 6
1477 	 */
1478 	spc = pmcs_rd_topunit(pwp, PMCS_SPC_RESET);
1479 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "SPC_RESET %08x -> %08x", spc,
1480 	    spc & ~(PCS_IOP_SS_RSTB|PCS_AAP1_SS_RSTB));
1481 	pmcs_wr_topunit(pwp, PMCS_SPC_RESET,
1482 	    spc & ~(PCS_IOP_SS_RSTB|PCS_AAP1_SS_RSTB));
1483 	drv_usecwait(10);
1484 
1485 	/*
1486 	 * Step 7
1487 	 */
1488 	spc = pmcs_rd_topunit(pwp, PMCS_SPC_RESET);
1489 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "SPC_RESET %08x -> %08x", spc,
1490 	    spc & ~(BDMA_CORE_RSTB|OSSP_RSTB));
1491 	pmcs_wr_topunit(pwp, PMCS_SPC_RESET, spc & ~(BDMA_CORE_RSTB|OSSP_RSTB));
1492 
1493 	/*
1494 	 * Step 8
1495 	 */
1496 	drv_usecwait(100);
1497 
1498 	/*
1499 	 * Step 9
1500 	 */
1501 	spc = pmcs_rd_topunit(pwp, PMCS_SPC_RESET);
1502 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "SPC_RESET %08x -> %08x", spc,
1503 	    spc | (BDMA_CORE_RSTB|OSSP_RSTB));
1504 	pmcs_wr_topunit(pwp, PMCS_SPC_RESET, spc | (BDMA_CORE_RSTB|OSSP_RSTB));
1505 
1506 	/*
1507 	 * Step 10
1508 	 */
1509 	drv_usecwait(100);
1510 
1511 	/*
1512 	 * Step 11
1513 	 */
1514 	gsm = pmcs_rd_gsm_reg(pwp, GSM_CFG_AND_RESET);
1515 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "GSM %08x -> %08x", gsm,
1516 	    gsm | PMCS_SOFT_RESET_BITS);
1517 	pmcs_wr_gsm_reg(pwp, GSM_CFG_AND_RESET, gsm | PMCS_SOFT_RESET_BITS);
1518 	drv_usecwait(10);
1519 
1520 	/*
1521 	 * Step 12
1522 	 */
1523 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "READ_ADR_PARITY_CHK_EN %08x -> %08x",
1524 	    pmcs_rd_gsm_reg(pwp, READ_ADR_PARITY_CHK_EN), rapchk);
1525 	pmcs_wr_gsm_reg(pwp, READ_ADR_PARITY_CHK_EN, rapchk);
1526 	drv_usecwait(10);
1527 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "WRITE_ADR_PARITY_CHK_EN %08x -> %08x",
1528 	    pmcs_rd_gsm_reg(pwp, WRITE_ADR_PARITY_CHK_EN), wapchk);
1529 	pmcs_wr_gsm_reg(pwp, WRITE_ADR_PARITY_CHK_EN, wapchk);
1530 	drv_usecwait(10);
1531 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "WRITE_DATA_PARITY_CHK_EN %08x -> %08x",
1532 	    pmcs_rd_gsm_reg(pwp, WRITE_DATA_PARITY_CHK_EN), wdpchk);
1533 	pmcs_wr_gsm_reg(pwp, WRITE_DATA_PARITY_CHK_EN, wdpchk);
1534 	drv_usecwait(10);
1535 
1536 	/*
1537 	 * Step 13
1538 	 */
1539 	spc = pmcs_rd_topunit(pwp, PMCS_SPC_RESET);
1540 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "SPC_RESET %08x -> %08x", spc,
1541 	    spc | (PCS_IOP_SS_RSTB|PCS_AAP1_SS_RSTB));
1542 	pmcs_wr_topunit(pwp, PMCS_SPC_RESET,
1543 	    spc | (PCS_IOP_SS_RSTB|PCS_AAP1_SS_RSTB));
1544 
1545 	/*
1546 	 * Step 14
1547 	 */
1548 	drv_usecwait(100);
1549 
1550 	/*
1551 	 * Step 15
1552 	 */
1553 	for (spc = 0, i = 0; i < 1000; i++) {
1554 		drv_usecwait(1000);
1555 		spc = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1);
1556 		if ((spc & PMCS_MSGU_AAP_SFR_PROGRESS) == sfrbits) {
1557 			break;
1558 		}
1559 	}
1560 
1561 	if ((spc & PMCS_MSGU_AAP_SFR_PROGRESS) != sfrbits) {
1562 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
1563 		    "SFR didn't toggle (sfr 0x%x)", spc);
1564 		pwp->state = STATE_DEAD;
1565 		pwp->blocked = 0;
1566 		if (pwp->locks_initted) {
1567 			mutex_exit(&pwp->lock);
1568 		}
1569 		return (-1);
1570 	}
1571 
1572 	/*
1573 	 * Step 16
1574 	 */
1575 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_MASK, 0xffffffff);
1576 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR, 0xffffffff);
1577 
1578 	/*
1579 	 * Wait up to 5 seconds for the AAP state to reach either ready or error.
1580 	 */
1581 	for (i = 0; i < 50; i++) {
1582 		spc = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1) &
1583 		    PMCS_MSGU_AAP_STATE_MASK;
1584 		if (spc == PMCS_MSGU_AAP_STATE_ERROR ||
1585 		    spc == PMCS_MSGU_AAP_STATE_READY) {
1586 			break;
1587 		}
1588 		drv_usecwait(100000);
1589 	}
1590 	spc = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1);
1591 	if ((spc & PMCS_MSGU_AAP_STATE_MASK) != PMCS_MSGU_AAP_STATE_READY) {
1592 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
1593 		    "soft reset failed (state 0x%x)", spc);
1594 		pwp->state = STATE_DEAD;
1595 		pwp->blocked = 0;
1596 		if (pwp->locks_initted) {
1597 			mutex_exit(&pwp->lock);
1598 		}
1599 		return (-1);
1600 	}
1601 
1602 
1603 	if (pwp->state == STATE_DEAD || pwp->state == STATE_UNPROBING ||
1604 	    pwp->state == STATE_PROBING || pwp->locks_initted == 0) {
1605 		pwp->blocked = 0;
1606 		if (pwp->locks_initted) {
1607 			mutex_exit(&pwp->lock);
1608 		}
1609 		return (0);
1610 	}
1611 
1612 	/*
1613 	 * Return at this point if we don't need to restart.
1614 	 */
1615 	if (no_restart) {
1616 		return (0);
1617 	}
1618 
1619 	ASSERT(pwp->locks_initted != 0);
1620 
1621 	/*
1622 	 * Clean up various soft state.
1623 	 */
1624 	bzero(pwp->ports, sizeof (pwp->ports));
1625 
1626 	pmcs_free_all_phys(pwp, pwp->root_phys);
1627 
1628 	for (pptr = pwp->root_phys; pptr; pptr = pptr->sibling) {
1629 		pmcs_lock_phy(pptr);
1630 		pmcs_clear_phy(pwp, pptr);
1631 		pmcs_unlock_phy(pptr);
1632 	}
1633 
1634 	if (pwp->targets) {
1635 		for (i = 0; i < pwp->max_dev; i++) {
1636 			pmcs_xscsi_t *xp = pwp->targets[i];
1637 
1638 			if (xp == NULL) {
1639 				continue;
1640 			}
1641 			mutex_enter(&xp->statlock);
1642 			if (xp->assigned == 0 && xp->dying == 0) {
1643 				if (xp->new) {
1644 					xp->new = 0;
1645 					xp->ca = 0;
1646 					xp->qdepth = 0;
1647 					xp->phy = NULL;
1648 				}
1649 				mutex_exit(&xp->statlock);
1650 				continue;
1651 			}
1652 			xp->tagmap = 0;
1653 			xp->dying = 1;
1654 			xp->assigned = 0;
1655 			mutex_exit(&xp->statlock);
1656 			SCHEDULE_WORK(pwp, PMCS_WORK_REM_DEVICES);
1657 		}
1658 	}
1659 
1660 	bzero(pwp->shadow_iqpi, sizeof (pwp->shadow_iqpi));
1661 	for (i = 0; i < PMCS_NIQ; i++) {
1662 		if (pwp->iqp[i]) {
1663 			bzero(pwp->iqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth);
1664 			pmcs_wr_iqpi(pwp, i, 0);
1665 			pmcs_wr_iqci(pwp, i, 0);
1666 		}
1667 	}
1668 	for (i = 0; i < PMCS_NOQ; i++) {
1669 		if (pwp->oqp[i]) {
1670 			bzero(pwp->oqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth);
1671 			pmcs_wr_oqpi(pwp, i, 0);
1672 			pmcs_wr_oqci(pwp, i, 0);
1673 		}
1674 
1675 	}
1676 	if (pwp->fwlogp) {
1677 		bzero(pwp->fwlogp, PMCS_FWLOG_SIZE);
1678 	}
1679 	STAILQ_INIT(&pwp->wf);
1680 	bzero(pwp->work, sizeof (pmcwork_t) * pwp->max_cmd);
1681 	for (i = 0; i < pwp->max_cmd - 1; i++) {
1682 		pmcwork_t *pwrk = &pwp->work[i];
1683 		STAILQ_INSERT_TAIL(&pwp->wf, pwrk, next);
1684 	}
1685 
1686 	/*
1687 	 * Clear out any leftover commands sitting in the work list
1688 	 */
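	/*
	 * Disposition depends on how each work structure was tagged:
	 * WAIT-tagged work is left for its sleeping thread to reap,
	 * CBACK- and NONE-tagged work is freed here, and work queued on
	 * the completion path is marked dead so the completion handler
	 * discards it.
	 */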
1689 	for (i = 0; i < pwp->max_cmd; i++) {
1690 		pmcwork_t *pwrk = &pwp->work[i];
1691 		mutex_enter(&pwrk->lock);
1692 		if (pwrk->state == PMCS_WORK_STATE_ONCHIP) {
1693 			switch (PMCS_TAG_TYPE(pwrk->htag)) {
1694 			case PMCS_TAG_TYPE_WAIT:
1695 				mutex_exit(&pwrk->lock);
1696 				break;
1697 			case PMCS_TAG_TYPE_CBACK:
1698 			case PMCS_TAG_TYPE_NONE:
1699 				pmcs_pwork(pwp, pwrk);
1700 				break;
1701 			default:
1702 				break;
1703 			}
1704 		} else if (pwrk->state == PMCS_WORK_STATE_IOCOMPQ) {
1705 			pwrk->dead = 1;
1706 			mutex_exit(&pwrk->lock);
1707 		} else {
1708 			/*
1709 			 * The other states of NIL, READY and INTR
1710 			 * should not be visible outside of a lock being held.
1711 			 */
1712 			pmcs_pwork(pwp, pwrk);
1713 		}
1714 	}
1715 
1716 	/*
1717 	 * Restore Interrupt Mask
1718 	 */
1719 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_MASK, pwp->intr_mask);
1720 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR, 0xffffffff);
1721 
1722 	pwp->blocked = 0;
1723 	pwp->mpi_table_setup = 0;
1724 	mutex_exit(&pwp->lock);
1725 
1726 	/*
1727 	 * Set up MPI again.
1728 	 */
1729 	if (pmcs_setup(pwp)) {
1730 		msg = "unable to setup MPI tables again";
1731 		goto fail_restart;
1732 	}
1733 	pmcs_report_fwversion(pwp);
1734 
1735 	/*
1736 	 * Restart MPI
1737 	 */
1738 	if (pmcs_start_mpi(pwp)) {
1739 		msg = "unable to restart MPI again";
1740 		goto fail_restart;
1741 	}
1742 
1743 	mutex_enter(&pwp->lock);
1744 	pwp->blocked = 0;
1745 	SCHEDULE_WORK(pwp, PMCS_WORK_RUN_QUEUES);
1746 	mutex_exit(&pwp->lock);
1747 
1748 	/*
1749 	 * Run any completions
1750 	 */
1751 	PMCS_CQ_RUN(pwp);
1752 
1753 	/*
1754 	 * Delay
1755 	 */
1756 	drv_usecwait(1000000);
1757 	return (0);
1758 
1759 fail_restart:
1760 	mutex_enter(&pwp->lock);
1761 	pwp->state = STATE_DEAD;
1762 	mutex_exit(&pwp->lock);
1763 	pmcs_prt(pwp, PMCS_PRT_ERR, "%s: Failed: %s", __func__, msg);
1764 	return (-1);
1765 }
1766 
1767 /*
1768  * Reset a device or a logical unit.
1769  */
1770 int
1771 pmcs_reset_dev(pmcs_hw_t *pwp, pmcs_phy_t *pptr, uint64_t lun)
1772 {
1773 	int rval = 0;
1774 
1775 	if (pptr == NULL) {
1776 		return (ENXIO);
1777 	}
1778 
1779 	pmcs_lock_phy(pptr);
1780 	if (pptr->dtype == SAS) {
1781 		/*
1782 		 * Some devices do not support SAS_I_T_NEXUS_RESET as
1783 		 * it is not a mandatory (in SAM4) task management
1784 		 * function, while LOGICAL UNIT RESET is mandatory.
1785 		 *
1786 		 * The problem here is that we need to iterate over
1787 		 * all known LUNs to emulate the semantics of
1788 		 * "RESET_TARGET".
1789 		 *
1790 		 * XXX: FIX ME
1791 		 */
1792 		if (lun == (uint64_t)-1) {
1793 			lun = 0;
1794 		}
1795 		rval = pmcs_ssp_tmf(pwp, pptr, SAS_LOGICAL_UNIT_RESET, 0, lun,
1796 		    NULL);
1797 	} else if (pptr->dtype == SATA) {
1798 		if (lun != 0ull) {
1799 			pmcs_unlock_phy(pptr);
1800 			return (EINVAL);
1801 		}
1802 		rval = pmcs_reset_phy(pwp, pptr, PMCS_PHYOP_LINK_RESET);
1803 	} else {
1804 		pmcs_unlock_phy(pptr);
1805 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
1806 		    "%s: cannot reset a SMP device yet (%s)",
1807 		    __func__, pptr->path);
1808 		return (EINVAL);
1809 	}
1810 
1811 	/*
1812 	 * Now harvest any commands killed by this action
1813 	 * by issuing an ABORT for all commands on this device.
1814 	 *
1815 	 * We do this even if the TMF or reset fails (in case there
1816 	 * are any dead commands around to be harvested *anyway*).
1817 	 * We don't have to wait for the abort to complete.
1818 	 */
1819 	if (pmcs_abort(pwp, pptr, 0, 1, 0)) {
1820 		pptr->abort_pending = 1;
1821 		SCHEDULE_WORK(pwp, PMCS_WORK_ABORT_HANDLE);
1822 	}
1823 
1824 	pmcs_unlock_phy(pptr);
1825 	return (rval);
1826 }
1827 
1828 /*
1829  * Called with PHY locked.
1830  */
1831 static int
1832 pmcs_get_device_handle(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
1833 {
1834 	if (pptr->valid_device_id == 0) {
1835 		int result = pmcs_register_device(pwp, pptr);
1836 
1837 		/*
1838 		 * If we changed while registering, punt
1839 		 */
1840 		if (pptr->changed) {
1841 			RESTART_DISCOVERY(pwp);
1842 			return (-1);
1843 		}
1844 
1845 		/*
		 * If registration failed, check the error.  ENOMEM means
		 * a temporary resource shortage, so just retry.
1848 		 */
1849 		if (result == ENOMEM) {
1850 			PHY_CHANGED(pwp, pptr);
1851 			RESTART_DISCOVERY(pwp);
1852 			return (-1);
1853 		}
1854 
1855 		/*
1856 		 * An ETIMEDOUT error means we retry (if our counter isn't
1857 		 * exhausted)
1858 		 */
1859 		if (result == ETIMEDOUT) {
1860 			if (ddi_get_lbolt() < pptr->config_stop) {
1861 				PHY_CHANGED(pwp, pptr);
1862 				RESTART_DISCOVERY(pwp);
1863 			} else {
1864 				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
1865 				    "%s: Retries exhausted for %s, killing",
1866 				    __func__, pptr->path);
1867 				pptr->config_stop = 0;
1868 				pmcs_kill_changed(pwp, pptr, 0);
1869 			}
1870 			return (-1);
1871 		}
1872 		/*
		 * Any other error, or the lack of a valid device id, is
		 * fatal, but doesn't preclude a future action.
1875 		 */
1876 		if (result || pptr->valid_device_id == 0) {
1877 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: %s could not "
1878 			    "be registered", __func__,  pptr->path);
1879 			return (-1);
1880 		}
1881 	}
1882 	return (0);
1883 }
1884 
1885 int
1886 pmcs_iport_tgtmap_create(pmcs_iport_t *iport)
1887 {
1888 	ASSERT(iport);
1889 	if (iport == NULL)
1890 		return (B_FALSE);
1891 
1892 	pmcs_prt(iport->pwp, PMCS_PRT_DEBUG_MAP, "%s", __func__);
1893 
1894 	/* create target map */
1895 	if (scsi_hba_tgtmap_create(iport->dip, SCSI_TM_FULLSET, tgtmap_usec,
1896 	    2048, NULL, NULL, NULL, &iport->iss_tgtmap) != DDI_SUCCESS) {
1897 		pmcs_prt(iport->pwp, PMCS_PRT_DEBUG,
1898 		    "%s: failed to create tgtmap", __func__);
1899 		return (B_FALSE);
1900 	}
1901 	return (B_TRUE);
1902 }
1903 
1904 int
1905 pmcs_iport_tgtmap_destroy(pmcs_iport_t *iport)
1906 {
1907 	ASSERT(iport && iport->iss_tgtmap);
1908 	if ((iport == NULL) || (iport->iss_tgtmap == NULL))
1909 		return (B_FALSE);
1910 
1911 	pmcs_prt(iport->pwp, PMCS_PRT_DEBUG_MAP, "%s", __func__);
1912 
1913 	/* destroy target map */
1914 	scsi_hba_tgtmap_destroy(iport->iss_tgtmap);
1915 	return (B_TRUE);
1916 }
1917 
1918 /*
1919  * Query the phymap and populate the iport handle passed in.
1920  * Called with iport lock held.
1921  */
1922 int
1923 pmcs_iport_configure_phys(pmcs_iport_t *iport)
1924 {
1925 	pmcs_hw_t		*pwp;
1926 	pmcs_phy_t		*pptr;
1927 	sas_phymap_phys_t	*phys;
1928 	int			phynum;
1929 	int			inst;
1930 
1931 	ASSERT(iport);
1932 	ASSERT(mutex_owned(&iport->lock));
1933 	pwp = iport->pwp;
1934 	ASSERT(pwp);
1935 	inst = ddi_get_instance(iport->dip);
1936 
1937 	mutex_enter(&pwp->lock);
1938 	ASSERT(pwp->root_phys != NULL);
1939 
1940 	/*
1941 	 * Query the phymap regarding the phys in this iport and populate
1942 	 * the iport's phys list. Hereafter this list is maintained via
1943 	 * port up and down events in pmcs_intr.c
1944 	 */
1945 	ASSERT(list_is_empty(&iport->phys));
1946 	phys = sas_phymap_ua2phys(pwp->hss_phymap, iport->ua);
1947 	while ((phynum = sas_phymap_phys_next(phys)) != -1) {
1948 		/* Grab the phy pointer from root_phys */
1949 		pptr = pwp->root_phys + phynum;
1950 		ASSERT(pptr);
1951 		pmcs_lock_phy(pptr);
1952 		ASSERT(pptr->phynum == phynum);
1953 
1954 		/*
1955 		 * Set a back pointer in the phy to this iport.
1956 		 */
1957 		pptr->iport = iport;
1958 
1959 		/*
1960 		 * If this phy is the primary, set a pointer to it on our
1961 		 * iport handle, and set our portid from it.
1962 		 */
1963 		if (!pptr->subsidiary) {
1964 			iport->pptr = pptr;
1965 			iport->portid = pptr->portid;
1966 		}
1967 
1968 		/*
1969 		 * Finally, insert the phy into our list
1970 		 */
1971 		pmcs_add_phy_to_iport(iport, pptr);
1972 		pmcs_unlock_phy(pptr);
1973 
1974 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: found phy %d [0x%p] "
1975 		    "on iport%d, refcnt(%d)", __func__, phynum,
1976 		    (void *)pptr, inst, iport->refcnt);
1977 	}
1978 	mutex_exit(&pwp->lock);
1979 	sas_phymap_phys_free(phys);
1980 	RESTART_DISCOVERY(pwp);
1981 	return (DDI_SUCCESS);
1982 }
1983 
1984 /*
1985  * Return the iport that ua is associated with, or NULL.  If an iport is
1986  * returned, it will be held and the caller must release the hold.
1987  */
1988 static pmcs_iport_t *
1989 pmcs_get_iport_by_ua(pmcs_hw_t *pwp, char *ua)
1990 {
1991 	pmcs_iport_t	*iport = NULL;
1992 
1993 	rw_enter(&pwp->iports_lock, RW_READER);
1994 	for (iport = list_head(&pwp->iports);
1995 	    iport != NULL;
1996 	    iport = list_next(&pwp->iports, iport)) {
1997 		mutex_enter(&iport->lock);
1998 		if (strcmp(iport->ua, ua) == 0) {
1999 			mutex_exit(&iport->lock);
2000 			mutex_enter(&iport->refcnt_lock);
2001 			iport->refcnt++;
2002 			mutex_exit(&iport->refcnt_lock);
2003 			break;
2004 		}
2005 		mutex_exit(&iport->lock);
2006 	}
2007 	rw_exit(&pwp->iports_lock);
2008 
2009 	return (iport);
2010 }
2011 
2012 /*
2013  * Return the iport that pptr is associated with, or NULL.
2014  * If an iport is returned, there is a hold that the caller must release.
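 *
 * A typical (hypothetical) caller pattern:
 *
 *	iport = pmcs_get_iport_by_phy(pwp, pptr);
 *	if (iport != NULL) {
 *		... use the iport ...
 *		pmcs_rele_iport(iport);
 *	}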
2015  */
2016 pmcs_iport_t *
2017 pmcs_get_iport_by_phy(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2018 {
2019 	pmcs_iport_t	*iport = NULL;
2020 	char		*ua;
2021 
2022 	ua = sas_phymap_lookup_ua(pwp->hss_phymap, pwp->sas_wwns[0],
2023 	    pmcs_barray2wwn(pptr->sas_address));
2024 	if (ua) {
2025 		iport = pmcs_get_iport_by_ua(pwp, ua);
2026 		if (iport) {
2027 			mutex_enter(&iport->lock);
2028 			iport->ua_state = UA_ACTIVE;
2029 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: "
2030 			    "found iport [0x%p] on ua (%s) for phy [0x%p], "
2031 			    "refcnt (%d)", __func__, (void *)iport, ua,
2032 			    (void *)pptr, iport->refcnt);
2033 			mutex_exit(&iport->lock);
2034 		}
2035 	}
2036 
2037 	return (iport);
2038 }
2039 
2040 void
2041 pmcs_rele_iport(pmcs_iport_t *iport)
2042 {
2043 	/*
2044 	 * Release a refcnt on this iport. If this is the last reference,
2045 	 * signal the potential waiter in pmcs_iport_unattach().
2046 	 */
2047 	ASSERT(iport->refcnt > 0);
	mutex_enter(&iport->refcnt_lock);
	iport->refcnt--;
	if (iport->refcnt == 0) {
		cv_signal(&iport->refcnt_cv);
	}
	mutex_exit(&iport->refcnt_lock);
2054 	pmcs_prt(iport->pwp, PMCS_PRT_DEBUG_CONFIG, "%s: iport [0x%p] "
2055 	    "refcnt (%d)", __func__, (void *)iport, iport->refcnt);
2056 }
2057 
2058 void
2059 pmcs_phymap_activate(void *arg, char *ua, void **privp)
2060 {
2061 	_NOTE(ARGUNUSED(privp));
2062 	pmcs_hw_t	*pwp = arg;
2063 	pmcs_iport_t	*iport = NULL;
2064 
2065 	mutex_enter(&pwp->lock);
2066 	if ((pwp->state == STATE_UNPROBING) || (pwp->state == STATE_DEAD)) {
2067 		mutex_exit(&pwp->lock);
2068 		return;
2069 	}
2070 	pwp->phymap_active++;
2071 	mutex_exit(&pwp->lock);
2072 
2073 	if (scsi_hba_iportmap_iport_add(pwp->hss_iportmap, ua, NULL) !=
2074 	    DDI_SUCCESS) {
2075 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, "%s: failed to add "
2076 		    "iport handle on unit address [%s]", __func__, ua);
2077 	} else {
2078 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, "%s: phymap_active count "
2079 		    "(%d), added iport handle on unit address [%s]", __func__,
2080 		    pwp->phymap_active, ua);
2081 	}
2082 
2083 	/* Set the HBA softstate as our private data for this unit address */
2084 	*privp = (void *)pwp;
2085 
2086 	/*
2087 	 * We are waiting on attach for this iport node, unless it is still
2088 	 * attached. This can happen if a consumer has an outstanding open
2089 	 * on our iport node, but the port is down.  If this is the case, we
2090 	 * need to configure our iport here for reuse.
2091 	 */
2092 	iport = pmcs_get_iport_by_ua(pwp, ua);
2093 	if (iport) {
2094 		mutex_enter(&iport->lock);
2095 		if (pmcs_iport_configure_phys(iport) != DDI_SUCCESS) {
2096 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: "
2097 			    "failed to configure phys on iport [0x%p] at "
2098 			    "unit address (%s)", __func__, (void *)iport, ua);
2099 		}
2100 		iport->ua_state = UA_ACTIVE;
2101 		pmcs_smhba_add_iport_prop(iport, DATA_TYPE_INT32, PMCS_NUM_PHYS,
2102 		    &iport->nphy);
2103 		mutex_exit(&iport->lock);
2104 		pmcs_rele_iport(iport);
2105 	}
2106 
2107 }
2108 
2109 void
2110 pmcs_phymap_deactivate(void *arg, char *ua, void *privp)
2111 {
2112 	_NOTE(ARGUNUSED(privp));
2113 	pmcs_hw_t	*pwp = arg;
2114 	pmcs_iport_t	*iport;
2115 
2116 	mutex_enter(&pwp->lock);
2117 	pwp->phymap_active--;
2118 	mutex_exit(&pwp->lock);
2119 
2120 	if (scsi_hba_iportmap_iport_remove(pwp->hss_iportmap, ua) !=
2121 	    DDI_SUCCESS) {
2122 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, "%s: failed to remove "
2123 		    "iport handle on unit address [%s]", __func__, ua);
2124 	} else {
2125 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, "%s: phymap_active "
2126 		    "count (%d), removed iport handle on unit address [%s]",
2127 		    __func__, pwp->phymap_active, ua);
2128 	}
2129 
2130 	iport = pmcs_get_iport_by_ua(pwp, ua);
2131 
2132 	if (iport == NULL) {
2133 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: failed lookup of "
2134 		    "iport handle on unit address (%s)", __func__, ua);
2135 		return;
2136 	}
2137 
2138 	mutex_enter(&iport->lock);
2139 	iport->ua_state = UA_INACTIVE;
2140 	iport->portid = PMCS_IPORT_INVALID_PORT_ID;
2141 	pmcs_remove_phy_from_iport(iport, NULL);
2142 	mutex_exit(&iport->lock);
2143 	pmcs_rele_iport(iport);
2144 }
2145 
2146 /*
2147  * Top-level discovery function
2148  */
2149 void
2150 pmcs_discover(pmcs_hw_t *pwp)
2151 {
2152 	pmcs_phy_t		*pptr;
2153 	pmcs_phy_t		*root_phy;
2154 
2155 	DTRACE_PROBE2(pmcs__discover__entry, ulong_t, pwp->work_flags,
2156 	    boolean_t, pwp->config_changed);
2157 
2158 	mutex_enter(&pwp->lock);
2159 
2160 	if (pwp->state != STATE_RUNNING) {
2161 		mutex_exit(&pwp->lock);
2162 		return;
2163 	}
2164 
2165 	/* Ensure we have at least one phymap active */
2166 	if (pwp->phymap_active == 0) {
2167 		mutex_exit(&pwp->lock);
2168 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
2169 		    "%s: phymap inactive, exiting", __func__);
2170 		return;
2171 	}
2172 
2173 	mutex_exit(&pwp->lock);
2174 
2175 	/*
2176 	 * If no iports have attached, but we have PHYs that are up, we
2177 	 * are waiting for iport attach to complete.  Restart discovery.
2178 	 */
2179 	rw_enter(&pwp->iports_lock, RW_READER);
2180 	if (!pwp->iports_attached) {
2181 		rw_exit(&pwp->iports_lock);
2182 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
2183 		    "%s: no iports attached, retry discovery", __func__);
2184 		SCHEDULE_WORK(pwp, PMCS_WORK_DISCOVER);
2185 		return;
2186 	}
2187 	rw_exit(&pwp->iports_lock);
2188 
2189 	mutex_enter(&pwp->config_lock);
2190 	if (pwp->configuring) {
2191 		mutex_exit(&pwp->config_lock);
2192 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
2193 		    "%s: configuration already in progress", __func__);
2194 		return;
2195 	}
2196 
2197 	if (pmcs_acquire_scratch(pwp, B_FALSE)) {
2198 		mutex_exit(&pwp->config_lock);
2199 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
2200 		    "%s: cannot allocate scratch", __func__);
2201 		SCHEDULE_WORK(pwp, PMCS_WORK_DISCOVER);
2202 		return;
2203 	}
2204 
2205 	pwp->configuring = 1;
2206 	pwp->config_changed = B_FALSE;
2207 	mutex_exit(&pwp->config_lock);
2208 
2209 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "Discovery begin");
2210 
2211 	/*
2212 	 * The order of the following traversals is important.
2213 	 *
2214 	 * The first one checks for changed expanders.
2215 	 *
2216 	 * The second one aborts commands for dead devices and deregisters them.
2217 	 *
	 * The third one clears the contents of dead expanders from the tree.
2219 	 *
2220 	 * The fourth one clears now dead devices in expanders that remain.
2221 	 */
2222 
2223 	/*
2224 	 * 1. Check expanders marked changed (but not dead) to see if they still
2225 	 * have the same number of phys and the same SAS address. Mark them,
	 * their subsidiary phys (if wide) and their descendants dead if
2227 	 * anything has changed. Check the devices they contain to see if
2228 	 * *they* have changed. If they've changed from type NOTHING we leave
2229 	 * them marked changed to be configured later (picking up a new SAS
2230 	 * address and link rate if possible). Otherwise, any change in type,
2231 	 * SAS address or removal of target role will cause us to mark them
	 * (and their descendants) as dead (and cause any pending commands
2233 	 * and associated devices to be removed).
2234 	 */
2235 	root_phy = pwp->root_phys;
2236 	if (pmcs_check_expanders(pwp, root_phy) == B_TRUE) {
2237 		goto out;
2238 	}
2239 
2240 	/*
2241 	 * 2. Descend the tree looking for dead devices and kill them
2242 	 * by aborting all active commands and then deregistering them.
2243 	 */
2244 	if (pmcs_kill_devices(pwp, root_phy)) {
2245 		goto out;
2246 	}
2247 
2248 	/*
2249 	 * 3. Check for dead expanders and remove their children from the tree.
2250 	 * By the time we get here, the devices and commands for them have
2251 	 * already been terminated and removed.
2252 	 *
2253 	 * We do this independent of the configuration count changing so we can
2254 	 * free any dead device PHYs that were discovered while checking
2255 	 * expanders. We ignore any subsidiary phys as pmcs_clear_expander
2256 	 * will take care of those.
2257 	 *
2258 	 * NOTE: pmcs_clear_expander requires softstate lock
2259 	 */
2260 	mutex_enter(&pwp->lock);
2261 	for (pptr = pwp->root_phys; pptr; pptr = pptr->sibling) {
2262 		/*
2263 		 * Call pmcs_clear_expander for every root PHY.  It will
2264 		 * recurse and determine which (if any) expanders actually
2265 		 * need to be cleared.
2266 		 */
2267 		pmcs_lock_phy(pptr);
2268 		pmcs_clear_expander(pwp, pptr, 0);
2269 		pmcs_unlock_phy(pptr);
2270 	}
2271 	mutex_exit(&pwp->lock);
2272 
2273 	/*
2274 	 * 4. Check for dead devices and nullify them. By the time we get here,
2275 	 * the devices and commands for them have already been terminated
2276 	 * and removed. This is different from step 2 in that this just nulls
2277 	 * phys that are part of expanders that are still here but used to
2278 	 * be something but are no longer something (e.g., after a pulled
2279 	 * disk drive). Note that dead expanders had their contained phys
	 * removed from the tree; here, the expanders themselves are
2281 	 * nullified (unless they were removed by being contained in another
2282 	 * expander phy).
2283 	 */
2284 	pmcs_clear_phys(pwp, root_phy);
2285 
2286 	/*
2287 	 * 5. Now check for and configure new devices.
2288 	 */
2289 	if (pmcs_configure_new_devices(pwp, root_phy)) {
2290 		goto restart;
2291 	}
2292 
2293 out:
2294 	DTRACE_PROBE2(pmcs__discover__exit, ulong_t, pwp->work_flags,
2295 	    boolean_t, pwp->config_changed);
2296 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "Discovery end");
2297 
2298 	mutex_enter(&pwp->config_lock);
2299 
2300 	if (pwp->config_changed == B_FALSE) {
2301 		/*
2302 		 * Observation is stable, report what we currently see to
2303 		 * the tgtmaps for delta processing. Start by setting
2304 		 * BEGIN on all tgtmaps.
2305 		 */
2306 		mutex_exit(&pwp->config_lock);
2307 		if (pmcs_report_observations(pwp) == B_FALSE) {
2308 			goto restart;
2309 		}
2310 		mutex_enter(&pwp->config_lock);
2311 	} else {
2312 		/*
2313 		 * If config_changed is TRUE, we need to reschedule
2314 		 * discovery now.
2315 		 */
2316 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
2317 		    "%s: Config has changed, will re-run discovery", __func__);
2318 		SCHEDULE_WORK(pwp, PMCS_WORK_DISCOVER);
2319 	}
2320 
2321 	pmcs_release_scratch(pwp);
2322 	pwp->configuring = 0;
2323 	mutex_exit(&pwp->config_lock);
2324 
2325 #ifdef DEBUG
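	/*
	 * Sanity check: if some PHY still needs attention but no discovery
	 * work is scheduled, log it.
	 */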
2326 	pptr = pmcs_find_phy_needing_work(pwp, pwp->root_phys);
2327 	if (pptr != NULL) {
2328 		if (!WORK_IS_SCHEDULED(pwp, PMCS_WORK_DISCOVER)) {
2329 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
2330 			    "PHY %s dead=%d changed=%d configured=%d "
2331 			    "but no work scheduled", pptr->path, pptr->dead,
2332 			    pptr->changed, pptr->configured);
2333 		}
2334 		pmcs_unlock_phy(pptr);
2335 	}
2336 #endif
2337 
2338 	return;
2339 
2340 restart:
2341 	/* Clean up and restart discovery */
2342 	pmcs_release_scratch(pwp);
2343 	mutex_enter(&pwp->config_lock);
2344 	pwp->configuring = 0;
2345 	RESTART_DISCOVERY_LOCKED(pwp);
2346 	mutex_exit(&pwp->config_lock);
2347 }
2348 
2349 /*
2350  * Return any PHY that needs to have scheduled work done.  The PHY is returned
2351  * locked.
2352  */
2353 static pmcs_phy_t *
2354 pmcs_find_phy_needing_work(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2355 {
2356 	pmcs_phy_t *cphyp, *pnext;
2357 
2358 	while (pptr) {
2359 		pmcs_lock_phy(pptr);
2360 
2361 		if (pptr->changed || (pptr->dead && pptr->valid_device_id)) {
2362 			return (pptr);
2363 		}
2364 
2365 		pnext = pptr->sibling;
2366 
2367 		if (pptr->children) {
2368 			cphyp = pptr->children;
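			/*
			 * Drop this PHY's lock before descending into its
			 * children; any PHY found below is returned locked.
			 */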
2369 			pmcs_unlock_phy(pptr);
2370 			cphyp = pmcs_find_phy_needing_work(pwp, cphyp);
2371 			if (cphyp) {
2372 				return (cphyp);
2373 			}
2374 		} else {
2375 			pmcs_unlock_phy(pptr);
2376 		}
2377 
2378 		pptr = pnext;
2379 	}
2380 
2381 	return (NULL);
2382 }
2383 
2384 /*
2385  * Report current observations to SCSA.
2386  */
2387 static boolean_t
2388 pmcs_report_observations(pmcs_hw_t *pwp)
2389 {
2390 	pmcs_iport_t		*iport;
2391 	scsi_hba_tgtmap_t	*tgtmap;
2392 	char			*ap;
2393 	pmcs_phy_t		*pptr;
2394 	uint64_t		wwn;
2395 
2396 	/*
2397 	 * Observation is stable, report what we currently see to the tgtmaps
2398 	 * for delta processing. Start by setting BEGIN on all tgtmaps.
2399 	 */
2400 	rw_enter(&pwp->iports_lock, RW_READER);
2401 	for (iport = list_head(&pwp->iports); iport != NULL;
2402 	    iport = list_next(&pwp->iports, iport)) {
2403 		/*
2404 		 * Unless we have at least one phy up, skip this iport.
2405 		 * Note we don't need to lock the iport for report_skip
2406 		 * since it is only used here.  We are doing the skip so that
2407 		 * the phymap and iportmap stabilization times are honored -
2408 		 * giving us the ability to recover port operation within the
2409 		 * stabilization time without unconfiguring targets using the
2410 		 * port.
2411 		 */
2412 		if (!sas_phymap_uahasphys(pwp->hss_phymap, iport->ua)) {
2413 			iport->report_skip = 1;
2414 			continue;		/* skip set_begin */
2415 		}
2416 		iport->report_skip = 0;
2417 
2418 		tgtmap = iport->iss_tgtmap;
2419 		ASSERT(tgtmap);
2420 		if (scsi_hba_tgtmap_set_begin(tgtmap) != DDI_SUCCESS) {
2421 			pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP,
2422 			    "%s: cannot set_begin tgtmap ", __func__);
2423 			rw_exit(&pwp->iports_lock);
2424 			return (B_FALSE);
2425 		}
2426 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP,
2427 		    "%s: set begin on tgtmap [0x%p]", __func__,
2428 		    (void *)tgtmap);
2429 	}
2430 	rw_exit(&pwp->iports_lock);
2431 
2432 	/*
2433 	 * Now, cycle through all levels of all phys and report
2434 	 * observations into their respective tgtmaps.
2435 	 */
2436 	pptr = pwp->root_phys;
2437 
2438 	while (pptr) {
2439 		pmcs_lock_phy(pptr);
2440 
2441 		/*
2442 		 * Skip PHYs that have nothing attached or are dead.
2443 		 */
2444 		if ((pptr->dtype == NOTHING) || pptr->dead) {
2445 			pmcs_unlock_phy(pptr);
2446 			pptr = pptr->sibling;
2447 			continue;
2448 		}
2449 
2450 		if (pptr->changed) {
2451 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
2452 			    "%s: oops, PHY %s changed; restart discovery",
2453 			    __func__, pptr->path);
2454 			pmcs_unlock_phy(pptr);
2455 			return (B_FALSE);
2456 		}
2457 
2458 		/*
2459 		 * Get the iport for this root PHY, then call the helper
2460 		 * to report observations for this iport's targets
2461 		 */
2462 		iport = pmcs_get_iport_by_phy(pwp, pptr);
2463 		if (iport == NULL) {
2464 			/* No iport for this tgt */
2465 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
2466 			    "%s: no iport for this target",
2467 			    __func__);
2468 			pmcs_unlock_phy(pptr);
2469 			pptr = pptr->sibling;
2470 			continue;
2471 		}
2472 
2473 		if (!iport->report_skip) {
2474 			if (pmcs_report_iport_observations(
2475 			    pwp, iport, pptr) == B_FALSE) {
2476 				pmcs_rele_iport(iport);
2477 				pmcs_unlock_phy(pptr);
2478 				return (B_FALSE);
2479 			}
2480 		}
2481 		pmcs_rele_iport(iport);
2482 		pmcs_unlock_phy(pptr);
2483 		pptr = pptr->sibling;
2484 	}
2485 
2486 	/*
2487 	 * The observation is complete, end sets. Note we will skip any
2488 	 * iports that are active, but have no PHYs in them (i.e. awaiting
2489 	 * unconfigure). Set to restart discovery if we find this.
2490 	 */
2491 	rw_enter(&pwp->iports_lock, RW_READER);
2492 	for (iport = list_head(&pwp->iports);
2493 	    iport != NULL;
2494 	    iport = list_next(&pwp->iports, iport)) {
2495 
2496 		if (iport->report_skip)
2497 			continue;		/* skip set_end */
2498 
2499 		tgtmap = iport->iss_tgtmap;
2500 		ASSERT(tgtmap);
2501 		if (scsi_hba_tgtmap_set_end(tgtmap, 0) != DDI_SUCCESS) {
2502 			pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP,
2503 			    "%s: cannot set_end tgtmap ", __func__);
2504 			rw_exit(&pwp->iports_lock);
2505 			return (B_FALSE);
2506 		}
2507 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP,
2508 		    "%s: set end on tgtmap [0x%p]", __func__,
2509 		    (void *)tgtmap);
2510 	}
2511 
2512 	/*
2513 	 * Now that discovery is complete, set up the necessary
2514 	 * DDI properties on each iport node.
2515 	 */
2516 	for (iport = list_head(&pwp->iports); iport != NULL;
2517 	    iport = list_next(&pwp->iports, iport)) {
2518 		/* Set up the DDI properties on each phy */
2519 		pmcs_smhba_set_phy_props(iport);
2520 
2521 		/* Set up the 'attached-port' property on the iport */
2522 		ap = kmem_zalloc(PMCS_MAX_UA_SIZE, KM_SLEEP);
2523 		mutex_enter(&iport->lock);
2524 		pptr = iport->pptr;
2525 		mutex_exit(&iport->lock);
2526 		if (pptr == NULL) {
2527 			/*
2528 			 * This iport is down, but has not been
2529 			 * removed from our list (unconfigured).
2530 			 * Set our value to '0'.
2531 			 */
			(void) snprintf(ap, PMCS_MAX_UA_SIZE, "%s", "0");
2533 		} else {
2534 			/* Otherwise, set it to remote phy's wwn */
2535 			pmcs_lock_phy(pptr);
2536 			wwn = pmcs_barray2wwn(pptr->sas_address);
2537 			(void) scsi_wwn_to_wwnstr(wwn, 1, ap);
2538 			pmcs_unlock_phy(pptr);
2539 		}
2540 		if (ndi_prop_update_string(DDI_DEV_T_NONE, iport->dip,
		    SCSI_ADDR_PROP_ATTACHED_PORT, ap) != DDI_SUCCESS) {
2542 			pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Failed to "
2543 			    "set prop ("SCSI_ADDR_PROP_ATTACHED_PORT")",
2544 			    __func__);
2545 		}
2546 		kmem_free(ap, PMCS_MAX_UA_SIZE);
2547 	}
2548 	rw_exit(&pwp->iports_lock);
2549 
2550 	return (B_TRUE);
2551 }
2552 
2553 /*
2554  * Report observations into a particular iport's target map
2555  *
 * Called with phyp (and all descendants) locked
2557  */
2558 static boolean_t
2559 pmcs_report_iport_observations(pmcs_hw_t *pwp, pmcs_iport_t *iport,
2560     pmcs_phy_t *phyp)
2561 {
2562 	pmcs_phy_t		*lphyp;
2563 	scsi_hba_tgtmap_t	*tgtmap;
2564 	scsi_tgtmap_tgt_type_t	tgt_type;
2565 	char			*ua;
2566 	uint64_t		wwn;
2567 
2568 	tgtmap = iport->iss_tgtmap;
2569 	ASSERT(tgtmap);
2570 
2571 	lphyp = phyp;
2572 	while (lphyp) {
2573 		switch (lphyp->dtype) {
2574 		default:		/* Skip unknown PHYs. */
2575 			/* for non-root phys, skip to sibling */
2576 			goto next_phy;
2577 
2578 		case SATA:
2579 		case SAS:
2580 			tgt_type = SCSI_TGT_SCSI_DEVICE;
2581 			break;
2582 
2583 		case EXPANDER:
2584 			tgt_type = SCSI_TGT_SMP_DEVICE;
2585 			break;
2586 		}
2587 
2588 		if (lphyp->dead) {
2589 			goto next_phy;
2590 		}
2591 
2592 		wwn = pmcs_barray2wwn(lphyp->sas_address);
2593 		ua = scsi_wwn_to_wwnstr(wwn, 1, NULL);
2594 
2595 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP,
2596 		    "iport_observation: adding %s on tgtmap [0x%p] phy [0x%p]",
		    ua, (void *)tgtmap, (void *)lphyp);
2598 
2599 		if (scsi_hba_tgtmap_set_add(tgtmap, tgt_type, ua, NULL) !=
2600 		    DDI_SUCCESS) {
2601 			pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP,
2602 			    "%s: failed to add address %s", __func__, ua);
2603 			scsi_free_wwnstr(ua);
2604 			return (B_FALSE);
2605 		}
2606 		scsi_free_wwnstr(ua);
2607 
2608 		if (lphyp->children) {
2609 			if (pmcs_report_iport_observations(pwp, iport,
2610 			    lphyp->children) == B_FALSE) {
2611 				return (B_FALSE);
2612 			}
2613 		}
2614 
2615 		/* for non-root phys, report siblings too */
2616 next_phy:
2617 		if (IS_ROOT_PHY(lphyp)) {
2618 			lphyp = NULL;
2619 		} else {
2620 			lphyp = lphyp->sibling;
2621 		}
2622 	}
2623 
2624 	return (B_TRUE);
2625 }
2626 
2627 /*
2628  * Check for and configure new devices.
2629  *
2630  * If the changed device is a SATA device, add a SATA device.
2631  *
2632  * If the changed device is a SAS device, add a SAS device.
2633  *
2634  * If the changed device is an EXPANDER device, do a REPORT
2635  * GENERAL SMP command to find out the number of contained phys.
2636  *
2637  * For each number of contained phys, allocate a phy, do a
2638  * DISCOVERY SMP command to find out what kind of device it
2639  * is and add it to the linked list of phys on the *next* level.
2640  *
2641  * NOTE: pptr passed in by the caller will be a root PHY
2642  */
2643 static int
2644 pmcs_configure_new_devices(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2645 {
2646 	int rval = 0;
2647 	pmcs_iport_t *iport;
2648 	pmcs_phy_t *pnext, *orig_pptr = pptr, *root_phy, *pchild;
2649 
2650 	/*
2651 	 * First, walk through each PHY at this level
2652 	 */
2653 	while (pptr) {
2654 		pmcs_lock_phy(pptr);
2655 		pnext = pptr->sibling;
2656 
2657 		/*
2658 		 * Set the new dtype if it has changed
2659 		 */
2660 		if ((pptr->pend_dtype != NEW) &&
2661 		    (pptr->pend_dtype != pptr->dtype)) {
2662 			pptr->dtype = pptr->pend_dtype;
2663 		}
2664 
2665 		if (pptr->changed == 0 || pptr->dead || pptr->configured) {
2666 			goto next_phy;
2667 		}
2668 
2669 		/*
2670 		 * Confirm that this target's iport is configured
2671 		 */
2672 		root_phy = pmcs_get_root_phy(pptr);
2673 		iport = pmcs_get_iport_by_phy(pwp, root_phy);
2674 		if (iport == NULL) {
2675 			/* No iport for this tgt, restart */
2676 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
2677 			    "%s: iport not yet configured, "
2678 			    "retry discovery", __func__);
2679 			pnext = NULL;
2680 			rval = -1;
2681 			goto next_phy;
2682 		}
2683 
2684 		switch (pptr->dtype) {
2685 		case NOTHING:
2686 			pptr->changed = 0;
2687 			break;
2688 		case SATA:
2689 		case SAS:
2690 			pptr->iport = iport;
2691 			pmcs_new_tport(pwp, pptr);
2692 			break;
2693 		case EXPANDER:
2694 			pmcs_configure_expander(pwp, pptr, iport);
2695 			break;
2696 		}
2697 		pmcs_rele_iport(iport);
2698 
2699 		mutex_enter(&pwp->config_lock);
2700 		if (pwp->config_changed) {
2701 			mutex_exit(&pwp->config_lock);
2702 			pnext = NULL;
2703 			goto next_phy;
2704 		}
2705 		mutex_exit(&pwp->config_lock);
2706 
2707 next_phy:
2708 		pmcs_unlock_phy(pptr);
2709 		pptr = pnext;
2710 	}
2711 
2712 	if (rval != 0) {
2713 		return (rval);
2714 	}
2715 
2716 	/*
	 * Now walk through each PHY again, recursing into any that
	 * have children.
2719 	 */
2720 	pptr = orig_pptr;
2721 	while (pptr) {
2722 		pmcs_lock_phy(pptr);
2723 		pnext = pptr->sibling;
2724 		pchild = pptr->children;
2725 		pmcs_unlock_phy(pptr);
2726 
2727 		if (pchild) {
2728 			rval = pmcs_configure_new_devices(pwp, pchild);
2729 			if (rval != 0) {
2730 				break;
2731 			}
2732 		}
2733 
2734 		pptr = pnext;
2735 	}
2736 
2737 	return (rval);
2738 }
2739 
2740 /*
 * Set all phys and descendant phys as changed if changed == B_TRUE, otherwise
2742  * mark them all as not changed.
2743  *
2744  * Called with parent PHY locked.
2745  */
2746 void
2747 pmcs_set_changed(pmcs_hw_t *pwp, pmcs_phy_t *parent, boolean_t changed,
2748     int level)
2749 {
2750 	pmcs_phy_t *pptr;
2751 
2752 	if (level == 0) {
2753 		if (changed) {
2754 			PHY_CHANGED(pwp, parent);
2755 		} else {
2756 			parent->changed = 0;
2757 		}
2758 		if (parent->dtype == EXPANDER && parent->level) {
2759 			parent->width = 1;
2760 		}
2761 		if (parent->children) {
2762 			pmcs_set_changed(pwp, parent->children, changed,
2763 			    level + 1);
2764 		}
2765 	} else {
2766 		pptr = parent;
2767 		while (pptr) {
2768 			if (changed) {
2769 				PHY_CHANGED(pwp, pptr);
2770 			} else {
2771 				pptr->changed = 0;
2772 			}
2773 			if (pptr->dtype == EXPANDER && pptr->level) {
2774 				pptr->width = 1;
2775 			}
2776 			if (pptr->children) {
2777 				pmcs_set_changed(pwp, pptr->children, changed,
2778 				    level + 1);
2779 			}
2780 			pptr = pptr->sibling;
2781 		}
2782 	}
2783 }
2784 
2785 /*
 * Take the passed phy, mark it and its descendants as dead.
2787  * Fire up reconfiguration to abort commands and bury it.
2788  *
2789  * Called with the parent PHY locked.
2790  */
2791 void
2792 pmcs_kill_changed(pmcs_hw_t *pwp, pmcs_phy_t *parent, int level)
2793 {
2794 	pmcs_phy_t *pptr = parent;
2795 
2796 	while (pptr) {
2797 		pptr->link_rate = 0;
2798 		pptr->abort_sent = 0;
2799 		pptr->abort_pending = 1;
2800 		SCHEDULE_WORK(pwp, PMCS_WORK_ABORT_HANDLE);
2801 		pptr->need_rl_ext = 0;
2802 
2803 		if (pptr->dead == 0) {
2804 			PHY_CHANGED(pwp, pptr);
2805 			RESTART_DISCOVERY(pwp);
2806 		}
2807 
2808 		pptr->dead = 1;
2809 
2810 		if (pptr->children) {
2811 			pmcs_kill_changed(pwp, pptr->children, level + 1);
2812 		}
2813 
2814 		/*
2815 		 * Only kill siblings at level > 0
2816 		 */
2817 		if (level == 0) {
2818 			return;
2819 		}
2820 
2821 		pptr = pptr->sibling;
2822 	}
2823 }
2824 
2825 /*
2826  * Go through every PHY and clear any that are dead (unless they're expanders)
2827  */
2828 static void
2829 pmcs_clear_phys(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2830 {
2831 	pmcs_phy_t *pnext, *phyp;
2832 
2833 	phyp = pptr;
2834 	while (phyp) {
2835 		if (IS_ROOT_PHY(phyp)) {
2836 			pmcs_lock_phy(phyp);
2837 		}
2838 
2839 		if ((phyp->dtype != EXPANDER) && phyp->dead) {
2840 			pmcs_clear_phy(pwp, phyp);
2841 		}
2842 
2843 		if (phyp->children) {
2844 			pmcs_clear_phys(pwp, phyp->children);
2845 		}
2846 
2847 		pnext = phyp->sibling;
2848 
2849 		if (IS_ROOT_PHY(phyp)) {
2850 			pmcs_unlock_phy(phyp);
2851 		}
2852 
2853 		phyp = pnext;
2854 	}
2855 }
2856 
2857 /*
2858  * Clear volatile parts of a phy.  Called with PHY locked.
2859  */
2860 void
2861 pmcs_clear_phy(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2862 {
2863 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: %s", __func__, pptr->path);
2864 	ASSERT(mutex_owned(&pptr->phy_lock));
2865 	/* keep sibling */
2866 	/* keep children */
2867 	/* keep parent */
2868 	pptr->device_id = PMCS_INVALID_DEVICE_ID;
2869 	/* keep hw_event_ack */
2870 	pptr->ncphy = 0;
2871 	/* keep phynum */
2872 	pptr->width = 0;
2873 	pptr->ds_recovery_retries = 0;
2874 	/* keep dtype */
2875 	pptr->config_stop = 0;
2876 	pptr->spinup_hold = 0;
2877 	pptr->atdt = 0;
2878 	/* keep portid */
2879 	pptr->link_rate = 0;
2880 	pptr->valid_device_id = 0;
2881 	pptr->abort_sent = 0;
2882 	pptr->abort_pending = 0;
2883 	pptr->need_rl_ext = 0;
2884 	pptr->subsidiary = 0;
2885 	pptr->configured = 0;
2886 	/* Only mark dead if it's not a root PHY and its dtype isn't NOTHING */
2887 	/* XXX: What about directly attached disks? */
2888 	if (!IS_ROOT_PHY(pptr) && (pptr->dtype != NOTHING))
2889 		pptr->dead = 1;
2890 	pptr->changed = 0;
2891 	/* keep SAS address */
2892 	/* keep path */
2893 	/* keep ref_count */
2894 	/* Don't clear iport on root PHYs - they are handled in pmcs_intr.c */
2895 	if (!IS_ROOT_PHY(pptr)) {
2896 		pptr->iport = NULL;
2897 	}
2898 }
2899 
2900 /*
2901  * Allocate softstate for this target if there isn't already one.  If there
2902  * is, just redo our internal configuration.  If it is actually "new", we'll
2903  * soon get a tran_tgt_init for it.
2904  *
2905  * Called with PHY locked.
2906  */
2907 static void
2908 pmcs_new_tport(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2909 {
2910 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: phy 0x%p @ %s", __func__,
2911 	    (void *)pptr, pptr->path);
2912 
2913 	if (pmcs_configure_phy(pwp, pptr) == B_FALSE) {
2914 		/*
2915 		 * If the config failed, mark the PHY as changed.
2916 		 */
2917 		PHY_CHANGED(pwp, pptr);
2918 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
2919 		    "%s: pmcs_configure_phy failed for phy 0x%p", __func__,
2920 		    (void *)pptr);
2921 		return;
2922 	}
2923 
2924 	/* Mark PHY as no longer changed */
2925 	pptr->changed = 0;
2926 
2927 	/*
2928 	 * If the PHY has no target pointer, see if there's a dead PHY that
2929 	 * matches.
2930 	 */
2931 	if (pptr->target == NULL) {
2932 		pmcs_reap_dead_phy(pptr);
2933 	}
2934 
2935 	/*
2936 	 * Only assign the device if there is a target for this PHY with a
2937 	 * matching SAS address.  If an iport is disconnected from one piece
2938 	 * of storage and connected to another within the iport stabilization
2939 	 * time, we can get the PHY/target mismatch situation.
2940 	 *
2941 	 * Otherwise, it'll get done in tran_tgt_init.
2942 	 */
2943 	if (pptr->target) {
2944 		mutex_enter(&pptr->target->statlock);
2945 		if (pmcs_phy_target_match(pptr) == B_FALSE) {
2946 			mutex_exit(&pptr->target->statlock);
2947 			if (!IS_ROOT_PHY(pptr)) {
2948 				pmcs_dec_phy_ref_count(pptr);
2949 			}
2950 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
2951 			    "%s: Not assigning existing tgt %p for PHY %p "
2952 			    "(WWN mismatch)", __func__, (void *)pptr->target,
2953 			    (void *)pptr);
2954 			pptr->target = NULL;
2955 			return;
2956 		}
2957 
2958 		if (!pmcs_assign_device(pwp, pptr->target)) {
2959 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
2960 			    "%s: pmcs_assign_device failed for target 0x%p",
2961 			    __func__, (void *)pptr->target);
2962 		}
2963 		mutex_exit(&pptr->target->statlock);
2964 	}
2965 }
2966 
2967 /*
2968  * Called with PHY lock held.
2969  */
2970 static boolean_t
2971 pmcs_configure_phy(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2972 {
2973 	char *dtype;
2974 
2975 	ASSERT(mutex_owned(&pptr->phy_lock));
2976 
2977 	/*
2978 	 * Mark this device as no longer changed.
2979 	 */
2980 	pptr->changed = 0;
2981 
2982 	/*
2983 	 * If we don't have a device handle, get one.
2984 	 */
2985 	if (pmcs_get_device_handle(pwp, pptr)) {
2986 		return (B_FALSE);
2987 	}
2988 
2989 	pptr->configured = 1;
2990 
2991 	switch (pptr->dtype) {
2992 	case SAS:
2993 		dtype = "SAS";
2994 		break;
2995 	case SATA:
2996 		dtype = "SATA";
2997 		break;
2998 	case EXPANDER:
2999 		dtype = "SMP";
3000 		break;
3001 	default:
3002 		dtype = "???";
3003 	}
3004 
3005 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "config_dev: %s dev %s "
3006 	    SAS_ADDR_FMT " dev id 0x%x lr 0x%x", dtype, pptr->path,
3007 	    SAS_ADDR_PRT(pptr->sas_address), pptr->device_id, pptr->link_rate);
3008 
3009 	return (B_TRUE);
3010 }
3011 
3012 /*
3013  * Called with PHY locked
3014  */
3015 static void
3016 pmcs_configure_expander(pmcs_hw_t *pwp, pmcs_phy_t *pptr, pmcs_iport_t *iport)
3017 {
3018 	pmcs_phy_t *ctmp, *clist = NULL, *cnext;
3019 	int result, i, nphy = 0;
3020 	boolean_t root_phy = B_FALSE;
3021 
3022 	ASSERT(iport);
3023 
3024 	/*
3025 	 * Step 1- clear our "changed" bit. If we need to retry/restart due
3026 	 * to resource shortages, we'll set it again. While we're doing
3027 	 * configuration, other events may set it again as well.  If the PHY
3028 	 * is a root PHY and is currently marked as having changed, reset the
3029 	 * config_stop timer as well.
3030 	 */
3031 	if (IS_ROOT_PHY(pptr) && pptr->changed) {
3032 		pptr->config_stop = ddi_get_lbolt() +
3033 		    drv_usectohz(PMCS_MAX_CONFIG_TIME);
3034 	}
3035 	pptr->changed = 0;
3036 
3037 	/*
3038 	 * Step 2- make sure we don't overflow
3039 	 */
3040 	if (pptr->level == PMCS_MAX_XPND-1) {
3041 		pmcs_prt(pwp, PMCS_PRT_WARN,
3042 		    "%s: SAS expansion tree too deep", __func__);
3043 		return;
3044 	}
3045 
3046 	/*
3047 	 * Step 3- Check if this expander is part of a wide phy that has
3048 	 * already been configured.
3049 	 *
	 * This is known by checking this level for another EXPANDER device
	 * that has the same SAS address, is not already marked as a
	 * subsidiary phy, and whose parent (if it has one) has the same
	 * SAS address as our parent.
3054 	 */
3055 	if (!IS_ROOT_PHY(pptr)) {
3056 		/*
3057 		 * No need to lock the parent here because we're in discovery
3058 		 * and the only time a PHY's children pointer can change is
3059 		 * in discovery; either in pmcs_clear_expander (which has
3060 		 * already been called) or here, down below.  Plus, trying to
3061 		 * grab the parent's lock here can cause deadlock.
3062 		 */
3063 		ctmp = pptr->parent->children;
3064 	} else {
3065 		ctmp = pwp->root_phys;
3066 		root_phy = B_TRUE;
3067 	}
3068 
3069 	while (ctmp) {
3070 		/*
		 * Stop once we reach pptr itself; searching beyond it would
		 * mean looking for a primary PHY with a higher PHY number
		 * than pptr, which can never happen.  The primary PHY on
		 * non-root expanders will ALWAYS be the lowest numbered PHY.
3076 		 */
3077 		if (ctmp == pptr) {
3078 			break;
3079 		}
3080 
3081 		/*
3082 		 * If pptr and ctmp are root PHYs, just grab the mutex on
3083 		 * ctmp.  No need to lock the entire tree.  If they are not
3084 		 * root PHYs, there is no need to lock since a non-root PHY's
3085 		 * SAS address and other characteristics can only change in
3086 		 * discovery anyway.
3087 		 */
3088 		if (root_phy) {
3089 			mutex_enter(&ctmp->phy_lock);
3090 		}
3091 
3092 		if (ctmp->dtype == EXPANDER && ctmp->width &&
3093 		    memcmp(ctmp->sas_address, pptr->sas_address, 8) == 0) {
3094 			int widephy = 0;
3095 			/*
3096 			 * If these phys are not root PHYs, compare their SAS
3097 			 * addresses too.
3098 			 */
3099 			if (!root_phy) {
3100 				if (memcmp(ctmp->parent->sas_address,
3101 				    pptr->parent->sas_address, 8) == 0) {
3102 					widephy = 1;
3103 				}
3104 			} else {
3105 				widephy = 1;
3106 			}
3107 			if (widephy) {
3108 				ctmp->width++;
3109 				pptr->subsidiary = 1;
3110 				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: PHY "
3111 				    "%s part of wide PHY %s (now %d wide)",
3112 				    __func__, pptr->path, ctmp->path,
3113 				    ctmp->width);
3114 				if (root_phy) {
3115 					mutex_exit(&ctmp->phy_lock);
3116 				}
3117 				return;
3118 			}
3119 		}
3120 
3121 		cnext = ctmp->sibling;
3122 		if (root_phy) {
3123 			mutex_exit(&ctmp->phy_lock);
3124 		}
3125 		ctmp = cnext;
3126 	}
3127 
3128 	/*
3129 	 * Step 4- If we don't have a device handle, get one.  Since this
3130 	 * is the primary PHY, make sure subsidiary is cleared.
3131 	 */
3132 	pptr->subsidiary = 0;
3133 	if (pmcs_get_device_handle(pwp, pptr)) {
3134 		goto out;
3135 	}
3136 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "Config expander %s "
3137 	    SAS_ADDR_FMT " dev id 0x%x lr 0x%x", pptr->path,
3138 	    SAS_ADDR_PRT(pptr->sas_address), pptr->device_id, pptr->link_rate);
3139 
3140 	/*
3141 	 * Step 5- figure out how many phys are in this expander.
3142 	 */
3143 	nphy = pmcs_expander_get_nphy(pwp, pptr);
3144 	if (nphy <= 0) {
3145 		if (nphy == 0 && ddi_get_lbolt() < pptr->config_stop) {
3146 			PHY_CHANGED(pwp, pptr);
3147 			RESTART_DISCOVERY(pwp);
3148 		} else {
3149 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3150 			    "%s: Retries exhausted for %s, killing", __func__,
3151 			    pptr->path);
3152 			pptr->config_stop = 0;
3153 			pmcs_kill_changed(pwp, pptr, 0);
3154 		}
3155 		goto out;
3156 	}
3157 
3158 	/*
3159 	 * Step 6- Allocate a list of phys for this expander and figure out
3160 	 * what each one is.
3161 	 */
3162 	for (i = 0; i < nphy; i++) {
3163 		ctmp = kmem_cache_alloc(pwp->phy_cache, KM_SLEEP);
3164 		bzero(ctmp, sizeof (pmcs_phy_t));
3165 		ctmp->device_id = PMCS_INVALID_DEVICE_ID;
3166 		ctmp->sibling = clist;
3167 		ctmp->pend_dtype = NEW;	/* Init pending dtype */
3168 		ctmp->config_stop = ddi_get_lbolt() +
3169 		    drv_usectohz(PMCS_MAX_CONFIG_TIME);
3170 		clist = ctmp;
3171 	}
3172 
3173 	mutex_enter(&pwp->config_lock);
3174 	if (pwp->config_changed) {
3175 		RESTART_DISCOVERY_LOCKED(pwp);
3176 		mutex_exit(&pwp->config_lock);
3177 		/*
3178 		 * Clean up the newly allocated PHYs and return
3179 		 */
3180 		while (clist) {
3181 			ctmp = clist->sibling;
3182 			kmem_cache_free(pwp->phy_cache, clist);
3183 			clist = ctmp;
3184 		}
3185 		return;
3186 	}
3187 	mutex_exit(&pwp->config_lock);
3188 
3189 	/*
3190 	 * Step 7- Now fill in the rest of the static portions of the phy.
3191 	 */
3192 	for (i = 0, ctmp = clist; ctmp; ctmp = ctmp->sibling, i++) {
3193 		ctmp->parent = pptr;
3194 		ctmp->pwp = pwp;
3195 		ctmp->level = pptr->level+1;
3196 		ctmp->portid = pptr->portid;
3197 		if (ctmp->tolerates_sas2) {
3198 			ASSERT(i < SAS2_PHYNUM_MAX);
3199 			ctmp->phynum = i & SAS2_PHYNUM_MASK;
3200 		} else {
3201 			ASSERT(i < SAS_PHYNUM_MAX);
3202 			ctmp->phynum = i & SAS_PHYNUM_MASK;
3203 		}
3204 		pmcs_phy_name(pwp, ctmp, ctmp->path, sizeof (ctmp->path));
3205 		pmcs_lock_phy(ctmp);
3206 	}
3207 
3208 	/*
3209 	 * Step 8- Discover things about each phy in the expander.
3210 	 */
3211 	for (i = 0, ctmp = clist; ctmp; ctmp = ctmp->sibling, i++) {
3212 		result = pmcs_expander_content_discover(pwp, pptr, ctmp);
3213 		if (result <= 0) {
3214 			if (ddi_get_lbolt() < pptr->config_stop) {
3215 				PHY_CHANGED(pwp, pptr);
3216 				RESTART_DISCOVERY(pwp);
3217 			} else {
3218 				pptr->config_stop = 0;
3219 				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3220 				    "%s: Retries exhausted for %s, killing",
3221 				    __func__, pptr->path);
3222 				pmcs_kill_changed(pwp, pptr, 0);
3223 			}
3224 			goto out;
3225 		}
3226 
3227 		/* Set pend_dtype to dtype for 1st time initialization */
3228 		ctmp->pend_dtype = ctmp->dtype;
3229 	}
3230 
3231 	/*
3232 	 * Step 9- Install the new list on the next level. There should be
3233 	 * no children pointer on this PHY.  If there is, we'd need to know
3234 	 * how it happened (The expander suddenly got more PHYs?).
3235 	 */
3236 	ASSERT(pptr->children == NULL);
3237 	if (pptr->children != NULL) {
3238 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Already child PHYs attached "
3239 		    " to PHY %s: This should never happen", __func__,
3240 		    pptr->path);
3241 		goto out;
3242 	} else {
3243 		pptr->children = clist;
3244 	}
3245 
3246 	clist = NULL;
3247 	pptr->ncphy = nphy;
3248 	pptr->configured = 1;
3249 
3250 	/*
3251 	 * We only set width if we're greater than level 0.
3252 	 */
3253 	if (pptr->level) {
3254 		pptr->width = 1;
3255 	}
3256 
3257 	/*
3258 	 * Now tell the rest of the world about us, as an SMP node.
3259 	 */
3260 	pptr->iport = iport;
3261 	pmcs_new_tport(pwp, pptr);
3262 
3263 out:
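	/*
	 * Unlock and free any child PHYs that were allocated but never
	 * installed on pptr->children.
	 */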
3264 	while (clist) {
3265 		ctmp = clist->sibling;
3266 		pmcs_unlock_phy(clist);
3267 		kmem_cache_free(pwp->phy_cache, clist);
3268 		clist = ctmp;
3269 	}
3270 }
3271 
3272 /*
 * Check expanders marked changed (but not dead) to see if they still have
 * the same number of phys and the same SAS address. Mark them, their
 * subsidiary phys (if wide) and their descendants dead if anything has
 * changed. Check the devices they contain to see if *they* have changed.
 * If they've changed from type NOTHING we leave them marked changed to be
 * configured later (picking up a new SAS address and link rate if
 * possible). Otherwise, any change in type, SAS address or removal of
 * target role will cause us to mark them (and their descendants) as dead
 * and cause any pending commands and associated devices to be removed.
3282  *
3283  * Called with PHY (pptr) locked.
3284  */
3285 
3286 static void
3287 pmcs_check_expander(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
3288 {
3289 	int nphy, result;
3290 	pmcs_phy_t *ctmp, *local, *local_list = NULL, *local_tail = NULL;
3291 	boolean_t kill_changed, changed;
3292 
3293 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3294 	    "%s: check %s", __func__, pptr->path);
3295 
3296 	/*
3297 	 * Step 1: Mark phy as not changed. We will mark it changed if we need
3298 	 * to retry.
3299 	 */
3300 	pptr->changed = 0;
3301 
3302 	/*
3303 	 * Reset the config_stop time. Although we're not actually configuring
3304 	 * anything here, we do want some indication of when to give up trying
3305 	 * if we can't communicate with the expander.
3306 	 */
3307 	pptr->config_stop = ddi_get_lbolt() +
3308 	    drv_usectohz(PMCS_MAX_CONFIG_TIME);
3309 
3310 	/*
3311 	 * Step 2: Figure out how many phys are in this expander. If
	 * pmcs_expander_get_nphy returns 0, we ran out of resources,
3313 	 * so reschedule and try later. If it returns another error,
3314 	 * just return.
3315 	 */
3316 	nphy = pmcs_expander_get_nphy(pwp, pptr);
3317 	if (nphy <= 0) {
3318 		if ((nphy == 0) && (ddi_get_lbolt() < pptr->config_stop)) {
3319 			PHY_CHANGED(pwp, pptr);
3320 			RESTART_DISCOVERY(pwp);
3321 		} else {
3322 			pptr->config_stop = 0;
3323 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3324 			    "%s: Retries exhausted for %s, killing", __func__,
3325 			    pptr->path);
3326 			pmcs_kill_changed(pwp, pptr, 0);
3327 		}
3328 		return;
3329 	}
3330 
3331 	/*
	 * Step 3: If the number of phys has changed, kill the old sub-tree.
3333 	 */
3334 	if (nphy != pptr->ncphy) {
3335 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3336 		    "%s: number of contained phys for %s changed from %d to %d",
3337 		    __func__, pptr->path, pptr->ncphy, nphy);
3338 		/*
3339 		 * Force a rescan of this expander after dead contents
3340 		 * are cleared and removed.
3341 		 */
3342 		pmcs_kill_changed(pwp, pptr, 0);
3343 		return;
3344 	}
3345 
3346 	/*
3347 	 * Step 4: if we're at the bottom of the stack, we're done
3348 	 * (we can't have any levels below us)
3349 	 */
3350 	if (pptr->level == PMCS_MAX_XPND-1) {
3351 		return;
3352 	}
3353 
3354 	/*
3355 	 * Step 5: Discover things about each phy in this expander.  We do
3356 	 * this by walking the current list of contained phys and doing a
3357 	 * content discovery for it to a local phy.
3358 	 */
3359 	ctmp = pptr->children;
3360 	ASSERT(ctmp);
3361 	if (ctmp == NULL) {
3362 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3363 		    "%s: No children attached to expander @ %s?", __func__,
3364 		    pptr->path);
3365 		return;
3366 	}
3367 
3368 	while (ctmp) {
3369 		/*
3370 		 * Allocate a local PHY to contain the proposed new contents
3371 		 * and link it to the rest of the local PHYs so that they
3372 		 * can all be freed later.
3373 		 */
3374 		local = pmcs_clone_phy(ctmp);
3375 
3376 		if (local_list == NULL) {
3377 			local_list = local;
3378 			local_tail = local;
3379 		} else {
3380 			local_tail->sibling = local;
3381 			local_tail = local;
3382 		}
3383 
3384 		/*
		 * Need to lock the local PHY, since
		 * pmcs_expander_content_discover may call pmcs_clear_phy
		 * on it, which expects the PHY to be locked.
3388 		 */
3389 		pmcs_lock_phy(local);
3390 		result = pmcs_expander_content_discover(pwp, pptr, local);
3391 		pmcs_unlock_phy(local);
3392 		if (result <= 0) {
3393 			if (ddi_get_lbolt() < pptr->config_stop) {
3394 				PHY_CHANGED(pwp, pptr);
3395 				RESTART_DISCOVERY(pwp);
3396 			} else {
3397 				pptr->config_stop = 0;
3398 				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3399 				    "%s: Retries exhausted for %s, killing",
3400 				    __func__, pptr->path);
3401 				pmcs_kill_changed(pwp, pptr, 0);
3402 			}
3403 
3404 			/*
3405 			 * Release all the local PHYs that we allocated.
3406 			 */
3407 			pmcs_free_phys(pwp, local_list);
3408 			return;
3409 		}
3410 
3411 		ctmp = ctmp->sibling;
3412 	}
3413 
3414 	/*
3415 	 * Step 6: Compare the local PHY's contents to our current PHY.  If
3416 	 * there are changes, take the appropriate action.
3417 	 * This is done in two steps (step 5 above, and 6 here) so that if we
3418 	 * have to bail during this process (e.g. pmcs_expander_content_discover
3419 	 * fails), we haven't actually changed the state of any of the real
3420 	 * PHYs.  Next time we come through here, we'll be starting over from
3421 	 * scratch.  This keeps us from marking a changed PHY as no longer
3422 	 * changed, but then having to bail only to come back next time and
3423 	 * think that the PHY hadn't changed.  If this were to happen, we
3424 	 * would fail to properly configure the device behind this PHY.
3425 	 */
3426 	local = local_list;
3427 	ctmp = pptr->children;
3428 
3429 	while (ctmp) {
3430 		changed = B_FALSE;
3431 		kill_changed = B_FALSE;
3432 
3433 		/*
3434 		 * We set local to local_list prior to this loop so that we
3435 		 * can simply walk the local_list while we walk this list.  The
3436 		 * two lists should be completely in sync.
3437 		 *
3438 		 * Clear the changed flag here.
3439 		 */
3440 		ctmp->changed = 0;
3441 
3442 		if (ctmp->dtype != local->dtype) {
3443 			if (ctmp->dtype != NOTHING) {
3444 				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: %s "
3445 				    "type changed from %s to %s (killing)",
3446 				    __func__, ctmp->path, PHY_TYPE(ctmp),
3447 				    PHY_TYPE(local));
3448 				/*
3449 				 * Force a rescan of this expander after dead
3450 				 * contents are cleared and removed.
3451 				 */
3452 				changed = B_TRUE;
3453 				kill_changed = B_TRUE;
3454 			} else {
3455 				changed = B_TRUE;
3456 				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3457 				    "%s: %s type changed from NOTHING to %s",
3458 				    __func__, ctmp->path, PHY_TYPE(local));
3459 			}
3460 
3461 		} else if (ctmp->atdt != local->atdt) {
3462 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: %s attached "
3463 			    "device type changed from %d to %d (killing)",
3464 			    __func__, ctmp->path, ctmp->atdt, local->atdt);
3465 			/*
3466 			 * Force a rescan of this expander after dead
3467 			 * contents are cleared and removed.
3468 			 */
3469 			changed = B_TRUE;
3470 
3471 			if (local->atdt == 0) {
3472 				kill_changed = B_TRUE;
3473 			}
3474 		} else if (ctmp->link_rate != local->link_rate) {
3475 			pmcs_prt(pwp, PMCS_PRT_INFO, "%s: %s changed speed from"
3476 			    " %s to %s", __func__, ctmp->path,
3477 			    pmcs_get_rate(ctmp->link_rate),
3478 			    pmcs_get_rate(local->link_rate));
3479 			/* If the speed changed from invalid, force rescan */
3480 			if (!PMCS_VALID_LINK_RATE(ctmp->link_rate)) {
3481 				changed = B_TRUE;
3482 				RESTART_DISCOVERY(pwp);
3483 			} else {
3484 				/* Just update to the new link rate */
3485 				ctmp->link_rate = local->link_rate;
3486 			}
3487 
3488 			if (!PMCS_VALID_LINK_RATE(local->link_rate)) {
3489 				kill_changed = B_TRUE;
3490 			}
3491 		} else if (memcmp(ctmp->sas_address, local->sas_address,
3492 		    sizeof (ctmp->sas_address)) != 0) {
3493 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: SASAddr "
3494 			    "for %s changed from " SAS_ADDR_FMT " to "
3495 			    SAS_ADDR_FMT " (kill old tree)", __func__,
3496 			    ctmp->path, SAS_ADDR_PRT(ctmp->sas_address),
3497 			    SAS_ADDR_PRT(local->sas_address));
3498 			/*
3499 			 * Force a rescan of this expander after dead
3500 			 * contents are cleared and removed.
3501 			 */
3502 			changed = B_TRUE;
3503 		} else {
3504 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3505 			    "%s: %s looks the same (type %s)",
3506 			    __func__, ctmp->path, PHY_TYPE(ctmp));
3507 			/*
3508 			 * If EXPANDER, still mark it changed so we
3509 			 * re-evaluate its contents.  If it's not an expander,
3510 			 * but it hasn't been configured, also mark it as
3511 			 * changed so that it will undergo configuration.
3512 			 */
3513 			if (ctmp->dtype == EXPANDER) {
3514 				changed = B_TRUE;
3515 			} else if ((ctmp->dtype != NOTHING) &&
3516 			    !ctmp->configured) {
3517 				ctmp->changed = 1;
3518 			} else {
3519 				/* It simply hasn't changed */
3520 				ctmp->changed = 0;
3521 			}
3522 		}
3523 
3524 		/*
3525 		 * If the PHY changed, call pmcs_kill_changed if indicated,
3526 		 * update its contents to reflect its current state and mark it
3527 		 * as changed.
3528 		 */
3529 		if (changed) {
3530 			/*
3531 			 * pmcs_kill_changed will mark the PHY as changed, so
3532 			 * only do PHY_CHANGED if we did not do kill_changed.
3533 			 */
3534 			if (kill_changed) {
3535 				pmcs_kill_changed(pwp, ctmp, 0);
3536 			} else {
3537 				/*
3538 				 * If we're not killing the device, it's not
3539 				 * dead.  Mark the PHY as changed.
3540 				 */
3541 				PHY_CHANGED(pwp, ctmp);
3542 
3543 				if (ctmp->dead) {
3544 					pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3545 					    "%s: Unmarking PHY %s dead, "
3546 					    "restarting discovery",
3547 					    __func__, ctmp->path);
3548 					ctmp->dead = 0;
3549 					RESTART_DISCOVERY(pwp);
3550 				}
3551 			}
3552 
3553 			/*
3554 			 * If the dtype of this PHY is now NOTHING, mark it as
3555 			 * unconfigured.  Set pend_dtype to what the new dtype
3556 			 * is.  It'll get updated at the end of the discovery
3557 			 * process.
3558 			 */
3559 			if (local->dtype == NOTHING) {
3560 				bzero(ctmp->sas_address,
3561 				    sizeof (local->sas_address));
3562 				ctmp->atdt = 0;
3563 				ctmp->link_rate = 0;
3564 				ctmp->pend_dtype = NOTHING;
3565 				ctmp->configured = 0;
3566 			} else {
3567 				(void) memcpy(ctmp->sas_address,
3568 				    local->sas_address,
3569 				    sizeof (local->sas_address));
3570 				ctmp->atdt = local->atdt;
3571 				ctmp->link_rate = local->link_rate;
3572 				ctmp->pend_dtype = local->dtype;
3573 			}
3574 		}
3575 
3576 		local = local->sibling;
3577 		ctmp = ctmp->sibling;
3578 	}
3579 
3580 	/*
3581 	 * If we got to here, that means we were able to see all the PHYs
3582 	 * and we can now update all of the real PHYs with the information
3583 	 * we got on the local PHYs.  Once that's done, free all the local
3584 	 * PHYs.
3585 	 */
3586 
3587 	pmcs_free_phys(pwp, local_list);
3588 }
3589 
3590 /*
3591  * Top level routine to check expanders.  We call pmcs_check_expander for
3592  * each expander.  Since we're not doing any configuration right now, it
3593  * doesn't matter if this is breadth-first.
3594  */
3595 static boolean_t
3596 pmcs_check_expanders(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
3597 {
3598 	pmcs_phy_t *phyp, *pnext, *pchild;
3599 	boolean_t config_changed = B_FALSE;
3600 
3601 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: %s", __func__, pptr->path);
3602 
3603 	/*
3604 	 * Check each expander at this level
3605 	 */
3606 	phyp = pptr;
3607 	while (phyp && !config_changed) {
3608 		pmcs_lock_phy(phyp);
3609 
3610 		if ((phyp->dtype == EXPANDER) && phyp->changed &&
3611 		    !phyp->dead && !phyp->subsidiary &&
3612 		    phyp->configured) {
3613 			pmcs_check_expander(pwp, phyp);
3614 		}
3615 
3616 		pnext = phyp->sibling;
3617 		pmcs_unlock_phy(phyp);
3618 
3619 		mutex_enter(&pwp->config_lock);
3620 		config_changed = pwp->config_changed;
3621 		mutex_exit(&pwp->config_lock);
3622 
3623 		phyp = pnext;
3624 	}
3625 
3626 	if (config_changed) {
3627 		return (config_changed);
3628 	}
3629 
3630 	/*
3631 	 * Now check the children
3632 	 */
3633 	phyp = pptr;
3634 	while (phyp && !config_changed) {
3635 		pmcs_lock_phy(phyp);
3636 		pnext = phyp->sibling;
3637 		pchild = phyp->children;
3638 		pmcs_unlock_phy(phyp);
3639 
3640 		if (pchild) {
3641 			(void) pmcs_check_expanders(pwp, pchild);
3642 		}
3643 
3644 		mutex_enter(&pwp->config_lock);
3645 		config_changed = pwp->config_changed;
3646 		mutex_exit(&pwp->config_lock);
3647 
3648 		phyp = pnext;
3649 	}
3650 
3651 	/*
3652 	 * We're done
3653 	 */
3654 	return (config_changed);
3655 }
3656 
3657 /*
3658  * Called with softstate and PHY locked
3659  */
3660 static void
3661 pmcs_clear_expander(pmcs_hw_t *pwp, pmcs_phy_t *pptr, int level)
3662 {
3663 	pmcs_phy_t *ctmp;
3664 
3665 	ASSERT(mutex_owned(&pwp->lock));
3666 	ASSERT(mutex_owned(&pptr->phy_lock));
3667 	ASSERT(pptr->level < PMCS_MAX_XPND - 1);
3668 
3669 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: checking %s", __func__,
3670 	    pptr->path);
3671 
3672 	ctmp = pptr->children;
3673 	while (ctmp) {
3674 		/*
3675 		 * If the expander is dead, mark its children dead
3676 		 */
3677 		if (pptr->dead) {
3678 			ctmp->dead = 1;
3679 		}
3680 		if (ctmp->dtype == EXPANDER) {
3681 			pmcs_clear_expander(pwp, ctmp, level + 1);
3682 		}
3683 		ctmp = ctmp->sibling;
3684 	}
3685 
3686 	/*
3687 	 * If this expander is not dead, we're done here.
3688 	 */
3689 	if (!pptr->dead) {
3690 		return;
3691 	}
3692 
3693 	/*
3694 	 * Now snip out the list of children below us and release them
3695 	 */
3696 	ctmp = pptr->children;
3697 	while (ctmp) {
3698 		pmcs_phy_t *nxt = ctmp->sibling;
3699 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3700 		    "%s: dead PHY 0x%p (%s) (ref_count %d)", __func__,
3701 		    (void *)ctmp, ctmp->path, ctmp->ref_count);
3702 		/*
3703 		 * Put this PHY on the dead PHY list for the watchdog to
3704 		 * clean up after any outstanding work has completed.
3705 		 */
3706 		mutex_enter(&pwp->dead_phylist_lock);
3707 		ctmp->dead_next = pwp->dead_phys;
3708 		pwp->dead_phys = ctmp;
3709 		mutex_exit(&pwp->dead_phylist_lock);
3710 		pmcs_unlock_phy(ctmp);
3711 		ctmp = nxt;
3712 	}
3713 
3714 	pptr->children = NULL;
3715 
3716 	/*
3717 	 * Clear subsidiary phys as well.  Getting the parent's PHY lock
3718 	 * is only necessary if level == 0 since otherwise the parent is
3719 	 * already locked.
3720 	 */
3721 	if (!IS_ROOT_PHY(pptr)) {
3722 		if (level == 0) {
3723 			mutex_enter(&pptr->parent->phy_lock);
3724 		}
3725 		ctmp = pptr->parent->children;
3726 		if (level == 0) {
3727 			mutex_exit(&pptr->parent->phy_lock);
3728 		}
3729 	} else {
3730 		ctmp = pwp->root_phys;
3731 	}
3732 
3733 	while (ctmp) {
3734 		if (ctmp == pptr) {
3735 			ctmp = ctmp->sibling;
3736 			continue;
3737 		}
3738 		/*
3739 		 * We only need to lock subsidiary PHYs on the level 0
3740 		 * expander.  Any children of that expander, subsidiaries or
3741 		 * not, will already be locked.
3742 		 */
3743 		if (level == 0) {
3744 			pmcs_lock_phy(ctmp);
3745 		}
3746 		if (ctmp->dtype != EXPANDER || ctmp->subsidiary == 0 ||
3747 		    memcmp(ctmp->sas_address, pptr->sas_address,
3748 		    sizeof (ctmp->sas_address)) != 0) {
3749 			if (level == 0) {
3750 				pmcs_unlock_phy(ctmp);
3751 			}
3752 			ctmp = ctmp->sibling;
3753 			continue;
3754 		}
3755 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: subsidiary %s",
3756 		    __func__, ctmp->path);
3757 		pmcs_clear_phy(pwp, ctmp);
3758 		if (level == 0) {
3759 			pmcs_unlock_phy(ctmp);
3760 		}
3761 		ctmp = ctmp->sibling;
3762 	}
3763 
3764 	pmcs_clear_phy(pwp, pptr);
3765 }
3766 
3767 /*
3768  * Called with PHY locked and with scratch acquired. We return 0 if
3769  * we fail to allocate resources or notice that the configuration
3770  * changed while we were running the command. We return
3771  * less than zero if we had an I/O error or received an unsupported
3772  * configuration. Otherwise we return the number of phys in the
3773  * expander.
3774  */
3775 #define	DFM(m, y) if (m == NULL) m = y
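/*
 * DFM ("default message") works with the deliberately fall-through
 * switch ladders below: each case supplies a message only if none of
 * the earlier, more specific cases has already done so.  For example,
 * with status == PMCOUT_STATUS_ERROR_HW_TIMEOUT, the ladder sets nag
 * to "Hardware Timeout" and then falls through the remaining DFMs,
 * which are no-ops because nag is no longer NULL.
 */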
3776 static int
3777 pmcs_expander_get_nphy(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
3778 {
3779 	struct pmcwork *pwrk;
3780 	char buf[64];
3781 	const uint_t rdoff = 0x100;	/* returned data offset */
3782 	smp_response_frame_t *srf;
3783 	smp_report_general_resp_t *srgr;
3784 	uint32_t msg[PMCS_MSG_SIZE], *ptr, htag, status, ival;
3785 	int result;
3786 
3787 	ival = 0x40001100;
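	/*
	 * ival forms dword 0 of the SMP request frame (written big-endian
	 * below): byte 0 is the SMP frame type (0x40), byte 1 the function
	 * (0x00, REPORT GENERAL), and byte 2 (0x11) is what we take to be
	 * the SAS2 allocated response length.  If the expander rejects the
	 * SAS2 form, the retry path below clears bits 15:8
	 * (ival &= ~0xff00) to fall back to a SAS1.1-style frame.
	 */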
3788 again:
3789 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
3790 	if (pwrk == NULL) {
3791 		result = 0;
3792 		goto out;
3793 	}
3794 	(void) memset(pwp->scratch, 0x77, PMCS_SCRATCH_SIZE);
3795 	pwrk->arg = pwp->scratch;
3796 	pwrk->dtype = pptr->dtype;
3797 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
3798 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
3799 	if (ptr == NULL) {
3800 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
3801 		pmcs_prt(pwp, PMCS_PRT_DEBUG2, "%s: GET_IQ_ENTRY failed",
3802 		    __func__);
3803 		pmcs_pwork(pwp, pwrk);
3804 		result = 0;
3805 		goto out;
3806 	}
3807 
3808 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, PMCIN_SMP_REQUEST));
3809 	msg[1] = LE_32(pwrk->htag);
3810 	msg[2] = LE_32(pptr->device_id);
3811 	msg[3] = LE_32((4 << SMP_REQUEST_LENGTH_SHIFT) | SMP_INDIRECT_RESPONSE);
3812 	/*
3813 	 * Send SMP REPORT GENERAL (of either SAS1.1 or SAS2 flavors).
3814 	 */
3815 	msg[4] = BE_32(ival);
3816 	msg[5] = 0;
3817 	msg[6] = 0;
3818 	msg[7] = 0;
3819 	msg[8] = 0;
3820 	msg[9] = 0;
3821 	msg[10] = 0;
3822 	msg[11] = 0;
3823 	msg[12] = LE_32(DWORD0(pwp->scratch_dma+rdoff));
3824 	msg[13] = LE_32(DWORD1(pwp->scratch_dma+rdoff));
3825 	msg[14] = LE_32(PMCS_SCRATCH_SIZE - rdoff);
3826 	msg[15] = 0;
3827 
3828 	COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE);
3829 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
3830 	htag = pwrk->htag;
3831 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
3832 
3833 	pmcs_unlock_phy(pptr);
3834 	WAIT_FOR(pwrk, 1000, result);
3835 	pmcs_lock_phy(pptr);
3836 	pmcs_pwork(pwp, pwrk);
3837 
3838 	mutex_enter(&pwp->config_lock);
3839 	if (pwp->config_changed) {
3840 		RESTART_DISCOVERY_LOCKED(pwp);
3841 		mutex_exit(&pwp->config_lock);
3842 		result = 0;
3843 		goto out;
3844 	}
3845 	mutex_exit(&pwp->config_lock);
3846 
3847 	if (result) {
3848 		pmcs_timed_out(pwp, htag, __func__);
3851 		if (pmcs_abort(pwp, pptr, htag, 0, 0)) {
3852 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3853 			    "%s: Unable to issue SMP ABORT for htag 0x%08x",
3854 			    __func__, htag);
3855 		} else {
3856 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3857 			    "%s: Issuing SMP ABORT for htag 0x%08x",
3858 			    __func__, htag);
3859 		}
3860 		result = 0;
3861 		goto out;
3862 	}
3863 	ptr = (void *)pwp->scratch;
3864 	status = LE_32(ptr[2]);
3865 	if (status == PMCOUT_STATUS_UNDERFLOW ||
3866 	    status == PMCOUT_STATUS_OVERFLOW) {
3867 		pmcs_prt(pwp, PMCS_PRT_DEBUG_UNDERFLOW,
3868 		    "%s: over/underflow", __func__);
3869 		status = PMCOUT_STATUS_OK;
3870 	}
3871 	srf = (smp_response_frame_t *)&((uint32_t *)pwp->scratch)[rdoff >> 2];
3872 	srgr = (smp_report_general_resp_t *)
3873 	    &((uint32_t *)pwp->scratch)[(rdoff >> 2)+1];
3874 
3875 	if (status != PMCOUT_STATUS_OK) {
3876 		char *nag = NULL;
3877 		(void) snprintf(buf, sizeof (buf),
3878 		    "%s: SMP op failed (0x%x)", __func__, status);
3879 		switch (status) {
3880 		case PMCOUT_STATUS_IO_PORT_IN_RESET:
3881 			DFM(nag, "I/O Port In Reset");
3882 			/* FALLTHROUGH */
3883 		case PMCOUT_STATUS_ERROR_HW_TIMEOUT:
3884 			DFM(nag, "Hardware Timeout");
3885 			/* FALLTHROUGH */
3886 		case PMCOUT_STATUS_ERROR_INTERNAL_SMP_RESOURCE:
3887 			DFM(nag, "Internal SMP Resource Failure");
3888 			/* FALLTHROUGH */
3889 		case PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY:
3890 			DFM(nag, "PHY Not Ready");
3891 			/* FALLTHROUGH */
3892 		case PMCOUT_STATUS_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED:
3893 			DFM(nag, "Connection Rate Not Supported");
3894 			/* FALLTHROUGH */
3895 		case PMCOUT_STATUS_IO_XFER_OPEN_RETRY_TIMEOUT:
3896 			DFM(nag, "Open Retry Timeout");
3897 			/* FALLTHROUGH */
3898 		case PMCOUT_STATUS_SMP_RESP_CONNECTION_ERROR:
3899 			DFM(nag, "Response Connection Error");
3900 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
3901 			    "%s: expander %s SMP operation failed (%s)",
3902 			    __func__, pptr->path, nag);
3903 			break;
3904 
3905 		/*
3906 		 * For the IO_DS_NON_OPERATIONAL case, we need to kick off
3907 		 * device state recovery and return 0 so that the caller
3908 		 * doesn't assume this expander is dead for good.
3909 		 */
3910 		case PMCOUT_STATUS_IO_DS_NON_OPERATIONAL: {
3911 			pmcs_xscsi_t *xp = pptr->target;
3912 
3913 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
3914 			    "%s: expander %s device state non-operational",
3915 			    __func__, pptr->path);
3916 
3917 			if (xp == NULL) {
3918 				pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
3919 				    "%s: No target to do DS recovery for PHY "
3920 				    "%p (%s), attempting PHY hard reset",
3921 				    __func__, (void *)pptr, pptr->path);
3922 				(void) pmcs_reset_phy(pwp, pptr,
3923 				    PMCS_PHYOP_HARD_RESET);
3924 				break;
3925 			}
3926 
3927 			mutex_enter(&xp->statlock);
3928 			pmcs_start_dev_state_recovery(xp, pptr);
3929 			mutex_exit(&xp->statlock);
3930 			break;
3931 		}
3932 
3933 		default:
3934 			pmcs_print_entry(pwp, PMCS_PRT_DEBUG, buf, ptr);
3935 			result = -EIO;
3936 			break;
3937 		}
3938 	} else if (srf->srf_frame_type != SMP_FRAME_TYPE_RESPONSE) {
3939 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
3940 		    "%s: bad response frame type 0x%x",
3941 		    __func__, srf->srf_frame_type);
3942 		result = -EINVAL;
3943 	} else if (srf->srf_function != SMP_FUNC_REPORT_GENERAL) {
3944 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: bad response function 0x%x",
3945 		    __func__, srf->srf_function);
3946 		result = -EINVAL;
3947 	} else if (srf->srf_result != 0) {
3948 		/*
3949 		 * A result of 3 (INVALID REQUEST FRAME LENGTH) may indicate
3950 		 * that the expander rejected the SAS2.0 allocation length
3951 		 * field; if we sent one, retry without it.
3952 		 */
3953 		if (srf->srf_result == 3 && (ival & 0xff00)) {
3954 			ival &= ~0xff00;
3955 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
3956 			    "%s: err 0x%x with SAS2 request - retry with SAS1",
3957 			    __func__, srf->srf_result);
3958 			goto again;
3959 		}
3960 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: bad response 0x%x",
3961 		    __func__, srf->srf_result);
3962 		result = -EINVAL;
3963 	} else if (srgr->srgr_configuring) {
3964 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
3965 		    "%s: expander at phy %s is still configuring",
3966 		    __func__, pptr->path);
3967 		result = 0;
3968 	} else {
3969 		result = srgr->srgr_number_of_phys;
3970 		if (ival & 0xff00) {
3971 			pptr->tolerates_sas2 = 1;
3972 		}
3973 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3974 		    "%s has %d phys and %s SAS2", pptr->path, result,
3975 		    pptr->tolerates_sas2 ? "tolerates" : "does not tolerate");
3976 	}
3977 out:
3978 	return (result);
3979 }
3980 
3981 /*
3982  * Called with expander locked (and thus, pptr) as well as all PHYs up to
3983  * the root, and scratch acquired. Return 0 if we fail to allocate resources
3984  * or notice that the configuration changed while we were running the command.
3985  *
3986  * We return less than zero if we had an I/O error or received an
3987  * unsupported configuration.
3988  */
3989 static int
3990 pmcs_expander_content_discover(pmcs_hw_t *pwp, pmcs_phy_t *expander,
3991     pmcs_phy_t *pptr)
3992 {
3993 	struct pmcwork *pwrk;
3994 	char buf[64];
3995 	uint8_t sas_address[8];
3996 	uint8_t att_sas_address[8];
3997 	smp_response_frame_t *srf;
3998 	smp_discover_resp_t *sdr;
3999 	const uint_t rdoff = 0x100;	/* returned data offset */
4000 	uint8_t *roff;
4001 	uint32_t status, *ptr, msg[PMCS_MSG_SIZE], htag;
4002 	int result;
4003 	uint8_t	ini_support;
4004 	uint8_t	tgt_support;
4005 
4006 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, expander);
4007 	if (pwrk == NULL) {
4008 		result = 0;
4009 		goto out;
4010 	}
4011 	(void) memset(pwp->scratch, 0x77, PMCS_SCRATCH_SIZE);
4012 	pwrk->arg = pwp->scratch;
4013 	pwrk->dtype = expander->dtype;
4014 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, PMCIN_SMP_REQUEST));
4015 	msg[1] = LE_32(pwrk->htag);
4016 	msg[2] = LE_32(expander->device_id);
4017 	msg[3] = LE_32((12 << SMP_REQUEST_LENGTH_SHIFT) |
4018 	    SMP_INDIRECT_RESPONSE);
4019 	/*
4020 	 * Send SMP DISCOVER (of either SAS1.1 or SAS2 flavors).
4021 	 */
4022 	if (expander->tolerates_sas2) {
4023 		msg[4] = BE_32(0x40101B00);
4024 	} else {
4025 		msg[4] = BE_32(0x40100000);
4026 	}
4027 	msg[5] = 0;
4028 	msg[6] = BE_32((pptr->phynum << 16));
4029 	msg[7] = 0;
4030 	msg[8] = 0;
4031 	msg[9] = 0;
4032 	msg[10] = 0;
4033 	msg[11] = 0;
4034 	msg[12] = LE_32(DWORD0(pwp->scratch_dma+rdoff));
4035 	msg[13] = LE_32(DWORD1(pwp->scratch_dma+rdoff));
4036 	msg[14] = LE_32(PMCS_SCRATCH_SIZE - rdoff);
4037 	msg[15] = 0;
4038 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4039 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4040 	if (ptr == NULL) {
4041 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4042 		result = 0;
4043 		goto out;
4044 	}
4045 
4046 	COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE);
4047 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
4048 	htag = pwrk->htag;
4049 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4050 
4051 	/*
4052 	 * Drop PHY lock while waiting so other completions aren't potentially
4053 	 * blocked.
4054 	 */
4055 	pmcs_unlock_phy(expander);
4056 	WAIT_FOR(pwrk, 1000, result);
4057 	pmcs_lock_phy(expander);
4058 	pmcs_pwork(pwp, pwrk);
4059 
4060 	mutex_enter(&pwp->config_lock);
4061 	if (pwp->config_changed) {
4062 		RESTART_DISCOVERY_LOCKED(pwp);
4063 		mutex_exit(&pwp->config_lock);
4064 		result = 0;
4065 		goto out;
4066 	}
4067 	mutex_exit(&pwp->config_lock);
4068 
4069 	if (result) {
4070 		pmcs_prt(pwp, PMCS_PRT_WARN, pmcs_timeo, __func__);
4071 		if (pmcs_abort(pwp, expander, htag, 0, 0)) {
4072 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
4073 			    "%s: Unable to issue SMP ABORT for htag 0x%08x",
4074 			    __func__, htag);
4075 		} else {
4076 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
4077 			    "%s: Issuing SMP ABORT for htag 0x%08x",
4078 			    __func__, htag);
4079 		}
4080 		result = -ETIMEDOUT;
4081 		goto out;
4082 	}
4083 	ptr = (void *)pwp->scratch;
4084 	/*
4085 	 * Point roff to the DMA offset for returned data
4086 	 */
4087 	roff = pwp->scratch;
4088 	roff += rdoff;
4089 	srf = (smp_response_frame_t *)roff;
4090 	sdr = (smp_discover_resp_t *)(roff+4);
4091 	status = LE_32(ptr[2]);
4092 	if (status == PMCOUT_STATUS_UNDERFLOW ||
4093 	    status == PMCOUT_STATUS_OVERFLOW) {
4094 		pmcs_prt(pwp, PMCS_PRT_DEBUG_UNDERFLOW,
4095 		    "%s: over/underflow", __func__);
4096 		status = PMCOUT_STATUS_OK;
4097 	}
4098 	if (status != PMCOUT_STATUS_OK) {
4099 		char *nag = NULL;
4100 		(void) snprintf(buf, sizeof (buf),
4101 		    "%s: SMP op failed (0x%x)", __func__, status);
4102 		switch (status) {
4103 		case PMCOUT_STATUS_ERROR_HW_TIMEOUT:
4104 			DFM(nag, "Hardware Timeout");
4105 			/* FALLTHROUGH */
4106 		case PMCOUT_STATUS_ERROR_INTERNAL_SMP_RESOURCE:
4107 			DFM(nag, "Internal SMP Resource Failure");
4108 			/* FALLTHROUGH */
4109 		case PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY:
4110 			DFM(nag, "PHY Not Ready");
4111 			/* FALLTHROUGH */
4112 		case PMCOUT_STATUS_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED:
4113 			DFM(nag, "Connection Rate Not Supported");
4114 			/* FALLTHROUGH */
4115 		case PMCOUT_STATUS_IO_XFER_OPEN_RETRY_TIMEOUT:
4116 			DFM(nag, "Open Retry Timeout");
4117 			/* FALLTHROUGH */
4118 		case PMCOUT_STATUS_SMP_RESP_CONNECTION_ERROR:
4119 			DFM(nag, "Response Connection Error");
4120 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
4121 			    "%s: expander %s SMP operation failed (%s)",
4122 			    __func__, pptr->path, nag);
4123 			break;
4124 		default:
4125 			pmcs_print_entry(pwp, PMCS_PRT_DEBUG, buf, ptr);
4126 			result = -EIO;
4127 			break;
4128 		}
4129 		goto out;
4130 	} else if (srf->srf_frame_type != SMP_FRAME_TYPE_RESPONSE) {
4131 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4132 		    "%s: bad response frame type 0x%x",
4133 		    __func__, srf->srf_frame_type);
4134 		result = -EINVAL;
4135 		goto out;
4136 	} else if (srf->srf_function != SMP_FUNC_DISCOVER) {
4137 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: bad response function 0x%x",
4138 		    __func__, srf->srf_function);
4139 		result = -EINVAL;
4140 		goto out;
4141 	} else if (srf->srf_result != SMP_RES_FUNCTION_ACCEPTED) {
4142 		result = pmcs_smp_function_result(pwp, srf);
4143 		/* Need not fail if PHY is Vacant */
4144 		if (result != SMP_RES_PHY_VACANT) {
4145 			result = -EINVAL;
4146 			goto out;
4147 		}
4148 	}
4149 
4150 	ini_support = (sdr->sdr_attached_sata_host |
4151 	    (sdr->sdr_attached_smp_initiator << 1) |
4152 	    (sdr->sdr_attached_stp_initiator << 2) |
4153 	    (sdr->sdr_attached_ssp_initiator << 3));
4154 
4155 	tgt_support = (sdr->sdr_attached_sata_device |
4156 	    (sdr->sdr_attached_smp_target << 1) |
4157 	    (sdr->sdr_attached_stp_target << 2) |
4158 	    (sdr->sdr_attached_ssp_target << 3));
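	/*
	 * Both nibbles use the same bit layout: bit 0 is SATA host/device,
	 * bit 1 SMP, bit 2 STP and bit 3 SSP.  For example, an attached
	 * target reporting both SSP and STP support would yield
	 * tgt_support == 0xc.
	 */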
4159 
4160 	pmcs_wwn2barray(BE_64(sdr->sdr_sas_addr), sas_address);
4161 	pmcs_wwn2barray(BE_64(sdr->sdr_attached_sas_addr), att_sas_address);
4162 
4163 	switch (sdr->sdr_attached_device_type) {
4164 	case SAS_IF_DTYPE_ENDPOINT:
4165 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
4166 		    "exp_content: %s atdt=0x%x lr=%x is=%x ts=%x SAS="
4167 		    SAS_ADDR_FMT " attSAS=" SAS_ADDR_FMT " atPHY=%x",
4168 		    pptr->path,
4169 		    sdr->sdr_attached_device_type,
4170 		    sdr->sdr_negotiated_logical_link_rate,
4171 		    ini_support,
4172 		    tgt_support,
4173 		    SAS_ADDR_PRT(sas_address),
4174 		    SAS_ADDR_PRT(att_sas_address),
4175 		    sdr->sdr_attached_phy_identifier);
4176 
4177 		if (sdr->sdr_attached_sata_device ||
4178 		    sdr->sdr_attached_stp_target) {
4179 			pptr->dtype = SATA;
4180 		} else if (sdr->sdr_attached_ssp_target) {
4181 			pptr->dtype = SAS;
4182 		} else if (tgt_support || ini_support) {
4183 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: %s has "
4184 			    "tgt support=%x init support=(%x)",
4185 			    __func__, pptr->path, tgt_support, ini_support);
4186 		}
4187 		break;
4188 	case SAS_IF_DTYPE_EDGE:
4189 	case SAS_IF_DTYPE_FANOUT:
4190 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
4191 		    "exp_content: %s atdt=0x%x lr=%x is=%x ts=%x SAS="
4192 		    SAS_ADDR_FMT " attSAS=" SAS_ADDR_FMT " atPHY=%x",
4193 		    pptr->path,
4194 		    sdr->sdr_attached_device_type,
4195 		    sdr->sdr_negotiated_logical_link_rate,
4196 		    ini_support,
4197 		    tgt_support,
4198 		    SAS_ADDR_PRT(sas_address),
4199 		    SAS_ADDR_PRT(att_sas_address),
4200 		    sdr->sdr_attached_phy_identifier);
4201 		if (sdr->sdr_attached_smp_target) {
4202 			/*
4203 			 * Avoid configuring phys that just point back
4204 			 * at a parent phy
4205 			 */
4206 			if (expander->parent &&
4207 			    memcmp(expander->parent->sas_address,
4208 			    att_sas_address,
4209 			    sizeof (expander->parent->sas_address)) == 0) {
4210 				pmcs_prt(pwp, PMCS_PRT_DEBUG3,
4211 				    "%s: skipping port back to parent "
4212 				    "expander (%s)", __func__, pptr->path);
4213 				pptr->dtype = NOTHING;
4214 				break;
4215 			}
4216 			pptr->dtype = EXPANDER;
4217 
4218 		} else if (tgt_support || ini_support) {
4219 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s has "
4220 			    "tgt support=%x init support=(%x)",
4221 			    pptr->path, tgt_support, ini_support);
4222 			pptr->dtype = EXPANDER;
4223 		}
4224 		break;
4225 	default:
4226 		pptr->dtype = NOTHING;
4227 		break;
4228 	}
4229 	if (pptr->dtype != NOTHING) {
4230 		pmcs_phy_t *ctmp;
4231 
4232 		/*
4233 		 * If the attached device is a SATA device and the expander is
4234 		 * (possibly) SAS2-compliant, check whether a NAA=5 WWN field
4235 		 * (NAA is the high nibble of the first byte) starts at this
4236 		 * offset; if so, use it as the SAS address for this device.
4237 		 */
4238 		if (expander->tolerates_sas2 && pptr->dtype == SATA &&
4239 		    (roff[SAS_ATTACHED_NAME_OFFSET] >> 4) == 0x5) {
4240 			(void) memcpy(pptr->sas_address,
4241 			    &roff[SAS_ATTACHED_NAME_OFFSET], 8);
4242 		} else {
4243 			(void) memcpy(pptr->sas_address, att_sas_address, 8);
4244 		}
4245 		pptr->atdt = (sdr->sdr_attached_device_type);
4246 		/*
4247 		 * Now walk up from the expander's parent to the top to
4248 		 * make sure we only use the lowest common link_rate.
4249 		 */
4250 		for (ctmp = expander->parent; ctmp; ctmp = ctmp->parent) {
4251 			if (ctmp->link_rate <
4252 			    sdr->sdr_negotiated_logical_link_rate) {
4253 				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
4254 				    "%s: derating link rate from %x to %x due "
4255 				    "to %s being slower", pptr->path,
4256 				    sdr->sdr_negotiated_logical_link_rate,
4257 				    ctmp->link_rate,
4258 				    ctmp->path);
4259 				sdr->sdr_negotiated_logical_link_rate =
4260 				    ctmp->link_rate;
4261 			}
4262 		}
4263 		pptr->link_rate = sdr->sdr_negotiated_logical_link_rate;
4264 		pptr->state.prog_min_rate = sdr->sdr_prog_min_phys_link_rate;
4265 		pptr->state.hw_min_rate = sdr->sdr_hw_min_phys_link_rate;
4266 		pptr->state.prog_max_rate = sdr->sdr_prog_max_phys_link_rate;
4267 		pptr->state.hw_max_rate = sdr->sdr_hw_max_phys_link_rate;
4268 		PHY_CHANGED(pwp, pptr);
4269 	} else {
4270 		pmcs_clear_phy(pwp, pptr);
4271 	}
4272 	result = 1;
4273 out:
4274 	return (result);
4275 }
4276 
4277 /*
4278  * Get a work structure and assign it a tag with type and serial number.
4279  * If a structure is returned, it is returned locked.
4280  */
4281 pmcwork_t *
4282 pmcs_gwork(pmcs_hw_t *pwp, uint32_t tag_type, pmcs_phy_t *phyp)
4283 {
4284 	pmcwork_t *p;
4285 	uint16_t snum;
4286 	uint32_t off;
4287 
4288 	mutex_enter(&pwp->wfree_lock);
4289 	p = STAILQ_FIRST(&pwp->wf);
4290 	if (p == NULL) {
4291 		/*
4292 		 * If we couldn't get a work structure, it's time to bite
4293 		 * the bullet, grab the pfree_lock and copy over all the
4294 		 * work structures from the pending free list to the actual
4295 		 * free list.  This shouldn't happen all that often.
4296 		 */
4297 		mutex_enter(&pwp->pfree_lock);
4298 		pwp->wf.stqh_first = pwp->pf.stqh_first;
4299 		pwp->wf.stqh_last = pwp->pf.stqh_last;
4300 		STAILQ_INIT(&pwp->pf);
4301 		mutex_exit(&pwp->pfree_lock);
4302 
4303 		p = STAILQ_FIRST(&pwp->wf);
4304 		if (p == NULL) {
4305 			mutex_exit(&pwp->wfree_lock);
4306 			return (NULL);
4307 		}
4308 	}
4309 	STAILQ_REMOVE(&pwp->wf, p, pmcwork, next);
4310 	snum = pwp->wserno++;
4311 	mutex_exit(&pwp->wfree_lock);
4312 
4313 	off = p - pwp->work;
4314 
4315 	mutex_enter(&p->lock);
4316 	ASSERT(p->state == PMCS_WORK_STATE_NIL);
4317 	ASSERT(p->htag == PMCS_TAG_FREE);
4318 	p->htag = (tag_type << PMCS_TAG_TYPE_SHIFT) & PMCS_TAG_TYPE_MASK;
4319 	p->htag |= ((snum << PMCS_TAG_SERNO_SHIFT) & PMCS_TAG_SERNO_MASK);
4320 	p->htag |= ((off << PMCS_TAG_INDEX_SHIFT) & PMCS_TAG_INDEX_MASK);
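	/*
	 * The htag is a composite handle.  Illustratively (the actual
	 * shift and mask values live in the pmcs headers):
	 *
	 *	htag = [ tag type | serial number | work structure index ]
	 *
	 * The serial number lets stale completions be recognized, while
	 * the index lets pmcs_tag2wp() map an htag straight back to
	 * &pwp->work[idx] without searching.
	 */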
4321 	p->start = gethrtime();
4322 	p->state = PMCS_WORK_STATE_READY;
4323 	p->ssp_event = 0;
4324 	p->dead = 0;
4325 
4326 	if (phyp) {
4327 		p->phy = phyp;
4328 		pmcs_inc_phy_ref_count(phyp);
4329 	}
4330 
4331 	return (p);
4332 }
4333 
4334 /*
4335  * Called with pwrk lock held.  Returned with lock released.
4336  */
4337 void
4338 pmcs_pwork(pmcs_hw_t *pwp, pmcwork_t *p)
4339 {
4340 	ASSERT(p != NULL);
4341 	ASSERT(mutex_owned(&p->lock));
4342 
4343 #ifdef DEBUG
4344 	p->last_ptr = p->ptr;
4345 	p->last_arg = p->arg;
4346 	p->last_phy = p->phy;
4347 	p->last_xp = p->xp;
4348 	p->last_htag = p->htag;
4349 	p->last_state = p->state;
4350 #endif
4351 	p->finish = gethrtime();
4352 
4353 	if (p->phy) {
4354 		pmcs_dec_phy_ref_count(p->phy);
4355 	}
4356 
4357 	p->state = PMCS_WORK_STATE_NIL;
4358 	p->htag = PMCS_TAG_FREE;
4359 	p->xp = NULL;
4360 	p->ptr = NULL;
4361 	p->arg = NULL;
4362 	p->phy = NULL;
4363 	p->timer = 0;
4364 	mutex_exit(&p->lock);
4365 
4366 	if (mutex_tryenter(&pwp->wfree_lock) == 0) {
4367 		mutex_enter(&pwp->pfree_lock);
4368 		STAILQ_INSERT_TAIL(&pwp->pf, p, next);
4369 		mutex_exit(&pwp->pfree_lock);
4370 	} else {
4371 		STAILQ_INSERT_TAIL(&pwp->wf, p, next);
4372 		mutex_exit(&pwp->wfree_lock);
4373 	}
4374 }
4375 
4376 /*
4377  * Find a work structure based upon a tag and make sure that the tag
4378  * serial number matches the work structure we've found.
4379  * If a structure is found, its lock is held upon return.
4380  */
4381 pmcwork_t *
4382 pmcs_tag2wp(pmcs_hw_t *pwp, uint32_t htag)
4383 {
4384 	pmcwork_t *p;
4385 	uint32_t idx = PMCS_TAG_INDEX(htag);
4386 
4387 	p = &pwp->work[idx];
4388 
4389 	mutex_enter(&p->lock);
4390 	if (p->htag == htag) {
4391 		return (p);
4392 	}
4393 	mutex_exit(&p->lock);
4394 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "INDEX 0x%x HTAG 0x%x got p->htag 0x%x",
4395 	    idx, htag, p->htag);
4396 	return (NULL);
4397 }
4398 
4399 /*
4400  * Issue an abort for a command or for all commands.
4401  *
4402  * Since this can be called from interrupt context,
4403  * we don't wait for completion if wait is not set.
4404  *
4405  * Called with PHY lock held.
4406  */
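/*
 * For example (hypothetical calls, not taken from this file): a caller
 * holding the PHY lock could synchronously abort every outstanding
 * command on a PHY with
 *
 *	(void) pmcs_abort(pwp, pptr, 0, 1, 1);
 *
 * while aborting a single command identified by htag, without waiting
 * for completion, would be
 *
 *	(void) pmcs_abort(pwp, pptr, htag, 0, 0);
 */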
4407 int
4408 pmcs_abort(pmcs_hw_t *pwp, pmcs_phy_t *pptr, uint32_t tag, int all_cmds,
4409     int wait)
4410 {
4411 	pmcwork_t *pwrk;
4412 	pmcs_xscsi_t *tgt;
4413 	uint32_t msg[PMCS_MSG_SIZE], *ptr;
4414 	int result, abt_type;
4415 	uint32_t abt_htag, status;
4416 
4417 	if (pptr->abort_all_start) {
4418 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: ABORT_ALL for (%s) already"
4419 		    " in progress.", __func__, pptr->path);
4420 		return (EBUSY);
4421 	}
4422 
4423 	switch (pptr->dtype) {
4424 	case SAS:
4425 		abt_type = PMCIN_SSP_ABORT;
4426 		break;
4427 	case SATA:
4428 		abt_type = PMCIN_SATA_ABORT;
4429 		break;
4430 	case EXPANDER:
4431 		abt_type = PMCIN_SMP_ABORT;
4432 		break;
4433 	default:
4434 		return (0);
4435 	}
4436 
4437 	pwrk = pmcs_gwork(pwp, wait ? PMCS_TAG_TYPE_WAIT : PMCS_TAG_TYPE_NONE,
4438 	    pptr);
4439 
4440 	if (pwrk == NULL) {
4441 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
4442 		return (ENOMEM);
4443 	}
4444 
4445 	pwrk->dtype = pptr->dtype;
4446 	if (wait) {
4447 		pwrk->arg = msg;
4448 	}
4449 	if (pptr->valid_device_id == 0) {
4450 		pmcs_pwork(pwp, pwrk);
4451 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Invalid DeviceID", __func__);
4452 		return (ENODEV);
4453 	}
4454 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, abt_type));
4455 	msg[1] = LE_32(pwrk->htag);
4456 	msg[2] = LE_32(pptr->device_id);
4457 	if (all_cmds) {
4458 		msg[3] = 0;
4459 		msg[4] = LE_32(1);
4460 		pwrk->ptr = NULL;
4461 		pptr->abort_all_start = gethrtime();
4462 	} else {
4463 		msg[3] = LE_32(tag);
4464 		msg[4] = 0;
4465 		pwrk->ptr = &tag;
4466 	}
4467 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4468 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4469 	if (ptr == NULL) {
4470 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4471 		pmcs_pwork(pwp, pwrk);
4472 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
4473 		return (ENOMEM);
4474 	}
4475 
4476 	COPY_MESSAGE(ptr, msg, 5);
4477 	if (all_cmds) {
4478 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4479 		    "%s: aborting all commands for %s device %s. (htag=0x%x)",
4480 		    __func__, pmcs_get_typename(pptr->dtype), pptr->path,
4481 		    msg[1]);
4482 	} else {
4483 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4484 		    "%s: aborting tag 0x%x for %s device %s. (htag=0x%x)",
4485 		    __func__, tag, pmcs_get_typename(pptr->dtype), pptr->path,
4486 		    msg[1]);
4487 	}
4488 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
4489 
4490 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4491 	if (!wait) {
4492 		mutex_exit(&pwrk->lock);
4493 		return (0);
4494 	}
4495 
4496 	abt_htag = pwrk->htag;
4497 	pmcs_unlock_phy(pwrk->phy);
4498 	WAIT_FOR(pwrk, 1000, result);
4499 	pmcs_lock_phy(pwrk->phy);
4500 
4501 	tgt = pwrk->xp;
4502 	pmcs_pwork(pwp, pwrk);
4503 
4504 	if (tgt != NULL) {
4505 		mutex_enter(&tgt->aqlock);
4506 		if (!STAILQ_EMPTY(&tgt->aq)) {
4507 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
4508 			    "%s: Abort complete (result=0x%x), but "
4509 			    "aq not empty (tgt 0x%p), waiting",
4510 			    __func__, result, (void *)tgt);
4511 			cv_wait(&tgt->abort_cv, &tgt->aqlock);
4512 		}
4513 		mutex_exit(&tgt->aqlock);
4514 	}
4515 
4516 	if (all_cmds) {
4517 		pptr->abort_all_start = 0;
4518 		cv_signal(&pptr->abort_all_cv);
4519 	}
4520 
4521 	if (result) {
4522 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4523 		    "%s: Abort (htag 0x%08x) request timed out",
4524 		    __func__, abt_htag);
4525 		if (tgt != NULL) {
4526 			mutex_enter(&tgt->statlock);
4527 			if ((tgt->dev_state != PMCS_DEVICE_STATE_IN_RECOVERY) &&
4528 			    (tgt->dev_state !=
4529 			    PMCS_DEVICE_STATE_NON_OPERATIONAL)) {
4530 				pmcs_prt(pwp, PMCS_PRT_DEBUG,
4531 				    "%s: Trying DS error recovery for tgt 0x%p",
4532 				    __func__, (void *)tgt);
4533 				(void) pmcs_send_err_recovery_cmd(pwp,
4534 				    PMCS_DEVICE_STATE_IN_RECOVERY, tgt);
4535 			}
4536 			mutex_exit(&tgt->statlock);
4537 		}
4538 		return (ETIMEDOUT);
4539 	}
4540 
4541 	status = LE_32(msg[2]);
4542 	if (status != PMCOUT_STATUS_OK) {
4543 		/*
4544 		 * The only non-success statuses are IO_NOT_VALID and
4545 		 * IO_ABORT_IN_PROGRESS.
4546 		 * In the case of IO_ABORT_IN_PROGRESS, it is the other ABORT
4547 		 * command's status that is of concern; this duplicate
4548 		 * command's status can safely be ignored.
4549 		 * IO_NOT_VALID is not an error per se.
4550 		 * For the abort of a single I/O, complete the command anyway.
4551 		 * If, however, we were aborting all commands, that is a
4552 		 * problem, as IO_NOT_VALID really means that the I/O or
4553 		 * device is not there.
4554 		 * The discovery process will take care of the cleanup.
4555 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: abort result 0x%x",
4556 		    __func__, LE_32(msg[2]));
4557 		if (all_cmds) {
4558 			PHY_CHANGED(pwp, pptr);
4559 			RESTART_DISCOVERY(pwp);
4560 		} else {
4561 			return (EINVAL);
4562 		}
4563 
4564 		return (0);
4565 	}
4566 
4567 	if (tgt != NULL) {
4568 		mutex_enter(&tgt->statlock);
4569 		if (tgt->dev_state == PMCS_DEVICE_STATE_IN_RECOVERY) {
4570 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
4571 			    "%s: Restoring OPERATIONAL dev_state for tgt 0x%p",
4572 			    __func__, (void *)tgt);
4573 			(void) pmcs_send_err_recovery_cmd(pwp,
4574 			    PMCS_DEVICE_STATE_OPERATIONAL, tgt);
4575 		}
4576 		mutex_exit(&tgt->statlock);
4577 	}
4578 
4579 	return (0);
4580 }
4581 
4582 /*
4583  * Issue a task management function to an SSP device.
4584  *
4585  * Called with PHY lock held.
4586  * statlock CANNOT be held upon entry.
4587  */
4588 int
4589 pmcs_ssp_tmf(pmcs_hw_t *pwp, pmcs_phy_t *pptr, uint8_t tmf, uint32_t tag,
4590     uint64_t lun, uint32_t *response)
4591 {
4592 	int result, ds;
4593 	uint8_t local[PMCS_QENTRY_SIZE << 1], *xd;
4594 	sas_ssp_rsp_iu_t *rptr = (void *)local;
4595 	static const uint8_t ssp_rsp_evec[] = {
4596 		0x58, 0x61, 0x56, 0x72, 0x00
4597 	};
4598 	uint32_t msg[PMCS_MSG_SIZE], *ptr, status;
4599 	struct pmcwork *pwrk;
4600 	pmcs_xscsi_t *xp;
4601 
4602 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
4603 	if (pwrk == NULL) {
4604 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
4605 		return (ENOMEM);
4606 	}
4607 	/*
4608 	 * NB: We use the PMCS_OQ_GENERAL outbound queue
4609 	 * NB: so as to not get entangled in normal I/O
4610 	 * NB: processing.
4611 	 */
4612 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
4613 	    PMCIN_SSP_INI_TM_START));
4614 	msg[1] = LE_32(pwrk->htag);
4615 	msg[2] = LE_32(pptr->device_id);
4616 	if (tmf == SAS_ABORT_TASK || tmf == SAS_QUERY_TASK) {
4617 		msg[3] = LE_32(tag);
4618 	} else {
4619 		msg[3] = 0;
4620 	}
4621 	msg[4] = LE_32(tmf);
4622 	msg[5] = BE_32((uint32_t)lun);
4623 	msg[6] = BE_32((uint32_t)(lun >> 32));
4624 	msg[7] = LE_32(PMCIN_MESSAGE_REPORT);
4625 
4626 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4627 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4628 	if (ptr == NULL) {
4629 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4630 		pmcs_pwork(pwp, pwrk);
4631 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
4632 		return (ENOMEM);
4633 	}
4634 	COPY_MESSAGE(ptr, msg, 7);
4635 	pwrk->arg = msg;
4636 	pwrk->dtype = pptr->dtype;
4637 
4638 	xp = pptr->target;
4639 	if (xp != NULL) {
4640 		mutex_enter(&xp->statlock);
4641 		if (xp->dev_state == PMCS_DEVICE_STATE_NON_OPERATIONAL) {
4642 			mutex_exit(&xp->statlock);
4643 			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4644 			pmcs_pwork(pwp, pwrk);
4645 			pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Not sending '%s'"
4646 			    " because DS is '%s'", __func__, pmcs_tmf2str(tmf),
4647 			    pmcs_status_str
4648 			    (PMCOUT_STATUS_IO_DS_NON_OPERATIONAL));
4649 			return (EIO);
4650 		}
4651 		mutex_exit(&xp->statlock);
4652 	}
4653 
4654 	pmcs_prt(pwp, PMCS_PRT_DEBUG,
4655 	    "%s: sending '%s' to %s (lun %llu) tag 0x%x", __func__,
4656 	    pmcs_tmf2str(tmf), pptr->path, (unsigned long long) lun, tag);
4657 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
4658 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4659 
4660 	pmcs_unlock_phy(pptr);
4661 	/*
4662 	 * This command is sent to the target device, so it can take a
4663 	 * significant amount of time to complete when the path and device
4664 	 * are busy.  Set the timeout to 20 seconds.
4665 	 */
4666 	WAIT_FOR(pwrk, 20000, result);
4667 	pmcs_lock_phy(pptr);
4668 	pmcs_pwork(pwp, pwrk);
4669 
4670 	if (result) {
4671 		if (xp == NULL) {
4672 			return (ETIMEDOUT);
4673 		}
4674 
4675 		mutex_enter(&xp->statlock);
4676 		pmcs_start_dev_state_recovery(xp, pptr);
4677 		mutex_exit(&xp->statlock);
4678 		return (ETIMEDOUT);
4679 	}
4680 
4681 	status = LE_32(msg[2]);
4682 	if (status != PMCOUT_STATUS_OK) {
4683 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4684 		    "%s: status %s for TMF %s action to %s, lun %llu",
4685 		    __func__, pmcs_status_str(status),  pmcs_tmf2str(tmf),
4686 		    pptr->path, (unsigned long long) lun);
4687 		if ((status == PMCOUT_STATUS_IO_DS_NON_OPERATIONAL) ||
4688 		    (status == PMCOUT_STATUS_OPEN_CNX_ERROR_BREAK) ||
4689 		    (status == PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS)) {
4690 			ds = PMCS_DEVICE_STATE_NON_OPERATIONAL;
4691 		} else if (status == PMCOUT_STATUS_IO_DS_IN_RECOVERY) {
4692 			/*
4693 			 * If the status is IN_RECOVERY, it's an indication
4694 			 * that it's now time for us to request to have the
4695 			 * device state set to OPERATIONAL since we're the ones
4696 			 * that requested recovery to begin with.
4697 			 */
4698 			ds = PMCS_DEVICE_STATE_OPERATIONAL;
4699 		} else {
4700 			ds = PMCS_DEVICE_STATE_IN_RECOVERY;
4701 		}
4702 		if (xp != NULL) {
4703 			mutex_enter(&xp->statlock);
4704 			if (xp->dev_state != ds) {
4705 				pmcs_prt(pwp, PMCS_PRT_DEBUG,
4706 				    "%s: Sending err recovery cmd"
4707 				    " for tgt 0x%p (status = %s)",
4708 				    __func__, (void *)xp,
4709 				    pmcs_status_str(status));
4710 				(void) pmcs_send_err_recovery_cmd(pwp, ds, xp);
4711 			}
4712 			mutex_exit(&xp->statlock);
4713 		}
4714 		return (EIO);
4715 	} else {
4716 		ds = PMCS_DEVICE_STATE_OPERATIONAL;
4717 		if (xp != NULL) {
4718 			mutex_enter(&xp->statlock);
4719 			if (xp->dev_state != ds) {
4720 				pmcs_prt(pwp, PMCS_PRT_DEBUG,
4721 				    "%s: Sending err recovery cmd"
4722 				    " for tgt 0x%p (status = %s)",
4723 				    __func__, (void *)xp,
4724 				    pmcs_status_str(status));
4725 				(void) pmcs_send_err_recovery_cmd(pwp, ds, xp);
4726 			}
4727 			mutex_exit(&xp->statlock);
4728 		}
4729 	}
4730 	if (LE_32(msg[3]) == 0) {
4731 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "TMF completed with no response");
4732 		return (EIO);
4733 	}
4734 	pmcs_endian_transform(pwp, local, &msg[5], ssp_rsp_evec);
4735 	xd = (uint8_t *)(&msg[5]);
4736 	xd += SAS_RSP_HDR_SIZE;
4737 	if (rptr->datapres != SAS_RSP_DATAPRES_RESPONSE_DATA) {
4738 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4739 		    "%s: TMF response not RESPONSE DATA (0x%x)",
4740 		    __func__, rptr->datapres);
4741 		return (EIO);
4742 	}
4743 	if (rptr->response_data_length != 4) {
4744 		pmcs_print_entry(pwp, PMCS_PRT_DEBUG,
4745 		    "Bad SAS RESPONSE DATA LENGTH", msg);
4746 		return (EIO);
4747 	}
4748 	(void) memcpy(&status, xd, sizeof (uint32_t));
4749 	status = BE_32(status);
4750 	if (response != NULL)
4751 		*response = status;
4752 	/*
4753 	 * The status is actually in the low-order byte.  The upper three
4754 	 * bytes contain additional information for the TMFs that support them.
4755 	 * However, at this time we do not issue any of those.  In the other
4756 	 * cases, the upper three bytes are supposed to be 0, but it appears
4757 	 * they aren't always.  Just mask them off.
4758 	 */
4759 	switch (status & 0xff) {
4760 	case SAS_RSP_TMF_COMPLETE:
4761 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: TMF complete", __func__);
4762 		result = 0;
4763 		break;
4764 	case SAS_RSP_TMF_SUCCEEDED:
4765 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: TMF succeeded", __func__);
4766 		result = 0;
4767 		break;
4768 	case SAS_RSP_INVALID_FRAME:
4769 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4770 		    "%s: TMF returned INVALID FRAME", __func__);
4771 		result = EIO;
4772 		break;
4773 	case SAS_RSP_TMF_NOT_SUPPORTED:
4774 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4775 		    "%s: TMF returned TMF NOT SUPPORTED", __func__);
4776 		result = EIO;
4777 		break;
4778 	case SAS_RSP_TMF_FAILED:
4779 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4780 		    "%s: TMF returned TMF FAILED", __func__);
4781 		result = EIO;
4782 		break;
4783 	case SAS_RSP_TMF_INCORRECT_LUN:
4784 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4785 		    "%s: TMF returned INCORRECT LUN", __func__);
4786 		result = EIO;
4787 		break;
4788 	case SAS_RSP_OVERLAPPED_OIPTTA:
4789 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4790 		    "%s: TMF returned OVERLAPPED INITIATOR PORT TRANSFER TAG "
4791 		    "ATTEMPTED", __func__);
4792 		result = EIO;
4793 		break;
4794 	default:
4795 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4796 		    "%s: TMF returned unknown code 0x%x", __func__, status);
4797 		result = EIO;
4798 		break;
4799 	}
4800 	return (result);
4801 }
4802 
4803 /*
4804  * Called with PHY lock held and scratch acquired
4805  */
4806 int
4807 pmcs_sata_abort_ncq(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
4808 {
4809 	const char *utag_fail_fmt = "%s: untagged NCQ command failure";
4810 	const char *tag_fail_fmt = "%s: NCQ command failure (tag 0x%x)";
4811 	uint32_t msg[PMCS_QENTRY_SIZE], *ptr, result, status;
4812 	uint8_t *fp = pwp->scratch, ds;
4813 	fis_t fis;
4814 	pmcwork_t *pwrk;
4815 	pmcs_xscsi_t *tgt;
4816 
4817 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
4818 	if (pwrk == NULL) {
4819 		return (ENOMEM);
4820 	}
4821 	msg[0] = LE_32(PMCS_IOMB_IN_SAS(PMCS_OQ_IODONE,
4822 	    PMCIN_SATA_HOST_IO_START));
4823 	msg[1] = LE_32(pwrk->htag);
4824 	msg[2] = LE_32(pptr->device_id);
4825 	msg[3] = LE_32(512);
4826 	msg[4] = LE_32(SATA_PROTOCOL_PIO | PMCIN_DATADIR_2_INI);
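	/*
	 * msg[5] is dword 0 of the host-to-device register FIS: the FIS
	 * type in byte 0, the C bit in byte 1 and the READ LOG EXT command
	 * in byte 2.  msg[6] (0x10) is dword 1 of the FIS; 0x10 in the LBA
	 * low byte selects what we take to be the NCQ command error log.
	 */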
4827 	msg[5] = LE_32((READ_LOG_EXT << 16) | (C_BIT << 8) | FIS_REG_H2DEV);
4828 	msg[6] = LE_32(0x10);
	msg[7] = 0;
4829 	msg[8] = LE_32(1);
4830 	msg[9] = 0;
4831 	msg[10] = 0;
4832 	msg[11] = 0;
4833 	msg[12] = LE_32(DWORD0(pwp->scratch_dma));
4834 	msg[13] = LE_32(DWORD1(pwp->scratch_dma));
4835 	msg[14] = LE_32(512);
4836 	msg[15] = 0;
4837 
4838 	pwrk->arg = msg;
4839 	pwrk->dtype = pptr->dtype;
4840 
4841 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4842 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4843 	if (ptr == NULL) {
4844 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4845 		pmcs_pwork(pwp, pwrk);
4846 		return (ENOMEM);
4847 	}
4848 	COPY_MESSAGE(ptr, msg, PMCS_QENTRY_SIZE);
4849 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
4850 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4851 
4852 	pmcs_unlock_phy(pptr);
4853 	WAIT_FOR(pwrk, 250, result);
4854 	pmcs_lock_phy(pptr);
4855 	pmcs_pwork(pwp, pwrk);
4856 
4857 	if (result) {
4858 		pmcs_prt(pwp, PMCS_PRT_INFO, pmcs_timeo, __func__);
4859 		return (EIO);
4860 	}
4861 	status = LE_32(msg[2]);
4862 	if (status != PMCOUT_STATUS_OK || LE_32(msg[3])) {
4863 		tgt = pptr->target;
4864 		if (tgt == NULL) {
4865 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
4866 			    "%s: cannot find target for phy 0x%p for "
4867 			    "dev state recovery", __func__, (void *)pptr);
4868 			return (EIO);
4869 		}
4870 
4871 		mutex_enter(&tgt->statlock);
4872 
4873 		pmcs_print_entry(pwp, PMCS_PRT_DEBUG, "READ LOG EXT", msg);
4874 		if ((status == PMCOUT_STATUS_IO_DS_NON_OPERATIONAL) ||
4875 		    (status == PMCOUT_STATUS_OPEN_CNX_ERROR_BREAK) ||
4876 		    (status == PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS)) {
4877 			ds = PMCS_DEVICE_STATE_NON_OPERATIONAL;
4878 		} else {
4879 			ds = PMCS_DEVICE_STATE_IN_RECOVERY;
4880 		}
4881 		if (tgt->dev_state != ds) {
4882 			pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Trying SATA DS Error"
4883 			    " Recovery for tgt(0x%p) for status(%s)",
4884 			    __func__, (void *)tgt, pmcs_status_str(status));
4885 			(void) pmcs_send_err_recovery_cmd(pwp, ds, tgt);
4886 		}
4887 
4888 		mutex_exit(&tgt->statlock);
4889 		return (EIO);
4890 	}
4891 	fis[0] = (fp[4] << 24) | (fp[3] << 16) | (fp[2] << 8) | FIS_REG_D2H;
4892 	fis[1] = (fp[8] << 24) | (fp[7] << 16) | (fp[6] << 8) | fp[5];
4893 	fis[2] = (fp[12] << 24) | (fp[11] << 16) | (fp[10] << 8) | fp[9];
4894 	fis[3] = (fp[16] << 24) | (fp[15] << 16) | (fp[14] << 8) | fp[13];
4895 	fis[4] = 0;
4896 	if (fp[0] & 0x80) {
4897 		pmcs_prt(pwp, PMCS_PRT_DEBUG, utag_fail_fmt, __func__);
4898 	} else {
4899 		pmcs_prt(pwp, PMCS_PRT_DEBUG, tag_fail_fmt, __func__,
4900 		    fp[0] & 0x1f);
4901 	}
4902 	pmcs_fis_dump(pwp, fis);
4903 	pptr->need_rl_ext = 0;
4904 	return (0);
4905 }
4906 
4907 /*
4908  * Transform a structure from CPU to Device endian format, or
4909  * vice versa, based upon a transformation vector.
4910  *
4911  * A transformation vector is an array of bytes, each byte
4912  * of which is defined thusly:
4913  *
4914  *  bit 7: from CPU to desired endian, otherwise from desired endian
4915  *	   to CPU format
4916  *  bit 6: Big Endian, else Little Endian
4917  *  bits 5-4:
4918  *       00 Undefined
4919  *       01 One Byte quantities
4920  *       10 Two Byte quantities
4921  *       11 Four Byte quantities
4922  *
4923  *  bits 3-0:
4924  *       0000 Undefined
4925  *       Otherwise, the number of quantities to transform
4926  *
4927  * The vector is terminated by a 0 value.
4928  */
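/*
 * As a worked example, the ssp_rsp_evec vector used by pmcs_ssp_tmf()
 * above is { 0x58, 0x61, 0x56, 0x72, 0x00 }, which decodes as:
 *
 *	0x58	8 one-byte quantities (copied as-is)
 *	0x61	1 two-byte quantity, Big Endian
 *	0x56	6 one-byte quantities (copied as-is)
 *	0x72	2 four-byte quantities, Big Endian
 *
 * for a total of 8 + 2 + 6 + 8 = 24 bytes, the size of the SSP
 * response IU header that precedes the response data.
 */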
4929 
4930 void
4931 pmcs_endian_transform(pmcs_hw_t *pwp, void *orig_out, void *orig_in,
4932     const uint8_t *xfvec)
4933 {
4934 	uint8_t c, *out = orig_out, *in = orig_in;
4935 
4936 	if (xfvec == NULL) {
4937 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: null xfvec", __func__);
4938 		return;
4939 	}
4940 	if (out == NULL) {
4941 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: null out", __func__);
4942 		return;
4943 	}
4944 	if (in == NULL) {
4945 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: null in", __func__);
4946 		return;
4947 	}
4948 	while ((c = *xfvec++) != 0) {
4949 		int nbyt = (c & 0xf);
4950 		int size = (c >> 4) & 0x3;
4951 		int bige = (c >> 4) & 0x4;
4952 
4953 		switch (size) {
4954 		case 1:
4955 		{
4956 			while (nbyt-- > 0) {
4957 				*out++ = *in++;
4958 			}
4959 			break;
4960 		}
4961 		case 2:
4962 		{
4963 			uint16_t tmp;
4964 			while (nbyt-- > 0) {
4965 				(void) memcpy(&tmp, in, sizeof (uint16_t));
4966 				if (bige) {
4967 					tmp = BE_16(tmp);
4968 				} else {
4969 					tmp = LE_16(tmp);
4970 				}
4971 				(void) memcpy(out, &tmp, sizeof (uint16_t));
4972 				out += sizeof (uint16_t);
4973 				in += sizeof (uint16_t);
4974 			}
4975 			break;
4976 		}
4977 		case 3:
4978 		{
4979 			uint32_t tmp;
4980 			while (nbyt-- > 0) {
4981 				(void) memcpy(&tmp, in, sizeof (uint32_t));
4982 				if (bige) {
4983 					tmp = BE_32(tmp);
4984 				} else {
4985 					tmp = LE_32(tmp);
4986 				}
4987 				(void) memcpy(out, &tmp, sizeof (uint32_t));
4988 				out += sizeof (uint32_t);
4989 				in += sizeof (uint32_t);
4990 			}
4991 			break;
4992 		}
4993 		default:
4994 			pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: bad size", __func__);
4995 			return;
4996 		}
4997 	}
4998 }
4999 
5000 const char *
5001 pmcs_get_rate(unsigned int linkrt)
5002 {
5003 	const char *rate;
5004 	switch (linkrt) {
5005 	case SAS_LINK_RATE_1_5GBIT:
5006 		rate = "1.5";
5007 		break;
5008 	case SAS_LINK_RATE_3GBIT:
5009 		rate = "3.0";
5010 		break;
5011 	case SAS_LINK_RATE_6GBIT:
5012 		rate = "6.0";
5013 		break;
5014 	default:
5015 		rate = "???";
5016 		break;
5017 	}
5018 	return (rate);
5019 }
5020 
5021 const char *
5022 pmcs_get_typename(pmcs_dtype_t type)
5023 {
5024 	switch (type) {
5025 	case NOTHING:
5026 		return ("NIL");
5027 	case SATA:
5028 		return ("SATA");
5029 	case SAS:
5030 		return ("SSP");
5031 	case EXPANDER:
5032 		return ("EXPANDER");
5033 	}
5034 	return ("????");
5035 }
5036 
5037 const char *
5038 pmcs_tmf2str(int tmf)
5039 {
5040 	switch (tmf) {
5041 	case SAS_ABORT_TASK:
5042 		return ("Abort Task");
5043 	case SAS_ABORT_TASK_SET:
5044 		return ("Abort Task Set");
5045 	case SAS_CLEAR_TASK_SET:
5046 		return ("Clear Task Set");
5047 	case SAS_LOGICAL_UNIT_RESET:
5048 		return ("Logical Unit Reset");
5049 	case SAS_I_T_NEXUS_RESET:
5050 		return ("I_T Nexus Reset");
5051 	case SAS_CLEAR_ACA:
5052 		return ("Clear ACA");
5053 	case SAS_QUERY_TASK:
5054 		return ("Query Task");
5055 	case SAS_QUERY_TASK_SET:
5056 		return ("Query Task Set");
5057 	case SAS_QUERY_UNIT_ATTENTION:
5058 		return ("Query Unit Attention");
5059 	default:
5060 		return ("Unknown");
5061 	}
5062 }
5063 
5064 const char *
5065 pmcs_status_str(uint32_t status)
5066 {
5067 	switch (status) {
5068 	case PMCOUT_STATUS_OK:
5069 		return ("OK");
5070 	case PMCOUT_STATUS_ABORTED:
5071 		return ("ABORTED");
5072 	case PMCOUT_STATUS_OVERFLOW:
5073 		return ("OVERFLOW");
5074 	case PMCOUT_STATUS_UNDERFLOW:
5075 		return ("UNDERFLOW");
5076 	case PMCOUT_STATUS_FAILED:
5077 		return ("FAILED");
5078 	case PMCOUT_STATUS_ABORT_RESET:
5079 		return ("ABORT_RESET");
5080 	case PMCOUT_STATUS_IO_NOT_VALID:
5081 		return ("IO_NOT_VALID");
5082 	case PMCOUT_STATUS_NO_DEVICE:
5083 		return ("NO_DEVICE");
5084 	case PMCOUT_STATUS_ILLEGAL_PARAMETER:
5085 		return ("ILLEGAL_PARAMETER");
5086 	case PMCOUT_STATUS_LINK_FAILURE:
5087 		return ("LINK_FAILURE");
5088 	case PMCOUT_STATUS_PROG_ERROR:
5089 		return ("PROG_ERROR");
5090 	case PMCOUT_STATUS_EDC_IN_ERROR:
5091 		return ("EDC_IN_ERROR");
5092 	case PMCOUT_STATUS_EDC_OUT_ERROR:
5093 		return ("EDC_OUT_ERROR");
5094 	case PMCOUT_STATUS_ERROR_HW_TIMEOUT:
5095 		return ("ERROR_HW_TIMEOUT");
5096 	case PMCOUT_STATUS_XFER_ERR_BREAK:
5097 		return ("XFER_ERR_BREAK");
5098 	case PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY:
5099 		return ("XFER_ERR_PHY_NOT_READY");
5100 	case PMCOUT_STATUS_OPEN_CNX_PROTOCOL_NOT_SUPPORTED:
5101 		return ("OPEN_CNX_PROTOCOL_NOT_SUPPORTED");
5102 	case PMCOUT_STATUS_OPEN_CNX_ERROR_ZONE_VIOLATION:
5103 		return ("OPEN_CNX_ERROR_ZONE_VIOLATION");
5104 	case PMCOUT_STATUS_OPEN_CNX_ERROR_BREAK:
5105 		return ("OPEN_CNX_ERROR_BREAK");
5106 	case PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS:
5107 		return ("OPEN_CNX_ERROR_IT_NEXUS_LOSS");
5108 	case PMCOUT_STATUS_OPENCNX_ERROR_BAD_DESTINATION:
5109 		return ("OPENCNX_ERROR_BAD_DESTINATION");
5110 	case PMCOUT_STATUS_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED:
5111 		return ("OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED");
5112 	case PMCOUT_STATUS_OPEN_CNX_ERROR_STP_RESOURCES_BUSY:
5113 		return ("OPEN_CNX_ERROR_STP_RESOURCES_BUSY");
5114 	case PMCOUT_STATUS_OPEN_CNX_ERROR_WRONG_DESTINATION:
5115 		return ("OPEN_CNX_ERROR_WRONG_DESTINATION");
5116 	case PMCOUT_STATUS_OPEN_CNX_ERROR_UNKNOWN_EROOR:
5117 		return ("OPEN_CNX_ERROR_UNKNOWN_EROOR");
5118 	case PMCOUT_STATUS_IO_XFER_ERROR_NAK_RECEIVED:
5119 		return ("IO_XFER_ERROR_NAK_RECEIVED");
5120 	case PMCOUT_STATUS_XFER_ERROR_ACK_NAK_TIMEOUT:
5121 		return ("XFER_ERROR_ACK_NAK_TIMEOUT");
5122 	case PMCOUT_STATUS_XFER_ERROR_PEER_ABORTED:
5123 		return ("XFER_ERROR_PEER_ABORTED");
5124 	case PMCOUT_STATUS_XFER_ERROR_RX_FRAME:
5125 		return ("XFER_ERROR_RX_FRAME");
5126 	case PMCOUT_STATUS_IO_XFER_ERROR_DMA:
5127 		return ("IO_XFER_ERROR_DMA");
5128 	case PMCOUT_STATUS_XFER_ERROR_CREDIT_TIMEOUT:
5129 		return ("XFER_ERROR_CREDIT_TIMEOUT");
5130 	case PMCOUT_STATUS_XFER_ERROR_SATA_LINK_TIMEOUT:
5131 		return ("XFER_ERROR_SATA_LINK_TIMEOUT");
5132 	case PMCOUT_STATUS_XFER_ERROR_SATA:
5133 		return ("XFER_ERROR_SATA");
5134 	case PMCOUT_STATUS_XFER_ERROR_REJECTED_NCQ_MODE:
5135 		return ("XFER_ERROR_REJECTED_NCQ_MODE");
5136 	case PMCOUT_STATUS_XFER_ERROR_ABORTED_DUE_TO_SRST:
5137 		return ("XFER_ERROR_ABORTED_DUE_TO_SRST");
5138 	case PMCOUT_STATUS_XFER_ERROR_ABORTED_NCQ_MODE:
5139 		return ("XFER_ERROR_ABORTED_NCQ_MODE");
5140 	case PMCOUT_STATUS_IO_XFER_OPEN_RETRY_TIMEOUT:
5141 		return ("IO_XFER_OPEN_RETRY_TIMEOUT");
5142 	case PMCOUT_STATUS_SMP_RESP_CONNECTION_ERROR:
5143 		return ("SMP_RESP_CONNECTION_ERROR");
5144 	case PMCOUT_STATUS_XFER_ERROR_UNEXPECTED_PHASE:
5145 		return ("XFER_ERROR_UNEXPECTED_PHASE");
5146 	case PMCOUT_STATUS_XFER_ERROR_RDY_OVERRUN:
5147 		return ("XFER_ERROR_RDY_OVERRUN");
5148 	case PMCOUT_STATUS_XFER_ERROR_RDY_NOT_EXPECTED:
5149 		return ("XFER_ERROR_RDY_NOT_EXPECTED");
5150 	case PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT:
5151 		return ("XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT");
5152 	case PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_BREAK_BEFORE_ACK_NACK:
5153 		return ("XFER_ERROR_CMD_ISSUE_BREAK_BEFORE_ACK_NACK");
5154 	case PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_PHY_DOWN_BEFORE_ACK_NAK:
5155 		return ("XFER_ERROR_CMD_ISSUE_PHY_DOWN_BEFORE_ACK_NAK");
5156 	case PMCOUT_STATUS_XFER_ERROR_OFFSET_MISMATCH:
5157 		return ("XFER_ERROR_OFFSET_MISMATCH");
5158 	case PMCOUT_STATUS_XFER_ERROR_ZERO_DATA_LEN:
5159 		return ("XFER_ERROR_ZERO_DATA_LEN");
5160 	case PMCOUT_STATUS_XFER_CMD_FRAME_ISSUED:
5161 		return ("XFER_CMD_FRAME_ISSUED");
5162 	case PMCOUT_STATUS_ERROR_INTERNAL_SMP_RESOURCE:
5163 		return ("ERROR_INTERNAL_SMP_RESOURCE");
5164 	case PMCOUT_STATUS_IO_PORT_IN_RESET:
5165 		return ("IO_PORT_IN_RESET");
5166 	case PMCOUT_STATUS_IO_DS_NON_OPERATIONAL:
5167 		return ("DEVICE STATE NON-OPERATIONAL");
5168 	case PMCOUT_STATUS_IO_DS_IN_RECOVERY:
5169 		return ("DEVICE STATE IN RECOVERY");
5170 	default:
5171 		return (NULL);
5172 	}
5173 }
5174 
5175 uint64_t
5176 pmcs_barray2wwn(uint8_t ba[8])
5177 {
5178 	uint64_t result = 0;
5179 	int i;
5180 
5181 	for (i = 0; i < 8; i++) {
5182 		result <<= 8;
5183 		result |= ba[i];
5184 	}
5185 	return (result);
5186 }
5187 
5188 void
5189 pmcs_wwn2barray(uint64_t wwn, uint8_t ba[8])
5190 {
5191 	int i;
5192 	for (i = 0; i < 8; i++) {
5193 		ba[7 - i] = wwn & 0xff;
5194 		wwn >>= 8;
5195 	}
5196 }
5197 
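/*
 * The byte array in both routines above is the big-endian (wire order)
 * form of the 64-bit SAS address.  For example (an illustrative WWN),
 * 0x5000c50012345678 corresponds to
 * { 0x50, 0x00, 0xc5, 0x00, 0x12, 0x34, 0x56, 0x78 }.
 */
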
5198 void
5199 pmcs_report_fwversion(pmcs_hw_t *pwp)
5200 {
5201 	const char *fwsupport;
5202 	switch (PMCS_FW_TYPE(pwp)) {
5203 	case PMCS_FW_TYPE_RELEASED:
5204 		fwsupport = "Released";
5205 		break;
5206 	case PMCS_FW_TYPE_DEVELOPMENT:
5207 		fwsupport = "Development";
5208 		break;
5209 	case PMCS_FW_TYPE_ALPHA:
5210 		fwsupport = "Alpha";
5211 		break;
5212 	case PMCS_FW_TYPE_BETA:
5213 		fwsupport = "Beta";
5214 		break;
5215 	default:
5216 		fwsupport = "Special";
5217 		break;
5218 	}
5219 	pmcs_prt(pwp, PMCS_PRT_INFO,
5220 	    "Chip Revision: %c; F/W Revision %x.%x.%x %s", 'A' + pwp->chiprev,
5221 	    PMCS_FW_MAJOR(pwp), PMCS_FW_MINOR(pwp), PMCS_FW_MICRO(pwp),
5222 	    fwsupport);
5223 }
5224 
5225 void
5226 pmcs_phy_name(pmcs_hw_t *pwp, pmcs_phy_t *pptr, char *obuf, size_t olen)
5227 {
5228 	if (pptr->parent) {
5229 		pmcs_phy_name(pwp, pptr->parent, obuf, olen);
5230 		(void) snprintf(obuf + strlen(obuf), olen - strlen(obuf),
		    ".%02x", pptr->phynum);
5231 	} else {
5232 		(void) snprintf(obuf, olen, "pp%02x", pptr->phynum);
5233 	}
5234 }
5235 
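/*
 * For example, root PHY 4 renders as "pp04", while a PHY reached
 * through root PHY 4 and expander PHY 0x0a renders as "pp04.0a".
 */
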
5236 /*
5237  * Implementation for pmcs_find_phy_by_devid.
5238  * If the PHY is found, it is returned locked.
5239  */
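/*
 * Note the locking discipline of the walk below: the current PHY is
 * unlocked before descending into its child subtree, so the walk never
 * holds locks on two levels of the tree at once and cannot deadlock
 * against another thread walking the same tree.
 */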
5240 static pmcs_phy_t *
5241 pmcs_find_phy_by_devid_impl(pmcs_phy_t *phyp, uint32_t device_id)
5242 {
5243 	pmcs_phy_t *match, *cphyp, *nphyp;
5244 
5245 	ASSERT(!mutex_owned(&phyp->phy_lock));
5246 
5247 	while (phyp) {
5248 		pmcs_lock_phy(phyp);
5249 
5250 		if ((phyp->valid_device_id) && (phyp->device_id == device_id)) {
5251 			return (phyp);
5252 		}
5253 		if (phyp->children) {
5254 			cphyp = phyp->children;
5255 			pmcs_unlock_phy(phyp);
5256 			match = pmcs_find_phy_by_devid_impl(cphyp, device_id);
5257 			if (match) {
5258 				ASSERT(mutex_owned(&match->phy_lock));
5259 				return (match);
5260 			}
5261 			pmcs_lock_phy(phyp);
5262 		}
5263 
5264 		if (IS_ROOT_PHY(phyp)) {
5265 			pmcs_unlock_phy(phyp);
5266 			phyp = NULL;
5267 		} else {
5268 			nphyp = phyp->sibling;
5269 			pmcs_unlock_phy(phyp);
5270 			phyp = nphyp;
5271 		}
5272 	}
5273 
5274 	return (NULL);
5275 }
5276 
5277 /*
5278  * If the PHY is found, it is returned locked
5279  */
5280 pmcs_phy_t *
5281 pmcs_find_phy_by_devid(pmcs_hw_t *pwp, uint32_t device_id)
5282 {
5283 	pmcs_phy_t *phyp, *match = NULL;
5284 
5285 	phyp = pwp->root_phys;
5286 
5287 	while (phyp) {
5288 		match = pmcs_find_phy_by_devid_impl(phyp, device_id);
5289 		if (match) {
5290 			ASSERT(mutex_owned(&match->phy_lock));
5291 			return (match);
5292 		}
5293 		phyp = phyp->sibling;
5294 	}
5295 
5296 	return (NULL);
5297 }
5298 
5299 /*
5300  * This function is called as a sanity check to ensure that a newly registered
5301  * PHY doesn't have a device_id that exists with another registered PHY.
5302  */
5303 static boolean_t
5304 pmcs_validate_devid(pmcs_phy_t *parent, pmcs_phy_t *phyp, uint32_t device_id)
5305 {
5306 	pmcs_phy_t *pptr;
5307 	boolean_t rval;
5308 
5309 	pptr = parent;
5310 
5311 	while (pptr) {
5312 		if (pptr->valid_device_id && (pptr != phyp) &&
5313 		    (pptr->device_id == device_id)) {
5314 			pmcs_prt(pptr->pwp, PMCS_PRT_DEBUG,
5315 			    "%s: phy %s already exists as %s with "
5316 			    "device id 0x%x", __func__, phyp->path,
5317 			    pptr->path, device_id);
5318 			return (B_FALSE);
5319 		}
5320 
5321 		if (pptr->children) {
5322 			rval = pmcs_validate_devid(pptr->children, phyp,
5323 			    device_id);
5324 			if (rval == B_FALSE) {
5325 				return (rval);
5326 			}
5327 		}
5328 
5329 		pptr = pptr->sibling;
5330 	}
5331 
5332 	/* This PHY and device_id are valid */
5333 	return (B_TRUE);
5334 }
5335 
5336 /*
5337  * If the PHY is found, it is returned locked
5338  */
5339 static pmcs_phy_t *
5340 pmcs_find_phy_by_wwn_impl(pmcs_phy_t *phyp, uint8_t *wwn)
5341 {
5342 	pmcs_phy_t *matched_phy, *cphyp, *nphyp;
5343 
5344 	ASSERT(!mutex_owned(&phyp->phy_lock));
5345 
5346 	while (phyp) {
5347 		pmcs_lock_phy(phyp);
5348 
5349 		if (phyp->valid_device_id) {
5350 			if (memcmp(phyp->sas_address, wwn, 8) == 0) {
5351 				return (phyp);
5352 			}
5353 		}
5354 
5355 		if (phyp->children) {
5356 			cphyp = phyp->children;
5357 			pmcs_unlock_phy(phyp);
5358 			matched_phy = pmcs_find_phy_by_wwn_impl(cphyp, wwn);
5359 			if (matched_phy) {
5360 				ASSERT(mutex_owned(&matched_phy->phy_lock));
5361 				return (matched_phy);
5362 			}
5363 			pmcs_lock_phy(phyp);
5364 		}
5365 
5366 		/*
5367 		 * Only iterate through non-root PHYs
5368 		 */
5369 		if (IS_ROOT_PHY(phyp)) {
5370 			pmcs_unlock_phy(phyp);
5371 			phyp = NULL;
5372 		} else {
5373 			nphyp = phyp->sibling;
5374 			pmcs_unlock_phy(phyp);
5375 			phyp = nphyp;
5376 		}
5377 	}
5378 
5379 	return (NULL);
5380 }
5381 
5382 pmcs_phy_t *
5383 pmcs_find_phy_by_wwn(pmcs_hw_t *pwp, uint64_t wwn)
5384 {
5385 	uint8_t ebstr[8];
5386 	pmcs_phy_t *pptr, *matched_phy;
5387 
5388 	pmcs_wwn2barray(wwn, ebstr);
5389 
5390 	pptr = pwp->root_phys;
5391 	while (pptr) {
5392 		matched_phy = pmcs_find_phy_by_wwn_impl(pptr, ebstr);
5393 		if (matched_phy) {
5394 			ASSERT(mutex_owned(&matched_phy->phy_lock));
5395 			return (matched_phy);
5396 		}
5397 
5398 		pptr = pptr->sibling;
5399 	}
5400 
5401 	return (NULL);
5402 }
5403 
5404 
5405 /*
5406  * pmcs_find_phy_by_sas_address
5407  *
5408  * Find a PHY that both matches "sas_addr" and is on "iport".
5409  * If a matching PHY is found, it is returned locked.
5410  */
5411 pmcs_phy_t *
5412 pmcs_find_phy_by_sas_address(pmcs_hw_t *pwp, pmcs_iport_t *iport,
5413     pmcs_phy_t *root, char *sas_addr)
5414 {
5415 	int ua_form = 1;
5416 	uint64_t wwn;
5417 	char addr[PMCS_MAX_UA_SIZE];
5418 	pmcs_phy_t *pptr, *pnext, *pchild;
5419 
5420 	if (root == NULL) {
5421 		pptr = pwp->root_phys;
5422 	} else {
5423 		pptr = root;
5424 	}
5425 
5426 	while (pptr) {
5427 		pmcs_lock_phy(pptr);
5428 		/*
5429 		 * If the PHY is dead or does not have a valid device ID,
5430 		 * skip it.
5431 		 */
5432 		if ((pptr->dead) || (!pptr->valid_device_id)) {
5433 			goto next_phy;
5434 		}
5435 
5436 		if (pptr->iport != iport) {
5437 			goto next_phy;
5438 		}
5439 
5440 		wwn = pmcs_barray2wwn(pptr->sas_address);
5441 		(void) scsi_wwn_to_wwnstr(wwn, ua_form, addr);
5442 		if (strncmp(addr, sas_addr, strlen(addr)) == 0) {
5443 			return (pptr);
5444 		}
5445 
5446 		if (pptr->children) {
5447 			pchild = pptr->children;
5448 			pmcs_unlock_phy(pptr);
5449 			pnext = pmcs_find_phy_by_sas_address(pwp, iport, pchild,
5450 			    sas_addr);
5451 			if (pnext) {
5452 				return (pnext);
5453 			}
5454 			pmcs_lock_phy(pptr);
5455 		}
5456 
5457 next_phy:
5458 		pnext = pptr->sibling;
5459 		pmcs_unlock_phy(pptr);
5460 		pptr = pnext;
5461 	}
5462 
5463 	return (NULL);
5464 }
5465 
5466 void
5467 pmcs_fis_dump(pmcs_hw_t *pwp, fis_t fis)
5468 {
5469 	switch (fis[0] & 0xff) {
5470 	case FIS_REG_H2DEV:
5471 		pmcs_prt(pwp, PMCS_PRT_INFO, "FIS REGISTER HOST TO DEVICE: "
5472 		    "OP=0x%02x Feature=0x%04x Count=0x%04x Device=0x%02x "
5473 		    "LBA=%llu", BYTE2(fis[0]), BYTE3(fis[2]) << 8 |
5474 		    BYTE3(fis[0]), WORD0(fis[3]), BYTE3(fis[1]),
5475 		    (unsigned long long)
5476 		    (((uint64_t)fis[2] & 0x00ffffff) << 24 |
5477 		    ((uint64_t)fis[1] & 0x00ffffff)));
5478 		break;
5479 	case FIS_REG_D2H:
5480 		pmcs_prt(pwp, PMCS_PRT_INFO, "FIS REGISTER DEVICE TO HOST: Stat"
5481 		    "us=0x%02x Error=0x%02x Dev=0x%02x Count=0x%04x LBA=%llu",
5482 		    BYTE2(fis[0]), BYTE3(fis[0]), BYTE3(fis[1]), WORD0(fis[3]),
5483 		    (unsigned long long)(((uint64_t)fis[2] & 0x00ffffff) << 24 |
5484 		    ((uint64_t)fis[1] & 0x00ffffff)));
5485 		break;
5486 	default:
5487 		pmcs_prt(pwp, PMCS_PRT_INFO, "FIS: 0x%08x 0x%08x 0x%08x 0x%08x "
5488 		    "0x%08x 0x%08x 0x%08x",
5489 		    fis[0], fis[1], fis[2], fis[3], fis[4], fis[5], fis[6]);
5490 		break;
5491 	}
5492 }
5493 
5494 void
5495 pmcs_print_entry(pmcs_hw_t *pwp, int level, char *msg, void *arg)
5496 {
5497 	uint32_t *mb = arg;
5498 	size_t i;
5499 
5500 	pmcs_prt(pwp, level, msg);
5501 	for (i = 0; i < (PMCS_QENTRY_SIZE / sizeof (uint32_t)); i += 4) {
5502 		pmcs_prt(pwp, level, "Offset %2lu: 0x%08x 0x%08x 0x%08"
5503 		    "x 0x%08x", i * sizeof (uint32_t), LE_32(mb[i]),
5504 		    LE_32(mb[i+1]), LE_32(mb[i+2]),
5505 		    LE_32(mb[i+3]));
5506 	}
5507 }
5508 
5509 /*
5510  * If phyp == NULL we're being called from the worker thread, in which
5511  * case we need to check all the PHYs.  In this case, the softstate lock
5512  * will be held.
5513  * If phyp is non-NULL, just issue the spinup release for the specified PHY
5514  * (which will already be locked).
5515  */
5516 void
5517 pmcs_spinup_release(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
5518 {
5519 	uint32_t *msg;
5520 	struct pmcwork *pwrk;
5521 	pmcs_phy_t *tphyp;
5522 
5523 	if (phyp != NULL) {
5524 		ASSERT(mutex_owned(&phyp->phy_lock));
5525 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
5526 		    "%s: Issuing spinup release only for PHY %s", __func__,
5527 		    phyp->path);
5528 		mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5529 		msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5530 		if (msg == NULL || (pwrk =
5531 		    pmcs_gwork(pwp, PMCS_TAG_TYPE_NONE, NULL)) == NULL) {
5532 			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5533 			SCHEDULE_WORK(pwp, PMCS_WORK_SPINUP_RELEASE);
5534 			return;
5535 		}
5536 
5537 		phyp->spinup_hold = 0;
5538 		bzero(msg, PMCS_QENTRY_SIZE);
5539 		msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
5540 		    PMCIN_LOCAL_PHY_CONTROL));
5541 		msg[1] = LE_32(pwrk->htag);
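		/*
		 * Word 2 of LOCAL_PHY_CONTROL: PHY operation in bits 15:8
		 * (0x10 here, i.e. spinup release) and PHY number in 7:0.
		 */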
5542 		msg[2] = LE_32((0x10 << 8) | phyp->phynum);
5543 
5544 		pwrk->dtype = phyp->dtype;
5545 		pwrk->state = PMCS_WORK_STATE_ONCHIP;
5546 		mutex_exit(&pwrk->lock);
5547 		INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5548 		return;
5549 	}
5550 
5551 	ASSERT(mutex_owned(&pwp->lock));
5552 
5553 	tphyp = pwp->root_phys;
5554 	while (tphyp) {
5555 		pmcs_lock_phy(tphyp);
5556 		if (tphyp->spinup_hold == 0) {
5557 			pmcs_unlock_phy(tphyp);
5558 			tphyp = tphyp->sibling;
5559 			continue;
5560 		}
5561 
5562 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
5563 		    "%s: Issuing spinup release for PHY %s", __func__,
5564 		    tphyp->path);
5565 
5566 		mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5567 		msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5568 		if (msg == NULL || (pwrk =
5569 		    pmcs_gwork(pwp, PMCS_TAG_TYPE_NONE, NULL)) == NULL) {
5570 			pmcs_unlock_phy(tphyp);
5571 			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5572 			SCHEDULE_WORK(pwp, PMCS_WORK_SPINUP_RELEASE);
5573 			break;
5574 		}
5575 
5576 		tphyp->spinup_hold = 0;
5577 		bzero(msg, PMCS_QENTRY_SIZE);
5578 		msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
5579 		    PMCIN_LOCAL_PHY_CONTROL));
5580 		msg[1] = LE_32(pwrk->htag);
5581 		msg[2] = LE_32((0x10 << 8) | tphyp->phynum);
5582 
5583 		pwrk->dtype = tphyp->dtype;
5584 		pwrk->state = PMCS_WORK_STATE_ONCHIP;
5585 		mutex_exit(&pwrk->lock);
5586 		INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5587 		pmcs_unlock_phy(tphyp);
5588 
5589 		tphyp = tphyp->sibling;
5590 	}
5591 }
5592 
5593 /*
5594  * Abort commands on dead PHYs, deregister them, and remove the
5595  * associated targets.
5596  */
5597 static int
5598 pmcs_kill_devices(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
5599 {
5600 	pmcs_phy_t *pnext, *pchild;
5601 	boolean_t remove_device;
5602 	int rval = 0;
5603 
5604 	while (phyp) {
5605 		pmcs_lock_phy(phyp);
5606 		pchild = phyp->children;
5607 		pnext = phyp->sibling;
5608 		pmcs_unlock_phy(phyp);
5609 
5610 		if (pchild) {
5611 			rval = pmcs_kill_devices(pwp, pchild);
5612 			if (rval) {
5613 				return (rval);
5614 			}
5615 		}
5616 
5617 		/*
5618 		 * pmcs_remove_device requires the softstate lock.
5619 		 */
5620 		mutex_enter(&pwp->lock);
5621 		pmcs_lock_phy(phyp);
5622 		if (phyp->dead && phyp->valid_device_id) {
5623 			remove_device = B_TRUE;
5624 		} else {
5625 			remove_device = B_FALSE;
5626 		}
5627 
5628 		if (remove_device) {
5629 			pmcs_remove_device(pwp, phyp);
5630 			mutex_exit(&pwp->lock);
5631 
5632 			rval = pmcs_kill_device(pwp, phyp);
5633 
5634 			if (rval) {
5635 				pmcs_unlock_phy(phyp);
5636 				return (rval);
5637 			}
5638 		} else {
5639 			mutex_exit(&pwp->lock);
5640 		}
5641 
5642 		pmcs_unlock_phy(phyp);
5643 		phyp = pnext;
5644 	}
5645 
5646 	return (rval);
5647 }
5648 
5649 /*
5650  * Called with PHY locked
5651  */
5652 int
5653 pmcs_kill_device(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
5654 {
5655 	int r, result;
5656 	uint32_t msg[PMCS_MSG_SIZE], *ptr, status;
5657 	struct pmcwork *pwrk;
5658 	pmcs_xscsi_t *tgt;
5659 
5660 	pmcs_prt(pwp, PMCS_PRT_DEBUG, "kill %s device @ %s",
5661 	    pmcs_get_typename(pptr->dtype), pptr->path);
5662 
5663 	/*
5664 	 * There may be an outstanding ABORT_ALL running, which we wouldn't
5665 	 * know just by checking abort_pending.  We can, however, check
5666 	 * abort_all_start.  If it's non-zero, there is one, and we'll just
5667 	 * sit here and wait for it to complete.  If we don't, we'll remove
5668 	 * the device while there are still commands pending.
5669 	 */
5670 	if (pptr->abort_all_start) {
5671 		while (pptr->abort_all_start) {
5672 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
5673 			    "%s: Waiting for outstanding ABORT_ALL on PHY 0x%p",
5674 			    __func__, (void *)pptr);
5675 			cv_wait(&pptr->abort_all_cv, &pptr->phy_lock);
5676 		}
5677 	} else if (pptr->abort_pending) {
5678 		r = pmcs_abort(pwp, pptr, pptr->device_id, 1, 1);
5679 
5680 		if (r) {
5681 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
5682 			    "%s: ABORT_ALL returned non-zero status (%d) for "
5683 			    "PHY 0x%p", __func__, r, (void *)pptr);
5684 			return (r);
5685 		}
5686 		pptr->abort_pending = 0;
5687 	}
5688 
5689 	/*
5690 	 * Now that everything is aborted from the chip's perspective (or even
5691 	 * if it is not), flush out the wait queue.  We won't flush the active
5692 	 * queue since it is possible that abort completions may follow after
5693 	 * the notification that the abort all has completed.
5694 	 */
5695 	tgt = pptr->target;
5696 	if (tgt) {
5697 		mutex_enter(&tgt->statlock);
5698 		pmcs_flush_target_queues(pwp, tgt, PMCS_TGT_WAIT_QUEUE);
5699 		mutex_exit(&tgt->statlock);
5700 	}
5701 
5702 	if (pptr->valid_device_id == 0) {
5703 		return (0);
5704 	}
5705 
5706 	if ((pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr)) == NULL) {
5707 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
5708 		return (ENOMEM);
5709 	}
5710 	pwrk->arg = msg;
5711 	pwrk->dtype = pptr->dtype;
5712 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
5713 	    PMCIN_DEREGISTER_DEVICE_HANDLE));
5714 	msg[1] = LE_32(pwrk->htag);
5715 	msg[2] = LE_32(pptr->device_id);
5716 
5717 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5718 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5719 	if (ptr == NULL) {
5720 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5721 		mutex_exit(&pwrk->lock);
5722 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
5723 		return (ENOMEM);
5724 	}
5725 
5726 	COPY_MESSAGE(ptr, msg, 3);
5727 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
5728 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5729 
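	/*
	 * Drop the PHY lock across the synchronous wait so completion
	 * processing that may need it is not blocked, then reacquire it
	 * before freeing the work structure.
	 */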
5730 	pmcs_unlock_phy(pptr);
5731 	WAIT_FOR(pwrk, 250, result);
5732 	pmcs_lock_phy(pptr);
5733 	pmcs_pwork(pwp, pwrk);
5734 
5735 	if (result) {
5736 		return (ETIMEDOUT);
5737 	}
5738 	status = LE_32(msg[2]);
5739 	if (status != PMCOUT_STATUS_OK) {
5740 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
5741 		    "%s: status 0x%x when trying to deregister device %s",
5742 		    __func__, status, pptr->path);
5743 	}
5744 
5745 	pptr->device_id = PMCS_INVALID_DEVICE_ID;
5746 	PHY_CHANGED(pwp, pptr);
5747 	RESTART_DISCOVERY(pwp);
5748 	pptr->valid_device_id = 0;
5749 	return (0);
5750 }
5751 
5752 /*
5753  * Acknowledge the SAS h/w events that need acknowledgement.
5754  * This is only needed for first level PHYs.
5755  */
5756 void
5757 pmcs_ack_events(pmcs_hw_t *pwp)
5758 {
5759 	uint32_t msg[PMCS_MSG_SIZE], *ptr;
5760 	struct pmcwork *pwrk;
5761 	pmcs_phy_t *pptr;
5762 
5763 	for (pptr = pwp->root_phys; pptr; pptr = pptr->sibling) {
5764 		pmcs_lock_phy(pptr);
5765 		if (pptr->hw_event_ack == 0) {
5766 			pmcs_unlock_phy(pptr);
5767 			continue;
5768 		}
5769 		mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5770 		ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5771 
5772 		if ((ptr == NULL) || (pwrk =
5773 		    pmcs_gwork(pwp, PMCS_TAG_TYPE_NONE, NULL)) == NULL) {
5774 			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5775 			pmcs_unlock_phy(pptr);
5776 			SCHEDULE_WORK(pwp, PMCS_WORK_SAS_HW_ACK);
5777 			break;
5778 		}
5779 
5780 		msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
5781 		    PMCIN_SAW_HW_EVENT_ACK));
5782 		msg[1] = LE_32(pwrk->htag);
5783 		msg[2] = LE_32(pptr->hw_event_ack);
5784 
5785 		pwrk->dtype = pptr->dtype;
5786 		mutex_exit(&pwrk->lock);
5787 		pptr->hw_event_ack = 0;
5788 		COPY_MESSAGE(ptr, msg, 3);
5789 		INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5790 		pmcs_unlock_phy(pptr);
5791 	}
5792 }
5793 
5794 /*
5795  * Load DMA
5796  */
5797 int
5798 pmcs_dma_load(pmcs_hw_t *pwp, pmcs_cmd_t *sp, uint32_t *msg)
5799 {
5800 	ddi_dma_cookie_t *sg;
5801 	pmcs_dmachunk_t *tc;
5802 	pmcs_dmasgl_t *sgl, *prior;
5803 	int seg, tsc;
5804 	uint64_t sgl_addr;
5805 
5806 	/*
5807 	 * If we have no data segments, we're done.
5808 	 */
5809 	if (CMD2PKT(sp)->pkt_numcookies == 0) {
5810 		return (0);
5811 	}
5812 
5813 	/*
5814 	 * Get the S/G list pointer.
5815 	 */
5816 	sg = CMD2PKT(sp)->pkt_cookies;
5817 
5818 	/*
5819 	 * If we only have one dma segment, we can directly address that
5820 	 * data within the Inbound message itself.
5821 	 */
5822 	if (CMD2PKT(sp)->pkt_numcookies == 1) {
5823 		msg[12] = LE_32(DWORD0(sg->dmac_laddress));
5824 		msg[13] = LE_32(DWORD1(sg->dmac_laddress));
5825 		msg[14] = LE_32(sg->dmac_size);
5826 		msg[15] = 0;
5827 		return (0);
5828 	}
5829 
5830 	/*
5831 	 * Otherwise, we'll need one or more external S/G list chunks.
5832 	 * Get the first one and its dma address into the Inbound message.
5833 	 */
5834 	mutex_enter(&pwp->dma_lock);
5835 	tc = pwp->dma_freelist;
5836 	if (tc == NULL) {
5837 		SCHEDULE_WORK(pwp, PMCS_WORK_ADD_DMA_CHUNKS);
5838 		mutex_exit(&pwp->dma_lock);
5839 		pmcs_prt(pwp, PMCS_PRT_DEBUG2, "%s: out of SG lists", __func__);
5840 		return (-1);
5841 	}
5842 	pwp->dma_freelist = tc->nxt;
5843 	mutex_exit(&pwp->dma_lock);
5844 
5845 	tc->nxt = NULL;
5846 	sp->cmd_clist = tc;
5847 	sgl = tc->chunks;
5848 	(void) memset(tc->chunks, 0, PMCS_SGL_CHUNKSZ);
5849 	sgl_addr = tc->addr;
5850 	msg[12] = LE_32(DWORD0(sgl_addr));
5851 	msg[13] = LE_32(DWORD1(sgl_addr));
5852 	msg[14] = 0;
5853 	msg[15] = LE_32(PMCS_DMASGL_EXTENSION);
5854 
5855 	prior = sgl;
5856 	tsc = 0;
5857 
5858 	for (seg = 0; seg < CMD2PKT(sp)->pkt_numcookies; seg++) {
5859 		/*
5860 		 * If the current segment count for this chunk is one less than
5861 		 * the number of S/G lists per chunk and we have more than one
5862 		 * seg to go, we need another chunk. Get it, and make sure that
5863 		 * the tail end of the previous chunk points to the new chunk
5864 		 * (if remembering an offset can be called 'pointing to').
5865 		 *
5866 		 * Note that we can store the offset into our command area that
5867 		 * represents the new chunk in the length field of the part
5868 		 * that points the PMC chip at the next chunk - the PMC chip
5869 		 * ignores this field when the EXTENSION bit is set.
5870 		 *
5871 		 * This is required for dma unloads later.
5872 		 */
5873 		if (tsc == (PMCS_SGL_NCHUNKS - 1) &&
5874 		    seg < (CMD2PKT(sp)->pkt_numcookies - 1)) {
5875 			mutex_enter(&pwp->dma_lock);
5876 			tc = pwp->dma_freelist;
5877 			if (tc == NULL) {
5878 				SCHEDULE_WORK(pwp, PMCS_WORK_ADD_DMA_CHUNKS);
5879 				mutex_exit(&pwp->dma_lock);
5880 				pmcs_dma_unload(pwp, sp);
5881 				pmcs_prt(pwp, PMCS_PRT_DEBUG2,
5882 				    "%s: out of SG lists", __func__);
5883 				return (-1);
5884 			}
5885 			pwp->dma_freelist = tc->nxt;
5886 			tc->nxt = sp->cmd_clist;
5887 			mutex_exit(&pwp->dma_lock);
5888 
5889 			sp->cmd_clist = tc;
5890 			(void) memset(tc->chunks, 0, PMCS_SGL_CHUNKSZ);
5891 			sgl = tc->chunks;
5892 			sgl_addr = tc->addr;
5893 			prior[PMCS_SGL_NCHUNKS-1].sglal =
5894 			    LE_32(DWORD0(sgl_addr));
5895 			prior[PMCS_SGL_NCHUNKS-1].sglah =
5896 			    LE_32(DWORD1(sgl_addr));
5897 			prior[PMCS_SGL_NCHUNKS-1].sglen = 0;
5898 			prior[PMCS_SGL_NCHUNKS-1].flags =
5899 			    LE_32(PMCS_DMASGL_EXTENSION);
5900 			prior = sgl;
5901 			tsc = 0;
5902 		}
5903 		sgl[tsc].sglal = LE_32(DWORD0(sg->dmac_laddress));
5904 		sgl[tsc].sglah = LE_32(DWORD1(sg->dmac_laddress));
5905 		sgl[tsc].sglen = LE_32(sg->dmac_size);
5906 		sgl[tsc++].flags = 0;
5907 		sg++;
5908 	}
5909 	return (0);
5910 }
5911 
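/*
 * A sketch of the resulting S/G chain when more than one chunk is needed
 * (each chunk holds PMCS_SGL_NCHUNKS entries, the last of which is used
 * as an extension descriptor rather than a data segment):
 *
 *	IOMB words 12/13 ---> chunk 0: seg 0 .. seg N-2, [EXT --> chunk 1]
 *	                      chunk 1: seg N-1 .. ,      [EXT --> chunk 2]
 *
 * The EXTENSION flag tells the chip that the entry is a pointer to the
 * next chunk, not a data segment.
 */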
5912 /*
5913  * Unload DMA
5914  */
5915 void
5916 pmcs_dma_unload(pmcs_hw_t *pwp, pmcs_cmd_t *sp)
5917 {
5918 	pmcs_dmachunk_t *cp;
5919 
5920 	mutex_enter(&pwp->dma_lock);
5921 	while ((cp = sp->cmd_clist) != NULL) {
5922 		sp->cmd_clist = cp->nxt;
5923 		cp->nxt = pwp->dma_freelist;
5924 		pwp->dma_freelist = cp;
5925 	}
5926 	mutex_exit(&pwp->dma_lock);
5927 }
5928 
5929 /*
5930  * Take a chunk of consistent memory that has just been allocated and
5931  * inserted into the cip indices, prepare it for DMA chunk usage, and add
5932  * it to the freelist.
5933  *
5934  * Called with dma_lock locked (except during attach when it's unnecessary)
5935  */
5936 void
5937 pmcs_idma_chunks(pmcs_hw_t *pwp, pmcs_dmachunk_t *dcp,
5938     pmcs_chunk_t *pchunk, unsigned long lim)
5939 {
5940 	unsigned long off, n;
5941 	pmcs_dmachunk_t *np = dcp;
5942 	pmcs_chunk_t *tmp_chunk;
5943 
5944 	if (pwp->dma_chunklist == NULL) {
5945 		pwp->dma_chunklist = pchunk;
5946 	} else {
5947 		tmp_chunk = pwp->dma_chunklist;
5948 		while (tmp_chunk->next) {
5949 			tmp_chunk = tmp_chunk->next;
5950 		}
5951 		tmp_chunk->next = pchunk;
5952 	}
5953 
5954 	/*
5955 	 * Install offsets into chunk lists.
5956 	 */
5957 	for (n = 0, off = 0; off < lim; off += PMCS_SGL_CHUNKSZ, n++) {
5958 		np->chunks = (void *)&pchunk->addrp[off];
5959 		np->addr = pchunk->dma_addr + off;
5960 		np->acc_handle = pchunk->acc_handle;
5961 		np->dma_handle = pchunk->dma_handle;
5962 		if ((off + PMCS_SGL_CHUNKSZ) < lim) {
5963 			np = np->nxt;
5964 		}
5965 	}
5966 	np->nxt = pwp->dma_freelist;
5967 	pwp->dma_freelist = dcp;
5968 	pmcs_prt(pwp, PMCS_PRT_DEBUG2,
5969 	    "added %lu DMA chunks", n);
5970 }
5971 
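/*
 * Example: if lim covers four PMCS_SGL_CHUNKSZ-sized pieces, the loop
 * above initializes four freelist entries, each one pointing
 * PMCS_SGL_CHUNKSZ bytes further into the same backing allocation, and
 * the whole set is then pushed onto dma_freelist at once.
 */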
5972 /*
5973  * Change the value of the interrupt coalescing timer.  Currently this is
5974  * done only for I/O completions.  If we're using the "auto clear" feature,
5975  * it must be turned off while the coalescing timer is on, and it can be
5976  * turned back on once interrupt coalescing is turned off.
5977  * NOTE: PMCS_MSIX_GENERAL and PMCS_OQ_IODONE are the same value.  As long
5978  * as that's true, we don't need to distinguish between them.
5979  */
5980 
5981 void
5982 pmcs_set_intr_coal_timer(pmcs_hw_t *pwp, pmcs_coal_timer_adj_t adj)
5983 {
5984 	if (adj == DECREASE_TIMER) {
5985 		/* If the timer is already off, nothing to do. */
5986 		if (pwp->io_intr_coal.timer_on == B_FALSE) {
5987 			return;
5988 		}
5989 
5990 		pwp->io_intr_coal.intr_coal_timer -= PMCS_COAL_TIMER_GRAN;
5991 
5992 		if (pwp->io_intr_coal.intr_coal_timer == 0) {
5993 			/* Disable the timer */
5994 			pmcs_wr_topunit(pwp, PMCS_INT_COALESCING_CONTROL, 0);
5995 
5996 			if (pwp->odb_auto_clear & (1 << PMCS_MSIX_IODONE)) {
5997 				pmcs_wr_topunit(pwp, PMCS_OBDB_AUTO_CLR,
5998 				    pwp->odb_auto_clear);
5999 			}
6000 
6001 			pwp->io_intr_coal.timer_on = B_FALSE;
6002 			pwp->io_intr_coal.max_io_completions = 0;
6003 			pwp->io_intr_coal.num_intrs = 0;
6004 			pwp->io_intr_coal.int_cleared = B_FALSE;
6005 			pwp->io_intr_coal.num_io_completions = 0;
6006 
6007 			DTRACE_PROBE1(pmcs__intr__coalesce__timer__off,
6008 			    pmcs_io_intr_coal_t *, &pwp->io_intr_coal);
6009 		} else {
6010 			pmcs_wr_topunit(pwp, PMCS_INT_COALESCING_TIMER,
6011 			    pwp->io_intr_coal.intr_coal_timer);
6012 		}
6013 	} else {
6014 		/*
6015 		 * If the timer isn't on yet, do the setup for it now.
6016 		 */
6017 		if (pwp->io_intr_coal.timer_on == B_FALSE) {
6018 			/* If auto clear is being used, turn it off. */
6019 			if (pwp->odb_auto_clear & (1 << PMCS_MSIX_IODONE)) {
6020 				pmcs_wr_topunit(pwp, PMCS_OBDB_AUTO_CLR,
6021 				    (pwp->odb_auto_clear &
6022 				    ~(1 << PMCS_MSIX_IODONE)));
6023 			}
6024 
6025 			pmcs_wr_topunit(pwp, PMCS_INT_COALESCING_CONTROL,
6026 			    (1 << PMCS_MSIX_IODONE));
6027 			pwp->io_intr_coal.timer_on = B_TRUE;
6028 			pwp->io_intr_coal.intr_coal_timer =
6029 			    PMCS_COAL_TIMER_GRAN;
6030 
6031 			DTRACE_PROBE1(pmcs__intr__coalesce__timer__on,
6032 			    pmcs_io_intr_coal_t *, &pwp->io_intr_coal);
6033 		} else {
6034 			pwp->io_intr_coal.intr_coal_timer +=
6035 			    PMCS_COAL_TIMER_GRAN;
6036 		}
6037 
6038 		if (pwp->io_intr_coal.intr_coal_timer > PMCS_MAX_COAL_TIMER) {
6039 			pwp->io_intr_coal.intr_coal_timer = PMCS_MAX_COAL_TIMER;
6040 		}
6041 
6042 		pmcs_wr_topunit(pwp, PMCS_INT_COALESCING_TIMER,
6043 		    pwp->io_intr_coal.intr_coal_timer);
6044 	}
6045 
6046 	/*
6047 	 * Adjust the interrupt threshold based on the current timer value
6048 	 */
6049 	pwp->io_intr_coal.intr_threshold =
6050 	    PMCS_INTR_THRESHOLD(PMCS_QUANTUM_TIME_USECS * 1000 /
6051 	    (pwp->io_intr_coal.intr_latency +
6052 	    (pwp->io_intr_coal.intr_coal_timer * 1000)));
6053 }
6054 
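/*
 * A worked example of the threshold computation above (all numbers are
 * illustrative assumptions, not the actual tunables): with a quantum of
 * 10000us, a measured interrupt latency of 1200ns and a coalescing
 * timer of 100us, the argument to PMCS_INTR_THRESHOLD() works out to
 *
 *	10000 * 1000 / (1200 + (100 * 1000)) ~= 98
 *
 * i.e. the quantum in nanoseconds divided by the per-interrupt cost
 * (latency plus timer) in nanoseconds.
 */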
6055 /*
6056  * Register Access functions
6057  */
6058 uint32_t
6059 pmcs_rd_iqci(pmcs_hw_t *pwp, uint32_t qnum)
6060 {
6061 	uint32_t iqci;
6062 
6063 	if (ddi_dma_sync(pwp->cip_handles, 0, 0, DDI_DMA_SYNC_FORKERNEL) !=
6064 	    DDI_SUCCESS) {
6065 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: ddi_dma_sync failed?",
6066 		    __func__);
6067 	}
6068 
6069 	iqci = LE_32(
6070 	    ((uint32_t *)((void *)pwp->cip))[IQ_OFFSET(qnum) >> 2]);
6071 
6072 	return (iqci);
6073 }
6074 
6075 uint32_t
6076 pmcs_rd_oqpi(pmcs_hw_t *pwp, uint32_t qnum)
6077 {
6078 	uint32_t oqpi;
6079 
6080 	if (ddi_dma_sync(pwp->cip_handles, 0, 0, DDI_DMA_SYNC_FORKERNEL) !=
6081 	    DDI_SUCCESS) {
6082 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: ddi_dma_sync failed?",
6083 		    __func__);
6084 	}
6085 
6086 	oqpi = LE_32(
6087 	    ((uint32_t *)((void *)pwp->cip))[OQ_OFFSET(qnum) >> 2]);
6088 
6089 	return (oqpi);
6090 }
6091 
6092 uint32_t
6093 pmcs_rd_gsm_reg(pmcs_hw_t *pwp, uint32_t off)
6094 {
6095 	uint32_t rv, newaxil, oldaxil;
6096 
6097 	newaxil = off & ~GSM_BASE_MASK;
6098 	off &= GSM_BASE_MASK;
6099 	mutex_enter(&pwp->axil_lock);
6100 	oldaxil = ddi_get32(pwp->top_acc_handle,
6101 	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]);
6102 	ddi_put32(pwp->top_acc_handle,
6103 	    &pwp->top_regs[PMCS_AXI_TRANS >> 2], newaxil);
6104 	drv_usecwait(10);
6105 	if (ddi_get32(pwp->top_acc_handle,
6106 	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]) != newaxil) {
6107 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "AXIL register update failed");
6108 	}
6109 	rv = ddi_get32(pwp->gsm_acc_handle, &pwp->gsm_regs[off >> 2]);
6110 	ddi_put32(pwp->top_acc_handle,
6111 	    &pwp->top_regs[PMCS_AXI_TRANS >> 2], oldaxil);
6112 	drv_usecwait(10);
6113 	if (ddi_get32(pwp->top_acc_handle,
6114 	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]) != oldaxil) {
6115 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "AXIL register restore failed");
6116 	}
6117 	mutex_exit(&pwp->axil_lock);
6118 	return (rv);
6119 }
6120 
6121 void
6122 pmcs_wr_gsm_reg(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
6123 {
6124 	uint32_t newaxil, oldaxil;
6125 
6126 	newaxil = off & ~GSM_BASE_MASK;
6127 	off &= GSM_BASE_MASK;
6128 	mutex_enter(&pwp->axil_lock);
6129 	oldaxil = ddi_get32(pwp->top_acc_handle,
6130 	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]);
6131 	ddi_put32(pwp->top_acc_handle,
6132 	    &pwp->top_regs[PMCS_AXI_TRANS >> 2], newaxil);
6133 	drv_usecwait(10);
6134 	if (ddi_get32(pwp->top_acc_handle,
6135 	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]) != newaxil) {
6136 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "AXIL register update failed");
6137 	}
6138 	ddi_put32(pwp->gsm_acc_handle, &pwp->gsm_regs[off >> 2], val);
6139 	ddi_put32(pwp->top_acc_handle,
6140 	    &pwp->top_regs[PMCS_AXI_TRANS >> 2], oldaxil);
6141 	drv_usecwait(10);
6142 	if (ddi_get32(pwp->top_acc_handle,
6143 	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]) != oldaxil) {
6144 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "AXIL register restore failed");
6145 	}
6146 	mutex_exit(&pwp->axil_lock);
6147 }
6148 
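/*
 * Both GSM accessors above implement a simple windowing scheme: the AXI
 * translation register selects which GSM_BASE_MASK-aligned window the
 * gsm_regs mapping currently exposes, i.e. conceptually
 *
 *	window = off & ~GSM_BASE_MASK;		(written to PMCS_AXI_TRANS)
 *	dword  = (off & GSM_BASE_MASK) >> 2;	(index into gsm_regs)
 *
 * which is why the window register is saved, rewritten, verified and
 * restored around each access, all under axil_lock.
 */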
6149 uint32_t
6150 pmcs_rd_topunit(pmcs_hw_t *pwp, uint32_t off)
6151 {
6152 	switch (off) {
6153 	case PMCS_SPC_RESET:
6154 	case PMCS_SPC_BOOT_STRAP:
6155 	case PMCS_SPC_DEVICE_ID:
6156 	case PMCS_DEVICE_REVISION:
6157 		off = pmcs_rd_gsm_reg(pwp, off);
6158 		break;
6159 	default:
6160 		off = ddi_get32(pwp->top_acc_handle,
6161 		    &pwp->top_regs[off >> 2]);
6162 		break;
6163 	}
6164 	return (off);
6165 }
6166 
6167 void
6168 pmcs_wr_topunit(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
6169 {
6170 	switch (off) {
6171 	case PMCS_SPC_RESET:
6172 	case PMCS_DEVICE_REVISION:
6173 		pmcs_wr_gsm_reg(pwp, off, val);
6174 		break;
6175 	default:
6176 		ddi_put32(pwp->top_acc_handle, &pwp->top_regs[off >> 2], val);
6177 		break;
6178 	}
6179 }
6180 
6181 uint32_t
6182 pmcs_rd_msgunit(pmcs_hw_t *pwp, uint32_t off)
6183 {
6184 	return (ddi_get32(pwp->msg_acc_handle, &pwp->msg_regs[off >> 2]));
6185 }
6186 
6187 uint32_t
6188 pmcs_rd_mpi_tbl(pmcs_hw_t *pwp, uint32_t off)
6189 {
6190 	return (ddi_get32(pwp->mpi_acc_handle,
6191 	    &pwp->mpi_regs[(pwp->mpi_offset + off) >> 2]));
6192 }
6193 
6194 uint32_t
6195 pmcs_rd_gst_tbl(pmcs_hw_t *pwp, uint32_t off)
6196 {
6197 	return (ddi_get32(pwp->mpi_acc_handle,
6198 	    &pwp->mpi_regs[(pwp->mpi_gst_offset + off) >> 2]));
6199 }
6200 
6201 uint32_t
6202 pmcs_rd_iqc_tbl(pmcs_hw_t *pwp, uint32_t off)
6203 {
6204 	return (ddi_get32(pwp->mpi_acc_handle,
6205 	    &pwp->mpi_regs[(pwp->mpi_iqc_offset + off) >> 2]));
6206 }
6207 
6208 uint32_t
6209 pmcs_rd_oqc_tbl(pmcs_hw_t *pwp, uint32_t off)
6210 {
6211 	return (ddi_get32(pwp->mpi_acc_handle,
6212 	    &pwp->mpi_regs[(pwp->mpi_oqc_offset + off) >> 2]));
6213 }
6214 
6215 uint32_t
6216 pmcs_rd_iqpi(pmcs_hw_t *pwp, uint32_t qnum)
6217 {
6218 	return (ddi_get32(pwp->mpi_acc_handle,
6219 	    &pwp->mpi_regs[pwp->iqpi_offset[qnum] >> 2]));
6220 }
6221 
6222 uint32_t
6223 pmcs_rd_oqci(pmcs_hw_t *pwp, uint32_t qnum)
6224 {
6225 	return (ddi_get32(pwp->mpi_acc_handle,
6226 	    &pwp->mpi_regs[pwp->oqci_offset[qnum] >> 2]));
6227 }
6228 
6229 void
6230 pmcs_wr_msgunit(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
6231 {
6232 	ddi_put32(pwp->msg_acc_handle, &pwp->msg_regs[off >> 2], val);
6233 }
6234 
6235 void
6236 pmcs_wr_mpi_tbl(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
6237 {
6238 	ddi_put32(pwp->mpi_acc_handle,
6239 	    &pwp->mpi_regs[(pwp->mpi_offset + off) >> 2], (val));
6240 }
6241 
6242 void
6243 pmcs_wr_gst_tbl(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
6244 {
6245 	ddi_put32(pwp->mpi_acc_handle,
6246 	    &pwp->mpi_regs[(pwp->mpi_gst_offset + off) >> 2], val);
6247 }
6248 
6249 void
6250 pmcs_wr_iqc_tbl(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
6251 {
6252 	ddi_put32(pwp->mpi_acc_handle,
6253 	    &pwp->mpi_regs[(pwp->mpi_iqc_offset + off) >> 2], val);
6254 }
6255 
6256 void
6257 pmcs_wr_oqc_tbl(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
6258 {
6259 	ddi_put32(pwp->mpi_acc_handle,
6260 	    &pwp->mpi_regs[(pwp->mpi_oqc_offset + off) >> 2], val);
6261 }
6262 
6263 void
6264 pmcs_wr_iqci(pmcs_hw_t *pwp, uint32_t qnum, uint32_t val)
6265 {
6266 	((uint32_t *)((void *)pwp->cip))[IQ_OFFSET(qnum) >> 2] = val;
6267 	if (ddi_dma_sync(pwp->cip_handles, 0, 0, DDI_DMA_SYNC_FORDEV) !=
6268 	    DDI_SUCCESS) {
6269 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: ddi_dma_sync failed?",
6270 		    __func__);
6271 	}
6272 }
6273 
6274 void
6275 pmcs_wr_iqpi(pmcs_hw_t *pwp, uint32_t qnum, uint32_t val)
6276 {
6277 	ddi_put32(pwp->mpi_acc_handle,
6278 	    &pwp->mpi_regs[pwp->iqpi_offset[qnum] >> 2], val);
6279 }
6280 
6281 void
6282 pmcs_wr_oqci(pmcs_hw_t *pwp, uint32_t qnum, uint32_t val)
6283 {
6284 	ddi_put32(pwp->mpi_acc_handle,
6285 	    &pwp->mpi_regs[pwp->oqci_offset[qnum] >> 2], val);
6286 }
6287 
6288 void
6289 pmcs_wr_oqpi(pmcs_hw_t *pwp, uint32_t qnum, uint32_t val)
6290 {
6291 	((uint32_t *)((void *)pwp->cip))[OQ_OFFSET(qnum) >> 2] = val;
6292 	if (ddi_dma_sync(pwp->cip_handles, 0, 0, DDI_DMA_SYNC_FORDEV) !=
6293 	    DDI_SUCCESS) {
6294 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: ddi_dma_sync failed?",
6295 		    __func__);
6296 	}
6297 }
6298 
6299 /*
6300  * Check the status value of an outbound IOMB and report anything bad
6301  */
6302 
6303 void
6304 pmcs_check_iomb_status(pmcs_hw_t *pwp, uint32_t *iomb)
6305 {
6306 	uint16_t 	opcode;
6307 	int		offset;
6308 
6309 	if (iomb == NULL) {
6310 		return;
6311 	}
6312 
6313 	opcode = LE_32(iomb[0]) & 0xfff;
6314 
6315 	switch (opcode) {
6316 		/*
6317 		 * The following have no status field, so ignore them
6318 		 */
6319 	case PMCOUT_ECHO:
6320 	case PMCOUT_SAS_HW_EVENT:
6321 	case PMCOUT_GET_DEVICE_HANDLE:
6322 	case PMCOUT_SATA_EVENT:
6323 	case PMCOUT_SSP_EVENT:
6324 	case PMCOUT_DEVICE_HANDLE_ARRIVED:
6325 	case PMCOUT_SMP_REQUEST_RECEIVED:
6326 	case PMCOUT_GPIO:
6327 	case PMCOUT_GPIO_EVENT:
6328 	case PMCOUT_GET_TIME_STAMP:
6329 	case PMCOUT_SKIP_ENTRIES:
6330 	case PMCOUT_GET_NVMD_DATA:	/* Actually lower 16 bits of word 3 */
6331 	case PMCOUT_SET_NVMD_DATA:	/* but ignore - we don't use these */
6332 	case PMCOUT_DEVICE_HANDLE_REMOVED:
6333 	case PMCOUT_SSP_REQUEST_RECEIVED:
6334 		return;
6335 
6336 	case PMCOUT_GENERAL_EVENT:
6337 		offset = 1;
6338 		break;
6339 
6340 	case PMCOUT_SSP_COMPLETION:
6341 	case PMCOUT_SMP_COMPLETION:
6342 	case PMCOUT_DEVICE_REGISTRATION:
6343 	case PMCOUT_DEREGISTER_DEVICE_HANDLE:
6344 	case PMCOUT_SATA_COMPLETION:
6345 	case PMCOUT_DEVICE_INFO:
6346 	case PMCOUT_FW_FLASH_UPDATE:
6347 	case PMCOUT_SSP_ABORT:
6348 	case PMCOUT_SATA_ABORT:
6349 	case PMCOUT_SAS_DIAG_MODE_START_END:
6350 	case PMCOUT_SAS_HW_EVENT_ACK_ACK:
6351 	case PMCOUT_SMP_ABORT:
6352 	case PMCOUT_SET_DEVICE_STATE:
6353 	case PMCOUT_GET_DEVICE_STATE:
6354 	case PMCOUT_SET_DEVICE_INFO:
6355 		offset = 2;
6356 		break;
6357 
6358 	case PMCOUT_LOCAL_PHY_CONTROL:
6359 	case PMCOUT_SAS_DIAG_EXECUTE:
6360 	case PMCOUT_PORT_CONTROL:
6361 		offset = 3;
6362 		break;
6363 
6364 	case PMCOUT_GET_INFO:
6365 	case PMCOUT_GET_VPD:
6366 	case PMCOUT_SAS_ASSISTED_DISCOVERY_EVENT:
6367 	case PMCOUT_SATA_ASSISTED_DISCOVERY_EVENT:
6368 	case PMCOUT_SET_VPD:
6369 	case PMCOUT_TWI:
6370 		pmcs_print_entry(pwp, PMCS_PRT_DEBUG,
6371 		    "Got response for deprecated opcode", iomb);
6372 		return;
6373 
6374 	default:
6375 		pmcs_print_entry(pwp, PMCS_PRT_DEBUG,
6376 		    "Got response for unknown opcode", iomb);
6377 		return;
6378 	}
6379 
6380 	if (LE_32(iomb[offset]) != PMCOUT_STATUS_OK) {
6381 		pmcs_print_entry(pwp, PMCS_PRT_DEBUG,
6382 		    "bad status on TAG_TYPE_NONE command", iomb);
6383 	}
6384 }
6385 
6386 /*
6387  * Called with statlock held
6388  */
6389 void
6390 pmcs_clear_xp(pmcs_hw_t *pwp, pmcs_xscsi_t *xp)
6391 {
6393 
6394 	ASSERT(mutex_owned(&xp->statlock));
6395 	ASSERT(xp->dying);
6396 
6397 	pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Device 0x%p is gone.", __func__,
6398 	    (void *)xp);
6399 
6400 	/*
6401 	 * Clear the dip now.  This keeps pmcs_rem_old_devices from attempting
6402 	 * to call us on the same device while we're still flushing queues.
6403 	 * The only side effect is we can no longer update SM-HBA properties,
6404 	 * but this device is going away anyway, so no matter.
6405 	 */
6406 	xp->dip = NULL;
6407 
6408 	/*
6409 	 * Flush all target queues
6410 	 */
6411 	pmcs_flush_target_queues(pwp, xp, PMCS_TGT_ALL_QUEUES);
6412 
6413 	xp->special_running = 0;
6414 	xp->recovering = 0;
6415 	xp->recover_wait = 0;
6416 	xp->draining = 0;
6417 	xp->dying = 0;
6418 	xp->new = 0;
6419 	xp->assigned = 0;
6420 	xp->dev_state = 0;
6421 	xp->tagmap = 0;
6422 	xp->dev_gone = 1;
6423 	xp->event_recovery = 0;
6424 	xp->dtype = NOTHING;
6425 	xp->wq_recovery_tail = NULL;
6426 	/* Don't clear xp->phy */
6427 	/* Don't clear xp->actv_cnt */
6428 }
6429 
6430 static int
6431 pmcs_smp_function_result(pmcs_hw_t *pwp, smp_response_frame_t *srf)
6432 {
6433 	int result = srf->srf_result;
6434 
6435 	switch (result) {
6436 	case SMP_RES_UNKNOWN_FUNCTION:
6437 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: SMP DISCOVER Response "
6438 		    "Function Result: Unknown SMP Function(0x%x)",
6439 		    __func__, result);
6440 		break;
6441 	case SMP_RES_FUNCTION_FAILED:
6442 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: SMP DISCOVER Response "
6443 		    "Function Result: SMP Function Failed(0x%x)",
6444 		    __func__, result);
6445 		break;
6446 	case SMP_RES_INVALID_REQUEST_FRAME_LENGTH:
6447 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: SMP DISCOVER Response "
6448 		    "Function Result: Invalid Request Frame Length(0x%x)",
6449 		    __func__, result);
6450 		break;
6451 	case SMP_RES_INCOMPLETE_DESCRIPTOR_LIST:
6452 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: SMP DISCOVER Response "
6453 		    "Function Result: Incomplete Descriptor List(0x%x)",
6454 		    __func__, result);
6455 		break;
6456 	case SMP_RES_PHY_DOES_NOT_EXIST:
6457 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: SMP DISCOVER Response "
6458 		    "Function Result: PHY does not exist(0x%x)",
6459 		    __func__, result);
6460 		break;
6461 	case SMP_RES_PHY_VACANT:
6462 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: SMP DISCOVER Response "
6463 		    "Function Result: PHY Vacant(0x%x)",
6464 		    __func__, result);
6465 		break;
6466 	default:
6467 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: SMP DISCOVER Response "
6468 		    "Function Result: (0x%x)",
6469 		    __func__, result);
6470 		break;
6471 	}
6472 
6473 	return (result);
6474 }
6475 
6476 /*
6477  * Do all the repetitive stuff necessary to setup for DMA
6478  *
6479  * pwp: Used for dip
6480  * dma_attr: ddi_dma_attr_t to use for the mapping
6481  * acch: ddi_acc_handle_t to use for the mapping
6482  * dmah: ddi_dma_handle_t to use
6483  * length: Amount of memory for mapping
6484  * kvp: Pointer filled in with kernel virtual address on successful return
6485  * dma_addr: Pointer filled in with DMA address on successful return
6486  */
6487 boolean_t
6488 pmcs_dma_setup(pmcs_hw_t *pwp, ddi_dma_attr_t *dma_attr, ddi_acc_handle_t *acch,
6489     ddi_dma_handle_t *dmah, size_t length, caddr_t *kvp, uint64_t *dma_addr)
6490 {
6491 	dev_info_t		*dip = pwp->dip;
6492 	ddi_dma_cookie_t	cookie;
6493 	size_t			real_length;
6494 	uint_t			ddma_flag = DDI_DMA_CONSISTENT;
6495 	uint_t			ddabh_flag = DDI_DMA_CONSISTENT | DDI_DMA_RDWR;
6496 	uint_t			cookie_cnt;
6497 	ddi_device_acc_attr_t	mattr = {
6498 		DDI_DEVICE_ATTR_V0,
6499 		DDI_NEVERSWAP_ACC,
6500 		DDI_STRICTORDER_ACC,
6501 		DDI_DEFAULT_ACC
6502 	};
6503 
6504 	*acch = NULL;
6505 	*dmah = NULL;
6506 
6507 	if (ddi_dma_alloc_handle(dip, dma_attr, DDI_DMA_SLEEP, NULL, dmah) !=
6508 	    DDI_SUCCESS) {
6509 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "Failed to allocate DMA handle");
6510 		return (B_FALSE);
6511 	}
6512 
6513 	if (ddi_dma_mem_alloc(*dmah, length, &mattr, ddma_flag, DDI_DMA_SLEEP,
6514 	    NULL, kvp, &real_length, acch) != DDI_SUCCESS) {
6515 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "Failed to allocate DMA mem");
6516 		ddi_dma_free_handle(dmah);
6517 		*dmah = NULL;
6518 		return (B_FALSE);
6519 	}
6520 
6521 	if (ddi_dma_addr_bind_handle(*dmah, NULL, *kvp, real_length,
6522 	    ddabh_flag, DDI_DMA_SLEEP, NULL, &cookie, &cookie_cnt)
6523 	    != DDI_DMA_MAPPED) {
6524 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "Failed to bind DMA");
6525 		ddi_dma_free_handle(dmah);
6526 		ddi_dma_mem_free(acch);
6527 		*dmah = NULL;
6528 		*acch = NULL;
6529 		return (B_FALSE);
6530 	}
6531 
6532 	if (cookie_cnt != 1) {
6533 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "Multiple cookies");
6534 		if (ddi_dma_unbind_handle(*dmah) != DDI_SUCCESS) {
6535 			pmcs_prt(pwp, PMCS_PRT_DEBUG, "Condition failed at "
6536 			    "%s():%d", __func__, __LINE__);
6537 		}
6538 		ddi_dma_free_handle(dmah);
6539 		ddi_dma_mem_free(acch);
6540 		*dmah = NULL;
6541 		*acch = NULL;
6542 		return (B_FALSE);
6543 	}
6544 
6545 	*dma_addr = cookie.dmac_laddress;
6546 
6547 	return (B_TRUE);
6548 }
6549 
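/*
 * Usage sketch (hypothetical sizes): to allocate one page of consistent
 * memory using the driver's generic DMA attributes:
 *
 *	ddi_acc_handle_t acch;
 *	ddi_dma_handle_t dmah;
 *	caddr_t kva;
 *	uint64_t pa;
 *
 *	if (!pmcs_dma_setup(pwp, &pmcs_dattr, &acch, &dmah,
 *	    PAGESIZE, &kva, &pa)) {
 *		return (B_FALSE);
 *	}
 *
 * On failure pmcs_dma_setup() unwinds anything it allocated and NULLs
 * both handles, so the caller has nothing to clean up.
 */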
6550 /*
6551  * Flush requested queues for a particular target.  Called with statlock held
6552  */
6553 void
6554 pmcs_flush_target_queues(pmcs_hw_t *pwp, pmcs_xscsi_t *tgt, uint8_t queues)
6555 {
6556 	pmcs_cmd_t	*sp;
6557 	pmcwork_t	*pwrk;
6558 
6559 	ASSERT(pwp != NULL);
6560 	ASSERT(tgt != NULL);
6561 
6562 	pmcs_prt(pwp, PMCS_PRT_DEBUG,
6563 	    "%s: Flushing queues (%d) for target 0x%p", __func__,
6564 	    queues, (void *)tgt);
6565 
6566 	/*
6567 	 * Commands on the wait queue (or the special queue below) don't have
6568 	 * work structures associated with them.
6569 	 */
6570 	if (queues & PMCS_TGT_WAIT_QUEUE) {
6571 		mutex_enter(&tgt->wqlock);
6572 		while ((sp = STAILQ_FIRST(&tgt->wq)) != NULL) {
6573 			STAILQ_REMOVE(&tgt->wq, sp, pmcs_cmd, cmd_next);
6574 			pmcs_prt(pwp, PMCS_PRT_DEBUG1,
6575 			    "%s: Removing cmd 0x%p from wq for target 0x%p",
6576 			    __func__, (void *)sp, (void *)tgt);
6577 			CMD2PKT(sp)->pkt_reason = CMD_DEV_GONE;
6578 			CMD2PKT(sp)->pkt_state = STATE_GOT_BUS;
6579 			mutex_exit(&tgt->wqlock);
6580 			pmcs_dma_unload(pwp, sp);
6581 			mutex_enter(&pwp->cq_lock);
6582 			STAILQ_INSERT_TAIL(&pwp->cq, sp, cmd_next);
6583 			mutex_exit(&pwp->cq_lock);
6584 			mutex_enter(&tgt->wqlock);
6585 		}
6586 		mutex_exit(&tgt->wqlock);
6587 	}
6588 
6589 	/*
6590 	 * Commands on the active queue will have work structures associated
6591 	 * with them.
6592 	 */
6593 	if (queues & PMCS_TGT_ACTIVE_QUEUE) {
6594 		mutex_enter(&tgt->aqlock);
6595 		while ((sp = STAILQ_FIRST(&tgt->aq)) != NULL) {
6596 			STAILQ_REMOVE(&tgt->aq, sp, pmcs_cmd, cmd_next);
6597 			pwrk = pmcs_tag2wp(pwp, sp->cmd_tag);
6598 			mutex_exit(&tgt->aqlock);
6599 			mutex_exit(&tgt->statlock);
6600 			/*
6601 			 * If we found a work structure, mark it as dead
6602 			 * and complete it
6603 			 */
6604 			if (pwrk != NULL) {
6605 				pwrk->dead = 1;
6606 				CMD2PKT(sp)->pkt_reason = CMD_DEV_GONE;
6607 				CMD2PKT(sp)->pkt_state = STATE_GOT_BUS;
6608 				pmcs_complete_work_impl(pwp, pwrk, NULL, 0);
6609 			}
6610 			pmcs_prt(pwp, PMCS_PRT_DEBUG1,
6611 			    "%s: Removing cmd 0x%p from aq for target 0x%p",
6612 			    __func__, (void *)sp, (void *)tgt);
6613 			pmcs_dma_unload(pwp, sp);
6614 			mutex_enter(&pwp->cq_lock);
6615 			STAILQ_INSERT_TAIL(&pwp->cq, sp, cmd_next);
6616 			mutex_exit(&pwp->cq_lock);
6617 			mutex_enter(&tgt->aqlock);
6618 			mutex_enter(&tgt->statlock);
6619 		}
6620 		mutex_exit(&tgt->aqlock);
6621 	}
6622 
6623 	if (queues & PMCS_TGT_SPECIAL_QUEUE) {
6624 		while ((sp = STAILQ_FIRST(&tgt->sq)) != NULL) {
6625 			STAILQ_REMOVE(&tgt->sq, sp, pmcs_cmd, cmd_next);
6626 			pmcs_prt(pwp, PMCS_PRT_DEBUG1,
6627 			    "%s: Removing cmd 0x%p from sq for target 0x%p",
6628 			    __func__, (void *)sp, (void *)tgt);
6629 			CMD2PKT(sp)->pkt_reason = CMD_DEV_GONE;
6630 			CMD2PKT(sp)->pkt_state = STATE_GOT_BUS;
6631 			pmcs_dma_unload(pwp, sp);
6632 			mutex_enter(&pwp->cq_lock);
6633 			STAILQ_INSERT_TAIL(&pwp->cq, sp, cmd_next);
6634 			mutex_exit(&pwp->cq_lock);
6635 		}
6636 	}
6637 }
6638 
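/*
 * Callers that are tearing down a target typically pass
 * PMCS_TGT_ALL_QUEUES, which (presumably) is just the OR of the wait,
 * active and special queue flags tested above, so all three loops run.
 */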
6639 void
6640 pmcs_complete_work_impl(pmcs_hw_t *pwp, pmcwork_t *pwrk, uint32_t *iomb,
6641     size_t amt)
6642 {
6643 	switch (PMCS_TAG_TYPE(pwrk->htag)) {
6644 	case PMCS_TAG_TYPE_CBACK:
6645 	{
6646 		pmcs_cb_t callback = (pmcs_cb_t)pwrk->ptr;
6647 		(*callback)(pwp, pwrk, iomb);
6648 		break;
6649 	}
6650 	case PMCS_TAG_TYPE_WAIT:
6651 		if (pwrk->arg && iomb && amt) {
6652 			(void) memcpy(pwrk->arg, iomb, amt);
6653 		}
6654 		cv_signal(&pwrk->sleep_cv);
6655 		mutex_exit(&pwrk->lock);
6656 		break;
6657 	case PMCS_TAG_TYPE_NONE:
6658 #ifdef DEBUG
6659 		pmcs_check_iomb_status(pwp, iomb);
6660 #endif
6661 		pmcs_pwork(pwp, pwrk);
6662 		break;
6663 	default:
6664 		/*
6665 		 * We will leak a structure here if we don't know
6666 		 * what happened
6667 		 */
6668 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Unknown PMCS_TAG_TYPE (%x)",
6669 		    __func__, PMCS_TAG_TYPE(pwrk->htag));
6670 		break;
6671 	}
6672 }
6673 
6674 /*
6675  * Determine if iport still has targets. During detach(9E), if SCSA is
6676  * successful in its guarantee of tran_tgt_free(9E) before detach(9E),
6677  * this should always return B_FALSE.
6678  */
6679 boolean_t
6680 pmcs_iport_has_targets(pmcs_hw_t *pwp, pmcs_iport_t *iport)
6681 {
6682 	pmcs_xscsi_t *xp;
6683 	int i;
6684 
6685 	mutex_enter(&pwp->lock);
6686 
6687 	if (!pwp->targets || !pwp->max_dev) {
6688 		mutex_exit(&pwp->lock);
6689 		return (B_FALSE);
6690 	}
6691 
6692 	for (i = 0; i < pwp->max_dev; i++) {
6693 		xp = pwp->targets[i];
6694 		if ((xp == NULL) || (xp->phy == NULL) ||
6695 		    (xp->phy->iport != iport)) {
6696 			continue;
6697 		}
6698 
6699 		mutex_exit(&pwp->lock);
6700 		return (B_TRUE);
6701 	}
6702 
6703 	mutex_exit(&pwp->lock);
6704 	return (B_FALSE);
6705 }
6706 
6707 /*
6708  * Called with softstate lock held
6709  */
6710 void
6711 pmcs_destroy_target(pmcs_xscsi_t *target)
6712 {
6713 	pmcs_hw_t *pwp = target->pwp;
6714 	pmcs_iport_t *iport;
6715 
6716 	ASSERT(pwp);
6717 	ASSERT(mutex_owned(&pwp->lock));
6718 
6719 	if (!target->ua) {
6720 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
6721 		    "%s: target %p iport address is null",
6722 		    __func__, (void *)target);
6723 	}
6724 
6725 	iport = pmcs_get_iport_by_ua(pwp, target->ua);
6726 	if (iport == NULL) {
6727 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
6728 		    "%s: no iport associated with tgt(0x%p)",
6729 		    __func__, (void *)target);
6730 		return;
6731 	}
6732 
6733 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
6734 	    "%s: free target %p", __func__, (void *)target);
6735 	if (target->ua) {
6736 		strfree(target->ua);
6737 	}
6738 
6739 	mutex_destroy(&target->wqlock);
6740 	mutex_destroy(&target->aqlock);
6741 	mutex_destroy(&target->statlock);
6742 	cv_destroy(&target->reset_cv);
6743 	cv_destroy(&target->abort_cv);
6744 	ddi_soft_state_bystr_fini(&target->lun_sstate);
6745 	ddi_soft_state_bystr_free(iport->tgt_sstate, target->unit_address);
6746 	pmcs_rele_iport(iport);
6747 }
6748 
6749 /*
6750  * Get device state.  Called with statlock and PHY lock held.
6751  */
6752 int
6753 pmcs_get_dev_state(pmcs_hw_t *pwp, pmcs_xscsi_t *xp, uint8_t *ds)
6754 {
6755 	uint32_t htag, *ptr, msg[PMCS_MSG_SIZE];
6756 	int result;
6757 	struct pmcwork *pwrk;
6758 	pmcs_phy_t *phyp;
6759 
6760 	pmcs_prt(pwp, PMCS_PRT_DEBUG3, "%s: tgt(0x%p)", __func__, (void *)xp);
6761 	if (xp == NULL) {
6762 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Target is NULL", __func__);
6763 		return (-1);
6764 	}
6765 
6766 	ASSERT(mutex_owned(&xp->statlock));
6767 	phyp = xp->phy;
6768 	ASSERT(mutex_owned(&phyp->phy_lock));
6769 
6770 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, phyp);
6771 	if (pwrk == NULL) {
6772 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
6773 		return (-1);
6774 	}
6775 	pwrk->arg = msg;
6776 	pwrk->dtype = phyp->dtype;
6777 
6778 	if (phyp->valid_device_id == 0) {
6779 		pmcs_pwork(pwp, pwrk);
6780 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Invalid DeviceID", __func__);
6781 		return (-1);
6782 	}
6783 	htag = pwrk->htag;
6784 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
6785 	    PMCIN_GET_DEVICE_STATE));
6786 	msg[1] = LE_32(pwrk->htag);
6787 	msg[2] = LE_32(phyp->device_id);
6788 
6789 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
6790 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
6791 	if (ptr == NULL) {
6792 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
6793 		pmcs_pwork(pwp, pwrk);
6794 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
6795 		return (-1);
6796 	}
6797 	COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE);
6798 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
6799 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
6800 	mutex_exit(&xp->statlock);
6801 	pmcs_unlock_phy(phyp);
6802 	WAIT_FOR(pwrk, 1000, result);
6803 	pmcs_lock_phy(phyp);
6804 	pmcs_pwork(pwp, pwrk);
6805 	mutex_enter(&xp->statlock);
6806 
6807 	if (result) {
6808 		pmcs_timed_out(pwp, htag, __func__);
6809 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: cmd timed out, returning",
6810 		    __func__);
6811 		return (-1);
6812 	}
6813 	if (LE_32(msg[2]) == 0) {
6814 		*ds = (uint8_t)(LE_32(msg[4]));
6815 		if (*ds != xp->dev_state) {
6816 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
6817 			    "%s: retrieved_ds=0x%x, target_ds=0x%x", __func__,
6818 			    *ds, xp->dev_state);
6819 		}
6820 		return (0);
6821 	} else {
6822 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
6823 		    "%s: cmd failed Status(0x%x), returning", __func__,
6824 		    LE_32(msg[2]));
6825 		return (-1);
6826 	}
6827 }
6828 
6829 /*
6830  * Set device state.  Called with target's statlock and PHY lock held.
6831  */
6832 int
6833 pmcs_set_dev_state(pmcs_hw_t *pwp, pmcs_xscsi_t *xp, uint8_t ds)
6834 {
6835 	uint32_t htag, *ptr, msg[PMCS_MSG_SIZE];
6836 	int result;
6837 	uint8_t pds, nds;
6838 	struct pmcwork *pwrk;
6839 	pmcs_phy_t *phyp;
6840 
6841 	pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, "%s: ds(0x%x), tgt(0x%p)",
6842 	    __func__, ds, (void *)xp);
6843 	if (xp == NULL) {
6844 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Target is Null", __func__);
6845 		return (-1);
6846 	}
6847 
6848 	phyp = xp->phy;
6849 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, phyp);
6850 	if (pwrk == NULL) {
6851 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
6852 		return (-1);
6853 	}
6854 	if (phyp == NULL) {
6855 		pmcs_pwork(pwp, pwrk);
6856 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, "%s: PHY is Null",
6857 		    __func__);
6858 		return (-1);
6859 	}
6860 	if (phyp->valid_device_id == 0) {
6861 		pmcs_pwork(pwp, pwrk);
6862 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
6863 		    "%s: Invalid DeviceID", __func__);
6864 		return (-1);
6865 	}
6866 	pwrk->arg = msg;
6867 	pwrk->dtype = phyp->dtype;
6868 	htag = pwrk->htag;
6869 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
6870 	    PMCIN_SET_DEVICE_STATE));
6871 	msg[1] = LE_32(pwrk->htag);
6872 	msg[2] = LE_32(phyp->device_id);
6873 	msg[3] = LE_32(ds);
6874 
6875 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
6876 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
6877 	if (ptr == NULL) {
6878 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
6879 		pmcs_pwork(pwp, pwrk);
6880 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
6881 		return (-1);
6882 	}
6883 	COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE);
6884 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
6885 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
6886 
6887 	mutex_exit(&xp->statlock);
6888 	pmcs_unlock_phy(phyp);
6889 	WAIT_FOR(pwrk, 1000, result);
6890 	pmcs_lock_phy(phyp);
6891 	pmcs_pwork(pwp, pwrk);
6892 	mutex_enter(&xp->statlock);
6893 
6894 	if (result) {
6895 		pmcs_timed_out(pwp, htag, __func__);
6896 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
6897 		    "%s: cmd timed out, returning", __func__);
6898 		return (-1);
6899 	}
6900 	if (LE_32(msg[2]) == 0) {
6901 		pds = (uint8_t)(LE_32(msg[4]) >> 4);
6902 		nds = (uint8_t)(LE_32(msg[4]) & 0x0000000f);
6903 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, "%s: previous_ds=0x%x, "
6904 		    "new_ds=0x%x", __func__, pds, nds);
6905 		xp->dev_state = nds;
6906 		return (0);
6907 	} else {
6908 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
6909 		    "%s: cmd failed Status(0x%x), returning", __func__,
6910 		    LE_32(msg[2]));
6911 		return (-1);
6912 	}
6913 }
6914 
6915 void
6916 pmcs_dev_state_recovery(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
6917 {
6918 	uint8_t	ds;
6919 	int rc;
6920 	pmcs_xscsi_t *tgt;
6921 	pmcs_phy_t *pptr, *pnext, *pchild;
6922 
6923 	/*
6924 	 * First time, check to see if we're already performing recovery
6925 	 */
6926 	if (phyp == NULL) {
6927 		mutex_enter(&pwp->lock);
6928 		if (pwp->ds_err_recovering) {
6929 			mutex_exit(&pwp->lock);
6930 			SCHEDULE_WORK(pwp, PMCS_WORK_DS_ERR_RECOVERY);
6931 			return;
6932 		}
6933 
6934 		pwp->ds_err_recovering = 1;
6935 		pptr = pwp->root_phys;
6936 		mutex_exit(&pwp->lock);
6937 	} else {
6938 		pptr = phyp;
6939 	}
6940 
6941 	while (pptr) {
6942 		/*
6943 		 * Since ds_err_recovering is set, we can be assured these
6944 		 * PHYs won't disappear on us while we do this.
6945 		 */
6946 		pmcs_lock_phy(pptr);
6947 		pchild = pptr->children;
6948 		pnext = pptr->sibling;
6949 		pmcs_unlock_phy(pptr);
6950 
6951 		if (pchild) {
6952 			pmcs_dev_state_recovery(pwp, pchild);
6953 		}
6954 
6955 		tgt = NULL;
6956 		pmcs_lock_phy(pptr);
6957 
6958 		if (pptr->dead) {
6959 			goto next_phy;
6960 		}
6961 
6962 		tgt = pptr->target;
6963 		if (tgt == NULL) {
6964 			if (pptr->dtype != NOTHING) {
6965 				pmcs_prt(pwp, PMCS_PRT_DEBUG2,
6966 				    "%s: no target for DS error recovery for "
6967 				    "PHY 0x%p", __func__, (void *)pptr);
6968 			}
6969 			goto next_phy;
6970 		}
6971 
6972 		mutex_enter(&tgt->statlock);
6973 
6974 		if (tgt->recover_wait == 0) {
6975 			goto next_phy;
6976 		}
6977 
6978 		if (tgt->dying) {
6979 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
6980 			    "%s: Not doing DS recovery on dying target %p",
6981 			    __func__, (void *)tgt);
6982 			goto next_phy;
6983 		}
6984 
6985 		/*
6986 		 * Step 1: Put the device into the IN_RECOVERY state
6987 		 */
6988 		rc = pmcs_get_dev_state(pwp, tgt, &ds);
6989 		if (rc != 0) {
6990 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
6991 			    "%s: pmcs_get_dev_state on PHY %s "
6992 			    "failed (rc=%d)",
6993 			    __func__, pptr->path, rc);
6994 
6995 			pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
6996 			    __func__, __LINE__, "pmcs_get_dev_state");
6997 
6998 			goto next_phy;
6999 		}
7000 
7001 		if (tgt->dev_state == ds) {
7002 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7003 			    "%s: Target 0x%p already IN_RECOVERY", __func__,
7004 			    (void *)tgt);
7005 		} else {
7006 			tgt->dev_state = ds;
7007 			ds = PMCS_DEVICE_STATE_IN_RECOVERY;
7008 			rc = pmcs_send_err_recovery_cmd(pwp, ds, tgt);
7009 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7010 			    "%s: pmcs_send_err_recovery_cmd "
7011 			    "result(%d) tgt(0x%p) ds(0x%x) tgt->ds(0x%x)",
7012 			    __func__, rc, (void *)tgt, ds, tgt->dev_state);
7013 
7014 			if (rc) {
7015 				pmcs_prt(pwp, PMCS_PRT_DEBUG,
7016 				    "%s: pmcs_send_err_recovery_cmd to PHY %s "
7017 				    "failed (rc=%d)",
7018 				    __func__, pptr->path, rc);
7019 
7020 				pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
7021 				    __func__, __LINE__,
7022 				    "pmcs_send_err_recovery_cmd");
7023 
7024 				goto next_phy;
7025 			}
7026 		}
7027 
7028 		/*
7029 		 * Step 2: Perform a hard reset on the PHY
7030 		 */
7031 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7032 		    "%s: Issue HARD_RESET to PHY %s", __func__, pptr->path);
7033 		/*
7034 		 * Must release statlock here because pmcs_reset_phy will
7035 		 * drop and reacquire the PHY lock.
7036 		 */
7037 		mutex_exit(&tgt->statlock);
7038 		rc = pmcs_reset_phy(pwp, pptr, PMCS_PHYOP_HARD_RESET);
7039 		mutex_enter(&tgt->statlock);
7040 		if (rc) {
7041 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
7042 			    "%s: HARD_RESET to PHY %s failed (rc=%d)",
7043 			    __func__, pptr->path, rc);
7044 
7045 			pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
7046 			    __func__, __LINE__, "HARD_RESET");
7047 
7048 			goto next_phy;
7049 		}
7050 
7051 		/*
7052 		 * Step 3: Abort all I/Os to the device
7053 		 */
7054 		if (pptr->abort_all_start) {
7055 			while (pptr->abort_all_start) {
7056 				pmcs_prt(pwp, PMCS_PRT_DEBUG,
7057 				    "%s: Waiting for outstanding ABORT_ALL on "
7058 				    "PHY 0x%p", __func__, (void *)pptr);
7059 				cv_wait(&pptr->abort_all_cv, &pptr->phy_lock);
7060 			}
7061 		} else {
7062 			mutex_exit(&tgt->statlock);
7063 			rc = pmcs_abort(pwp, pptr, pptr->device_id, 1, 1);
7064 			mutex_enter(&tgt->statlock);
7065 			if (rc != 0) {
7066 				pptr->abort_pending = 1;
7067 				pmcs_prt(pwp, PMCS_PRT_DEBUG,
7068 				    "%s: pmcs_abort to PHY %s failed (rc=%d)",
7069 				    __func__, pptr->path, rc);
7070 
7071 				pmcs_handle_ds_recovery_error(pptr, tgt,
7072 				    pwp, __func__, __LINE__, "pmcs_abort");
7073 
7074 				goto next_phy;
7075 			}
7076 		}
7077 
7078 		/*
7079 		 * Step 4: Set the device back to OPERATIONAL state
7080 		 */
7081 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7082 		    "%s: Set PHY/tgt 0x%p/0x%p to OPERATIONAL state",
7083 		    __func__, (void *)pptr, (void *)tgt);
7084 		rc = pmcs_set_dev_state(pwp, tgt,
7085 		    PMCS_DEVICE_STATE_OPERATIONAL);
7086 		if (rc == 0) {
7087 			tgt->recover_wait = 0;
7088 			pptr->ds_recovery_retries = 0;
7089 			/*
7090 			 * Don't bother to run the work queues if the PHY
7091 			 * is dead.
7092 			 */
7093 			if (tgt->phy && !tgt->phy->dead) {
7094 				SCHEDULE_WORK(pwp, PMCS_WORK_RUN_QUEUES);
7095 				(void) ddi_taskq_dispatch(pwp->tq, pmcs_worker,
7096 				    pwp, DDI_NOSLEEP);
7097 			}
7098 		} else {
7099 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7100 			    "%s: Failed to SET tgt 0x%p to OPERATIONAL state",
7101 			    __func__, (void *)tgt);
7102 
7103 			pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
7104 			    __func__, __LINE__, "SET tgt to OPERATIONAL state");
7105 
7106 			goto next_phy;
7107 		}
7108 
7109 next_phy:
7110 		if (tgt) {
7111 			mutex_exit(&tgt->statlock);
7112 		}
7113 		pmcs_unlock_phy(pptr);
7114 		pptr = pnext;
7115 	}
7116 
7117 	/*
7118 	 * Only clear ds_err_recovering if we're exiting for good and not
7119 	 * just unwinding from recursion
7120 	 */
7121 	if (phyp == NULL) {
7122 		mutex_enter(&pwp->lock);
7123 		pwp->ds_err_recovering = 0;
7124 		mutex_exit(&pwp->lock);
7125 	}
7126 }
7127 
7128 /*
7129  * Called with target's statlock and PHY lock held.
7130  */
7131 int
7132 pmcs_send_err_recovery_cmd(pmcs_hw_t *pwp, uint8_t dev_state, pmcs_xscsi_t *tgt)
7133 {
7134 	pmcs_phy_t *pptr;
7135 	int rc = -1;
7136 
7137 	ASSERT(tgt != NULL);
7138 	ASSERT(mutex_owned(&tgt->statlock));
7139 
7140 	if (tgt->recovering) {
7141 		return (0);
7142 	}
7143 
7144 	tgt->recovering = 1;
7145 	pptr = tgt->phy;
7146 
7147 	if (pptr == NULL) {
7148 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, "%s: PHY is Null",
7149 		    __func__);
7150 		return (-1);
7151 	}
7152 
7153 	ASSERT(mutex_owned(&pptr->phy_lock));
7154 
7155 	pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, "%s: ds: 0x%x, tgt ds(0x%x)",
7156 	    __func__, dev_state, tgt->dev_state);
7157 
7158 	switch (dev_state) {
7159 	case PMCS_DEVICE_STATE_IN_RECOVERY:
7160 		if (tgt->dev_state == PMCS_DEVICE_STATE_IN_RECOVERY) {
7161 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7162 			    "%s: Target 0x%p already IN_RECOVERY", __func__,
7163 			    (void *)tgt);
7164 			rc = 0;	/* This is not an error */
7165 			goto no_action;
7166 		}
7167 
7168 		rc = pmcs_set_dev_state(pwp, tgt,
7169 		    PMCS_DEVICE_STATE_IN_RECOVERY);
7170 		if (rc != 0) {
7171 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7172 			    "%s(1): Failed to SET tgt(0x%p) to _IN_RECOVERY",
7173 			    __func__, (void *)tgt);
7174 		}
7175 
7176 		break;
7177 
7178 	case PMCS_DEVICE_STATE_OPERATIONAL:
7179 		if (tgt->dev_state != PMCS_DEVICE_STATE_IN_RECOVERY) {
7180 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7181 			    "%s: Target 0x%p not ready to go OPERATIONAL",
7182 			    __func__, (void *)tgt);
7183 			goto no_action;
7184 		}
7185 
7186 		rc = pmcs_set_dev_state(pwp, tgt,
7187 		    PMCS_DEVICE_STATE_OPERATIONAL);
7188 		tgt->reset_success = 1;
7189 		if (rc != 0) {
7190 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7191 			    "%s(2): Failed to SET tgt(0x%p) to OPERATIONAL",
7192 			    __func__, (void *)tgt);
7193 			tgt->reset_success = 0;
7194 		}
7195 
7196 		break;
7197 
7198 	case PMCS_DEVICE_STATE_NON_OPERATIONAL:
7199 		PHY_CHANGED(pwp, pptr);
7200 		RESTART_DISCOVERY(pwp);
7201 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7202 		    "%s: Device at %s is non-operational",
7203 		    __func__, pptr->path);
7204 		tgt->dev_state = PMCS_DEVICE_STATE_NON_OPERATIONAL;
7205 		rc = 0;
7206 
7207 		break;
7208 
7209 	default:
7210 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7211 		    "%s: Invalid state requested (%d)", __func__,
7212 		    dev_state);
7213 		break;
7214 
7215 	}
7216 
7217 no_action:
7218 	tgt->recovering = 0;
7219 	return (rc);
7220 }
7221 
7222 /*
7223  * pmcs_lock_phy_impl
7224  *
7225  * This function is what does the actual work for pmcs_lock_phy.  It will
7226  * lock all PHYs from phyp down in a top-down fashion.
7227  *
7228  * Locking notes:
7229  * 1. level starts from 0 for the PHY ("parent") that's passed in.  It is
7230  * not a reflection of the actual level of the PHY in the SAS topology.
7231  * 2. If parent is an expander, then parent is locked along with all its
7232  * descendants.
7233  * 3. Expander subsidiary PHYs at level 0 are not locked.  It is the
7234  * responsibility of the caller to individually lock expander subsidiary PHYs
7235  * at level 0 if necessary.
7236  * 4. Siblings at level 0 are not traversed due to the possibility that we're
7237  * locking a PHY on the dead list.  The siblings could be pointing to invalid
7238  * PHYs.  We don't lock siblings at level 0 anyway.
7239  */
7240 static void
7241 pmcs_lock_phy_impl(pmcs_phy_t *phyp, int level)
7242 {
7243 	pmcs_phy_t *tphyp;
7244 
7245 	ASSERT((phyp->dtype == SAS) || (phyp->dtype == SATA) ||
7246 	    (phyp->dtype == EXPANDER) || (phyp->dtype == NOTHING));
7247 
7248 	/*
7249 	 * Start walking the PHYs.
7250 	 */
7251 	tphyp = phyp;
7252 	while (tphyp) {
7253 		/*
7254 		 * If we're at the top level, only lock ourselves.  For anything
7255 		 * at level > 0, traverse children while locking everything.
7256 		 */
7257 		if ((level > 0) || (tphyp == phyp)) {
7258 			pmcs_prt(tphyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
7259 			    "%s: PHY 0x%p parent 0x%p path %s lvl %d",
7260 			    __func__, (void *)tphyp, (void *)tphyp->parent,
7261 			    tphyp->path, level);
7262 			mutex_enter(&tphyp->phy_lock);
7263 
7264 			if (tphyp->children) {
7265 				pmcs_lock_phy_impl(tphyp->children, level + 1);
7266 			}
7267 		}
7268 
7269 		if (level == 0) {
7270 			return;
7271 		}
7272 
7273 		tphyp = tphyp->sibling;
7274 	}
7275 }
7276 
7277 /*
7278  * pmcs_lock_phy
7279  *
7280  * This function is responsible for locking a PHY and all its descendants
7281  */
7282 void
7283 pmcs_lock_phy(pmcs_phy_t *phyp)
7284 {
7285 #ifdef DEBUG
7286 	char *callername = NULL;
7287 	ulong_t off;
7288 
7289 	ASSERT(phyp != NULL);
7290 
7291 	callername = modgetsymname((uintptr_t)caller(), &off);
7292 
7293 	if (callername == NULL) {
7294 		pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
7295 		    "%s: PHY 0x%p path %s caller: unknown", __func__,
7296 		    (void *)phyp, phyp->path);
7297 	} else {
7298 		pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
7299 		    "%s: PHY 0x%p path %s caller: %s+%lx", __func__,
7300 		    (void *)phyp, phyp->path, callername, off);
7301 	}
7302 #else
7303 	pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
7304 	    "%s: PHY 0x%p path %s", __func__, (void *)phyp, phyp->path);
7305 #endif
7306 	pmcs_lock_phy_impl(phyp, 0);
7307 }
7308 
7309 /*
7310  * pmcs_unlock_phy_impl
7311  *
7312  * Unlock all PHYs from phyp down in a bottom-up fashion.
7313  */
7314 static void
7315 pmcs_unlock_phy_impl(pmcs_phy_t *phyp, int level)
7316 {
7317 	pmcs_phy_t *phy_next;
7318 
7319 	ASSERT((phyp->dtype == SAS) || (phyp->dtype == SATA) ||
7320 	    (phyp->dtype == EXPANDER) || (phyp->dtype == NOTHING));
7321 
7322 	/*
7323 	 * Recurse down to the bottom PHYs
7324 	 */
7325 	if (level == 0) {
7326 		if (phyp->children) {
7327 			pmcs_unlock_phy_impl(phyp->children, level + 1);
7328 		}
7329 	} else {
7330 		phy_next = phyp;
7331 		while (phy_next) {
7332 			if (phy_next->children) {
7333 				pmcs_unlock_phy_impl(phy_next->children,
7334 				    level + 1);
7335 			}
7336 			phy_next = phy_next->sibling;
7337 		}
7338 	}
7339 
7340 	/*
7341 	 * Iterate through PHYs unlocking all at level > 0 as well the top PHY
7342 	 */
7343 	phy_next = phyp;
7344 	while (phy_next) {
7345 		if ((level > 0) || (phy_next == phyp)) {
7346 			pmcs_prt(phy_next->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
7347 			    "%s: PHY 0x%p parent 0x%p path %s lvl %d",
7348 			    __func__, (void *)phy_next,
7349 			    (void *)phy_next->parent, phy_next->path, level);
7350 			mutex_exit(&phy_next->phy_lock);
7351 		}
7352 
7353 		if (level == 0) {
7354 			return;
7355 		}
7356 
7357 		phy_next = phy_next->sibling;
7358 	}
7359 }
7360 
7361 /*
7362  * pmcs_unlock_phy
7363  *
7364  * Unlock a PHY and all its descendants
7365  */
7366 void
7367 pmcs_unlock_phy(pmcs_phy_t *phyp)
7368 {
7369 #ifdef DEBUG
7370 	char *callername = NULL;
7371 	ulong_t off;
7372 
7373 	ASSERT(phyp != NULL);
7374 
7375 	callername = modgetsymname((uintptr_t)caller(), &off);
7376 
7377 	if (callername == NULL) {
7378 		pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
7379 		    "%s: PHY 0x%p path %s caller: unknown", __func__,
7380 		    (void *)phyp, phyp->path);
7381 	} else {
7382 		pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
7383 		    "%s: PHY 0x%p path %s caller: %s+%lx", __func__,
7384 		    (void *)phyp, phyp->path, callername, off);
7385 	}
7386 #else
7387 	pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
7388 	    "%s: PHY 0x%p path %s", __func__, (void *)phyp, phyp->path);
7389 #endif
7390 	pmcs_unlock_phy_impl(phyp, 0);
7391 }
7392 
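/*
 * Illustrative sketch (editorial addition, not part of the driver): the
 * lock/unlock pair above is meant to bracket any traversal of a PHY
 * subtree.  A hypothetical caller would look like this:
 *
 *	pmcs_lock_phy(pptr);		locks pptr and all descendants
 *	... walk pptr->children / pptr->sibling safely ...
 *	pmcs_unlock_phy(pptr);		releases the subtree, bottom-up
 */
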
7393 /*
7394  * pmcs_get_root_phy
7395  *
7396  * For a given PHY pointer, return its root PHY.
7397  * The caller must be holding the lock on every PHY from phyp up to the root.
7398  */
7399 pmcs_phy_t *
7400 pmcs_get_root_phy(pmcs_phy_t *phyp)
7401 {
7402 	ASSERT(phyp);
7403 
7404 	while (phyp) {
7405 		if (IS_ROOT_PHY(phyp)) {
7406 			break;
7407 		}
7408 		phyp = phyp->parent;
7409 	}
7410 
7411 	return (phyp);
7412 }
7413 
7414 /*
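/*
 * Illustrative sketch (assumed usage, not from this file): the locking
 * precondition above is normally satisfied by taking the subtree lock at
 * the root, e.g.:
 *
 *	pmcs_lock_phy(rphyp);				rphyp: a root PHY
 *	ASSERT(pmcs_get_root_phy(phyp) == rphyp);	phyp in that subtree
 *	pmcs_unlock_phy(rphyp);
 */
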
7415  * pmcs_free_dma_chunklist
7416  *
7417  * Free DMA S/G chunk list
7418  */
7419 void
7420 pmcs_free_dma_chunklist(pmcs_hw_t *pwp)
7421 {
7422 	pmcs_chunk_t	*pchunk;
7423 
7424 	while (pwp->dma_chunklist) {
7425 		pchunk = pwp->dma_chunklist;
7426 		pwp->dma_chunklist = pwp->dma_chunklist->next;
7427 		if (pchunk->dma_handle) {
7428 			if (ddi_dma_unbind_handle(pchunk->dma_handle) !=
7429 			    DDI_SUCCESS) {
7430 				pmcs_prt(pwp, PMCS_PRT_DEBUG, "Condition failed"
7431 				    " at %s():%d", __func__, __LINE__);
7432 			}
7433 			ddi_dma_free_handle(&pchunk->dma_handle);
7434 			ddi_dma_mem_free(&pchunk->acc_handle);
7435 		}
7436 		kmem_free(pchunk, sizeof (pmcs_chunk_t));
7437 	}
7438 }
7439 
7440 
7441 /*
7442  * Start SSP event recovery. We have to schedule the recovery operation
7443  * because it involves sending multiple commands to the device, and we
7444  * should not do that in interrupt context.
7445  * If this is the failure of a recovery command, let the recovery thread
7446  * deal with it. Called with the pmcwork lock held.
7447  */
7448 
7449 void
7450 pmcs_start_ssp_event_recovery(pmcs_hw_t *pwp, pmcwork_t *pwrk, uint32_t *iomb,
7451     size_t amt)
7452 {
7453 	pmcs_xscsi_t *tgt = pwrk->xp;
7454 	uint32_t event = LE_32(iomb[2]);
7455 	pmcs_phy_t *pptr = pwrk->phy;
7456 	uint32_t tag;
7457 
7458 	if (tgt != NULL) {
7459 		mutex_enter(&tgt->statlock);
7460 		if (tgt->dying || !tgt->assigned) {
7461 			if (pptr) {
7462 				pmcs_dec_phy_ref_count(pptr);
7463 			}
7464 			pptr = NULL;
7465 			pwrk->phy = NULL;
7466 		}
7467 		mutex_exit(&tgt->statlock);
7468 	}
7469 	if (pptr == NULL) {
7470 		/*
7471 		 * No target, or the target is dying.  Need to run RE-DISCOVERY here.
7472 		 */
7473 		if (pwrk->state != PMCS_WORK_STATE_TIMED_OUT) {
7474 			pwrk->state = PMCS_WORK_STATE_INTR;
7475 		}
7476 		/*
7477 		 * Although we cannot mark the PHY to force an abort, nor mark
7478 		 * it as changed, killing the target will take care of
7479 		 * aborting the commands for the device.
7480 		 */
7481 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: No valid target for event "
7482 		    "processing found. Scheduling RECONFIGURE",  __func__);
7483 		pmcs_pwork(pwp, pwrk);
7484 		RESTART_DISCOVERY(pwp);
7485 		return;
7486 	} else {
7487 		pmcs_lock_phy(pptr);
7488 		mutex_enter(&tgt->statlock);
7489 		if (event == PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS) {
7490 			if (tgt->dev_state !=
7491 			    PMCS_DEVICE_STATE_NON_OPERATIONAL) {
7492 				pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Device at "
7493 				    "%s is non-operational", __func__,
7494 				    pptr->path);
7495 				tgt->dev_state =
7496 				    PMCS_DEVICE_STATE_NON_OPERATIONAL;
7497 			}
7498 			pptr->abort_pending = 1;
7499 			mutex_exit(&tgt->statlock);
7500 			pmcs_unlock_phy(pptr);
7501 			mutex_exit(&pwrk->lock);
7502 			SCHEDULE_WORK(pwp, PMCS_WORK_ABORT_HANDLE);
7503 			RESTART_DISCOVERY(pwp);
7504 			return;
7505 		}
7506 
7507 		/*
7508 		 * If this command was run in WAIT mode, it is a failed
7509 		 * recovery command.  If so, just wake up the recovery thread
7510 		 * waiting for command completion.
7511 		 */
7512 		tag = PMCS_TAG_TYPE(pwrk->htag);
7513 		if (tag == PMCS_TAG_TYPE_WAIT) {
7514 			pwrk->htag |= PMCS_TAG_DONE;
7515 			if (pwrk->arg && amt) {
7516 				(void) memcpy(pwrk->arg, iomb, amt);
7517 			}
7518 			cv_signal(&pwrk->sleep_cv);
7519 			mutex_exit(&tgt->statlock);
7520 			pmcs_unlock_phy(pptr);
7521 			mutex_exit(&pwrk->lock);
7522 			return;
7523 		}
7524 
7525 		/*
7526 		 * To recover from primary failures, we need to schedule
7527 		 * event recovery handling.
7528 		 */
7529 		tgt->event_recovery = 1;
7530 		mutex_exit(&tgt->statlock);
7531 		pmcs_unlock_phy(pptr);
7532 		pwrk->ssp_event = event;
7533 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
7534 		    "%s: Scheduling SSP event recovery for tgt(0x%p) "
7535 		    "pwrk(%p) tag(0x%x)", __func__, (void *)tgt, (void *)pwrk,
7536 		    pwrk->htag);
7537 		mutex_exit(&pwrk->lock);
7538 		SCHEDULE_WORK(pwp, PMCS_WORK_SSP_EVT_RECOVERY);
7539 	}
7540 
7541 	/* Work cannot be completed until event recovery is completed. */
7542 }
7543 
7544 /*
7545  * SSP target event recovery
7546  * Entered with the PHY lock held.
7547  * The pwrk lock is not needed - pwrk is on the target's aq, and no other thread
7548  * will do anything with it until this thread starts the chain of recovery.
7549  * Statlock may be acquired and released.
7550  */
7551 
7552 void
7553 pmcs_tgt_event_recovery(pmcs_hw_t *pwp, pmcwork_t *pwrk)
7554 {
7555 	pmcs_phy_t *pptr = pwrk->phy;
7556 	pmcs_cmd_t *sp = pwrk->arg;
7557 	pmcs_lun_t *lun = sp->cmd_lun;
7558 	pmcs_xscsi_t *tgt = pwrk->xp;
7559 	uint32_t event;
7560 	uint32_t htag;
7561 	uint32_t status;
7562 	uint8_t dstate;
7563 	int rv;
7564 
7565 	ASSERT(pwrk->arg != NULL);
7566 	ASSERT(pwrk->xp != NULL);
7567 	pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: event recovery for "
7568 	    "target 0x%p", __func__, (void *)pwrk->xp);
7569 	htag = pwrk->htag;
7570 	event = pwrk->ssp_event;
7571 	pwrk->ssp_event = 0xffffffff;
7572 	if (event == PMCOUT_STATUS_XFER_ERR_BREAK ||
7573 	    event == PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY ||
7574 	    event == PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT) {
7575 		/* The command may still be pending on the device */
7576 		rv = pmcs_ssp_tmf(pwp, pptr, SAS_QUERY_TASK, htag,
7577 		    lun->lun_num, &status);
7578 		if (rv != 0) {
7579 			goto out;
7580 		}
7581 		if (status == SAS_RSP_TMF_COMPLETE) {
7582 			/* Command NOT pending on a device */
7583 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
7584 			    "%s: No pending command for tgt 0x%p",
7585 			    __func__, (void *)tgt);
7586 			/* Nothing more to do, just abort it on chip */
7587 			htag = 0;
7588 		}
7589 	}
7590 	/*
7591 	 * All other events left the command pending in the host.
7592 	 * Send an ABORT TASK TMF, then abort the command on the chip.
7593 	 */
7594 	if (htag != 0) {
7595 		if (pmcs_ssp_tmf(pwp, pptr, SAS_ABORT_TASK, htag,
7596 		    lun->lun_num, &status))
7597 			goto out;
7598 	}
7599 	(void) pmcs_abort(pwp, pptr, pwrk->htag, 0, 1);
7600 	/*
7601 	 * The abort either took care of work completion or put the device
7602 	 * into a recovery state.
7603 	 */
7604 	return;
7605 out:
7606 	/* Abort failed, do full device recovery */
7607 	mutex_enter(&tgt->statlock);
7608 	if (!pmcs_get_dev_state(pwp, tgt, &dstate))
7609 		tgt->dev_state = dstate;
7610 
7611 	if ((tgt->dev_state != PMCS_DEVICE_STATE_IN_RECOVERY) &&
7612 	    (tgt->dev_state != PMCS_DEVICE_STATE_NON_OPERATIONAL)) {
7613 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
7614 		    "%s: Setting IN_RECOVERY for tgt 0x%p",
7615 		    __func__, (void *)tgt);
7616 		(void) pmcs_send_err_recovery_cmd(pwp,
7617 		    PMCS_DEVICE_STATE_IN_RECOVERY, tgt);
7618 	}
7619 	mutex_exit(&tgt->statlock);
7620 }
7621 
7622 /*
7623  * SSP event recovery task.
7624  */
7625 void
7626 pmcs_ssp_event_recovery(pmcs_hw_t *pwp)
7627 {
7628 	int idx;
7629 	pmcs_xscsi_t *tgt;
7630 	pmcs_cmd_t *cp;
7631 	pmcwork_t *pwrk;
7632 	pmcs_phy_t *pphy;
7633 	int er_flag;
7634 	uint32_t idxpwrk;
7635 
7636 restart:
7637 	for (idx = 0; idx < pwp->max_dev; idx++) {
7638 		mutex_enter(&pwp->lock);
7639 		tgt = pwp->targets[idx];
7640 		mutex_exit(&pwp->lock);
7641 		if (tgt != NULL) {
7642 			mutex_enter(&tgt->statlock);
7643 			if (tgt->dying || !tgt->assigned) {
7644 				mutex_exit(&tgt->statlock);
7645 				continue;
7646 			}
7647 			pphy = tgt->phy;
7648 			er_flag = tgt->event_recovery;
7649 			mutex_exit(&tgt->statlock);
7650 			if (pphy != NULL && er_flag != 0) {
7651 				pmcs_lock_phy(pphy);
7652 				mutex_enter(&tgt->statlock);
7653 				pmcs_prt(pwp, PMCS_PRT_DEBUG,
7654 				    "%s: found target(0x%p)", __func__,
7655 				    (void *) tgt);
7656 
7657 				/* Check what cmd expects recovery */
7658 				mutex_enter(&tgt->aqlock);
7659 				STAILQ_FOREACH(cp, &tgt->aq, cmd_next) {
7660 					/*
7661 					 * Since the work structure is on
7662 					 * this target's aq, and only this
7663 					 * thread is accessing it now, we
7664 					 * do not need to lock it.
7665 					 */
7666 					idxpwrk = PMCS_TAG_INDEX(cp->cmd_tag);
7667 					pwrk = &pwp->work[idxpwrk];
7668 					if (pwrk->htag != cp->cmd_tag) {
7669 						/*
7670 						 * The aq may contain TMF
7671 						 * commands, so we may not find
7672 						 * a work structure for this tag
7673 						 */
7674 						break;
7675 					}
7676 					if (pwrk->ssp_event != 0 &&
7677 					    pwrk->ssp_event !=
7678 					    PMCS_REC_EVENT) {
7679 						pmcs_prt(pwp,
7680 						    PMCS_PRT_DEBUG,
7681 						    "%s: pwrk(%p) ctag(0x%x)",
7682 						    __func__, (void *) pwrk,
7683 						    cp->cmd_tag);
7684 						mutex_exit(&tgt->aqlock);
7685 						mutex_exit(&tgt->statlock);
7686 						pmcs_tgt_event_recovery(
7687 						    pwp, pwrk);
7688 						/*
7689 						 * We dropped statlock, so
7690 						 * restart scanning from scratch
7691 						 */
7692 						pmcs_unlock_phy(pphy);
7693 						goto restart;
7694 					}
7695 				}
7696 				mutex_exit(&tgt->aqlock);
7697 				tgt->event_recovery = 0;
7698 				pmcs_prt(pwp, PMCS_PRT_DEBUG,
7699 				    "%s: end of SSP event recovery for "
7700 				    "target(0x%p)", __func__, (void *) tgt);
7701 				mutex_exit(&tgt->statlock);
7702 				pmcs_unlock_phy(pphy);
7703 			}
7704 		}
7705 	}
7706 	pmcs_prt(pwp, PMCS_PRT_DEBUG,
7707 	    "%s: end of SSP event recovery for pwp(0x%p)", __func__,
7708 	    (void *) pwp);
7709 }
7710 
7711 /*ARGSUSED2*/
7712 int
7713 pmcs_phy_constructor(void *buf, void *arg, int kmflags)
7714 {
7715 	pmcs_hw_t *pwp = (pmcs_hw_t *)arg;
7716 	pmcs_phy_t *phyp = (pmcs_phy_t *)buf;
7717 
7718 	mutex_init(&phyp->phy_lock, NULL, MUTEX_DRIVER,
7719 	    DDI_INTR_PRI(pwp->intr_pri));
7720 	cv_init(&phyp->abort_all_cv, NULL, CV_DRIVER, NULL);
7721 	return (0);
7722 }
7723 
7724 /*ARGSUSED1*/
7725 void
7726 pmcs_phy_destructor(void *buf, void *arg)
7727 {
7728 	pmcs_phy_t *phyp = (pmcs_phy_t *)buf;
7729 
7730 	cv_destroy(&phyp->abort_all_cv);
7731 	mutex_destroy(&phyp->phy_lock);
7732 }
7733 
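/*
 * Illustrative sketch (assumed; the cache name and the call site are
 * hypothetical - the actual cache creation lives elsewhere in the driver):
 * the routines above are the constructor/destructor pair for the PHY kmem
 * cache, wired up via the standard kmem_cache_create(9F) interface:
 *
 *	pwp->phy_cache = kmem_cache_create("pmcs_phy_cache",
 *	    sizeof (pmcs_phy_t), 0, pmcs_phy_constructor,
 *	    pmcs_phy_destructor, NULL, (void *)pwp, NULL, 0);
 */
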
7734 /*
7735  * Free all PHYs from the kmem_cache starting at phyp as well as everything
7736  * on the dead_phys list.
7737  *
7738  * NOTE: This function does not free root PHYs as they are not allocated
7739  * from the kmem_cache.
7740  *
7741  * No PHY locks are acquired as this should only be called during DDI_DETACH
7742  * or soft reset (while pmcs interrupts are disabled).
7743  */
7744 void
7745 pmcs_free_all_phys(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
7746 {
7747 	pmcs_phy_t *tphyp, *nphyp;
7748 
7749 	if (phyp == NULL) {
7750 		return;
7751 	}
7752 
7753 	tphyp = phyp;
7754 	while (tphyp) {
7755 		nphyp = tphyp->sibling;
7756 
7757 		if (tphyp->children) {
7758 			pmcs_free_all_phys(pwp, tphyp->children);
7759 			tphyp->children = NULL;
7760 		}
7761 		if (!IS_ROOT_PHY(tphyp)) {
7762 			kmem_cache_free(pwp->phy_cache, tphyp);
7763 		}
7764 
7765 		tphyp = nphyp;
7766 	}
7767 
7768 	tphyp = pwp->dead_phys;
7769 	while (tphyp) {
7770 		nphyp = tphyp->sibling;
7771 		kmem_cache_free(pwp->phy_cache, tphyp);
7772 		tphyp = nphyp;
7773 	}
7774 	pwp->dead_phys = NULL;
7775 }
7776 
7777 /*
7778  * Free a list of PHYs linked together by the sibling pointer back to the
7779  * kmem cache from whence they came.  This function does not recurse, so the
7780  * caller must ensure there are no children.
7781  */
7782 void
7783 pmcs_free_phys(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
7784 {
7785 	pmcs_phy_t *next_phy;
7786 
7787 	while (phyp) {
7788 		next_phy = phyp->sibling;
7789 		ASSERT(!mutex_owned(&phyp->phy_lock));
7790 		kmem_cache_free(pwp->phy_cache, phyp);
7791 		phyp = next_phy;
7792 	}
7793 }
7794 
7795 /*
7796  * Make a copy of an existing PHY structure.  This is used primarily in
7797  * discovery to compare the contents of an existing PHY with what gets
7798  * reported back by an expander.
7799  *
7800  * This function must only be called from a context in which sleeping
7801  * is possible, since it allocates from the cache with KM_SLEEP.
7802  *
7803  * The new PHY is returned unlocked.
7804  */
7805 static pmcs_phy_t *
7806 pmcs_clone_phy(pmcs_phy_t *orig_phy)
7807 {
7808 	pmcs_phy_t *local;
7809 
7810 	local = kmem_cache_alloc(orig_phy->pwp->phy_cache, KM_SLEEP);
7811 
7812 	/*
7813 	 * Go ahead and just copy everything...
7814 	 */
7815 	*local = *orig_phy;
7816 
7817 	/*
7818 	 * But the following must be set appropriately for this copy
7819 	 */
7820 	local->sibling = NULL;
7821 	local->children = NULL;
7822 	mutex_init(&local->phy_lock, NULL, MUTEX_DRIVER,
7823 	    DDI_INTR_PRI(orig_phy->pwp->intr_pri));
7824 
7825 	return (local);
7826 }
7827 
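/*
 * Illustrative sketch (assumed discovery-time usage; the comparison details
 * are elided and hypothetical): clone the current PHY, overwrite the copy
 * with freshly discovered attributes, and compare it against the original:
 *
 *	local = pmcs_clone_phy(pptr);
 *	... fill "local" in from the SMP DISCOVER response ...
 *	... compare "local" against "pptr" field by field ...
 *	kmem_cache_free(pwp->phy_cache, local);
 */
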
7828 int
7829 pmcs_check_acc_handle(ddi_acc_handle_t handle)
7830 {
7831 	ddi_fm_error_t de;
7832 
7833 	if (handle == NULL) {
7834 		return (DDI_FAILURE);
7835 	}
7836 	ddi_fm_acc_err_get(handle, &de, DDI_FME_VER0);
7837 	return (de.fme_status);
7838 }
7839 
7840 int
7841 pmcs_check_dma_handle(ddi_dma_handle_t handle)
7842 {
7843 	ddi_fm_error_t de;
7844 
7845 	if (handle == NULL) {
7846 		return (DDI_FAILURE);
7847 	}
7848 	ddi_fm_dma_err_get(handle, &de, DDI_FME_VER0);
7849 	return (de.fme_status);
7850 }
7851 
7852 
7853 void
7854 pmcs_fm_ereport(pmcs_hw_t *pwp, char *detail)
7855 {
7856 	uint64_t ena;
7857 	char buf[FM_MAX_CLASS];
7858 
7859 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s", DDI_FM_DEVICE, detail);
7860 	ena = fm_ena_generate(0, FM_ENA_FMT1);
7861 	if (DDI_FM_EREPORT_CAP(pwp->fm_capabilities)) {
7862 		ddi_fm_ereport_post(pwp->dip, buf, ena, DDI_NOSLEEP,
7863 		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0, NULL);
7864 	}
7865 }
7866 
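/*
 * Illustrative sketch (assumed caller, not from this file): "detail" is one
 * of the DDI_FM_DEVICE_* ereport subclass strings from <sys/ddifm.h>, e.g.:
 *
 *	pmcs_fm_ereport(pwp, DDI_FM_DEVICE_NO_RESPONSE);
 *
 * which posts a "device.no_response" class ereport, provided the HBA's FM
 * capabilities include ereport support.
 */
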
7867 int
7868 pmcs_check_acc_dma_handle(pmcs_hw_t *pwp)
7869 {
7870 	pmcs_chunk_t *pchunk;
7871 	int i;
7872 
7873 	/* check all acc & dma handles allocated in attach */
7874 	if ((pmcs_check_acc_handle(pwp->pci_acc_handle) != DDI_SUCCESS) ||
7875 	    (pmcs_check_acc_handle(pwp->msg_acc_handle) != DDI_SUCCESS) ||
7876 	    (pmcs_check_acc_handle(pwp->top_acc_handle) != DDI_SUCCESS) ||
7877 	    (pmcs_check_acc_handle(pwp->mpi_acc_handle) != DDI_SUCCESS) ||
7878 	    (pmcs_check_acc_handle(pwp->gsm_acc_handle) != DDI_SUCCESS)) {
7879 		goto check_failed;
7880 	}
7881 
7882 	for (i = 0; i < PMCS_NIQ; i++) {
7883 		if ((pmcs_check_dma_handle(
7884 		    pwp->iqp_handles[i]) != DDI_SUCCESS) ||
7885 		    (pmcs_check_acc_handle(
7886 		    pwp->iqp_acchdls[i]) != DDI_SUCCESS)) {
7887 			goto check_failed;
7888 		}
7889 	}
7890 
7891 	for (i = 0; i < PMCS_NOQ; i++) {
7892 		if ((pmcs_check_dma_handle(
7893 		    pwp->oqp_handles[i]) != DDI_SUCCESS) ||
7894 		    (pmcs_check_acc_handle(
7895 		    pwp->oqp_acchdls[i]) != DDI_SUCCESS)) {
7896 			goto check_failed;
7897 		}
7898 	}
7899 
7900 	if ((pmcs_check_dma_handle(pwp->cip_handles) != DDI_SUCCESS) ||
7901 	    (pmcs_check_acc_handle(pwp->cip_acchdls) != DDI_SUCCESS)) {
7902 		goto check_failed;
7903 	}
7904 
7905 	if (pwp->fwlog &&
7906 	    ((pmcs_check_dma_handle(pwp->fwlog_hndl) != DDI_SUCCESS) ||
7907 	    (pmcs_check_acc_handle(pwp->fwlog_acchdl) != DDI_SUCCESS))) {
7908 		goto check_failed;
7909 	}
7910 
7911 	if (pwp->regdump_hndl && pwp->regdump_acchdl &&
7912 	    ((pmcs_check_dma_handle(pwp->regdump_hndl) != DDI_SUCCESS) ||
7913 	    (pmcs_check_acc_handle(pwp->regdump_acchdl)
7914 	    != DDI_SUCCESS))) {
7915 		goto check_failed;
7916 	}
7917 
7918 
7919 	pchunk = pwp->dma_chunklist;
7920 	while (pchunk) {
7921 		if ((pmcs_check_acc_handle(pchunk->acc_handle)
7922 		    != DDI_SUCCESS) ||
7923 		    (pmcs_check_dma_handle(pchunk->dma_handle)
7924 		    != DDI_SUCCESS)) {
7925 			goto check_failed;
7926 		}
7927 		pchunk = pchunk->next;
7928 	}
7929 
7930 	return (0);
7931 
7932 check_failed:
7933 
7934 	return (1);
7935 }
7936 
7937 /*
7938  * pmcs_handle_dead_phys
7939  *
7940  * If the PHY has no outstanding work associated with it, remove it from
7941  * the dead PHY list and free it.
7942  *
7943  * If pwp->ds_err_recovering or pwp->configuring is set, don't run.
7944  * This keeps routines that need to submit work to the chip from having to
7945  * hold PHY locks to ensure that PHYs don't disappear while they do their work.
7946  */
7947 void
7948 pmcs_handle_dead_phys(pmcs_hw_t *pwp)
7949 {
7950 	pmcs_phy_t *phyp, *nphyp, *pphyp;
7951 
7952 	mutex_enter(&pwp->lock);
7953 	mutex_enter(&pwp->config_lock);
7954 
7955 	if (pwp->configuring || pwp->ds_err_recovering) {
7956 		mutex_exit(&pwp->config_lock);
7957 		mutex_exit(&pwp->lock);
7958 		return;
7959 	}
7960 
7961 	/*
7962 	 * Check every PHY in the dead PHY list
7963 	 */
7964 	mutex_enter(&pwp->dead_phylist_lock);
7965 	phyp = pwp->dead_phys;
7966 	pphyp = NULL;	/* Set previous PHY to NULL */
7967 
7968 	while (phyp != NULL) {
7969 		pmcs_lock_phy(phyp);
7970 		ASSERT(phyp->dead);
7971 
7972 		nphyp = phyp->dead_next;
7973 
7974 		/*
7975 		 * Check for outstanding work
7976 		 */
7977 		if (phyp->ref_count > 0) {
7978 			pmcs_unlock_phy(phyp);
7979 			pphyp = phyp;	/* This PHY becomes "previous" */
7980 		} else if (phyp->target) {
7981 			pmcs_unlock_phy(phyp);
7982 			pmcs_prt(pwp, PMCS_PRT_DEBUG1,
7983 			    "%s: Not freeing PHY 0x%p: target 0x%p is not free",
7984 			    __func__, (void *)phyp, (void *)phyp->target);
7985 			pphyp = phyp;
7986 		} else {
7987 			/*
7988 			 * No outstanding work or target references. Remove it
7989 			 * from the list and free it
7990 			 */
7991 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
7992 			    "%s: Freeing inactive dead PHY 0x%p @ %s "
7993 			    "target = 0x%p", __func__, (void *)phyp,
7994 			    phyp->path, (void *)phyp->target);
7995 			/*
7996 			 * If pphyp is NULL, then phyp was the head of the list,
7997 			 * so just reset the head to nphyp. Otherwise, the
7998 			 * previous PHY will now point to nphyp (the next PHY)
7999 			 */
8000 			if (pphyp == NULL) {
8001 				pwp->dead_phys = nphyp;
8002 			} else {
8003 				pphyp->dead_next = nphyp;
8004 			}
8005 			/*
8006 			 * If the target still points to this PHY, remove
8007 			 * that linkage now.
8008 			 */
8009 			if (phyp->target) {
8010 				mutex_enter(&phyp->target->statlock);
8011 				if (phyp->target->phy == phyp) {
8012 					phyp->target->phy = NULL;
8013 				}
8014 				mutex_exit(&phyp->target->statlock);
8015 			}
8016 			kmem_cache_free(pwp->phy_cache, phyp);
8017 		}
8018 
8019 		phyp = nphyp;
8020 	}
8021 
8022 	mutex_exit(&pwp->dead_phylist_lock);
8023 	mutex_exit(&pwp->config_lock);
8024 	mutex_exit(&pwp->lock);
8025 }
8026 
8027 void
8028 pmcs_inc_phy_ref_count(pmcs_phy_t *phyp)
8029 {
8030 	atomic_inc_32(&phyp->ref_count);
8031 }
8032 
8033 void
8034 pmcs_dec_phy_ref_count(pmcs_phy_t *phyp)
8035 {
8036 	ASSERT(phyp->ref_count != 0);
8037 	atomic_dec_32(&phyp->ref_count);
8038 }
8039 
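/*
 * Illustrative sketch (assumed usage pattern): holders take a reference
 * while work is outstanding against a PHY so that pmcs_handle_dead_phys()
 * above will not free it out from under them:
 *
 *	pmcs_inc_phy_ref_count(pptr);
 *	... issue and complete work referencing pptr ...
 *	pmcs_dec_phy_ref_count(pptr);
 */
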
8040 /*
8041  * pmcs_reap_dead_phy
8042  *
8043  * This function is called from pmcs_new_tport when we have a PHY
8044  * without a target pointer.  It's possible in that case that this PHY
8045  * may have a "brother" on the dead_phys list.  That is, it may be the same as
8046  * this one but with a different root PHY number (e.g. pp05 vs. pp04).  If
8047  * that's the case, update the dead PHY and this new PHY.  If that's not the
8048  * case, we should get a tran_tgt_init on this after it's reported to SCSA.
8049  *
8050  * Called with PHY locked.
8051  */
8052 static void
8053 pmcs_reap_dead_phy(pmcs_phy_t *phyp)
8054 {
8055 	pmcs_hw_t *pwp = phyp->pwp;
8056 	pmcs_phy_t *ctmp;
8057 
8058 	ASSERT(mutex_owned(&phyp->phy_lock));
8059 
8060 	/*
8061 	 * Check the dead PHYs list
8062 	 */
8063 	mutex_enter(&pwp->dead_phylist_lock);
8064 	ctmp = pwp->dead_phys;
8065 	while (ctmp) {
8066 		if ((ctmp->iport != phyp->iport) ||
8067 		    (memcmp((void *)&ctmp->sas_address[0],
8068 		    (void *)&phyp->sas_address[0], 8))) {
8069 			ctmp = ctmp->dead_next;
8070 			continue;
8071 		}
8072 
8073 		/*
8074 		 * Same SAS address on same iport.  Now check to see if
8075 		 * the PHY path is the same with the possible exception
8076 		 * of the root PHY number.
8077 		 * The "5" is the string length of "pp00."
8078 		 */
8079 		if ((strnlen(phyp->path, 5) >= 5) &&
8080 		    (strnlen(ctmp->path, 5) >= 5)) {
8081 			if (memcmp((void *)&phyp->path[5],
8082 			    (void *)&ctmp->path[5],
8083 			    strnlen(phyp->path, 32) - 5) == 0) {
8084 				break;
8085 			}
8086 		}
8087 
8088 		ctmp = ctmp->dead_next;
8089 	}
8090 	mutex_exit(&pwp->dead_phylist_lock);
8091 
8092 	/*
8093 	 * Found a match.  Remove the target linkage and drop the
8094 	 * ref count on the old PHY.  Then, increment the ref count
8095 	 * on the new PHY to compensate.
8096 	 */
8097 	if (ctmp) {
8098 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
8099 		    "%s: Found match in dead PHY list for new PHY %s",
8100 		    __func__, phyp->path);
8101 		if (ctmp->target) {
8102 			/*
8103 			 * If there is a pointer to the target in the dead
8104 			 * PHY, and that PHY's ref_count drops to 0, we can
8105 			 * clear the target linkage now.  If the PHY's
8106 			 * ref_count is > 1, then there may be multiple
8107 			 * LUNs still remaining, so leave the linkage.
8108 			 */
8109 			pmcs_inc_phy_ref_count(phyp);
8110 			pmcs_dec_phy_ref_count(ctmp);
8111 			phyp->target = ctmp->target;
8112 			/*
8113 			 * Update the target's linkage as well
8114 			 */
8115 			mutex_enter(&phyp->target->statlock);
8116 			phyp->target->phy = phyp;
8117 			phyp->target->dtype = phyp->dtype;
8118 			mutex_exit(&phyp->target->statlock);
8119 
8120 			if (ctmp->ref_count == 0) {
8121 				ctmp->target = NULL;
8122 			}
8123 		}
8124 	}
8125 }
8126 
8127 /*
8128  * Called with iport lock held
8129  */
8130 void
8131 pmcs_add_phy_to_iport(pmcs_iport_t *iport, pmcs_phy_t *phyp)
8132 {
8133 	ASSERT(mutex_owned(&iport->lock));
8134 	ASSERT(phyp);
8135 	ASSERT(!list_link_active(&phyp->list_node));
8136 	iport->nphy++;
8137 	pmcs_smhba_add_iport_prop(iport, DATA_TYPE_INT32, PMCS_NUM_PHYS,
8138 	    &iport->nphy);
8139 	list_insert_tail(&iport->phys, phyp);
8140 	mutex_enter(&iport->refcnt_lock);
8141 	iport->refcnt++;
8142 	mutex_exit(&iport->refcnt_lock);
8143 }
8144 
8145 /*
8146  * Called with the iport lock held
8147  */
8148 void
8149 pmcs_remove_phy_from_iport(pmcs_iport_t *iport, pmcs_phy_t *phyp)
8150 {
8151 	pmcs_phy_t *pptr, *next_pptr;
8152 
8153 	ASSERT(mutex_owned(&iport->lock));
8154 
8155 	/*
8156 	 * If phyp is NULL, remove all PHYs from the iport
8157 	 */
8158 	if (phyp == NULL) {
8159 		for (pptr = list_head(&iport->phys); pptr != NULL;
8160 		    pptr = next_pptr) {
8161 			next_pptr = list_next(&iport->phys, pptr);
8162 			mutex_enter(&pptr->phy_lock);
8163 			pptr->iport = NULL;
8164 			mutex_exit(&pptr->phy_lock);
8165 			pmcs_rele_iport(iport);
8166 			list_remove(&iport->phys, pptr);
8167 		}
8168 		iport->nphy = 0;
8169 		return;
8170 	}
8171 
8172 	ASSERT(phyp);
8173 	ASSERT(iport->nphy > 0);
8174 	ASSERT(list_link_active(&phyp->list_node));
8175 	iport->nphy--;
8176 	pmcs_smhba_add_iport_prop(iport, DATA_TYPE_INT32, PMCS_NUM_PHYS,
8177 	    &iport->nphy);
8178 	list_remove(&iport->phys, phyp);
8179 	pmcs_rele_iport(iport);
8180 }
8181 
8182 /*
8183  * This function checks to see if the target pointed to by phyp is still
8184  * correct.  This is done by comparing the target's unit address with the
8185  * SAS address in phyp.
8186  *
8187  * Called with PHY locked and target statlock held
8188  */
8189 static boolean_t
8190 pmcs_phy_target_match(pmcs_phy_t *phyp)
8191 {
8192 	uint64_t wwn;
8193 	char unit_address[PMCS_MAX_UA_SIZE];
8194 	boolean_t rval = B_FALSE;
8195 
8196 	ASSERT(phyp);
8197 	ASSERT(phyp->target);
8198 	ASSERT(mutex_owned(&phyp->phy_lock));
8199 	ASSERT(mutex_owned(&phyp->target->statlock));
8200 
8201 	wwn = pmcs_barray2wwn(phyp->sas_address);
8202 	(void) scsi_wwn_to_wwnstr(wwn, 1, unit_address);
8203 
8204 	if (memcmp((void *)unit_address, (void *)phyp->target->unit_address,
8205 	    strnlen(phyp->target->unit_address, PMCS_MAX_UA_SIZE)) == 0) {
8206 		rval = B_TRUE;
8207 	}
8208 
8209 	return (rval);
8210 }
8211 
8212 void
8213 pmcs_start_dev_state_recovery(pmcs_xscsi_t *xp, pmcs_phy_t *phyp)
8214 {
8215 	ASSERT(mutex_owned(&xp->statlock));
8216 	ASSERT(xp->pwp != NULL);
8217 
8218 	if (xp->recover_wait == 0) {
8219 		pmcs_prt(xp->pwp, PMCS_PRT_DEBUG_DEV_STATE,
8220 		    "%s: Start ds_recovery for tgt 0x%p/PHY 0x%p (%s)",
8221 		    __func__, (void *)xp, (void *)phyp, phyp->path);
8222 		xp->recover_wait = 1;
8223 
8224 		/*
8225 		 * Rather than waiting for the watchdog timer, we'll
8226 		 * kick it right now.
8227 		 */
8228 		SCHEDULE_WORK(xp->pwp, PMCS_WORK_DS_ERR_RECOVERY);
8229 		(void) ddi_taskq_dispatch(xp->pwp->tq, pmcs_worker, xp->pwp,
8230 		    DDI_NOSLEEP);
8231 	}
8232 }
8233 
8234 /*
8235  * Increment the PHY's ds error retry count.
8236  * If too many retries, mark the PHY dead and restart discovery;
8237  * otherwise schedule ds recovery.
8238  */
8239 static void
8240 pmcs_handle_ds_recovery_error(pmcs_phy_t *phyp, pmcs_xscsi_t *tgt,
8241     pmcs_hw_t *pwp, const char *func_name, int line, char *reason_string)
8242 {
8243 	ASSERT(mutex_owned(&phyp->phy_lock));
8244 
8245 	phyp->ds_recovery_retries++;
8246 
8247 	if (phyp->ds_recovery_retries > PMCS_MAX_DS_RECOVERY_RETRIES) {
8248 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
8249 		    "%s: retry limit reached after %s to PHY %s failed",
8250 		    func_name, reason_string, phyp->path);
8251 		tgt->recover_wait = 0;
8252 		phyp->dead = 1;
8253 		PHY_CHANGED_AT_LOCATION(pwp, phyp, func_name, line);
8254 		RESTART_DISCOVERY(pwp);
8255 	} else {
8256 		SCHEDULE_WORK(pwp, PMCS_WORK_DS_ERR_RECOVERY);
8257 	}
8258 }
8259