xref: /illumos-gate/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_subr.c (revision 145e0143b4896d03ce53b1af6787afa1a7e73959)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  *
21  *
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * This file contains various support routines.
28  */
29 
30 #include <sys/scsi/adapters/pmcs/pmcs.h>
31 
32 /*
33  * Local static data
34  */
35 static int tgtmap_usec = MICROSEC;
36 
37 /*
38  * SAS Topology Configuration
39  */
40 static void pmcs_new_tport(pmcs_hw_t *, pmcs_phy_t *);
41 static void pmcs_configure_expander(pmcs_hw_t *, pmcs_phy_t *, pmcs_iport_t *);
42 
43 static boolean_t pmcs_check_expanders(pmcs_hw_t *, pmcs_phy_t *);
44 static void pmcs_check_expander(pmcs_hw_t *, pmcs_phy_t *);
45 static void pmcs_clear_expander(pmcs_hw_t *, pmcs_phy_t *, int);
46 
47 static int pmcs_expander_get_nphy(pmcs_hw_t *, pmcs_phy_t *);
48 static int pmcs_expander_content_discover(pmcs_hw_t *, pmcs_phy_t *,
49     pmcs_phy_t *);
50 
51 static int pmcs_smp_function_result(pmcs_hw_t *, smp_response_frame_t *);
52 static boolean_t pmcs_validate_devid(pmcs_phy_t *, pmcs_phy_t *, uint32_t);
53 static void pmcs_clear_phys(pmcs_hw_t *, pmcs_phy_t *);
54 static int pmcs_configure_new_devices(pmcs_hw_t *, pmcs_phy_t *);
55 static boolean_t pmcs_report_observations(pmcs_hw_t *);
56 static boolean_t pmcs_report_iport_observations(pmcs_hw_t *, pmcs_iport_t *,
57     pmcs_phy_t *);
58 static pmcs_phy_t *pmcs_find_phy_needing_work(pmcs_hw_t *, pmcs_phy_t *);
59 static int pmcs_kill_devices(pmcs_hw_t *, pmcs_phy_t *);
60 static void pmcs_lock_phy_impl(pmcs_phy_t *, int);
61 static void pmcs_unlock_phy_impl(pmcs_phy_t *, int);
62 static pmcs_phy_t *pmcs_clone_phy(pmcs_phy_t *);
63 static boolean_t pmcs_configure_phy(pmcs_hw_t *, pmcs_phy_t *);
64 static void pmcs_reap_dead_phy(pmcs_phy_t *);
65 static pmcs_iport_t *pmcs_get_iport_by_ua(pmcs_hw_t *, char *);
66 static boolean_t pmcs_phy_target_match(pmcs_phy_t *);
67 
68 /*
69  * Often used strings
70  */
71 const char pmcs_nowrk[] = "%s: unable to get work structure";
72 const char pmcs_nomsg[] = "%s: unable to get Inbound Message entry";
73 const char pmcs_timeo[] = "!%s: command timed out";
74 
75 extern const ddi_dma_attr_t pmcs_dattr;
76 
77 /*
78  * Some Initial setup steps.
79  */
80 
81 int
82 pmcs_setup(pmcs_hw_t *pwp)
83 {
84 	uint32_t barval = pwp->mpibar;
85 	uint32_t i, scratch, regbar, regoff, barbar, baroff;
86 	uint32_t new_ioq_depth, ferr = 0;
87 
88 	/*
89 	 * Check current state. If we're not at READY state,
90 	 * we can't go further.
91 	 */
92 	scratch = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1);
93 	if ((scratch & PMCS_MSGU_AAP_STATE_MASK) == PMCS_MSGU_AAP_STATE_ERROR) {
94 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
95 		    "%s: AAP Error State (0x%x)",
96 		    __func__, pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1) &
97 		    PMCS_MSGU_AAP_ERROR_MASK);
98 		pmcs_fm_ereport(pwp, DDI_FM_DEVICE_INVAL_STATE);
99 		ddi_fm_service_impact(pwp->dip, DDI_SERVICE_LOST);
100 		return (-1);
101 	}
102 	if ((scratch & PMCS_MSGU_AAP_STATE_MASK) != PMCS_MSGU_AAP_STATE_READY) {
103 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
104 		    "%s: AAP unit not ready (state 0x%x)",
105 		    __func__, scratch & PMCS_MSGU_AAP_STATE_MASK);
106 		pmcs_fm_ereport(pwp, DDI_FM_DEVICE_INVAL_STATE);
107 		ddi_fm_service_impact(pwp->dip, DDI_SERVICE_LOST);
108 		return (-1);
109 	}
110 
111 	/*
112 	 * Read the offset from the Message Unit scratchpad 0 register.
113 	 * This allows us to read the MPI Configuration table.
114 	 *
115 	 * Check its signature for validity.
116 	 */
117 	baroff = barval;
118 	barbar = barval >> PMCS_MSGU_MPI_BAR_SHIFT;
119 	baroff &= PMCS_MSGU_MPI_OFFSET_MASK;
120 
121 	regoff = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH0);
122 	regbar = regoff >> PMCS_MSGU_MPI_BAR_SHIFT;
123 	regoff &= PMCS_MSGU_MPI_OFFSET_MASK;
124 
125 	if (regoff > baroff) {
126 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
127 		    "%s: bad MPI Table Length (register offset=0x%08x, "
128 		    "passed offset=0x%08x)", __func__, regoff, baroff);
129 		return (-1);
130 	}
131 	if (regbar != barbar) {
132 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
133 		    "%s: bad MPI BAR (register BAROFF=0x%08x, "
134 		    "passed BAROFF=0x%08x)", __func__, regbar, barbar);
135 		return (-1);
136 	}
137 	pwp->mpi_offset = regoff;
138 	if (pmcs_rd_mpi_tbl(pwp, PMCS_MPI_AS) != PMCS_SIGNATURE) {
139 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
140 		    "%s: Bad MPI Configuration Table Signature 0x%x", __func__,
141 		    pmcs_rd_mpi_tbl(pwp, PMCS_MPI_AS));
142 		return (-1);
143 	}
144 
145 	if (pmcs_rd_mpi_tbl(pwp, PMCS_MPI_IR) != PMCS_MPI_REVISION1) {
146 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
147 		    "%s: Bad MPI Configuration Revision 0x%x", __func__,
148 		    pmcs_rd_mpi_tbl(pwp, PMCS_MPI_IR));
149 		return (-1);
150 	}
151 
152 	/*
153 	 * Generate offsets for the General System, Inbound Queue Configuration
154 	 * and Outbound Queue configuration tables. This way the macros to
155 	 * access those tables will work correctly.
156 	 */
157 	pwp->mpi_gst_offset =
158 	    pwp->mpi_offset + pmcs_rd_mpi_tbl(pwp, PMCS_MPI_GSTO);
159 	pwp->mpi_iqc_offset =
160 	    pwp->mpi_offset + pmcs_rd_mpi_tbl(pwp, PMCS_MPI_IQCTO);
161 	pwp->mpi_oqc_offset =
162 	    pwp->mpi_offset + pmcs_rd_mpi_tbl(pwp, PMCS_MPI_OQCTO);
163 
164 	pwp->fw = pmcs_rd_mpi_tbl(pwp, PMCS_MPI_FW);
165 
166 	pwp->max_cmd = pmcs_rd_mpi_tbl(pwp, PMCS_MPI_MOIO);
167 	pwp->max_dev = pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO0) >> 16;
168 
169 	pwp->max_iq = PMCS_MNIQ(pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO1));
170 	pwp->max_oq = PMCS_MNOQ(pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO1));
171 	pwp->nphy = PMCS_NPHY(pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO1));
172 	if (pwp->max_iq <= PMCS_NIQ) {
173 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
174 		    "%s: not enough Inbound Queues supported "
175 		    "(need %d, max_oq=%d)", __func__, pwp->max_iq, PMCS_NIQ);
176 		return (-1);
177 	}
178 	if (pwp->max_oq <= PMCS_NOQ) {
179 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
180 		    "%s: not enough Outbound Queues supported "
181 		    "(need %d, max_oq=%d)", __func__, pwp->max_oq, PMCS_NOQ);
182 		return (-1);
183 	}
184 	if (pwp->nphy == 0) {
185 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
186 		    "%s: zero phys reported", __func__);
187 		return (-1);
188 	}
189 	if (PMCS_HPIQ(pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO1))) {
190 		pwp->hipri_queue = (1 << PMCS_IQ_OTHER);
191 	}
192 
193 
194 	for (i = 0; i < pwp->nphy; i++) {
195 		PMCS_MPI_EVQSET(pwp, PMCS_OQ_EVENTS, i);
196 		PMCS_MPI_NCQSET(pwp, PMCS_OQ_EVENTS, i);
197 	}
198 
199 	pmcs_wr_mpi_tbl(pwp, PMCS_MPI_INFO2,
200 	    (PMCS_OQ_EVENTS << GENERAL_EVENT_OQ_SHIFT) |
201 	    (PMCS_OQ_EVENTS << DEVICE_HANDLE_REMOVED_SHIFT));
202 
203 	/*
204 	 * Verify that ioq_depth is valid (> 0 and not so high that it
205 	 * would cause us to overrun the chip with commands).
206 	 */
207 	if (pwp->ioq_depth == 0) {
208 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
209 		    "%s: I/O queue depth set to 0. Setting to %d",
210 		    __func__, PMCS_NQENTRY);
211 		pwp->ioq_depth = PMCS_NQENTRY;
212 	}
213 
214 	if (pwp->ioq_depth < PMCS_MIN_NQENTRY) {
215 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
216 		    "%s: I/O queue depth set too low (%d). Setting to %d",
217 		    __func__, pwp->ioq_depth, PMCS_MIN_NQENTRY);
218 		pwp->ioq_depth = PMCS_MIN_NQENTRY;
219 	}
220 
221 	if (pwp->ioq_depth > (pwp->max_cmd / (PMCS_IO_IQ_MASK + 1))) {
222 		new_ioq_depth = pwp->max_cmd / (PMCS_IO_IQ_MASK + 1);
223 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
224 		    "%s: I/O queue depth set too high (%d). Setting to %d",
225 		    __func__, pwp->ioq_depth, new_ioq_depth);
226 		pwp->ioq_depth = new_ioq_depth;
227 	}
228 
229 	/*
230 	 * Allocate consistent memory for OQs and IQs.
231 	 */
232 	pwp->iqp_dma_attr = pwp->oqp_dma_attr = pmcs_dattr;
233 	pwp->iqp_dma_attr.dma_attr_align =
234 	    pwp->oqp_dma_attr.dma_attr_align = PMCS_QENTRY_SIZE;
235 
236 	/*
237 	 * The Rev C chip has the ability to do PIO to or from consistent
238 	 * memory anywhere in a 64 bit address space, but the firmware is
239 	 * not presently set up to do so.
240 	 */
241 	pwp->iqp_dma_attr.dma_attr_addr_hi =
242 	    pwp->oqp_dma_attr.dma_attr_addr_hi = 0x000000FFFFFFFFFFull;
243 
244 	for (i = 0; i < PMCS_NIQ; i++) {
245 		if (pmcs_dma_setup(pwp, &pwp->iqp_dma_attr,
246 		    &pwp->iqp_acchdls[i],
247 		    &pwp->iqp_handles[i], PMCS_QENTRY_SIZE * pwp->ioq_depth,
248 		    (caddr_t *)&pwp->iqp[i], &pwp->iqaddr[i]) == B_FALSE) {
249 			pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
250 			    "Failed to setup DMA for iqp[%d]", i);
251 			return (-1);
252 		}
253 		bzero(pwp->iqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth);
254 	}
255 
256 	for (i = 0; i < PMCS_NOQ; i++) {
257 		if (pmcs_dma_setup(pwp, &pwp->oqp_dma_attr,
258 		    &pwp->oqp_acchdls[i],
259 		    &pwp->oqp_handles[i], PMCS_QENTRY_SIZE * pwp->ioq_depth,
260 		    (caddr_t *)&pwp->oqp[i], &pwp->oqaddr[i]) == B_FALSE) {
261 			pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
262 			    "Failed to setup DMA for oqp[%d]", i);
263 			return (-1);
264 		}
265 		bzero(pwp->oqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth);
266 	}
267 
268 	/*
269 	 * Install the IQ and OQ addresses (and null out the rest).
270 	 */
271 	for (i = 0; i < pwp->max_iq; i++) {
272 		pwp->iqpi_offset[i] = pmcs_rd_iqc_tbl(pwp, PMCS_IQPIOFFX(i));
273 		if (i < PMCS_NIQ) {
274 			if (i != PMCS_IQ_OTHER) {
275 				pmcs_wr_iqc_tbl(pwp, PMCS_IQC_PARMX(i),
276 				    pwp->ioq_depth | (PMCS_QENTRY_SIZE << 16));
277 			} else {
278 				pmcs_wr_iqc_tbl(pwp, PMCS_IQC_PARMX(i),
279 				    (1 << 30) | pwp->ioq_depth |
280 				    (PMCS_QENTRY_SIZE << 16));
281 			}
282 			pmcs_wr_iqc_tbl(pwp, PMCS_IQBAHX(i),
283 			    DWORD1(pwp->iqaddr[i]));
284 			pmcs_wr_iqc_tbl(pwp, PMCS_IQBALX(i),
285 			    DWORD0(pwp->iqaddr[i]));
286 			pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBAHX(i),
287 			    DWORD1(pwp->ciaddr+IQ_OFFSET(i)));
288 			pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBALX(i),
289 			    DWORD0(pwp->ciaddr+IQ_OFFSET(i)));
290 		} else {
291 			pmcs_wr_iqc_tbl(pwp, PMCS_IQC_PARMX(i), 0);
292 			pmcs_wr_iqc_tbl(pwp, PMCS_IQBAHX(i), 0);
293 			pmcs_wr_iqc_tbl(pwp, PMCS_IQBALX(i), 0);
294 			pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBAHX(i), 0);
295 			pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBALX(i), 0);
296 		}
297 	}
298 
299 	for (i = 0; i < pwp->max_oq; i++) {
300 		pwp->oqci_offset[i] = pmcs_rd_oqc_tbl(pwp, PMCS_OQCIOFFX(i));
301 		if (i < PMCS_NOQ) {
302 			pmcs_wr_oqc_tbl(pwp, PMCS_OQC_PARMX(i), pwp->ioq_depth |
303 			    (PMCS_QENTRY_SIZE << 16) | OQIEX);
304 			pmcs_wr_oqc_tbl(pwp, PMCS_OQBAHX(i),
305 			    DWORD1(pwp->oqaddr[i]));
306 			pmcs_wr_oqc_tbl(pwp, PMCS_OQBALX(i),
307 			    DWORD0(pwp->oqaddr[i]));
308 			pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBAHX(i),
309 			    DWORD1(pwp->ciaddr+OQ_OFFSET(i)));
310 			pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBALX(i),
311 			    DWORD0(pwp->ciaddr+OQ_OFFSET(i)));
312 			pmcs_wr_oqc_tbl(pwp, PMCS_OQIPARM(i),
313 			    pwp->oqvec[i] << 24);
314 			pmcs_wr_oqc_tbl(pwp, PMCS_OQDICX(i), 0);
315 		} else {
316 			pmcs_wr_oqc_tbl(pwp, PMCS_OQC_PARMX(i), 0);
317 			pmcs_wr_oqc_tbl(pwp, PMCS_OQBAHX(i), 0);
318 			pmcs_wr_oqc_tbl(pwp, PMCS_OQBALX(i), 0);
319 			pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBAHX(i), 0);
320 			pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBALX(i), 0);
321 			pmcs_wr_oqc_tbl(pwp, PMCS_OQIPARM(i), 0);
322 			pmcs_wr_oqc_tbl(pwp, PMCS_OQDICX(i), 0);
323 		}
324 	}
325 
326 	/*
327 	 * Set up logging, if defined.
328 	 */
329 	if (pwp->fwlog) {
330 		uint64_t logdma = pwp->fwaddr;
331 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_MELBAH, DWORD1(logdma));
332 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_MELBAL, DWORD0(logdma));
333 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_MELBS, PMCS_FWLOG_SIZE >> 1);
334 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_MELSEV, pwp->fwlog);
335 		logdma += (PMCS_FWLOG_SIZE >> 1);
336 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_IELBAH, DWORD1(logdma));
337 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_IELBAL, DWORD0(logdma));
338 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_IELBS, PMCS_FWLOG_SIZE >> 1);
339 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_IELSEV, pwp->fwlog);
340 	}
341 
342 	/*
343 	 * Interrupt vectors, outbound queues, and odb_auto_clear
344 	 *
345 	 * MSI/MSI-X:
346 	 * If we got 4 interrupt vectors, we'll assign one to each outbound
347 	 * queue as well as the fatal interrupt, and auto clear can be set
348 	 * for each.
349 	 *
350 	 * If we only got 2 vectors, one will be used for I/O completions
351 	 * and the other for the other two vectors.  In this case, auto_
352 	 * clear can only be set for I/Os, which is fine.  The fatal
353 	 * interrupt will be mapped to the PMCS_FATAL_INTERRUPT bit, which
354 	 * is not an interrupt vector.
355 	 *
356 	 * MSI/MSI-X/INT-X:
357 	 * If we only got 1 interrupt vector, auto_clear must be set to 0,
358 	 * and again the fatal interrupt will be mapped to the
359 	 * PMCS_FATAL_INTERRUPT bit (again, not an interrupt vector).
360 	 */
361 
362 	switch (pwp->int_type) {
363 	case PMCS_INT_MSIX:
364 	case PMCS_INT_MSI:
365 		switch (pwp->intr_cnt) {
366 		case 1:
367 			pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, PMCS_FERRIE |
368 			    (PMCS_FATAL_INTERRUPT << PMCS_FERIV_SHIFT));
369 			pwp->odb_auto_clear = 0;
370 			break;
371 		case 2:
372 			pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, PMCS_FERRIE |
373 			    (PMCS_FATAL_INTERRUPT << PMCS_FERIV_SHIFT));
374 			pwp->odb_auto_clear = (1 << PMCS_FATAL_INTERRUPT) |
375 			    (1 << PMCS_MSIX_IODONE);
376 			break;
377 		case 4:
378 			pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, PMCS_FERRIE |
379 			    (PMCS_MSIX_FATAL << PMCS_FERIV_SHIFT));
380 			pwp->odb_auto_clear = (1 << PMCS_MSIX_FATAL) |
381 			    (1 << PMCS_MSIX_GENERAL) | (1 << PMCS_MSIX_IODONE) |
382 			    (1 << PMCS_MSIX_EVENTS);
383 			break;
384 		}
385 		break;
386 
387 	case PMCS_INT_FIXED:
388 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR,
389 		    PMCS_FERRIE | (PMCS_FATAL_INTERRUPT << PMCS_FERIV_SHIFT));
390 		pwp->odb_auto_clear = 0;
391 		break;
392 	}
393 
394 	/*
395 	 * Enable Interrupt Reassertion
396 	 * Default Delay 1000us
397 	 */
398 	ferr = pmcs_rd_mpi_tbl(pwp, PMCS_MPI_FERR);
399 	if ((ferr & PMCS_MPI_IRAE) == 0) {
400 		ferr &= ~(PMCS_MPI_IRAU | PMCS_MPI_IRAD_MASK);
401 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, ferr | PMCS_MPI_IRAE);
402 	}
403 
404 	pmcs_wr_topunit(pwp, PMCS_OBDB_AUTO_CLR, pwp->odb_auto_clear);
405 	pwp->mpi_table_setup = 1;
406 	return (0);
407 }
408 
409 /*
410  * Start the Message Passing protocol with the PMC chip.
411  */
412 int
413 pmcs_start_mpi(pmcs_hw_t *pwp)
414 {
415 	int i;
416 
417 	pmcs_wr_msgunit(pwp, PMCS_MSGU_IBDB, PMCS_MSGU_IBDB_MPIINI);
418 	for (i = 0; i < 1000; i++) {
419 		if ((pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB) &
420 		    PMCS_MSGU_IBDB_MPIINI) == 0) {
421 			break;
422 		}
423 		drv_usecwait(1000);
424 	}
425 	if (pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB) & PMCS_MSGU_IBDB_MPIINI) {
426 		return (-1);
427 	}
428 	drv_usecwait(500000);
429 
430 	/*
431 	 * Check to make sure we got to INIT state.
432 	 */
433 	if (PMCS_MPI_S(pmcs_rd_gst_tbl(pwp, PMCS_GST_BASE)) !=
434 	    PMCS_MPI_STATE_INIT) {
435 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
436 		    "%s: MPI launch failed (GST 0x%x DBCLR 0x%x)", __func__,
437 		    pmcs_rd_gst_tbl(pwp, PMCS_GST_BASE),
438 		    pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB_CLEAR));
439 		return (-1);
440 	}
441 	return (0);
442 }
443 
444 /*
445  * Stop the Message Passing protocol with the PMC chip.
446  */
447 int
448 pmcs_stop_mpi(pmcs_hw_t *pwp)
449 {
450 	int i;
451 
452 	for (i = 0; i < pwp->max_iq; i++) {
453 		pmcs_wr_iqc_tbl(pwp, PMCS_IQC_PARMX(i), 0);
454 		pmcs_wr_iqc_tbl(pwp, PMCS_IQBAHX(i), 0);
455 		pmcs_wr_iqc_tbl(pwp, PMCS_IQBALX(i), 0);
456 		pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBAHX(i), 0);
457 		pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBALX(i), 0);
458 	}
459 	for (i = 0; i < pwp->max_oq; i++) {
460 		pmcs_wr_oqc_tbl(pwp, PMCS_OQC_PARMX(i), 0);
461 		pmcs_wr_oqc_tbl(pwp, PMCS_OQBAHX(i), 0);
462 		pmcs_wr_oqc_tbl(pwp, PMCS_OQBALX(i), 0);
463 		pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBAHX(i), 0);
464 		pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBALX(i), 0);
465 		pmcs_wr_oqc_tbl(pwp, PMCS_OQIPARM(i), 0);
466 		pmcs_wr_oqc_tbl(pwp, PMCS_OQDICX(i), 0);
467 	}
468 	pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, 0);
469 	pmcs_wr_msgunit(pwp, PMCS_MSGU_IBDB, PMCS_MSGU_IBDB_MPICTU);
470 	for (i = 0; i < 2000; i++) {
471 		if ((pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB) &
472 		    PMCS_MSGU_IBDB_MPICTU) == 0) {
473 			break;
474 		}
475 		drv_usecwait(1000);
476 	}
477 	if (pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB) & PMCS_MSGU_IBDB_MPICTU) {
478 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
479 		    "%s: MPI stop failed", __func__);
480 		return (-1);
481 	}
482 	return (0);
483 }
484 
485 /*
486  * Do a sequence of ECHO messages to test for MPI functionality,
487  * all inbound and outbound queue functionality and interrupts.
488  */
489 int
490 pmcs_echo_test(pmcs_hw_t *pwp)
491 {
492 	echo_test_t fred;
493 	struct pmcwork *pwrk;
494 	uint32_t *msg, count;
495 	int iqe = 0, iqo = 0, result, rval = 0;
496 	int iterations;
497 	hrtime_t echo_start, echo_end, echo_total;
498 
499 	ASSERT(pwp->max_cmd > 0);
500 
501 	/*
502 	 * We want iterations to be max_cmd * 3 to ensure that we run the
503 	 * echo test enough times to iterate through every inbound queue
504 	 * at least twice.
505 	 */
506 	iterations = pwp->max_cmd * 3;
507 
508 	echo_total = 0;
509 	count = 0;
510 
511 	while (count < iterations) {
512 		pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, NULL);
513 		if (pwrk == NULL) {
514 			pmcs_prt(pwp, PMCS_PRT_ERR, NULL, NULL,
515 			    pmcs_nowrk, __func__);
516 			rval = -1;
517 			break;
518 		}
519 
520 		mutex_enter(&pwp->iqp_lock[iqe]);
521 		msg = GET_IQ_ENTRY(pwp, iqe);
522 		if (msg == NULL) {
523 			mutex_exit(&pwp->iqp_lock[iqe]);
524 			pmcs_pwork(pwp, pwrk);
525 			pmcs_prt(pwp, PMCS_PRT_ERR, NULL, NULL,
526 			    pmcs_nomsg, __func__);
527 			rval = -1;
528 			break;
529 		}
530 
531 		bzero(msg, PMCS_QENTRY_SIZE);
532 
533 		if (iqe == PMCS_IQ_OTHER) {
534 			/* This is on the high priority queue */
535 			msg[0] = LE_32(PMCS_HIPRI(pwp, iqo, PMCIN_ECHO));
536 		} else {
537 			msg[0] = LE_32(PMCS_IOMB_IN_SAS(iqo, PMCIN_ECHO));
538 		}
539 		msg[1] = LE_32(pwrk->htag);
540 		fred.signature = 0xdeadbeef;
541 		fred.count = count;
542 		fred.ptr = &count;
543 		(void) memcpy(&msg[2], &fred, sizeof (fred));
544 		pwrk->state = PMCS_WORK_STATE_ONCHIP;
545 
546 		INC_IQ_ENTRY(pwp, iqe);
547 
548 		echo_start = gethrtime();
549 		DTRACE_PROBE2(pmcs__echo__test__wait__start,
550 		    hrtime_t, echo_start, uint32_t, pwrk->htag);
551 
552 		if (++iqe == PMCS_NIQ) {
553 			iqe = 0;
554 		}
555 		if (++iqo == PMCS_NOQ) {
556 			iqo = 0;
557 		}
558 
559 		WAIT_FOR(pwrk, 250, result);
560 
561 		echo_end = gethrtime();
562 		DTRACE_PROBE2(pmcs__echo__test__wait__end,
563 		    hrtime_t, echo_end, int, result);
564 
565 		echo_total += (echo_end - echo_start);
566 
567 		pmcs_pwork(pwp, pwrk);
568 		if (result) {
569 			pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
570 			    "%s: command timed out on echo test #%d",
571 			    __func__, count);
572 			rval = -1;
573 			break;
574 		}
575 	}
576 
577 	/*
578 	 * The intr_threshold is adjusted by PMCS_INTR_THRESHOLD in order to
579 	 * remove the overhead of things like the delay in getting signaled
580 	 * for completion.
581 	 */
582 	if (echo_total != 0) {
583 		pwp->io_intr_coal.intr_latency =
584 		    (echo_total / iterations) / 2;
585 		pwp->io_intr_coal.intr_threshold =
586 		    PMCS_INTR_THRESHOLD(PMCS_QUANTUM_TIME_USECS * 1000 /
587 		    pwp->io_intr_coal.intr_latency);
588 	}
589 
590 	return (rval);
591 }
592 
593 /*
594  * Start the (real) phys
595  */
596 int
597 pmcs_start_phy(pmcs_hw_t *pwp, int phynum, int linkmode, int speed)
598 {
599 	int result;
600 	uint32_t *msg;
601 	struct pmcwork *pwrk;
602 	pmcs_phy_t *pptr;
603 	sas_identify_af_t sap;
604 
605 	mutex_enter(&pwp->lock);
606 	pptr = pwp->root_phys + phynum;
607 	if (pptr == NULL) {
608 		mutex_exit(&pwp->lock);
609 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
610 		    "%s: cannot find port %d", __func__, phynum);
611 		return (0);
612 	}
613 
614 	pmcs_lock_phy(pptr);
615 	mutex_exit(&pwp->lock);
616 
617 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
618 	if (pwrk == NULL) {
619 		pmcs_unlock_phy(pptr);
620 		pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL, pmcs_nowrk, __func__);
621 		return (-1);
622 	}
623 
624 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
625 	msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
626 
627 	if (msg == NULL) {
628 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
629 		pmcs_unlock_phy(pptr);
630 		pmcs_pwork(pwp, pwrk);
631 		pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL, pmcs_nomsg, __func__);
632 		return (-1);
633 	}
634 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_EVENTS, PMCIN_PHY_START));
635 	msg[1] = LE_32(pwrk->htag);
636 	msg[2] = LE_32(linkmode | speed | phynum);
637 	bzero(&sap, sizeof (sap));
638 	sap.device_type = SAS_IF_DTYPE_ENDPOINT;
639 	sap.ssp_ini_port = 1;
640 
641 	if (pwp->separate_ports) {
642 		pmcs_wwn2barray(pwp->sas_wwns[phynum], sap.sas_address);
643 	} else {
644 		pmcs_wwn2barray(pwp->sas_wwns[0], sap.sas_address);
645 	}
646 
647 	ASSERT(phynum < SAS2_PHYNUM_MAX);
648 	sap.phy_identifier = phynum & SAS2_PHYNUM_MASK;
649 	(void) memcpy(&msg[3], &sap, sizeof (sas_identify_af_t));
650 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
651 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
652 
653 	pptr->state.prog_min_rate = (lowbit((ulong_t)speed) - 1);
654 	pptr->state.prog_max_rate = (highbit((ulong_t)speed) - 1);
655 	pptr->state.hw_min_rate = PMCS_HW_MIN_LINK_RATE;
656 	pptr->state.hw_max_rate = PMCS_HW_MAX_LINK_RATE;
657 
658 	pmcs_unlock_phy(pptr);
659 	WAIT_FOR(pwrk, 1000, result);
660 	pmcs_pwork(pwp, pwrk);
661 
662 	if (result) {
663 		pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL, pmcs_timeo, __func__);
664 	} else {
665 		mutex_enter(&pwp->lock);
666 		pwp->phys_started |= (1 << phynum);
667 		mutex_exit(&pwp->lock);
668 	}
669 
670 	return (0);
671 }
672 
673 int
674 pmcs_start_phys(pmcs_hw_t *pwp)
675 {
676 	int i;
677 
678 	for (i = 0; i < pwp->nphy; i++) {
679 		if ((pwp->phyid_block_mask & (1 << i)) == 0) {
680 			if (pmcs_start_phy(pwp, i,
681 			    (pwp->phymode << PHY_MODE_SHIFT),
682 			    pwp->physpeed << PHY_LINK_SHIFT)) {
683 				return (-1);
684 			}
685 			if (pmcs_clear_diag_counters(pwp, i)) {
686 				pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
687 				    "%s: failed to reset counters on PHY (%d)",
688 				    __func__, i);
689 			}
690 		}
691 	}
692 	return (0);
693 }
694 
695 /*
696  * Called with PHY locked
697  */
698 int
699 pmcs_reset_phy(pmcs_hw_t *pwp, pmcs_phy_t *pptr, uint8_t type)
700 {
701 	uint32_t *msg;
702 	uint32_t iomb[(PMCS_QENTRY_SIZE << 1) >> 2];
703 	const char *mbar;
704 	uint32_t amt;
705 	uint32_t pdevid;
706 	uint32_t stsoff;
707 	uint32_t status;
708 	int result, level, phynum;
709 	struct pmcwork *pwrk;
710 	uint32_t htag;
711 
712 	ASSERT(mutex_owned(&pptr->phy_lock));
713 
714 	bzero(iomb, PMCS_QENTRY_SIZE);
715 	phynum = pptr->phynum;
716 	level = pptr->level;
717 	if (level > 0) {
718 		pdevid = pptr->parent->device_id;
719 	}
720 
721 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
722 
723 	if (pwrk == NULL) {
724 		pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL, pmcs_nowrk, __func__);
725 		return (ENOMEM);
726 	}
727 
728 	pwrk->arg = iomb;
729 
730 	/*
731 	 * If level > 0, we need to issue an SMP_REQUEST with a PHY_CONTROL
732 	 * function to do either a link reset or hard reset.  If level == 0,
733 	 * then we do a LOCAL_PHY_CONTROL IOMB to do link/hard reset to the
734 	 * root (local) PHY
735 	 */
736 	if (level) {
737 		stsoff = 2;
738 		iomb[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
739 		    PMCIN_SMP_REQUEST));
740 		iomb[1] = LE_32(pwrk->htag);
741 		iomb[2] = LE_32(pdevid);
742 		iomb[3] = LE_32(40 << SMP_REQUEST_LENGTH_SHIFT);
743 		/*
744 		 * Send SMP PHY CONTROL/HARD or LINK RESET
745 		 */
746 		iomb[4] = BE_32(0x40910000);
747 		iomb[5] = 0;
748 
749 		if (type == PMCS_PHYOP_HARD_RESET) {
750 			mbar = "SMP PHY CONTROL/HARD RESET";
751 			iomb[6] = BE_32((phynum << 24) |
752 			    (PMCS_PHYOP_HARD_RESET << 16));
753 		} else {
754 			mbar = "SMP PHY CONTROL/LINK RESET";
755 			iomb[6] = BE_32((phynum << 24) |
756 			    (PMCS_PHYOP_LINK_RESET << 16));
757 		}
758 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
759 		    "%s: sending %s to %s for phy 0x%x",
760 		    __func__, mbar, pptr->parent->path, pptr->phynum);
761 		amt = 7;
762 	} else {
763 		/*
764 		 * Unlike most other Outbound messages, status for
765 		 * a local phy operation is in DWORD 3.
766 		 */
767 		stsoff = 3;
768 		iomb[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
769 		    PMCIN_LOCAL_PHY_CONTROL));
770 		iomb[1] = LE_32(pwrk->htag);
771 		if (type == PMCS_PHYOP_LINK_RESET) {
772 			mbar = "LOCAL PHY LINK RESET";
773 			iomb[2] = LE_32((PMCS_PHYOP_LINK_RESET << 8) | phynum);
774 		} else {
775 			mbar = "LOCAL PHY HARD RESET";
776 			iomb[2] = LE_32((PMCS_PHYOP_HARD_RESET << 8) | phynum);
777 		}
778 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
779 		    "%s: sending %s to %s", __func__, mbar, pptr->path);
780 		amt = 3;
781 	}
782 
783 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
784 	msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
785 	if (msg == NULL) {
786 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
787 		pmcs_pwork(pwp, pwrk);
788 		pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL, pmcs_nomsg, __func__);
789 		return (ENOMEM);
790 	}
791 	COPY_MESSAGE(msg, iomb, amt);
792 	htag = pwrk->htag;
793 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
794 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
795 
796 	pmcs_unlock_phy(pptr);
797 	WAIT_FOR(pwrk, 1000, result);
798 	pmcs_pwork(pwp, pwrk);
799 	pmcs_lock_phy(pptr);
800 
801 	if (result) {
802 		pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL, pmcs_timeo, __func__);
803 
804 		if (pmcs_abort(pwp, pptr, htag, 0, 0)) {
805 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
806 			    "%s: Unable to issue SMP abort for htag 0x%08x",
807 			    __func__, htag);
808 		} else {
809 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
810 			    "%s: Issuing SMP ABORT for htag 0x%08x",
811 			    __func__, htag);
812 		}
813 		return (EIO);
814 	}
815 	status = LE_32(iomb[stsoff]);
816 
817 	if (status != PMCOUT_STATUS_OK) {
818 		char buf[32];
819 		const char *es =  pmcs_status_str(status);
820 		if (es == NULL) {
821 			(void) snprintf(buf, sizeof (buf), "Status 0x%x",
822 			    status);
823 			es = buf;
824 		}
825 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
826 		    "%s: %s action returned %s for %s", __func__, mbar, es,
827 		    pptr->path);
828 		return (EIO);
829 	}
830 
831 	return (0);
832 }
833 
834 /*
835  * Stop the (real) phys.  No PHY or softstate locks are required as this only
836  * happens during detach.
837  */
838 void
839 pmcs_stop_phy(pmcs_hw_t *pwp, int phynum)
840 {
841 	int result;
842 	pmcs_phy_t *pptr;
843 	uint32_t *msg;
844 	struct pmcwork *pwrk;
845 
846 	pptr =  pwp->root_phys + phynum;
847 	if (pptr == NULL) {
848 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
849 		    "%s: unable to find port %d", __func__, phynum);
850 		return;
851 	}
852 
853 	if (pwp->phys_started & (1 << phynum)) {
854 		pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
855 
856 		if (pwrk == NULL) {
857 			pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL,
858 			    pmcs_nowrk, __func__);
859 			return;
860 		}
861 
862 		mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
863 		msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
864 
865 		if (msg == NULL) {
866 			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
867 			pmcs_pwork(pwp, pwrk);
868 			pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL,
869 			    pmcs_nomsg, __func__);
870 			return;
871 		}
872 
873 		msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_EVENTS, PMCIN_PHY_STOP));
874 		msg[1] = LE_32(pwrk->htag);
875 		msg[2] = LE_32(phynum);
876 		pwrk->state = PMCS_WORK_STATE_ONCHIP;
877 		/*
878 		 * Make this unconfigured now.
879 		 */
880 		INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
881 		WAIT_FOR(pwrk, 1000, result);
882 
883 		pmcs_pwork(pwp, pwrk);
884 		if (result) {
885 			pmcs_prt(pwp, PMCS_PRT_ERR,
886 			    pptr, NULL, pmcs_timeo, __func__);
887 		}
888 
889 		pwp->phys_started &= ~(1 << phynum);
890 	}
891 
892 	pptr->configured = 0;
893 }
894 
895 /*
896  * No locks should be required as this is only called during detach
897  */
898 void
899 pmcs_stop_phys(pmcs_hw_t *pwp)
900 {
901 	int i;
902 	for (i = 0; i < pwp->nphy; i++) {
903 		if ((pwp->phyid_block_mask & (1 << i)) == 0) {
904 			pmcs_stop_phy(pwp, i);
905 		}
906 	}
907 }
908 
909 /*
910  * Run SAS_DIAG_EXECUTE with cmd and cmd_desc passed.
911  * 	ERR_CNT_RESET: return status of cmd
912  *	DIAG_REPORT_GET: return value of the counter
913  */
914 int
915 pmcs_sas_diag_execute(pmcs_hw_t *pwp, uint32_t cmd, uint32_t cmd_desc,
916     uint8_t phynum)
917 {
918 	uint32_t htag, *ptr, status, msg[PMCS_MSG_SIZE << 1];
919 	int result;
920 	struct pmcwork *pwrk;
921 
922 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, NULL);
923 	if (pwrk == NULL) {
924 		pmcs_prt(pwp, PMCS_PRT_ERR, NULL, NULL, pmcs_nowrk, __func__);
925 		return (DDI_FAILURE);
926 	}
927 	pwrk->arg = msg;
928 	htag = pwrk->htag;
929 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_EVENTS, PMCIN_SAS_DIAG_EXECUTE));
930 	msg[1] = LE_32(htag);
931 	msg[2] = LE_32((cmd << PMCS_DIAG_CMD_SHIFT) |
932 	    (cmd_desc << PMCS_DIAG_CMD_DESC_SHIFT) | phynum);
933 
934 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
935 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
936 	if (ptr == NULL) {
937 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
938 		pmcs_pwork(pwp, pwrk);
939 		pmcs_prt(pwp, PMCS_PRT_ERR, NULL, NULL, pmcs_nomsg, __func__);
940 		return (DDI_FAILURE);
941 	}
942 	COPY_MESSAGE(ptr, msg, 3);
943 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
944 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
945 
946 	WAIT_FOR(pwrk, 1000, result);
947 
948 	pmcs_pwork(pwp, pwrk);
949 
950 	if (result) {
951 		pmcs_timed_out(pwp, htag, __func__);
952 		return (DDI_FAILURE);
953 	}
954 
955 	status = LE_32(msg[3]);
956 
957 	/* Return for counter reset */
958 	if (cmd == PMCS_ERR_CNT_RESET)
959 		return (status);
960 
961 	/* Return for counter value */
962 	if (status) {
963 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
964 		    "%s: failed, status (0x%x)", __func__, status);
965 		return (DDI_FAILURE);
966 	}
967 	return (LE_32(msg[4]));
968 }
969 
970 /* Get the current value of the counter for desc on phynum and return it. */
971 int
972 pmcs_get_diag_report(pmcs_hw_t *pwp, uint32_t desc, uint8_t phynum)
973 {
974 	return (pmcs_sas_diag_execute(pwp, PMCS_DIAG_REPORT_GET, desc, phynum));
975 }
976 
977 /* Clear all of the counters for phynum. Returns the status of the command. */
978 int
979 pmcs_clear_diag_counters(pmcs_hw_t *pwp, uint8_t phynum)
980 {
981 	uint32_t	cmd = PMCS_ERR_CNT_RESET;
982 	uint32_t	cmd_desc;
983 
984 	cmd_desc = PMCS_INVALID_DWORD_CNT;
985 	if (pmcs_sas_diag_execute(pwp, cmd, cmd_desc, phynum))
986 		return (DDI_FAILURE);
987 
988 	cmd_desc = PMCS_DISPARITY_ERR_CNT;
989 	if (pmcs_sas_diag_execute(pwp, cmd, cmd_desc, phynum))
990 		return (DDI_FAILURE);
991 
992 	cmd_desc = PMCS_LOST_DWORD_SYNC_CNT;
993 	if (pmcs_sas_diag_execute(pwp, cmd, cmd_desc, phynum))
994 		return (DDI_FAILURE);
995 
996 	cmd_desc = PMCS_RESET_FAILED_CNT;
997 	if (pmcs_sas_diag_execute(pwp, cmd, cmd_desc, phynum))
998 		return (DDI_FAILURE);
999 
1000 	return (DDI_SUCCESS);
1001 }
1002 
1003 /*
1004  * Get firmware timestamp
1005  */
1006 int
1007 pmcs_get_time_stamp(pmcs_hw_t *pwp, uint64_t *ts)
1008 {
1009 	uint32_t htag, *ptr, msg[PMCS_MSG_SIZE << 1];
1010 	int result;
1011 	struct pmcwork *pwrk;
1012 
1013 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, NULL);
1014 	if (pwrk == NULL) {
1015 		pmcs_prt(pwp, PMCS_PRT_ERR, NULL, NULL, pmcs_nowrk, __func__);
1016 		return (-1);
1017 	}
1018 	pwrk->arg = msg;
1019 	htag = pwrk->htag;
1020 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_EVENTS, PMCIN_GET_TIME_STAMP));
1021 	msg[1] = LE_32(pwrk->htag);
1022 
1023 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1024 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1025 	if (ptr == NULL) {
1026 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1027 		pmcs_pwork(pwp, pwrk);
1028 		pmcs_prt(pwp, PMCS_PRT_ERR, NULL, NULL, pmcs_nomsg, __func__);
1029 		return (-1);
1030 	}
1031 	COPY_MESSAGE(ptr, msg, 2);
1032 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
1033 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1034 
1035 	WAIT_FOR(pwrk, 1000, result);
1036 
1037 	pmcs_pwork(pwp, pwrk);
1038 
1039 	if (result) {
1040 		pmcs_timed_out(pwp, htag, __func__);
1041 		return (-1);
1042 	}
1043 	*ts = LE_32(msg[2]) | (((uint64_t)LE_32(msg[3])) << 32);
1044 	return (0);
1045 }
1046 
1047 /*
1048  * Dump all pertinent registers
1049  */
1050 
1051 void
1052 pmcs_register_dump(pmcs_hw_t *pwp)
1053 {
1054 	int i;
1055 	uint32_t val;
1056 
1057 	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "pmcs%d: Register dump start",
1058 	    ddi_get_instance(pwp->dip));
1059 	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL,
1060 	    "OBDB (intr): 0x%08x (mask): 0x%08x (clear): 0x%08x",
1061 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_OBDB),
1062 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_OBDB_MASK),
1063 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR));
1064 	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "SCRATCH0: 0x%08x",
1065 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH0));
1066 	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "SCRATCH1: 0x%08x",
1067 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1));
1068 	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "SCRATCH2: 0x%08x",
1069 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2));
1070 	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "SCRATCH3: 0x%08x",
1071 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH3));
1072 	for (i = 0; i < PMCS_NIQ; i++) {
1073 		pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "IQ %d: CI %u PI %u",
1074 		    i, pmcs_rd_iqci(pwp, i), pmcs_rd_iqpi(pwp, i));
1075 	}
1076 	for (i = 0; i < PMCS_NOQ; i++) {
1077 		pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "OQ %d: CI %u PI %u",
1078 		    i, pmcs_rd_oqci(pwp, i), pmcs_rd_oqpi(pwp, i));
1079 	}
1080 	val = pmcs_rd_gst_tbl(pwp, PMCS_GST_BASE);
1081 	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL,
1082 	    "GST TABLE BASE: 0x%08x (STATE=0x%x QF=%d GSTLEN=%d HMI_ERR=0x%x)",
1083 	    val, PMCS_MPI_S(val), PMCS_QF(val), PMCS_GSTLEN(val) * 4,
1084 	    PMCS_HMI_ERR(val));
1085 	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "GST TABLE IQFRZ0: 0x%08x",
1086 	    pmcs_rd_gst_tbl(pwp, PMCS_GST_IQFRZ0));
1087 	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "GST TABLE IQFRZ1: 0x%08x",
1088 	    pmcs_rd_gst_tbl(pwp, PMCS_GST_IQFRZ1));
1089 	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "GST TABLE MSGU TICK: 0x%08x",
1090 	    pmcs_rd_gst_tbl(pwp, PMCS_GST_MSGU_TICK));
1091 	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "GST TABLE IOP TICK: 0x%08x",
1092 	    pmcs_rd_gst_tbl(pwp, PMCS_GST_IOP_TICK));
1093 	for (i = 0; i < pwp->nphy; i++) {
1094 		uint32_t rerrf, pinfo, started = 0, link = 0;
1095 		pinfo = pmcs_rd_gst_tbl(pwp, PMCS_GST_PHY_INFO(i));
1096 		if (pinfo & 1) {
1097 			started = 1;
1098 			link = pinfo & 2;
1099 		}
1100 		rerrf = pmcs_rd_gst_tbl(pwp, PMCS_GST_RERR_INFO(i));
1101 		pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL,
1102 		    "GST TABLE PHY%d STARTED=%d LINK=%d RERR=0x%08x",
1103 		    i, started, link, rerrf);
1104 	}
1105 	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "pmcs%d: Register dump end",
1106 	    ddi_get_instance(pwp->dip));
1107 }
1108 
1109 /*
1110  * Handle SATA Abort and other error processing
1111  */
1112 int
1113 pmcs_abort_handler(pmcs_hw_t *pwp)
1114 {
1115 	pmcs_phy_t *pptr, *pnext, *pnext_uplevel[PMCS_MAX_XPND];
1116 	int r, level = 0;
1117 
1118 	pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL, "%s", __func__);
1119 
1120 	mutex_enter(&pwp->lock);
1121 	pptr = pwp->root_phys;
1122 	mutex_exit(&pwp->lock);
1123 
1124 	while (pptr) {
1125 		/*
1126 		 * XXX: Need to make sure this doesn't happen
1127 		 * XXX: when non-NCQ commands are running.
1128 		 */
1129 		pmcs_lock_phy(pptr);
1130 		if (pptr->need_rl_ext) {
1131 			ASSERT(pptr->dtype == SATA);
1132 			if (pmcs_acquire_scratch(pwp, B_FALSE)) {
1133 				goto next_phy;
1134 			}
1135 			r = pmcs_sata_abort_ncq(pwp, pptr);
1136 			pmcs_release_scratch(pwp);
1137 			if (r == ENOMEM) {
1138 				goto next_phy;
1139 			}
1140 			if (r) {
1141 				r = pmcs_reset_phy(pwp, pptr,
1142 				    PMCS_PHYOP_LINK_RESET);
1143 				if (r == ENOMEM) {
1144 					goto next_phy;
1145 				}
1146 				/* what if other failures happened? */
1147 				pptr->abort_pending = 1;
1148 				pptr->abort_sent = 0;
1149 			}
1150 		}
1151 		if (pptr->abort_pending == 0 || pptr->abort_sent) {
1152 			goto next_phy;
1153 		}
1154 		pptr->abort_pending = 0;
1155 		if (pmcs_abort(pwp, pptr, pptr->device_id, 1, 1) == ENOMEM) {
1156 			pptr->abort_pending = 1;
1157 			goto next_phy;
1158 		}
1159 		pptr->abort_sent = 1;
1160 
1161 next_phy:
1162 		if (pptr->children) {
1163 			pnext = pptr->children;
1164 			pnext_uplevel[level++] = pptr->sibling;
1165 		} else {
1166 			pnext = pptr->sibling;
1167 			while ((pnext == NULL) && (level > 0)) {
1168 				pnext = pnext_uplevel[--level];
1169 			}
1170 		}
1171 
1172 		pmcs_unlock_phy(pptr);
1173 		pptr = pnext;
1174 	}
1175 
1176 	return (0);
1177 }
1178 
1179 /*
1180  * Register a device (get a device handle for it).
1181  * Called with PHY lock held.
1182  */
1183 int
1184 pmcs_register_device(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
1185 {
1186 	struct pmcwork *pwrk;
1187 	int result = 0;
1188 	uint32_t *msg;
1189 	uint32_t tmp, status;
1190 	uint32_t iomb[(PMCS_QENTRY_SIZE << 1) >> 2];
1191 
1192 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1193 	msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1194 
1195 	if (msg == NULL ||
1196 	    (pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr)) == NULL) {
1197 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1198 		result = ENOMEM;
1199 		goto out;
1200 	}
1201 
1202 	pwrk->arg = iomb;
1203 	pwrk->dtype = pptr->dtype;
1204 
1205 	msg[1] = LE_32(pwrk->htag);
1206 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, PMCIN_REGISTER_DEVICE));
1207 	tmp = PMCS_DEVREG_TLR |
1208 	    (pptr->link_rate << PMCS_DEVREG_LINK_RATE_SHIFT);
1209 	if (IS_ROOT_PHY(pptr)) {
1210 		msg[2] = LE_32(pptr->portid |
1211 		    (pptr->phynum << PMCS_PHYID_SHIFT));
1212 	} else {
1213 		msg[2] = LE_32(pptr->portid);
1214 	}
1215 	if (pptr->dtype == SATA) {
1216 		if (IS_ROOT_PHY(pptr)) {
1217 			tmp |= PMCS_DEVREG_TYPE_SATA_DIRECT;
1218 		} else {
1219 			tmp |= PMCS_DEVREG_TYPE_SATA;
1220 		}
1221 	} else {
1222 		tmp |= PMCS_DEVREG_TYPE_SAS;
1223 	}
1224 	msg[3] = LE_32(tmp);
1225 	msg[4] = LE_32(PMCS_DEVREG_IT_NEXUS_TIMEOUT);
1226 	(void) memcpy(&msg[5], pptr->sas_address, 8);
1227 
1228 	CLEAN_MESSAGE(msg, 7);
1229 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
1230 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1231 
1232 	pmcs_unlock_phy(pptr);
1233 	WAIT_FOR(pwrk, 250, result);
1234 	pmcs_lock_phy(pptr);
1235 	pmcs_pwork(pwp, pwrk);
1236 
1237 	if (result) {
1238 		pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL, pmcs_timeo, __func__);
1239 		result = ETIMEDOUT;
1240 		goto out;
1241 	}
1242 	status = LE_32(iomb[2]);
1243 	tmp = LE_32(iomb[3]);
1244 	switch (status) {
1245 	case PMCS_DEVREG_OK:
1246 	case PMCS_DEVREG_DEVICE_ALREADY_REGISTERED:
1247 	case PMCS_DEVREG_PHY_ALREADY_REGISTERED:
1248 		if (pmcs_validate_devid(pwp->root_phys, pptr, tmp) == B_FALSE) {
1249 			result = EEXIST;
1250 			goto out;
1251 		} else if (status != PMCS_DEVREG_OK) {
1252 			if (tmp == 0xffffffff) {	/* F/W bug */
1253 				pmcs_prt(pwp, PMCS_PRT_INFO, pptr, NULL,
1254 				    "%s: phy %s already has bogus devid 0x%x",
1255 				    __func__, pptr->path, tmp);
1256 				result = EIO;
1257 				goto out;
1258 			} else {
1259 				pmcs_prt(pwp, PMCS_PRT_INFO, pptr, NULL,
1260 				    "%s: phy %s already has a device id 0x%x",
1261 				    __func__, pptr->path, tmp);
1262 			}
1263 		}
1264 		break;
1265 	default:
1266 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
1267 		    "%s: status 0x%x when trying to register device %s",
1268 		    __func__, status, pptr->path);
1269 		result = EIO;
1270 		goto out;
1271 	}
1272 	pptr->device_id = tmp;
1273 	pptr->valid_device_id = 1;
1274 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL, "Phy %s/" SAS_ADDR_FMT
1275 	    " registered with device_id 0x%x (portid %d)", pptr->path,
1276 	    SAS_ADDR_PRT(pptr->sas_address), tmp, pptr->portid);
1277 out:
1278 	return (result);
1279 }
1280 
1281 /*
1282  * Deregister a device (remove a device handle).
1283  * Called with PHY locked.
1284  */
1285 void
1286 pmcs_deregister_device(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
1287 {
1288 	struct pmcwork *pwrk;
1289 	uint32_t msg[PMCS_MSG_SIZE], *ptr, status;
1290 	uint32_t iomb[(PMCS_QENTRY_SIZE << 1) >> 2];
1291 	int result;
1292 
1293 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
1294 	if (pwrk == NULL) {
1295 		return;
1296 	}
1297 
1298 	pwrk->arg = iomb;
1299 	pwrk->dtype = pptr->dtype;
1300 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1301 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1302 	if (ptr == NULL) {
1303 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1304 		pmcs_pwork(pwp, pwrk);
1305 		return;
1306 	}
1307 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
1308 	    PMCIN_DEREGISTER_DEVICE_HANDLE));
1309 	msg[1] = LE_32(pwrk->htag);
1310 	msg[2] = LE_32(pptr->device_id);
1311 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
1312 	COPY_MESSAGE(ptr, msg, 3);
1313 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1314 
1315 	pmcs_unlock_phy(pptr);
1316 	WAIT_FOR(pwrk, 250, result);
1317 	pmcs_pwork(pwp, pwrk);
1318 	pmcs_lock_phy(pptr);
1319 
1320 	if (result) {
1321 		pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL, pmcs_timeo, __func__);
1322 		return;
1323 	}
1324 	status = LE_32(iomb[2]);
1325 	if (status != PMCOUT_STATUS_OK) {
1326 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
1327 		    "%s: status 0x%x when trying to deregister device %s",
1328 		    __func__, status, pptr->path);
1329 	} else {
1330 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
1331 		    "%s: device %s deregistered", __func__, pptr->path);
1332 		pptr->valid_device_id = 0;
1333 		pptr->device_id = PMCS_INVALID_DEVICE_ID;
1334 	}
1335 }
1336 
1337 /*
1338  * Deregister all registered devices.
1339  */
1340 void
1341 pmcs_deregister_devices(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
1342 {
1343 	/*
1344 	 * Start at the maximum level and walk back to level 0.  This only
1345 	 * gets done during detach after all threads and timers have been
1346 	 * destroyed, so there's no need to hold the softstate or PHY lock.
1347 	 */
1348 	while (phyp) {
1349 		if (phyp->children) {
1350 			pmcs_deregister_devices(pwp, phyp->children);
1351 		}
1352 		if (phyp->valid_device_id) {
1353 			pmcs_deregister_device(pwp, phyp);
1354 		}
1355 		phyp = phyp->sibling;
1356 	}
1357 }
1358 
1359 /*
1360  * Perform a 'soft' reset on the PMC chip
1361  */
1362 int
1363 pmcs_soft_reset(pmcs_hw_t *pwp, boolean_t no_restart)
1364 {
1365 	uint32_t s2, sfrbits, gsm, rapchk, wapchk, wdpchk, spc, tsmode;
1366 	pmcs_phy_t *pptr;
1367 	char *msg = NULL;
1368 	int i;
1369 
1370 	/*
1371 	 * Disable interrupts
1372 	 */
1373 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_MASK, 0xffffffff);
1374 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR, 0xffffffff);
1375 
1376 	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "%s", __func__);
1377 
1378 	if (pwp->locks_initted) {
1379 		mutex_enter(&pwp->lock);
1380 	}
1381 	pwp->blocked = 1;
1382 
1383 	/*
1384 	 * Step 1
1385 	 */
1386 	s2 = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2);
1387 	if ((s2 & PMCS_MSGU_HOST_SOFT_RESET_READY) == 0) {
1388 		pmcs_wr_gsm_reg(pwp, RB6_ACCESS, RB6_NMI_SIGNATURE);
1389 		pmcs_wr_gsm_reg(pwp, RB6_ACCESS, RB6_NMI_SIGNATURE);
1390 		for (i = 0; i < 100; i++) {
1391 			s2 = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2) &
1392 			    PMCS_MSGU_HOST_SOFT_RESET_READY;
1393 			if (s2) {
1394 				break;
1395 			}
1396 			drv_usecwait(10000);
1397 		}
1398 		s2 = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2) &
1399 		    PMCS_MSGU_HOST_SOFT_RESET_READY;
1400 		if (s2 == 0) {
1401 			pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
1402 			    "%s: PMCS_MSGU_HOST_SOFT_RESET_READY never came "
1403 			    "ready", __func__);
1404 			pmcs_register_dump(pwp);
1405 			if ((pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1) &
1406 			    PMCS_MSGU_CPU_SOFT_RESET_READY) == 0 ||
1407 			    (pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2) &
1408 			    PMCS_MSGU_CPU_SOFT_RESET_READY) == 0) {
1409 				pwp->state = STATE_DEAD;
1410 				pwp->blocked = 0;
1411 				if (pwp->locks_initted) {
1412 					mutex_exit(&pwp->lock);
1413 				}
1414 				return (-1);
1415 			}
1416 		}
1417 	}
1418 
1419 	/*
1420 	 * Step 2
1421 	 */
1422 	pmcs_wr_gsm_reg(pwp, NMI_EN_VPE0_IOP, 0);
1423 	drv_usecwait(10);
1424 	pmcs_wr_gsm_reg(pwp, NMI_EN_VPE0_AAP1, 0);
1425 	drv_usecwait(10);
1426 	pmcs_wr_topunit(pwp, PMCS_EVENT_INT_ENABLE, 0);
1427 	drv_usecwait(10);
1428 	pmcs_wr_topunit(pwp, PMCS_EVENT_INT_STAT,
1429 	    pmcs_rd_topunit(pwp, PMCS_EVENT_INT_STAT));
1430 	drv_usecwait(10);
1431 	pmcs_wr_topunit(pwp, PMCS_ERROR_INT_ENABLE, 0);
1432 	drv_usecwait(10);
1433 	pmcs_wr_topunit(pwp, PMCS_ERROR_INT_STAT,
1434 	    pmcs_rd_topunit(pwp, PMCS_ERROR_INT_STAT));
1435 	drv_usecwait(10);
1436 
1437 	sfrbits = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1) &
1438 	    PMCS_MSGU_AAP_SFR_PROGRESS;
1439 	sfrbits ^= PMCS_MSGU_AAP_SFR_PROGRESS;
1440 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "PMCS_MSGU_HOST_SCRATCH0 "
1441 	    "%08x -> %08x", pmcs_rd_msgunit(pwp, PMCS_MSGU_HOST_SCRATCH0),
1442 	    HST_SFT_RESET_SIG);
1443 	pmcs_wr_msgunit(pwp, PMCS_MSGU_HOST_SCRATCH0, HST_SFT_RESET_SIG);
1444 
1445 	/*
1446 	 * Step 3
1447 	 */
1448 	gsm = pmcs_rd_gsm_reg(pwp, GSM_CFG_AND_RESET);
1449 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "GSM %08x -> %08x", gsm,
1450 	    gsm & ~PMCS_SOFT_RESET_BITS);
1451 	pmcs_wr_gsm_reg(pwp, GSM_CFG_AND_RESET, gsm & ~PMCS_SOFT_RESET_BITS);
1452 
1453 	/*
1454 	 * Step 4
1455 	 */
1456 	rapchk = pmcs_rd_gsm_reg(pwp, READ_ADR_PARITY_CHK_EN);
1457 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "READ_ADR_PARITY_CHK_EN "
1458 	    "%08x -> %08x", rapchk, 0);
1459 	pmcs_wr_gsm_reg(pwp, READ_ADR_PARITY_CHK_EN, 0);
1460 	wapchk = pmcs_rd_gsm_reg(pwp, WRITE_ADR_PARITY_CHK_EN);
1461 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "WRITE_ADR_PARITY_CHK_EN "
1462 	    "%08x -> %08x", wapchk, 0);
1463 	pmcs_wr_gsm_reg(pwp, WRITE_ADR_PARITY_CHK_EN, 0);
1464 	wdpchk = pmcs_rd_gsm_reg(pwp, WRITE_DATA_PARITY_CHK_EN);
1465 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "WRITE_DATA_PARITY_CHK_EN "
1466 	    "%08x -> %08x", wdpchk, 0);
1467 	pmcs_wr_gsm_reg(pwp, WRITE_DATA_PARITY_CHK_EN, 0);
1468 
1469 	/*
1470 	 * Step 5
1471 	 */
1472 	drv_usecwait(100);
1473 
1474 	/*
1475 	 * Step 5.5 (Temporary workaround for 1.07.xx Beta)
1476 	 */
1477 	tsmode = pmcs_rd_gsm_reg(pwp, PMCS_GPIO_TRISTATE_MODE_ADDR);
1478 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "GPIO TSMODE %08x -> %08x",
1479 	    tsmode, tsmode & ~(PMCS_GPIO_TSMODE_BIT0|PMCS_GPIO_TSMODE_BIT1));
1480 	pmcs_wr_gsm_reg(pwp, PMCS_GPIO_TRISTATE_MODE_ADDR,
1481 	    tsmode & ~(PMCS_GPIO_TSMODE_BIT0|PMCS_GPIO_TSMODE_BIT1));
1482 	drv_usecwait(10);
1483 
1484 	/*
1485 	 * Step 6
1486 	 */
1487 	spc = pmcs_rd_topunit(pwp, PMCS_SPC_RESET);
1488 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "SPC_RESET %08x -> %08x",
1489 	    spc, spc & ~(PCS_IOP_SS_RSTB|PCS_AAP1_SS_RSTB));
1490 	pmcs_wr_topunit(pwp, PMCS_SPC_RESET,
1491 	    spc & ~(PCS_IOP_SS_RSTB|PCS_AAP1_SS_RSTB));
1492 	drv_usecwait(10);
1493 
1494 	/*
1495 	 * Step 7
1496 	 */
1497 	spc = pmcs_rd_topunit(pwp, PMCS_SPC_RESET);
1498 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "SPC_RESET %08x -> %08x",
1499 	    spc, spc & ~(BDMA_CORE_RSTB|OSSP_RSTB));
1500 	pmcs_wr_topunit(pwp, PMCS_SPC_RESET, spc & ~(BDMA_CORE_RSTB|OSSP_RSTB));
1501 
1502 	/*
1503 	 * Step 8
1504 	 */
1505 	drv_usecwait(100);
1506 
1507 	/*
1508 	 * Step 9
1509 	 */
1510 	spc = pmcs_rd_topunit(pwp, PMCS_SPC_RESET);
1511 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "SPC_RESET %08x -> %08x",
1512 	    spc, spc | (BDMA_CORE_RSTB|OSSP_RSTB));
1513 	pmcs_wr_topunit(pwp, PMCS_SPC_RESET, spc | (BDMA_CORE_RSTB|OSSP_RSTB));
1514 
1515 	/*
1516 	 * Step 10
1517 	 */
1518 	drv_usecwait(100);
1519 
1520 	/*
1521 	 * Step 11
1522 	 */
1523 	gsm = pmcs_rd_gsm_reg(pwp, GSM_CFG_AND_RESET);
1524 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "GSM %08x -> %08x", gsm,
1525 	    gsm | PMCS_SOFT_RESET_BITS);
1526 	pmcs_wr_gsm_reg(pwp, GSM_CFG_AND_RESET, gsm | PMCS_SOFT_RESET_BITS);
1527 	drv_usecwait(10);
1528 
1529 	/*
1530 	 * Step 12
1531 	 */
1532 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "READ_ADR_PARITY_CHK_EN "
1533 	    "%08x -> %08x", pmcs_rd_gsm_reg(pwp, READ_ADR_PARITY_CHK_EN),
1534 	    rapchk);
1535 	pmcs_wr_gsm_reg(pwp, READ_ADR_PARITY_CHK_EN, rapchk);
1536 	drv_usecwait(10);
1537 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "WRITE_ADR_PARITY_CHK_EN "
1538 	    "%08x -> %08x", pmcs_rd_gsm_reg(pwp, WRITE_ADR_PARITY_CHK_EN),
1539 	    wapchk);
1540 	pmcs_wr_gsm_reg(pwp, WRITE_ADR_PARITY_CHK_EN, wapchk);
1541 	drv_usecwait(10);
1542 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "WRITE_DATA_PARITY_CHK_EN "
1543 	    "%08x -> %08x", pmcs_rd_gsm_reg(pwp, WRITE_DATA_PARITY_CHK_EN),
1544 	    wapchk);
1545 	pmcs_wr_gsm_reg(pwp, WRITE_DATA_PARITY_CHK_EN, wdpchk);
1546 	drv_usecwait(10);
1547 
1548 	/*
1549 	 * Step 13
1550 	 */
1551 	spc = pmcs_rd_topunit(pwp, PMCS_SPC_RESET);
1552 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "SPC_RESET %08x -> %08x",
1553 	    spc, spc | (PCS_IOP_SS_RSTB|PCS_AAP1_SS_RSTB));
1554 	pmcs_wr_topunit(pwp, PMCS_SPC_RESET,
1555 	    spc | (PCS_IOP_SS_RSTB|PCS_AAP1_SS_RSTB));
1556 
1557 	/*
1558 	 * Step 14
1559 	 */
1560 	drv_usecwait(100);
1561 
1562 	/*
1563 	 * Step 15
1564 	 */
1565 	for (spc = 0, i = 0; i < 1000; i++) {
1566 		drv_usecwait(1000);
1567 		spc = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1);
1568 		if ((spc & PMCS_MSGU_AAP_SFR_PROGRESS) == sfrbits) {
1569 			break;
1570 		}
1571 	}
1572 
1573 	if ((spc & PMCS_MSGU_AAP_SFR_PROGRESS) != sfrbits) {
1574 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
1575 		    "SFR didn't toggle (sfr 0x%x)", spc);
1576 		pwp->state = STATE_DEAD;
1577 		pwp->blocked = 0;
1578 		if (pwp->locks_initted) {
1579 			mutex_exit(&pwp->lock);
1580 		}
1581 		return (-1);
1582 	}
1583 
1584 	/*
1585 	 * Step 16
1586 	 */
1587 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_MASK, 0xffffffff);
1588 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR, 0xffffffff);
1589 
1590 	/*
1591 	 * Wait for up to 5 seconds for AAP state to come either ready or error.
1592 	 */
1593 	for (i = 0; i < 50; i++) {
1594 		spc = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1) &
1595 		    PMCS_MSGU_AAP_STATE_MASK;
1596 		if (spc == PMCS_MSGU_AAP_STATE_ERROR ||
1597 		    spc == PMCS_MSGU_AAP_STATE_READY) {
1598 			break;
1599 		}
1600 		drv_usecwait(100000);
1601 	}
1602 	spc = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1);
1603 	if ((spc & PMCS_MSGU_AAP_STATE_MASK) != PMCS_MSGU_AAP_STATE_READY) {
1604 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
1605 		    "soft reset failed (state 0x%x)", spc);
1606 		pwp->state = STATE_DEAD;
1607 		pwp->blocked = 0;
1608 		if (pwp->locks_initted) {
1609 			mutex_exit(&pwp->lock);
1610 		}
1611 		return (-1);
1612 	}
1613 
1614 
1615 	if (pwp->state == STATE_DEAD || pwp->state == STATE_UNPROBING ||
1616 	    pwp->state == STATE_PROBING || pwp->locks_initted == 0) {
1617 		pwp->blocked = 0;
1618 		if (pwp->locks_initted) {
1619 			mutex_exit(&pwp->lock);
1620 		}
1621 		return (0);
1622 	}
1623 
1624 	/*
1625 	 * Return at this point if we dont need to startup.
1626 	 */
1627 	if (no_restart) {
1628 		return (0);
1629 	}
1630 
1631 	ASSERT(pwp->locks_initted != 0);
1632 
1633 	/*
1634 	 * Clean up various soft state.
1635 	 */
1636 	bzero(pwp->ports, sizeof (pwp->ports));
1637 
1638 	pmcs_free_all_phys(pwp, pwp->root_phys);
1639 
1640 	for (pptr = pwp->root_phys; pptr; pptr = pptr->sibling) {
1641 		pmcs_lock_phy(pptr);
1642 		pmcs_clear_phy(pwp, pptr);
1643 		pmcs_unlock_phy(pptr);
1644 	}
1645 
1646 	if (pwp->targets) {
1647 		for (i = 0; i < pwp->max_dev; i++) {
1648 			pmcs_xscsi_t *xp = pwp->targets[i];
1649 
1650 			if (xp == NULL) {
1651 				continue;
1652 			}
1653 			mutex_enter(&xp->statlock);
1654 			pmcs_clear_xp(pwp, xp);
1655 			mutex_exit(&xp->statlock);
1656 		}
1657 	}
1658 
1659 	bzero(pwp->shadow_iqpi, sizeof (pwp->shadow_iqpi));
1660 	for (i = 0; i < PMCS_NIQ; i++) {
1661 		if (pwp->iqp[i]) {
1662 			bzero(pwp->iqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth);
1663 			pmcs_wr_iqpi(pwp, i, 0);
1664 			pmcs_wr_iqci(pwp, i, 0);
1665 		}
1666 	}
1667 	for (i = 0; i < PMCS_NOQ; i++) {
1668 		if (pwp->oqp[i]) {
1669 			bzero(pwp->oqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth);
1670 			pmcs_wr_oqpi(pwp, i, 0);
1671 			pmcs_wr_oqci(pwp, i, 0);
1672 		}
1673 
1674 	}
1675 	if (pwp->fwlogp) {
1676 		bzero(pwp->fwlogp, PMCS_FWLOG_SIZE);
1677 	}
1678 	STAILQ_INIT(&pwp->wf);
1679 	bzero(pwp->work, sizeof (pmcwork_t) * pwp->max_cmd);
1680 	for (i = 0; i < pwp->max_cmd - 1; i++) {
1681 		pmcwork_t *pwrk = &pwp->work[i];
1682 		STAILQ_INSERT_TAIL(&pwp->wf, pwrk, next);
1683 	}
1684 
1685 	/*
1686 	 * Clear out any leftover commands sitting in the work list
1687 	 */
1688 	for (i = 0; i < pwp->max_cmd; i++) {
1689 		pmcwork_t *pwrk = &pwp->work[i];
1690 		mutex_enter(&pwrk->lock);
1691 		if (pwrk->state == PMCS_WORK_STATE_ONCHIP) {
1692 			switch (PMCS_TAG_TYPE(pwrk->htag)) {
1693 			case PMCS_TAG_TYPE_WAIT:
1694 				mutex_exit(&pwrk->lock);
1695 				break;
1696 			case PMCS_TAG_TYPE_CBACK:
1697 			case PMCS_TAG_TYPE_NONE:
1698 				pmcs_pwork(pwp, pwrk);
1699 				break;
1700 			default:
1701 				break;
1702 			}
1703 		} else if (pwrk->state == PMCS_WORK_STATE_IOCOMPQ) {
1704 			pwrk->dead = 1;
1705 			mutex_exit(&pwrk->lock);
1706 		} else {
1707 			/*
1708 			 * The other states of NIL, READY and INTR
1709 			 * should not be visible outside of a lock being held.
1710 			 */
1711 			pmcs_pwork(pwp, pwrk);
1712 		}
1713 	}
1714 
1715 	/*
1716 	 * Restore Interrupt Mask
1717 	 */
1718 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_MASK, pwp->intr_mask);
1719 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR, 0xffffffff);
1720 
1721 	pwp->blocked = 0;
1722 	pwp->mpi_table_setup = 0;
1723 	mutex_exit(&pwp->lock);
1724 
1725 	/*
1726 	 * Set up MPI again.
1727 	 */
1728 	if (pmcs_setup(pwp)) {
1729 		msg = "unable to setup MPI tables again";
1730 		goto fail_restart;
1731 	}
1732 	pmcs_report_fwversion(pwp);
1733 
1734 	/*
1735 	 * Restart MPI
1736 	 */
1737 	if (pmcs_start_mpi(pwp)) {
1738 		msg = "unable to restart MPI again";
1739 		goto fail_restart;
1740 	}
1741 
1742 	mutex_enter(&pwp->lock);
1743 	pwp->blocked = 0;
1744 	SCHEDULE_WORK(pwp, PMCS_WORK_RUN_QUEUES);
1745 	mutex_exit(&pwp->lock);
1746 
1747 	/*
1748 	 * Run any completions
1749 	 */
1750 	PMCS_CQ_RUN(pwp);
1751 
1752 	/*
1753 	 * Delay
1754 	 */
1755 	drv_usecwait(1000000);
1756 	return (0);
1757 
1758 fail_restart:
1759 	mutex_enter(&pwp->lock);
1760 	pwp->state = STATE_DEAD;
1761 	mutex_exit(&pwp->lock);
1762 	pmcs_prt(pwp, PMCS_PRT_ERR, NULL, NULL,
1763 	    "%s: Failed: %s", __func__, msg);
1764 	return (-1);
1765 }
1766 
1767 /*
1768  * Reset a device or a logical unit.
1769  */
1770 int
1771 pmcs_reset_dev(pmcs_hw_t *pwp, pmcs_phy_t *pptr, uint64_t lun)
1772 {
1773 	int rval = 0;
1774 
1775 	if (pptr == NULL) {
1776 		return (ENXIO);
1777 	}
1778 
1779 	pmcs_lock_phy(pptr);
1780 	if (pptr->dtype == SAS) {
1781 		/*
1782 		 * Some devices do not support SAS_I_T_NEXUS_RESET as
1783 		 * it is not a mandatory (in SAM4) task management
1784 		 * function, while LOGIC_UNIT_RESET is mandatory.
1785 		 *
1786 		 * The problem here is that we need to iterate over
1787 		 * all known LUNs to emulate the semantics of
1788 		 * "RESET_TARGET".
1789 		 *
1790 		 * XXX: FIX ME
1791 		 */
1792 		if (lun == (uint64_t)-1) {
1793 			lun = 0;
1794 		}
1795 		rval = pmcs_ssp_tmf(pwp, pptr, SAS_LOGICAL_UNIT_RESET, 0, lun,
1796 		    NULL);
1797 	} else if (pptr->dtype == SATA) {
1798 		if (lun != 0ull) {
1799 			pmcs_unlock_phy(pptr);
1800 			return (EINVAL);
1801 		}
1802 		rval = pmcs_reset_phy(pwp, pptr, PMCS_PHYOP_LINK_RESET);
1803 	} else {
1804 		pmcs_unlock_phy(pptr);
1805 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
1806 		    "%s: cannot reset a SMP device yet (%s)",
1807 		    __func__, pptr->path);
1808 		return (EINVAL);
1809 	}
1810 
1811 	/*
1812 	 * Now harvest any commands killed by this action
1813 	 * by issuing an ABORT for all commands on this device.
1814 	 *
1815 	 * We do this even if the the tmf or reset fails (in case there
1816 	 * are any dead commands around to be harvested *anyway*).
1817 	 * We don't have to await for the abort to complete.
1818 	 */
1819 	if (pmcs_abort(pwp, pptr, 0, 1, 0)) {
1820 		pptr->abort_pending = 1;
1821 		SCHEDULE_WORK(pwp, PMCS_WORK_ABORT_HANDLE);
1822 	}
1823 
1824 	pmcs_unlock_phy(pptr);
1825 	return (rval);
1826 }
1827 
1828 /*
1829  * Called with PHY locked.
1830  */
1831 static int
1832 pmcs_get_device_handle(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
1833 {
1834 	if (pptr->valid_device_id == 0) {
1835 		int result = pmcs_register_device(pwp, pptr);
1836 
1837 		/*
1838 		 * If we changed while registering, punt
1839 		 */
1840 		if (pptr->changed) {
1841 			RESTART_DISCOVERY(pwp);
1842 			return (-1);
1843 		}
1844 
1845 		/*
1846 		 * If we had a failure to register, check against errors.
1847 		 * An ENOMEM error means we just retry (temp resource shortage).
1848 		 */
1849 		if (result == ENOMEM) {
1850 			PHY_CHANGED(pwp, pptr);
1851 			RESTART_DISCOVERY(pwp);
1852 			return (-1);
1853 		}
1854 
1855 		/*
1856 		 * An ETIMEDOUT error means we retry (if our counter isn't
1857 		 * exhausted)
1858 		 */
1859 		if (result == ETIMEDOUT) {
1860 			if (ddi_get_lbolt() < pptr->config_stop) {
1861 				PHY_CHANGED(pwp, pptr);
1862 				RESTART_DISCOVERY(pwp);
1863 			} else {
1864 				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
1865 				    "%s: Retries exhausted for %s, killing",
1866 				    __func__, pptr->path);
1867 				pptr->config_stop = 0;
1868 				pmcs_kill_changed(pwp, pptr, 0);
1869 			}
1870 			return (-1);
1871 		}
1872 		/*
1873 		 * Other errors or no valid device id is fatal, but don't
1874 		 * preclude a future action.
1875 		 */
1876 		if (result || pptr->valid_device_id == 0) {
1877 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
1878 			    "%s: %s could not be registered", __func__,
1879 			    pptr->path);
1880 			return (-1);
1881 		}
1882 	}
1883 	return (0);
1884 }
1885 
1886 int
1887 pmcs_iport_tgtmap_create(pmcs_iport_t *iport)
1888 {
1889 	ASSERT(iport);
1890 	if (iport == NULL)
1891 		return (B_FALSE);
1892 
1893 	pmcs_prt(iport->pwp, PMCS_PRT_DEBUG_MAP, NULL, NULL, "%s", __func__);
1894 
1895 	/* create target map */
1896 	if (scsi_hba_tgtmap_create(iport->dip, SCSI_TM_FULLSET, tgtmap_usec,
1897 	    NULL, NULL, NULL, &iport->iss_tgtmap) != DDI_SUCCESS) {
1898 		pmcs_prt(iport->pwp, PMCS_PRT_DEBUG, NULL, NULL,
1899 		    "%s: failed to create tgtmap", __func__);
1900 		return (B_FALSE);
1901 	}
1902 	return (B_TRUE);
1903 }
1904 
1905 int
1906 pmcs_iport_tgtmap_destroy(pmcs_iport_t *iport)
1907 {
1908 	ASSERT(iport && iport->iss_tgtmap);
1909 	if ((iport == NULL) || (iport->iss_tgtmap == NULL))
1910 		return (B_FALSE);
1911 
1912 	pmcs_prt(iport->pwp, PMCS_PRT_DEBUG_MAP, NULL, NULL, "%s", __func__);
1913 
1914 	/* destroy target map */
1915 	scsi_hba_tgtmap_destroy(iport->iss_tgtmap);
1916 	return (B_TRUE);
1917 }
1918 
1919 /*
1920  * Query the phymap and populate the iport handle passed in.
1921  * Called with iport lock held.
1922  */
1923 int
1924 pmcs_iport_configure_phys(pmcs_iport_t *iport)
1925 {
1926 	pmcs_hw_t		*pwp;
1927 	pmcs_phy_t		*pptr;
1928 	sas_phymap_phys_t	*phys;
1929 	int			phynum;
1930 	int			inst;
1931 
1932 	ASSERT(iport);
1933 	ASSERT(mutex_owned(&iport->lock));
1934 	pwp = iport->pwp;
1935 	ASSERT(pwp);
1936 	inst = ddi_get_instance(iport->dip);
1937 
1938 	mutex_enter(&pwp->lock);
1939 	ASSERT(pwp->root_phys != NULL);
1940 
1941 	/*
1942 	 * Query the phymap regarding the phys in this iport and populate
1943 	 * the iport's phys list. Hereafter this list is maintained via
1944 	 * port up and down events in pmcs_intr.c
1945 	 */
1946 	ASSERT(list_is_empty(&iport->phys));
1947 	phys = sas_phymap_ua2phys(pwp->hss_phymap, iport->ua);
1948 	while ((phynum = sas_phymap_phys_next(phys)) != -1) {
1949 		/* Grab the phy pointer from root_phys */
1950 		pptr = pwp->root_phys + phynum;
1951 		ASSERT(pptr);
1952 		pmcs_lock_phy(pptr);
1953 		ASSERT(pptr->phynum == phynum);
1954 
1955 		/*
1956 		 * Set a back pointer in the phy to this iport.
1957 		 */
1958 		pptr->iport = iport;
1959 
1960 		/*
1961 		 * If this phy is the primary, set a pointer to it on our
1962 		 * iport handle, and set our portid from it.
1963 		 */
1964 		if (!pptr->subsidiary) {
1965 			iport->pptr = pptr;
1966 			iport->portid = pptr->portid;
1967 		}
1968 
1969 		/*
1970 		 * Finally, insert the phy into our list
1971 		 */
1972 		pmcs_unlock_phy(pptr);
1973 		pmcs_add_phy_to_iport(iport, pptr);
1974 
1975 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL, "%s: found "
1976 		    "phy %d [0x%p] on iport%d, refcnt(%d)", __func__, phynum,
1977 		    (void *)pptr, inst, iport->refcnt);
1978 	}
1979 	mutex_exit(&pwp->lock);
1980 	sas_phymap_phys_free(phys);
1981 	RESTART_DISCOVERY(pwp);
1982 	return (DDI_SUCCESS);
1983 }
1984 
1985 /*
1986  * Return the iport that ua is associated with, or NULL.  If an iport is
1987  * returned, it will be held and the caller must release the hold.
1988  */
1989 static pmcs_iport_t *
1990 pmcs_get_iport_by_ua(pmcs_hw_t *pwp, char *ua)
1991 {
1992 	pmcs_iport_t	*iport = NULL;
1993 
1994 	rw_enter(&pwp->iports_lock, RW_READER);
1995 	for (iport = list_head(&pwp->iports);
1996 	    iport != NULL;
1997 	    iport = list_next(&pwp->iports, iport)) {
1998 		mutex_enter(&iport->lock);
1999 		if (strcmp(iport->ua, ua) == 0) {
2000 			mutex_exit(&iport->lock);
2001 			mutex_enter(&iport->refcnt_lock);
2002 			iport->refcnt++;
2003 			mutex_exit(&iport->refcnt_lock);
2004 			break;
2005 		}
2006 		mutex_exit(&iport->lock);
2007 	}
2008 	rw_exit(&pwp->iports_lock);
2009 
2010 	return (iport);
2011 }
2012 
2013 /*
2014  * Return the iport that pptr is associated with, or NULL.
2015  * If an iport is returned, there is a hold that the caller must release.
2016  */
2017 pmcs_iport_t *
2018 pmcs_get_iport_by_phy(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2019 {
2020 	pmcs_iport_t	*iport = NULL;
2021 	char		*ua;
2022 
2023 	ua = sas_phymap_lookup_ua(pwp->hss_phymap, pwp->sas_wwns[0],
2024 	    pmcs_barray2wwn(pptr->sas_address));
2025 	if (ua) {
2026 		iport = pmcs_get_iport_by_ua(pwp, ua);
2027 		if (iport) {
2028 			mutex_enter(&iport->lock);
2029 			iport->ua_state = UA_ACTIVE;
2030 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL, "%s: "
2031 			    "found iport [0x%p] on ua (%s) for phy [0x%p], "
2032 			    "refcnt (%d)", __func__, (void *)iport, ua,
2033 			    (void *)pptr, iport->refcnt);
2034 			mutex_exit(&iport->lock);
2035 		}
2036 	}
2037 
2038 	return (iport);
2039 }
2040 
2041 void
2042 pmcs_rele_iport(pmcs_iport_t *iport)
2043 {
2044 	/*
2045 	 * Release a refcnt on this iport. If this is the last reference,
2046 	 * signal the potential waiter in pmcs_iport_unattach().
2047 	 */
2048 	ASSERT(iport->refcnt > 0);
2049 	mutex_enter(&iport->refcnt_lock);
2050 	iport->refcnt--;
2051 	mutex_exit(&iport->refcnt_lock);
2052 	if (iport->refcnt == 0) {
2053 		cv_signal(&iport->refcnt_cv);
2054 	}
2055 	pmcs_prt(iport->pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL, "%s: iport "
2056 	    "[0x%p] refcnt (%d)", __func__, (void *)iport, iport->refcnt);
2057 }
2058 
2059 void
2060 pmcs_phymap_activate(void *arg, char *ua, void **privp)
2061 {
2062 	_NOTE(ARGUNUSED(privp));
2063 	pmcs_hw_t	*pwp = arg;
2064 	pmcs_iport_t	*iport = NULL;
2065 
2066 	mutex_enter(&pwp->lock);
2067 	if ((pwp->state == STATE_UNPROBING) || (pwp->state == STATE_DEAD)) {
2068 		mutex_exit(&pwp->lock);
2069 		return;
2070 	}
2071 	pwp->phymap_active++;
2072 	mutex_exit(&pwp->lock);
2073 
2074 	if (scsi_hba_iportmap_iport_add(pwp->hss_iportmap, ua, NULL) !=
2075 	    DDI_SUCCESS) {
2076 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, NULL, NULL, "%s: failed to "
2077 		    "add iport handle on unit address [%s]", __func__, ua);
2078 	} else {
2079 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, NULL, NULL, "%s: "
2080 		    "phymap_active count (%d), added iport handle on unit "
2081 		    "address [%s]", __func__, pwp->phymap_active, ua);
2082 	}
2083 
2084 	/* Set the HBA softstate as our private data for this unit address */
2085 	*privp = (void *)pwp;
2086 
2087 	/*
2088 	 * We are waiting on attach for this iport node, unless it is still
2089 	 * attached. This can happen if a consumer has an outstanding open
2090 	 * on our iport node, but the port is down.  If this is the case, we
2091 	 * need to configure our iport here for reuse.
2092 	 */
2093 	iport = pmcs_get_iport_by_ua(pwp, ua);
2094 	if (iport) {
2095 		mutex_enter(&iport->lock);
2096 		if (pmcs_iport_configure_phys(iport) != DDI_SUCCESS) {
2097 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL, "%s: "
2098 			    "failed to configure phys on iport [0x%p] at "
2099 			    "unit address (%s)", __func__, (void *)iport, ua);
2100 		}
2101 		iport->ua_state = UA_ACTIVE;
2102 		pmcs_smhba_add_iport_prop(iport, DATA_TYPE_INT32, PMCS_NUM_PHYS,
2103 		    &iport->nphy);
2104 		mutex_exit(&iport->lock);
2105 		pmcs_rele_iport(iport);
2106 	}
2107 
2108 }
2109 
2110 void
2111 pmcs_phymap_deactivate(void *arg, char *ua, void *privp)
2112 {
2113 	_NOTE(ARGUNUSED(privp));
2114 	pmcs_hw_t	*pwp = arg;
2115 	pmcs_iport_t	*iport;
2116 
2117 	mutex_enter(&pwp->lock);
2118 	pwp->phymap_active--;
2119 	mutex_exit(&pwp->lock);
2120 
2121 	if (scsi_hba_iportmap_iport_remove(pwp->hss_iportmap, ua) !=
2122 	    DDI_SUCCESS) {
2123 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, NULL, NULL, "%s: failed to "
2124 		    "remove iport handle on unit address [%s]", __func__, ua);
2125 	} else {
2126 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, NULL, NULL, "%s: "
2127 		    "phymap_active count (%d), removed iport handle on unit "
2128 		    "address [%s]", __func__, pwp->phymap_active, ua);
2129 	}
2130 
2131 	iport = pmcs_get_iport_by_ua(pwp, ua);
2132 
2133 	if (iport == NULL) {
2134 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL, "%s: failed "
2135 		    "lookup of iport handle on unit addr (%s)", __func__, ua);
2136 		return;
2137 	}
2138 
2139 	mutex_enter(&iport->lock);
2140 	iport->ua_state = UA_INACTIVE;
2141 	iport->portid = PMCS_IPORT_INVALID_PORT_ID;
2142 	pmcs_remove_phy_from_iport(iport, NULL);
2143 	mutex_exit(&iport->lock);
2144 	pmcs_rele_iport(iport);
2145 }
2146 
/*
 * Top-level discovery function
 *
 * Runs one complete discovery pass over the PHY tree:
 *
 *   1. Check changed (but not dead) expanders for changes.
 *   2. Abort commands for dead devices and deregister them.
 *   3. Remove the contents of dead expanders from the tree.
 *   4. Nullify now-dead devices in expanders that remain.
 *   5. Configure any new devices.
 *
 * Only one pass runs at a time (pwp->configuring, protected by
 * config_lock).  If the configuration changes while we work, or a
 * precondition isn't met yet, we reschedule ourselves via
 * SCHEDULE_WORK/RESTART_DISCOVERY rather than continuing.
 */
void
pmcs_discover(pmcs_hw_t *pwp)
{
	pmcs_phy_t		*pptr;
	pmcs_phy_t		*root_phy;
	boolean_t		config_changed;

	DTRACE_PROBE2(pmcs__discover__entry, ulong_t, pwp->work_flags,
	    boolean_t, pwp->config_changed);

	mutex_enter(&pwp->lock);

	/* Nothing to do unless the HBA is fully up and running. */
	if (pwp->state != STATE_RUNNING) {
		mutex_exit(&pwp->lock);
		return;
	}

	/* Ensure we have at least one phymap active */
	if (pwp->phymap_active == 0) {
		mutex_exit(&pwp->lock);
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL,
		    "%s: phymap inactive, exiting", __func__);
		return;
	}

	mutex_exit(&pwp->lock);

	/*
	 * If no iports have attached, but we have PHYs that are up, we
	 * are waiting for iport attach to complete.  Restart discovery.
	 */
	rw_enter(&pwp->iports_lock, RW_READER);
	if (!pwp->iports_attached) {
		rw_exit(&pwp->iports_lock);
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL,
		    "%s: no iports attached, retry discovery", __func__);
		SCHEDULE_WORK(pwp, PMCS_WORK_DISCOVER);
		return;
	}
	rw_exit(&pwp->iports_lock);

	/* Serialize: only one discovery pass may be in flight. */
	mutex_enter(&pwp->config_lock);
	if (pwp->configuring) {
		mutex_exit(&pwp->config_lock);
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL,
		    "%s: configuration already in progress", __func__);
		return;
	}

	/* Discovery needs the scratch region; if unavailable, retry later. */
	if (pmcs_acquire_scratch(pwp, B_FALSE)) {
		mutex_exit(&pwp->config_lock);
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL,
		    "%s: cannot allocate scratch", __func__);
		SCHEDULE_WORK(pwp, PMCS_WORK_DISCOVER);
		return;
	}

	pwp->configuring = 1;
	pwp->config_changed = B_FALSE;
	mutex_exit(&pwp->config_lock);

	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL, "Discovery begin");

	/*
	 * The order of the following traversals is important.
	 *
	 * The first one checks for changed expanders.
	 *
	 * The second one aborts commands for dead devices and deregisters them.
	 *
	 * The third one clears the contents of dead expanders from the tree
	 *
	 * The fourth one clears now dead devices in expanders that remain.
	 */

	/*
	 * 1. Check expanders marked changed (but not dead) to see if they still
	 * have the same number of phys and the same SAS address. Mark them,
	 * their subsidiary phys (if wide) and their descendents dead if
	 * anything has changed. Check the devices they contain to see if
	 * *they* have changed. If they've changed from type NOTHING we leave
	 * them marked changed to be configured later (picking up a new SAS
	 * address and link rate if possible). Otherwise, any change in type,
	 * SAS address or removal of target role will cause us to mark them
	 * (and their descendents) as dead (and cause any pending commands
	 * and associated devices to be removed).
	 *
	 * NOTE: We don't want to bail on discovery if the config has
	 * changed until *after* we run pmcs_kill_devices.
	 */
	root_phy = pwp->root_phys;
	config_changed = pmcs_check_expanders(pwp, root_phy);

	/*
	 * 2. Descend the tree looking for dead devices and kill them
	 * by aborting all active commands and then deregistering them.
	 */
	if (pmcs_kill_devices(pwp, root_phy) || config_changed) {
		goto out;
	}

	/*
	 * 3. Check for dead expanders and remove their children from the tree.
	 * By the time we get here, the devices and commands for them have
	 * already been terminated and removed.
	 *
	 * We do this independent of the configuration count changing so we can
	 * free any dead device PHYs that were discovered while checking
	 * expanders. We ignore any subsidiary phys as pmcs_clear_expander
	 * will take care of those.
	 *
	 * NOTE: pmcs_clear_expander requires softstate lock
	 */
	mutex_enter(&pwp->lock);
	for (pptr = pwp->root_phys; pptr; pptr = pptr->sibling) {
		/*
		 * Call pmcs_clear_expander for every root PHY.  It will
		 * recurse and determine which (if any) expanders actually
		 * need to be cleared.
		 */
		pmcs_lock_phy(pptr);
		pmcs_clear_expander(pwp, pptr, 0);
		pmcs_unlock_phy(pptr);
	}
	mutex_exit(&pwp->lock);

	/*
	 * 4. Check for dead devices and nullify them. By the time we get here,
	 * the devices and commands for them have already been terminated
	 * and removed. This is different from step 2 in that this just nulls
	 * phys that are part of expanders that are still here but used to
	 * be something but are no longer something (e.g., after a pulled
	 * disk drive). Note that dead expanders had their contained phys
	 * removed from the tree- here, the expanders themselves are
	 * nullified (unless they were removed by being contained in another
	 * expander phy).
	 */
	pmcs_clear_phys(pwp, root_phy);

	/*
	 * 5. Now check for and configure new devices.
	 */
	if (pmcs_configure_new_devices(pwp, root_phy)) {
		goto restart;
	}

out:
	/* Normal completion path (also taken when config changed in 1/2). */
	DTRACE_PROBE2(pmcs__discover__exit, ulong_t, pwp->work_flags,
	    boolean_t, pwp->config_changed);
	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL, "Discovery end");

	mutex_enter(&pwp->config_lock);

	if (pwp->config_changed == B_FALSE) {
		/*
		 * Observation is stable, report what we currently see to
		 * the tgtmaps for delta processing. Start by setting
		 * BEGIN on all tgtmaps.
		 */
		mutex_exit(&pwp->config_lock);
		if (pmcs_report_observations(pwp) == B_FALSE) {
			goto restart;
		}
		mutex_enter(&pwp->config_lock);
	} else {
		/*
		 * If config_changed is TRUE, we need to reschedule
		 * discovery now.
		 */
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL,
		    "%s: Config has changed, will re-run discovery", __func__);
		SCHEDULE_WORK(pwp, PMCS_WORK_DISCOVER);
	}

	pmcs_release_scratch(pwp);
	pwp->configuring = 0;
	mutex_exit(&pwp->config_lock);

#ifdef DEBUG
	/* Sanity check: no PHY should need work without work scheduled. */
	pptr = pmcs_find_phy_needing_work(pwp, pwp->root_phys);
	if (pptr != NULL) {
		if (!WORK_IS_SCHEDULED(pwp, PMCS_WORK_DISCOVER)) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
			    "PHY %s dead=%d changed=%d configured=%d "
			    "but no work scheduled", pptr->path, pptr->dead,
			    pptr->changed, pptr->configured);
		}
		pmcs_unlock_phy(pptr);
	}
#endif

	return;

restart:
	/* Clean up and restart discovery */
	pmcs_release_scratch(pwp);
	mutex_enter(&pwp->config_lock);
	pwp->configuring = 0;
	RESTART_DISCOVERY_LOCKED(pwp);
	mutex_exit(&pwp->config_lock);
}
2351 
2352 /*
2353  * Return any PHY that needs to have scheduled work done.  The PHY is returned
2354  * locked.
2355  */
2356 static pmcs_phy_t *
2357 pmcs_find_phy_needing_work(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2358 {
2359 	pmcs_phy_t *cphyp, *pnext;
2360 
2361 	while (pptr) {
2362 		pmcs_lock_phy(pptr);
2363 
2364 		if (pptr->changed || (pptr->dead && pptr->valid_device_id)) {
2365 			return (pptr);
2366 		}
2367 
2368 		pnext = pptr->sibling;
2369 
2370 		if (pptr->children) {
2371 			cphyp = pptr->children;
2372 			pmcs_unlock_phy(pptr);
2373 			cphyp = pmcs_find_phy_needing_work(pwp, cphyp);
2374 			if (cphyp) {
2375 				return (cphyp);
2376 			}
2377 		} else {
2378 			pmcs_unlock_phy(pptr);
2379 		}
2380 
2381 		pptr = pnext;
2382 	}
2383 
2384 	return (NULL);
2385 }
2386 
2387 /*
2388  * Report current observations to SCSA.
2389  */
2390 static boolean_t
2391 pmcs_report_observations(pmcs_hw_t *pwp)
2392 {
2393 	pmcs_iport_t		*iport;
2394 	scsi_hba_tgtmap_t	*tgtmap;
2395 	char			*ap;
2396 	pmcs_phy_t		*pptr;
2397 	uint64_t		wwn;
2398 
2399 	/*
2400 	 * Observation is stable, report what we currently see to the tgtmaps
2401 	 * for delta processing. Start by setting BEGIN on all tgtmaps.
2402 	 */
2403 	rw_enter(&pwp->iports_lock, RW_READER);
2404 	for (iport = list_head(&pwp->iports); iport != NULL;
2405 	    iport = list_next(&pwp->iports, iport)) {
2406 		/*
2407 		 * Unless we have at least one phy up, skip this iport.
2408 		 * Note we don't need to lock the iport for report_skip
2409 		 * since it is only used here.  We are doing the skip so that
2410 		 * the phymap and iportmap stabilization times are honored -
2411 		 * giving us the ability to recover port operation within the
2412 		 * stabilization time without unconfiguring targets using the
2413 		 * port.
2414 		 */
2415 		if (!sas_phymap_uahasphys(pwp->hss_phymap, iport->ua)) {
2416 			iport->report_skip = 1;
2417 			continue;		/* skip set_begin */
2418 		}
2419 		iport->report_skip = 0;
2420 
2421 		tgtmap = iport->iss_tgtmap;
2422 		ASSERT(tgtmap);
2423 		if (scsi_hba_tgtmap_set_begin(tgtmap) != DDI_SUCCESS) {
2424 			pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, NULL, NULL,
2425 			    "%s: cannot set_begin tgtmap ", __func__);
2426 			rw_exit(&pwp->iports_lock);
2427 			return (B_FALSE);
2428 		}
2429 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, NULL, NULL,
2430 		    "%s: set begin on tgtmap [0x%p]", __func__, (void *)tgtmap);
2431 	}
2432 	rw_exit(&pwp->iports_lock);
2433 
2434 	/*
2435 	 * Now, cycle through all levels of all phys and report
2436 	 * observations into their respective tgtmaps.
2437 	 */
2438 	pptr = pwp->root_phys;
2439 
2440 	while (pptr) {
2441 		pmcs_lock_phy(pptr);
2442 
2443 		/*
2444 		 * Skip PHYs that have nothing attached or are dead.
2445 		 */
2446 		if ((pptr->dtype == NOTHING) || pptr->dead) {
2447 			pmcs_unlock_phy(pptr);
2448 			pptr = pptr->sibling;
2449 			continue;
2450 		}
2451 
2452 		if (pptr->changed) {
2453 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
2454 			    "%s: oops, PHY %s changed; restart discovery",
2455 			    __func__, pptr->path);
2456 			pmcs_unlock_phy(pptr);
2457 			return (B_FALSE);
2458 		}
2459 
2460 		/*
2461 		 * Get the iport for this root PHY, then call the helper
2462 		 * to report observations for this iport's targets
2463 		 */
2464 		iport = pmcs_get_iport_by_phy(pwp, pptr);
2465 		if (iport == NULL) {
2466 			/* No iport for this tgt */
2467 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL,
2468 			    "%s: no iport for this target", __func__);
2469 			pmcs_unlock_phy(pptr);
2470 			pptr = pptr->sibling;
2471 			continue;
2472 		}
2473 
2474 		if (!iport->report_skip) {
2475 			if (pmcs_report_iport_observations(
2476 			    pwp, iport, pptr) == B_FALSE) {
2477 				pmcs_rele_iport(iport);
2478 				pmcs_unlock_phy(pptr);
2479 				return (B_FALSE);
2480 			}
2481 		}
2482 		pmcs_rele_iport(iport);
2483 		pmcs_unlock_phy(pptr);
2484 		pptr = pptr->sibling;
2485 	}
2486 
2487 	/*
2488 	 * The observation is complete, end sets. Note we will skip any
2489 	 * iports that are active, but have no PHYs in them (i.e. awaiting
2490 	 * unconfigure). Set to restart discovery if we find this.
2491 	 */
2492 	rw_enter(&pwp->iports_lock, RW_READER);
2493 	for (iport = list_head(&pwp->iports);
2494 	    iport != NULL;
2495 	    iport = list_next(&pwp->iports, iport)) {
2496 
2497 		if (iport->report_skip)
2498 			continue;		/* skip set_end */
2499 
2500 		tgtmap = iport->iss_tgtmap;
2501 		ASSERT(tgtmap);
2502 		if (scsi_hba_tgtmap_set_end(tgtmap, 0) != DDI_SUCCESS) {
2503 			pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, NULL, NULL,
2504 			    "%s: cannot set_end tgtmap ", __func__);
2505 			rw_exit(&pwp->iports_lock);
2506 			return (B_FALSE);
2507 		}
2508 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, NULL, NULL,
2509 		    "%s: set end on tgtmap [0x%p]", __func__, (void *)tgtmap);
2510 	}
2511 
2512 	/*
2513 	 * Now that discovery is complete, set up the necessary
2514 	 * DDI properties on each iport node.
2515 	 */
2516 	for (iport = list_head(&pwp->iports); iport != NULL;
2517 	    iport = list_next(&pwp->iports, iport)) {
2518 		/* Set up the 'attached-port' property on the iport */
2519 		ap = kmem_zalloc(PMCS_MAX_UA_SIZE, KM_SLEEP);
2520 		mutex_enter(&iport->lock);
2521 		pptr = iport->pptr;
2522 		mutex_exit(&iport->lock);
2523 		if (pptr == NULL) {
2524 			/*
2525 			 * This iport is down, but has not been
2526 			 * removed from our list (unconfigured).
2527 			 * Set our value to '0'.
2528 			 */
2529 			(void) snprintf(ap, 1, "%s", "0");
2530 		} else {
2531 			/* Otherwise, set it to remote phy's wwn */
2532 			pmcs_lock_phy(pptr);
2533 			wwn = pmcs_barray2wwn(pptr->sas_address);
2534 			(void) scsi_wwn_to_wwnstr(wwn, 1, ap);
2535 			pmcs_unlock_phy(pptr);
2536 		}
2537 		if (ndi_prop_update_string(DDI_DEV_T_NONE, iport->dip,
2538 		    SCSI_ADDR_PROP_ATTACHED_PORT,  ap) != DDI_SUCCESS) {
2539 			pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL, "%s: Failed "
2540 			    "to set prop ("SCSI_ADDR_PROP_ATTACHED_PORT")",
2541 			    __func__);
2542 		}
2543 		kmem_free(ap, PMCS_MAX_UA_SIZE);
2544 	}
2545 	rw_exit(&pwp->iports_lock);
2546 
2547 	return (B_TRUE);
2548 }
2549 
2550 /*
2551  * Report observations into a particular iport's target map
2552  *
2553  * Called with phyp (and all descendents) locked
2554  */
2555 static boolean_t
2556 pmcs_report_iport_observations(pmcs_hw_t *pwp, pmcs_iport_t *iport,
2557     pmcs_phy_t *phyp)
2558 {
2559 	pmcs_phy_t		*lphyp;
2560 	scsi_hba_tgtmap_t	*tgtmap;
2561 	scsi_tgtmap_tgt_type_t	tgt_type;
2562 	char			*ua;
2563 	uint64_t		wwn;
2564 
2565 	tgtmap = iport->iss_tgtmap;
2566 	ASSERT(tgtmap);
2567 
2568 	lphyp = phyp;
2569 	while (lphyp) {
2570 		switch (lphyp->dtype) {
2571 		default:		/* Skip unknown PHYs. */
2572 			/* for non-root phys, skip to sibling */
2573 			goto next_phy;
2574 
2575 		case SATA:
2576 		case SAS:
2577 			tgt_type = SCSI_TGT_SCSI_DEVICE;
2578 			break;
2579 
2580 		case EXPANDER:
2581 			tgt_type = SCSI_TGT_SMP_DEVICE;
2582 			break;
2583 		}
2584 
2585 		if (lphyp->dead) {
2586 			goto next_phy;
2587 		}
2588 
2589 		wwn = pmcs_barray2wwn(lphyp->sas_address);
2590 		ua = scsi_wwn_to_wwnstr(wwn, 1, NULL);
2591 
2592 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, lphyp, NULL,
2593 		    "iport_observation: adding %s on tgtmap [0x%p] phy [0x%p]",
2594 		    ua, (void *)tgtmap, (void*)lphyp);
2595 
2596 		if (scsi_hba_tgtmap_set_add(tgtmap, tgt_type, ua, NULL) !=
2597 		    DDI_SUCCESS) {
2598 			pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP,  NULL, NULL,
2599 			    "%s: failed to add address %s", __func__, ua);
2600 			scsi_free_wwnstr(ua);
2601 			return (B_FALSE);
2602 		}
2603 		scsi_free_wwnstr(ua);
2604 
2605 		if (lphyp->children) {
2606 			if (pmcs_report_iport_observations(pwp, iport,
2607 			    lphyp->children) == B_FALSE) {
2608 				return (B_FALSE);
2609 			}
2610 		}
2611 
2612 		/* for non-root phys, report siblings too */
2613 next_phy:
2614 		if (IS_ROOT_PHY(lphyp)) {
2615 			lphyp = NULL;
2616 		} else {
2617 			lphyp = lphyp->sibling;
2618 		}
2619 	}
2620 
2621 	return (B_TRUE);
2622 }
2623 
/*
 * Check for and configure new devices.
 *
 * If the changed device is a SATA device, add a SATA device.
 *
 * If the changed device is a SAS device, add a SAS device.
 *
 * If the changed device is an EXPANDER device, do a REPORT
 * GENERAL SMP command to find out the number of contained phys.
 *
 * For each number of contained phys, allocate a phy, do a
 * DISCOVERY SMP command to find out what kind of device it
 * is and add it to the linked list of phys on the *next* level.
 *
 * NOTE: pptr passed in by the caller will be a root PHY
 *
 * Returns 0 on success, -1 if discovery must be retried (e.g. the
 * iport for a changed PHY is not yet configured).
 */
static int
pmcs_configure_new_devices(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
{
	int rval = 0;
	pmcs_iport_t *iport;
	pmcs_phy_t *pnext, *orig_pptr = pptr, *root_phy, *pchild;

	/*
	 * First, walk through each PHY at this level
	 */
	while (pptr) {
		pmcs_lock_phy(pptr);
		pnext = pptr->sibling;

		/*
		 * Set the new dtype if it has changed
		 */
		if ((pptr->pend_dtype != NEW) &&
		    (pptr->pend_dtype != pptr->dtype)) {
			pptr->dtype = pptr->pend_dtype;
		}

		/* Only unconfigured, live, changed PHYs need attention. */
		if (pptr->changed == 0 || pptr->dead || pptr->configured) {
			goto next_phy;
		}

		/*
		 * Confirm that this target's iport is configured
		 */
		root_phy = pmcs_get_root_phy(pptr);
		iport = pmcs_get_iport_by_phy(pwp, root_phy);
		if (iport == NULL) {
			/* No iport for this tgt, restart */
			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL,
			    "%s: iport not yet configured, "
			    "retry discovery", __func__);
			/* pnext = NULL terminates the walk; -1 signals retry */
			pnext = NULL;
			rval = -1;
			goto next_phy;
		}

		switch (pptr->dtype) {
		case NOTHING:
			pptr->changed = 0;
			break;
		case SATA:
		case SAS:
			pptr->iport = iport;
			pmcs_new_tport(pwp, pptr);
			break;
		case EXPANDER:
			pmcs_configure_expander(pwp, pptr, iport);
			break;
		}
		pmcs_rele_iport(iport);

		/* If the config changed underneath us, stop this pass. */
		mutex_enter(&pwp->config_lock);
		if (pwp->config_changed) {
			mutex_exit(&pwp->config_lock);
			pnext = NULL;
			goto next_phy;
		}
		mutex_exit(&pwp->config_lock);

next_phy:
		pmcs_unlock_phy(pptr);
		pptr = pnext;
	}

	if (rval != 0) {
		return (rval);
	}

	/*
	 * Now walk through each PHY again, recalling ourselves if they
	 * have children
	 */
	pptr = orig_pptr;
	while (pptr) {
		pmcs_lock_phy(pptr);
		pnext = pptr->sibling;
		pchild = pptr->children;
		pmcs_unlock_phy(pptr);

		if (pchild) {
			rval = pmcs_configure_new_devices(pwp, pchild);
			if (rval != 0) {
				break;
			}
		}

		pptr = pnext;
	}

	return (rval);
}
2736 
2737 /*
2738  * Set all phys and descendent phys as changed if changed == B_TRUE, otherwise
2739  * mark them all as not changed.
2740  *
2741  * Called with parent PHY locked.
2742  */
2743 void
2744 pmcs_set_changed(pmcs_hw_t *pwp, pmcs_phy_t *parent, boolean_t changed,
2745     int level)
2746 {
2747 	pmcs_phy_t *pptr;
2748 
2749 	if (level == 0) {
2750 		if (changed) {
2751 			PHY_CHANGED(pwp, parent);
2752 		} else {
2753 			parent->changed = 0;
2754 		}
2755 		if (parent->dtype == EXPANDER && parent->level) {
2756 			parent->width = 1;
2757 		}
2758 		if (parent->children) {
2759 			pmcs_set_changed(pwp, parent->children, changed,
2760 			    level + 1);
2761 		}
2762 	} else {
2763 		pptr = parent;
2764 		while (pptr) {
2765 			if (changed) {
2766 				PHY_CHANGED(pwp, pptr);
2767 			} else {
2768 				pptr->changed = 0;
2769 			}
2770 			if (pptr->dtype == EXPANDER && pptr->level) {
2771 				pptr->width = 1;
2772 			}
2773 			if (pptr->children) {
2774 				pmcs_set_changed(pwp, pptr->children, changed,
2775 				    level + 1);
2776 			}
2777 			pptr = pptr->sibling;
2778 		}
2779 	}
2780 }
2781 
2782 /*
2783  * Take the passed phy mark it and its descendants as dead.
2784  * Fire up reconfiguration to abort commands and bury it.
2785  *
2786  * Called with the parent PHY locked.
2787  */
2788 void
2789 pmcs_kill_changed(pmcs_hw_t *pwp, pmcs_phy_t *parent, int level)
2790 {
2791 	pmcs_phy_t *pptr = parent;
2792 
2793 	while (pptr) {
2794 		pptr->link_rate = 0;
2795 		pptr->abort_sent = 0;
2796 		pptr->abort_pending = 1;
2797 		SCHEDULE_WORK(pwp, PMCS_WORK_ABORT_HANDLE);
2798 		pptr->need_rl_ext = 0;
2799 
2800 		if (pptr->dead == 0) {
2801 			PHY_CHANGED(pwp, pptr);
2802 			RESTART_DISCOVERY(pwp);
2803 		}
2804 
2805 		pptr->dead = 1;
2806 
2807 		if (pptr->children) {
2808 			pmcs_kill_changed(pwp, pptr->children, level + 1);
2809 		}
2810 
2811 		/*
2812 		 * Only kill siblings at level > 0
2813 		 */
2814 		if (level == 0) {
2815 			return;
2816 		}
2817 
2818 		pptr = pptr->sibling;
2819 	}
2820 }
2821 
2822 /*
2823  * Go through every PHY and clear any that are dead (unless they're expanders)
2824  */
2825 static void
2826 pmcs_clear_phys(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2827 {
2828 	pmcs_phy_t *pnext, *phyp;
2829 
2830 	phyp = pptr;
2831 	while (phyp) {
2832 		if (IS_ROOT_PHY(phyp)) {
2833 			pmcs_lock_phy(phyp);
2834 		}
2835 
2836 		if ((phyp->dtype != EXPANDER) && phyp->dead) {
2837 			pmcs_clear_phy(pwp, phyp);
2838 		}
2839 
2840 		if (phyp->children) {
2841 			pmcs_clear_phys(pwp, phyp->children);
2842 		}
2843 
2844 		pnext = phyp->sibling;
2845 
2846 		if (IS_ROOT_PHY(phyp)) {
2847 			pmcs_unlock_phy(phyp);
2848 		}
2849 
2850 		phyp = pnext;
2851 	}
2852 }
2853 
/*
 * Clear volatile parts of a phy.  Called with PHY locked.
 *
 * Resets per-device state (device id, width, recovery counters, abort
 * flags, configured/subsidiary bits) while preserving the PHY's identity
 * and its position in the tree (sibling/children/parent links, phynum,
 * SAS address, path, ref_count, target).
 */
void
pmcs_clear_phy(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
{
	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL, "%s: %s",
	    __func__, pptr->path);
	ASSERT(mutex_owned(&pptr->phy_lock));
	/* keep sibling */
	/* keep children */
	/* keep parent */
	pptr->device_id = PMCS_INVALID_DEVICE_ID;
	/* keep hw_event_ack */
	pptr->ncphy = 0;
	/* keep phynum */
	pptr->width = 0;
	/* Reset device-state recovery bookkeeping. */
	pptr->ds_recovery_retries = 0;
	pptr->ds_prev_good_recoveries = 0;
	pptr->last_good_recovery = 0;
	pptr->prev_recovery = 0;

	/* keep dtype */
	pptr->config_stop = 0;
	pptr->spinup_hold = 0;
	pptr->atdt = 0;
	/* keep portid */
	pptr->link_rate = 0;
	pptr->valid_device_id = 0;
	pptr->abort_sent = 0;
	pptr->abort_pending = 0;
	pptr->need_rl_ext = 0;
	pptr->subsidiary = 0;
	pptr->configured = 0;
	/* Only mark dead if it's not a root PHY and its dtype isn't NOTHING */
	/* XXX: What about directly attached disks? */
	if (!IS_ROOT_PHY(pptr) && (pptr->dtype != NOTHING))
		pptr->dead = 1;
	pptr->changed = 0;
	/* keep SAS address */
	/* keep path */
	/* keep ref_count */
	/* Don't clear iport on root PHYs - they are handled in pmcs_intr.c */
	if (!IS_ROOT_PHY(pptr)) {
		pptr->iport = NULL;
	}
	/* keep target */
}
2902 
/*
 * Allocate softstate for this target if there isn't already one.  If there
 * is, just redo our internal configuration.  If it is actually "new", we'll
 * soon get a tran_tgt_init for it.
 *
 * Called with PHY locked.
 */
static void
pmcs_new_tport(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
{
	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL, "%s: phy 0x%p @ %s",
	    __func__, (void *)pptr, pptr->path);

	if (pmcs_configure_phy(pwp, pptr) == B_FALSE) {
		/*
		 * If the config failed, mark the PHY as changed.
		 */
		PHY_CHANGED(pwp, pptr);
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
		    "%s: pmcs_configure_phy failed for phy 0x%p", __func__,
		    (void *)pptr);
		return;
	}

	/* Mark PHY as no longer changed */
	pptr->changed = 0;

	/*
	 * If the PHY has no target pointer, see if there's a dead PHY that
	 * matches.
	 */
	if (pptr->target == NULL) {
		pmcs_reap_dead_phy(pptr);
	}

	/*
	 * Only assign the device if there is a target for this PHY with a
	 * matching SAS address.  If an iport is disconnected from one piece
	 * of storage and connected to another within the iport stabilization
	 * time, we can get the PHY/target mismatch situation.
	 *
	 * Otherwise, it'll get done in tran_tgt_init.
	 */
	if (pptr->target) {
		mutex_enter(&pptr->target->statlock);
		if (pmcs_phy_target_match(pptr) == B_FALSE) {
			mutex_exit(&pptr->target->statlock);
			/*
			 * NOTE(review): presumably this drops the PHY ref
			 * taken when the target was bound to this non-root
			 * PHY - confirm against pmcs_phy_target_match usage.
			 */
			if (!IS_ROOT_PHY(pptr)) {
				pmcs_dec_phy_ref_count(pptr);
			}
			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
			    "%s: Not assigning existing tgt %p for PHY %p "
			    "(WWN mismatch)", __func__, (void *)pptr->target,
			    (void *)pptr);
			pptr->target = NULL;
			return;
		}

		if (!pmcs_assign_device(pwp, pptr->target)) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, pptr->target,
			    "%s: pmcs_assign_device failed for target 0x%p",
			    __func__, (void *)pptr->target);
		}
		mutex_exit(&pptr->target->statlock);
	}
}
2969 
2970 /*
2971  * Called with PHY lock held.
2972  */
2973 static boolean_t
2974 pmcs_configure_phy(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2975 {
2976 	char *dtype;
2977 
2978 	ASSERT(mutex_owned(&pptr->phy_lock));
2979 
2980 	/*
2981 	 * Mark this device as no longer changed.
2982 	 */
2983 	pptr->changed = 0;
2984 
2985 	/*
2986 	 * If we don't have a device handle, get one.
2987 	 */
2988 	if (pmcs_get_device_handle(pwp, pptr)) {
2989 		return (B_FALSE);
2990 	}
2991 
2992 	pptr->configured = 1;
2993 
2994 	switch (pptr->dtype) {
2995 	case SAS:
2996 		dtype = "SAS";
2997 		break;
2998 	case SATA:
2999 		dtype = "SATA";
3000 		break;
3001 	case EXPANDER:
3002 		dtype = "SMP";
3003 		break;
3004 	default:
3005 		dtype = "???";
3006 	}
3007 
3008 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL, "config_dev: %s "
3009 	    "dev %s " SAS_ADDR_FMT " dev id 0x%x lr 0x%x", dtype, pptr->path,
3010 	    SAS_ADDR_PRT(pptr->sas_address), pptr->device_id, pptr->link_rate);
3011 
3012 	return (B_TRUE);
3013 }
3014 
3015 /*
3016  * Called with PHY locked
3017  */
3018 static void
3019 pmcs_configure_expander(pmcs_hw_t *pwp, pmcs_phy_t *pptr, pmcs_iport_t *iport)
3020 {
3021 	pmcs_phy_t *ctmp, *clist = NULL, *cnext;
3022 	int result, i, nphy = 0;
3023 	boolean_t root_phy = B_FALSE;
3024 
3025 	ASSERT(iport);
3026 
3027 	/*
3028 	 * Step 1- clear our "changed" bit. If we need to retry/restart due
3029 	 * to resource shortages, we'll set it again. While we're doing
3030 	 * configuration, other events may set it again as well.  If the PHY
3031 	 * is a root PHY and is currently marked as having changed, reset the
3032 	 * config_stop timer as well.
3033 	 */
3034 	if (IS_ROOT_PHY(pptr) && pptr->changed) {
3035 		pptr->config_stop = ddi_get_lbolt() +
3036 		    drv_usectohz(PMCS_MAX_CONFIG_TIME);
3037 	}
3038 	pptr->changed = 0;
3039 
3040 	/*
3041 	 * Step 2- make sure we don't overflow
3042 	 */
3043 	if (pptr->level == PMCS_MAX_XPND-1) {
3044 		pmcs_prt(pwp, PMCS_PRT_WARN, pptr, NULL,
3045 		    "%s: SAS expansion tree too deep", __func__);
3046 		return;
3047 	}
3048 
3049 	/*
3050 	 * Step 3- Check if this expander is part of a wide phy that has
3051 	 * already been configured.
3052 	 *
3053 	 * This is known by checking this level for another EXPANDER device
3054 	 * with the same SAS address and isn't already marked as a subsidiary
3055 	 * phy and a parent whose SAS address is the same as our SAS address
3056 	 * (if there are parents).
3057 	 */
3058 	if (!IS_ROOT_PHY(pptr)) {
3059 		/*
3060 		 * No need to lock the parent here because we're in discovery
3061 		 * and the only time a PHY's children pointer can change is
3062 		 * in discovery; either in pmcs_clear_expander (which has
3063 		 * already been called) or here, down below.  Plus, trying to
3064 		 * grab the parent's lock here can cause deadlock.
3065 		 */
3066 		ctmp = pptr->parent->children;
3067 	} else {
3068 		ctmp = pwp->root_phys;
3069 		root_phy = B_TRUE;
3070 	}
3071 
3072 	while (ctmp) {
3073 		/*
3074 		 * If we've checked all PHYs up to pptr, we stop. Otherwise,
3075 		 * we'll be checking for a primary PHY with a higher PHY
3076 		 * number than pptr, which will never happen.  The primary
3077 		 * PHY on non-root expanders will ALWAYS be the lowest
3078 		 * numbered PHY.
3079 		 */
3080 		if (ctmp == pptr) {
3081 			break;
3082 		}
3083 
3084 		/*
3085 		 * If pptr and ctmp are root PHYs, just grab the mutex on
3086 		 * ctmp.  No need to lock the entire tree.  If they are not
3087 		 * root PHYs, there is no need to lock since a non-root PHY's
3088 		 * SAS address and other characteristics can only change in
3089 		 * discovery anyway.
3090 		 */
3091 		if (root_phy) {
3092 			mutex_enter(&ctmp->phy_lock);
3093 		}
3094 
3095 		if (ctmp->dtype == EXPANDER && ctmp->width &&
3096 		    memcmp(ctmp->sas_address, pptr->sas_address, 8) == 0) {
3097 			int widephy = 0;
3098 			/*
3099 			 * If these phys are not root PHYs, compare their SAS
3100 			 * addresses too.
3101 			 */
3102 			if (!root_phy) {
3103 				if (memcmp(ctmp->parent->sas_address,
3104 				    pptr->parent->sas_address, 8) == 0) {
3105 					widephy = 1;
3106 				}
3107 			} else {
3108 				widephy = 1;
3109 			}
3110 			if (widephy) {
3111 				ctmp->width++;
3112 				pptr->subsidiary = 1;
3113 				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
3114 				    "%s: PHY %s part of wide PHY %s "
3115 				    "(now %d wide)", __func__, pptr->path,
3116 				    ctmp->path, ctmp->width);
3117 				if (root_phy) {
3118 					mutex_exit(&ctmp->phy_lock);
3119 				}
3120 				return;
3121 			}
3122 		}
3123 
3124 		cnext = ctmp->sibling;
3125 		if (root_phy) {
3126 			mutex_exit(&ctmp->phy_lock);
3127 		}
3128 		ctmp = cnext;
3129 	}
3130 
3131 	/*
3132 	 * Step 4- If we don't have a device handle, get one.  Since this
3133 	 * is the primary PHY, make sure subsidiary is cleared.
3134 	 */
3135 	pptr->subsidiary = 0;
3136 	if (pmcs_get_device_handle(pwp, pptr)) {
3137 		goto out;
3138 	}
3139 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL, "Config expander %s "
3140 	    SAS_ADDR_FMT " dev id 0x%x lr 0x%x", pptr->path,
3141 	    SAS_ADDR_PRT(pptr->sas_address), pptr->device_id, pptr->link_rate);
3142 
3143 	/*
3144 	 * Step 5- figure out how many phys are in this expander.
3145 	 */
3146 	nphy = pmcs_expander_get_nphy(pwp, pptr);
3147 	if (nphy <= 0) {
3148 		if (nphy == 0 && ddi_get_lbolt() < pptr->config_stop) {
3149 			PHY_CHANGED(pwp, pptr);
3150 			RESTART_DISCOVERY(pwp);
3151 		} else {
3152 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
3153 			    "%s: Retries exhausted for %s, killing", __func__,
3154 			    pptr->path);
3155 			pptr->config_stop = 0;
3156 			pmcs_kill_changed(pwp, pptr, 0);
3157 		}
3158 		goto out;
3159 	}
3160 
3161 	/*
3162 	 * Step 6- Allocate a list of phys for this expander and figure out
3163 	 * what each one is.
3164 	 */
3165 	for (i = 0; i < nphy; i++) {
3166 		ctmp = kmem_cache_alloc(pwp->phy_cache, KM_SLEEP);
3167 		bzero(ctmp, sizeof (pmcs_phy_t));
3168 		ctmp->device_id = PMCS_INVALID_DEVICE_ID;
3169 		ctmp->sibling = clist;
3170 		ctmp->pend_dtype = NEW;	/* Init pending dtype */
3171 		ctmp->config_stop = ddi_get_lbolt() +
3172 		    drv_usectohz(PMCS_MAX_CONFIG_TIME);
3173 		clist = ctmp;
3174 	}
3175 
3176 	mutex_enter(&pwp->config_lock);
3177 	if (pwp->config_changed) {
3178 		RESTART_DISCOVERY_LOCKED(pwp);
3179 		mutex_exit(&pwp->config_lock);
3180 		/*
3181 		 * Clean up the newly allocated PHYs and return
3182 		 */
3183 		while (clist) {
3184 			ctmp = clist->sibling;
3185 			kmem_cache_free(pwp->phy_cache, clist);
3186 			clist = ctmp;
3187 		}
3188 		return;
3189 	}
3190 	mutex_exit(&pwp->config_lock);
3191 
3192 	/*
3193 	 * Step 7- Now fill in the rest of the static portions of the phy.
3194 	 */
3195 	for (i = 0, ctmp = clist; ctmp; ctmp = ctmp->sibling, i++) {
3196 		ctmp->parent = pptr;
3197 		ctmp->pwp = pwp;
3198 		ctmp->level = pptr->level+1;
3199 		ctmp->portid = pptr->portid;
3200 		if (ctmp->tolerates_sas2) {
3201 			ASSERT(i < SAS2_PHYNUM_MAX);
3202 			ctmp->phynum = i & SAS2_PHYNUM_MASK;
3203 		} else {
3204 			ASSERT(i < SAS_PHYNUM_MAX);
3205 			ctmp->phynum = i & SAS_PHYNUM_MASK;
3206 		}
3207 		pmcs_phy_name(pwp, ctmp, ctmp->path, sizeof (ctmp->path));
3208 		pmcs_lock_phy(ctmp);
3209 	}
3210 
3211 	/*
3212 	 * Step 8- Discover things about each phy in the expander.
3213 	 */
3214 	for (i = 0, ctmp = clist; ctmp; ctmp = ctmp->sibling, i++) {
3215 		result = pmcs_expander_content_discover(pwp, pptr, ctmp);
3216 		if (result <= 0) {
3217 			if (ddi_get_lbolt() < pptr->config_stop) {
3218 				PHY_CHANGED(pwp, pptr);
3219 				RESTART_DISCOVERY(pwp);
3220 			} else {
3221 				pptr->config_stop = 0;
3222 				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
3223 				    "%s: Retries exhausted for %s, killing",
3224 				    __func__, pptr->path);
3225 				pmcs_kill_changed(pwp, pptr, 0);
3226 			}
3227 			goto out;
3228 		}
3229 
3230 		/* Set pend_dtype to dtype for 1st time initialization */
3231 		ctmp->pend_dtype = ctmp->dtype;
3232 	}
3233 
3234 	/*
3235 	 * Step 9- Install the new list on the next level. There should be
3236 	 * no children pointer on this PHY.  If there is, we'd need to know
3237 	 * how it happened (The expander suddenly got more PHYs?).
3238 	 */
3239 	ASSERT(pptr->children == NULL);
3240 	if (pptr->children != NULL) {
3241 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL, "%s: Already child "
3242 		    "PHYs attached  to PHY %s: This should never happen",
3243 		    __func__, pptr->path);
3244 		goto out;
3245 	} else {
3246 		pptr->children = clist;
3247 	}
3248 
3249 	clist = NULL;
3250 	pptr->ncphy = nphy;
3251 	pptr->configured = 1;
3252 
3253 	/*
3254 	 * We only set width if we're greater than level 0.
3255 	 */
3256 	if (pptr->level) {
3257 		pptr->width = 1;
3258 	}
3259 
3260 	/*
3261 	 * Now tell the rest of the world about us, as an SMP node.
3262 	 */
3263 	pptr->iport = iport;
3264 	pmcs_new_tport(pwp, pptr);
3265 
3266 out:
3267 	while (clist) {
3268 		ctmp = clist->sibling;
3269 		pmcs_unlock_phy(clist);
3270 		kmem_cache_free(pwp->phy_cache, clist);
3271 		clist = ctmp;
3272 	}
3273 }
3274 
3275 /*
3276  * 2. Check expanders marked changed (but not dead) to see if they still have
3277  * the same number of phys and the same SAS address. Mark them, their subsidiary
 * phys (if wide) and their descendants dead if anything has changed. Check
 * the devices they contain to see if *they* have changed. If they've changed
 * from type NOTHING we leave them marked changed to be configured later
 * (picking up a new SAS address and link rate if possible). Otherwise, any
 * change in type, SAS address or removal of target role will cause us to
 * mark them (and their descendants) as dead and cause any pending commands
3284  * and associated devices to be removed.
3285  *
3286  * Called with PHY (pptr) locked.
3287  */
3288 
static void
pmcs_check_expander(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
{
	int nphy, result;
	/* local_list/local_tail: scratch copies of the children, built in */
	/* step 5 and compared against the real children in step 6. */
	pmcs_phy_t *ctmp, *local, *local_list = NULL, *local_tail = NULL;
	boolean_t kill_changed, changed;

	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
	    "%s: check %s", __func__, pptr->path);

	/*
	 * Step 1: Mark phy as not changed. We will mark it changed if we need
	 * to retry.
	 */
	pptr->changed = 0;

	/*
	 * Reset the config_stop time. Although we're not actually configuring
	 * anything here, we do want some indication of when to give up trying
	 * if we can't communicate with the expander.
	 */
	pptr->config_stop = ddi_get_lbolt() +
	    drv_usectohz(PMCS_MAX_CONFIG_TIME);

	/*
	 * Step 2: Figure out how many phys are in this expander. If
	 * pmcs_expander_get_nphy returns 0 we ran out of resources,
	 * so reschedule and try later. If it returns another error,
	 * just return.
	 */
	nphy = pmcs_expander_get_nphy(pwp, pptr);
	if (nphy <= 0) {
		if ((nphy == 0) && (ddi_get_lbolt() < pptr->config_stop)) {
			/* Still within the retry window: try again later. */
			PHY_CHANGED(pwp, pptr);
			RESTART_DISCOVERY(pwp);
		} else {
			pptr->config_stop = 0;
			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
			    "%s: Retries exhausted for %s, killing", __func__,
			    pptr->path);
			pmcs_kill_changed(pwp, pptr, 0);
		}
		return;
	}

	/*
	 * Step 3: If the number of phys don't agree, kill the old sub-tree.
	 */
	if (nphy != pptr->ncphy) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
		    "%s: number of contained phys for %s changed from %d to %d",
		    __func__, pptr->path, pptr->ncphy, nphy);
		/*
		 * Force a rescan of this expander after dead contents
		 * are cleared and removed.
		 */
		pmcs_kill_changed(pwp, pptr, 0);
		return;
	}

	/*
	 * Step 4: if we're at the bottom of the stack, we're done
	 * (we can't have any levels below us)
	 */
	if (pptr->level == PMCS_MAX_XPND-1) {
		return;
	}

	/*
	 * Step 5: Discover things about each phy in this expander.  We do
	 * this by walking the current list of contained phys and doing a
	 * content discovery for it to a local phy.
	 */
	ctmp = pptr->children;
	/* A configured expander is expected to have children here. */
	ASSERT(ctmp);
	if (ctmp == NULL) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
		    "%s: No children attached to expander @ %s?", __func__,
		    pptr->path);
		return;
	}

	while (ctmp) {
		/*
		 * Allocate a local PHY to contain the proposed new contents
		 * and link it to the rest of the local PHYs so that they
		 * can all be freed later.
		 */
		local = pmcs_clone_phy(ctmp);

		if (local_list == NULL) {
			local_list = local;
			local_tail = local;
		} else {
			local_tail->sibling = local;
			local_tail = local;
		}

		/*
		 * Need to lock the local PHY since pmcs_expander_content_
		 * discovery may call pmcs_clear_phy on it, which expects
		 * the PHY to be locked.
		 */
		pmcs_lock_phy(local);
		result = pmcs_expander_content_discover(pwp, pptr, local);
		pmcs_unlock_phy(local);
		if (result <= 0) {
			if (ddi_get_lbolt() < pptr->config_stop) {
				PHY_CHANGED(pwp, pptr);
				RESTART_DISCOVERY(pwp);
			} else {
				pptr->config_stop = 0;
				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
				    "%s: Retries exhausted for %s, killing",
				    __func__, pptr->path);
				pmcs_kill_changed(pwp, pptr, 0);
			}

			/*
			 * Release all the local PHYs that we allocated.
			 */
			pmcs_free_phys(pwp, local_list);
			return;
		}

		ctmp = ctmp->sibling;
	}

	/*
	 * Step 6: Compare the local PHY's contents to our current PHY.  If
	 * there are changes, take the appropriate action.
	 * This is done in two steps (step 5 above, and 6 here) so that if we
	 * have to bail during this process (e.g. pmcs_expander_content_discover
	 * fails), we haven't actually changed the state of any of the real
	 * PHYs.  Next time we come through here, we'll be starting over from
	 * scratch.  This keeps us from marking a changed PHY as no longer
	 * changed, but then having to bail only to come back next time and
	 * think that the PHY hadn't changed.  If this were to happen, we
	 * would fail to properly configure the device behind this PHY.
	 */
	local = local_list;
	ctmp = pptr->children;

	/*
	 * Note: the comparisons below form an if/else-if chain, so only the
	 * first difference found on a given child is acted upon per pass.
	 */
	while (ctmp) {
		changed = B_FALSE;
		kill_changed = B_FALSE;

		/*
		 * We set local to local_list prior to this loop so that we
		 * can simply walk the local_list while we walk this list.  The
		 * two lists should be completely in sync.
		 *
		 * Clear the changed flag here.
		 */
		ctmp->changed = 0;

		if (ctmp->dtype != local->dtype) {
			if (ctmp->dtype != NOTHING) {
				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, ctmp, NULL,
				    "%s: %s type changed from %s to %s "
				    "(killing)", __func__, ctmp->path,
				    PHY_TYPE(ctmp), PHY_TYPE(local));
				/*
				 * Force a rescan of this expander after dead
				 * contents are cleared and removed.
				 */
				changed = B_TRUE;
				kill_changed = B_TRUE;
			} else {
				changed = B_TRUE;
				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, ctmp, NULL,
				    "%s: %s type changed from NOTHING to %s",
				    __func__, ctmp->path, PHY_TYPE(local));
			}

		} else if (ctmp->atdt != local->atdt) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, ctmp, NULL, "%s: "
			    "%s attached device type changed from %d to %d "
			    "(killing)", __func__, ctmp->path, ctmp->atdt,
			    local->atdt);
			/*
			 * Force a rescan of this expander after dead
			 * contents are cleared and removed.
			 */
			changed = B_TRUE;

			if (local->atdt == 0) {
				kill_changed = B_TRUE;
			}
		} else if (ctmp->link_rate != local->link_rate) {
			pmcs_prt(pwp, PMCS_PRT_INFO, ctmp, NULL, "%s: %s "
			    "changed speed from %s to %s", __func__, ctmp->path,
			    pmcs_get_rate(ctmp->link_rate),
			    pmcs_get_rate(local->link_rate));
			/* If the speed changed from invalid, force rescan */
			if (!PMCS_VALID_LINK_RATE(ctmp->link_rate)) {
				changed = B_TRUE;
				RESTART_DISCOVERY(pwp);
			} else {
				/* Just update to the new link rate */
				ctmp->link_rate = local->link_rate;
			}

			if (!PMCS_VALID_LINK_RATE(local->link_rate)) {
				kill_changed = B_TRUE;
			}
		} else if (memcmp(ctmp->sas_address, local->sas_address,
		    sizeof (ctmp->sas_address)) != 0) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, ctmp, NULL,
			    "%s: SAS Addr for %s changed from " SAS_ADDR_FMT
			    "to " SAS_ADDR_FMT " (kill old tree)", __func__,
			    ctmp->path, SAS_ADDR_PRT(ctmp->sas_address),
			    SAS_ADDR_PRT(local->sas_address));
			/*
			 * Force a rescan of this expander after dead
			 * contents are cleared and removed.
			 */
			changed = B_TRUE;
		} else {
			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, ctmp, NULL,
			    "%s: %s looks the same (type %s)",
			    __func__, ctmp->path, PHY_TYPE(ctmp));
			/*
			 * If EXPANDER, still mark it changed so we
			 * re-evaluate its contents.  If it's not an expander,
			 * but it hasn't been configured, also mark it as
			 * changed so that it will undergo configuration.
			 */
			if (ctmp->dtype == EXPANDER) {
				changed = B_TRUE;
			} else if ((ctmp->dtype != NOTHING) &&
			    !ctmp->configured) {
				ctmp->changed = 1;
			} else {
				/* It simply hasn't changed */
				ctmp->changed = 0;
			}
		}

		/*
		 * If the PHY changed, call pmcs_kill_changed if indicated,
		 * update its contents to reflect its current state and mark it
		 * as changed.
		 */
		if (changed) {
			/*
			 * pmcs_kill_changed will mark the PHY as changed, so
			 * only do PHY_CHANGED if we did not do kill_changed.
			 */
			if (kill_changed) {
				pmcs_kill_changed(pwp, ctmp, 0);
			} else {
				/*
				 * If we're not killing the device, it's not
				 * dead.  Mark the PHY as changed.
				 */
				PHY_CHANGED(pwp, ctmp);

				if (ctmp->dead) {
					pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
					    ctmp, NULL, "%s: Unmarking PHY %s "
					    "dead, restarting discovery",
					    __func__, ctmp->path);
					ctmp->dead = 0;
					RESTART_DISCOVERY(pwp);
				}
			}

			/*
			 * If the dtype of this PHY is now NOTHING, mark it as
			 * unconfigured.  Set pend_dtype to what the new dtype
			 * is.  It'll get updated at the end of the discovery
			 * process.
			 */
			if (local->dtype == NOTHING) {
				bzero(ctmp->sas_address,
				    sizeof (local->sas_address));
				ctmp->atdt = 0;
				ctmp->link_rate = 0;
				ctmp->pend_dtype = NOTHING;
				ctmp->configured = 0;
			} else {
				(void) memcpy(ctmp->sas_address,
				    local->sas_address,
				    sizeof (local->sas_address));
				ctmp->atdt = local->atdt;
				ctmp->link_rate = local->link_rate;
				ctmp->pend_dtype = local->dtype;
			}
		}

		/* Advance both lists in lockstep. */
		local = local->sibling;
		ctmp = ctmp->sibling;
	}

	/*
	 * If we got to here, that means we were able to see all the PHYs
	 * and we can now update all of the real PHYs with the information
	 * we got on the local PHYs.  Once that's done, free all the local
	 * PHYs.
	 */

	pmcs_free_phys(pwp, local_list);
}
3593 
3594 /*
3595  * Top level routine to check expanders.  We call pmcs_check_expander for
3596  * each expander.  Since we're not doing any configuration right now, it
3597  * doesn't matter if this is breadth-first.
3598  */
3599 static boolean_t
3600 pmcs_check_expanders(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
3601 {
3602 	pmcs_phy_t *phyp, *pnext, *pchild;
3603 	boolean_t config_changed = B_FALSE;
3604 
3605 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
3606 	    "%s: %s", __func__, pptr->path);
3607 
3608 	/*
3609 	 * Check each expander at this level
3610 	 */
3611 	phyp = pptr;
3612 	while (phyp && !config_changed) {
3613 		pmcs_lock_phy(phyp);
3614 
3615 		if ((phyp->dtype == EXPANDER) && phyp->changed &&
3616 		    !phyp->dead && !phyp->subsidiary &&
3617 		    phyp->configured) {
3618 			pmcs_check_expander(pwp, phyp);
3619 		}
3620 
3621 		pnext = phyp->sibling;
3622 		pmcs_unlock_phy(phyp);
3623 
3624 		mutex_enter(&pwp->config_lock);
3625 		config_changed = pwp->config_changed;
3626 		mutex_exit(&pwp->config_lock);
3627 
3628 		phyp = pnext;
3629 	}
3630 
3631 	if (config_changed) {
3632 		return (config_changed);
3633 	}
3634 
3635 	/*
3636 	 * Now check the children
3637 	 */
3638 	phyp = pptr;
3639 	while (phyp && !config_changed) {
3640 		pmcs_lock_phy(phyp);
3641 		pnext = phyp->sibling;
3642 		pchild = phyp->children;
3643 		pmcs_unlock_phy(phyp);
3644 
3645 		if (pchild) {
3646 			(void) pmcs_check_expanders(pwp, pchild);
3647 		}
3648 
3649 		mutex_enter(&pwp->config_lock);
3650 		config_changed = pwp->config_changed;
3651 		mutex_exit(&pwp->config_lock);
3652 
3653 		phyp = pnext;
3654 	}
3655 
3656 	/*
3657 	 * We're done
3658 	 */
3659 	return (config_changed);
3660 }
3661 
3662 /*
3663  * Called with softstate and PHY locked
3664  */
static void
pmcs_clear_expander(pmcs_hw_t *pwp, pmcs_phy_t *pptr, int level)
{
	pmcs_phy_t *ctmp;

	ASSERT(mutex_owned(&pwp->lock));
	ASSERT(mutex_owned(&pptr->phy_lock));
	ASSERT(pptr->level < PMCS_MAX_XPND - 1);

	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
	    "%s: checking %s", __func__, pptr->path);

	/*
	 * First pass: propagate death to our children and recurse into any
	 * child expanders (level tracks recursion depth for lock decisions
	 * below).  Note that we recurse even if this expander is alive, so
	 * dead expanders deeper in the tree still get cleaned up.
	 */
	ctmp = pptr->children;
	while (ctmp) {
		/*
		 * If the expander is dead, mark its children dead
		 */
		if (pptr->dead) {
			ctmp->dead = 1;
		}
		if (ctmp->dtype == EXPANDER) {
			pmcs_clear_expander(pwp, ctmp, level + 1);
		}
		ctmp = ctmp->sibling;
	}

	/*
	 * If this expander is not dead, we're done here.
	 */
	if (!pptr->dead) {
		return;
	}

	/*
	 * Now snip out the list of children below us and release them
	 */
	ctmp = pptr->children;
	while (ctmp) {
		pmcs_phy_t *nxt = ctmp->sibling;
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, ctmp, NULL,
		    "%s: dead PHY 0x%p (%s) (ref_count %d)", __func__,
		    (void *)ctmp, ctmp->path, ctmp->ref_count);
		/*
		 * Put this PHY on the dead PHY list for the watchdog to
		 * clean up after any outstanding work has completed.
		 */
		mutex_enter(&pwp->dead_phylist_lock);
		ctmp->dead_next = pwp->dead_phys;
		pwp->dead_phys = ctmp;
		mutex_exit(&pwp->dead_phylist_lock);
		pmcs_unlock_phy(ctmp);
		ctmp = nxt;
	}

	pptr->children = NULL;

	/*
	 * Clear subsidiary phys as well.  Getting the parent's PHY lock
	 * is only necessary if level == 0 since otherwise the parent is
	 * already locked.
	 */
	if (!IS_ROOT_PHY(pptr)) {
		if (level == 0) {
			mutex_enter(&pptr->parent->phy_lock);
		}
		ctmp = pptr->parent->children;
		if (level == 0) {
			mutex_exit(&pptr->parent->phy_lock);
		}
	} else {
		ctmp = pwp->root_phys;
	}

	/*
	 * Walk our sibling list looking for subsidiary PHYs of this wide
	 * expander (same SAS address, subsidiary flag set) and clear them.
	 */
	while (ctmp) {
		if (ctmp == pptr) {
			/* Skip ourselves; we're cleared at the bottom. */
			ctmp = ctmp->sibling;
			continue;
		}
		/*
		 * We only need to lock subsidiary PHYs on the level 0
		 * expander.  Any children of that expander, subsidiaries or
		 * not, will already be locked.
		 */
		if (level == 0) {
			pmcs_lock_phy(ctmp);
		}
		if (ctmp->dtype != EXPANDER || ctmp->subsidiary == 0 ||
		    memcmp(ctmp->sas_address, pptr->sas_address,
		    sizeof (ctmp->sas_address)) != 0) {
			if (level == 0) {
				pmcs_unlock_phy(ctmp);
			}
			ctmp = ctmp->sibling;
			continue;
		}
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, ctmp, NULL,
		    "%s: subsidiary %s", __func__, ctmp->path);
		pmcs_clear_phy(pwp, ctmp);
		if (level == 0) {
			pmcs_unlock_phy(ctmp);
		}
		ctmp = ctmp->sibling;
	}

	pmcs_clear_phy(pwp, pptr);
}
3771 
3772 /*
3773  * Called with PHY locked and with scratch acquired. We return 0 if
3774  * we fail to allocate resources or notice that the configuration
3775  * count changed while we were running the command. We return
3776  * less than zero if we had an I/O error or received an unsupported
3777  * configuration. Otherwise we return the number of phys in the
3778  * expander.
3779  */
3780 #define	DFM(m, y) if (m == NULL) m = y
3781 static int
3782 pmcs_expander_get_nphy(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
3783 {
3784 	struct pmcwork *pwrk;
3785 	char buf[64];
3786 	const uint_t rdoff = 0x100;	/* returned data offset */
3787 	smp_response_frame_t *srf;
3788 	smp_report_general_resp_t *srgr;
3789 	uint32_t msg[PMCS_MSG_SIZE], *ptr, htag, status, ival;
3790 	int result;
3791 
3792 	ival = 0x40001100;
3793 again:
3794 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
3795 	if (pwrk == NULL) {
3796 		result = 0;
3797 		goto out;
3798 	}
3799 	(void) memset(pwp->scratch, 0x77, PMCS_SCRATCH_SIZE);
3800 	pwrk->arg = pwp->scratch;
3801 	pwrk->dtype = pptr->dtype;
3802 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
3803 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
3804 	if (ptr == NULL) {
3805 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
3806 		pmcs_prt(pwp, PMCS_PRT_DEBUG2, pptr, NULL,
3807 		    "%s: GET_IQ_ENTRY failed", __func__);
3808 		pmcs_pwork(pwp, pwrk);
3809 		result = 0;
3810 		goto out;
3811 	}
3812 
3813 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, PMCIN_SMP_REQUEST));
3814 	msg[1] = LE_32(pwrk->htag);
3815 	msg[2] = LE_32(pptr->device_id);
3816 	msg[3] = LE_32((4 << SMP_REQUEST_LENGTH_SHIFT) | SMP_INDIRECT_RESPONSE);
3817 	/*
3818 	 * Send SMP REPORT GENERAL (of either SAS1.1 or SAS2 flavors).
3819 	 */
3820 	msg[4] = BE_32(ival);
3821 	msg[5] = 0;
3822 	msg[6] = 0;
3823 	msg[7] = 0;
3824 	msg[8] = 0;
3825 	msg[9] = 0;
3826 	msg[10] = 0;
3827 	msg[11] = 0;
3828 	msg[12] = LE_32(DWORD0(pwp->scratch_dma+rdoff));
3829 	msg[13] = LE_32(DWORD1(pwp->scratch_dma+rdoff));
3830 	msg[14] = LE_32(PMCS_SCRATCH_SIZE - rdoff);
3831 	msg[15] = 0;
3832 
3833 	COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE);
3834 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
3835 	htag = pwrk->htag;
3836 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
3837 
3838 	pmcs_unlock_phy(pptr);
3839 	WAIT_FOR(pwrk, 1000, result);
3840 	pmcs_lock_phy(pptr);
3841 	pmcs_pwork(pwp, pwrk);
3842 
3843 	mutex_enter(&pwp->config_lock);
3844 	if (pwp->config_changed) {
3845 		RESTART_DISCOVERY_LOCKED(pwp);
3846 		mutex_exit(&pwp->config_lock);
3847 		result = 0;
3848 		goto out;
3849 	}
3850 	mutex_exit(&pwp->config_lock);
3851 
3852 	if (result) {
3853 		pmcs_timed_out(pwp, htag, __func__);
3854 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
3855 		    "%s: Issuing SMP ABORT for htag 0x%08x", __func__, htag);
3856 		if (pmcs_abort(pwp, pptr, htag, 0, 0)) {
3857 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
3858 			    "%s: Unable to issue SMP ABORT for htag 0x%08x",
3859 			    __func__, htag);
3860 		} else {
3861 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
3862 			    "%s: Issuing SMP ABORT for htag 0x%08x",
3863 			    __func__, htag);
3864 		}
3865 		result = 0;
3866 		goto out;
3867 	}
3868 	ptr = (void *)pwp->scratch;
3869 	status = LE_32(ptr[2]);
3870 	if (status == PMCOUT_STATUS_UNDERFLOW ||
3871 	    status == PMCOUT_STATUS_OVERFLOW) {
3872 		pmcs_prt(pwp, PMCS_PRT_DEBUG_UNDERFLOW, pptr, NULL,
3873 		    "%s: over/underflow", __func__);
3874 		status = PMCOUT_STATUS_OK;
3875 	}
3876 	srf = (smp_response_frame_t *)&((uint32_t *)pwp->scratch)[rdoff >> 2];
3877 	srgr = (smp_report_general_resp_t *)
3878 	    &((uint32_t *)pwp->scratch)[(rdoff >> 2)+1];
3879 
3880 	if (status != PMCOUT_STATUS_OK) {
3881 		char *nag = NULL;
3882 		(void) snprintf(buf, sizeof (buf),
3883 		    "%s: SMP op failed (0x%x)", __func__, status);
3884 		switch (status) {
3885 		case PMCOUT_STATUS_IO_PORT_IN_RESET:
3886 			DFM(nag, "I/O Port In Reset");
3887 			/* FALLTHROUGH */
3888 		case PMCOUT_STATUS_ERROR_HW_TIMEOUT:
3889 			DFM(nag, "Hardware Timeout");
3890 			/* FALLTHROUGH */
3891 		case PMCOUT_STATUS_ERROR_INTERNAL_SMP_RESOURCE:
3892 			DFM(nag, "Internal SMP Resource Failure");
3893 			/* FALLTHROUGH */
3894 		case PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY:
3895 			DFM(nag, "PHY Not Ready");
3896 			/* FALLTHROUGH */
3897 		case PMCOUT_STATUS_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED:
3898 			DFM(nag, "Connection Rate Not Supported");
3899 			/* FALLTHROUGH */
3900 		case PMCOUT_STATUS_IO_XFER_OPEN_RETRY_TIMEOUT:
3901 			DFM(nag, "Open Retry Timeout");
3902 			/* FALLTHROUGH */
3903 		case PMCOUT_STATUS_SMP_RESP_CONNECTION_ERROR:
3904 			DFM(nag, "Response Connection Error");
3905 			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
3906 			    "%s: expander %s SMP operation failed (%s)",
3907 			    __func__, pptr->path, nag);
3908 			break;
3909 
3910 		/*
3911 		 * For the IO_DS_NON_OPERATIONAL case, we need to kick off
3912 		 * device state recovery and return 0 so that the caller
3913 		 * doesn't assume this expander is dead for good.
3914 		 */
3915 		case PMCOUT_STATUS_IO_DS_NON_OPERATIONAL: {
3916 			pmcs_xscsi_t *xp = pptr->target;
3917 
3918 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, xp,
3919 			    "%s: expander %s device state non-operational",
3920 			    __func__, pptr->path);
3921 
3922 			if (xp == NULL) {
3923 				pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr,
3924 				    xp, "%s: No target to do DS recovery for "
3925 				    "PHY %p (%s), attempting PHY hard reset",
3926 				    __func__, (void *)pptr, pptr->path);
3927 				(void) pmcs_reset_phy(pwp, pptr,
3928 				    PMCS_PHYOP_HARD_RESET);
3929 				break;
3930 			}
3931 
3932 			mutex_enter(&xp->statlock);
3933 			pmcs_start_dev_state_recovery(xp, pptr);
3934 			mutex_exit(&xp->statlock);
3935 			break;
3936 		}
3937 
3938 		default:
3939 			pmcs_print_entry(pwp, PMCS_PRT_DEBUG, buf, ptr);
3940 			result = -EIO;
3941 			break;
3942 		}
3943 	} else if (srf->srf_frame_type != SMP_FRAME_TYPE_RESPONSE) {
3944 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
3945 		    "%s: bad response frame type 0x%x",
3946 		    __func__, srf->srf_frame_type);
3947 		result = -EINVAL;
3948 	} else if (srf->srf_function != SMP_FUNC_REPORT_GENERAL) {
3949 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
3950 		    "%s: bad response function 0x%x",
3951 		    __func__, srf->srf_function);
3952 		result = -EINVAL;
3953 	} else if (srf->srf_result != 0) {
3954 		/*
3955 		 * Check to see if we have a value of 3 for failure and
3956 		 * whether we were using a SAS2.0 allocation length value
3957 		 * and retry without it.
3958 		 */
3959 		if (srf->srf_result == 3 && (ival & 0xff00)) {
3960 			ival &= ~0xff00;
3961 			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
3962 			    "%s: err 0x%x with SAS2 request- retry with SAS1",
3963 			    __func__, srf->srf_result);
3964 			goto again;
3965 		}
3966 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
3967 		    "%s: bad response 0x%x", __func__, srf->srf_result);
3968 		result = -EINVAL;
3969 	} else if (srgr->srgr_configuring) {
3970 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
3971 		    "%s: expander at phy %s is still configuring",
3972 		    __func__, pptr->path);
3973 		result = 0;
3974 	} else {
3975 		result = srgr->srgr_number_of_phys;
3976 		if (ival & 0xff00) {
3977 			pptr->tolerates_sas2 = 1;
3978 		}
3979 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
3980 		    "%s has %d phys and %s SAS2", pptr->path, result,
3981 		    pptr->tolerates_sas2? "tolerates" : "does not tolerate");
3982 	}
3983 out:
3984 	return (result);
3985 }
3986 
3987 /*
3988  * Called with expander locked (and thus, pptr) as well as all PHYs up to
3989  * the root, and scratch acquired. Return 0 if we fail to allocate resources
3990  * or notice that the configuration changed while we were running the command.
3991  *
3992  * We return less than zero if we had an I/O error or received an
3993  * unsupported configuration.
3994  */
3995 static int
3996 pmcs_expander_content_discover(pmcs_hw_t *pwp, pmcs_phy_t *expander,
3997     pmcs_phy_t *pptr)
3998 {
3999 	struct pmcwork *pwrk;
4000 	char buf[64];
4001 	uint8_t sas_address[8];
4002 	uint8_t att_sas_address[8];
4003 	smp_response_frame_t *srf;
4004 	smp_discover_resp_t *sdr;
4005 	const uint_t rdoff = 0x100;	/* returned data offset */
4006 	uint8_t *roff;
4007 	uint32_t status, *ptr, msg[PMCS_MSG_SIZE], htag;
4008 	int result;
4009 	uint8_t	ini_support;
4010 	uint8_t	tgt_support;
4011 
4012 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, expander);
4013 	if (pwrk == NULL) {
4014 		result = 0;
4015 		goto out;
4016 	}
4017 	(void) memset(pwp->scratch, 0x77, PMCS_SCRATCH_SIZE);
4018 	pwrk->arg = pwp->scratch;
4019 	pwrk->dtype = expander->dtype;
4020 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, PMCIN_SMP_REQUEST));
4021 	msg[1] = LE_32(pwrk->htag);
4022 	msg[2] = LE_32(expander->device_id);
4023 	msg[3] = LE_32((12 << SMP_REQUEST_LENGTH_SHIFT) |
4024 	    SMP_INDIRECT_RESPONSE);
4025 	/*
4026 	 * Send SMP DISCOVER (of either SAS1.1 or SAS2 flavors).
4027 	 */
4028 	if (expander->tolerates_sas2) {
4029 		msg[4] = BE_32(0x40101B00);
4030 	} else {
4031 		msg[4] = BE_32(0x40100000);
4032 	}
4033 	msg[5] = 0;
4034 	msg[6] = BE_32((pptr->phynum << 16));
4035 	msg[7] = 0;
4036 	msg[8] = 0;
4037 	msg[9] = 0;
4038 	msg[10] = 0;
4039 	msg[11] = 0;
4040 	msg[12] = LE_32(DWORD0(pwp->scratch_dma+rdoff));
4041 	msg[13] = LE_32(DWORD1(pwp->scratch_dma+rdoff));
4042 	msg[14] = LE_32(PMCS_SCRATCH_SIZE - rdoff);
4043 	msg[15] = 0;
4044 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4045 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4046 	if (ptr == NULL) {
4047 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4048 		result = 0;
4049 		goto out;
4050 	}
4051 
4052 	COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE);
4053 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
4054 	htag = pwrk->htag;
4055 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4056 
4057 	/*
4058 	 * Drop PHY lock while waiting so other completions aren't potentially
4059 	 * blocked.
4060 	 */
4061 	pmcs_unlock_phy(expander);
4062 	WAIT_FOR(pwrk, 1000, result);
4063 	pmcs_lock_phy(expander);
4064 	pmcs_pwork(pwp, pwrk);
4065 
4066 	mutex_enter(&pwp->config_lock);
4067 	if (pwp->config_changed) {
4068 		RESTART_DISCOVERY_LOCKED(pwp);
4069 		mutex_exit(&pwp->config_lock);
4070 		result = 0;
4071 		goto out;
4072 	}
4073 	mutex_exit(&pwp->config_lock);
4074 
4075 	if (result) {
4076 		pmcs_prt(pwp, PMCS_PRT_WARN, pptr, NULL, pmcs_timeo, __func__);
4077 		if (pmcs_abort(pwp, expander, htag, 0, 0)) {
4078 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
4079 			    "%s: Unable to issue SMP ABORT for htag 0x%08x",
4080 			    __func__, htag);
4081 		} else {
4082 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
4083 			    "%s: Issuing SMP ABORT for htag 0x%08x",
4084 			    __func__, htag);
4085 		}
4086 		result = -ETIMEDOUT;
4087 		goto out;
4088 	}
4089 	ptr = (void *)pwp->scratch;
4090 	/*
4091 	 * Point roff to the DMA offset for returned data
4092 	 */
4093 	roff = pwp->scratch;
4094 	roff += rdoff;
4095 	srf = (smp_response_frame_t *)roff;
4096 	sdr = (smp_discover_resp_t *)(roff+4);
4097 	status = LE_32(ptr[2]);
4098 	if (status == PMCOUT_STATUS_UNDERFLOW ||
4099 	    status == PMCOUT_STATUS_OVERFLOW) {
4100 		pmcs_prt(pwp, PMCS_PRT_DEBUG_UNDERFLOW, pptr, NULL,
4101 		    "%s: over/underflow", __func__);
4102 		status = PMCOUT_STATUS_OK;
4103 	}
4104 	if (status != PMCOUT_STATUS_OK) {
4105 		char *nag = NULL;
4106 		(void) snprintf(buf, sizeof (buf),
4107 		    "%s: SMP op failed (0x%x)", __func__, status);
4108 		switch (status) {
4109 		case PMCOUT_STATUS_ERROR_HW_TIMEOUT:
4110 			DFM(nag, "Hardware Timeout");
4111 			/* FALLTHROUGH */
4112 		case PMCOUT_STATUS_ERROR_INTERNAL_SMP_RESOURCE:
4113 			DFM(nag, "Internal SMP Resource Failure");
4114 			/* FALLTHROUGH */
4115 		case PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY:
4116 			DFM(nag, "PHY Not Ready");
4117 			/* FALLTHROUGH */
4118 		case PMCOUT_STATUS_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED:
4119 			DFM(nag, "Connection Rate Not Supported");
4120 			/* FALLTHROUGH */
4121 		case PMCOUT_STATUS_IO_XFER_OPEN_RETRY_TIMEOUT:
4122 			DFM(nag, "Open Retry Timeout");
4123 			/* FALLTHROUGH */
4124 		case PMCOUT_STATUS_SMP_RESP_CONNECTION_ERROR:
4125 			DFM(nag, "Response Connection Error");
4126 			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
4127 			    "%s: expander %s SMP operation failed (%s)",
4128 			    __func__, pptr->path, nag);
4129 			break;
4130 		default:
4131 			pmcs_print_entry(pwp, PMCS_PRT_DEBUG, buf, ptr);
4132 			result = -EIO;
4133 			break;
4134 		}
4135 		goto out;
4136 	} else if (srf->srf_frame_type != SMP_FRAME_TYPE_RESPONSE) {
4137 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
4138 		    "%s: bad response frame type 0x%x",
4139 		    __func__, srf->srf_frame_type);
4140 		result = -EINVAL;
4141 		goto out;
4142 	} else if (srf->srf_function != SMP_FUNC_DISCOVER) {
4143 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
4144 		    "%s: bad response function 0x%x",
4145 		    __func__, srf->srf_function);
4146 		result = -EINVAL;
4147 		goto out;
4148 	} else if (srf->srf_result != SMP_RES_FUNCTION_ACCEPTED) {
4149 		result = pmcs_smp_function_result(pwp, srf);
4150 		/* Need not fail if PHY is Vacant */
4151 		if (result != SMP_RES_PHY_VACANT) {
4152 			result = -EINVAL;
4153 			goto out;
4154 		}
4155 	}
4156 
4157 	ini_support = (sdr->sdr_attached_sata_host |
4158 	    (sdr->sdr_attached_smp_initiator << 1) |
4159 	    (sdr->sdr_attached_stp_initiator << 2) |
4160 	    (sdr->sdr_attached_ssp_initiator << 3));
4161 
4162 	tgt_support = (sdr->sdr_attached_sata_device |
4163 	    (sdr->sdr_attached_smp_target << 1) |
4164 	    (sdr->sdr_attached_stp_target << 2) |
4165 	    (sdr->sdr_attached_ssp_target << 3));
4166 
4167 	pmcs_wwn2barray(BE_64(sdr->sdr_sas_addr), sas_address);
4168 	pmcs_wwn2barray(BE_64(sdr->sdr_attached_sas_addr), att_sas_address);
4169 
4170 	switch (sdr->sdr_attached_device_type) {
4171 	case SAS_IF_DTYPE_ENDPOINT:
4172 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
4173 		    "exp_content: %s atdt=0x%x lr=%x is=%x ts=%x SAS="
4174 		    SAS_ADDR_FMT " attSAS=" SAS_ADDR_FMT " atPHY=%x",
4175 		    pptr->path,
4176 		    sdr->sdr_attached_device_type,
4177 		    sdr->sdr_negotiated_logical_link_rate,
4178 		    ini_support,
4179 		    tgt_support,
4180 		    SAS_ADDR_PRT(sas_address),
4181 		    SAS_ADDR_PRT(att_sas_address),
4182 		    sdr->sdr_attached_phy_identifier);
4183 
4184 		if (sdr->sdr_attached_sata_device ||
4185 		    sdr->sdr_attached_stp_target) {
4186 			pptr->dtype = SATA;
4187 		} else if (sdr->sdr_attached_ssp_target) {
4188 			pptr->dtype = SAS;
4189 		} else if (tgt_support || ini_support) {
4190 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
4191 			    "%s: %s has tgt support=%x init support=(%x)",
4192 			    __func__, pptr->path, tgt_support, ini_support);
4193 		}
4194 		break;
4195 	case SAS_IF_DTYPE_EDGE:
4196 	case SAS_IF_DTYPE_FANOUT:
4197 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
4198 		    "exp_content: %s atdt=0x%x lr=%x is=%x ts=%x SAS="
4199 		    SAS_ADDR_FMT " attSAS=" SAS_ADDR_FMT " atPHY=%x",
4200 		    pptr->path,
4201 		    sdr->sdr_attached_device_type,
4202 		    sdr->sdr_negotiated_logical_link_rate,
4203 		    ini_support,
4204 		    tgt_support,
4205 		    SAS_ADDR_PRT(sas_address),
4206 		    SAS_ADDR_PRT(att_sas_address),
4207 		    sdr->sdr_attached_phy_identifier);
4208 		if (sdr->sdr_attached_smp_target) {
4209 			/*
4210 			 * Avoid configuring phys that just point back
4211 			 * at a parent phy
4212 			 */
4213 			if (expander->parent &&
4214 			    memcmp(expander->parent->sas_address,
4215 			    att_sas_address,
4216 			    sizeof (expander->parent->sas_address)) == 0) {
4217 				pmcs_prt(pwp, PMCS_PRT_DEBUG3, pptr, NULL,
4218 				    "%s: skipping port back to parent "
4219 				    "expander (%s)", __func__, pptr->path);
4220 				pptr->dtype = NOTHING;
4221 				break;
4222 			}
4223 			pptr->dtype = EXPANDER;
4224 
4225 		} else if (tgt_support || ini_support) {
4226 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
4227 			    "%s has tgt support=%x init support=(%x)",
4228 			    pptr->path, tgt_support, ini_support);
4229 			pptr->dtype = EXPANDER;
4230 		}
4231 		break;
4232 	default:
4233 		pptr->dtype = NOTHING;
4234 		break;
4235 	}
4236 	if (pptr->dtype != NOTHING) {
4237 		pmcs_phy_t *ctmp;
4238 
4239 		/*
4240 		 * If the attached device is a SATA device and the expander
4241 		 * is (possibly) a SAS2 compliant expander, check for whether
4242 		 * there is a NAA=5 WWN field starting at this offset and
4243 		 * use that for the SAS Address for this device.
4244 		 */
4245 		if (expander->tolerates_sas2 && pptr->dtype == SATA &&
4246 		    (roff[SAS_ATTACHED_NAME_OFFSET] >> 8) == 0x5) {
4247 			(void) memcpy(pptr->sas_address,
4248 			    &roff[SAS_ATTACHED_NAME_OFFSET], 8);
4249 		} else {
4250 			(void) memcpy(pptr->sas_address, att_sas_address, 8);
4251 		}
4252 		pptr->atdt = (sdr->sdr_attached_device_type);
4253 		/*
4254 		 * Now run up from the expander's parent up to the top to
4255 		 * make sure we only use the least common link_rate.
4256 		 */
4257 		for (ctmp = expander->parent; ctmp; ctmp = ctmp->parent) {
4258 			if (ctmp->link_rate <
4259 			    sdr->sdr_negotiated_logical_link_rate) {
4260 				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
4261 				    "%s: derating link rate from %x to %x due "
4262 				    "to %s being slower", pptr->path,
4263 				    sdr->sdr_negotiated_logical_link_rate,
4264 				    ctmp->link_rate,
4265 				    ctmp->path);
4266 				sdr->sdr_negotiated_logical_link_rate =
4267 				    ctmp->link_rate;
4268 			}
4269 		}
4270 		pptr->link_rate = sdr->sdr_negotiated_logical_link_rate;
4271 		pptr->state.prog_min_rate = sdr->sdr_prog_min_phys_link_rate;
4272 		pptr->state.hw_min_rate = sdr->sdr_hw_min_phys_link_rate;
4273 		pptr->state.prog_max_rate = sdr->sdr_prog_max_phys_link_rate;
4274 		pptr->state.hw_max_rate = sdr->sdr_hw_max_phys_link_rate;
4275 		PHY_CHANGED(pwp, pptr);
4276 	} else {
4277 		pmcs_clear_phy(pwp, pptr);
4278 	}
4279 	result = 1;
4280 out:
4281 	return (result);
4282 }
4283 
4284 /*
4285  * Get a work structure and assign it a tag with type and serial number
4286  * If a structure is returned, it is returned locked.
4287  */
pmcwork_t *
pmcs_gwork(pmcs_hw_t *pwp, uint32_t tag_type, pmcs_phy_t *phyp)
{
	pmcwork_t *p;
	uint16_t snum;	/* serial number folded into the returned tag */
	uint32_t off;	/* index of the entry within the work array */

	mutex_enter(&pwp->wfree_lock);
	p = STAILQ_FIRST(&pwp->wf);
	if (p == NULL) {
		/*
		 * If we couldn't get a work structure, it's time to bite
		 * the bullet, grab the pfree_lock and copy over all the
		 * work structures from the pending free list to the actual
		 * free list.  This shouldn't happen all that often.
		 */
		mutex_enter(&pwp->pfree_lock);
		pwp->wf.stqh_first = pwp->pf.stqh_first;
		pwp->wf.stqh_last = pwp->pf.stqh_last;
		STAILQ_INIT(&pwp->pf);
		mutex_exit(&pwp->pfree_lock);

		p = STAILQ_FIRST(&pwp->wf);
		if (p == NULL) {
			/* Both free lists are exhausted */
			mutex_exit(&pwp->wfree_lock);
			return (NULL);
		}
	}
	STAILQ_REMOVE(&pwp->wf, p, pmcwork, next);
	/* Serial number is drawn under wfree_lock so it stays unique */
	snum = pwp->wserno++;
	mutex_exit(&pwp->wfree_lock);

	/* Pointer arithmetic: index of this entry in the pwp->work array */
	off = p - pwp->work;

	mutex_enter(&p->lock);
	ASSERT(p->state == PMCS_WORK_STATE_NIL);
	ASSERT(p->htag == PMCS_TAG_FREE);
	/* Compose the host tag from the type, serial number and index */
	p->htag = (tag_type << PMCS_TAG_TYPE_SHIFT) & PMCS_TAG_TYPE_MASK;
	p->htag |= ((snum << PMCS_TAG_SERNO_SHIFT) & PMCS_TAG_SERNO_MASK);
	p->htag |= ((off << PMCS_TAG_INDEX_SHIFT) & PMCS_TAG_INDEX_MASK);
	p->start = gethrtime();
	p->state = PMCS_WORK_STATE_READY;
	p->ssp_event = 0;
	p->dead = 0;

	if (phyp) {
		/* Tie the work structure to the PHY and hold a reference */
		p->phy = phyp;
		pmcs_inc_phy_ref_count(phyp);
	}

	/* Returned with p->lock held, per the contract above */
	return (p);
}
4340 
4341 /*
4342  * Called with pwrk lock held.  Returned with lock released.
4343  */
void
pmcs_pwork(pmcs_hw_t *pwp, pmcwork_t *p)
{
	ASSERT(p != NULL);
	ASSERT(mutex_owned(&p->lock));

	/*
	 * Snapshot the final state of the work structure into the last_*
	 * fields for post-mortem debugging before the entry is reset.
	 */
	p->last_ptr = p->ptr;
	p->last_arg = p->arg;
	p->last_phy = p->phy;
	p->last_xp = p->xp;
	p->last_htag = p->htag;
	p->last_state = p->state;
	p->finish = gethrtime();

	if (p->phy) {
		/* Drop the PHY reference taken in pmcs_gwork() */
		pmcs_dec_phy_ref_count(p->phy);
	}

	/* Reset the entry to its free state */
	p->state = PMCS_WORK_STATE_NIL;
	p->htag = PMCS_TAG_FREE;
	p->xp = NULL;
	p->ptr = NULL;
	p->arg = NULL;
	p->phy = NULL;
	p->abt_htag = 0;
	p->timer = 0;
	mutex_exit(&p->lock);

	/*
	 * If the free-list lock is contended, don't block (this may be
	 * called from interrupt context); park the entry on the pending
	 * free list instead.  pmcs_gwork() splices the pending list back
	 * when the primary free list runs dry.
	 */
	if (mutex_tryenter(&pwp->wfree_lock) == 0) {
		mutex_enter(&pwp->pfree_lock);
		STAILQ_INSERT_TAIL(&pwp->pf, p, next);
		mutex_exit(&pwp->pfree_lock);
	} else {
		STAILQ_INSERT_TAIL(&pwp->wf, p, next);
		mutex_exit(&pwp->wfree_lock);
	}
}
4381 
4382 /*
4383  * Find a work structure based upon a tag and make sure that the tag
4384  * serial number matches the work structure we've found.
4385  * If a structure is found, its lock is held upon return.
4386  */
4387 pmcwork_t *
4388 pmcs_tag2wp(pmcs_hw_t *pwp, uint32_t htag)
4389 {
4390 	pmcwork_t *p;
4391 	uint32_t idx = PMCS_TAG_INDEX(htag);
4392 
4393 	p = &pwp->work[idx];
4394 
4395 	mutex_enter(&p->lock);
4396 	if (p->htag == htag) {
4397 		return (p);
4398 	}
4399 	mutex_exit(&p->lock);
4400 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL,
4401 	    "INDEX 0x%x HTAG 0x%x got p->htag 0x%x", idx, htag, p->htag);
4402 	return (NULL);
4403 }
4404 
4405 /*
4406  * Issue an abort for a command or for all commands.
4407  *
4408  * Since this can be called from interrupt context,
4409  * we don't wait for completion if wait is not set.
4410  *
4411  * Called with PHY lock held.
4412  */
int
pmcs_abort(pmcs_hw_t *pwp, pmcs_phy_t *pptr, uint32_t tag, int all_cmds,
    int wait)
{
	pmcwork_t *pwrk;
	pmcs_xscsi_t *tgt;
	uint32_t msg[PMCS_MSG_SIZE], *ptr;
	int result, abt_type;
	uint32_t abt_htag, status;

	/* Only one ABORT_ALL may be in flight per PHY at a time */
	if (pptr->abort_all_start) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL, "%s: ABORT_ALL for "
		    "(%s) already in progress.", __func__, pptr->path);
		return (EBUSY);
	}

	/* Pick the abort opcode matching the device protocol */
	switch (pptr->dtype) {
	case SAS:
		abt_type = PMCIN_SSP_ABORT;
		break;
	case SATA:
		abt_type = PMCIN_SATA_ABORT;
		break;
	case EXPANDER:
		abt_type = PMCIN_SMP_ABORT;
		break;
	default:
		/* Nothing to abort for other device types; not an error */
		return (0);
	}

	pwrk = pmcs_gwork(pwp, wait ? PMCS_TAG_TYPE_WAIT : PMCS_TAG_TYPE_NONE,
	    pptr);

	if (pwrk == NULL) {
		pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL, pmcs_nowrk, __func__);
		return (ENOMEM);
	}

	pwrk->dtype = pptr->dtype;
	if (wait) {
		/* Completion path copies the outbound IOMB back into msg */
		pwrk->arg = msg;
	}
	if (pptr->valid_device_id == 0) {
		pmcs_pwork(pwp, pwrk);
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
		    "%s: Invalid DeviceID", __func__);
		return (ENODEV);
	}
	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, abt_type));
	msg[1] = LE_32(pwrk->htag);
	msg[2] = LE_32(pptr->device_id);
	if (all_cmds) {
		/* Scope flag (msg[4]) == 1: abort everything for device */
		msg[3] = 0;
		msg[4] = LE_32(1);
		pwrk->ptr = NULL;
		pptr->abort_all_start = gethrtime();
	} else {
		/* Scope flag == 0: abort the single command with this tag */
		msg[3] = LE_32(tag);
		msg[4] = 0;
		pwrk->abt_htag = tag;
	}
	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
	if (ptr == NULL) {
		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
		pmcs_pwork(pwp, pwrk);
		pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL, pmcs_nomsg, __func__);
		return (ENOMEM);
	}

	COPY_MESSAGE(ptr, msg, 5);
	if (all_cmds) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
		    "%s: aborting all commands for %s device %s. (htag=0x%x)",
		    __func__, pmcs_get_typename(pptr->dtype), pptr->path,
		    msg[1]);
	} else {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
		    "%s: aborting tag 0x%x for %s device %s. (htag=0x%x)",
		    __func__, tag, pmcs_get_typename(pptr->dtype), pptr->path,
		    msg[1]);
	}
	pwrk->state = PMCS_WORK_STATE_ONCHIP;

	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
	if (!wait) {
		/* Fire and forget (e.g. when called from interrupt context) */
		mutex_exit(&pwrk->lock);
		return (0);
	}

	/*
	 * Remember the tag before waiting: pwrk is returned to the free
	 * list by pmcs_pwork() below and may be recycled.
	 */
	abt_htag = pwrk->htag;
	pmcs_unlock_phy(pwrk->phy);
	WAIT_FOR(pwrk, 1000, result);
	pmcs_lock_phy(pwrk->phy);

	tgt = pwrk->xp;
	pmcs_pwork(pwp, pwrk);

	if (tgt != NULL) {
		/* Wait for the target's active queue to fully drain */
		mutex_enter(&tgt->aqlock);
		if (!STAILQ_EMPTY(&tgt->aq)) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
			    "%s: Abort complete (result=0x%x), but "
			    "aq not empty (tgt 0x%p), waiting",
			    __func__, result, (void *)tgt);
			cv_wait(&tgt->abort_cv, &tgt->aqlock);
		}
		mutex_exit(&tgt->aqlock);
	}

	if (all_cmds) {
		/* Clear the ABORT_ALL guard and wake any waiters */
		pptr->abort_all_start = 0;
		cv_signal(&pptr->abort_all_cv);
	}

	if (result) {
		/* The abort itself timed out; try device state recovery */
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
		    "%s: Abort (htag 0x%08x) request timed out",
		    __func__, abt_htag);
		if (tgt != NULL) {
			mutex_enter(&tgt->statlock);
			if ((tgt->dev_state != PMCS_DEVICE_STATE_IN_RECOVERY) &&
			    (tgt->dev_state !=
			    PMCS_DEVICE_STATE_NON_OPERATIONAL)) {
				pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
				    "%s: Trying DS error recovery for tgt 0x%p",
				    __func__, (void *)tgt);
				(void) pmcs_send_err_recovery_cmd(pwp,
				    PMCS_DEVICE_STATE_IN_RECOVERY, pptr, tgt);
			}
			mutex_exit(&tgt->statlock);
		}
		return (ETIMEDOUT);
	}

	status = LE_32(msg[2]);
	if (status != PMCOUT_STATUS_OK) {
		/*
		 * The only non-success status are IO_NOT_VALID &
		 * IO_ABORT_IN_PROGRESS.
		 * In case of IO_ABORT_IN_PROGRESS, the other ABORT cmd's
		 * status is of concern and this duplicate cmd status can
		 * be ignored.
		 * If IO_NOT_VALID, that's not an error per-se.
		 * For abort of single I/O complete the command anyway.
		 * If, however, we were aborting all, that is a problem
		 * as IO_NOT_VALID really means that the IO or device is
		 * not there. So, discovery process will take of the cleanup.
		 */
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
		    "%s: abort result 0x%x", __func__, LE_32(msg[2]));
		if (all_cmds) {
			/* Let rediscovery clean up the PHY */
			PHY_CHANGED(pwp, pptr);
			RESTART_DISCOVERY(pwp);
		} else {
			return (EINVAL);
		}

		return (0);
	}

	if (tgt != NULL) {
		/* Abort succeeded; restore the device to OPERATIONAL */
		mutex_enter(&tgt->statlock);
		if (tgt->dev_state == PMCS_DEVICE_STATE_IN_RECOVERY) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
			    "%s: Restoring OPERATIONAL dev_state for tgt 0x%p",
			    __func__, (void *)tgt);
			(void) pmcs_send_err_recovery_cmd(pwp,
			    PMCS_DEVICE_STATE_OPERATIONAL, pptr, tgt);
		}
		mutex_exit(&tgt->statlock);
	}

	return (0);
}
4588 
4589 /*
4590  * Issue a task management function to an SSP device.
4591  *
4592  * Called with PHY lock held.
4593  * statlock CANNOT be held upon entry.
4594  */
int
pmcs_ssp_tmf(pmcs_hw_t *pwp, pmcs_phy_t *pptr, uint8_t tmf, uint32_t tag,
    uint64_t lun, uint32_t *response)
{
	int result, ds;
	uint8_t local[PMCS_QENTRY_SIZE << 1], *xd;
	sas_ssp_rsp_iu_t *rptr = (void *)local;
	/*
	 * Endian transformation vector for the SSP response IU header;
	 * see pmcs_endian_transform() for the byte encoding.
	 */
	static const uint8_t ssp_rsp_evec[] = {
		0x58, 0x61, 0x56, 0x72, 0x00
	};
	uint32_t msg[PMCS_MSG_SIZE], *ptr, status;
	struct pmcwork *pwrk;
	pmcs_xscsi_t *xp;

	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
	if (pwrk == NULL) {
		pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL, pmcs_nowrk, __func__);
		return (ENOMEM);
	}
	/*
	 * NB: We use the PMCS_OQ_GENERAL outbound queue
	 * NB: so as to not get entangled in normal I/O
	 * NB: processing.
	 */
	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
	    PMCIN_SSP_INI_TM_START));
	msg[1] = LE_32(pwrk->htag);
	msg[2] = LE_32(pptr->device_id);
	/* ABORT TASK and QUERY TASK target a specific tag; others do not */
	if (tmf == SAS_ABORT_TASK || tmf == SAS_QUERY_TASK) {
		msg[3] = LE_32(tag);
	} else {
		msg[3] = 0;
	}
	msg[4] = LE_32(tmf);
	/* The LUN occupies words 5-6, big-endian */
	msg[5] = BE_32((uint32_t)lun);
	msg[6] = BE_32((uint32_t)(lun >> 32));
	msg[7] = LE_32(PMCIN_MESSAGE_REPORT);

	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
	if (ptr == NULL) {
		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
		pmcs_pwork(pwp, pwrk);
		pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL, pmcs_nomsg, __func__);
		return (ENOMEM);
	}
	/*
	 * NOTE(review): only 7 dwords (msg[0..6]) are copied, so the
	 * PMCIN_MESSAGE_REPORT flag built into msg[7] above never reaches
	 * the IOMB.  Verify against the SSP INI TM Start IOMB layout
	 * whether the copy count should be 8.
	 */
	COPY_MESSAGE(ptr, msg, 7);
	pwrk->arg = msg;
	pwrk->dtype = pptr->dtype;

	xp = pptr->target;
	if (xp != NULL) {
		/* Don't bother sending a TMF to a non-operational device */
		mutex_enter(&xp->statlock);
		if (xp->dev_state == PMCS_DEVICE_STATE_NON_OPERATIONAL) {
			mutex_exit(&xp->statlock);
			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
			pmcs_pwork(pwp, pwrk);
			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp, "%s: Not "
			    "sending '%s' because DS is '%s'", __func__,
			    pmcs_tmf2str(tmf), pmcs_status_str
			    (PMCOUT_STATUS_IO_DS_NON_OPERATIONAL));
			return (EIO);
		}
		mutex_exit(&xp->statlock);
	}

	pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
	    "%s: sending '%s' to %s (lun %llu) tag 0x%x", __func__,
	    pmcs_tmf2str(tmf), pptr->path, (unsigned long long) lun, tag);
	pwrk->state = PMCS_WORK_STATE_ONCHIP;
	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);

	pmcs_unlock_phy(pptr);
	/*
	 * This is a command sent to the target device, so it can take
	 * significant amount of time to complete when path & device is busy.
	 * Set a timeout to 20 seconds
	 */
	WAIT_FOR(pwrk, 20000, result);
	pmcs_lock_phy(pptr);
	pmcs_pwork(pwp, pwrk);

	if (result) {
		/* TMF timed out; kick off device state recovery */
		if (xp == NULL) {
			return (ETIMEDOUT);
		}

		mutex_enter(&xp->statlock);
		pmcs_start_dev_state_recovery(xp, pptr);
		mutex_exit(&xp->statlock);
		return (ETIMEDOUT);
	}

	status = LE_32(msg[2]);
	if (status != PMCOUT_STATUS_OK) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
		    "%s: status %s for TMF %s action to %s, lun %llu",
		    __func__, pmcs_status_str(status),  pmcs_tmf2str(tmf),
		    pptr->path, (unsigned long long) lun);
		/* Map the firmware status to the target device state */
		if ((status == PMCOUT_STATUS_IO_DS_NON_OPERATIONAL) ||
		    (status == PMCOUT_STATUS_OPEN_CNX_ERROR_BREAK) ||
		    (status == PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS)) {
			ds = PMCS_DEVICE_STATE_NON_OPERATIONAL;
		} else if (status == PMCOUT_STATUS_IO_DS_IN_RECOVERY) {
			/*
			 * If the status is IN_RECOVERY, it's an indication
			 * that it's now time for us to request to have the
			 * device state set to OPERATIONAL since we're the ones
			 * that requested recovery to begin with.
			 */
			ds = PMCS_DEVICE_STATE_OPERATIONAL;
		} else {
			ds = PMCS_DEVICE_STATE_IN_RECOVERY;
		}
		if (xp != NULL) {
			mutex_enter(&xp->statlock);
			if (xp->dev_state != ds) {
				pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
				    "%s: Sending err recovery cmd"
				    " for tgt 0x%p (status = %s)",
				    __func__, (void *)xp,
				    pmcs_status_str(status));
				(void) pmcs_send_err_recovery_cmd(pwp, ds,
				    pptr, xp);
			}
			mutex_exit(&xp->statlock);
		}
		return (EIO);
	} else {
		/* Success: ensure the device state reflects OPERATIONAL */
		ds = PMCS_DEVICE_STATE_OPERATIONAL;
		if (xp != NULL) {
			mutex_enter(&xp->statlock);
			if (xp->dev_state != ds) {
				pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
				    "%s: Sending err recovery cmd"
				    " for tgt 0x%p (status = %s)",
				    __func__, (void *)xp,
				    pmcs_status_str(status));
				(void) pmcs_send_err_recovery_cmd(pwp, ds,
				    pptr, xp);
			}
			mutex_exit(&xp->statlock);
		}
	}
	/* msg[3] == 0 means no response data came back with the IOMB */
	if (LE_32(msg[3]) == 0) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
		    "TMF completed with no response");
		return (EIO);
	}
	/* Convert the response IU header into host order for parsing */
	pmcs_endian_transform(pwp, local, &msg[5], ssp_rsp_evec);
	xd = (uint8_t *)(&msg[5]);
	/* xd now points at the response data following the IU header */
	xd += SAS_RSP_HDR_SIZE;
	if (rptr->datapres != SAS_RSP_DATAPRES_RESPONSE_DATA) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
		    "%s: TMF response not RESPONSE DATA (0x%x)",
		    __func__, rptr->datapres);
		return (EIO);
	}
	if (rptr->response_data_length != 4) {
		pmcs_print_entry(pwp, PMCS_PRT_DEBUG,
		    "Bad SAS RESPONSE DATA LENGTH", msg);
		return (EIO);
	}
	(void) memcpy(&status, xd, sizeof (uint32_t));
	status = BE_32(status);
	if (response != NULL)
		*response = status;
	/*
	 * The status is actually in the low-order byte.  The upper three
	 * bytes contain additional information for the TMFs that support them.
	 * However, at this time we do not issue any of those.  In the other
	 * cases, the upper three bytes are supposed to be 0, but it appears
	 * they aren't always.  Just mask them off.
	 */
	switch (status & 0xff) {
	case SAS_RSP_TMF_COMPLETE:
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
		    "%s: TMF complete", __func__);
		result = 0;
		break;
	case SAS_RSP_TMF_SUCCEEDED:
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
		    "%s: TMF succeeded", __func__);
		result = 0;
		break;
	case SAS_RSP_INVALID_FRAME:
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
		    "%s: TMF returned INVALID FRAME", __func__);
		result = EIO;
		break;
	case SAS_RSP_TMF_NOT_SUPPORTED:
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
		    "%s: TMF returned TMF NOT SUPPORTED", __func__);
		result = EIO;
		break;
	case SAS_RSP_TMF_FAILED:
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
		    "%s: TMF returned TMF FAILED", __func__);
		result = EIO;
		break;
	case SAS_RSP_TMF_INCORRECT_LUN:
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
		    "%s: TMF returned INCORRECT LUN", __func__);
		result = EIO;
		break;
	case SAS_RSP_OVERLAPPED_OIPTTA:
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
		    "%s: TMF returned OVERLAPPED INITIATOR PORT TRANSFER TAG "
		    "ATTEMPTED", __func__);
		result = EIO;
		break;
	default:
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
		    "%s: TMF returned unknown code 0x%x", __func__, status);
		result = EIO;
		break;
	}
	return (result);
}
4814 
4815 /*
4816  * Called with PHY lock held and scratch acquired
4817  */
4818 int
4819 pmcs_sata_abort_ncq(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
4820 {
4821 	const char *utag_fail_fmt = "%s: untagged NCQ command failure";
4822 	const char *tag_fail_fmt = "%s: NCQ command failure (tag 0x%x)";
4823 	uint32_t msg[PMCS_QENTRY_SIZE], *ptr, result, status;
4824 	uint8_t *fp = pwp->scratch, ds;
4825 	fis_t fis;
4826 	pmcwork_t *pwrk;
4827 	pmcs_xscsi_t *tgt;
4828 
4829 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
4830 	if (pwrk == NULL) {
4831 		return (ENOMEM);
4832 	}
4833 	msg[0] = LE_32(PMCS_IOMB_IN_SAS(PMCS_OQ_IODONE,
4834 	    PMCIN_SATA_HOST_IO_START));
4835 	msg[1] = LE_32(pwrk->htag);
4836 	msg[2] = LE_32(pptr->device_id);
4837 	msg[3] = LE_32(512);
4838 	msg[4] = LE_32(SATA_PROTOCOL_PIO | PMCIN_DATADIR_2_INI);
4839 	msg[5] = LE_32((READ_LOG_EXT << 16) | (C_BIT << 8) | FIS_REG_H2DEV);
4840 	msg[6] = LE_32(0x10);
4841 	msg[8] = LE_32(1);
4842 	msg[9] = 0;
4843 	msg[10] = 0;
4844 	msg[11] = 0;
4845 	msg[12] = LE_32(DWORD0(pwp->scratch_dma));
4846 	msg[13] = LE_32(DWORD1(pwp->scratch_dma));
4847 	msg[14] = LE_32(512);
4848 	msg[15] = 0;
4849 
4850 	pwrk->arg = msg;
4851 	pwrk->dtype = pptr->dtype;
4852 
4853 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4854 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4855 	if (ptr == NULL) {
4856 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4857 		pmcs_pwork(pwp, pwrk);
4858 		return (ENOMEM);
4859 	}
4860 	COPY_MESSAGE(ptr, msg, PMCS_QENTRY_SIZE);
4861 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
4862 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4863 
4864 	pmcs_unlock_phy(pptr);
4865 	WAIT_FOR(pwrk, 250, result);
4866 	pmcs_lock_phy(pptr);
4867 	pmcs_pwork(pwp, pwrk);
4868 
4869 	tgt = pptr->target;
4870 	if (result) {
4871 		pmcs_prt(pwp, PMCS_PRT_INFO, pptr, tgt, pmcs_timeo, __func__);
4872 		return (EIO);
4873 	}
4874 	status = LE_32(msg[2]);
4875 	if (status != PMCOUT_STATUS_OK || LE_32(msg[3])) {
4876 		if (tgt == NULL) {
4877 			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
4878 			    "%s: cannot find target for phy 0x%p for "
4879 			    "dev state recovery", __func__, (void *)pptr);
4880 			return (EIO);
4881 		}
4882 
4883 		mutex_enter(&tgt->statlock);
4884 
4885 		pmcs_print_entry(pwp, PMCS_PRT_DEBUG, "READ LOG EXT", msg);
4886 		if ((status == PMCOUT_STATUS_IO_DS_NON_OPERATIONAL) ||
4887 		    (status == PMCOUT_STATUS_OPEN_CNX_ERROR_BREAK) ||
4888 		    (status == PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS)) {
4889 			ds = PMCS_DEVICE_STATE_NON_OPERATIONAL;
4890 		} else {
4891 			ds = PMCS_DEVICE_STATE_IN_RECOVERY;
4892 		}
4893 		if (tgt->dev_state != ds) {
4894 			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, "%s: Trying "
4895 			    "SATA DS Recovery for tgt(0x%p) for status(%s)",
4896 			    __func__, (void *)tgt, pmcs_status_str(status));
4897 			(void) pmcs_send_err_recovery_cmd(pwp, ds, pptr, tgt);
4898 		}
4899 
4900 		mutex_exit(&tgt->statlock);
4901 		return (EIO);
4902 	}
4903 	fis[0] = (fp[4] << 24) | (fp[3] << 16) | (fp[2] << 8) | FIS_REG_D2H;
4904 	fis[1] = (fp[8] << 24) | (fp[7] << 16) | (fp[6] << 8) | fp[5];
4905 	fis[2] = (fp[12] << 24) | (fp[11] << 16) | (fp[10] << 8) | fp[9];
4906 	fis[3] = (fp[16] << 24) | (fp[15] << 16) | (fp[14] << 8) | fp[13];
4907 	fis[4] = 0;
4908 	if (fp[0] & 0x80) {
4909 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
4910 		    utag_fail_fmt, __func__);
4911 	} else {
4912 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
4913 		    tag_fail_fmt, __func__, fp[0] & 0x1f);
4914 	}
4915 	pmcs_fis_dump(pwp, fis);
4916 	pptr->need_rl_ext = 0;
4917 	return (0);
4918 }
4919 
4920 /*
4921  * Transform a structure from CPU to Device endian format, or
4922  * vice versa, based upon a transformation vector.
4923  *
4924  * A transformation vector is an array of bytes, each byte
4925  * of which is defined thusly:
4926  *
4927  *  bit 7: from CPU to desired endian, otherwise from desired endian
4928  *	   to CPU format
4929  *  bit 6: Big Endian, else Little Endian
4930  *  bits 5-4:
4931  *       00 Undefined
4932  *       01 One Byte quantities
4933  *       02 Two Byte quantities
4934  *       03 Four Byte quantities
4935  *
4936  *  bits 3-0:
4937  *       00 Undefined
4938  *       Number of quantities to transform
4939  *
4940  * The vector is terminated by a 0 value.
4941  */
4942 
4943 void
4944 pmcs_endian_transform(pmcs_hw_t *pwp, void *orig_out, void *orig_in,
4945     const uint8_t *xfvec)
4946 {
4947 	uint8_t c, *out = orig_out, *in = orig_in;
4948 
4949 	if (xfvec == NULL) {
4950 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
4951 		    "%s: null xfvec", __func__);
4952 		return;
4953 	}
4954 	if (out == NULL) {
4955 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
4956 		    "%s: null out", __func__);
4957 		return;
4958 	}
4959 	if (in == NULL) {
4960 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
4961 		    "%s: null in", __func__);
4962 		return;
4963 	}
4964 	while ((c = *xfvec++) != 0) {
4965 		int nbyt = (c & 0xf);
4966 		int size = (c >> 4) & 0x3;
4967 		int bige = (c >> 4) & 0x4;
4968 
4969 		switch (size) {
4970 		case 1:
4971 		{
4972 			while (nbyt-- > 0) {
4973 				*out++ = *in++;
4974 			}
4975 			break;
4976 		}
4977 		case 2:
4978 		{
4979 			uint16_t tmp;
4980 			while (nbyt-- > 0) {
4981 				(void) memcpy(&tmp, in, sizeof (uint16_t));
4982 				if (bige) {
4983 					tmp = BE_16(tmp);
4984 				} else {
4985 					tmp = LE_16(tmp);
4986 				}
4987 				(void) memcpy(out, &tmp, sizeof (uint16_t));
4988 				out += sizeof (uint16_t);
4989 				in += sizeof (uint16_t);
4990 			}
4991 			break;
4992 		}
4993 		case 3:
4994 		{
4995 			uint32_t tmp;
4996 			while (nbyt-- > 0) {
4997 				(void) memcpy(&tmp, in, sizeof (uint32_t));
4998 				if (bige) {
4999 					tmp = BE_32(tmp);
5000 				} else {
5001 					tmp = LE_32(tmp);
5002 				}
5003 				(void) memcpy(out, &tmp, sizeof (uint32_t));
5004 				out += sizeof (uint32_t);
5005 				in += sizeof (uint32_t);
5006 			}
5007 			break;
5008 		}
5009 		default:
5010 			pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
5011 			    "%s: bad size", __func__);
5012 			return;
5013 		}
5014 	}
5015 }
5016 
5017 const char *
5018 pmcs_get_rate(unsigned int linkrt)
5019 {
5020 	const char *rate;
5021 	switch (linkrt) {
5022 	case SAS_LINK_RATE_1_5GBIT:
5023 		rate = "1.5";
5024 		break;
5025 	case SAS_LINK_RATE_3GBIT:
5026 		rate = "3.0";
5027 		break;
5028 	case SAS_LINK_RATE_6GBIT:
5029 		rate = "6.0";
5030 		break;
5031 	default:
5032 		rate = "???";
5033 		break;
5034 	}
5035 	return (rate);
5036 }
5037 
5038 const char *
5039 pmcs_get_typename(pmcs_dtype_t type)
5040 {
5041 	switch (type) {
5042 	case NOTHING:
5043 		return ("NIL");
5044 	case SATA:
5045 		return ("SATA");
5046 	case SAS:
5047 		return ("SSP");
5048 	case EXPANDER:
5049 		return ("EXPANDER");
5050 	}
5051 	return ("????");
5052 }
5053 
5054 const char *
5055 pmcs_tmf2str(int tmf)
5056 {
5057 	switch (tmf) {
5058 	case SAS_ABORT_TASK:
5059 		return ("Abort Task");
5060 	case SAS_ABORT_TASK_SET:
5061 		return ("Abort Task Set");
5062 	case SAS_CLEAR_TASK_SET:
5063 		return ("Clear Task Set");
5064 	case SAS_LOGICAL_UNIT_RESET:
5065 		return ("Logical Unit Reset");
5066 	case SAS_I_T_NEXUS_RESET:
5067 		return ("I_T Nexus Reset");
5068 	case SAS_CLEAR_ACA:
5069 		return ("Clear ACA");
5070 	case SAS_QUERY_TASK:
5071 		return ("Query Task");
5072 	case SAS_QUERY_TASK_SET:
5073 		return ("Query Task Set");
5074 	case SAS_QUERY_UNIT_ATTENTION:
5075 		return ("Query Unit Attention");
5076 	default:
5077 		return ("Unknown");
5078 	}
5079 }
5080 
5081 const char *
5082 pmcs_status_str(uint32_t status)
5083 {
5084 	switch (status) {
5085 	case PMCOUT_STATUS_OK:
5086 		return ("OK");
5087 	case PMCOUT_STATUS_ABORTED:
5088 		return ("ABORTED");
5089 	case PMCOUT_STATUS_OVERFLOW:
5090 		return ("OVERFLOW");
5091 	case PMCOUT_STATUS_UNDERFLOW:
5092 		return ("UNDERFLOW");
5093 	case PMCOUT_STATUS_FAILED:
5094 		return ("FAILED");
5095 	case PMCOUT_STATUS_ABORT_RESET:
5096 		return ("ABORT_RESET");
5097 	case PMCOUT_STATUS_IO_NOT_VALID:
5098 		return ("IO_NOT_VALID");
5099 	case PMCOUT_STATUS_NO_DEVICE:
5100 		return ("NO_DEVICE");
5101 	case PMCOUT_STATUS_ILLEGAL_PARAMETER:
5102 		return ("ILLEGAL_PARAMETER");
5103 	case PMCOUT_STATUS_LINK_FAILURE:
5104 		return ("LINK_FAILURE");
5105 	case PMCOUT_STATUS_PROG_ERROR:
5106 		return ("PROG_ERROR");
5107 	case PMCOUT_STATUS_EDC_IN_ERROR:
5108 		return ("EDC_IN_ERROR");
5109 	case PMCOUT_STATUS_EDC_OUT_ERROR:
5110 		return ("EDC_OUT_ERROR");
5111 	case PMCOUT_STATUS_ERROR_HW_TIMEOUT:
5112 		return ("ERROR_HW_TIMEOUT");
5113 	case PMCOUT_STATUS_XFER_ERR_BREAK:
5114 		return ("XFER_ERR_BREAK");
5115 	case PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY:
5116 		return ("XFER_ERR_PHY_NOT_READY");
5117 	case PMCOUT_STATUS_OPEN_CNX_PROTOCOL_NOT_SUPPORTED:
5118 		return ("OPEN_CNX_PROTOCOL_NOT_SUPPORTED");
5119 	case PMCOUT_STATUS_OPEN_CNX_ERROR_ZONE_VIOLATION:
5120 		return ("OPEN_CNX_ERROR_ZONE_VIOLATION");
5121 	case PMCOUT_STATUS_OPEN_CNX_ERROR_BREAK:
5122 		return ("OPEN_CNX_ERROR_BREAK");
5123 	case PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS:
5124 		return ("OPEN_CNX_ERROR_IT_NEXUS_LOSS");
5125 	case PMCOUT_STATUS_OPENCNX_ERROR_BAD_DESTINATION:
5126 		return ("OPENCNX_ERROR_BAD_DESTINATION");
5127 	case PMCOUT_STATUS_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED:
5128 		return ("OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED");
5129 	case PMCOUT_STATUS_OPEN_CNX_ERROR_STP_RESOURCES_BUSY:
5130 		return ("OPEN_CNX_ERROR_STP_RESOURCES_BUSY");
5131 	case PMCOUT_STATUS_OPEN_CNX_ERROR_WRONG_DESTINATION:
5132 		return ("OPEN_CNX_ERROR_WRONG_DESTINATION");
5133 	case PMCOUT_STATUS_OPEN_CNX_ERROR_UNKNOWN_EROOR:
5134 		return ("OPEN_CNX_ERROR_UNKNOWN_EROOR");
5135 	case PMCOUT_STATUS_IO_XFER_ERROR_NAK_RECEIVED:
5136 		return ("IO_XFER_ERROR_NAK_RECEIVED");
5137 	case PMCOUT_STATUS_XFER_ERROR_ACK_NAK_TIMEOUT:
5138 		return ("XFER_ERROR_ACK_NAK_TIMEOUT");
5139 	case PMCOUT_STATUS_XFER_ERROR_PEER_ABORTED:
5140 		return ("XFER_ERROR_PEER_ABORTED");
5141 	case PMCOUT_STATUS_XFER_ERROR_RX_FRAME:
5142 		return ("XFER_ERROR_RX_FRAME");
5143 	case PMCOUT_STATUS_IO_XFER_ERROR_DMA:
5144 		return ("IO_XFER_ERROR_DMA");
5145 	case PMCOUT_STATUS_XFER_ERROR_CREDIT_TIMEOUT:
5146 		return ("XFER_ERROR_CREDIT_TIMEOUT");
5147 	case PMCOUT_STATUS_XFER_ERROR_SATA_LINK_TIMEOUT:
5148 		return ("XFER_ERROR_SATA_LINK_TIMEOUT");
5149 	case PMCOUT_STATUS_XFER_ERROR_SATA:
5150 		return ("XFER_ERROR_SATA");
5151 	case PMCOUT_STATUS_XFER_ERROR_REJECTED_NCQ_MODE:
5152 		return ("XFER_ERROR_REJECTED_NCQ_MODE");
5153 	case PMCOUT_STATUS_XFER_ERROR_ABORTED_DUE_TO_SRST:
5154 		return ("XFER_ERROR_ABORTED_DUE_TO_SRST");
5155 	case PMCOUT_STATUS_XFER_ERROR_ABORTED_NCQ_MODE:
5156 		return ("XFER_ERROR_ABORTED_NCQ_MODE");
5157 	case PMCOUT_STATUS_IO_XFER_OPEN_RETRY_TIMEOUT:
5158 		return ("IO_XFER_OPEN_RETRY_TIMEOUT");
5159 	case PMCOUT_STATUS_SMP_RESP_CONNECTION_ERROR:
5160 		return ("SMP_RESP_CONNECTION_ERROR");
5161 	case PMCOUT_STATUS_XFER_ERROR_UNEXPECTED_PHASE:
5162 		return ("XFER_ERROR_UNEXPECTED_PHASE");
5163 	case PMCOUT_STATUS_XFER_ERROR_RDY_OVERRUN:
5164 		return ("XFER_ERROR_RDY_OVERRUN");
5165 	case PMCOUT_STATUS_XFER_ERROR_RDY_NOT_EXPECTED:
5166 		return ("XFER_ERROR_RDY_NOT_EXPECTED");
5167 	case PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT:
5168 		return ("XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT");
5169 	case PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_BREAK_BEFORE_ACK_NACK:
5170 		return ("XFER_ERROR_CMD_ISSUE_BREAK_BEFORE_ACK_NACK");
5171 	case PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_PHY_DOWN_BEFORE_ACK_NAK:
5172 		return ("XFER_ERROR_CMD_ISSUE_PHY_DOWN_BEFORE_ACK_NAK");
5173 	case PMCOUT_STATUS_XFER_ERROR_OFFSET_MISMATCH:
5174 		return ("XFER_ERROR_OFFSET_MISMATCH");
5175 	case PMCOUT_STATUS_XFER_ERROR_ZERO_DATA_LEN:
5176 		return ("XFER_ERROR_ZERO_DATA_LEN");
5177 	case PMCOUT_STATUS_XFER_CMD_FRAME_ISSUED:
5178 		return ("XFER_CMD_FRAME_ISSUED");
5179 	case PMCOUT_STATUS_ERROR_INTERNAL_SMP_RESOURCE:
5180 		return ("ERROR_INTERNAL_SMP_RESOURCE");
5181 	case PMCOUT_STATUS_IO_PORT_IN_RESET:
5182 		return ("IO_PORT_IN_RESET");
5183 	case PMCOUT_STATUS_IO_DS_NON_OPERATIONAL:
5184 		return ("DEVICE STATE NON-OPERATIONAL");
5185 	case PMCOUT_STATUS_IO_DS_IN_RECOVERY:
5186 		return ("DEVICE STATE IN RECOVERY");
5187 	default:
5188 		return (NULL);
5189 	}
5190 }
5191 
5192 uint64_t
5193 pmcs_barray2wwn(uint8_t ba[8])
5194 {
5195 	uint64_t result = 0;
5196 	int i;
5197 
5198 	for (i = 0; i < 8; i++) {
5199 		result <<= 8;
5200 		result |= ba[i];
5201 	}
5202 	return (result);
5203 }
5204 
5205 void
5206 pmcs_wwn2barray(uint64_t wwn, uint8_t ba[8])
5207 {
5208 	int i;
5209 	for (i = 0; i < 8; i++) {
5210 		ba[7 - i] = wwn & 0xff;
5211 		wwn >>= 8;
5212 	}
5213 }
5214 
5215 void
5216 pmcs_report_fwversion(pmcs_hw_t *pwp)
5217 {
5218 	const char *fwsupport;
5219 	switch (PMCS_FW_TYPE(pwp)) {
5220 	case PMCS_FW_TYPE_RELEASED:
5221 		fwsupport = "Released";
5222 		break;
5223 	case PMCS_FW_TYPE_DEVELOPMENT:
5224 		fwsupport = "Development";
5225 		break;
5226 	case PMCS_FW_TYPE_ALPHA:
5227 		fwsupport = "Alpha";
5228 		break;
5229 	case PMCS_FW_TYPE_BETA:
5230 		fwsupport = "Beta";
5231 		break;
5232 	default:
5233 		fwsupport = "Special";
5234 		break;
5235 	}
5236 	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL,
5237 	    "Chip Revision: %c; F/W Revision %x.%x.%x %s", 'A' + pwp->chiprev,
5238 	    PMCS_FW_MAJOR(pwp), PMCS_FW_MINOR(pwp), PMCS_FW_MICRO(pwp),
5239 	    fwsupport);
5240 }
5241 
5242 void
5243 pmcs_phy_name(pmcs_hw_t *pwp, pmcs_phy_t *pptr, char *obuf, size_t olen)
5244 {
5245 	if (pptr->parent) {
5246 		pmcs_phy_name(pwp, pptr->parent, obuf, olen);
5247 		(void) snprintf(obuf, olen, "%s.%02x", obuf, pptr->phynum);
5248 	} else {
5249 		(void) snprintf(obuf, olen, "pp%02x", pptr->phynum);
5250 	}
5251 }
5252 
5253 /*
5254  * Implementation for pmcs_find_phy_by_devid.
5255  * If the PHY is found, it is returned locked.
5256  */
5257 static pmcs_phy_t *
5258 pmcs_find_phy_by_devid_impl(pmcs_phy_t *phyp, uint32_t device_id)
5259 {
5260 	pmcs_phy_t *match, *cphyp, *nphyp;
5261 
5262 	ASSERT(!mutex_owned(&phyp->phy_lock));
5263 
5264 	while (phyp) {
5265 		pmcs_lock_phy(phyp);
5266 
5267 		if ((phyp->valid_device_id) && (phyp->device_id == device_id)) {
5268 			return (phyp);
5269 		}
5270 		if (phyp->children) {
5271 			cphyp = phyp->children;
5272 			pmcs_unlock_phy(phyp);
5273 			match = pmcs_find_phy_by_devid_impl(cphyp, device_id);
5274 			if (match) {
5275 				ASSERT(mutex_owned(&match->phy_lock));
5276 				return (match);
5277 			}
5278 			pmcs_lock_phy(phyp);
5279 		}
5280 
5281 		if (IS_ROOT_PHY(phyp)) {
5282 			pmcs_unlock_phy(phyp);
5283 			phyp = NULL;
5284 		} else {
5285 			nphyp = phyp->sibling;
5286 			pmcs_unlock_phy(phyp);
5287 			phyp = nphyp;
5288 		}
5289 	}
5290 
5291 	return (NULL);
5292 }
5293 
5294 /*
5295  * If the PHY is found, it is returned locked
5296  */
5297 pmcs_phy_t *
5298 pmcs_find_phy_by_devid(pmcs_hw_t *pwp, uint32_t device_id)
5299 {
5300 	pmcs_phy_t *phyp, *match = NULL;
5301 
5302 	phyp = pwp->root_phys;
5303 
5304 	while (phyp) {
5305 		match = pmcs_find_phy_by_devid_impl(phyp, device_id);
5306 		if (match) {
5307 			ASSERT(mutex_owned(&match->phy_lock));
5308 			return (match);
5309 		}
5310 		phyp = phyp->sibling;
5311 	}
5312 
5313 	return (NULL);
5314 }
5315 
5316 /*
5317  * This function is called as a sanity check to ensure that a newly registered
5318  * PHY doesn't have a device_id that exists with another registered PHY.
5319  */
5320 static boolean_t
5321 pmcs_validate_devid(pmcs_phy_t *parent, pmcs_phy_t *phyp, uint32_t device_id)
5322 {
5323 	pmcs_phy_t *pptr;
5324 	boolean_t rval;
5325 
5326 	pptr = parent;
5327 
5328 	while (pptr) {
5329 		if (pptr->valid_device_id && (pptr != phyp) &&
5330 		    (pptr->device_id == device_id)) {
5331 			pmcs_prt(pptr->pwp, PMCS_PRT_DEBUG, pptr, NULL,
5332 			    "%s: phy %s already exists as %s with "
5333 			    "device id 0x%x", __func__, phyp->path,
5334 			    pptr->path, device_id);
5335 			return (B_FALSE);
5336 		}
5337 
5338 		if (pptr->children) {
5339 			rval = pmcs_validate_devid(pptr->children, phyp,
5340 			    device_id);
5341 			if (rval == B_FALSE) {
5342 				return (rval);
5343 			}
5344 		}
5345 
5346 		pptr = pptr->sibling;
5347 	}
5348 
5349 	/* This PHY and device_id are valid */
5350 	return (B_TRUE);
5351 }
5352 
5353 /*
5354  * If the PHY is found, it is returned locked
5355  */
5356 static pmcs_phy_t *
5357 pmcs_find_phy_by_wwn_impl(pmcs_phy_t *phyp, uint8_t *wwn)
5358 {
5359 	pmcs_phy_t *matched_phy, *cphyp, *nphyp;
5360 
5361 	ASSERT(!mutex_owned(&phyp->phy_lock));
5362 
5363 	while (phyp) {
5364 		pmcs_lock_phy(phyp);
5365 
5366 		if (phyp->valid_device_id) {
5367 			if (memcmp(phyp->sas_address, wwn, 8) == 0) {
5368 				return (phyp);
5369 			}
5370 		}
5371 
5372 		if (phyp->children) {
5373 			cphyp = phyp->children;
5374 			pmcs_unlock_phy(phyp);
5375 			matched_phy = pmcs_find_phy_by_wwn_impl(cphyp, wwn);
5376 			if (matched_phy) {
5377 				ASSERT(mutex_owned(&matched_phy->phy_lock));
5378 				return (matched_phy);
5379 			}
5380 			pmcs_lock_phy(phyp);
5381 		}
5382 
5383 		/*
5384 		 * Only iterate through non-root PHYs
5385 		 */
5386 		if (IS_ROOT_PHY(phyp)) {
5387 			pmcs_unlock_phy(phyp);
5388 			phyp = NULL;
5389 		} else {
5390 			nphyp = phyp->sibling;
5391 			pmcs_unlock_phy(phyp);
5392 			phyp = nphyp;
5393 		}
5394 	}
5395 
5396 	return (NULL);
5397 }
5398 
5399 pmcs_phy_t *
5400 pmcs_find_phy_by_wwn(pmcs_hw_t *pwp, uint64_t wwn)
5401 {
5402 	uint8_t ebstr[8];
5403 	pmcs_phy_t *pptr, *matched_phy;
5404 
5405 	pmcs_wwn2barray(wwn, ebstr);
5406 
5407 	pptr = pwp->root_phys;
5408 	while (pptr) {
5409 		matched_phy = pmcs_find_phy_by_wwn_impl(pptr, ebstr);
5410 		if (matched_phy) {
5411 			ASSERT(mutex_owned(&matched_phy->phy_lock));
5412 			return (matched_phy);
5413 		}
5414 
5415 		pptr = pptr->sibling;
5416 	}
5417 
5418 	return (NULL);
5419 }
5420 
5421 
5422 /*
5423  * pmcs_find_phy_by_sas_address
5424  *
5425  * Find a PHY that both matches "sas_addr" and is on "iport".
5426  * If a matching PHY is found, it is returned locked.
5427  */
5428 pmcs_phy_t *
5429 pmcs_find_phy_by_sas_address(pmcs_hw_t *pwp, pmcs_iport_t *iport,
5430     pmcs_phy_t *root, char *sas_addr)
5431 {
5432 	int ua_form = 1;
5433 	uint64_t wwn;
5434 	char addr[PMCS_MAX_UA_SIZE];
5435 	pmcs_phy_t *pptr, *pnext, *pchild;
5436 
5437 	if (root == NULL) {
5438 		pptr = pwp->root_phys;
5439 	} else {
5440 		pptr = root;
5441 	}
5442 
5443 	while (pptr) {
5444 		pmcs_lock_phy(pptr);
5445 		/*
5446 		 * If the PHY is dead or does not have a valid device ID,
5447 		 * skip it.
5448 		 */
5449 		if ((pptr->dead) || (!pptr->valid_device_id)) {
5450 			goto next_phy;
5451 		}
5452 
5453 		if (pptr->iport != iport) {
5454 			goto next_phy;
5455 		}
5456 
5457 		wwn = pmcs_barray2wwn(pptr->sas_address);
5458 		(void *) scsi_wwn_to_wwnstr(wwn, ua_form, addr);
5459 		if (strncmp(addr, sas_addr, strlen(addr)) == 0) {
5460 			return (pptr);
5461 		}
5462 
5463 		if (pptr->children) {
5464 			pchild = pptr->children;
5465 			pmcs_unlock_phy(pptr);
5466 			pnext = pmcs_find_phy_by_sas_address(pwp, iport, pchild,
5467 			    sas_addr);
5468 			if (pnext) {
5469 				return (pnext);
5470 			}
5471 			pmcs_lock_phy(pptr);
5472 		}
5473 
5474 next_phy:
5475 		pnext = pptr->sibling;
5476 		pmcs_unlock_phy(pptr);
5477 		pptr = pnext;
5478 	}
5479 
5480 	return (NULL);
5481 }
5482 
5483 void
5484 pmcs_fis_dump(pmcs_hw_t *pwp, fis_t fis)
5485 {
5486 	switch (fis[0] & 0xff) {
5487 	case FIS_REG_H2DEV:
5488 		pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL,
5489 		    "FIS REGISTER HOST TO DEVICE: "
5490 		    "OP=0x%02x Feature=0x%04x Count=0x%04x Device=0x%02x "
5491 		    "LBA=%llu", BYTE2(fis[0]), BYTE3(fis[2]) << 8 |
5492 		    BYTE3(fis[0]), WORD0(fis[3]), BYTE3(fis[1]),
5493 		    (unsigned long long)
5494 		    (((uint64_t)fis[2] & 0x00ffffff) << 24 |
5495 		    ((uint64_t)fis[1] & 0x00ffffff)));
5496 		break;
5497 	case FIS_REG_D2H:
5498 		pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL,
5499 		    "FIS REGISTER DEVICE TO HOST: Status=0x%02x "
5500 		    "Error=0x%02x Dev=0x%02x Count=0x%04x LBA=%llu",
5501 		    BYTE2(fis[0]), BYTE3(fis[0]), BYTE3(fis[1]), WORD0(fis[3]),
5502 		    (unsigned long long)(((uint64_t)fis[2] & 0x00ffffff) << 24 |
5503 		    ((uint64_t)fis[1] & 0x00ffffff)));
5504 		break;
5505 	default:
5506 		pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL,
5507 		    "FIS: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x",
5508 		    fis[0], fis[1], fis[2], fis[3], fis[4], fis[5], fis[6]);
5509 		break;
5510 	}
5511 }
5512 
5513 void
5514 pmcs_print_entry(pmcs_hw_t *pwp, int level, char *msg, void *arg)
5515 {
5516 	uint32_t *mb = arg;
5517 	size_t i;
5518 
5519 	pmcs_prt(pwp, level, NULL, NULL, msg);
5520 	for (i = 0; i < (PMCS_QENTRY_SIZE / sizeof (uint32_t)); i += 4) {
5521 		pmcs_prt(pwp, level, NULL, NULL,
5522 		    "Offset %2lu: 0x%08x 0x%08x 0x%08x 0x%08x",
5523 		    i * sizeof (uint32_t), LE_32(mb[i]),
5524 		    LE_32(mb[i+1]), LE_32(mb[i+2]), LE_32(mb[i+3]));
5525 	}
5526 }
5527 
5528 /*
5529  * If phyp == NULL we're being called from the worker thread, in which
5530  * case we need to check all the PHYs.  In this case, the softstate lock
5531  * will be held.
5532  * If phyp is non-NULL, just issue the spinup release for the specified PHY
5533  * (which will already be locked).
5534  */
5535 void
5536 pmcs_spinup_release(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
5537 {
5538 	uint32_t *msg;
5539 	struct pmcwork *pwrk;
5540 	pmcs_phy_t *tphyp;
5541 
5542 	if (phyp != NULL) {
5543 		ASSERT(mutex_owned(&phyp->phy_lock));
5544 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, phyp, NULL,
5545 		    "%s: Issuing spinup release only for PHY %s", __func__,
5546 		    phyp->path);
5547 		mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5548 		msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5549 		if (msg == NULL || (pwrk =
5550 		    pmcs_gwork(pwp, PMCS_TAG_TYPE_NONE, NULL)) == NULL) {
5551 			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5552 			SCHEDULE_WORK(pwp, PMCS_WORK_SPINUP_RELEASE);
5553 			return;
5554 		}
5555 
5556 		phyp->spinup_hold = 0;
5557 		bzero(msg, PMCS_QENTRY_SIZE);
5558 		msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
5559 		    PMCIN_LOCAL_PHY_CONTROL));
5560 		msg[1] = LE_32(pwrk->htag);
5561 		msg[2] = LE_32((0x10 << 8) | phyp->phynum);
5562 
5563 		pwrk->dtype = phyp->dtype;
5564 		pwrk->state = PMCS_WORK_STATE_ONCHIP;
5565 		mutex_exit(&pwrk->lock);
5566 		INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5567 		return;
5568 	}
5569 
5570 	ASSERT(mutex_owned(&pwp->lock));
5571 
5572 	tphyp = pwp->root_phys;
5573 	while (tphyp) {
5574 		pmcs_lock_phy(tphyp);
5575 		if (tphyp->spinup_hold == 0) {
5576 			pmcs_unlock_phy(tphyp);
5577 			tphyp = tphyp->sibling;
5578 			continue;
5579 		}
5580 
5581 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, phyp, NULL,
5582 		    "%s: Issuing spinup release for PHY %s", __func__,
5583 		    phyp->path);
5584 
5585 		mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5586 		msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5587 		if (msg == NULL || (pwrk =
5588 		    pmcs_gwork(pwp, PMCS_TAG_TYPE_NONE, NULL)) == NULL) {
5589 			pmcs_unlock_phy(tphyp);
5590 			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5591 			SCHEDULE_WORK(pwp, PMCS_WORK_SPINUP_RELEASE);
5592 			break;
5593 		}
5594 
5595 		tphyp->spinup_hold = 0;
5596 		bzero(msg, PMCS_QENTRY_SIZE);
5597 		msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
5598 		    PMCIN_LOCAL_PHY_CONTROL));
5599 		msg[1] = LE_32(pwrk->htag);
5600 		msg[2] = LE_32((0x10 << 8) | tphyp->phynum);
5601 
5602 		pwrk->dtype = phyp->dtype;
5603 		pwrk->state = PMCS_WORK_STATE_ONCHIP;
5604 		mutex_exit(&pwrk->lock);
5605 		INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5606 		pmcs_unlock_phy(tphyp);
5607 
5608 		tphyp = tphyp->sibling;
5609 	}
5610 }
5611 
5612 /*
5613  * Abort commands on dead PHYs and deregister them as well as removing
5614  * the associated targets.
5615  */
5616 static int
5617 pmcs_kill_devices(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
5618 {
5619 	pmcs_phy_t *pnext, *pchild;
5620 	boolean_t remove_device;
5621 	int rval = 0;
5622 
5623 	while (phyp) {
5624 		pmcs_lock_phy(phyp);
5625 		pchild = phyp->children;
5626 		pnext = phyp->sibling;
5627 		pmcs_unlock_phy(phyp);
5628 
5629 		if (pchild) {
5630 			rval = pmcs_kill_devices(pwp, pchild);
5631 			if (rval) {
5632 				return (rval);
5633 			}
5634 		}
5635 
5636 		/*
5637 		 * pmcs_remove_device requires the softstate lock.
5638 		 */
5639 		mutex_enter(&pwp->lock);
5640 		pmcs_lock_phy(phyp);
5641 		if (phyp->dead && phyp->valid_device_id) {
5642 			remove_device = B_TRUE;
5643 		} else {
5644 			remove_device = B_FALSE;
5645 		}
5646 
5647 		if (remove_device) {
5648 			pmcs_remove_device(pwp, phyp);
5649 			mutex_exit(&pwp->lock);
5650 
5651 			rval = pmcs_kill_device(pwp, phyp);
5652 
5653 			if (rval) {
5654 				pmcs_unlock_phy(phyp);
5655 				return (rval);
5656 			}
5657 		} else {
5658 			mutex_exit(&pwp->lock);
5659 		}
5660 
5661 		pmcs_unlock_phy(phyp);
5662 		phyp = pnext;
5663 	}
5664 
5665 	return (rval);
5666 }
5667 
5668 /*
5669  * Called with PHY locked
5670  */
5671 int
5672 pmcs_kill_device(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
5673 {
5674 	int r, result;
5675 	uint32_t msg[PMCS_MSG_SIZE], *ptr, status;
5676 	struct pmcwork *pwrk;
5677 
5678 	pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL, "kill %s device @ %s",
5679 	    pmcs_get_typename(pptr->dtype), pptr->path);
5680 
5681 	/*
5682 	 * There may be an outstanding ABORT_ALL running, which we wouldn't
5683 	 * know just by checking abort_pending.  We can, however, check
5684 	 * abort_all_start.  If it's non-zero, there is one, and we'll just
5685 	 * sit here and wait for it to complete.  If we don't, we'll remove
5686 	 * the device while there are still commands pending.
5687 	 */
5688 	if (pptr->abort_all_start) {
5689 		while (pptr->abort_all_start) {
5690 			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
5691 			    "%s: Waiting for outstanding ABORT_ALL on PHY 0x%p",
5692 			    __func__, (void *)pptr);
5693 			cv_wait(&pptr->abort_all_cv, &pptr->phy_lock);
5694 		}
5695 	} else if (pptr->abort_pending) {
5696 		r = pmcs_abort(pwp, pptr, pptr->device_id, 1, 1);
5697 
5698 		if (r) {
5699 			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
5700 			    "%s: ABORT_ALL returned non-zero status (%d) for "
5701 			    "PHY 0x%p", __func__, r, (void *)pptr);
5702 			return (r);
5703 		}
5704 		pptr->abort_pending = 0;
5705 	}
5706 
5707 	if (pptr->valid_device_id == 0) {
5708 		return (0);
5709 	}
5710 
5711 	if ((pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr)) == NULL) {
5712 		pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL, pmcs_nowrk, __func__);
5713 		return (ENOMEM);
5714 	}
5715 	pwrk->arg = msg;
5716 	pwrk->dtype = pptr->dtype;
5717 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
5718 	    PMCIN_DEREGISTER_DEVICE_HANDLE));
5719 	msg[1] = LE_32(pwrk->htag);
5720 	msg[2] = LE_32(pptr->device_id);
5721 
5722 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5723 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5724 	if (ptr == NULL) {
5725 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5726 		mutex_exit(&pwrk->lock);
5727 		pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL, pmcs_nomsg, __func__);
5728 		return (ENOMEM);
5729 	}
5730 
5731 	COPY_MESSAGE(ptr, msg, 3);
5732 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
5733 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5734 
5735 	pmcs_unlock_phy(pptr);
5736 	WAIT_FOR(pwrk, 250, result);
5737 	pmcs_lock_phy(pptr);
5738 	pmcs_pwork(pwp, pwrk);
5739 
5740 	if (result) {
5741 		return (ETIMEDOUT);
5742 	}
5743 	status = LE_32(msg[2]);
5744 	if (status != PMCOUT_STATUS_OK) {
5745 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
5746 		    "%s: status 0x%x when trying to deregister device %s",
5747 		    __func__, status, pptr->path);
5748 	}
5749 
5750 	pptr->device_id = PMCS_INVALID_DEVICE_ID;
5751 	PHY_CHANGED(pwp, pptr);
5752 	RESTART_DISCOVERY(pwp);
5753 	pptr->valid_device_id = 0;
5754 	return (0);
5755 }
5756 
5757 /*
5758  * Acknowledge the SAS h/w events that need acknowledgement.
5759  * This is only needed for first level PHYs.
5760  */
5761 void
5762 pmcs_ack_events(pmcs_hw_t *pwp)
5763 {
5764 	uint32_t msg[PMCS_MSG_SIZE], *ptr;
5765 	struct pmcwork *pwrk;
5766 	pmcs_phy_t *pptr;
5767 
5768 	for (pptr = pwp->root_phys; pptr; pptr = pptr->sibling) {
5769 		pmcs_lock_phy(pptr);
5770 		if (pptr->hw_event_ack == 0) {
5771 			pmcs_unlock_phy(pptr);
5772 			continue;
5773 		}
5774 		mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5775 		ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5776 
5777 		if ((ptr == NULL) || (pwrk =
5778 		    pmcs_gwork(pwp, PMCS_TAG_TYPE_NONE, NULL)) == NULL) {
5779 			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5780 			pmcs_unlock_phy(pptr);
5781 			SCHEDULE_WORK(pwp, PMCS_WORK_SAS_HW_ACK);
5782 			break;
5783 		}
5784 
5785 		msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
5786 		    PMCIN_SAW_HW_EVENT_ACK));
5787 		msg[1] = LE_32(pwrk->htag);
5788 		msg[2] = LE_32(pptr->hw_event_ack);
5789 
5790 		mutex_exit(&pwrk->lock);
5791 		pwrk->dtype = pptr->dtype;
5792 		pptr->hw_event_ack = 0;
5793 		COPY_MESSAGE(ptr, msg, 3);
5794 		INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5795 		pmcs_unlock_phy(pptr);
5796 	}
5797 }
5798 
5799 /*
5800  * Load DMA
5801  */
5802 int
5803 pmcs_dma_load(pmcs_hw_t *pwp, pmcs_cmd_t *sp, uint32_t *msg)
5804 {
5805 	ddi_dma_cookie_t *sg;
5806 	pmcs_dmachunk_t *tc;
5807 	pmcs_dmasgl_t *sgl, *prior;
5808 	int seg, tsc;
5809 	uint64_t sgl_addr;
5810 
5811 	/*
5812 	 * If we have no data segments, we're done.
5813 	 */
5814 	if (CMD2PKT(sp)->pkt_numcookies == 0) {
5815 		return (0);
5816 	}
5817 
5818 	/*
5819 	 * Get the S/G list pointer.
5820 	 */
5821 	sg = CMD2PKT(sp)->pkt_cookies;
5822 
5823 	/*
5824 	 * If we only have one dma segment, we can directly address that
5825 	 * data within the Inbound message itself.
5826 	 */
5827 	if (CMD2PKT(sp)->pkt_numcookies == 1) {
5828 		msg[12] = LE_32(DWORD0(sg->dmac_laddress));
5829 		msg[13] = LE_32(DWORD1(sg->dmac_laddress));
5830 		msg[14] = LE_32(sg->dmac_size);
5831 		msg[15] = 0;
5832 		return (0);
5833 	}
5834 
5835 	/*
5836 	 * Otherwise, we'll need one or more external S/G list chunks.
5837 	 * Get the first one and its dma address into the Inbound message.
5838 	 */
5839 	mutex_enter(&pwp->dma_lock);
5840 	tc = pwp->dma_freelist;
5841 	if (tc == NULL) {
5842 		SCHEDULE_WORK(pwp, PMCS_WORK_ADD_DMA_CHUNKS);
5843 		mutex_exit(&pwp->dma_lock);
5844 		pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL,
5845 		    "%s: out of SG lists", __func__);
5846 		return (-1);
5847 	}
5848 	pwp->dma_freelist = tc->nxt;
5849 	mutex_exit(&pwp->dma_lock);
5850 
5851 	tc->nxt = NULL;
5852 	sp->cmd_clist = tc;
5853 	sgl = tc->chunks;
5854 	(void) memset(tc->chunks, 0, PMCS_SGL_CHUNKSZ);
5855 	sgl_addr = tc->addr;
5856 	msg[12] = LE_32(DWORD0(sgl_addr));
5857 	msg[13] = LE_32(DWORD1(sgl_addr));
5858 	msg[14] = 0;
5859 	msg[15] = LE_32(PMCS_DMASGL_EXTENSION);
5860 
5861 	prior = sgl;
5862 	tsc = 0;
5863 
5864 	for (seg = 0; seg < CMD2PKT(sp)->pkt_numcookies; seg++) {
5865 		/*
5866 		 * If the current segment count for this chunk is one less than
5867 		 * the number s/g lists per chunk and we have more than one seg
5868 		 * to go, we need another chunk. Get it, and make sure that the
5869 		 * tail end of the the previous chunk points the new chunk
5870 		 * (if remembering an offset can be called 'pointing to').
5871 		 *
5872 		 * Note that we can store the offset into our command area that
5873 		 * represents the new chunk in the length field of the part
5874 		 * that points the PMC chip at the next chunk- the PMC chip
5875 		 * ignores this field when the EXTENSION bit is set.
5876 		 *
5877 		 * This is required for dma unloads later.
5878 		 */
5879 		if (tsc == (PMCS_SGL_NCHUNKS - 1) &&
5880 		    seg < (CMD2PKT(sp)->pkt_numcookies - 1)) {
5881 			mutex_enter(&pwp->dma_lock);
5882 			tc = pwp->dma_freelist;
5883 			if (tc == NULL) {
5884 				SCHEDULE_WORK(pwp, PMCS_WORK_ADD_DMA_CHUNKS);
5885 				mutex_exit(&pwp->dma_lock);
5886 				pmcs_dma_unload(pwp, sp);
5887 				pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL,
5888 				    "%s: out of SG lists", __func__);
5889 				return (-1);
5890 			}
5891 			pwp->dma_freelist = tc->nxt;
5892 			tc->nxt = sp->cmd_clist;
5893 			mutex_exit(&pwp->dma_lock);
5894 
5895 			sp->cmd_clist = tc;
5896 			(void) memset(tc->chunks, 0, PMCS_SGL_CHUNKSZ);
5897 			sgl = tc->chunks;
5898 			sgl_addr = tc->addr;
5899 			prior[PMCS_SGL_NCHUNKS-1].sglal =
5900 			    LE_32(DWORD0(sgl_addr));
5901 			prior[PMCS_SGL_NCHUNKS-1].sglah =
5902 			    LE_32(DWORD1(sgl_addr));
5903 			prior[PMCS_SGL_NCHUNKS-1].sglen = 0;
5904 			prior[PMCS_SGL_NCHUNKS-1].flags =
5905 			    LE_32(PMCS_DMASGL_EXTENSION);
5906 			prior = sgl;
5907 			tsc = 0;
5908 		}
5909 		sgl[tsc].sglal = LE_32(DWORD0(sg->dmac_laddress));
5910 		sgl[tsc].sglah = LE_32(DWORD1(sg->dmac_laddress));
5911 		sgl[tsc].sglen = LE_32(sg->dmac_size);
5912 		sgl[tsc++].flags = 0;
5913 		sg++;
5914 	}
5915 	return (0);
5916 }
5917 
5918 /*
5919  * Unload DMA
5920  */
5921 void
5922 pmcs_dma_unload(pmcs_hw_t *pwp, pmcs_cmd_t *sp)
5923 {
5924 	pmcs_dmachunk_t *cp;
5925 
5926 	mutex_enter(&pwp->dma_lock);
5927 	while ((cp = sp->cmd_clist) != NULL) {
5928 		sp->cmd_clist = cp->nxt;
5929 		cp->nxt = pwp->dma_freelist;
5930 		pwp->dma_freelist = cp;
5931 	}
5932 	mutex_exit(&pwp->dma_lock);
5933 }
5934 
5935 /*
5936  * Take a chunk of consistent memory that has just been allocated and inserted
5937  * into the cip indices and prepare it for DMA chunk usage and add it to the
5938  * freelist.
5939  *
5940  * Called with dma_lock locked (except during attach when it's unnecessary)
5941  */
5942 void
5943 pmcs_idma_chunks(pmcs_hw_t *pwp, pmcs_dmachunk_t *dcp,
5944     pmcs_chunk_t *pchunk, unsigned long lim)
5945 {
5946 	unsigned long off, n;
5947 	pmcs_dmachunk_t *np = dcp;
5948 	pmcs_chunk_t *tmp_chunk;
5949 
5950 	if (pwp->dma_chunklist == NULL) {
5951 		pwp->dma_chunklist = pchunk;
5952 	} else {
5953 		tmp_chunk = pwp->dma_chunklist;
5954 		while (tmp_chunk->next) {
5955 			tmp_chunk = tmp_chunk->next;
5956 		}
5957 		tmp_chunk->next = pchunk;
5958 	}
5959 
5960 	/*
5961 	 * Install offsets into chunk lists.
5962 	 */
5963 	for (n = 0, off = 0; off < lim; off += PMCS_SGL_CHUNKSZ, n++) {
5964 		np->chunks = (void *)&pchunk->addrp[off];
5965 		np->addr = pchunk->dma_addr + off;
5966 		np->acc_handle = pchunk->acc_handle;
5967 		np->dma_handle = pchunk->dma_handle;
5968 		if ((off + PMCS_SGL_CHUNKSZ) < lim) {
5969 			np = np->nxt;
5970 		}
5971 	}
5972 	np->nxt = pwp->dma_freelist;
5973 	pwp->dma_freelist = dcp;
5974 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL,
5975 	    "added %lu DMA chunks ", n);
5976 }
5977 
5978 /*
5979  * Change the value of the interrupt coalescing timer.  This is done currently
5980  * only for I/O completions.  If we're using the "auto clear" feature, it can
5981  * be turned back on when interrupt coalescing is turned off and must be
5982  * turned off when the coalescing timer is on.
5983  * NOTE: PMCS_MSIX_GENERAL and PMCS_OQ_IODONE are the same value.  As long
5984  * as that's true, we don't need to distinguish between them.
5985  */
5986 
5987 void
5988 pmcs_set_intr_coal_timer(pmcs_hw_t *pwp, pmcs_coal_timer_adj_t adj)
5989 {
5990 	if (adj == DECREASE_TIMER) {
5991 		/* If the timer is already off, nothing to do. */
5992 		if (pwp->io_intr_coal.timer_on == B_FALSE) {
5993 			return;
5994 		}
5995 
5996 		pwp->io_intr_coal.intr_coal_timer -= PMCS_COAL_TIMER_GRAN;
5997 
5998 		if (pwp->io_intr_coal.intr_coal_timer == 0) {
5999 			/* Disable the timer */
6000 			pmcs_wr_topunit(pwp, PMCS_INT_COALESCING_CONTROL, 0);
6001 
6002 			if (pwp->odb_auto_clear & (1 << PMCS_MSIX_IODONE)) {
6003 				pmcs_wr_topunit(pwp, PMCS_OBDB_AUTO_CLR,
6004 				    pwp->odb_auto_clear);
6005 			}
6006 
6007 			pwp->io_intr_coal.timer_on = B_FALSE;
6008 			pwp->io_intr_coal.max_io_completions = B_FALSE;
6009 			pwp->io_intr_coal.num_intrs = 0;
6010 			pwp->io_intr_coal.int_cleared = B_FALSE;
6011 			pwp->io_intr_coal.num_io_completions = 0;
6012 
6013 			DTRACE_PROBE1(pmcs__intr__coalesce__timer__off,
6014 			    pmcs_io_intr_coal_t *, &pwp->io_intr_coal);
6015 		} else {
6016 			pmcs_wr_topunit(pwp, PMCS_INT_COALESCING_TIMER,
6017 			    pwp->io_intr_coal.intr_coal_timer);
6018 		}
6019 	} else {
6020 		/*
6021 		 * If the timer isn't on yet, do the setup for it now.
6022 		 */
6023 		if (pwp->io_intr_coal.timer_on == B_FALSE) {
6024 			/* If auto clear is being used, turn it off. */
6025 			if (pwp->odb_auto_clear & (1 << PMCS_MSIX_IODONE)) {
6026 				pmcs_wr_topunit(pwp, PMCS_OBDB_AUTO_CLR,
6027 				    (pwp->odb_auto_clear &
6028 				    ~(1 << PMCS_MSIX_IODONE)));
6029 			}
6030 
6031 			pmcs_wr_topunit(pwp, PMCS_INT_COALESCING_CONTROL,
6032 			    (1 << PMCS_MSIX_IODONE));
6033 			pwp->io_intr_coal.timer_on = B_TRUE;
6034 			pwp->io_intr_coal.intr_coal_timer =
6035 			    PMCS_COAL_TIMER_GRAN;
6036 
6037 			DTRACE_PROBE1(pmcs__intr__coalesce__timer__on,
6038 			    pmcs_io_intr_coal_t *, &pwp->io_intr_coal);
6039 		} else {
6040 			pwp->io_intr_coal.intr_coal_timer +=
6041 			    PMCS_COAL_TIMER_GRAN;
6042 		}
6043 
6044 		if (pwp->io_intr_coal.intr_coal_timer > PMCS_MAX_COAL_TIMER) {
6045 			pwp->io_intr_coal.intr_coal_timer = PMCS_MAX_COAL_TIMER;
6046 		}
6047 
6048 		pmcs_wr_topunit(pwp, PMCS_INT_COALESCING_TIMER,
6049 		    pwp->io_intr_coal.intr_coal_timer);
6050 	}
6051 
6052 	/*
6053 	 * Adjust the interrupt threshold based on the current timer value
6054 	 */
6055 	pwp->io_intr_coal.intr_threshold =
6056 	    PMCS_INTR_THRESHOLD(PMCS_QUANTUM_TIME_USECS * 1000 /
6057 	    (pwp->io_intr_coal.intr_latency +
6058 	    (pwp->io_intr_coal.intr_coal_timer * 1000)));
6059 }
6060 
6061 /*
6062  * Register Access functions
6063  */
6064 uint32_t
6065 pmcs_rd_iqci(pmcs_hw_t *pwp, uint32_t qnum)
6066 {
6067 	uint32_t iqci;
6068 
6069 	if (ddi_dma_sync(pwp->cip_handles, 0, 0, DDI_DMA_SYNC_FORKERNEL) !=
6070 	    DDI_SUCCESS) {
6071 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
6072 		    "%s: ddi_dma_sync failed?", __func__);
6073 	}
6074 
6075 	iqci = LE_32(
6076 	    ((uint32_t *)((void *)pwp->cip))[IQ_OFFSET(qnum) >> 2]);
6077 
6078 	return (iqci);
6079 }
6080 
6081 uint32_t
6082 pmcs_rd_oqpi(pmcs_hw_t *pwp, uint32_t qnum)
6083 {
6084 	uint32_t oqpi;
6085 
6086 	if (ddi_dma_sync(pwp->cip_handles, 0, 0, DDI_DMA_SYNC_FORKERNEL) !=
6087 	    DDI_SUCCESS) {
6088 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
6089 		    "%s: ddi_dma_sync failed?", __func__);
6090 	}
6091 
6092 	oqpi = LE_32(
6093 	    ((uint32_t *)((void *)pwp->cip))[OQ_OFFSET(qnum) >> 2]);
6094 
6095 	return (oqpi);
6096 }
6097 
/*
 * Read a GSM register.  GSM space is reached through the AXI local
 * translation window: repoint the window at the region containing
 * 'off', perform the read, then restore the previous window.  The
 * axil_lock serializes all window manipulation.
 */
uint32_t
pmcs_rd_gsm_reg(pmcs_hw_t *pwp, uint32_t off)
{
	uint32_t rv, newaxil, oldaxil;

	/* Split 'off' into window base (upper bits) and in-window offset */
	newaxil = off & ~GSM_BASE_MASK;
	off &= GSM_BASE_MASK;
	mutex_enter(&pwp->axil_lock);
	/* Save the current window, then point it at our region */
	oldaxil = ddi_get32(pwp->top_acc_handle,
	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]);
	ddi_put32(pwp->top_acc_handle,
	    &pwp->top_regs[PMCS_AXI_TRANS >> 2], newaxil);
	drv_usecwait(10);
	/* Read back to verify the window actually moved */
	if (ddi_get32(pwp->top_acc_handle,
	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]) != newaxil) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
		    "AXIL register update failed");
	}
	rv = ddi_get32(pwp->gsm_acc_handle, &pwp->gsm_regs[off >> 2]);
	/* Restore the previous window and verify the restore took */
	ddi_put32(pwp->top_acc_handle,
	    &pwp->top_regs[PMCS_AXI_TRANS >> 2], oldaxil);
	drv_usecwait(10);
	if (ddi_get32(pwp->top_acc_handle,
	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]) != oldaxil) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
		    "AXIL register restore failed");
	}
	mutex_exit(&pwp->axil_lock);
	return (rv);
}
6128 
/*
 * Write a GSM-space register.  Same AXI window dance as pmcs_rd_gsm_reg:
 * repoint the translation register at the segment containing 'off', do
 * the write through the GSM BAR, then restore the previous translation.
 * axil_lock serializes users of the shared translation register.
 */
void
pmcs_wr_gsm_reg(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
{
	uint32_t newaxil, oldaxil;

	/* Window base for this offset; 'off' becomes the in-window offset */
	newaxil = off & ~GSM_BASE_MASK;
	off &= GSM_BASE_MASK;
	mutex_enter(&pwp->axil_lock);
	oldaxil = ddi_get32(pwp->top_acc_handle,
	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]);
	ddi_put32(pwp->top_acc_handle,
	    &pwp->top_regs[PMCS_AXI_TRANS >> 2], newaxil);
	drv_usecwait(10);
	/* Read back to verify the translation register actually updated */
	if (ddi_get32(pwp->top_acc_handle,
	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]) != newaxil) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
		    "AXIL register update failed");
	}
	ddi_put32(pwp->gsm_acc_handle, &pwp->gsm_regs[off >> 2], val);
	/* Restore the previous window for other users */
	ddi_put32(pwp->top_acc_handle,
	    &pwp->top_regs[PMCS_AXI_TRANS >> 2], oldaxil);
	drv_usecwait(10);
	if (ddi_get32(pwp->top_acc_handle,
	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]) != oldaxil) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
		    "AXIL register restore failed");
	}
	mutex_exit(&pwp->axil_lock);
}
6158 
6159 uint32_t
6160 pmcs_rd_topunit(pmcs_hw_t *pwp, uint32_t off)
6161 {
6162 	switch (off) {
6163 	case PMCS_SPC_RESET:
6164 	case PMCS_SPC_BOOT_STRAP:
6165 	case PMCS_SPC_DEVICE_ID:
6166 	case PMCS_DEVICE_REVISION:
6167 		off = pmcs_rd_gsm_reg(pwp, off);
6168 		break;
6169 	default:
6170 		off = ddi_get32(pwp->top_acc_handle,
6171 		    &pwp->top_regs[off >> 2]);
6172 		break;
6173 	}
6174 	return (off);
6175 }
6176 
6177 void
6178 pmcs_wr_topunit(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
6179 {
6180 	switch (off) {
6181 	case PMCS_SPC_RESET:
6182 	case PMCS_DEVICE_REVISION:
6183 		pmcs_wr_gsm_reg(pwp, off, val);
6184 		break;
6185 	default:
6186 		ddi_put32(pwp->top_acc_handle, &pwp->top_regs[off >> 2], val);
6187 		break;
6188 	}
6189 }
6190 
6191 uint32_t
6192 pmcs_rd_msgunit(pmcs_hw_t *pwp, uint32_t off)
6193 {
6194 	return (ddi_get32(pwp->msg_acc_handle, &pwp->msg_regs[off >> 2]));
6195 }
6196 
6197 uint32_t
6198 pmcs_rd_mpi_tbl(pmcs_hw_t *pwp, uint32_t off)
6199 {
6200 	return (ddi_get32(pwp->mpi_acc_handle,
6201 	    &pwp->mpi_regs[(pwp->mpi_offset + off) >> 2]));
6202 }
6203 
6204 uint32_t
6205 pmcs_rd_gst_tbl(pmcs_hw_t *pwp, uint32_t off)
6206 {
6207 	return (ddi_get32(pwp->mpi_acc_handle,
6208 	    &pwp->mpi_regs[(pwp->mpi_gst_offset + off) >> 2]));
6209 }
6210 
6211 uint32_t
6212 pmcs_rd_iqc_tbl(pmcs_hw_t *pwp, uint32_t off)
6213 {
6214 	return (ddi_get32(pwp->mpi_acc_handle,
6215 	    &pwp->mpi_regs[(pwp->mpi_iqc_offset + off) >> 2]));
6216 }
6217 
6218 uint32_t
6219 pmcs_rd_oqc_tbl(pmcs_hw_t *pwp, uint32_t off)
6220 {
6221 	return (ddi_get32(pwp->mpi_acc_handle,
6222 	    &pwp->mpi_regs[(pwp->mpi_oqc_offset + off) >> 2]));
6223 }
6224 
6225 uint32_t
6226 pmcs_rd_iqpi(pmcs_hw_t *pwp, uint32_t qnum)
6227 {
6228 	return (ddi_get32(pwp->mpi_acc_handle,
6229 	    &pwp->mpi_regs[pwp->iqpi_offset[qnum] >> 2]));
6230 }
6231 
6232 uint32_t
6233 pmcs_rd_oqci(pmcs_hw_t *pwp, uint32_t qnum)
6234 {
6235 	return (ddi_get32(pwp->mpi_acc_handle,
6236 	    &pwp->mpi_regs[pwp->oqci_offset[qnum] >> 2]));
6237 }
6238 
6239 void
6240 pmcs_wr_msgunit(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
6241 {
6242 	ddi_put32(pwp->msg_acc_handle, &pwp->msg_regs[off >> 2], val);
6243 }
6244 
6245 void
6246 pmcs_wr_mpi_tbl(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
6247 {
6248 	ddi_put32(pwp->mpi_acc_handle,
6249 	    &pwp->mpi_regs[(pwp->mpi_offset + off) >> 2], (val));
6250 }
6251 
6252 void
6253 pmcs_wr_gst_tbl(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
6254 {
6255 	ddi_put32(pwp->mpi_acc_handle,
6256 	    &pwp->mpi_regs[(pwp->mpi_gst_offset + off) >> 2], val);
6257 }
6258 
6259 void
6260 pmcs_wr_iqc_tbl(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
6261 {
6262 	ddi_put32(pwp->mpi_acc_handle,
6263 	    &pwp->mpi_regs[(pwp->mpi_iqc_offset + off) >> 2], val);
6264 }
6265 
6266 void
6267 pmcs_wr_oqc_tbl(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
6268 {
6269 	ddi_put32(pwp->mpi_acc_handle,
6270 	    &pwp->mpi_regs[(pwp->mpi_oqc_offset + off) >> 2], val);
6271 }
6272 
6273 void
6274 pmcs_wr_iqci(pmcs_hw_t *pwp, uint32_t qnum, uint32_t val)
6275 {
6276 	((uint32_t *)((void *)pwp->cip))[IQ_OFFSET(qnum) >> 2] = val;
6277 	if (ddi_dma_sync(pwp->cip_handles, 0, 0, DDI_DMA_SYNC_FORDEV) !=
6278 	    DDI_SUCCESS) {
6279 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
6280 		    "%s: ddi_dma_sync failed?", __func__);
6281 	}
6282 }
6283 
6284 void
6285 pmcs_wr_iqpi(pmcs_hw_t *pwp, uint32_t qnum, uint32_t val)
6286 {
6287 	ddi_put32(pwp->mpi_acc_handle,
6288 	    &pwp->mpi_regs[pwp->iqpi_offset[qnum] >> 2], val);
6289 }
6290 
6291 void
6292 pmcs_wr_oqci(pmcs_hw_t *pwp, uint32_t qnum, uint32_t val)
6293 {
6294 	ddi_put32(pwp->mpi_acc_handle,
6295 	    &pwp->mpi_regs[pwp->oqci_offset[qnum] >> 2], val);
6296 }
6297 
6298 void
6299 pmcs_wr_oqpi(pmcs_hw_t *pwp, uint32_t qnum, uint32_t val)
6300 {
6301 	((uint32_t *)((void *)pwp->cip))[OQ_OFFSET(qnum) >> 2] = val;
6302 	if (ddi_dma_sync(pwp->cip_handles, 0, 0, DDI_DMA_SYNC_FORDEV) !=
6303 	    DDI_SUCCESS) {
6304 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
6305 		    "%s: ddi_dma_sync failed?", __func__);
6306 	}
6307 }
6308 
6309 /*
6310  * Check the status value of an outbound IOMB and report anything bad
6311  */
6312 
6313 void
6314 pmcs_check_iomb_status(pmcs_hw_t *pwp, uint32_t *iomb)
6315 {
6316 	uint16_t 	opcode;
6317 	int		offset;
6318 
6319 	if (iomb == NULL) {
6320 		return;
6321 	}
6322 
6323 	opcode = LE_32(iomb[0]) & 0xfff;
6324 
6325 	switch (opcode) {
6326 		/*
6327 		 * The following have no status field, so ignore them
6328 		 */
6329 	case PMCOUT_ECHO:
6330 	case PMCOUT_SAS_HW_EVENT:
6331 	case PMCOUT_GET_DEVICE_HANDLE:
6332 	case PMCOUT_SATA_EVENT:
6333 	case PMCOUT_SSP_EVENT:
6334 	case PMCOUT_DEVICE_HANDLE_ARRIVED:
6335 	case PMCOUT_SMP_REQUEST_RECEIVED:
6336 	case PMCOUT_GPIO:
6337 	case PMCOUT_GPIO_EVENT:
6338 	case PMCOUT_GET_TIME_STAMP:
6339 	case PMCOUT_SKIP_ENTRIES:
6340 	case PMCOUT_GET_NVMD_DATA:	/* Actually lower 16 bits of word 3 */
6341 	case PMCOUT_SET_NVMD_DATA:	/* but ignore - we don't use these */
6342 	case PMCOUT_DEVICE_HANDLE_REMOVED:
6343 	case PMCOUT_SSP_REQUEST_RECEIVED:
6344 		return;
6345 
6346 	case PMCOUT_GENERAL_EVENT:
6347 		offset = 1;
6348 		break;
6349 
6350 	case PMCOUT_SSP_COMPLETION:
6351 	case PMCOUT_SMP_COMPLETION:
6352 	case PMCOUT_DEVICE_REGISTRATION:
6353 	case PMCOUT_DEREGISTER_DEVICE_HANDLE:
6354 	case PMCOUT_SATA_COMPLETION:
6355 	case PMCOUT_DEVICE_INFO:
6356 	case PMCOUT_FW_FLASH_UPDATE:
6357 	case PMCOUT_SSP_ABORT:
6358 	case PMCOUT_SATA_ABORT:
6359 	case PMCOUT_SAS_DIAG_MODE_START_END:
6360 	case PMCOUT_SAS_HW_EVENT_ACK_ACK:
6361 	case PMCOUT_SMP_ABORT:
6362 	case PMCOUT_SET_DEVICE_STATE:
6363 	case PMCOUT_GET_DEVICE_STATE:
6364 	case PMCOUT_SET_DEVICE_INFO:
6365 		offset = 2;
6366 		break;
6367 
6368 	case PMCOUT_LOCAL_PHY_CONTROL:
6369 	case PMCOUT_SAS_DIAG_EXECUTE:
6370 	case PMCOUT_PORT_CONTROL:
6371 		offset = 3;
6372 		break;
6373 
6374 	case PMCOUT_GET_INFO:
6375 	case PMCOUT_GET_VPD:
6376 	case PMCOUT_SAS_ASSISTED_DISCOVERY_EVENT:
6377 	case PMCOUT_SATA_ASSISTED_DISCOVERY_EVENT:
6378 	case PMCOUT_SET_VPD:
6379 	case PMCOUT_TWI:
6380 		pmcs_print_entry(pwp, PMCS_PRT_DEBUG,
6381 		    "Got response for deprecated opcode", iomb);
6382 		return;
6383 
6384 	default:
6385 		pmcs_print_entry(pwp, PMCS_PRT_DEBUG,
6386 		    "Got response for unknown opcode", iomb);
6387 		return;
6388 	}
6389 
6390 	if (LE_32(iomb[offset]) != PMCOUT_STATUS_OK) {
6391 		pmcs_print_entry(pwp, PMCS_PRT_DEBUG,
6392 		    "bad status on TAG_TYPE_NONE command", iomb);
6393 	}
6394 }
6395 
6396 /*
6397  * Called with statlock held
6398  */
6399 void
6400 pmcs_clear_xp(pmcs_hw_t *pwp, pmcs_xscsi_t *xp)
6401 {
6402 	_NOTE(ARGUNUSED(pwp));
6403 
6404 	ASSERT(mutex_owned(&xp->statlock));
6405 
6406 	pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, xp, "%s: Device 0x%p is gone.",
6407 	    __func__, (void *)xp);
6408 
6409 	/*
6410 	 * Clear the dip now.  This keeps pmcs_remove_device from attempting
6411 	 * to call us on the same device while we're still flushing queues.
6412 	 * The only side effect is we can no longer update SM-HBA properties,
6413 	 * but this device is going away anyway, so no matter.
6414 	 */
6415 	xp->dip = NULL;
6416 
6417 	xp->special_running = 0;
6418 	xp->recovering = 0;
6419 	xp->recover_wait = 0;
6420 	xp->draining = 0;
6421 	xp->new = 0;
6422 	xp->assigned = 0;
6423 	xp->dev_state = 0;
6424 	xp->tagmap = 0;
6425 	xp->dev_gone = 1;
6426 	xp->event_recovery = 0;
6427 	xp->dtype = NOTHING;
6428 	xp->wq_recovery_tail = NULL;
6429 	/* Don't clear xp->phy */
6430 	/* Don't clear xp->actv_cnt */
6431 
6432 	/*
6433 	 * Flush all target queues
6434 	 */
6435 	pmcs_flush_target_queues(pwp, xp, PMCS_TGT_ALL_QUEUES);
6436 }
6437 
/*
 * Decode the function result field of an SMP response frame, log a
 * human-readable description of it, and return the raw result value
 * so the caller can act on it.
 */
static int
pmcs_smp_function_result(pmcs_hw_t *pwp, smp_response_frame_t *srf)
{
	int result = srf->srf_result;

	switch (result) {
	case SMP_RES_UNKNOWN_FUNCTION:
		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
		    "%s: SMP DISCOVER Response "
		    "Function Result: Unknown SMP Function(0x%x)",
		    __func__, result);
		break;
	case SMP_RES_FUNCTION_FAILED:
		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
		    "%s: SMP DISCOVER Response "
		    "Function Result: SMP Function Failed(0x%x)",
		    __func__, result);
		break;
	case SMP_RES_INVALID_REQUEST_FRAME_LENGTH:
		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
		    "%s: SMP DISCOVER Response "
		    "Function Result: Invalid Request Frame Length(0x%x)",
		    __func__, result);
		break;
	case SMP_RES_INCOMPLETE_DESCRIPTOR_LIST:
		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
		    "%s: SMP DISCOVER Response "
		    "Function Result: Incomplete Descriptor List(0x%x)",
		    __func__, result);
		break;
	case SMP_RES_PHY_DOES_NOT_EXIST:
		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
		    "%s: SMP DISCOVER Response "
		    "Function Result: PHY does not exist(0x%x)",
		    __func__, result);
		break;
	case SMP_RES_PHY_VACANT:
		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
		    "%s: SMP DISCOVER Response "
		    "Function Result: PHY Vacant(0x%x)",
		    __func__, result);
		break;
	default:
		/* Unrecognized result codes are logged raw */
		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
		    "%s: SMP DISCOVER Response "
		    "Function Result: (0x%x)",
		    __func__, result);
		break;
	}

	return (result);
}
6490 
6491 /*
6492  * Do all the repetitive stuff necessary to setup for DMA
6493  *
6494  * pwp: Used for dip
6495  * dma_attr: ddi_dma_attr_t to use for the mapping
6496  * acch: ddi_acc_handle_t to use for the mapping
6497  * dmah: ddi_dma_handle_t to use
6498  * length: Amount of memory for mapping
6499  * kvp: Pointer filled in with kernel virtual address on successful return
6500  * dma_addr: Pointer filled in with DMA address on successful return
6501  */
6502 boolean_t
6503 pmcs_dma_setup(pmcs_hw_t *pwp, ddi_dma_attr_t *dma_attr, ddi_acc_handle_t *acch,
6504     ddi_dma_handle_t *dmah, size_t length, caddr_t *kvp, uint64_t *dma_addr)
6505 {
6506 	dev_info_t		*dip = pwp->dip;
6507 	ddi_dma_cookie_t	cookie;
6508 	size_t			real_length;
6509 	uint_t			ddma_flag = DDI_DMA_CONSISTENT;
6510 	uint_t			ddabh_flag = DDI_DMA_CONSISTENT | DDI_DMA_RDWR;
6511 	uint_t			cookie_cnt;
6512 	ddi_device_acc_attr_t	mattr = {
6513 		DDI_DEVICE_ATTR_V0,
6514 		DDI_NEVERSWAP_ACC,
6515 		DDI_STRICTORDER_ACC,
6516 		DDI_DEFAULT_ACC
6517 	};
6518 
6519 	*acch = NULL;
6520 	*dmah = NULL;
6521 
6522 	if (ddi_dma_alloc_handle(dip, dma_attr, DDI_DMA_SLEEP, NULL, dmah) !=
6523 	    DDI_SUCCESS) {
6524 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
6525 		    "Failed to allocate DMA handle");
6526 		return (B_FALSE);
6527 	}
6528 
6529 	if (ddi_dma_mem_alloc(*dmah, length, &mattr, ddma_flag, DDI_DMA_SLEEP,
6530 	    NULL, kvp, &real_length, acch) != DDI_SUCCESS) {
6531 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
6532 		    "Failed to allocate DMA mem");
6533 		ddi_dma_free_handle(dmah);
6534 		*dmah = NULL;
6535 		return (B_FALSE);
6536 	}
6537 
6538 	if (ddi_dma_addr_bind_handle(*dmah, NULL, *kvp, real_length,
6539 	    ddabh_flag, DDI_DMA_SLEEP, NULL, &cookie, &cookie_cnt)
6540 	    != DDI_DMA_MAPPED) {
6541 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL, "Failed to bind DMA");
6542 		ddi_dma_free_handle(dmah);
6543 		ddi_dma_mem_free(acch);
6544 		*dmah = NULL;
6545 		*acch = NULL;
6546 		return (B_FALSE);
6547 	}
6548 
6549 	if (cookie_cnt != 1) {
6550 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL, "Multiple cookies");
6551 		if (ddi_dma_unbind_handle(*dmah) != DDI_SUCCESS) {
6552 			pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL, "Condition "
6553 			    "failed at %s():%d", __func__, __LINE__);
6554 		}
6555 		ddi_dma_free_handle(dmah);
6556 		ddi_dma_mem_free(acch);
6557 		*dmah = NULL;
6558 		*acch = NULL;
6559 		return (B_FALSE);
6560 	}
6561 
6562 	*dma_addr = cookie.dmac_laddress;
6563 
6564 	return (B_TRUE);
6565 }
6566 
6567 /*
6568  * Flush requested queues for a particular target.  Called with statlock held
6569  */
6570 void
6571 pmcs_flush_target_queues(pmcs_hw_t *pwp, pmcs_xscsi_t *tgt, uint8_t queues)
6572 {
6573 	pmcs_cmd_t	*sp;
6574 	pmcwork_t	*pwrk;
6575 
6576 	ASSERT(pwp != NULL);
6577 	ASSERT(tgt != NULL);
6578 
6579 	pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, tgt,
6580 	    "%s: Flushing queues (%d) for target 0x%p", __func__,
6581 	    queues, (void *)tgt);
6582 
6583 	/*
6584 	 * Commands on the wait queue (or the special queue below) don't have
6585 	 * work structures associated with them.
6586 	 */
6587 	if (queues & PMCS_TGT_WAIT_QUEUE) {
6588 		mutex_enter(&tgt->wqlock);
6589 		while ((sp = STAILQ_FIRST(&tgt->wq)) != NULL) {
6590 			STAILQ_REMOVE(&tgt->wq, sp, pmcs_cmd, cmd_next);
6591 			pmcs_prt(pwp, PMCS_PRT_DEBUG1, NULL, tgt,
6592 			    "%s: Removing cmd 0x%p from wq for target 0x%p",
6593 			    __func__, (void *)sp, (void *)tgt);
6594 			CMD2PKT(sp)->pkt_reason = CMD_DEV_GONE;
6595 			CMD2PKT(sp)->pkt_state = STATE_GOT_BUS;
6596 			mutex_exit(&tgt->wqlock);
6597 			pmcs_dma_unload(pwp, sp);
6598 			mutex_enter(&pwp->cq_lock);
6599 			STAILQ_INSERT_TAIL(&pwp->cq, sp, cmd_next);
6600 			mutex_exit(&pwp->cq_lock);
6601 			mutex_enter(&tgt->wqlock);
6602 		}
6603 		mutex_exit(&tgt->wqlock);
6604 	}
6605 
6606 	/*
6607 	 * Commands on the active queue will have work structures associated
6608 	 * with them.
6609 	 */
6610 	if (queues & PMCS_TGT_ACTIVE_QUEUE) {
6611 		mutex_enter(&tgt->aqlock);
6612 		while ((sp = STAILQ_FIRST(&tgt->aq)) != NULL) {
6613 			STAILQ_REMOVE(&tgt->aq, sp, pmcs_cmd, cmd_next);
6614 			pwrk = pmcs_tag2wp(pwp, sp->cmd_tag);
6615 			mutex_exit(&tgt->aqlock);
6616 			mutex_exit(&tgt->statlock);
6617 			/*
6618 			 * If we found a work structure, mark it as dead
6619 			 * and complete it
6620 			 */
6621 			if (pwrk != NULL) {
6622 				pwrk->dead = 1;
6623 				CMD2PKT(sp)->pkt_reason = CMD_DEV_GONE;
6624 				CMD2PKT(sp)->pkt_state = STATE_GOT_BUS;
6625 				pmcs_complete_work_impl(pwp, pwrk, NULL, 0);
6626 			}
6627 			pmcs_prt(pwp, PMCS_PRT_DEBUG1, NULL, tgt,
6628 			    "%s: Removing cmd 0x%p from aq for target 0x%p",
6629 			    __func__, (void *)sp, (void *)tgt);
6630 			pmcs_dma_unload(pwp, sp);
6631 			mutex_enter(&pwp->cq_lock);
6632 			STAILQ_INSERT_TAIL(&pwp->cq, sp, cmd_next);
6633 			mutex_exit(&pwp->cq_lock);
6634 			mutex_enter(&tgt->aqlock);
6635 			mutex_enter(&tgt->statlock);
6636 		}
6637 		mutex_exit(&tgt->aqlock);
6638 	}
6639 
6640 	if (queues & PMCS_TGT_SPECIAL_QUEUE) {
6641 		while ((sp = STAILQ_FIRST(&tgt->sq)) != NULL) {
6642 			STAILQ_REMOVE(&tgt->sq, sp, pmcs_cmd, cmd_next);
6643 			pmcs_prt(pwp, PMCS_PRT_DEBUG1, NULL, tgt,
6644 			    "%s: Removing cmd 0x%p from sq for target 0x%p",
6645 			    __func__, (void *)sp, (void *)tgt);
6646 			CMD2PKT(sp)->pkt_reason = CMD_DEV_GONE;
6647 			CMD2PKT(sp)->pkt_state = STATE_GOT_BUS;
6648 			pmcs_dma_unload(pwp, sp);
6649 			mutex_enter(&pwp->cq_lock);
6650 			STAILQ_INSERT_TAIL(&pwp->cq, sp, cmd_next);
6651 			mutex_exit(&pwp->cq_lock);
6652 		}
6653 	}
6654 }
6655 
/*
 * Complete a work structure according to its tag type.  'iomb'/'amt'
 * describe the outbound IOMB that completed the work (may be NULL/0).
 *
 * Lock/ownership semantics differ per tag type: WAIT work is signalled
 * and pwrk->lock is dropped here; NONE work is released via pmcs_pwork();
 * CBACK work is handed to the registered callback, which is presumed to
 * handle the work structure itself (NOTE(review): inferred from the
 * absence of unlock/release here — confirm against the callbacks).
 */
void
pmcs_complete_work_impl(pmcs_hw_t *pwp, pmcwork_t *pwrk, uint32_t *iomb,
    size_t amt)
{
	switch (PMCS_TAG_TYPE(pwrk->htag)) {
	case PMCS_TAG_TYPE_CBACK:
	{
		pmcs_cb_t callback = (pmcs_cb_t)pwrk->ptr;
		(*callback)(pwp, pwrk, iomb);
		break;
	}
	case PMCS_TAG_TYPE_WAIT:
		/* Copy the IOMB to the waiter's buffer, then wake it up */
		if (pwrk->arg && iomb && amt) {
			(void) memcpy(pwrk->arg, iomb, amt);
		}
		cv_signal(&pwrk->sleep_cv);
		mutex_exit(&pwrk->lock);
		break;
	case PMCS_TAG_TYPE_NONE:
#ifdef DEBUG
		pmcs_check_iomb_status(pwp, iomb);
#endif
		pmcs_pwork(pwp, pwrk);
		break;
	default:
		/*
		 * We will leak a structure here if we don't know
		 * what happened
		 */
		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
		    "%s: Unknown PMCS_TAG_TYPE (%x)",
		    __func__, PMCS_TAG_TYPE(pwrk->htag));
		break;
	}
}
6691 
6692 /*
6693  * Determine if iport still has targets. During detach(9E), if SCSA is
6694  * successfull in its guarantee of tran_tgt_free(9E) before detach(9E),
6695  * this should always return B_FALSE.
6696  */
6697 boolean_t
6698 pmcs_iport_has_targets(pmcs_hw_t *pwp, pmcs_iport_t *iport)
6699 {
6700 	pmcs_xscsi_t *xp;
6701 	int i;
6702 
6703 	mutex_enter(&pwp->lock);
6704 
6705 	if (!pwp->targets || !pwp->max_dev) {
6706 		mutex_exit(&pwp->lock);
6707 		return (B_FALSE);
6708 	}
6709 
6710 	for (i = 0; i < pwp->max_dev; i++) {
6711 		xp = pwp->targets[i];
6712 		if ((xp == NULL) || (xp->phy == NULL) ||
6713 		    (xp->phy->iport != iport)) {
6714 			continue;
6715 		}
6716 
6717 		mutex_exit(&pwp->lock);
6718 		return (B_TRUE);
6719 	}
6720 
6721 	mutex_exit(&pwp->lock);
6722 	return (B_FALSE);
6723 }
6724 
6725 /*
6726  * Called with softstate lock held
6727  */
6728 void
6729 pmcs_destroy_target(pmcs_xscsi_t *target)
6730 {
6731 	pmcs_hw_t *pwp = target->pwp;
6732 	pmcs_iport_t *iport;
6733 
6734 	ASSERT(pwp);
6735 	ASSERT(mutex_owned(&pwp->lock));
6736 
6737 	if (!target->ua) {
6738 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, target,
6739 		    "%s: target %p iport address is null",
6740 		    __func__, (void *)target);
6741 	}
6742 
6743 	iport = pmcs_get_iport_by_ua(pwp, target->ua);
6744 	if (iport == NULL) {
6745 		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, target,
6746 		    "%s: no iport associated with tgt(0x%p)",
6747 		    __func__, (void *)target);
6748 		return;
6749 	}
6750 
6751 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, target,
6752 	    "%s: free target %p", __func__, (void *)target);
6753 	if (target->ua) {
6754 		strfree(target->ua);
6755 	}
6756 
6757 	mutex_destroy(&target->wqlock);
6758 	mutex_destroy(&target->aqlock);
6759 	mutex_destroy(&target->statlock);
6760 	cv_destroy(&target->reset_cv);
6761 	cv_destroy(&target->abort_cv);
6762 	ddi_soft_state_bystr_fini(&target->lun_sstate);
6763 	ddi_soft_state_bystr_free(iport->tgt_sstate, target->unit_address);
6764 	pmcs_rele_iport(iport);
6765 }
6766 
6767 /*
6768  * pmcs_lock_phy_impl
6769  *
6770  * This function is what does the actual work for pmcs_lock_phy.  It will
6771  * lock all PHYs from phyp down in a top-down fashion.
6772  *
6773  * Locking notes:
6774  * 1. level starts from 0 for the PHY ("parent") that's passed in.  It is
6775  * not a reflection of the actual level of the PHY in the SAS topology.
6776  * 2. If parent is an expander, then parent is locked along with all its
6777  * descendents.
6778  * 3. Expander subsidiary PHYs at level 0 are not locked.  It is the
6779  * responsibility of the caller to individually lock expander subsidiary PHYs
6780  * at level 0 if necessary.
6781  * 4. Siblings at level 0 are not traversed due to the possibility that we're
6782  * locking a PHY on the dead list.  The siblings could be pointing to invalid
6783  * PHYs.  We don't lock siblings at level 0 anyway.
6784  */
6785 static void
6786 pmcs_lock_phy_impl(pmcs_phy_t *phyp, int level)
6787 {
6788 	pmcs_phy_t *tphyp;
6789 
6790 	ASSERT((phyp->dtype == SAS) || (phyp->dtype == SATA) ||
6791 	    (phyp->dtype == EXPANDER) || (phyp->dtype == NOTHING));
6792 
6793 	/*
6794 	 * Start walking the PHYs.
6795 	 */
6796 	tphyp = phyp;
6797 	while (tphyp) {
6798 		/*
6799 		 * If we're at the top level, only lock ourselves.  For anything
6800 		 * at level > 0, traverse children while locking everything.
6801 		 */
6802 		if ((level > 0) || (tphyp == phyp)) {
6803 			pmcs_prt(tphyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING, tphyp,
6804 			    NULL, "%s: PHY 0x%p parent 0x%p path %s lvl %d",
6805 			    __func__, (void *)tphyp, (void *)tphyp->parent,
6806 			    tphyp->path, level);
6807 			mutex_enter(&tphyp->phy_lock);
6808 
6809 			if (tphyp->children) {
6810 				pmcs_lock_phy_impl(tphyp->children, level + 1);
6811 			}
6812 		}
6813 
6814 		if (level == 0) {
6815 			return;
6816 		}
6817 
6818 		tphyp = tphyp->sibling;
6819 	}
6820 }
6821 
6822 /*
6823  * pmcs_lock_phy
6824  *
6825  * This function is responsible for locking a PHY and all its descendents
6826  */
6827 void
6828 pmcs_lock_phy(pmcs_phy_t *phyp)
6829 {
6830 #ifdef DEBUG
6831 	char *callername = NULL;
6832 	ulong_t off;
6833 
6834 	ASSERT(phyp != NULL);
6835 
6836 	callername = modgetsymname((uintptr_t)caller(), &off);
6837 
6838 	if (callername == NULL) {
6839 		pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING, phyp, NULL,
6840 		    "%s: PHY 0x%p path %s caller: unknown", __func__,
6841 		    (void *)phyp, phyp->path);
6842 	} else {
6843 		pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING, phyp, NULL,
6844 		    "%s: PHY 0x%p path %s caller: %s+%lx", __func__,
6845 		    (void *)phyp, phyp->path, callername, off);
6846 	}
6847 #else
6848 	pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING, phyp, NULL,
6849 	    "%s: PHY 0x%p path %s", __func__, (void *)phyp, phyp->path);
6850 #endif
6851 	pmcs_lock_phy_impl(phyp, 0);
6852 }
6853 
6854 /*
6855  * pmcs_unlock_phy_impl
6856  *
6857  * Unlock all PHYs from phyp down in a bottom-up fashion.
6858  */
6859 static void
6860 pmcs_unlock_phy_impl(pmcs_phy_t *phyp, int level)
6861 {
6862 	pmcs_phy_t *phy_next;
6863 
6864 	ASSERT((phyp->dtype == SAS) || (phyp->dtype == SATA) ||
6865 	    (phyp->dtype == EXPANDER) || (phyp->dtype == NOTHING));
6866 
6867 	/*
6868 	 * Recurse down to the bottom PHYs
6869 	 */
6870 	if (level == 0) {
6871 		if (phyp->children) {
6872 			pmcs_unlock_phy_impl(phyp->children, level + 1);
6873 		}
6874 	} else {
6875 		phy_next = phyp;
6876 		while (phy_next) {
6877 			if (phy_next->children) {
6878 				pmcs_unlock_phy_impl(phy_next->children,
6879 				    level + 1);
6880 			}
6881 			phy_next = phy_next->sibling;
6882 		}
6883 	}
6884 
6885 	/*
6886 	 * Iterate through PHYs unlocking all at level > 0 as well the top PHY
6887 	 */
6888 	phy_next = phyp;
6889 	while (phy_next) {
6890 		if ((level > 0) || (phy_next == phyp)) {
6891 			pmcs_prt(phy_next->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
6892 			    phy_next, NULL,
6893 			    "%s: PHY 0x%p parent 0x%p path %s lvl %d",
6894 			    __func__, (void *)phy_next,
6895 			    (void *)phy_next->parent, phy_next->path, level);
6896 			mutex_exit(&phy_next->phy_lock);
6897 		}
6898 
6899 		if (level == 0) {
6900 			return;
6901 		}
6902 
6903 		phy_next = phy_next->sibling;
6904 	}
6905 }
6906 
6907 /*
6908  * pmcs_unlock_phy
6909  *
6910  * Unlock a PHY and all its descendents
6911  */
6912 void
6913 pmcs_unlock_phy(pmcs_phy_t *phyp)
6914 {
6915 #ifdef DEBUG
6916 	char *callername = NULL;
6917 	ulong_t off;
6918 
6919 	ASSERT(phyp != NULL);
6920 
6921 	callername = modgetsymname((uintptr_t)caller(), &off);
6922 
6923 	if (callername == NULL) {
6924 		pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING, phyp, NULL,
6925 		    "%s: PHY 0x%p path %s caller: unknown", __func__,
6926 		    (void *)phyp, phyp->path);
6927 	} else {
6928 		pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING, phyp, NULL,
6929 		    "%s: PHY 0x%p path %s caller: %s+%lx", __func__,
6930 		    (void *)phyp, phyp->path, callername, off);
6931 	}
6932 #else
6933 	pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING, phyp, NULL,
6934 	    "%s: PHY 0x%p path %s", __func__, (void *)phyp, phyp->path);
6935 #endif
6936 	pmcs_unlock_phy_impl(phyp, 0);
6937 }
6938 
6939 /*
6940  * pmcs_get_root_phy
6941  *
6942  * For a given phy pointer return its root phy.
6943  * The caller must be holding the lock on every PHY from phyp up to the root.
6944  */
6945 pmcs_phy_t *
6946 pmcs_get_root_phy(pmcs_phy_t *phyp)
6947 {
6948 	ASSERT(phyp);
6949 
6950 	while (phyp) {
6951 		if (IS_ROOT_PHY(phyp)) {
6952 			break;
6953 		}
6954 		phyp = phyp->parent;
6955 	}
6956 
6957 	return (phyp);
6958 }
6959 
6960 /*
6961  * pmcs_free_dma_chunklist
6962  *
6963  * Free DMA S/G chunk list
6964  */
6965 void
6966 pmcs_free_dma_chunklist(pmcs_hw_t *pwp)
6967 {
6968 	pmcs_chunk_t	*pchunk;
6969 
6970 	while (pwp->dma_chunklist) {
6971 		pchunk = pwp->dma_chunklist;
6972 		pwp->dma_chunklist = pwp->dma_chunklist->next;
6973 		if (pchunk->dma_handle) {
6974 			if (ddi_dma_unbind_handle(pchunk->dma_handle) !=
6975 			    DDI_SUCCESS) {
6976 				pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
6977 				    "Condition failed at %s():%d",
6978 				    __func__, __LINE__);
6979 			}
6980 			ddi_dma_free_handle(&pchunk->dma_handle);
6981 			ddi_dma_mem_free(&pchunk->acc_handle);
6982 		}
6983 		kmem_free(pchunk, sizeof (pmcs_chunk_t));
6984 	}
6985 }
6986 
6987 /*ARGSUSED2*/
6988 int
6989 pmcs_phy_constructor(void *buf, void *arg, int kmflags)
6990 {
6991 	pmcs_hw_t *pwp = (pmcs_hw_t *)arg;
6992 	pmcs_phy_t *phyp = (pmcs_phy_t *)buf;
6993 
6994 	mutex_init(&phyp->phy_lock, NULL, MUTEX_DRIVER,
6995 	    DDI_INTR_PRI(pwp->intr_pri));
6996 	cv_init(&phyp->abort_all_cv, NULL, CV_DRIVER, NULL);
6997 	return (0);
6998 }
6999 
7000 /*ARGSUSED1*/
7001 void
7002 pmcs_phy_destructor(void *buf, void *arg)
7003 {
7004 	pmcs_phy_t *phyp = (pmcs_phy_t *)buf;
7005 
7006 	cv_destroy(&phyp->abort_all_cv);
7007 	mutex_destroy(&phyp->phy_lock);
7008 }
7009 
7010 /*
7011  * Free all PHYs from the kmem_cache starting at phyp as well as everything
7012  * on the dead_phys list.
7013  *
7014  * NOTE: This function does not free root PHYs as they are not allocated
7015  * from the kmem_cache.
7016  *
7017  * No PHY locks are acquired as this should only be called during DDI_DETACH
7018  * or soft reset (while pmcs interrupts are disabled).
7019  */
7020 void
7021 pmcs_free_all_phys(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
7022 {
7023 	pmcs_phy_t *tphyp, *nphyp;
7024 
7025 	if (phyp == NULL) {
7026 		return;
7027 	}
7028 
7029 	tphyp = phyp;
7030 	while (tphyp) {
7031 		nphyp = tphyp->sibling;
7032 
7033 		if (tphyp->children) {
7034 			pmcs_free_all_phys(pwp, tphyp->children);
7035 			tphyp->children = NULL;
7036 		}
7037 		if (!IS_ROOT_PHY(tphyp)) {
7038 			kmem_cache_free(pwp->phy_cache, tphyp);
7039 		}
7040 
7041 		tphyp = nphyp;
7042 	}
7043 
7044 	tphyp = pwp->dead_phys;
7045 	while (tphyp) {
7046 		nphyp = tphyp->sibling;
7047 		kmem_cache_free(pwp->phy_cache, tphyp);
7048 		tphyp = nphyp;
7049 	}
7050 	pwp->dead_phys = NULL;
7051 }
7052 
7053 /*
7054  * Free a list of PHYs linked together by the sibling pointer back to the
7055  * kmem cache from whence they came.  This function does not recurse, so the
7056  * caller must ensure there are no children.
7057  */
7058 void
7059 pmcs_free_phys(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
7060 {
7061 	pmcs_phy_t *next_phy;
7062 
7063 	while (phyp) {
7064 		next_phy = phyp->sibling;
7065 		ASSERT(!mutex_owned(&phyp->phy_lock));
7066 		kmem_cache_free(pwp->phy_cache, phyp);
7067 		phyp = next_phy;
7068 	}
7069 }
7070 
7071 /*
7072  * Make a copy of an existing PHY structure.  This is used primarily in
7073  * discovery to compare the contents of an existing PHY with what gets
7074  * reported back by an expander.
7075  *
7076  * This function must not be called from any context where sleeping is
7077  * not possible.
7078  *
7079  * The new PHY is returned unlocked.
7080  */
7081 static pmcs_phy_t *
7082 pmcs_clone_phy(pmcs_phy_t *orig_phy)
7083 {
7084 	pmcs_phy_t *local;
7085 
7086 	local = kmem_cache_alloc(orig_phy->pwp->phy_cache, KM_SLEEP);
7087 
7088 	/*
7089 	 * Go ahead and just copy everything...
7090 	 */
7091 	*local = *orig_phy;
7092 
7093 	/*
7094 	 * But the following must be set appropriately for this copy
7095 	 */
7096 	local->sibling = NULL;
7097 	local->children = NULL;
7098 	mutex_init(&local->phy_lock, NULL, MUTEX_DRIVER,
7099 	    DDI_INTR_PRI(orig_phy->pwp->intr_pri));
7100 
7101 	return (local);
7102 }
7103 
7104 int
7105 pmcs_check_acc_handle(ddi_acc_handle_t handle)
7106 {
7107 	ddi_fm_error_t de;
7108 
7109 	if (handle == NULL) {
7110 		return (DDI_FAILURE);
7111 	}
7112 	ddi_fm_acc_err_get(handle, &de, DDI_FME_VER0);
7113 	return (de.fme_status);
7114 }
7115 
7116 int
7117 pmcs_check_dma_handle(ddi_dma_handle_t handle)
7118 {
7119 	ddi_fm_error_t de;
7120 
7121 	if (handle == NULL) {
7122 		return (DDI_FAILURE);
7123 	}
7124 	ddi_fm_dma_err_get(handle, &de, DDI_FME_VER0);
7125 	return (de.fme_status);
7126 }
7127 
7128 
7129 void
7130 pmcs_fm_ereport(pmcs_hw_t *pwp, char *detail)
7131 {
7132 	uint64_t ena;
7133 	char buf[FM_MAX_CLASS];
7134 
7135 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s", DDI_FM_DEVICE, detail);
7136 	ena = fm_ena_generate(0, FM_ENA_FMT1);
7137 	if (DDI_FM_EREPORT_CAP(pwp->fm_capabilities)) {
7138 		ddi_fm_ereport_post(pwp->dip, buf, ena, DDI_NOSLEEP,
7139 		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0, NULL);
7140 	}
7141 }
7142 
7143 int
7144 pmcs_check_acc_dma_handle(pmcs_hw_t *pwp)
7145 {
7146 	pmcs_chunk_t *pchunk;
7147 	int i;
7148 
7149 	/* check all acc & dma handles allocated in attach */
7150 	if ((pmcs_check_acc_handle(pwp->pci_acc_handle) != DDI_SUCCESS) ||
7151 	    (pmcs_check_acc_handle(pwp->msg_acc_handle) != DDI_SUCCESS) ||
7152 	    (pmcs_check_acc_handle(pwp->top_acc_handle) != DDI_SUCCESS) ||
7153 	    (pmcs_check_acc_handle(pwp->mpi_acc_handle) != DDI_SUCCESS) ||
7154 	    (pmcs_check_acc_handle(pwp->gsm_acc_handle) != DDI_SUCCESS)) {
7155 		goto check_failed;
7156 	}
7157 
7158 	for (i = 0; i < PMCS_NIQ; i++) {
7159 		if ((pmcs_check_dma_handle(
7160 		    pwp->iqp_handles[i]) != DDI_SUCCESS) ||
7161 		    (pmcs_check_acc_handle(
7162 		    pwp->iqp_acchdls[i]) != DDI_SUCCESS)) {
7163 			goto check_failed;
7164 		}
7165 	}
7166 
7167 	for (i = 0; i < PMCS_NOQ; i++) {
7168 		if ((pmcs_check_dma_handle(
7169 		    pwp->oqp_handles[i]) != DDI_SUCCESS) ||
7170 		    (pmcs_check_acc_handle(
7171 		    pwp->oqp_acchdls[i]) != DDI_SUCCESS)) {
7172 			goto check_failed;
7173 		}
7174 	}
7175 
7176 	if ((pmcs_check_dma_handle(pwp->cip_handles) != DDI_SUCCESS) ||
7177 	    (pmcs_check_acc_handle(pwp->cip_acchdls) != DDI_SUCCESS)) {
7178 		goto check_failed;
7179 	}
7180 
7181 	if (pwp->fwlog &&
7182 	    ((pmcs_check_dma_handle(pwp->fwlog_hndl) != DDI_SUCCESS) ||
7183 	    (pmcs_check_acc_handle(pwp->fwlog_acchdl) != DDI_SUCCESS))) {
7184 		goto check_failed;
7185 	}
7186 
7187 	if (pwp->regdump_hndl && pwp->regdump_acchdl &&
7188 	    ((pmcs_check_dma_handle(pwp->regdump_hndl) != DDI_SUCCESS) ||
7189 	    (pmcs_check_acc_handle(pwp->regdump_acchdl)
7190 	    != DDI_SUCCESS))) {
7191 		goto check_failed;
7192 	}
7193 
7194 
7195 	pchunk = pwp->dma_chunklist;
7196 	while (pchunk) {
7197 		if ((pmcs_check_acc_handle(pchunk->acc_handle)
7198 		    != DDI_SUCCESS) ||
7199 		    (pmcs_check_dma_handle(pchunk->dma_handle)
7200 		    != DDI_SUCCESS)) {
7201 			goto check_failed;
7202 		}
7203 		pchunk = pchunk->next;
7204 	}
7205 
7206 	return (0);
7207 
7208 check_failed:
7209 
7210 	return (1);
7211 }
7212 
7213 /*
7214  * pmcs_handle_dead_phys
7215  *
7216  * If the PHY has no outstanding work associated with it, remove it from
7217  * the dead PHY list and free it.
7218  *
7219  * If pwp->ds_err_recovering or pwp->configuring is set, don't run.
7220  * This keeps routines that need to submit work to the chip from having to
7221  * hold PHY locks to ensure that PHYs don't disappear while they do their work.
7222  */
void
pmcs_handle_dead_phys(pmcs_hw_t *pwp)
{
	pmcs_phy_t *phyp, *nphyp, *pphyp;

	mutex_enter(&pwp->lock);
	mutex_enter(&pwp->config_lock);

	/*
	 * Don't run while configuration or deviceset error recovery is
	 * in progress; those paths may reference dead PHYs without
	 * holding PHY locks.  (Bitwise OR of the two boolean flags is
	 * intentional and equivalent to logical OR here.)
	 */
	if (pwp->configuring | pwp->ds_err_recovering) {
		mutex_exit(&pwp->config_lock);
		mutex_exit(&pwp->lock);
		return;
	}

	/*
	 * Check every PHY in the dead PHY list
	 */
	mutex_enter(&pwp->dead_phylist_lock);
	phyp = pwp->dead_phys;
	pphyp = NULL;	/* Set previous PHY to NULL */

	while (phyp != NULL) {
		pmcs_lock_phy(phyp);
		ASSERT(phyp->dead);

		/* Save the successor now; phyp may be freed below */
		nphyp = phyp->dead_next;

		/*
		 * Check for outstanding work
		 */
		if (phyp->ref_count > 0) {
			/* Still referenced; keep it on the list */
			pmcs_unlock_phy(phyp);
			pphyp = phyp;	/* This PHY becomes "previous" */
		} else if (phyp->target) {
			/* Target not yet torn down; keep it on the list */
			pmcs_unlock_phy(phyp);
			pmcs_prt(pwp, PMCS_PRT_DEBUG1, phyp, phyp->target,
			    "%s: Not freeing PHY 0x%p: target 0x%p is not free",
			    __func__, (void *)phyp, (void *)phyp->target);
			pphyp = phyp;
		} else {
			/*
			 * No outstanding work or target references. Remove it
			 * from the list and free it
			 */
			pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, phyp->target,
			    "%s: Freeing inactive dead PHY 0x%p @ %s "
			    "target = 0x%p", __func__, (void *)phyp,
			    phyp->path, (void *)phyp->target);
			/*
			 * If pphyp is NULL, then phyp was the head of the list,
			 * so just reset the head to nphyp. Otherwise, the
			 * previous PHY will now point to nphyp (the next PHY)
			 */
			if (pphyp == NULL) {
				pwp->dead_phys = nphyp;
			} else {
				pphyp->dead_next = nphyp;
			}
			/*
			 * If the target still points to this PHY, remove
			 * that linkage now.
			 */
			if (phyp->target) {
				mutex_enter(&phyp->target->statlock);
				if (phyp->target->phy == phyp) {
					phyp->target->phy = NULL;
				}
				mutex_exit(&phyp->target->statlock);
			}
			/* Unlock before returning the PHY to the cache */
			pmcs_unlock_phy(phyp);
			kmem_cache_free(pwp->phy_cache, phyp);
		}

		phyp = nphyp;
	}

	mutex_exit(&pwp->dead_phylist_lock);
	mutex_exit(&pwp->config_lock);
	mutex_exit(&pwp->lock);
}
7303 
/*
 * Atomically take a reference on a PHY.  A non-zero ref_count keeps the
 * PHY from being freed by pmcs_handle_dead_phys().
 */
void
pmcs_inc_phy_ref_count(pmcs_phy_t *phyp)
{
	atomic_inc_32(&phyp->ref_count);
}
7309 
/*
 * Atomically drop a reference on a PHY.  The caller must hold a
 * reference; decrementing from zero would wrap the unsigned count.
 */
void
pmcs_dec_phy_ref_count(pmcs_phy_t *phyp)
{
	ASSERT(phyp->ref_count != 0);
	atomic_dec_32(&phyp->ref_count);
}
7316 
7317 /*
7318  * pmcs_reap_dead_phy
7319  *
7320  * This function is called from pmcs_new_tport when we have a PHY
7321  * without a target pointer.  It's possible in that case that this PHY
7322  * may have a "brother" on the dead_phys list.  That is, it may be the same as
7323  * this one but with a different root PHY number (e.g. pp05 vs. pp04).  If
7324  * that's the case, update the dead PHY and this new PHY.  If that's not the
7325  * case, we should get a tran_tgt_init on this after it's reported to SCSA.
7326  *
7327  * Called with PHY locked.
7328  */
static void
pmcs_reap_dead_phy(pmcs_phy_t *phyp)
{
	pmcs_hw_t *pwp = phyp->pwp;
	pmcs_phy_t *ctmp;

	ASSERT(mutex_owned(&phyp->phy_lock));

	/*
	 * Check the dead PHYs list
	 */
	mutex_enter(&pwp->dead_phylist_lock);
	ctmp = pwp->dead_phys;
	while (ctmp) {
		/* Skip entries on a different iport or with a
		 * different 8-byte SAS address. */
		if ((ctmp->iport != phyp->iport) ||
		    (memcmp((void *)&ctmp->sas_address[0],
		    (void *)&phyp->sas_address[0], 8))) {
			ctmp = ctmp->dead_next;
			continue;
		}

		/*
		 * Same SAS address on same iport.  Now check to see if
		 * the PHY path is the same with the possible exception
		 * of the root PHY number.
		 * The "5" is the string length of "pp00."
		 * NOTE(review): the 32 below caps the compared path
		 * length; presumably it matches the PHY path buffer
		 * size — confirm against the pmcs_phy_t declaration.
		 */
		if ((strnlen(phyp->path, 5) >= 5) &&
		    (strnlen(ctmp->path, 5) >= 5)) {
			if (memcmp((void *)&phyp->path[5],
			    (void *)&ctmp->path[5],
			    strnlen(phyp->path, 32) - 5) == 0) {
				break;
			}
		}

		ctmp = ctmp->dead_next;
	}
	mutex_exit(&pwp->dead_phylist_lock);

	/*
	 * Found a match.  Remove the target linkage and drop the
	 * ref count on the old PHY.  Then, increment the ref count
	 * on the new PHY to compensate.
	 */
	if (ctmp) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, ctmp, NULL,
		    "%s: Found match in dead PHY list for new PHY %s",
		    __func__, phyp->path);
		if (ctmp->target) {
			/*
			 * If there is a pointer to the target in the dead
			 * PHY, and that PHY's ref_count drops to 0, we can
			 * clear the target linkage now.  If the PHY's
			 * ref_count is > 1, then there may be multiple
			 * LUNs still remaining, so leave the linkage.
			 */
			pmcs_inc_phy_ref_count(phyp);
			pmcs_dec_phy_ref_count(ctmp);
			phyp->target = ctmp->target;
			/*
			 * Update the target's linkage as well
			 */
			mutex_enter(&phyp->target->statlock);
			phyp->target->phy = phyp;
			phyp->target->dtype = phyp->dtype;
			mutex_exit(&phyp->target->statlock);

			/* All references gone: sever the dead PHY's link */
			if (ctmp->ref_count == 0) {
				ctmp->target = NULL;
			}
		}
	}
}
7403 
7404 /*
7405  * Called with iport lock held
7406  */
7407 void
7408 pmcs_add_phy_to_iport(pmcs_iport_t *iport, pmcs_phy_t *phyp)
7409 {
7410 	ASSERT(mutex_owned(&iport->lock));
7411 	ASSERT(phyp);
7412 	ASSERT(!list_link_active(&phyp->list_node));
7413 	iport->nphy++;
7414 	list_insert_tail(&iport->phys, phyp);
7415 	pmcs_smhba_add_iport_prop(iport, DATA_TYPE_INT32, PMCS_NUM_PHYS,
7416 	    &iport->nphy);
7417 	mutex_enter(&iport->refcnt_lock);
7418 	iport->refcnt++;
7419 	mutex_exit(&iport->refcnt_lock);
7420 }
7421 
7422 /*
7423  * Called with the iport lock held
7424  */
7425 void
7426 pmcs_remove_phy_from_iport(pmcs_iport_t *iport, pmcs_phy_t *phyp)
7427 {
7428 	pmcs_phy_t *pptr, *next_pptr;
7429 
7430 	ASSERT(mutex_owned(&iport->lock));
7431 
7432 	/*
7433 	 * If phyp is NULL, remove all PHYs from the iport
7434 	 */
7435 	if (phyp == NULL) {
7436 		for (pptr = list_head(&iport->phys); pptr != NULL;
7437 		    pptr = next_pptr) {
7438 			next_pptr = list_next(&iport->phys, pptr);
7439 			mutex_enter(&pptr->phy_lock);
7440 			pptr->iport = NULL;
7441 			mutex_exit(&pptr->phy_lock);
7442 			pmcs_rele_iport(iport);
7443 			list_remove(&iport->phys, pptr);
7444 		}
7445 		iport->nphy = 0;
7446 		return;
7447 	}
7448 
7449 	ASSERT(phyp);
7450 	ASSERT(iport->nphy > 0);
7451 	ASSERT(list_link_active(&phyp->list_node));
7452 	iport->nphy--;
7453 	list_remove(&iport->phys, phyp);
7454 	pmcs_smhba_add_iport_prop(iport, DATA_TYPE_INT32, PMCS_NUM_PHYS,
7455 	    &iport->nphy);
7456 	pmcs_rele_iport(iport);
7457 }
7458 
7459 /*
7460  * This function checks to see if the target pointed to by phyp is still
7461  * correct.  This is done by comparing the target's unit address with the
7462  * SAS address in phyp.
7463  *
7464  * Called with PHY locked and target statlock held
7465  */
7466 static boolean_t
7467 pmcs_phy_target_match(pmcs_phy_t *phyp)
7468 {
7469 	uint64_t wwn;
7470 	char unit_address[PMCS_MAX_UA_SIZE];
7471 	boolean_t rval = B_FALSE;
7472 
7473 	ASSERT(phyp);
7474 	ASSERT(phyp->target);
7475 	ASSERT(mutex_owned(&phyp->phy_lock));
7476 	ASSERT(mutex_owned(&phyp->target->statlock));
7477 
7478 	wwn = pmcs_barray2wwn(phyp->sas_address);
7479 	(void) scsi_wwn_to_wwnstr(wwn, 1, unit_address);
7480 
7481 	if (memcmp((void *)unit_address, (void *)phyp->target->unit_address,
7482 	    strnlen(phyp->target->unit_address, PMCS_MAX_UA_SIZE)) == 0) {
7483 		rval = B_TRUE;
7484 	}
7485 
7486 	return (rval);
7487 }
7488