1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25/*
26 * This file contains various support routines.
27 */
28
29#include <sys/scsi/adapters/pmcs/pmcs.h>
30
31/*
32 * Local static data
33 */
34static int tgtmap_stable_usec = MICROSEC;	/* 1 second */
35static int tgtmap_csync_usec = 10 * MICROSEC;	/* 10 seconds */
36
37/*
38 * SAS Topology Configuration
39 */
40static void pmcs_new_tport(pmcs_hw_t *, pmcs_phy_t *);
41static void pmcs_configure_expander(pmcs_hw_t *, pmcs_phy_t *, pmcs_iport_t *);
42
43static void pmcs_check_expanders(pmcs_hw_t *, pmcs_phy_t *);
44static void pmcs_check_expander(pmcs_hw_t *, pmcs_phy_t *);
45static void pmcs_clear_expander(pmcs_hw_t *, pmcs_phy_t *, int);
46
47static int pmcs_expander_get_nphy(pmcs_hw_t *, pmcs_phy_t *);
48static int pmcs_expander_content_discover(pmcs_hw_t *, pmcs_phy_t *,
49    pmcs_phy_t *);
50
51static int pmcs_smp_function_result(pmcs_hw_t *, smp_response_frame_t *);
52static void pmcs_flush_nonio_cmds(pmcs_hw_t *pwp, pmcs_xscsi_t *tgt);
53static boolean_t pmcs_validate_devid(pmcs_phy_t *, pmcs_phy_t *, uint32_t);
54static void pmcs_clear_phys(pmcs_hw_t *, pmcs_phy_t *);
55static int pmcs_configure_new_devices(pmcs_hw_t *, pmcs_phy_t *);
56static void pmcs_begin_observations(pmcs_hw_t *);
57static void pmcs_flush_observations(pmcs_hw_t *);
58static boolean_t pmcs_report_observations(pmcs_hw_t *);
59static boolean_t pmcs_report_iport_observations(pmcs_hw_t *, pmcs_iport_t *,
60    pmcs_phy_t *);
61static pmcs_phy_t *pmcs_find_phy_needing_work(pmcs_hw_t *, pmcs_phy_t *);
62static int pmcs_kill_devices(pmcs_hw_t *, pmcs_phy_t *);
63static void pmcs_lock_phy_impl(pmcs_phy_t *, int);
64static void pmcs_unlock_phy_impl(pmcs_phy_t *, int);
65static pmcs_phy_t *pmcs_clone_phy(pmcs_phy_t *);
66static boolean_t pmcs_configure_phy(pmcs_hw_t *, pmcs_phy_t *);
67static void pmcs_reap_dead_phy(pmcs_phy_t *);
68static pmcs_iport_t *pmcs_get_iport_by_ua(pmcs_hw_t *, char *);
69static boolean_t pmcs_phy_target_match(pmcs_phy_t *);
70static void pmcs_iport_active(pmcs_iport_t *);
71static void pmcs_tgtmap_activate_cb(void *, char *, scsi_tgtmap_tgt_type_t,
72    void **);
73static boolean_t pmcs_tgtmap_deactivate_cb(void *, char *,
74    scsi_tgtmap_tgt_type_t, void *, scsi_tgtmap_deact_rsn_t);
75static void pmcs_add_dead_phys(pmcs_hw_t *, pmcs_phy_t *);
76static void pmcs_get_fw_version(pmcs_hw_t *);
77static int pmcs_get_time_stamp(pmcs_hw_t *, uint64_t *, hrtime_t *);
78
79/*
80 * Often used strings
81 */
82const char pmcs_nowrk[] = "%s: unable to get work structure";
83const char pmcs_nomsg[] = "%s: unable to get Inbound Message entry";
84const char pmcs_timeo[] = "%s: command timed out";
85
86extern const ddi_dma_attr_t pmcs_dattr;
87extern kmutex_t pmcs_trace_lock;
88
89/*
90 * Some Initial setup steps.
91 */
92
93int
94pmcs_setup(pmcs_hw_t *pwp)
95{
96	uint32_t barval = pwp->mpibar;
97	uint32_t i, scratch, regbar, regoff, barbar, baroff;
98	uint32_t new_ioq_depth, ferr = 0;
99
100	/*
101	 * Check current state. If we're not at READY state,
102	 * we can't go further.
103	 */
104	scratch = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1);
105	if ((scratch & PMCS_MSGU_AAP_STATE_MASK) == PMCS_MSGU_AAP_STATE_ERROR) {
106		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
107		    "%s: AAP Error State (0x%x)",
108		    __func__, pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1) &
109		    PMCS_MSGU_AAP_ERROR_MASK);
110		pmcs_fm_ereport(pwp, DDI_FM_DEVICE_INVAL_STATE);
111		ddi_fm_service_impact(pwp->dip, DDI_SERVICE_LOST);
112		return (-1);
113	}
114	if ((scratch & PMCS_MSGU_AAP_STATE_MASK) != PMCS_MSGU_AAP_STATE_READY) {
115		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
116		    "%s: AAP unit not ready (state 0x%x)",
117		    __func__, scratch & PMCS_MSGU_AAP_STATE_MASK);
118		pmcs_fm_ereport(pwp, DDI_FM_DEVICE_INVAL_STATE);
119		ddi_fm_service_impact(pwp->dip, DDI_SERVICE_LOST);
120		return (-1);
121	}
122
123	/*
124	 * Read the offset from the Message Unit scratchpad 0 register.
125	 * This allows us to read the MPI Configuration table.
126	 *
127	 * Check its signature for validity.
128	 */
129	baroff = barval;
130	barbar = barval >> PMCS_MSGU_MPI_BAR_SHIFT;
131	baroff &= PMCS_MSGU_MPI_OFFSET_MASK;
132
133	regoff = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH0);
134	regbar = regoff >> PMCS_MSGU_MPI_BAR_SHIFT;
135	regoff &= PMCS_MSGU_MPI_OFFSET_MASK;
136
137	if (regoff > baroff) {
138		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
139		    "%s: bad MPI Table Length (register offset=0x%08x, "
140		    "passed offset=0x%08x)", __func__, regoff, baroff);
141		return (-1);
142	}
143	if (regbar != barbar) {
144		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
145		    "%s: bad MPI BAR (register BAROFF=0x%08x, "
146		    "passed BAROFF=0x%08x)", __func__, regbar, barbar);
147		return (-1);
148	}
149	pwp->mpi_offset = regoff;
150	if (pmcs_rd_mpi_tbl(pwp, PMCS_MPI_AS) != PMCS_SIGNATURE) {
151		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
152		    "%s: Bad MPI Configuration Table Signature 0x%x", __func__,
153		    pmcs_rd_mpi_tbl(pwp, PMCS_MPI_AS));
154		return (-1);
155	}
156
157	if (pmcs_rd_mpi_tbl(pwp, PMCS_MPI_IR) != PMCS_MPI_REVISION1) {
158		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
159		    "%s: Bad MPI Configuration Revision 0x%x", __func__,
160		    pmcs_rd_mpi_tbl(pwp, PMCS_MPI_IR));
161		return (-1);
162	}
163
164	/*
165	 * Generate offsets for the General System, Inbound Queue Configuration
166	 * and Outbound Queue configuration tables. This way the macros to
167	 * access those tables will work correctly.
168	 */
169	pwp->mpi_gst_offset =
170	    pwp->mpi_offset + pmcs_rd_mpi_tbl(pwp, PMCS_MPI_GSTO);
171	pwp->mpi_iqc_offset =
172	    pwp->mpi_offset + pmcs_rd_mpi_tbl(pwp, PMCS_MPI_IQCTO);
173	pwp->mpi_oqc_offset =
174	    pwp->mpi_offset + pmcs_rd_mpi_tbl(pwp, PMCS_MPI_OQCTO);
175
176	pmcs_get_fw_version(pwp);
177
178	pwp->max_cmd = pmcs_rd_mpi_tbl(pwp, PMCS_MPI_MOIO);
179	pwp->max_dev = pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO0) >> 16;
180
181	pwp->max_iq = PMCS_MNIQ(pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO1));
182	pwp->max_oq = PMCS_MNOQ(pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO1));
183	pwp->nphy = PMCS_NPHY(pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO1));
184	if (pwp->max_iq <= PMCS_NIQ) {
185		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
186		    "%s: not enough Inbound Queues supported "
187		    "(need %d, max_oq=%d)", __func__, pwp->max_iq, PMCS_NIQ);
188		return (-1);
189	}
190	if (pwp->max_oq <= PMCS_NOQ) {
191		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
192		    "%s: not enough Outbound Queues supported "
193		    "(need %d, max_oq=%d)", __func__, pwp->max_oq, PMCS_NOQ);
194		return (-1);
195	}
196	if (pwp->nphy == 0) {
197		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
198		    "%s: zero phys reported", __func__);
199		return (-1);
200	}
201	if (PMCS_HPIQ(pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO1))) {
202		pwp->hipri_queue = (1 << PMCS_IQ_OTHER);
203	}
204
205
206	for (i = 0; i < pwp->nphy; i++) {
207		PMCS_MPI_EVQSET(pwp, PMCS_OQ_EVENTS, i);
208		PMCS_MPI_NCQSET(pwp, PMCS_OQ_EVENTS, i);
209	}
210
211	pmcs_wr_mpi_tbl(pwp, PMCS_MPI_INFO2,
212	    (PMCS_OQ_EVENTS << GENERAL_EVENT_OQ_SHIFT) |
213	    (PMCS_OQ_EVENTS << DEVICE_HANDLE_REMOVED_SHIFT));
214
215	/*
216	 * Verify that ioq_depth is valid (> 0 and not so high that it
217	 * would cause us to overrun the chip with commands).
218	 */
219	if (pwp->ioq_depth == 0) {
220		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
221		    "%s: I/O queue depth set to 0. Setting to %d",
222		    __func__, PMCS_NQENTRY);
223		pwp->ioq_depth = PMCS_NQENTRY;
224	}
225
226	if (pwp->ioq_depth < PMCS_MIN_NQENTRY) {
227		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
228		    "%s: I/O queue depth set too low (%d). Setting to %d",
229		    __func__, pwp->ioq_depth, PMCS_MIN_NQENTRY);
230		pwp->ioq_depth = PMCS_MIN_NQENTRY;
231	}
232
233	if (pwp->ioq_depth > (pwp->max_cmd / (PMCS_IO_IQ_MASK + 1))) {
234		new_ioq_depth = pwp->max_cmd / (PMCS_IO_IQ_MASK + 1);
235		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
236		    "%s: I/O queue depth set too high (%d). Setting to %d",
237		    __func__, pwp->ioq_depth, new_ioq_depth);
238		pwp->ioq_depth = new_ioq_depth;
239	}
240
241	/*
242	 * Allocate consistent memory for OQs and IQs.
243	 */
244	pwp->iqp_dma_attr = pwp->oqp_dma_attr = pmcs_dattr;
245	pwp->iqp_dma_attr.dma_attr_align =
246	    pwp->oqp_dma_attr.dma_attr_align = PMCS_QENTRY_SIZE;
247
248	/*
249	 * The Rev C chip has the ability to do PIO to or from consistent
250	 * memory anywhere in a 64 bit address space, but the firmware is
251	 * not presently set up to do so.
252	 */
253	pwp->iqp_dma_attr.dma_attr_addr_hi =
254	    pwp->oqp_dma_attr.dma_attr_addr_hi = 0x000000FFFFFFFFFFull;
255
256	for (i = 0; i < PMCS_NIQ; i++) {
257		if (pmcs_dma_setup(pwp, &pwp->iqp_dma_attr,
258		    &pwp->iqp_acchdls[i],
259		    &pwp->iqp_handles[i], PMCS_QENTRY_SIZE * pwp->ioq_depth,
260		    (caddr_t *)&pwp->iqp[i], &pwp->iqaddr[i]) == B_FALSE) {
261			pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
262			    "Failed to setup DMA for iqp[%d]", i);
263			return (-1);
264		}
265		bzero(pwp->iqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth);
266	}
267
268	for (i = 0; i < PMCS_NOQ; i++) {
269		if (pmcs_dma_setup(pwp, &pwp->oqp_dma_attr,
270		    &pwp->oqp_acchdls[i],
271		    &pwp->oqp_handles[i], PMCS_QENTRY_SIZE * pwp->ioq_depth,
272		    (caddr_t *)&pwp->oqp[i], &pwp->oqaddr[i]) == B_FALSE) {
273			pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
274			    "Failed to setup DMA for oqp[%d]", i);
275			return (-1);
276		}
277		bzero(pwp->oqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth);
278	}
279
280	/*
281	 * Install the IQ and OQ addresses (and null out the rest).
282	 */
283	for (i = 0; i < pwp->max_iq; i++) {
284		pwp->iqpi_offset[i] = pmcs_rd_iqc_tbl(pwp, PMCS_IQPIOFFX(i));
285		if (i < PMCS_NIQ) {
286			if (i != PMCS_IQ_OTHER) {
287				pmcs_wr_iqc_tbl(pwp, PMCS_IQC_PARMX(i),
288				    pwp->ioq_depth | (PMCS_QENTRY_SIZE << 16));
289			} else {
290				pmcs_wr_iqc_tbl(pwp, PMCS_IQC_PARMX(i),
291				    (1 << 30) | pwp->ioq_depth |
292				    (PMCS_QENTRY_SIZE << 16));
293			}
294			pmcs_wr_iqc_tbl(pwp, PMCS_IQBAHX(i),
295			    DWORD1(pwp->iqaddr[i]));
296			pmcs_wr_iqc_tbl(pwp, PMCS_IQBALX(i),
297			    DWORD0(pwp->iqaddr[i]));
298			pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBAHX(i),
299			    DWORD1(pwp->ciaddr+IQ_OFFSET(i)));
300			pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBALX(i),
301			    DWORD0(pwp->ciaddr+IQ_OFFSET(i)));
302		} else {
303			pmcs_wr_iqc_tbl(pwp, PMCS_IQC_PARMX(i), 0);
304			pmcs_wr_iqc_tbl(pwp, PMCS_IQBAHX(i), 0);
305			pmcs_wr_iqc_tbl(pwp, PMCS_IQBALX(i), 0);
306			pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBAHX(i), 0);
307			pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBALX(i), 0);
308		}
309	}
310
311	for (i = 0; i < pwp->max_oq; i++) {
312		pwp->oqci_offset[i] = pmcs_rd_oqc_tbl(pwp, PMCS_OQCIOFFX(i));
313		if (i < PMCS_NOQ) {
314			pmcs_wr_oqc_tbl(pwp, PMCS_OQC_PARMX(i), pwp->ioq_depth |
315			    (PMCS_QENTRY_SIZE << 16) | OQIEX);
316			pmcs_wr_oqc_tbl(pwp, PMCS_OQBAHX(i),
317			    DWORD1(pwp->oqaddr[i]));
318			pmcs_wr_oqc_tbl(pwp, PMCS_OQBALX(i),
319			    DWORD0(pwp->oqaddr[i]));
320			pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBAHX(i),
321			    DWORD1(pwp->ciaddr+OQ_OFFSET(i)));
322			pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBALX(i),
323			    DWORD0(pwp->ciaddr+OQ_OFFSET(i)));
324			pmcs_wr_oqc_tbl(pwp, PMCS_OQIPARM(i),
325			    pwp->oqvec[i] << 24);
326			pmcs_wr_oqc_tbl(pwp, PMCS_OQDICX(i), 0);
327		} else {
328			pmcs_wr_oqc_tbl(pwp, PMCS_OQC_PARMX(i), 0);
329			pmcs_wr_oqc_tbl(pwp, PMCS_OQBAHX(i), 0);
330			pmcs_wr_oqc_tbl(pwp, PMCS_OQBALX(i), 0);
331			pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBAHX(i), 0);
332			pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBALX(i), 0);
333			pmcs_wr_oqc_tbl(pwp, PMCS_OQIPARM(i), 0);
334			pmcs_wr_oqc_tbl(pwp, PMCS_OQDICX(i), 0);
335		}
336	}
337
338	/*
339	 * Set up logging, if defined.
340	 */
341	if (pwp->fwlog) {
342		uint64_t logdma = pwp->fwaddr;
343		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_MELBAH, DWORD1(logdma));
344		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_MELBAL, DWORD0(logdma));
345		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_MELBS, PMCS_FWLOG_SIZE >> 1);
346		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_MELSEV, pwp->fwlog);
347		logdma += (PMCS_FWLOG_SIZE >> 1);
348		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_IELBAH, DWORD1(logdma));
349		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_IELBAL, DWORD0(logdma));
350		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_IELBS, PMCS_FWLOG_SIZE >> 1);
351		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_IELSEV, pwp->fwlog);
352	}
353
354	/*
355	 * Interrupt vectors, outbound queues, and odb_auto_clear
356	 *
357	 * MSI/MSI-X:
358	 * If we got 4 interrupt vectors, we'll assign one to each outbound
359	 * queue as well as the fatal interrupt, and auto clear can be set
360	 * for each.
361	 *
362	 * If we only got 2 vectors, one will be used for I/O completions
363	 * and the other for the other two vectors.  In this case, auto_
364	 * clear can only be set for I/Os, which is fine.  The fatal
365	 * interrupt will be mapped to the PMCS_FATAL_INTERRUPT bit, which
366	 * is not an interrupt vector.
367	 *
368	 * MSI/MSI-X/INT-X:
369	 * If we only got 1 interrupt vector, auto_clear must be set to 0,
370	 * and again the fatal interrupt will be mapped to the
371	 * PMCS_FATAL_INTERRUPT bit (again, not an interrupt vector).
372	 */
373
374	switch (pwp->int_type) {
375	case PMCS_INT_MSIX:
376	case PMCS_INT_MSI:
377		switch (pwp->intr_cnt) {
378		case 1:
379			pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, PMCS_FERRIE |
380			    (PMCS_FATAL_INTERRUPT << PMCS_FERIV_SHIFT));
381			pwp->odb_auto_clear = 0;
382			break;
383		case 2:
384			pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, PMCS_FERRIE |
385			    (PMCS_FATAL_INTERRUPT << PMCS_FERIV_SHIFT));
386			pwp->odb_auto_clear = (1 << PMCS_FATAL_INTERRUPT) |
387			    (1 << PMCS_MSIX_IODONE);
388			break;
389		case 4:
390			pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, PMCS_FERRIE |
391			    (PMCS_MSIX_FATAL << PMCS_FERIV_SHIFT));
392			pwp->odb_auto_clear = (1 << PMCS_MSIX_FATAL) |
393			    (1 << PMCS_MSIX_GENERAL) | (1 << PMCS_MSIX_IODONE) |
394			    (1 << PMCS_MSIX_EVENTS);
395			break;
396		}
397		break;
398
399	case PMCS_INT_FIXED:
400		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR,
401		    PMCS_FERRIE | (PMCS_FATAL_INTERRUPT << PMCS_FERIV_SHIFT));
402		pwp->odb_auto_clear = 0;
403		break;
404	}
405
406	/*
407	 * If the open retry interval is non-zero, set it.
408	 */
409	if (pwp->open_retry_interval != 0) {
410		int phynum;
411
412		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
413		    "%s: Setting open retry interval to %d usecs", __func__,
414		    pwp->open_retry_interval);
415		for (phynum = 0; phynum < pwp->nphy; phynum ++) {
416			pmcs_wr_gsm_reg(pwp, OPEN_RETRY_INTERVAL(phynum),
417			    pwp->open_retry_interval);
418		}
419	}
420
421	/*
422	 * Enable Interrupt Reassertion
423	 * Default Delay 1000us
424	 */
425	ferr = pmcs_rd_mpi_tbl(pwp, PMCS_MPI_FERR);
426	if ((ferr & PMCS_MPI_IRAE) == 0) {
427		ferr &= ~(PMCS_MPI_IRAU | PMCS_MPI_IRAD_MASK);
428		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, ferr | PMCS_MPI_IRAE);
429	}
430
431	pmcs_wr_topunit(pwp, PMCS_OBDB_AUTO_CLR, pwp->odb_auto_clear);
432	pwp->mpi_table_setup = 1;
433	return (0);
434}
435
436/*
437 * Start the Message Passing protocol with the PMC chip.
438 */
439int
440pmcs_start_mpi(pmcs_hw_t *pwp)
441{
442	int i;
443
444	pmcs_wr_msgunit(pwp, PMCS_MSGU_IBDB, PMCS_MSGU_IBDB_MPIINI);
445	for (i = 0; i < 1000; i++) {
446		if ((pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB) &
447		    PMCS_MSGU_IBDB_MPIINI) == 0) {
448			break;
449		}
450		drv_usecwait(1000);
451	}
452	if (pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB) & PMCS_MSGU_IBDB_MPIINI) {
453		return (-1);
454	}
455	drv_usecwait(500000);
456
457	/*
458	 * Check to make sure we got to INIT state.
459	 */
460	if (PMCS_MPI_S(pmcs_rd_gst_tbl(pwp, PMCS_GST_BASE)) !=
461	    PMCS_MPI_STATE_INIT) {
462		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
463		    "%s: MPI launch failed (GST 0x%x DBCLR 0x%x)", __func__,
464		    pmcs_rd_gst_tbl(pwp, PMCS_GST_BASE),
465		    pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB_CLEAR));
466		return (-1);
467	}
468	return (0);
469}
470
471/*
472 * Stop the Message Passing protocol with the PMC chip.
473 */
474int
475pmcs_stop_mpi(pmcs_hw_t *pwp)
476{
477	int i;
478
479	for (i = 0; i < pwp->max_iq; i++) {
480		pmcs_wr_iqc_tbl(pwp, PMCS_IQC_PARMX(i), 0);
481		pmcs_wr_iqc_tbl(pwp, PMCS_IQBAHX(i), 0);
482		pmcs_wr_iqc_tbl(pwp, PMCS_IQBALX(i), 0);
483		pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBAHX(i), 0);
484		pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBALX(i), 0);
485	}
486	for (i = 0; i < pwp->max_oq; i++) {
487		pmcs_wr_oqc_tbl(pwp, PMCS_OQC_PARMX(i), 0);
488		pmcs_wr_oqc_tbl(pwp, PMCS_OQBAHX(i), 0);
489		pmcs_wr_oqc_tbl(pwp, PMCS_OQBALX(i), 0);
490		pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBAHX(i), 0);
491		pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBALX(i), 0);
492		pmcs_wr_oqc_tbl(pwp, PMCS_OQIPARM(i), 0);
493		pmcs_wr_oqc_tbl(pwp, PMCS_OQDICX(i), 0);
494	}
495	pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, 0);
496	pmcs_wr_msgunit(pwp, PMCS_MSGU_IBDB, PMCS_MSGU_IBDB_MPICTU);
497	for (i = 0; i < 2000; i++) {
498		if ((pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB) &
499		    PMCS_MSGU_IBDB_MPICTU) == 0) {
500			break;
501		}
502		drv_usecwait(1000);
503	}
504	if (pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB) & PMCS_MSGU_IBDB_MPICTU) {
505		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
506		    "%s: MPI stop failed", __func__);
507		return (-1);
508	}
509	return (0);
510}
511
512/*
513 * Do a sequence of ECHO messages to test for MPI functionality,
514 * all inbound and outbound queue functionality and interrupts.
515 */
int
pmcs_echo_test(pmcs_hw_t *pwp)
{
	echo_test_t fred;
	struct pmcwork *pwrk;
	uint32_t *msg, count;
	int iqe = 0, iqo = 0, result, rval = 0;
	int iterations;
	hrtime_t echo_start, echo_end, echo_total;

	ASSERT(pwp->max_cmd > 0);

	/*
	 * We want iterations to be max_cmd * 3 to ensure that we run the
	 * echo test enough times to iterate through every inbound queue
	 * at least twice.
	 */
	iterations = pwp->max_cmd * 3;

	echo_total = 0;
	count = 0;

	while (count < iterations) {
		/* Each echo uses its own work structure and wait tag. */
		pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, NULL);
		if (pwrk == NULL) {
			pmcs_prt(pwp, PMCS_PRT_ERR, NULL, NULL,
			    pmcs_nowrk, __func__);
			rval = -1;
			break;
		}

		/* Grab an entry on the current inbound queue under its lock. */
		mutex_enter(&pwp->iqp_lock[iqe]);
		msg = GET_IQ_ENTRY(pwp, iqe);
		if (msg == NULL) {
			mutex_exit(&pwp->iqp_lock[iqe]);
			pmcs_pwork(pwp, pwrk);
			pmcs_prt(pwp, PMCS_PRT_ERR, NULL, NULL,
			    pmcs_nomsg, __func__);
			rval = -1;
			break;
		}

		bzero(msg, PMCS_QENTRY_SIZE);

		if (iqe == PMCS_IQ_OTHER) {
			/* This is on the high priority queue */
			msg[0] = LE_32(PMCS_HIPRI(pwp, iqo, PMCIN_ECHO));
		} else {
			msg[0] = LE_32(PMCS_IOMB_IN_SAS(iqo, PMCIN_ECHO));
		}
		msg[1] = LE_32(pwrk->htag);
		/* Payload echoed back by firmware; contents are arbitrary. */
		fred.signature = 0xdeadbeef;
		fred.count = count;
		fred.ptr = &count;
		(void) memcpy(&msg[2], &fred, sizeof (fred));
		pwrk->state = PMCS_WORK_STATE_ONCHIP;

		INC_IQ_ENTRY(pwp, iqe);

		echo_start = gethrtime();
		DTRACE_PROBE2(pmcs__echo__test__wait__start,
		    hrtime_t, echo_start, uint32_t, pwrk->htag);

		/*
		 * Rotate through all inbound and outbound queues so every
		 * queue pair gets exercised.
		 */
		if (++iqe == PMCS_NIQ) {
			iqe = 0;
		}
		if (++iqo == PMCS_NOQ) {
			iqo = 0;
		}

		WAIT_FOR(pwrk, 250, result);
		pmcs_pwork(pwp, pwrk);

		echo_end = gethrtime();
		DTRACE_PROBE2(pmcs__echo__test__wait__end,
		    hrtime_t, echo_end, int, result);
		/* Accumulate round-trip time for interrupt coalescing math. */
		echo_total += (echo_end - echo_start);

		if (result) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
			    "%s: command timed out on echo test #%d",
			    __func__, count);
			rval = -1;
			break;
		}
	}

	/*
	 * The intr_threshold is adjusted by PMCS_INTR_THRESHOLD in order to
	 * remove the overhead of things like the delay in getting signaled
	 * for completion.
	 */
	if (echo_total != 0) {
		pwp->io_intr_coal.intr_latency =
		    (echo_total / iterations) / 2;
		pwp->io_intr_coal.intr_threshold =
		    PMCS_INTR_THRESHOLD(PMCS_QUANTUM_TIME_USECS * 1000 /
		    pwp->io_intr_coal.intr_latency);
	}

	return (rval);
}
618
619/*
620 * Start the (real) phys
621 */
622int
623pmcs_start_phy(pmcs_hw_t *pwp, int phynum, int linkmode, int speed)
624{
625	int result;
626	uint32_t *msg;
627	struct pmcwork *pwrk;
628	pmcs_phy_t *pptr;
629	sas_identify_af_t sap;
630
631	mutex_enter(&pwp->lock);
632	pptr = pwp->root_phys + phynum;
633	if (pptr == NULL) {
634		mutex_exit(&pwp->lock);
635		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
636		    "%s: cannot find port %d", __func__, phynum);
637		return (0);
638	}
639
640	pmcs_lock_phy(pptr);
641	mutex_exit(&pwp->lock);
642
643	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
644	if (pwrk == NULL) {
645		pmcs_unlock_phy(pptr);
646		pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL, pmcs_nowrk, __func__);
647		return (-1);
648	}
649
650	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
651	msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
652
653	if (msg == NULL) {
654		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
655		pmcs_unlock_phy(pptr);
656		pmcs_pwork(pwp, pwrk);
657		pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL, pmcs_nomsg, __func__);
658		return (-1);
659	}
660	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_EVENTS, PMCIN_PHY_START));
661	msg[1] = LE_32(pwrk->htag);
662	msg[2] = LE_32(linkmode | speed | phynum);
663	bzero(&sap, sizeof (sap));
664	sap.device_type = SAS_IF_DTYPE_ENDPOINT;
665	sap.ssp_ini_port = 1;
666
667	if (pwp->separate_ports) {
668		pmcs_wwn2barray(pwp->sas_wwns[phynum], sap.sas_address);
669	} else {
670		pmcs_wwn2barray(pwp->sas_wwns[0], sap.sas_address);
671	}
672
673	ASSERT(phynum < SAS2_PHYNUM_MAX);
674	sap.phy_identifier = phynum & SAS2_PHYNUM_MASK;
675	(void) memcpy(&msg[3], &sap, sizeof (sas_identify_af_t));
676	pwrk->state = PMCS_WORK_STATE_ONCHIP;
677	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
678
679	pptr->state.prog_min_rate = (lowbit((ulong_t)speed) - 1);
680	pptr->state.prog_max_rate = (highbit((ulong_t)speed) - 1);
681	pptr->state.hw_min_rate = PMCS_HW_MIN_LINK_RATE;
682	pptr->state.hw_max_rate = PMCS_HW_MAX_LINK_RATE;
683
684	pmcs_unlock_phy(pptr);
685	WAIT_FOR(pwrk, 1000, result);
686	pmcs_pwork(pwp, pwrk);
687
688	if (result) {
689		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL, pmcs_timeo, __func__);
690	} else {
691		mutex_enter(&pwp->lock);
692		pwp->phys_started |= (1 << phynum);
693		mutex_exit(&pwp->lock);
694	}
695
696	return (0);
697}
698
699int
700pmcs_start_phys(pmcs_hw_t *pwp)
701{
702	int i, rval;
703
704	for (i = 0; i < pwp->nphy; i++) {
705		if ((pwp->phyid_block_mask & (1 << i)) == 0) {
706			if (pmcs_start_phy(pwp, i,
707			    (pwp->phymode << PHY_MODE_SHIFT),
708			    pwp->physpeed << PHY_LINK_SHIFT)) {
709				return (-1);
710			}
711			if (pmcs_clear_diag_counters(pwp, i)) {
712				pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
713				    "%s: failed to reset counters on PHY (%d)",
714				    __func__, i);
715			}
716		}
717	}
718
719	rval = pmcs_get_time_stamp(pwp, &pwp->fw_timestamp, &pwp->hrtimestamp);
720	if (rval) {
721		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
722		    "%s: Failed to obtain firmware timestamp", __func__);
723	} else {
724		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
725		    "Firmware timestamp: 0x%" PRIx64, pwp->fw_timestamp);
726	}
727
728	return (0);
729}
730
731/*
732 * Called with PHY locked
733 */
/*
 * Reset a PHY, either via an SMP PHY CONTROL request through its parent
 * expander (level > 0) or via a LOCAL_PHY_CONTROL IOMB for a root PHY
 * (level == 0).  'type' selects PMCS_PHYOP_LINK_RESET or
 * PMCS_PHYOP_HARD_RESET.
 *
 * Called with PHY locked.  The PHY lock is dropped while waiting for
 * completion and reacquired afterward.
 *
 * Returns 0 on success or when the reset is skipped, ENOMEM if no work
 * structure or queue entry is available, EIO on timeout, or the chip's
 * non-OK completion status.
 */
int
pmcs_reset_phy(pmcs_hw_t *pwp, pmcs_phy_t *pptr, uint8_t type)
{
	uint32_t *msg;
	uint32_t iomb[(PMCS_QENTRY_SIZE << 1) >> 2];
	const char *mbar;
	uint32_t amt;
	uint32_t pdevid;
	uint32_t stsoff;
	uint32_t status;
	int result, level, phynum;
	struct pmcwork *pwrk;
	pmcs_iport_t *iport;
	uint32_t htag;

	ASSERT(mutex_owned(&pptr->phy_lock));

	bzero(iomb, PMCS_QENTRY_SIZE);
	phynum = pptr->phynum;
	level = pptr->level;
	if (level > 0) {
		/* Child PHY: address the SMP request to the parent. */
		pdevid = pptr->parent->device_id;
	} else if ((level == 0) && (pptr->dtype == EXPANDER)) {
		/* Don't reset a root PHY that fronts an expander. */
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, pptr->target,
		    "%s: Not resetting HBA PHY @ %s", __func__, pptr->path);
		return (0);
	}

	if (!pptr->iport || !pptr->valid_device_id) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, pptr->target,
		    "%s: Can't reach PHY %s", __func__, pptr->path);
		return (0);
	}

	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);

	if (pwrk == NULL) {
		pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL, pmcs_nowrk, __func__);
		return (ENOMEM);
	}

	/* Completion copies the outbound IOMB into this local buffer. */
	pwrk->arg = iomb;

	/*
	 * If level > 0, we need to issue an SMP_REQUEST with a PHY_CONTROL
	 * function to do either a link reset or hard reset.  If level == 0,
	 * then we do a LOCAL_PHY_CONTROL IOMB to do link/hard reset to the
	 * root (local) PHY
	 */
	if (level) {
		stsoff = 2;
		iomb[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
		    PMCIN_SMP_REQUEST));
		iomb[1] = LE_32(pwrk->htag);
		iomb[2] = LE_32(pdevid);
		iomb[3] = LE_32(40 << SMP_REQUEST_LENGTH_SHIFT);
		/*
		 * Send SMP PHY CONTROL/HARD or LINK RESET
		 */
		iomb[4] = BE_32(0x40910000);
		iomb[5] = 0;

		if (type == PMCS_PHYOP_HARD_RESET) {
			mbar = "SMP PHY CONTROL/HARD RESET";
			iomb[6] = BE_32((phynum << 16) |
			    (PMCS_PHYOP_HARD_RESET << 8));
		} else {
			mbar = "SMP PHY CONTROL/LINK RESET";
			iomb[6] = BE_32((phynum << 16) |
			    (PMCS_PHYOP_LINK_RESET << 8));
		}
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
		    "%s: sending %s to %s for phy 0x%x",
		    __func__, mbar, pptr->parent->path, pptr->phynum);
		amt = 7;
	} else {
		/*
		 * Unlike most other Outbound messages, status for
		 * a local phy operation is in DWORD 3.
		 */
		stsoff = 3;
		iomb[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
		    PMCIN_LOCAL_PHY_CONTROL));
		iomb[1] = LE_32(pwrk->htag);
		if (type == PMCS_PHYOP_LINK_RESET) {
			mbar = "LOCAL PHY LINK RESET";
			iomb[2] = LE_32((PMCS_PHYOP_LINK_RESET << 8) | phynum);
		} else {
			mbar = "LOCAL PHY HARD RESET";
			iomb[2] = LE_32((PMCS_PHYOP_HARD_RESET << 8) | phynum);
		}
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
		    "%s: sending %s to %s", __func__, mbar, pptr->path);
		amt = 3;
	}

	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
	msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
	if (msg == NULL) {
		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
		pmcs_pwork(pwp, pwrk);
		pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL, pmcs_nomsg, __func__);
		return (ENOMEM);
	}
	COPY_MESSAGE(msg, iomb, amt);
	/* Save the tag: pwrk is freed before the timeout path needs it. */
	htag = pwrk->htag;

	/*
	 * Hold the iport and serialize SMP activity on it for the duration
	 * of the command; drop the PHY lock while waiting.
	 */
	pmcs_hold_iport(pptr->iport);
	iport = pptr->iport;
	pmcs_smp_acquire(iport);
	pwrk->state = PMCS_WORK_STATE_ONCHIP;
	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
	pmcs_unlock_phy(pptr);
	WAIT_FOR(pwrk, 1000, result);
	pmcs_pwork(pwp, pwrk);
	pmcs_smp_release(iport);
	pmcs_rele_iport(iport);
	pmcs_lock_phy(pptr);
	if (result) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL, pmcs_timeo, __func__);

		if (pmcs_abort(pwp, pptr, htag, 0, 0)) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
			    "%s: Unable to issue SMP abort for htag 0x%08x",
			    __func__, htag);
		} else {
			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
			    "%s: Issuing SMP ABORT for htag 0x%08x",
			    __func__, htag);
		}
		return (EIO);
	}
	/* Status DWORD index depends on which IOMB form was sent. */
	status = LE_32(iomb[stsoff]);

	if (status != PMCOUT_STATUS_OK) {
		char buf[32];
		const char *es =  pmcs_status_str(status);
		if (es == NULL) {
			(void) snprintf(buf, sizeof (buf), "Status 0x%x",
			    status);
			es = buf;
		}
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
		    "%s: %s action returned %s for %s", __func__, mbar, es,
		    pptr->path);
		return (status);
	}

	return (0);
}
884
885/*
886 * Stop the (real) phys.  No PHY or softstate locks are required as this only
887 * happens during detach.
888 */
889void
890pmcs_stop_phy(pmcs_hw_t *pwp, int phynum)
891{
892	int result;
893	pmcs_phy_t *pptr;
894	uint32_t *msg;
895	struct pmcwork *pwrk;
896
897	pptr =  pwp->root_phys + phynum;
898	if (pptr == NULL) {
899		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
900		    "%s: unable to find port %d", __func__, phynum);
901		return;
902	}
903
904	if (pwp->phys_started & (1 << phynum)) {
905		pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
906
907		if (pwrk == NULL) {
908			pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL,
909			    pmcs_nowrk, __func__);
910			return;
911		}
912
913		mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
914		msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
915
916		if (msg == NULL) {
917			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
918			pmcs_pwork(pwp, pwrk);
919			pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL,
920			    pmcs_nomsg, __func__);
921			return;
922		}
923
924		msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_EVENTS, PMCIN_PHY_STOP));
925		msg[1] = LE_32(pwrk->htag);
926		msg[2] = LE_32(phynum);
927		pwrk->state = PMCS_WORK_STATE_ONCHIP;
928		/*
929		 * Make this unconfigured now.
930		 */
931		INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
932		WAIT_FOR(pwrk, 1000, result);
933		pmcs_pwork(pwp, pwrk);
934		if (result) {
935			pmcs_prt(pwp, PMCS_PRT_DEBUG,
936			    pptr, NULL, pmcs_timeo, __func__);
937		}
938
939		pwp->phys_started &= ~(1 << phynum);
940	}
941
942	pptr->configured = 0;
943}
944
945/*
946 * No locks should be required as this is only called during detach
947 */
948void
949pmcs_stop_phys(pmcs_hw_t *pwp)
950{
951	int i;
952	for (i = 0; i < pwp->nphy; i++) {
953		if ((pwp->phyid_block_mask & (1 << i)) == 0) {
954			pmcs_stop_phy(pwp, i);
955		}
956	}
957}
958
959/*
960 * Run SAS_DIAG_EXECUTE with cmd and cmd_desc passed.
961 * 	ERR_CNT_RESET: return status of cmd
962 *	DIAG_REPORT_GET: return value of the counter
963 */
964int
965pmcs_sas_diag_execute(pmcs_hw_t *pwp, uint32_t cmd, uint32_t cmd_desc,
966    uint8_t phynum)
967{
968	uint32_t htag, *ptr, status, msg[PMCS_MSG_SIZE << 1];
969	int result;
970	struct pmcwork *pwrk;
971
972	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, NULL);
973	if (pwrk == NULL) {
974		pmcs_prt(pwp, PMCS_PRT_ERR, NULL, NULL, pmcs_nowrk, __func__);
975		return (DDI_FAILURE);
976	}
977	pwrk->arg = msg;
978	htag = pwrk->htag;
979	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_EVENTS, PMCIN_SAS_DIAG_EXECUTE));
980	msg[1] = LE_32(htag);
981	msg[2] = LE_32((cmd << PMCS_DIAG_CMD_SHIFT) |
982	    (cmd_desc << PMCS_DIAG_CMD_DESC_SHIFT) | phynum);
983
984	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
985	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
986	if (ptr == NULL) {
987		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
988		pmcs_pwork(pwp, pwrk);
989		pmcs_prt(pwp, PMCS_PRT_ERR, NULL, NULL, pmcs_nomsg, __func__);
990		return (DDI_FAILURE);
991	}
992	COPY_MESSAGE(ptr, msg, 3);
993	pwrk->state = PMCS_WORK_STATE_ONCHIP;
994	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
995
996	WAIT_FOR(pwrk, 1000, result);
997	pmcs_pwork(pwp, pwrk);
998	if (result) {
999		pmcs_timed_out(pwp, htag, __func__);
1000		return (DDI_FAILURE);
1001	}
1002
1003	status = LE_32(msg[3]);
1004
1005	/* Return for counter reset */
1006	if (cmd == PMCS_ERR_CNT_RESET)
1007		return (status);
1008
1009	/* Return for counter value */
1010	if (status) {
1011		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
1012		    "%s: failed, status (0x%x)", __func__, status);
1013		return (DDI_FAILURE);
1014	}
1015	return (LE_32(msg[4]));
1016}
1017
1018/* Get the current value of the counter for desc on phynum and return it. */
1019int
1020pmcs_get_diag_report(pmcs_hw_t *pwp, uint32_t desc, uint8_t phynum)
1021{
1022	return (pmcs_sas_diag_execute(pwp, PMCS_DIAG_REPORT_GET, desc, phynum));
1023}
1024
1025/* Clear all of the counters for phynum. Returns the status of the command. */
1026int
1027pmcs_clear_diag_counters(pmcs_hw_t *pwp, uint8_t phynum)
1028{
1029	uint32_t	cmd = PMCS_ERR_CNT_RESET;
1030	uint32_t	cmd_desc;
1031
1032	cmd_desc = PMCS_INVALID_DWORD_CNT;
1033	if (pmcs_sas_diag_execute(pwp, cmd, cmd_desc, phynum))
1034		return (DDI_FAILURE);
1035
1036	cmd_desc = PMCS_DISPARITY_ERR_CNT;
1037	if (pmcs_sas_diag_execute(pwp, cmd, cmd_desc, phynum))
1038		return (DDI_FAILURE);
1039
1040	cmd_desc = PMCS_LOST_DWORD_SYNC_CNT;
1041	if (pmcs_sas_diag_execute(pwp, cmd, cmd_desc, phynum))
1042		return (DDI_FAILURE);
1043
1044	cmd_desc = PMCS_RESET_FAILED_CNT;
1045	if (pmcs_sas_diag_execute(pwp, cmd, cmd_desc, phynum))
1046		return (DDI_FAILURE);
1047
1048	return (DDI_SUCCESS);
1049}
1050
1051/*
1052 * Get firmware timestamp
1053 */
1054static int
1055pmcs_get_time_stamp(pmcs_hw_t *pwp, uint64_t *fw_ts, hrtime_t *sys_hr_ts)
1056{
1057	uint32_t htag, *ptr, msg[PMCS_MSG_SIZE << 1];
1058	int result;
1059	struct pmcwork *pwrk;
1060
1061	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, NULL);
1062	if (pwrk == NULL) {
1063		pmcs_prt(pwp, PMCS_PRT_ERR, NULL, NULL, pmcs_nowrk, __func__);
1064		return (-1);
1065	}
1066	pwrk->arg = msg;
1067	htag = pwrk->htag;
1068	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_EVENTS, PMCIN_GET_TIME_STAMP));
1069	msg[1] = LE_32(pwrk->htag);
1070
1071	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1072	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1073	if (ptr == NULL) {
1074		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1075		pmcs_pwork(pwp, pwrk);
1076		pmcs_prt(pwp, PMCS_PRT_ERR, NULL, NULL, pmcs_nomsg, __func__);
1077		return (-1);
1078	}
1079	COPY_MESSAGE(ptr, msg, 2);
1080	pwrk->state = PMCS_WORK_STATE_ONCHIP;
1081	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1082
1083	WAIT_FOR(pwrk, 1000, result);
1084	pmcs_pwork(pwp, pwrk);
1085	if (result) {
1086		pmcs_timed_out(pwp, htag, __func__);
1087		return (-1);
1088	}
1089
1090	mutex_enter(&pmcs_trace_lock);
1091	*sys_hr_ts = gethrtime();
1092	gethrestime(&pwp->sys_timestamp);
1093	*fw_ts = LE_32(msg[2]) | (((uint64_t)LE_32(msg[3])) << 32);
1094	mutex_exit(&pmcs_trace_lock);
1095	return (0);
1096}
1097
1098/*
1099 * Dump all pertinent registers
1100 */
1101
1102void
1103pmcs_register_dump(pmcs_hw_t *pwp)
1104{
1105	int i;
1106	uint32_t val;
1107
1108	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "pmcs%d: Register dump start",
1109	    ddi_get_instance(pwp->dip));
1110	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL,
1111	    "OBDB (intr): 0x%08x (mask): 0x%08x (clear): 0x%08x",
1112	    pmcs_rd_msgunit(pwp, PMCS_MSGU_OBDB),
1113	    pmcs_rd_msgunit(pwp, PMCS_MSGU_OBDB_MASK),
1114	    pmcs_rd_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR));
1115	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "SCRATCH0: 0x%08x",
1116	    pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH0));
1117	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "SCRATCH1: 0x%08x",
1118	    pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1));
1119	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "SCRATCH2: 0x%08x",
1120	    pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2));
1121	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "SCRATCH3: 0x%08x",
1122	    pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH3));
1123	for (i = 0; i < PMCS_NIQ; i++) {
1124		pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "IQ %d: CI %u PI %u",
1125		    i, pmcs_rd_iqci(pwp, i), pmcs_rd_iqpi(pwp, i));
1126	}
1127	for (i = 0; i < PMCS_NOQ; i++) {
1128		pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "OQ %d: CI %u PI %u",
1129		    i, pmcs_rd_oqci(pwp, i), pmcs_rd_oqpi(pwp, i));
1130	}
1131	val = pmcs_rd_gst_tbl(pwp, PMCS_GST_BASE);
1132	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL,
1133	    "GST TABLE BASE: 0x%08x (STATE=0x%x QF=%d GSTLEN=%d HMI_ERR=0x%x)",
1134	    val, PMCS_MPI_S(val), PMCS_QF(val), PMCS_GSTLEN(val) * 4,
1135	    PMCS_HMI_ERR(val));
1136	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "GST TABLE IQFRZ0: 0x%08x",
1137	    pmcs_rd_gst_tbl(pwp, PMCS_GST_IQFRZ0));
1138	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "GST TABLE IQFRZ1: 0x%08x",
1139	    pmcs_rd_gst_tbl(pwp, PMCS_GST_IQFRZ1));
1140	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "GST TABLE MSGU TICK: 0x%08x",
1141	    pmcs_rd_gst_tbl(pwp, PMCS_GST_MSGU_TICK));
1142	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "GST TABLE IOP TICK: 0x%08x",
1143	    pmcs_rd_gst_tbl(pwp, PMCS_GST_IOP_TICK));
1144	for (i = 0; i < pwp->nphy; i++) {
1145		uint32_t rerrf, pinfo, started = 0, link = 0;
1146		pinfo = pmcs_rd_gst_tbl(pwp, PMCS_GST_PHY_INFO(i));
1147		if (pinfo & 1) {
1148			started = 1;
1149			link = pinfo & 2;
1150		}
1151		rerrf = pmcs_rd_gst_tbl(pwp, PMCS_GST_RERR_INFO(i));
1152		pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL,
1153		    "GST TABLE PHY%d STARTED=%d LINK=%d RERR=0x%08x",
1154		    i, started, link, rerrf);
1155	}
1156	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "pmcs%d: Register dump end",
1157	    ddi_get_instance(pwp->dip));
1158}
1159
1160/*
1161 * Handle SATA Abort and other error processing
1162 */
1163int
1164pmcs_abort_handler(pmcs_hw_t *pwp)
1165{
1166	pmcs_phy_t *pptr, *pnext, *pnext_uplevel[PMCS_MAX_XPND];
1167	pmcs_xscsi_t *tgt;
1168	int r, level = 0;
1169
1170	pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL, "%s", __func__);
1171
1172	mutex_enter(&pwp->lock);
1173	pptr = pwp->root_phys;
1174	mutex_exit(&pwp->lock);
1175
1176	while (pptr) {
1177		/*
1178		 * XXX: Need to make sure this doesn't happen
1179		 * XXX: when non-NCQ commands are running.
1180		 */
1181		pmcs_lock_phy(pptr);
1182		if (pptr->need_rl_ext) {
1183			ASSERT(pptr->dtype == SATA);
1184			if (pmcs_acquire_scratch(pwp, B_FALSE)) {
1185				goto next_phy;
1186			}
1187			r = pmcs_sata_abort_ncq(pwp, pptr);
1188			pmcs_release_scratch(pwp);
1189			if (r == ENOMEM) {
1190				goto next_phy;
1191			}
1192			if (r) {
1193				r = pmcs_reset_phy(pwp, pptr,
1194				    PMCS_PHYOP_LINK_RESET);
1195				if (r == ENOMEM) {
1196					goto next_phy;
1197				}
1198				/* what if other failures happened? */
1199				pptr->abort_pending = 1;
1200				pptr->abort_sent = 0;
1201			}
1202		}
1203		if (pptr->abort_pending == 0 || pptr->abort_sent) {
1204			goto next_phy;
1205		}
1206		pptr->abort_pending = 0;
1207		if (pmcs_abort(pwp, pptr, pptr->device_id, 1, 1) == ENOMEM) {
1208			pptr->abort_pending = 1;
1209			goto next_phy;
1210		}
1211		pptr->abort_sent = 1;
1212
1213		/*
1214		 * If the iport is no longer active, flush the queues
1215		 */
1216		if ((pptr->iport == NULL) ||
1217		    (pptr->iport->ua_state != UA_ACTIVE)) {
1218			tgt = pptr->target;
1219			if (tgt != NULL) {
1220				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, tgt,
1221				    "%s: Clearing target 0x%p, inactive iport",
1222				    __func__, (void *) tgt);
1223				mutex_enter(&tgt->statlock);
1224				pmcs_clear_xp(pwp, tgt);
1225				mutex_exit(&tgt->statlock);
1226			}
1227		}
1228
1229next_phy:
1230		if (pptr->children) {
1231			pnext = pptr->children;
1232			pnext_uplevel[level++] = pptr->sibling;
1233		} else {
1234			pnext = pptr->sibling;
1235			while ((pnext == NULL) && (level > 0)) {
1236				pnext = pnext_uplevel[--level];
1237			}
1238		}
1239
1240		pmcs_unlock_phy(pptr);
1241		pptr = pnext;
1242	}
1243
1244	return (0);
1245}
1246
1247/*
1248 * Register a device (get a device handle for it).
1249 * Called with PHY lock held.
1250 */
1251int
1252pmcs_register_device(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
1253{
1254	struct pmcwork *pwrk;
1255	int result = 0;
1256	uint32_t *msg;
1257	uint32_t tmp, status;
1258	uint32_t iomb[(PMCS_QENTRY_SIZE << 1) >> 2];
1259
1260	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1261	msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1262
1263	if (msg == NULL ||
1264	    (pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr)) == NULL) {
1265		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1266		result = ENOMEM;
1267		goto out;
1268	}
1269
1270	pwrk->arg = iomb;
1271	pwrk->dtype = pptr->dtype;
1272
1273	msg[1] = LE_32(pwrk->htag);
1274	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, PMCIN_REGISTER_DEVICE));
1275	tmp = PMCS_DEVREG_TLR |
1276	    (pptr->link_rate << PMCS_DEVREG_LINK_RATE_SHIFT);
1277	if (IS_ROOT_PHY(pptr)) {
1278		msg[2] = LE_32(pptr->portid |
1279		    (pptr->phynum << PMCS_PHYID_SHIFT));
1280	} else {
1281		msg[2] = LE_32(pptr->portid);
1282	}
1283	if (pptr->dtype == SATA) {
1284		if (IS_ROOT_PHY(pptr)) {
1285			tmp |= PMCS_DEVREG_TYPE_SATA_DIRECT;
1286		} else {
1287			tmp |= PMCS_DEVREG_TYPE_SATA;
1288		}
1289	} else {
1290		tmp |= PMCS_DEVREG_TYPE_SAS;
1291	}
1292	msg[3] = LE_32(tmp);
1293	msg[4] = LE_32(PMCS_DEVREG_IT_NEXUS_TIMEOUT);
1294	(void) memcpy(&msg[5], pptr->sas_address, 8);
1295
1296	CLEAN_MESSAGE(msg, 7);
1297	pwrk->state = PMCS_WORK_STATE_ONCHIP;
1298	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1299
1300	pmcs_unlock_phy(pptr);
1301	WAIT_FOR(pwrk, 250, result);
1302	pmcs_pwork(pwp, pwrk);
1303	pmcs_lock_phy(pptr);
1304
1305	if (result) {
1306		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL, pmcs_timeo, __func__);
1307		result = ETIMEDOUT;
1308		goto out;
1309	}
1310	status = LE_32(iomb[2]);
1311	tmp = LE_32(iomb[3]);
1312	switch (status) {
1313	case PMCS_DEVREG_OK:
1314	case PMCS_DEVREG_DEVICE_ALREADY_REGISTERED:
1315	case PMCS_DEVREG_PHY_ALREADY_REGISTERED:
1316		if (pmcs_validate_devid(pwp->root_phys, pptr, tmp) == B_FALSE) {
1317			result = EEXIST;
1318			goto out;
1319		} else if (status != PMCS_DEVREG_OK) {
1320			if (tmp == 0xffffffff) {	/* F/W bug */
1321				pmcs_prt(pwp, PMCS_PRT_INFO, pptr, NULL,
1322				    "%s: phy %s already has bogus devid 0x%x",
1323				    __func__, pptr->path, tmp);
1324				result = EIO;
1325				goto out;
1326			} else {
1327				pmcs_prt(pwp, PMCS_PRT_INFO, pptr, NULL,
1328				    "%s: phy %s already has a device id 0x%x",
1329				    __func__, pptr->path, tmp);
1330			}
1331		}
1332		break;
1333	default:
1334		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
1335		    "%s: status 0x%x when trying to register device %s",
1336		    __func__, status, pptr->path);
1337		result = EIO;
1338		goto out;
1339	}
1340	pptr->device_id = tmp;
1341	pptr->valid_device_id = 1;
1342	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL, "Phy %s/" SAS_ADDR_FMT
1343	    " registered with device_id 0x%x (portid %d)", pptr->path,
1344	    SAS_ADDR_PRT(pptr->sas_address), tmp, pptr->portid);
1345out:
1346	return (result);
1347}
1348
1349/*
1350 * Deregister a device (remove a device handle).
1351 * Called with PHY locked.
1352 */
1353void
1354pmcs_deregister_device(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
1355{
1356	struct pmcwork *pwrk;
1357	uint32_t msg[PMCS_MSG_SIZE], *ptr, status;
1358	uint32_t iomb[(PMCS_QENTRY_SIZE << 1) >> 2];
1359	int result;
1360
1361	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
1362	if (pwrk == NULL) {
1363		return;
1364	}
1365
1366	pwrk->arg = iomb;
1367	pwrk->dtype = pptr->dtype;
1368	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1369	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1370	if (ptr == NULL) {
1371		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1372		pmcs_pwork(pwp, pwrk);
1373		return;
1374	}
1375	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
1376	    PMCIN_DEREGISTER_DEVICE_HANDLE));
1377	msg[1] = LE_32(pwrk->htag);
1378	msg[2] = LE_32(pptr->device_id);
1379	pwrk->state = PMCS_WORK_STATE_ONCHIP;
1380	COPY_MESSAGE(ptr, msg, 3);
1381	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1382
1383	pmcs_unlock_phy(pptr);
1384	WAIT_FOR(pwrk, 250, result);
1385	pmcs_pwork(pwp, pwrk);
1386	pmcs_lock_phy(pptr);
1387
1388	if (result) {
1389		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL, pmcs_timeo, __func__);
1390		return;
1391	}
1392	status = LE_32(iomb[2]);
1393	if (status != PMCOUT_STATUS_OK) {
1394		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
1395		    "%s: status 0x%x when trying to deregister device %s",
1396		    __func__, status, pptr->path);
1397	} else {
1398		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
1399		    "%s: device %s deregistered", __func__, pptr->path);
1400	}
1401
1402	pptr->device_id = PMCS_INVALID_DEVICE_ID;
1403	pptr->configured = 0;
1404	pptr->deregister_wait = 0;
1405	pptr->valid_device_id = 0;
1406}
1407
1408/*
1409 * Deregister all registered devices.
1410 */
1411void
1412pmcs_deregister_devices(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
1413{
1414	/*
1415	 * Start at the maximum level and walk back to level 0.  This only
1416	 * gets done during detach after all threads and timers have been
1417	 * destroyed.
1418	 */
1419	while (phyp) {
1420		if (phyp->children) {
1421			pmcs_deregister_devices(pwp, phyp->children);
1422		}
1423		pmcs_lock_phy(phyp);
1424		if (phyp->valid_device_id) {
1425			pmcs_deregister_device(pwp, phyp);
1426		}
1427		pmcs_unlock_phy(phyp);
1428		phyp = phyp->sibling;
1429	}
1430}
1431
1432/*
1433 * Perform a 'soft' reset on the PMC chip
1434 */
1435int
1436pmcs_soft_reset(pmcs_hw_t *pwp, boolean_t no_restart)
1437{
1438	uint32_t s2, sfrbits, gsm, rapchk, wapchk, wdpchk, spc, tsmode;
1439	pmcs_phy_t *pptr;
1440	char *msg = NULL;
1441	int i;
1442
1443	/*
1444	 * Disable interrupts
1445	 */
1446	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_MASK, 0xffffffff);
1447	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR, 0xffffffff);
1448
1449	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL, "%s", __func__);
1450
1451	if (pwp->locks_initted) {
1452		mutex_enter(&pwp->lock);
1453	}
1454	pwp->blocked = 1;
1455
1456	/*
1457	 * Clear our softstate copies of the MSGU and IOP heartbeats.
1458	 */
1459	pwp->last_msgu_tick = pwp->last_iop_tick = 0;
1460
1461	/*
1462	 * Step 1
1463	 */
1464	s2 = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2);
1465	if ((s2 & PMCS_MSGU_HOST_SOFT_RESET_READY) == 0) {
1466		pmcs_wr_gsm_reg(pwp, RB6_ACCESS, RB6_NMI_SIGNATURE);
1467		pmcs_wr_gsm_reg(pwp, RB6_ACCESS, RB6_NMI_SIGNATURE);
1468		for (i = 0; i < 100; i++) {
1469			s2 = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2) &
1470			    PMCS_MSGU_HOST_SOFT_RESET_READY;
1471			if (s2) {
1472				break;
1473			}
1474			drv_usecwait(10000);
1475		}
1476		s2 = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2) &
1477		    PMCS_MSGU_HOST_SOFT_RESET_READY;
1478		if (s2 == 0) {
1479			pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
1480			    "%s: PMCS_MSGU_HOST_SOFT_RESET_READY never came "
1481			    "ready", __func__);
1482			pmcs_register_dump(pwp);
1483			if ((pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1) &
1484			    PMCS_MSGU_CPU_SOFT_RESET_READY) == 0 ||
1485			    (pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2) &
1486			    PMCS_MSGU_CPU_SOFT_RESET_READY) == 0) {
1487				pwp->state = STATE_DEAD;
1488				pwp->blocked = 0;
1489				if (pwp->locks_initted) {
1490					mutex_exit(&pwp->lock);
1491				}
1492				return (-1);
1493			}
1494		}
1495	}
1496
1497	/*
1498	 * Step 2
1499	 */
1500	pmcs_wr_gsm_reg(pwp, NMI_EN_VPE0_IOP, 0);
1501	drv_usecwait(10);
1502	pmcs_wr_gsm_reg(pwp, NMI_EN_VPE0_AAP1, 0);
1503	drv_usecwait(10);
1504	pmcs_wr_topunit(pwp, PMCS_EVENT_INT_ENABLE, 0);
1505	drv_usecwait(10);
1506	pmcs_wr_topunit(pwp, PMCS_EVENT_INT_STAT,
1507	    pmcs_rd_topunit(pwp, PMCS_EVENT_INT_STAT));
1508	drv_usecwait(10);
1509	pmcs_wr_topunit(pwp, PMCS_ERROR_INT_ENABLE, 0);
1510	drv_usecwait(10);
1511	pmcs_wr_topunit(pwp, PMCS_ERROR_INT_STAT,
1512	    pmcs_rd_topunit(pwp, PMCS_ERROR_INT_STAT));
1513	drv_usecwait(10);
1514
1515	sfrbits = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1) &
1516	    PMCS_MSGU_AAP_SFR_PROGRESS;
1517	sfrbits ^= PMCS_MSGU_AAP_SFR_PROGRESS;
1518	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "PMCS_MSGU_HOST_SCRATCH0 "
1519	    "%08x -> %08x", pmcs_rd_msgunit(pwp, PMCS_MSGU_HOST_SCRATCH0),
1520	    HST_SFT_RESET_SIG);
1521	pmcs_wr_msgunit(pwp, PMCS_MSGU_HOST_SCRATCH0, HST_SFT_RESET_SIG);
1522
1523	/*
1524	 * Step 3
1525	 */
1526	gsm = pmcs_rd_gsm_reg(pwp, 0, GSM_CFG_AND_RESET);
1527	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "GSM %08x -> %08x", gsm,
1528	    gsm & ~PMCS_SOFT_RESET_BITS);
1529	pmcs_wr_gsm_reg(pwp, GSM_CFG_AND_RESET, gsm & ~PMCS_SOFT_RESET_BITS);
1530
1531	/*
1532	 * Step 4
1533	 */
1534	rapchk = pmcs_rd_gsm_reg(pwp, 0, READ_ADR_PARITY_CHK_EN);
1535	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "READ_ADR_PARITY_CHK_EN "
1536	    "%08x -> %08x", rapchk, 0);
1537	pmcs_wr_gsm_reg(pwp, READ_ADR_PARITY_CHK_EN, 0);
1538	wapchk = pmcs_rd_gsm_reg(pwp, 0, WRITE_ADR_PARITY_CHK_EN);
1539	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "WRITE_ADR_PARITY_CHK_EN "
1540	    "%08x -> %08x", wapchk, 0);
1541	pmcs_wr_gsm_reg(pwp, WRITE_ADR_PARITY_CHK_EN, 0);
1542	wdpchk = pmcs_rd_gsm_reg(pwp, 0, WRITE_DATA_PARITY_CHK_EN);
1543	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "WRITE_DATA_PARITY_CHK_EN "
1544	    "%08x -> %08x", wdpchk, 0);
1545	pmcs_wr_gsm_reg(pwp, WRITE_DATA_PARITY_CHK_EN, 0);
1546
1547	/*
1548	 * Step 5
1549	 */
1550	drv_usecwait(100);
1551
1552	/*
1553	 * Step 5.5 (Temporary workaround for 1.07.xx Beta)
1554	 */
1555	tsmode = pmcs_rd_gsm_reg(pwp, 0, PMCS_GPIO_TRISTATE_MODE_ADDR);
1556	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "GPIO TSMODE %08x -> %08x",
1557	    tsmode, tsmode & ~(PMCS_GPIO_TSMODE_BIT0|PMCS_GPIO_TSMODE_BIT1));
1558	pmcs_wr_gsm_reg(pwp, PMCS_GPIO_TRISTATE_MODE_ADDR,
1559	    tsmode & ~(PMCS_GPIO_TSMODE_BIT0|PMCS_GPIO_TSMODE_BIT1));
1560	drv_usecwait(10);
1561
1562	/*
1563	 * Step 6
1564	 */
1565	spc = pmcs_rd_topunit(pwp, PMCS_SPC_RESET);
1566	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "SPC_RESET %08x -> %08x",
1567	    spc, spc & ~(PCS_IOP_SS_RSTB|PCS_AAP1_SS_RSTB));
1568	pmcs_wr_topunit(pwp, PMCS_SPC_RESET,
1569	    spc & ~(PCS_IOP_SS_RSTB|PCS_AAP1_SS_RSTB));
1570	drv_usecwait(10);
1571
1572	/*
1573	 * Step 7
1574	 */
1575	spc = pmcs_rd_topunit(pwp, PMCS_SPC_RESET);
1576	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "SPC_RESET %08x -> %08x",
1577	    spc, spc & ~(BDMA_CORE_RSTB|OSSP_RSTB));
1578	pmcs_wr_topunit(pwp, PMCS_SPC_RESET, spc & ~(BDMA_CORE_RSTB|OSSP_RSTB));
1579
1580	/*
1581	 * Step 8
1582	 */
1583	drv_usecwait(100);
1584
1585	/*
1586	 * Step 9
1587	 */
1588	spc = pmcs_rd_topunit(pwp, PMCS_SPC_RESET);
1589	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "SPC_RESET %08x -> %08x",
1590	    spc, spc | (BDMA_CORE_RSTB|OSSP_RSTB));
1591	pmcs_wr_topunit(pwp, PMCS_SPC_RESET, spc | (BDMA_CORE_RSTB|OSSP_RSTB));
1592
1593	/*
1594	 * Step 10
1595	 */
1596	drv_usecwait(100);
1597
1598	/*
1599	 * Step 11
1600	 */
1601	gsm = pmcs_rd_gsm_reg(pwp, 0, GSM_CFG_AND_RESET);
1602	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "GSM %08x -> %08x", gsm,
1603	    gsm | PMCS_SOFT_RESET_BITS);
1604	pmcs_wr_gsm_reg(pwp, GSM_CFG_AND_RESET, gsm | PMCS_SOFT_RESET_BITS);
1605	drv_usecwait(10);
1606
1607	/*
1608	 * Step 12
1609	 */
1610	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "READ_ADR_PARITY_CHK_EN "
1611	    "%08x -> %08x", pmcs_rd_gsm_reg(pwp, 0, READ_ADR_PARITY_CHK_EN),
1612	    rapchk);
1613	pmcs_wr_gsm_reg(pwp, READ_ADR_PARITY_CHK_EN, rapchk);
1614	drv_usecwait(10);
1615	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "WRITE_ADR_PARITY_CHK_EN "
1616	    "%08x -> %08x", pmcs_rd_gsm_reg(pwp, 0, WRITE_ADR_PARITY_CHK_EN),
1617	    wapchk);
1618	pmcs_wr_gsm_reg(pwp, WRITE_ADR_PARITY_CHK_EN, wapchk);
1619	drv_usecwait(10);
1620	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "WRITE_DATA_PARITY_CHK_EN "
1621	    "%08x -> %08x", pmcs_rd_gsm_reg(pwp, 0, WRITE_DATA_PARITY_CHK_EN),
1622	    wapchk);
1623	pmcs_wr_gsm_reg(pwp, WRITE_DATA_PARITY_CHK_EN, wdpchk);
1624	drv_usecwait(10);
1625
1626	/*
1627	 * Step 13
1628	 */
1629	spc = pmcs_rd_topunit(pwp, PMCS_SPC_RESET);
1630	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL, "SPC_RESET %08x -> %08x",
1631	    spc, spc | (PCS_IOP_SS_RSTB|PCS_AAP1_SS_RSTB));
1632	pmcs_wr_topunit(pwp, PMCS_SPC_RESET,
1633	    spc | (PCS_IOP_SS_RSTB|PCS_AAP1_SS_RSTB));
1634
1635	/*
1636	 * Step 14
1637	 */
1638	drv_usecwait(100);
1639
1640	/*
1641	 * Step 15
1642	 */
1643	for (spc = 0, i = 0; i < 1000; i++) {
1644		drv_usecwait(1000);
1645		spc = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1);
1646		if ((spc & PMCS_MSGU_AAP_SFR_PROGRESS) == sfrbits) {
1647			break;
1648		}
1649	}
1650
1651	if ((spc & PMCS_MSGU_AAP_SFR_PROGRESS) != sfrbits) {
1652		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
1653		    "SFR didn't toggle (sfr 0x%x)", spc);
1654		pwp->state = STATE_DEAD;
1655		pwp->blocked = 0;
1656		if (pwp->locks_initted) {
1657			mutex_exit(&pwp->lock);
1658		}
1659		return (-1);
1660	}
1661
1662	/*
1663	 * Step 16
1664	 */
1665	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_MASK, 0xffffffff);
1666	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR, 0xffffffff);
1667
1668	/*
1669	 * Wait for up to 5 seconds for AAP state to come either ready or error.
1670	 */
1671	for (i = 0; i < 50; i++) {
1672		spc = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1) &
1673		    PMCS_MSGU_AAP_STATE_MASK;
1674		if (spc == PMCS_MSGU_AAP_STATE_ERROR ||
1675		    spc == PMCS_MSGU_AAP_STATE_READY) {
1676			break;
1677		}
1678		drv_usecwait(100000);
1679	}
1680	spc = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1);
1681	if ((spc & PMCS_MSGU_AAP_STATE_MASK) != PMCS_MSGU_AAP_STATE_READY) {
1682		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
1683		    "soft reset failed (state 0x%x)", spc);
1684		pwp->state = STATE_DEAD;
1685		pwp->blocked = 0;
1686		if (pwp->locks_initted) {
1687			mutex_exit(&pwp->lock);
1688		}
1689		return (-1);
1690	}
1691
1692	/* Clear the firmware log */
1693	if (pwp->fwlogp) {
1694		bzero(pwp->fwlogp, PMCS_FWLOG_SIZE);
1695	}
1696
1697	/* Reset our queue indices and entries */
1698	bzero(pwp->shadow_iqpi, sizeof (pwp->shadow_iqpi));
1699	bzero(pwp->last_iqci, sizeof (pwp->last_iqci));
1700	bzero(pwp->last_htag, sizeof (pwp->last_htag));
1701	for (i = 0; i < PMCS_NIQ; i++) {
1702		if (pwp->iqp[i]) {
1703			bzero(pwp->iqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth);
1704			pmcs_wr_iqpi(pwp, i, 0);
1705			pmcs_wr_iqci(pwp, i, 0);
1706		}
1707	}
1708	for (i = 0; i < PMCS_NOQ; i++) {
1709		if (pwp->oqp[i]) {
1710			bzero(pwp->oqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth);
1711			pmcs_wr_oqpi(pwp, i, 0);
1712			pmcs_wr_oqci(pwp, i, 0);
1713		}
1714
1715	}
1716
1717	if (pwp->state == STATE_DEAD || pwp->state == STATE_UNPROBING ||
1718	    pwp->state == STATE_PROBING || pwp->locks_initted == 0) {
1719		pwp->blocked = 0;
1720		if (pwp->locks_initted) {
1721			mutex_exit(&pwp->lock);
1722		}
1723		return (0);
1724	}
1725
1726	/*
1727	 * Return at this point if we dont need to startup.
1728	 */
1729	if (no_restart) {
1730		return (0);
1731	}
1732
1733	ASSERT(pwp->locks_initted != 0);
1734
1735	/*
1736	 * Flush the target queues and clear each target's PHY
1737	 */
1738	if (pwp->targets) {
1739		for (i = 0; i < pwp->max_dev; i++) {
1740			pmcs_xscsi_t *xp = pwp->targets[i];
1741
1742			if (xp == NULL) {
1743				continue;
1744			}
1745
1746			mutex_enter(&xp->statlock);
1747			pmcs_flush_target_queues(pwp, xp, PMCS_TGT_ALL_QUEUES);
1748			xp->phy = NULL;
1749			mutex_exit(&xp->statlock);
1750		}
1751	}
1752
1753	/*
1754	 * Zero out the ports list, free non root phys, clear root phys
1755	 */
1756	bzero(pwp->ports, sizeof (pwp->ports));
1757	pmcs_free_all_phys(pwp, pwp->root_phys);
1758	for (pptr = pwp->root_phys; pptr; pptr = pptr->sibling) {
1759		pmcs_lock_phy(pptr);
1760		pmcs_clear_phy(pwp, pptr);
1761		pptr->target = NULL;
1762		pmcs_unlock_phy(pptr);
1763	}
1764
1765	/*
1766	 * Restore Interrupt Mask
1767	 */
1768	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_MASK, pwp->intr_mask);
1769	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR, 0xffffffff);
1770
1771	pwp->mpi_table_setup = 0;
1772	mutex_exit(&pwp->lock);
1773
1774	/*
1775	 * Set up MPI again.
1776	 */
1777	if (pmcs_setup(pwp)) {
1778		msg = "unable to setup MPI tables again";
1779		goto fail_restart;
1780	}
1781	pmcs_report_fwversion(pwp);
1782
1783	/*
1784	 * Restart MPI
1785	 */
1786	if (pmcs_start_mpi(pwp)) {
1787		msg = "unable to restart MPI again";
1788		goto fail_restart;
1789	}
1790
1791	mutex_enter(&pwp->lock);
1792	SCHEDULE_WORK(pwp, PMCS_WORK_RUN_QUEUES);
1793	mutex_exit(&pwp->lock);
1794
1795	/*
1796	 * Run any completions
1797	 */
1798	PMCS_CQ_RUN(pwp);
1799
1800	/*
1801	 * Delay
1802	 */
1803	drv_usecwait(1000000);
1804	return (0);
1805
1806fail_restart:
1807	mutex_enter(&pwp->lock);
1808	pwp->state = STATE_DEAD;
1809	mutex_exit(&pwp->lock);
1810	pmcs_prt(pwp, PMCS_PRT_ERR, NULL, NULL,
1811	    "%s: Failed: %s", __func__, msg);
1812	return (-1);
1813}
1814
1815
1816/*
1817 * Perform a 'hot' reset, which will soft reset the chip and
1818 * restore the state back to pre-reset context. Called with pwp
1819 * lock held.
1820 */
1821int
1822pmcs_hot_reset(pmcs_hw_t *pwp)
1823{
1824	pmcs_iport_t	*iport;
1825
1826	ASSERT(mutex_owned(&pwp->lock));
1827	pwp->state = STATE_IN_RESET;
1828
1829	/*
1830	 * For any iports on this HBA, report empty target sets and
1831	 * then tear them down.
1832	 */
1833	rw_enter(&pwp->iports_lock, RW_READER);
1834	for (iport = list_head(&pwp->iports); iport != NULL;
1835	    iport = list_next(&pwp->iports, iport)) {
1836		mutex_enter(&iport->lock);
1837		(void) scsi_hba_tgtmap_set_begin(iport->iss_tgtmap);
1838		(void) scsi_hba_tgtmap_set_end(iport->iss_tgtmap, 0);
1839		pmcs_iport_teardown_phys(iport);
1840		mutex_exit(&iport->lock);
1841	}
1842	rw_exit(&pwp->iports_lock);
1843
1844	/* Grab a register dump, in the event that reset fails */
1845	pmcs_register_dump_int(pwp);
1846	mutex_exit(&pwp->lock);
1847
1848	/* Ensure discovery is not running before we proceed */
1849	mutex_enter(&pwp->config_lock);
1850	while (pwp->configuring) {
1851		cv_wait(&pwp->config_cv, &pwp->config_lock);
1852	}
1853	mutex_exit(&pwp->config_lock);
1854
1855	/* Issue soft reset and clean up related softstate */
1856	if (pmcs_soft_reset(pwp, B_FALSE)) {
1857		/*
1858		 * Disable interrupts, in case we got far enough along to
1859		 * enable them, then fire off ereport and service impact.
1860		 */
1861		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
1862		    "%s: failed soft reset", __func__);
1863		pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_MASK, 0xffffffff);
1864		pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR, 0xffffffff);
1865		pmcs_fm_ereport(pwp, DDI_FM_DEVICE_NO_RESPONSE);
1866		ddi_fm_service_impact(pwp->dip, DDI_SERVICE_LOST);
1867		mutex_enter(&pwp->lock);
1868		pwp->state = STATE_DEAD;
1869		return (DDI_FAILURE);
1870	}
1871
1872	mutex_enter(&pwp->lock);
1873	pwp->state = STATE_RUNNING;
1874	mutex_exit(&pwp->lock);
1875
1876	/*
1877	 * Finally, restart the phys, which will bring the iports back
1878	 * up and eventually result in discovery running.
1879	 */
1880	if (pmcs_start_phys(pwp)) {
1881		/* We should be up and running now, so retry */
1882		if (pmcs_start_phys(pwp)) {
1883			/* Apparently unable to restart PHYs, fail */
1884			pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
1885			    "%s: failed to restart PHYs after soft reset",
1886			    __func__);
1887			mutex_enter(&pwp->lock);
1888			return (DDI_FAILURE);
1889		}
1890	}
1891
1892	mutex_enter(&pwp->lock);
1893	return (DDI_SUCCESS);
1894}
1895
1896/*
1897 * Reset a device or a logical unit.
1898 */
1899int
1900pmcs_reset_dev(pmcs_hw_t *pwp, pmcs_phy_t *pptr, uint64_t lun)
1901{
1902	int rval = 0;
1903
1904	if (pptr == NULL) {
1905		return (ENXIO);
1906	}
1907
1908	pmcs_lock_phy(pptr);
1909	if (pptr->dtype == SAS) {
1910		/*
1911		 * Some devices do not support SAS_I_T_NEXUS_RESET as
1912		 * it is not a mandatory (in SAM4) task management
1913		 * function, while LOGIC_UNIT_RESET is mandatory.
1914		 *
1915		 * The problem here is that we need to iterate over
1916		 * all known LUNs to emulate the semantics of
1917		 * "RESET_TARGET".
1918		 *
1919		 * XXX: FIX ME
1920		 */
1921		if (lun == (uint64_t)-1) {
1922			lun = 0;
1923		}
1924		rval = pmcs_ssp_tmf(pwp, pptr, SAS_LOGICAL_UNIT_RESET, 0, lun,
1925		    NULL);
1926	} else if (pptr->dtype == SATA) {
1927		if (lun != 0ull) {
1928			pmcs_unlock_phy(pptr);
1929			return (EINVAL);
1930		}
1931		rval = pmcs_reset_phy(pwp, pptr, PMCS_PHYOP_LINK_RESET);
1932	} else {
1933		pmcs_unlock_phy(pptr);
1934		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
1935		    "%s: cannot reset a SMP device yet (%s)",
1936		    __func__, pptr->path);
1937		return (EINVAL);
1938	}
1939
1940	/*
1941	 * Now harvest any commands killed by this action
1942	 * by issuing an ABORT for all commands on this device.
1943	 *
1944	 * We do this even if the the tmf or reset fails (in case there
1945	 * are any dead commands around to be harvested *anyway*).
1946	 * We don't have to await for the abort to complete.
1947	 */
1948	if (pmcs_abort(pwp, pptr, 0, 1, 0)) {
1949		pptr->abort_pending = 1;
1950		SCHEDULE_WORK(pwp, PMCS_WORK_ABORT_HANDLE);
1951	}
1952
1953	pmcs_unlock_phy(pptr);
1954	return (rval);
1955}
1956
1957/*
1958 * Called with PHY locked.
1959 */
1960static int
1961pmcs_get_device_handle(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
1962{
1963	if (pptr->valid_device_id == 0) {
1964		int result = pmcs_register_device(pwp, pptr);
1965
1966		/*
1967		 * If we changed while registering, punt
1968		 */
1969		if (pptr->changed) {
1970			RESTART_DISCOVERY(pwp);
1971			return (-1);
1972		}
1973
1974		/*
1975		 * If we had a failure to register, check against errors.
1976		 * An ENOMEM error means we just retry (temp resource shortage).
1977		 */
1978		if (result == ENOMEM) {
1979			PHY_CHANGED(pwp, pptr);
1980			RESTART_DISCOVERY(pwp);
1981			return (-1);
1982		}
1983
1984		/*
1985		 * An ETIMEDOUT error means we retry (if our counter isn't
1986		 * exhausted)
1987		 */
1988		if (result == ETIMEDOUT) {
1989			if (ddi_get_lbolt() < pptr->config_stop) {
1990				PHY_CHANGED(pwp, pptr);
1991				RESTART_DISCOVERY(pwp);
1992			} else {
1993				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
1994				    "%s: Retries exhausted for %s, killing",
1995				    __func__, pptr->path);
1996				pptr->config_stop = 0;
1997				pmcs_kill_changed(pwp, pptr, 0);
1998			}
1999			return (-1);
2000		}
2001		/*
2002		 * Other errors or no valid device id is fatal, but don't
2003		 * preclude a future action.
2004		 */
2005		if (result || pptr->valid_device_id == 0) {
2006			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
2007			    "%s: %s could not be registered", __func__,
2008			    pptr->path);
2009			return (-1);
2010		}
2011	}
2012	return (0);
2013}
2014
2015int
2016pmcs_iport_tgtmap_create(pmcs_iport_t *iport)
2017{
2018	ASSERT(iport);
2019	if (iport == NULL)
2020		return (B_FALSE);
2021
2022	pmcs_prt(iport->pwp, PMCS_PRT_DEBUG_MAP, NULL, NULL, "%s", __func__);
2023
2024	/* create target map */
2025	if (scsi_hba_tgtmap_create(iport->dip, SCSI_TM_FULLSET,
2026	    tgtmap_csync_usec, tgtmap_stable_usec, (void *)iport,
2027	    pmcs_tgtmap_activate_cb, pmcs_tgtmap_deactivate_cb,
2028	    &iport->iss_tgtmap) != DDI_SUCCESS) {
2029		pmcs_prt(iport->pwp, PMCS_PRT_DEBUG, NULL, NULL,
2030		    "%s: failed to create tgtmap", __func__);
2031		return (B_FALSE);
2032	}
2033	return (B_TRUE);
2034}
2035
2036int
2037pmcs_iport_tgtmap_destroy(pmcs_iport_t *iport)
2038{
2039	ASSERT(iport && iport->iss_tgtmap);
2040	if ((iport == NULL) || (iport->iss_tgtmap == NULL))
2041		return (B_FALSE);
2042
2043	pmcs_prt(iport->pwp, PMCS_PRT_DEBUG_MAP, NULL, NULL, "%s", __func__);
2044
2045	/* destroy target map */
2046	scsi_hba_tgtmap_destroy(iport->iss_tgtmap);
2047	return (B_TRUE);
2048}
2049
2050/*
2051 * Remove all phys from an iport's phymap and empty it's phylist.
2052 * Called when a port has been reset by the host (see pmcs_intr.c)
2053 * or prior to issuing a soft reset if we detect a stall on the chip
2054 * (see pmcs_attach.c).
2055 */
2056void
2057pmcs_iport_teardown_phys(pmcs_iport_t *iport)
2058{
2059	pmcs_hw_t		*pwp;
2060	sas_phymap_phys_t	*phys;
2061	int			phynum;
2062
2063	ASSERT(iport);
2064	ASSERT(mutex_owned(&iport->lock));
2065	pwp = iport->pwp;
2066	ASSERT(pwp);
2067
2068	/*
2069	 * Remove all phys from the iport handle's phy list, unset its
2070	 * primary phy and update its state.
2071	 */
2072	pmcs_remove_phy_from_iport(iport, NULL);
2073	iport->pptr = NULL;
2074	iport->ua_state = UA_PEND_DEACTIVATE;
2075
2076	/* Remove all phys from the phymap */
2077	phys = sas_phymap_ua2phys(pwp->hss_phymap, iport->ua);
2078	if (phys) {
2079		while ((phynum = sas_phymap_phys_next(phys)) != -1) {
2080			(void) sas_phymap_phy_rem(pwp->hss_phymap, phynum);
2081		}
2082		sas_phymap_phys_free(phys);
2083	}
2084}
2085
2086/*
2087 * Query the phymap and populate the iport handle passed in.
2088 * Called with iport lock held.
2089 */
int
pmcs_iport_configure_phys(pmcs_iport_t *iport)
{
	pmcs_hw_t		*pwp;
	pmcs_phy_t		*pptr;
	sas_phymap_phys_t	*phys;
	int			phynum;
	int			inst;

	ASSERT(iport);
	ASSERT(mutex_owned(&iport->lock));
	pwp = iport->pwp;
	ASSERT(pwp);
	inst = ddi_get_instance(iport->dip);

	/* Hold the softstate lock while walking root_phys below. */
	mutex_enter(&pwp->lock);
	ASSERT(pwp->root_phys != NULL);

	/*
	 * Query the phymap regarding the phys in this iport and populate
	 * the iport's phys list. Hereafter this list is maintained via
	 * port up and down events in pmcs_intr.c
	 */
	ASSERT(list_is_empty(&iport->phys));
	phys = sas_phymap_ua2phys(pwp->hss_phymap, iport->ua);
	ASSERT(phys != NULL);
	while ((phynum = sas_phymap_phys_next(phys)) != -1) {
		/* Grab the phy pointer from root_phys */
		/* root_phys is indexed by phy number (array of pmcs_phy_t) */
		pptr = pwp->root_phys + phynum;
		ASSERT(pptr);
		pmcs_lock_phy(pptr);
		ASSERT(pptr->phynum == phynum);

		/*
		 * Set a back pointer in the phy to this iport.
		 */
		pptr->iport = iport;

		/*
		 * If this phy is the primary, set a pointer to it on our
		 * iport handle, and set our portid from it.
		 */
		if (!pptr->subsidiary) {
			iport->pptr = pptr;
			iport->portid = pptr->portid;
		}

		/*
		 * Finally, insert the phy into our list
		 */
		/* Phy lock is dropped first: add-to-iport takes iport locks */
		pmcs_unlock_phy(pptr);
		pmcs_add_phy_to_iport(iport, pptr);

		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL, "%s: found "
		    "phy %d [0x%p] on iport%d, refcnt(%d)", __func__, phynum,
		    (void *)pptr, inst, iport->refcnt);
	}
	mutex_exit(&pwp->lock);
	sas_phymap_phys_free(phys);
	/* New phys may need configuration; kick discovery */
	RESTART_DISCOVERY(pwp);
	return (DDI_SUCCESS);
}
2152
2153/*
2154 * Return the iport that ua is associated with, or NULL.  If an iport is
2155 * returned, it will be held and the caller must release the hold.
2156 */
2157static pmcs_iport_t *
2158pmcs_get_iport_by_ua(pmcs_hw_t *pwp, char *ua)
2159{
2160	pmcs_iport_t	*iport = NULL;
2161
2162	rw_enter(&pwp->iports_lock, RW_READER);
2163	for (iport = list_head(&pwp->iports);
2164	    iport != NULL;
2165	    iport = list_next(&pwp->iports, iport)) {
2166		mutex_enter(&iport->lock);
2167		if (strcmp(iport->ua, ua) == 0) {
2168			mutex_exit(&iport->lock);
2169			pmcs_hold_iport(iport);
2170			break;
2171		}
2172		mutex_exit(&iport->lock);
2173	}
2174	rw_exit(&pwp->iports_lock);
2175
2176	return (iport);
2177}
2178
2179/*
2180 * Return the iport that pptr is associated with, or NULL.
2181 * If an iport is returned, there is a hold that the caller must release.
2182 */
2183pmcs_iport_t *
2184pmcs_get_iport_by_wwn(pmcs_hw_t *pwp, uint64_t wwn)
2185{
2186	pmcs_iport_t	*iport = NULL;
2187	char		*ua;
2188
2189	ua = sas_phymap_lookup_ua(pwp->hss_phymap, pwp->sas_wwns[0], wwn);
2190	if (ua) {
2191		iport = pmcs_get_iport_by_ua(pwp, ua);
2192		if (iport) {
2193			mutex_enter(&iport->lock);
2194			pmcs_iport_active(iport);
2195			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL, "%s: "
2196			    "found iport [0x%p] on ua (%s), refcnt (%d)",
2197			    __func__, (void *)iport, ua, iport->refcnt);
2198			mutex_exit(&iport->lock);
2199		}
2200	}
2201
2202	return (iport);
2203}
2204
2205/*
2206 * Promote the next phy on this port to primary, and return it.
2207 * Called when the primary PHY on a port is going down, but the port
2208 * remains up (see pmcs_intr.c).
2209 */
pmcs_phy_t *
pmcs_promote_next_phy(pmcs_phy_t *prev_primary)
{
	pmcs_hw_t	*pwp;
	pmcs_iport_t	*iport;
	pmcs_phy_t	*pptr, *child;
	int		portid;

	pmcs_lock_phy(prev_primary);
	portid = prev_primary->portid;
	iport  = prev_primary->iport;
	pwp    = prev_primary->pwp;

	/* Use the first available phy in this port */
	/*
	 * NOTE(review): the candidate is locked with mutex_enter() on its
	 * phy_lock directly rather than pmcs_lock_phy() — presumably
	 * intentional since prev_primary is already held; confirm against
	 * the driver's lock-ordering rules.
	 */
	for (pptr = pwp->root_phys; pptr; pptr = pptr->sibling) {
		if ((pptr->portid == portid) && (pptr != prev_primary)) {
			mutex_enter(&pptr->phy_lock);
			break;
		}
	}

	if (pptr == NULL) {
		/* No other phy shares this port; nothing to promote. */
		pmcs_unlock_phy(prev_primary);
		return (NULL);
	}

	if (iport) {
		/* Point the iport at its new primary phy. */
		mutex_enter(&iport->lock);
		iport->pptr = pptr;
		mutex_exit(&iport->lock);
	}

	/* Update the phy handle with the data from the previous primary */
	pptr->children		= prev_primary->children;
	child = pptr->children;
	/* Re-parent every child onto the new primary. */
	while (child) {
		child->parent = pptr;
		child = child->sibling;
	}
	pptr->ncphy		= prev_primary->ncphy;
	pptr->width		= prev_primary->width;
	pptr->dtype		= prev_primary->dtype;
	pptr->pend_dtype	= prev_primary->pend_dtype;
	pptr->tolerates_sas2	= prev_primary->tolerates_sas2;
	pptr->atdt		= prev_primary->atdt;
	pptr->portid		= prev_primary->portid;
	pptr->link_rate		= prev_primary->link_rate;
	pptr->configured	= prev_primary->configured;
	pptr->iport		= prev_primary->iport;
	pptr->target		= prev_primary->target;
	if (pptr->target) {
		/* Keep the target's back-pointer consistent. */
		pptr->target->phy = pptr;
	}

	/* Update the phy mask properties for the affected PHYs */
	/* Clear the current values... */
	pmcs_update_phy_pm_props(pptr, pptr->att_port_pm_tmp,
	    pptr->tgt_port_pm_tmp, B_FALSE);
	/* ...replace with the values from prev_primary... */
	pmcs_update_phy_pm_props(pptr, prev_primary->att_port_pm_tmp,
	    prev_primary->tgt_port_pm_tmp, B_TRUE);
	/* ...then clear prev_primary's PHY values from the new primary */
	pmcs_update_phy_pm_props(pptr, prev_primary->att_port_pm,
	    prev_primary->tgt_port_pm, B_FALSE);
	/* Clear the prev_primary's values */
	pmcs_update_phy_pm_props(prev_primary, prev_primary->att_port_pm_tmp,
	    prev_primary->tgt_port_pm_tmp, B_FALSE);

	/* Swap the primary/subsidiary roles. */
	pptr->subsidiary = 0;

	prev_primary->subsidiary = 1;
	prev_primary->children = NULL;
	prev_primary->target = NULL;
	pptr->device_id = prev_primary->device_id;
	pptr->valid_device_id = prev_primary->valid_device_id;
	pmcs_unlock_phy(prev_primary);

	/*
	 * We call pmcs_unlock_phy() on pptr because it now contains the
	 * list of children.
	 */
	pmcs_unlock_phy(pptr);

	return (pptr);
}
2295
2296void
2297pmcs_hold_iport(pmcs_iport_t *iport)
2298{
2299	/*
2300	 * Grab a reference to this iport.
2301	 */
2302	ASSERT(iport);
2303	mutex_enter(&iport->refcnt_lock);
2304	iport->refcnt++;
2305	mutex_exit(&iport->refcnt_lock);
2306
2307	pmcs_prt(iport->pwp, PMCS_PRT_DEBUG2, NULL, NULL, "%s: iport "
2308	    "[0x%p] refcnt (%d)", __func__, (void *)iport, iport->refcnt);
2309}
2310
2311void
2312pmcs_rele_iport(pmcs_iport_t *iport)
2313{
2314	/*
2315	 * Release a refcnt on this iport. If this is the last reference,
2316	 * signal the potential waiter in pmcs_iport_unattach().
2317	 */
2318	ASSERT(iport->refcnt > 0);
2319	mutex_enter(&iport->refcnt_lock);
2320	iport->refcnt--;
2321	mutex_exit(&iport->refcnt_lock);
2322	if (iport->refcnt == 0) {
2323		cv_signal(&iport->refcnt_cv);
2324	}
2325	pmcs_prt(iport->pwp, PMCS_PRT_DEBUG2, NULL, NULL, "%s: iport "
2326	    "[0x%p] refcnt (%d)", __func__, (void *)iport, iport->refcnt);
2327}
2328
void
pmcs_phymap_activate(void *arg, char *ua, void **privp)
{
	_NOTE(ARGUNUSED(privp));
	pmcs_hw_t	*pwp = arg;
	pmcs_iport_t	*iport = NULL;

	/* Bail if the HBA is being torn down or reset. */
	mutex_enter(&pwp->lock);
	if ((pwp->state == STATE_UNPROBING) || (pwp->state == STATE_DEAD) ||
	    (pwp->state == STATE_IN_RESET)) {
		mutex_exit(&pwp->lock);
		return;
	}
	pwp->phymap_active++;
	mutex_exit(&pwp->lock);

	/* Ask the framework to create/attach the iport for this UA. */
	if (scsi_hba_iportmap_iport_add(pwp->hss_iportmap, ua, NULL) !=
	    DDI_SUCCESS) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, NULL, NULL, "%s: failed to "
		    "add iport handle on unit address [%s]", __func__, ua);
	} else {
		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, NULL, NULL, "%s: "
		    "phymap_active count (%d), added iport handle on unit "
		    "address [%s]", __func__, pwp->phymap_active, ua);
	}

	/* Set the HBA softstate as our private data for this unit address */
	*privp = (void *)pwp;

	/*
	 * We are waiting on attach for this iport node, unless it is still
	 * attached. This can happen if a consumer has an outstanding open
	 * on our iport node, but the port is down.  If this is the case, we
	 * need to configure our iport here for reuse.
	 */
	iport = pmcs_get_iport_by_ua(pwp, ua);
	if (iport) {
		mutex_enter(&iport->lock);
		if (pmcs_iport_configure_phys(iport) != DDI_SUCCESS) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL, "%s: "
			    "failed to configure phys on iport [0x%p] at "
			    "unit address (%s)", __func__, (void *)iport, ua);
		}
		/* Mark the iport active and refresh its num-phys property. */
		pmcs_iport_active(iport);
		pmcs_smhba_add_iport_prop(iport, DATA_TYPE_INT32, PMCS_NUM_PHYS,
		    &iport->nphy);
		mutex_exit(&iport->lock);
		/* Drop the hold taken by pmcs_get_iport_by_ua(). */
		pmcs_rele_iport(iport);
	}

}
2380
2381void
2382pmcs_phymap_deactivate(void *arg, char *ua, void *privp)
2383{
2384	_NOTE(ARGUNUSED(privp));
2385	pmcs_hw_t	*pwp = arg;
2386	pmcs_iport_t	*iport;
2387
2388	mutex_enter(&pwp->lock);
2389	pwp->phymap_active--;
2390	mutex_exit(&pwp->lock);
2391
2392	if (scsi_hba_iportmap_iport_remove(pwp->hss_iportmap, ua) !=
2393	    DDI_SUCCESS) {
2394		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, NULL, NULL, "%s: failed to "
2395		    "remove iport handle on unit address [%s]", __func__, ua);
2396	} else {
2397		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, NULL, NULL, "%s: "
2398		    "phymap_active count (%d), removed iport handle on unit "
2399		    "address [%s]", __func__, pwp->phymap_active, ua);
2400	}
2401
2402	iport = pmcs_get_iport_by_ua(pwp, ua);
2403
2404	if (iport == NULL) {
2405		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL, "%s: failed "
2406		    "lookup of iport handle on unit addr (%s)", __func__, ua);
2407		return;
2408	}
2409
2410	mutex_enter(&iport->lock);
2411	iport->ua_state = UA_INACTIVE;
2412	iport->portid = PMCS_IPORT_INVALID_PORT_ID;
2413	pmcs_remove_phy_from_iport(iport, NULL);
2414	mutex_exit(&iport->lock);
2415	pmcs_rele_iport(iport);
2416}
2417
2418/*
2419 * Top-level discovery function
2420 */
void
pmcs_discover(pmcs_hw_t *pwp)
{
	pmcs_phy_t		*pptr;
	pmcs_phy_t		*root_phy;

	DTRACE_PROBE2(pmcs__discover__entry, ulong_t, pwp->work_flags,
	    boolean_t, pwp->config_changed);

	mutex_enter(&pwp->lock);

	/* Discovery only runs on a fully operational HBA. */
	if (pwp->state != STATE_RUNNING) {
		mutex_exit(&pwp->lock);
		return;
	}

	/* Ensure we have at least one phymap active */
	if (pwp->phymap_active == 0) {
		mutex_exit(&pwp->lock);
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL,
		    "%s: phymap inactive, exiting", __func__);
		return;
	}

	mutex_exit(&pwp->lock);

	/*
	 * If no iports have attached, but we have PHYs that are up, we
	 * are waiting for iport attach to complete.  Restart discovery.
	 */
	rw_enter(&pwp->iports_lock, RW_READER);
	if (!pwp->iports_attached) {
		rw_exit(&pwp->iports_lock);
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL,
		    "%s: no iports attached, retry discovery", __func__);
		SCHEDULE_WORK(pwp, PMCS_WORK_DISCOVER);
		return;
	}
	rw_exit(&pwp->iports_lock);

	/* Only one discovery pass may run at a time. */
	mutex_enter(&pwp->config_lock);
	if (pwp->configuring) {
		mutex_exit(&pwp->config_lock);
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL,
		    "%s: configuration already in progress", __func__);
		return;
	}

	/* Non-blocking scratch acquire; reschedule if it's busy. */
	if (pmcs_acquire_scratch(pwp, B_FALSE)) {
		mutex_exit(&pwp->config_lock);
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL,
		    "%s: cannot allocate scratch", __func__);
		SCHEDULE_WORK(pwp, PMCS_WORK_DISCOVER);
		return;
	}

	pwp->configuring = 1;
	pwp->config_changed = B_FALSE;
	mutex_exit(&pwp->config_lock);

	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL, "Discovery begin");

	/*
	 * First, tell SCSA that we're beginning set operations.
	 */
	pmcs_begin_observations(pwp);

	/*
	 * The order of the following traversals is important.
	 *
	 * The first one checks for changed expanders.
	 *
	 * The second one aborts commands for dead devices and deregisters them.
	 *
	 * The third one clears the contents of dead expanders from the tree
	 *
	 * The fourth one clears now dead devices in expanders that remain.
	 */

	/*
	 * 1. Check expanders marked changed (but not dead) to see if they still
	 * have the same number of phys and the same SAS address. Mark them,
	 * their subsidiary phys (if wide) and their descendents dead if
	 * anything has changed. Check the devices they contain to see if
	 * *they* have changed. If they've changed from type NOTHING we leave
	 * them marked changed to be configured later (picking up a new SAS
	 * address and link rate if possible). Otherwise, any change in type,
	 * SAS address or removal of target role will cause us to mark them
	 * (and their descendents) as dead (and cause any pending commands
	 * and associated devices to be removed).
	 *
	 * NOTE: We don't want to bail on discovery if the config has
	 * changed until *after* we run pmcs_kill_devices.
	 */
	root_phy = pwp->root_phys;
	pmcs_check_expanders(pwp, root_phy);

	/*
	 * 2. Descend the tree looking for dead devices and kill them
	 * by aborting all active commands and then deregistering them.
	 */
	if (pmcs_kill_devices(pwp, root_phy)) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL,
		    "%s: pmcs_kill_devices failed!", __func__);
	}

	/*
	 * 3. Check for dead expanders and remove their children from the tree.
	 * By the time we get here, the devices and commands for them have
	 * already been terminated and removed.
	 *
	 * We do this independent of the configuration count changing so we can
	 * free any dead device PHYs that were discovered while checking
	 * expanders. We ignore any subsidiary phys as pmcs_clear_expander
	 * will take care of those.
	 *
	 * NOTE: pmcs_clear_expander requires softstate lock
	 */
	mutex_enter(&pwp->lock);
	for (pptr = pwp->root_phys; pptr; pptr = pptr->sibling) {
		/*
		 * Call pmcs_clear_expander for every root PHY.  It will
		 * recurse and determine which (if any) expanders actually
		 * need to be cleared.
		 */
		pmcs_lock_phy(pptr);
		pmcs_clear_expander(pwp, pptr, 0);
		pmcs_unlock_phy(pptr);
	}
	mutex_exit(&pwp->lock);

	/*
	 * 4. Check for dead devices and nullify them. By the time we get here,
	 * the devices and commands for them have already been terminated
	 * and removed. This is different from step 2 in that this just nulls
	 * phys that are part of expanders that are still here but used to
	 * be something but are no longer something (e.g., after a pulled
	 * disk drive). Note that dead expanders had their contained phys
	 * removed from the tree- here, the expanders themselves are
	 * nullified (unless they were removed by being contained in another
	 * expander phy).
	 */
	pmcs_clear_phys(pwp, root_phy);

	/*
	 * 5. Now check for and configure new devices.
	 */
	if (pmcs_configure_new_devices(pwp, root_phy)) {
		goto restart;
	}

out:
	/* Common exit path: report observations if stable, then clean up. */
	DTRACE_PROBE2(pmcs__discover__exit, ulong_t, pwp->work_flags,
	    boolean_t, pwp->config_changed);
	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL, "Discovery end");

	mutex_enter(&pwp->config_lock);

	if (pwp->config_changed == B_FALSE) {
		/*
		 * Observation is stable, report what we currently see to
		 * the tgtmaps for delta processing. Start by setting
		 * BEGIN on all tgtmaps.
		 */
		mutex_exit(&pwp->config_lock);
		if (pmcs_report_observations(pwp) == B_FALSE) {
			goto restart;
		}
		mutex_enter(&pwp->config_lock);
	} else {
		/*
		 * If config_changed is TRUE, we need to reschedule
		 * discovery now.
		 */
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL,
		    "%s: Config has changed, will re-run discovery", __func__);
		SCHEDULE_WORK(pwp, PMCS_WORK_DISCOVER);
	}

	pmcs_release_scratch(pwp);
	if (!pwp->quiesced) {
		pwp->blocked = 0;
	}
	/* Done configuring; wake anyone waiting on config_cv. */
	pwp->configuring = 0;
	cv_signal(&pwp->config_cv);
	mutex_exit(&pwp->config_lock);

#ifdef DEBUG
	/*
	 * Sanity check: if a PHY still needs work, discovery should have
	 * been rescheduled.  Log if it wasn't.
	 */
	pptr = pmcs_find_phy_needing_work(pwp, pwp->root_phys);
	if (pptr != NULL) {
		if (!WORK_IS_SCHEDULED(pwp, PMCS_WORK_DISCOVER)) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
			    "PHY %s dead=%d changed=%d configured=%d "
			    "but no work scheduled", pptr->path, pptr->dead,
			    pptr->changed, pptr->configured);
		}
		pmcs_unlock_phy(pptr);
	}
#endif

	return;

restart:
	/* Clean up and restart discovery */
	pmcs_release_scratch(pwp);
	pmcs_flush_observations(pwp);
	mutex_enter(&pwp->config_lock);
	pwp->configuring = 0;
	cv_signal(&pwp->config_cv);
	RESTART_DISCOVERY_LOCKED(pwp);
	mutex_exit(&pwp->config_lock);
}
2633
2634/*
2635 * Return any PHY that needs to have scheduled work done.  The PHY is returned
2636 * locked.
2637 */
2638static pmcs_phy_t *
2639pmcs_find_phy_needing_work(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2640{
2641	pmcs_phy_t *cphyp, *pnext;
2642
2643	while (pptr) {
2644		pmcs_lock_phy(pptr);
2645
2646		if (pptr->changed || (pptr->dead && pptr->valid_device_id)) {
2647			return (pptr);
2648		}
2649
2650		pnext = pptr->sibling;
2651
2652		if (pptr->children) {
2653			cphyp = pptr->children;
2654			pmcs_unlock_phy(pptr);
2655			cphyp = pmcs_find_phy_needing_work(pwp, cphyp);
2656			if (cphyp) {
2657				return (cphyp);
2658			}
2659		} else {
2660			pmcs_unlock_phy(pptr);
2661		}
2662
2663		pptr = pnext;
2664	}
2665
2666	return (NULL);
2667}
2668
2669/*
2670 * We may (or may not) report observations to SCSA.  This is prefaced by
2671 * issuing a set_begin for each iport target map.
2672 */
2673static void
2674pmcs_begin_observations(pmcs_hw_t *pwp)
2675{
2676	pmcs_iport_t		*iport;
2677	scsi_hba_tgtmap_t	*tgtmap;
2678
2679	rw_enter(&pwp->iports_lock, RW_READER);
2680	for (iport = list_head(&pwp->iports); iport != NULL;
2681	    iport = list_next(&pwp->iports, iport)) {
2682		/*
2683		 * Unless we have at least one phy up, skip this iport.
2684		 * Note we don't need to lock the iport for report_skip
2685		 * since it is only used here.  We are doing the skip so that
2686		 * the phymap and iportmap stabilization times are honored -
2687		 * giving us the ability to recover port operation within the
2688		 * stabilization time without unconfiguring targets using the
2689		 * port.
2690		 */
2691		if (!sas_phymap_uahasphys(pwp->hss_phymap, iport->ua)) {
2692			iport->report_skip = 1;
2693			continue;		/* skip set_begin */
2694		}
2695		iport->report_skip = 0;
2696
2697		tgtmap = iport->iss_tgtmap;
2698		ASSERT(tgtmap);
2699		if (scsi_hba_tgtmap_set_begin(tgtmap) != DDI_SUCCESS) {
2700			pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, NULL, NULL,
2701			    "%s: cannot set_begin tgtmap ", __func__);
2702			rw_exit(&pwp->iports_lock);
2703			return;
2704		}
2705		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, NULL, NULL,
2706		    "%s: set begin on tgtmap [0x%p]", __func__, (void *)tgtmap);
2707	}
2708	rw_exit(&pwp->iports_lock);
2709}
2710
2711/*
2712 * Tell SCSA to flush the observations we've already sent (if any), as they
2713 * are no longer valid.
2714 */
2715static void
2716pmcs_flush_observations(pmcs_hw_t *pwp)
2717{
2718	pmcs_iport_t		*iport;
2719	scsi_hba_tgtmap_t	*tgtmap;
2720
2721	rw_enter(&pwp->iports_lock, RW_READER);
2722	for (iport = list_head(&pwp->iports); iport != NULL;
2723	    iport = list_next(&pwp->iports, iport)) {
2724		/*
2725		 * Skip this iport if it has no PHYs up.
2726		 */
2727		if (!sas_phymap_uahasphys(pwp->hss_phymap, iport->ua)) {
2728			continue;
2729		}
2730
2731		tgtmap = iport->iss_tgtmap;
2732		ASSERT(tgtmap);
2733		if (scsi_hba_tgtmap_set_flush(tgtmap) != DDI_SUCCESS) {
2734			pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, NULL, NULL,
2735			    "%s: Failed set_flush on tgtmap 0x%p", __func__,
2736			    (void *)tgtmap);
2737		} else {
2738			pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, NULL, NULL,
2739			    "%s: set flush on tgtmap 0x%p", __func__,
2740			    (void *)tgtmap);
2741		}
2742	}
2743	rw_exit(&pwp->iports_lock);
2744}
2745
2746/*
2747 * Report current observations to SCSA.
2748 */
2749static boolean_t
2750pmcs_report_observations(pmcs_hw_t *pwp)
2751{
2752	pmcs_iport_t		*iport;
2753	scsi_hba_tgtmap_t	*tgtmap;
2754	char			*ap;
2755	pmcs_phy_t		*pptr;
2756	uint64_t		wwn;
2757
2758	/*
2759	 * Observation is stable, report what we currently see to the tgtmaps
2760	 * for delta processing.
2761	 */
2762	pptr = pwp->root_phys;
2763
2764	while (pptr) {
2765		pmcs_lock_phy(pptr);
2766
2767		/*
2768		 * Skip PHYs that have nothing attached or are dead.
2769		 */
2770		if ((pptr->dtype == NOTHING) || pptr->dead) {
2771			pmcs_unlock_phy(pptr);
2772			pptr = pptr->sibling;
2773			continue;
2774		}
2775
2776		if (pptr->changed) {
2777			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
2778			    "%s: oops, PHY %s changed; restart discovery",
2779			    __func__, pptr->path);
2780			pmcs_unlock_phy(pptr);
2781			return (B_FALSE);
2782		}
2783
2784		/*
2785		 * Get the iport for this root PHY, then call the helper
2786		 * to report observations for this iport's targets
2787		 */
2788		wwn = pmcs_barray2wwn(pptr->sas_address);
2789		pmcs_unlock_phy(pptr);
2790		iport = pmcs_get_iport_by_wwn(pwp, wwn);
2791		if (iport == NULL) {
2792			/* No iport for this tgt */
2793			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL,
2794			    "%s: no iport for this target", __func__);
2795			pptr = pptr->sibling;
2796			continue;
2797		}
2798
2799		pmcs_lock_phy(pptr);
2800		if (!iport->report_skip) {
2801			if (pmcs_report_iport_observations(
2802			    pwp, iport, pptr) == B_FALSE) {
2803				pmcs_rele_iport(iport);
2804				pmcs_unlock_phy(pptr);
2805				return (B_FALSE);
2806			}
2807		}
2808		pmcs_rele_iport(iport);
2809		pmcs_unlock_phy(pptr);
2810		pptr = pptr->sibling;
2811	}
2812
2813	/*
2814	 * The observation is complete, end sets. Note we will skip any
2815	 * iports that are active, but have no PHYs in them (i.e. awaiting
2816	 * unconfigure). Set to restart discovery if we find this.
2817	 */
2818	rw_enter(&pwp->iports_lock, RW_READER);
2819	for (iport = list_head(&pwp->iports);
2820	    iport != NULL;
2821	    iport = list_next(&pwp->iports, iport)) {
2822
2823		if (iport->report_skip)
2824			continue;		/* skip set_end */
2825
2826		tgtmap = iport->iss_tgtmap;
2827		ASSERT(tgtmap);
2828		if (scsi_hba_tgtmap_set_end(tgtmap, 0) != DDI_SUCCESS) {
2829			pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, NULL, NULL,
2830			    "%s: cannot set_end tgtmap ", __func__);
2831			rw_exit(&pwp->iports_lock);
2832			return (B_FALSE);
2833		}
2834		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, NULL, NULL,
2835		    "%s: set end on tgtmap [0x%p]", __func__, (void *)tgtmap);
2836	}
2837
2838	/*
2839	 * Now that discovery is complete, set up the necessary
2840	 * DDI properties on each iport node.
2841	 */
2842	for (iport = list_head(&pwp->iports); iport != NULL;
2843	    iport = list_next(&pwp->iports, iport)) {
2844		/* Set up the 'attached-port' property on the iport */
2845		ap = kmem_zalloc(PMCS_MAX_UA_SIZE, KM_SLEEP);
2846		mutex_enter(&iport->lock);
2847		pptr = iport->pptr;
2848		mutex_exit(&iport->lock);
2849		if (pptr == NULL) {
2850			/*
2851			 * This iport is down, but has not been
2852			 * removed from our list (unconfigured).
2853			 * Set our value to '0'.
2854			 */
2855			(void) snprintf(ap, 1, "%s", "0");
2856		} else {
2857			/* Otherwise, set it to remote phy's wwn */
2858			pmcs_lock_phy(pptr);
2859			wwn = pmcs_barray2wwn(pptr->sas_address);
2860			(void) scsi_wwn_to_wwnstr(wwn, 1, ap);
2861			pmcs_unlock_phy(pptr);
2862		}
2863		if (ndi_prop_update_string(DDI_DEV_T_NONE, iport->dip,
2864		    SCSI_ADDR_PROP_ATTACHED_PORT, ap) != DDI_SUCCESS) {
2865			pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL, "%s: Failed "
2866			    "to set prop ("SCSI_ADDR_PROP_ATTACHED_PORT")",
2867			    __func__);
2868		}
2869		kmem_free(ap, PMCS_MAX_UA_SIZE);
2870	}
2871	rw_exit(&pwp->iports_lock);
2872
2873	return (B_TRUE);
2874}
2875
2876/*
2877 * Report observations into a particular iport's target map
2878 *
2879 * Called with phyp (and all descendents) locked
2880 */
static boolean_t
pmcs_report_iport_observations(pmcs_hw_t *pwp, pmcs_iport_t *iport,
    pmcs_phy_t *phyp)
{
	pmcs_phy_t		*lphyp;
	scsi_hba_tgtmap_t	*tgtmap;
	scsi_tgtmap_tgt_type_t	tgt_type;
	char			*ua;
	uint64_t		wwn;

	tgtmap = iport->iss_tgtmap;
	ASSERT(tgtmap);

	lphyp = phyp;
	while (lphyp) {
		/* Map the PHY's device type to a SCSA target type. */
		switch (lphyp->dtype) {
		default:		/* Skip unknown PHYs. */
			/* for non-root phys, skip to sibling */
			goto next_phy;

		case SATA:
		case SAS:
			tgt_type = SCSI_TGT_SCSI_DEVICE;
			break;

		case EXPANDER:
			tgt_type = SCSI_TGT_SMP_DEVICE;
			break;
		}

		/* Only report PHYs that are alive and fully configured. */
		if (lphyp->dead || !lphyp->configured) {
			goto next_phy;
		}

		/*
		 * Validate the PHY's SAS address
		 */
		if (((lphyp->sas_address[0] & 0xf0) >> 4) != NAA_IEEE_REG) {
			pmcs_prt(pwp, PMCS_PRT_ERR, lphyp, NULL,
			    "PHY 0x%p (%s) has invalid SAS address; "
			    "will not enumerate", (void *)lphyp, lphyp->path);
			goto next_phy;
		}

		wwn = pmcs_barray2wwn(lphyp->sas_address);
		ua = scsi_wwn_to_wwnstr(wwn, 1, NULL);

		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, lphyp, NULL,
		    "iport_observation: adding %s on tgtmap [0x%p] phy [0x%p]",
		    ua, (void *)tgtmap, (void*)lphyp);

		if (scsi_hba_tgtmap_set_add(tgtmap, tgt_type, ua, NULL) !=
		    DDI_SUCCESS) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP,  NULL, NULL,
			    "%s: failed to add address %s", __func__, ua);
			scsi_free_wwnstr(ua);
			return (B_FALSE);
		}
		scsi_free_wwnstr(ua);

		/* Recurse into expander children; abort on first failure. */
		if (lphyp->children) {
			if (pmcs_report_iport_observations(pwp, iport,
			    lphyp->children) == B_FALSE) {
				return (B_FALSE);
			}
		}

		/* for non-root phys, report siblings too */
next_phy:
		if (IS_ROOT_PHY(lphyp)) {
			lphyp = NULL;
		} else {
			lphyp = lphyp->sibling;
		}
	}

	return (B_TRUE);
}
2959
2960/*
2961 * Check for and configure new devices.
2962 *
2963 * If the changed device is a SATA device, add a SATA device.
2964 *
2965 * If the changed device is a SAS device, add a SAS device.
2966 *
2967 * If the changed device is an EXPANDER device, do a REPORT
2968 * GENERAL SMP command to find out the number of contained phys.
2969 *
2970 * For each number of contained phys, allocate a phy, do a
2971 * DISCOVERY SMP command to find out what kind of device it
2972 * is and add it to the linked list of phys on the *next* level.
2973 *
2974 * NOTE: pptr passed in by the caller will be a root PHY
2975 */
static int
pmcs_configure_new_devices(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
{
	int rval = 0;
	pmcs_iport_t *iport;
	pmcs_phy_t *pnext, *orig_pptr = pptr, *root_phy, *pchild;
	uint64_t wwn;

	/*
	 * First, walk through each PHY at this level
	 */
	while (pptr) {
		pmcs_lock_phy(pptr);
		pnext = pptr->sibling;

		/*
		 * Set the new dtype if it has changed
		 */
		/* NEW is a sentinel meaning no pending dtype change */
		if ((pptr->pend_dtype != NEW) &&
		    (pptr->pend_dtype != pptr->dtype)) {
			pptr->dtype = pptr->pend_dtype;
		}

		/* Only unconfigured, live, changed PHYs need work. */
		if (pptr->changed == 0 || pptr->dead || pptr->configured) {
			goto next_phy;
		}

		/* Confirm that this iport is configured */
		root_phy = pmcs_get_root_phy(pptr);
		wwn = pmcs_barray2wwn(root_phy->sas_address);
		/* Drop the phy lock across the iport lookup (lock order). */
		pmcs_unlock_phy(pptr);
		iport = pmcs_get_iport_by_wwn(pwp, wwn);
		if (iport == NULL) {
			/* No iport yet; stop the walk and retry later. */
			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL,
			    "%s: iport not yet configured, "
			    "retry discovery", __func__);
			pnext = NULL;
			rval = -1;
			pmcs_lock_phy(pptr);
			goto next_phy;
		}

		pmcs_lock_phy(pptr);
		/* Dispatch on device type to the appropriate config path. */
		switch (pptr->dtype) {
		case NOTHING:
			pptr->changed = 0;
			break;
		case SATA:
		case SAS:
			pptr->iport = iport;
			pmcs_new_tport(pwp, pptr);
			break;
		case EXPANDER:
			pmcs_configure_expander(pwp, pptr, iport);
			break;
		}
		/* Drop the hold taken by pmcs_get_iport_by_wwn(). */
		pmcs_rele_iport(iport);

		/* If the config changed under us, abandon this walk. */
		mutex_enter(&pwp->config_lock);
		if (pwp->config_changed) {
			mutex_exit(&pwp->config_lock);
			pnext = NULL;
			goto next_phy;
		}
		mutex_exit(&pwp->config_lock);

next_phy:
		pmcs_unlock_phy(pptr);
		pptr = pnext;
	}

	if (rval != 0) {
		return (rval);
	}

	/*
	 * Now walk through each PHY again, recalling ourselves if they
	 * have children
	 */
	pptr = orig_pptr;
	while (pptr) {
		pmcs_lock_phy(pptr);
		pnext = pptr->sibling;
		pchild = pptr->children;
		pmcs_unlock_phy(pptr);

		if (pchild) {
			rval = pmcs_configure_new_devices(pwp, pchild);
			if (rval != 0) {
				break;
			}
		}

		pptr = pnext;
	}

	return (rval);
}
3074
3075/*
3076 * Set all phys and descendent phys as changed if changed == B_TRUE, otherwise
3077 * mark them all as not changed.
3078 *
3079 * Called with parent PHY locked.
3080 */
3081void
3082pmcs_set_changed(pmcs_hw_t *pwp, pmcs_phy_t *parent, boolean_t changed,
3083    int level)
3084{
3085	pmcs_phy_t *pptr;
3086
3087	if (level == 0) {
3088		if (changed) {
3089			PHY_CHANGED(pwp, parent);
3090		} else {
3091			parent->changed = 0;
3092		}
3093		if (parent->dtype == EXPANDER && parent->level) {
3094			parent->width = 1;
3095		}
3096		if (parent->children) {
3097			pmcs_set_changed(pwp, parent->children, changed,
3098			    level + 1);
3099		}
3100	} else {
3101		pptr = parent;
3102		while (pptr) {
3103			if (changed) {
3104				PHY_CHANGED(pwp, pptr);
3105			} else {
3106				pptr->changed = 0;
3107			}
3108			if (pptr->dtype == EXPANDER && pptr->level) {
3109				pptr->width = 1;
3110			}
3111			if (pptr->children) {
3112				pmcs_set_changed(pwp, pptr->children, changed,
3113				    level + 1);
3114			}
3115			pptr = pptr->sibling;
3116		}
3117	}
3118}
3119
3120/*
3121 * Take the passed phy mark it and its descendants as dead.
3122 * Fire up reconfiguration to abort commands and bury it.
3123 *
3124 * Called with the parent PHY locked.
3125 */
3126void
3127pmcs_kill_changed(pmcs_hw_t *pwp, pmcs_phy_t *parent, int level)
3128{
3129	pmcs_phy_t *pptr = parent;
3130
3131	while (pptr) {
3132		pptr->link_rate = 0;
3133		pptr->abort_sent = 0;
3134		pptr->abort_pending = 1;
3135		SCHEDULE_WORK(pwp, PMCS_WORK_ABORT_HANDLE);
3136		pptr->need_rl_ext = 0;
3137
3138		if (pptr->dead == 0) {
3139			PHY_CHANGED(pwp, pptr);
3140			RESTART_DISCOVERY(pwp);
3141		}
3142
3143		pptr->dead = 1;
3144
3145		if (pptr->children) {
3146			pmcs_kill_changed(pwp, pptr->children, level + 1);
3147		}
3148
3149		/*
3150		 * Only kill siblings at level > 0
3151		 */
3152		if (level == 0) {
3153			return;
3154		}
3155
3156		pptr = pptr->sibling;
3157	}
3158}
3159
3160/*
3161 * Go through every PHY and clear any that are dead (unless they're expanders)
3162 */
3163static void
3164pmcs_clear_phys(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
3165{
3166	pmcs_phy_t *pnext, *phyp;
3167
3168	phyp = pptr;
3169	while (phyp) {
3170		if (IS_ROOT_PHY(phyp)) {
3171			pmcs_lock_phy(phyp);
3172		}
3173
3174		if ((phyp->dtype != EXPANDER) && phyp->dead) {
3175			pmcs_clear_phy(pwp, phyp);
3176		}
3177
3178		if (phyp->children) {
3179			pmcs_clear_phys(pwp, phyp->children);
3180		}
3181
3182		pnext = phyp->sibling;
3183
3184		if (IS_ROOT_PHY(phyp)) {
3185			pmcs_unlock_phy(phyp);
3186		}
3187
3188		phyp = pnext;
3189	}
3190}
3191
3192/*
3193 * Clear volatile parts of a phy.  Called with PHY locked.
3194 */
3195void
3196pmcs_clear_phy(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
3197{
3198	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL, "%s: %s",
3199	    __func__, pptr->path);
3200	ASSERT(mutex_owned(&pptr->phy_lock));
3201	/* keep sibling */
3202	/* keep children */
3203	/* keep parent */
3204	pptr->device_id = PMCS_INVALID_DEVICE_ID;
3205	/* keep hw_event_ack */
3206	pptr->ncphy = 0;
3207	/* keep phynum */
3208	pptr->width = 0;
3209	pptr->ds_recovery_retries = 0;
3210	pptr->ds_prev_good_recoveries = 0;
3211	pptr->last_good_recovery = 0;
3212	pptr->prev_recovery = 0;
3213
3214	/* keep dtype */
3215	pptr->config_stop = 0;
3216	pptr->spinup_hold = 0;
3217	pptr->atdt = 0;
3218	/* keep portid */
3219	pptr->link_rate = 0;
3220	pptr->valid_device_id = 0;
3221	pptr->abort_sent = 0;
3222	pptr->abort_pending = 0;
3223	pptr->need_rl_ext = 0;
3224	pptr->subsidiary = 0;
3225	pptr->configured = 0;
3226	pptr->deregister_wait = 0;
3227	pptr->reenumerate = 0;
3228	/* Only mark dead if it's not a root PHY and its dtype isn't NOTHING */
3229	/* XXX: What about directly attached disks? */
3230	if (!IS_ROOT_PHY(pptr) && (pptr->dtype != NOTHING))
3231		pptr->dead = 1;
3232	pptr->changed = 0;
3233	/* keep SAS address */
3234	/* keep path */
3235	/* keep ref_count */
3236	/* Don't clear iport on root PHYs - they are handled in pmcs_intr.c */
3237	if (!IS_ROOT_PHY(pptr)) {
3238		pptr->last_iport = pptr->iport;
3239		pptr->iport = NULL;
3240	}
3241	/* keep target */
3242}
3243
3244/*
3245 * Allocate softstate for this target if there isn't already one.  If there
3246 * is, just redo our internal configuration.  If it is actually "new", we'll
3247 * soon get a tran_tgt_init for it.
3248 *
3249 * Called with PHY locked.
3250 */
3251static void
3252pmcs_new_tport(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
3253{
3254	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL, "%s: phy 0x%p @ %s",
3255	    __func__, (void *)pptr, pptr->path);
3256
3257	if (pmcs_configure_phy(pwp, pptr) == B_FALSE) {
3258		/*
3259		 * If the config failed, mark the PHY as changed.
3260		 */
3261		PHY_CHANGED(pwp, pptr);
3262		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
3263		    "%s: pmcs_configure_phy failed for phy 0x%p", __func__,
3264		    (void *)pptr);
3265		return;
3266	}
3267
3268	/* Mark PHY as no longer changed */
3269	pptr->changed = 0;
3270
3271	/*
3272	 * If the PHY has no target pointer:
3273	 *
3274	 * If it's a root PHY, see if another PHY in the iport holds the
3275	 * target pointer (primary PHY changed).  If so, move it over.
3276	 *
3277	 * If it's not a root PHY, see if there's a PHY on the dead_phys
3278	 * list that matches.
3279	 */
3280	if (pptr->target == NULL) {
3281		if (IS_ROOT_PHY(pptr)) {
3282			pmcs_phy_t *rphy = pwp->root_phys;
3283
3284			while (rphy) {
3285				if (rphy == pptr) {
3286					rphy = rphy->sibling;
3287					continue;
3288				}
3289
3290				mutex_enter(&rphy->phy_lock);
3291				if ((rphy->iport == pptr->iport) &&
3292				    (rphy->target != NULL)) {
3293					mutex_enter(&rphy->target->statlock);
3294					pptr->target = rphy->target;
3295					rphy->target = NULL;
3296					pptr->target->phy = pptr;
3297					/* The target is now on pptr */
3298					mutex_exit(&pptr->target->statlock);
3299					mutex_exit(&rphy->phy_lock);
3300					pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3301					    pptr, pptr->target,
3302					    "%s: Moved target from %s to %s",
3303					    __func__, rphy->path, pptr->path);
3304					break;
3305				}
3306				mutex_exit(&rphy->phy_lock);
3307
3308				rphy = rphy->sibling;
3309			}
3310		} else {
3311			pmcs_reap_dead_phy(pptr);
3312		}
3313	}
3314
3315	/*
3316	 * Only assign the device if there is a target for this PHY with a
3317	 * matching SAS address.  If an iport is disconnected from one piece
3318	 * of storage and connected to another within the iport stabilization
3319	 * time, we can get the PHY/target mismatch situation.
3320	 *
3321	 * Otherwise, it'll get done in tran_tgt_init.
3322	 */
3323	if (pptr->target) {
3324		mutex_enter(&pptr->target->statlock);
3325		if (pmcs_phy_target_match(pptr) == B_FALSE) {
3326			mutex_exit(&pptr->target->statlock);
3327			if (!IS_ROOT_PHY(pptr)) {
3328				pmcs_dec_phy_ref_count(pptr);
3329			}
3330			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
3331			    "%s: Not assigning existing tgt %p for PHY %p "
3332			    "(WWN mismatch)", __func__, (void *)pptr->target,
3333			    (void *)pptr);
3334			pptr->target = NULL;
3335			return;
3336		}
3337
3338		if (!pmcs_assign_device(pwp, pptr->target)) {
3339			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, pptr->target,
3340			    "%s: pmcs_assign_device failed for target 0x%p",
3341			    __func__, (void *)pptr->target);
3342		}
3343		mutex_exit(&pptr->target->statlock);
3344	}
3345}
3346
3347/*
3348 * Called with PHY lock held.
3349 */
3350static boolean_t
3351pmcs_configure_phy(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
3352{
3353	char *dtype;
3354
3355	ASSERT(mutex_owned(&pptr->phy_lock));
3356
3357	/*
3358	 * Mark this device as no longer changed.
3359	 */
3360	pptr->changed = 0;
3361
3362	/*
3363	 * If we don't have a device handle, get one.
3364	 */
3365	if (pmcs_get_device_handle(pwp, pptr)) {
3366		return (B_FALSE);
3367	}
3368
3369	pptr->configured = 1;
3370
3371	switch (pptr->dtype) {
3372	case SAS:
3373		dtype = "SAS";
3374		break;
3375	case SATA:
3376		dtype = "SATA";
3377		break;
3378	case EXPANDER:
3379		dtype = "SMP";
3380		break;
3381	default:
3382		dtype = "???";
3383	}
3384
3385	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL, "config_dev: %s "
3386	    "dev %s " SAS_ADDR_FMT " dev id 0x%x lr 0x%x", dtype, pptr->path,
3387	    SAS_ADDR_PRT(pptr->sas_address), pptr->device_id, pptr->link_rate);
3388
3389	return (B_TRUE);
3390}
3391
3392/*
3393 * Called with PHY locked
3394 */
3395static void
3396pmcs_configure_expander(pmcs_hw_t *pwp, pmcs_phy_t *pptr, pmcs_iport_t *iport)
3397{
3398	pmcs_phy_t *ctmp, *clist = NULL, *cnext;
3399	int result, i, nphy = 0;
3400	boolean_t root_phy = B_FALSE;
3401
3402	ASSERT(iport);
3403
3404	/*
3405	 * Step 1- clear our "changed" bit. If we need to retry/restart due
3406	 * to resource shortages, we'll set it again. While we're doing
3407	 * configuration, other events may set it again as well.  If the PHY
3408	 * is a root PHY and is currently marked as having changed, reset the
3409	 * config_stop timer as well.
3410	 */
3411	if (IS_ROOT_PHY(pptr) && pptr->changed) {
3412		pptr->config_stop = ddi_get_lbolt() +
3413		    drv_usectohz(PMCS_MAX_CONFIG_TIME);
3414	}
3415	pptr->changed = 0;
3416
3417	/*
3418	 * Step 2- make sure we don't overflow
3419	 */
3420	if (pptr->level == PMCS_MAX_XPND-1) {
3421		pmcs_prt(pwp, PMCS_PRT_WARN, pptr, NULL,
3422		    "%s: SAS expansion tree too deep", __func__);
3423		return;
3424	}
3425
3426	/*
3427	 * Step 3- Check if this expander is part of a wide phy that has
3428	 * already been configured.
3429	 *
3430	 * This is known by checking this level for another EXPANDER device
3431	 * with the same SAS address and isn't already marked as a subsidiary
3432	 * phy and a parent whose SAS address is the same as our SAS address
3433	 * (if there are parents).
3434	 */
3435	if (!IS_ROOT_PHY(pptr)) {
3436		/*
3437		 * No need to lock the parent here because we're in discovery
3438		 * and the only time a PHY's children pointer can change is
3439		 * in discovery; either in pmcs_clear_expander (which has
3440		 * already been called) or here, down below.  Plus, trying to
3441		 * grab the parent's lock here can cause deadlock.
3442		 */
3443		ctmp = pptr->parent->children;
3444	} else {
3445		ctmp = pwp->root_phys;
3446		root_phy = B_TRUE;
3447	}
3448
3449	while (ctmp) {
3450		/*
3451		 * If we've checked all PHYs up to pptr, we stop. Otherwise,
3452		 * we'll be checking for a primary PHY with a higher PHY
3453		 * number than pptr, which will never happen.  The primary
3454		 * PHY on non-root expanders will ALWAYS be the lowest
3455		 * numbered PHY.
3456		 */
3457		if (ctmp == pptr) {
3458			break;
3459		}
3460
3461		/*
3462		 * If pptr and ctmp are root PHYs, just grab the mutex on
3463		 * ctmp.  No need to lock the entire tree.  If they are not
3464		 * root PHYs, there is no need to lock since a non-root PHY's
3465		 * SAS address and other characteristics can only change in
3466		 * discovery anyway.
3467		 */
3468		if (root_phy) {
3469			mutex_enter(&ctmp->phy_lock);
3470		}
3471
3472		if (ctmp->dtype == EXPANDER && ctmp->width &&
3473		    memcmp(ctmp->sas_address, pptr->sas_address, 8) == 0) {
3474			int widephy = 0;
3475			/*
3476			 * If these phys are not root PHYs, compare their SAS
3477			 * addresses too.
3478			 */
3479			if (!root_phy) {
3480				if (memcmp(ctmp->parent->sas_address,
3481				    pptr->parent->sas_address, 8) == 0) {
3482					widephy = 1;
3483				}
3484			} else {
3485				widephy = 1;
3486			}
3487			if (widephy) {
3488				ctmp->width++;
3489				pptr->subsidiary = 1;
3490
3491				/*
3492				 * Update the primary PHY's attached-port-pm
3493				 * and target-port-pm information with the info
3494				 * from this subsidiary
3495				 */
3496				pmcs_update_phy_pm_props(ctmp,
3497				    pptr->att_port_pm_tmp,
3498				    pptr->tgt_port_pm_tmp, B_TRUE);
3499
3500				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
3501				    "%s: PHY %s part of wide PHY %s "
3502				    "(now %d wide)", __func__, pptr->path,
3503				    ctmp->path, ctmp->width);
3504				if (root_phy) {
3505					mutex_exit(&ctmp->phy_lock);
3506				}
3507				return;
3508			}
3509		}
3510
3511		cnext = ctmp->sibling;
3512		if (root_phy) {
3513			mutex_exit(&ctmp->phy_lock);
3514		}
3515		ctmp = cnext;
3516	}
3517
3518	/*
3519	 * Step 4- If we don't have a device handle, get one.  Since this
3520	 * is the primary PHY, make sure subsidiary is cleared.
3521	 */
3522	pptr->subsidiary = 0;
3523	pptr->iport = iport;
3524	if (pmcs_get_device_handle(pwp, pptr)) {
3525		goto out;
3526	}
3527	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL, "Config expander %s "
3528	    SAS_ADDR_FMT " dev id 0x%x lr 0x%x", pptr->path,
3529	    SAS_ADDR_PRT(pptr->sas_address), pptr->device_id, pptr->link_rate);
3530
3531	/*
3532	 * Step 5- figure out how many phys are in this expander.
3533	 */
3534	nphy = pmcs_expander_get_nphy(pwp, pptr);
3535	if (nphy <= 0) {
3536		if (nphy == 0 && ddi_get_lbolt() < pptr->config_stop) {
3537			PHY_CHANGED(pwp, pptr);
3538			RESTART_DISCOVERY(pwp);
3539		} else {
3540			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
3541			    "%s: Retries exhausted for %s, killing", __func__,
3542			    pptr->path);
3543			pptr->config_stop = 0;
3544			pmcs_kill_changed(pwp, pptr, 0);
3545		}
3546		goto out;
3547	}
3548
3549	/*
3550	 * Step 6- Allocate a list of phys for this expander and figure out
3551	 * what each one is.
3552	 */
3553	for (i = 0; i < nphy; i++) {
3554		ctmp = kmem_cache_alloc(pwp->phy_cache, KM_SLEEP);
3555		bzero(ctmp, sizeof (pmcs_phy_t));
3556		ctmp->device_id = PMCS_INVALID_DEVICE_ID;
3557		ctmp->sibling = clist;
3558		ctmp->pend_dtype = NEW;	/* Init pending dtype */
3559		ctmp->config_stop = ddi_get_lbolt() +
3560		    drv_usectohz(PMCS_MAX_CONFIG_TIME);
3561		clist = ctmp;
3562	}
3563
3564	mutex_enter(&pwp->config_lock);
3565	if (pwp->config_changed) {
3566		RESTART_DISCOVERY_LOCKED(pwp);
3567		mutex_exit(&pwp->config_lock);
3568		/*
3569		 * Clean up the newly allocated PHYs and return
3570		 */
3571		while (clist) {
3572			ctmp = clist->sibling;
3573			clist->target_addr = NULL;
3574			kmem_cache_free(pwp->phy_cache, clist);
3575			clist = ctmp;
3576		}
3577		return;
3578	}
3579	mutex_exit(&pwp->config_lock);
3580
3581	/*
3582	 * Step 7- Now fill in the rest of the static portions of the phy.
3583	 */
3584	for (i = 0, ctmp = clist; ctmp; ctmp = ctmp->sibling, i++) {
3585		ctmp->parent = pptr;
3586		ctmp->pwp = pwp;
3587		ctmp->level = pptr->level+1;
3588		ctmp->portid = pptr->portid;
3589		if (ctmp->tolerates_sas2) {
3590			ASSERT(i < SAS2_PHYNUM_MAX);
3591			ctmp->phynum = i & SAS2_PHYNUM_MASK;
3592		} else {
3593			ASSERT(i < SAS_PHYNUM_MAX);
3594			ctmp->phynum = i & SAS_PHYNUM_MASK;
3595		}
3596		pmcs_phy_name(pwp, ctmp, ctmp->path, sizeof (ctmp->path));
3597		pmcs_lock_phy(ctmp);
3598	}
3599
3600	/*
3601	 * Step 8- Discover things about each phy in the expander.
3602	 */
3603	for (i = 0, ctmp = clist; ctmp; ctmp = ctmp->sibling, i++) {
3604		result = pmcs_expander_content_discover(pwp, pptr, ctmp);
3605		if (result <= 0) {
3606			if (ddi_get_lbolt() < pptr->config_stop) {
3607				PHY_CHANGED(pwp, pptr);
3608				RESTART_DISCOVERY(pwp);
3609			} else {
3610				pptr->config_stop = 0;
3611				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
3612				    "%s: Retries exhausted for %s, killing",
3613				    __func__, pptr->path);
3614				pmcs_kill_changed(pwp, pptr, 0);
3615			}
3616			goto out;
3617		}
3618
3619		/* Set pend_dtype to dtype for 1st time initialization */
3620		ctmp->pend_dtype = ctmp->dtype;
3621	}
3622
3623	/*
3624	 * Step 9: Install the new list on the next level. There should
3625	 * typically be no children pointer on this PHY.  There is one known
3626	 * case where this can happen, though.  If a root PHY goes down and
3627	 * comes back up before discovery can run, we will fail to remove the
3628	 * children from that PHY since it will no longer be marked dead.
3629	 * However, in this case, all children should also be marked dead.  If
3630	 * we see that, take those children and put them on the dead_phys list.
3631	 */
3632	if (pptr->children != NULL) {
3633		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
3634		    "%s: Expander @ %s still has children: Clean up",
3635		    __func__, pptr->path);
3636		pmcs_add_dead_phys(pwp, pptr->children);
3637	}
3638
3639	/*
3640	 * Set the new children pointer for this expander
3641	 */
3642	pptr->children = clist;
3643	clist = NULL;
3644	pptr->ncphy = nphy;
3645	pptr->configured = 1;
3646
3647	/*
3648	 * We only set width if we're greater than level 0.
3649	 */
3650	if (pptr->level) {
3651		pptr->width = 1;
3652	}
3653
3654	/*
3655	 * Now tell the rest of the world about us, as an SMP node.
3656	 */
3657	pptr->iport = iport;
3658	pmcs_new_tport(pwp, pptr);
3659
3660out:
3661	while (clist) {
3662		ctmp = clist->sibling;
3663		pmcs_unlock_phy(clist);
3664		clist->target_addr = NULL;
3665		kmem_cache_free(pwp->phy_cache, clist);
3666		clist = ctmp;
3667	}
3668}
3669
3670/*
3671 * 2. Check expanders marked changed (but not dead) to see if they still have
3672 * the same number of phys and the same SAS address. Mark them, their subsidiary
3673 * phys (if wide) and their descendents dead if anything has changed. Check the
3674 * the devices they contain to see if *they* have changed. If they've changed
3675 * from type NOTHING we leave them marked changed to be configured later
3676 * (picking up a new SAS address and link rate if possible). Otherwise, any
3677 * change in type, SAS address or removal of target role will cause us to
3678 * mark them (and their descendents) as dead and cause any pending commands
3679 * and associated devices to be removed.
3680 *
3681 * Called with PHY (pptr) locked.
3682 */
3683
3684static void
3685pmcs_check_expander(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
3686{
3687	int nphy, result;
3688	pmcs_phy_t *ctmp, *local, *local_list = NULL, *local_tail = NULL;
3689	boolean_t kill_changed, changed;
3690
3691	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
3692	    "%s: check %s", __func__, pptr->path);
3693
3694	/*
3695	 * Step 1: Mark phy as not changed. We will mark it changed if we need
3696	 * to retry.
3697	 */
3698	pptr->changed = 0;
3699
3700	/*
3701	 * Reset the config_stop time. Although we're not actually configuring
3702	 * anything here, we do want some indication of when to give up trying
3703	 * if we can't communicate with the expander.
3704	 */
3705	pptr->config_stop = ddi_get_lbolt() +
3706	    drv_usectohz(PMCS_MAX_CONFIG_TIME);
3707
3708	/*
3709	 * Step 2: Figure out how many phys are in this expander. If
3710	 * pmcs_expander_get_nphy returns 0 we ran out of resources,
3711	 * so reschedule and try later. If it returns another error,
3712	 * just return.
3713	 */
3714	nphy = pmcs_expander_get_nphy(pwp, pptr);
3715	if (nphy <= 0) {
3716		if ((nphy == 0) && (ddi_get_lbolt() < pptr->config_stop)) {
3717			PHY_CHANGED(pwp, pptr);
3718			RESTART_DISCOVERY(pwp);
3719		} else {
3720			pptr->config_stop = 0;
3721			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
3722			    "%s: Retries exhausted for %s, killing", __func__,
3723			    pptr->path);
3724			pmcs_kill_changed(pwp, pptr, 0);
3725		}
3726		return;
3727	}
3728
3729	/*
3730	 * Step 3: If the number of phys don't agree, kill the old sub-tree.
3731	 */
3732	if (nphy != pptr->ncphy) {
3733		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
3734		    "%s: number of contained phys for %s changed from %d to %d",
3735		    __func__, pptr->path, pptr->ncphy, nphy);
3736		/*
3737		 * Force a rescan of this expander after dead contents
3738		 * are cleared and removed.
3739		 */
3740		pmcs_kill_changed(pwp, pptr, 0);
3741		return;
3742	}
3743
3744	/*
3745	 * Step 4: if we're at the bottom of the stack, we're done
3746	 * (we can't have any levels below us)
3747	 */
3748	if (pptr->level == PMCS_MAX_XPND-1) {
3749		return;
3750	}
3751
3752	/*
3753	 * Step 5: Discover things about each phy in this expander.  We do
3754	 * this by walking the current list of contained phys and doing a
3755	 * content discovery for it to a local phy.
3756	 */
3757	ctmp = pptr->children;
3758	ASSERT(ctmp);
3759	if (ctmp == NULL) {
3760		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
3761		    "%s: No children attached to expander @ %s?", __func__,
3762		    pptr->path);
3763		return;
3764	}
3765
3766	while (ctmp) {
3767		/*
3768		 * Allocate a local PHY to contain the proposed new contents
3769		 * and link it to the rest of the local PHYs so that they
3770		 * can all be freed later.
3771		 */
3772		local = pmcs_clone_phy(ctmp);
3773
3774		if (local_list == NULL) {
3775			local_list = local;
3776			local_tail = local;
3777		} else {
3778			local_tail->sibling = local;
3779			local_tail = local;
3780		}
3781
3782		/*
3783		 * Need to lock the local PHY since pmcs_expander_content_
3784		 * discovery may call pmcs_clear_phy on it, which expects
3785		 * the PHY to be locked.
3786		 */
3787		pmcs_lock_phy(local);
3788		result = pmcs_expander_content_discover(pwp, pptr, local);
3789		pmcs_unlock_phy(local);
3790		if (result <= 0) {
3791			if (ddi_get_lbolt() < pptr->config_stop) {
3792				PHY_CHANGED(pwp, pptr);
3793				RESTART_DISCOVERY(pwp);
3794			} else {
3795				pptr->config_stop = 0;
3796				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
3797				    "%s: Retries exhausted for %s, killing",
3798				    __func__, pptr->path);
3799				pmcs_kill_changed(pwp, pptr, 0);
3800			}
3801
3802			/*
3803			 * Release all the local PHYs that we allocated.
3804			 */
3805			pmcs_free_phys(pwp, local_list);
3806			return;
3807		}
3808
3809		ctmp = ctmp->sibling;
3810	}
3811
3812	/*
3813	 * Step 6: Compare the local PHY's contents to our current PHY.  If
3814	 * there are changes, take the appropriate action.
3815	 * This is done in two steps (step 5 above, and 6 here) so that if we
3816	 * have to bail during this process (e.g. pmcs_expander_content_discover
3817	 * fails), we haven't actually changed the state of any of the real
3818	 * PHYs.  Next time we come through here, we'll be starting over from
3819	 * scratch.  This keeps us from marking a changed PHY as no longer
3820	 * changed, but then having to bail only to come back next time and
3821	 * think that the PHY hadn't changed.  If this were to happen, we
3822	 * would fail to properly configure the device behind this PHY.
3823	 */
3824	local = local_list;
3825	ctmp = pptr->children;
3826
3827	while (ctmp) {
3828		changed = B_FALSE;
3829		kill_changed = B_FALSE;
3830
3831		/*
3832		 * We set local to local_list prior to this loop so that we
3833		 * can simply walk the local_list while we walk this list.  The
3834		 * two lists should be completely in sync.
3835		 *
3836		 * Clear the changed flag here.
3837		 */
3838		ctmp->changed = 0;
3839
3840		if (ctmp->dtype != local->dtype) {
3841			if (ctmp->dtype != NOTHING) {
3842				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, ctmp, NULL,
3843				    "%s: %s type changed from %s to %s "
3844				    "(killing)", __func__, ctmp->path,
3845				    PHY_TYPE(ctmp), PHY_TYPE(local));
3846				/*
3847				 * Force a rescan of this expander after dead
3848				 * contents are cleared and removed.
3849				 */
3850				changed = B_TRUE;
3851				kill_changed = B_TRUE;
3852			} else {
3853				changed = B_TRUE;
3854				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, ctmp, NULL,
3855				    "%s: %s type changed from NOTHING to %s",
3856				    __func__, ctmp->path, PHY_TYPE(local));
3857				/*
3858				 * Since this PHY was nothing and is now
3859				 * something, reset the config_stop timer.
3860				 */
3861				ctmp->config_stop = ddi_get_lbolt() +
3862				    drv_usectohz(PMCS_MAX_CONFIG_TIME);
3863			}
3864
3865		} else if (ctmp->atdt != local->atdt) {
3866			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, ctmp, NULL, "%s: "
3867			    "%s attached device type changed from %d to %d "
3868			    "(killing)", __func__, ctmp->path, ctmp->atdt,
3869			    local->atdt);
3870			/*
3871			 * Force a rescan of this expander after dead
3872			 * contents are cleared and removed.
3873			 */
3874			changed = B_TRUE;
3875
3876			if (local->atdt == 0) {
3877				kill_changed = B_TRUE;
3878			}
3879		} else if (ctmp->link_rate != local->link_rate) {
3880			pmcs_prt(pwp, PMCS_PRT_INFO, ctmp, NULL, "%s: %s "
3881			    "changed speed from %s to %s", __func__, ctmp->path,
3882			    pmcs_get_rate(ctmp->link_rate),
3883			    pmcs_get_rate(local->link_rate));
3884			/* If the speed changed from invalid, force rescan */
3885			if (!PMCS_VALID_LINK_RATE(ctmp->link_rate)) {
3886				changed = B_TRUE;
3887				RESTART_DISCOVERY(pwp);
3888			} else {
3889				/* Just update to the new link rate */
3890				ctmp->link_rate = local->link_rate;
3891			}
3892
3893			if (!PMCS_VALID_LINK_RATE(local->link_rate)) {
3894				kill_changed = B_TRUE;
3895			}
3896		} else if (memcmp(ctmp->sas_address, local->sas_address,
3897		    sizeof (ctmp->sas_address)) != 0) {
3898			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, ctmp, NULL,
3899			    "%s: SAS Addr for %s changed from " SAS_ADDR_FMT
3900			    "to " SAS_ADDR_FMT " (kill old tree)", __func__,
3901			    ctmp->path, SAS_ADDR_PRT(ctmp->sas_address),
3902			    SAS_ADDR_PRT(local->sas_address));
3903			/*
3904			 * Force a rescan of this expander after dead
3905			 * contents are cleared and removed.
3906			 */
3907			changed = B_TRUE;
3908		} else {
3909			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, ctmp, NULL,
3910			    "%s: %s looks the same (type %s)",
3911			    __func__, ctmp->path, PHY_TYPE(ctmp));
3912			/*
3913			 * If EXPANDER, still mark it changed so we
3914			 * re-evaluate its contents.  If it's not an expander,
3915			 * but it hasn't been configured, also mark it as
3916			 * changed so that it will undergo configuration.
3917			 */
3918			if (ctmp->dtype == EXPANDER) {
3919				changed = B_TRUE;
3920			} else if ((ctmp->dtype != NOTHING) &&
3921			    !ctmp->configured) {
3922				ctmp->changed = 1;
3923			} else {
3924				/* It simply hasn't changed */
3925				ctmp->changed = 0;
3926			}
3927		}
3928
3929		/*
3930		 * If the PHY changed, call pmcs_kill_changed if indicated,
3931		 * update its contents to reflect its current state and mark it
3932		 * as changed.
3933		 */
3934		if (changed) {
3935			/*
3936			 * pmcs_kill_changed will mark the PHY as changed, so
3937			 * only do PHY_CHANGED if we did not do kill_changed.
3938			 */
3939			if (kill_changed) {
3940				pmcs_kill_changed(pwp, ctmp, 0);
3941			} else {
3942				/*
3943				 * If we're not killing the device, it's not
3944				 * dead.  Mark the PHY as changed.
3945				 */
3946				PHY_CHANGED(pwp, ctmp);
3947
3948				if (ctmp->dead) {
3949					pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3950					    ctmp, NULL, "%s: Unmarking PHY %s "
3951					    "dead, restarting discovery",
3952					    __func__, ctmp->path);
3953					ctmp->dead = 0;
3954					RESTART_DISCOVERY(pwp);
3955				}
3956			}
3957
3958			/*
3959			 * If the dtype of this PHY is now NOTHING, mark it as
3960			 * unconfigured.  Set pend_dtype to what the new dtype
3961			 * is.  It'll get updated at the end of the discovery
3962			 * process.
3963			 */
3964			if (local->dtype == NOTHING) {
3965				bzero(ctmp->sas_address,
3966				    sizeof (local->sas_address));
3967				ctmp->atdt = 0;
3968				ctmp->link_rate = 0;
3969				ctmp->pend_dtype = NOTHING;
3970				ctmp->configured = 0;
3971			} else {
3972				(void) memcpy(ctmp->sas_address,
3973				    local->sas_address,
3974				    sizeof (local->sas_address));
3975				ctmp->atdt = local->atdt;
3976				ctmp->link_rate = local->link_rate;
3977				ctmp->pend_dtype = local->dtype;
3978				ctmp->att_port_pm_tmp = local->att_port_pm_tmp;
3979				ctmp->tgt_port_pm_tmp = local->tgt_port_pm_tmp;
3980			}
3981		}
3982
3983		local = local->sibling;
3984		ctmp = ctmp->sibling;
3985	}
3986
3987	/*
3988	 * If we got to here, that means we were able to see all the PHYs
3989	 * and we can now update all of the real PHYs with the information
3990	 * we got on the local PHYs.  Once that's done, free all the local
3991	 * PHYs.
3992	 */
3993
3994	pmcs_free_phys(pwp, local_list);
3995}
3996
3997/*
3998 * Top level routine to check expanders.  We call pmcs_check_expander for
3999 * each expander.  Since we're not doing any configuration right now, it
4000 * doesn't matter if this is breadth-first.
4001 */
4002static void
4003pmcs_check_expanders(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
4004{
4005	pmcs_phy_t *phyp, *pnext, *pchild;
4006
4007	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
4008	    "%s: %s", __func__, pptr->path);
4009
4010	/*
4011	 * Check each expander at this level
4012	 */
4013	phyp = pptr;
4014	while (phyp) {
4015		pmcs_lock_phy(phyp);
4016
4017		if ((phyp->dtype == EXPANDER) && phyp->changed &&
4018		    !phyp->dead && !phyp->subsidiary &&
4019		    phyp->configured) {
4020			pmcs_check_expander(pwp, phyp);
4021		}
4022
4023		pnext = phyp->sibling;
4024		pmcs_unlock_phy(phyp);
4025		phyp = pnext;
4026	}
4027
4028	/*
4029	 * Now check the children
4030	 */
4031	phyp = pptr;
4032	while (phyp) {
4033		pmcs_lock_phy(phyp);
4034		pnext = phyp->sibling;
4035		pchild = phyp->children;
4036		pmcs_unlock_phy(phyp);
4037
4038		if (pchild) {
4039			pmcs_check_expanders(pwp, pchild);
4040		}
4041
4042		phyp = pnext;
4043	}
4044}
4045
4046/*
4047 * Called with softstate and PHY locked
4048 */
4049static void
4050pmcs_clear_expander(pmcs_hw_t *pwp, pmcs_phy_t *pptr, int level)
4051{
4052	pmcs_phy_t *ctmp;
4053
4054	ASSERT(mutex_owned(&pwp->lock));
4055	ASSERT(mutex_owned(&pptr->phy_lock));
4056	ASSERT(pptr->level < PMCS_MAX_XPND - 1);
4057
4058	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
4059	    "%s: checking %s", __func__, pptr->path);
4060
4061	ctmp = pptr->children;
4062	while (ctmp) {
4063		/*
4064		 * If the expander is dead, mark its children dead
4065		 */
4066		if (pptr->dead) {
4067			ctmp->dead = 1;
4068		}
4069		if (ctmp->dtype == EXPANDER) {
4070			pmcs_clear_expander(pwp, ctmp, level + 1);
4071		}
4072		ctmp = ctmp->sibling;
4073	}
4074
4075	/*
4076	 * If this expander is not dead, we're done here.
4077	 */
4078	if (!pptr->dead) {
4079		return;
4080	}
4081
4082	/*
4083	 * Now snip out the list of children below us and release them
4084	 */
4085	if (pptr->children) {
4086		pmcs_add_dead_phys(pwp, pptr->children);
4087	}
4088
4089	pptr->children = NULL;
4090
4091	/*
4092	 * Clear subsidiary phys as well.  Getting the parent's PHY lock
4093	 * is only necessary if level == 0 since otherwise the parent is
4094	 * already locked.
4095	 */
4096	if (!IS_ROOT_PHY(pptr)) {
4097		if (level == 0) {
4098			mutex_enter(&pptr->parent->phy_lock);
4099		}
4100		ctmp = pptr->parent->children;
4101		if (level == 0) {
4102			mutex_exit(&pptr->parent->phy_lock);
4103		}
4104	} else {
4105		ctmp = pwp->root_phys;
4106	}
4107
4108	while (ctmp) {
4109		if (ctmp == pptr) {
4110			ctmp = ctmp->sibling;
4111			continue;
4112		}
4113		/*
4114		 * We only need to lock subsidiary PHYs on the level 0
4115		 * expander.  Any children of that expander, subsidiaries or
4116		 * not, will already be locked.
4117		 */
4118		if (level == 0) {
4119			pmcs_lock_phy(ctmp);
4120		}
4121		if (ctmp->dtype != EXPANDER || ctmp->subsidiary == 0 ||
4122		    memcmp(ctmp->sas_address, pptr->sas_address,
4123		    sizeof (ctmp->sas_address)) != 0) {
4124			if (level == 0) {
4125				pmcs_unlock_phy(ctmp);
4126			}
4127			ctmp = ctmp->sibling;
4128			continue;
4129		}
4130		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, ctmp, NULL,
4131		    "%s: subsidiary %s", __func__, ctmp->path);
4132		pmcs_clear_phy(pwp, ctmp);
4133		if (level == 0) {
4134			pmcs_unlock_phy(ctmp);
4135		}
4136		ctmp = ctmp->sibling;
4137	}
4138
4139	pmcs_clear_phy(pwp, pptr);
4140}
4141
4142/*
4143 * Called with PHY locked and with scratch acquired. We return 0 if
4144 * we fail to allocate resources or notice that the configuration
4145 * count changed while we were running the command. We return
4146 * less than zero if we had an I/O error or received an unsupported
4147 * configuration. Otherwise we return the number of phys in the
4148 * expander.
4149 */
4150#define	DFM(m, y) if (m == NULL) m = y
static int
pmcs_expander_get_nphy(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
{
	struct pmcwork *pwrk;
	pmcs_iport_t *iport;
	char buf[64];
	const uint_t rdoff = 0x100;	/* returned data offset */
	smp_response_frame_t *srf;
	smp_report_general_resp_t *srgr;
	uint32_t msg[PMCS_MSG_SIZE], *ptr, htag, status, ival;
	int result = 0;

	/*
	 * First dword of the SMP REPORT GENERAL request.  The low-order
	 * bytes (0x1100) carry a SAS2-style allocation length; if the
	 * expander rejects the request with result 3 (see below) those
	 * bytes are cleared and the command is retried as SAS1.1.
	 */
	ival = 0x40001100;

again:
	if (!pptr->iport || !pptr->valid_device_id) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, pptr->target,
		    "%s: Can't reach PHY %s", __func__, pptr->path);
		goto out;
	}

	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
	if (pwrk == NULL) {
		goto out;
	}
	/* Poison scratch so stale data is recognizable in the response. */
	(void) memset(pwp->scratch, 0x77, PMCS_SCRATCH_SIZE);
	pwrk->arg = pwp->scratch;
	pwrk->dtype = pptr->dtype;
	pwrk->xp = pptr->target;
	pwrk->htag |= PMCS_TAG_NONIO_CMD;
	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
	if (ptr == NULL) {
		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
		pmcs_prt(pwp, PMCS_PRT_DEBUG2, pptr, NULL,
		    "%s: GET_IQ_ENTRY failed", __func__);
		pmcs_pwork(pwp, pwrk);
		goto out;
	}

	/*
	 * Build the PMCIN_SMP_REQUEST: indirect response mode, with the
	 * response data DMA'd into scratch at offset rdoff.
	 */
	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, PMCIN_SMP_REQUEST));
	msg[1] = LE_32(pwrk->htag);
	msg[2] = LE_32(pptr->device_id);
	msg[3] = LE_32((4 << SMP_REQUEST_LENGTH_SHIFT) | SMP_INDIRECT_RESPONSE);
	/*
	 * Send SMP REPORT GENERAL (of either SAS1.1 or SAS2 flavors).
	 */
	msg[4] = BE_32(ival);
	msg[5] = 0;
	msg[6] = 0;
	msg[7] = 0;
	msg[8] = 0;
	msg[9] = 0;
	msg[10] = 0;
	msg[11] = 0;
	msg[12] = LE_32(DWORD0(pwp->scratch_dma+rdoff));
	msg[13] = LE_32(DWORD1(pwp->scratch_dma+rdoff));
	msg[14] = LE_32(PMCS_SCRATCH_SIZE - rdoff);
	msg[15] = 0;

	COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE);

	/*
	 * Hold the iport and serialize SMP access for the duration of the
	 * command; the PHY lock is dropped while we wait so completion
	 * processing can run.
	 */
	pmcs_hold_iport(pptr->iport);
	iport = pptr->iport;
	pmcs_smp_acquire(iport);
	pwrk->state = PMCS_WORK_STATE_ONCHIP;
	htag = pwrk->htag;
	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
	pmcs_unlock_phy(pptr);
	WAIT_FOR(pwrk, 1000, result);
	pmcs_pwork(pwp, pwrk);
	pmcs_smp_release(iport);
	pmcs_rele_iport(iport);
	pmcs_lock_phy(pptr);
	if (result) {
		/*
		 * Timed out: abort the outstanding SMP command and return
		 * 0 so the caller treats this as "config changed/retry"
		 * rather than a dead expander.
		 */
		pmcs_timed_out(pwp, htag, __func__);
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
		    "%s: Issuing SMP ABORT for htag 0x%08x", __func__, htag);
		if (pmcs_abort(pwp, pptr, htag, 0, 1)) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
			    "%s: SMP ABORT failed for cmd (htag 0x%08x)",
			    __func__, htag);
		}
		result = 0;
		goto out;
	}

	/* If discovery was restarted underneath us, bail out quietly. */
	mutex_enter(&pwp->config_lock);
	if (pwp->config_changed) {
		RESTART_DISCOVERY_LOCKED(pwp);
		mutex_exit(&pwp->config_lock);
		result = 0;
		goto out;
	}
	mutex_exit(&pwp->config_lock);

	/*
	 * The IOMB status is in the third dword of scratch; the SMP
	 * response frame itself was DMA'd to scratch + rdoff.
	 */
	ptr = (void *)pwp->scratch;
	status = LE_32(ptr[2]);
	if (status == PMCOUT_STATUS_UNDERFLOW ||
	    status == PMCOUT_STATUS_OVERFLOW) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG_UNDERFLOW, pptr, NULL,
		    "%s: over/underflow", __func__);
		status = PMCOUT_STATUS_OK;
	}
	srf = (smp_response_frame_t *)&((uint32_t *)pwp->scratch)[rdoff >> 2];
	srgr = (smp_report_general_resp_t *)
	    &((uint32_t *)pwp->scratch)[(rdoff >> 2)+1];

	if (status != PMCOUT_STATUS_OK) {
		char *nag = NULL;
		(void) snprintf(buf, sizeof (buf),
		    "%s: SMP op failed (0x%x)", __func__, status);
		switch (status) {
		case PMCOUT_STATUS_IO_PORT_IN_RESET:
			DFM(nag, "I/O Port In Reset");
			/* FALLTHROUGH */
		case PMCOUT_STATUS_ERROR_HW_TIMEOUT:
			DFM(nag, "Hardware Timeout");
			/* FALLTHROUGH */
		case PMCOUT_STATUS_ERROR_INTERNAL_SMP_RESOURCE:
			DFM(nag, "Internal SMP Resource Failure");
			/* FALLTHROUGH */
		case PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY:
			DFM(nag, "PHY Not Ready");
			/* FALLTHROUGH */
		case PMCOUT_STATUS_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED:
			DFM(nag, "Connection Rate Not Supported");
			/* FALLTHROUGH */
		case PMCOUT_STATUS_IO_XFER_OPEN_RETRY_TIMEOUT:
			DFM(nag, "Open Retry Timeout");
			/* FALLTHROUGH */
		case PMCOUT_STATUS_IO_OPEN_CNX_ERROR_HW_RESOURCE_BUSY:
			DFM(nag, "HW Resource Busy");
			/* FALLTHROUGH */
		case PMCOUT_STATUS_SMP_RESP_CONNECTION_ERROR:
			DFM(nag, "Response Connection Error");
			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
			    "%s: expander %s SMP operation failed (%s)",
			    __func__, pptr->path, nag);
			break;

		/*
		 * For the IO_DS_NON_OPERATIONAL case, we need to kick off
		 * device state recovery and return 0 so that the caller
		 * doesn't assume this expander is dead for good.
		 */
		case PMCOUT_STATUS_IO_DS_NON_OPERATIONAL: {
			pmcs_xscsi_t *xp = pptr->target;

			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, xp,
			    "%s: expander %s device state non-operational",
			    __func__, pptr->path);

			if (xp == NULL) {
				/*
				 * Kick off recovery right now.
				 */
				SCHEDULE_WORK(pwp, PMCS_WORK_DS_ERR_RECOVERY);
				(void) ddi_taskq_dispatch(pwp->tq, pmcs_worker,
				    pwp, DDI_NOSLEEP);
			} else {
				mutex_enter(&xp->statlock);
				pmcs_start_dev_state_recovery(xp, pptr);
				mutex_exit(&xp->statlock);
			}

			break;
		}

		default:
			pmcs_print_entry(pwp, PMCS_PRT_DEBUG, buf, ptr);
			result = -EIO;
			break;
		}
	} else if (srf->srf_frame_type != SMP_FRAME_TYPE_RESPONSE) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
		    "%s: bad response frame type 0x%x",
		    __func__, srf->srf_frame_type);
		result = -EINVAL;
	} else if (srf->srf_function != SMP_FUNC_REPORT_GENERAL) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
		    "%s: bad response function 0x%x",
		    __func__, srf->srf_function);
		result = -EINVAL;
	} else if (srf->srf_result != 0) {
		/*
		 * Check to see if we have a value of 3 for failure and
		 * whether we were using a SAS2.0 allocation length value
		 * and retry without it.
		 */
		if (srf->srf_result == 3 && (ival & 0xff00)) {
			ival &= ~0xff00;
			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
			    "%s: err 0x%x with SAS2 request- retry with SAS1",
			    __func__, srf->srf_result);
			goto again;
		}
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
		    "%s: bad response 0x%x", __func__, srf->srf_result);
		result = -EINVAL;
	} else if (srgr->srgr_configuring) {
		/* Expander still self-configuring; caller should retry. */
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
		    "%s: expander at phy %s is still configuring",
		    __func__, pptr->path);
		result = 0;
	} else {
		result = srgr->srgr_number_of_phys;
		/* A SAS2-style request succeeded; remember that. */
		if (ival & 0xff00) {
			pptr->tolerates_sas2 = 1;
		}
		/*
		 * Save off the REPORT_GENERAL response
		 */
		bcopy(srgr, &pptr->rg_resp, sizeof (smp_report_general_resp_t));
		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
		    "%s has %d phys and %s SAS2", pptr->path, result,
		    pptr->tolerates_sas2? "tolerates" : "does not tolerate");
	}
out:
	return (result);
}
4372
4373/*
4374 * Called with expander locked (and thus, pptr) as well as all PHYs up to
4375 * the root, and scratch acquired. Return 0 if we fail to allocate resources
4376 * or notice that the configuration changed while we were running the command.
4377 *
4378 * We return less than zero if we had an I/O error or received an
4379 * unsupported configuration.
4380 */
4381static int
4382pmcs_expander_content_discover(pmcs_hw_t *pwp, pmcs_phy_t *expander,
4383    pmcs_phy_t *pptr)
4384{
4385	struct pmcwork *pwrk;
4386	pmcs_iport_t *iport;
4387	char buf[64];
4388	uint8_t sas_address[8];
4389	uint8_t att_sas_address[8];
4390	smp_response_frame_t *srf;
4391	smp_discover_resp_t *sdr;
4392	const uint_t rdoff = 0x100;	/* returned data offset */
4393	uint8_t *roff;
4394	uint32_t status, *ptr, msg[PMCS_MSG_SIZE], htag;
4395	int result = 0;
4396	uint8_t	ini_support;
4397	uint8_t	tgt_support;
4398
4399	if (!expander->iport || !expander->valid_device_id) {
4400		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, expander, expander->target,
4401		    "%s: Can't reach PHY %s", __func__, expander->path);
4402		goto out;
4403	}
4404
4405	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, expander);
4406	if (pwrk == NULL) {
4407		goto out;
4408	}
4409	(void) memset(pwp->scratch, 0x77, PMCS_SCRATCH_SIZE);
4410	pwrk->arg = pwp->scratch;
4411	pwrk->dtype = expander->dtype;
4412	pwrk->xp = expander->target;
4413	pwrk->htag |= PMCS_TAG_NONIO_CMD;
4414	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, PMCIN_SMP_REQUEST));
4415	msg[1] = LE_32(pwrk->htag);
4416	msg[2] = LE_32(expander->device_id);
4417	msg[3] = LE_32((12 << SMP_REQUEST_LENGTH_SHIFT) |
4418	    SMP_INDIRECT_RESPONSE);
4419	/*
4420	 * Send SMP DISCOVER (of either SAS1.1 or SAS2 flavors).
4421	 */
4422	if (expander->tolerates_sas2) {
4423		msg[4] = BE_32(0x40101B00);
4424	} else {
4425		msg[4] = BE_32(0x40100000);
4426	}
4427	msg[5] = 0;
4428	msg[6] = BE_32((pptr->phynum << 16));
4429	msg[7] = 0;
4430	msg[8] = 0;
4431	msg[9] = 0;
4432	msg[10] = 0;
4433	msg[11] = 0;
4434	msg[12] = LE_32(DWORD0(pwp->scratch_dma+rdoff));
4435	msg[13] = LE_32(DWORD1(pwp->scratch_dma+rdoff));
4436	msg[14] = LE_32(PMCS_SCRATCH_SIZE - rdoff);
4437	msg[15] = 0;
4438	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4439	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4440	if (ptr == NULL) {
4441		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4442		goto out;
4443	}
4444
4445	COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE);
4446
4447	pmcs_hold_iport(expander->iport);
4448	iport = expander->iport;
4449	pmcs_smp_acquire(iport);
4450	pwrk->state = PMCS_WORK_STATE_ONCHIP;
4451	htag = pwrk->htag;
4452	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4453	pmcs_unlock_phy(expander);
4454	WAIT_FOR(pwrk, 1000, result);
4455	pmcs_pwork(pwp, pwrk);
4456	pmcs_smp_release(iport);
4457	pmcs_rele_iport(iport);
4458	pmcs_lock_phy(expander);
4459	if (result) {
4460		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
4461		    "%s: Issuing SMP ABORT for htag 0x%08x", __func__, htag);
4462		if (pmcs_abort(pwp, pptr, htag, 0, 1)) {
4463			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
4464			    "%s: SMP ABORT failed for cmd (htag 0x%08x)",
4465			    __func__, htag);
4466		}
4467		result = -ETIMEDOUT;
4468		goto out;
4469	}
4470
4471	mutex_enter(&pwp->config_lock);
4472	if (pwp->config_changed) {
4473		RESTART_DISCOVERY_LOCKED(pwp);
4474		mutex_exit(&pwp->config_lock);
4475		result = 0;
4476		goto out;
4477	}
4478
4479	mutex_exit(&pwp->config_lock);
4480	ptr = (void *)pwp->scratch;
4481	/*
4482	 * Point roff to the DMA offset for returned data
4483	 */
4484	roff = pwp->scratch;
4485	roff += rdoff;
4486	srf = (smp_response_frame_t *)roff;
4487	sdr = (smp_discover_resp_t *)(roff+4);
4488	status = LE_32(ptr[2]);
4489	if (status == PMCOUT_STATUS_UNDERFLOW ||
4490	    status == PMCOUT_STATUS_OVERFLOW) {
4491		pmcs_prt(pwp, PMCS_PRT_DEBUG_UNDERFLOW, pptr, NULL,
4492		    "%s: over/underflow", __func__);
4493		status = PMCOUT_STATUS_OK;
4494	}
4495	if (status != PMCOUT_STATUS_OK) {
4496		char *nag = NULL;
4497		(void) snprintf(buf, sizeof (buf),
4498		    "%s: SMP op failed (0x%x)", __func__, status);
4499		switch (status) {
4500		case PMCOUT_STATUS_ERROR_HW_TIMEOUT:
4501			DFM(nag, "Hardware Timeout");
4502			/* FALLTHROUGH */
4503		case PMCOUT_STATUS_ERROR_INTERNAL_SMP_RESOURCE:
4504			DFM(nag, "Internal SMP Resource Failure");
4505			/* FALLTHROUGH */
4506		case PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY:
4507			DFM(nag, "PHY Not Ready");
4508			/* FALLTHROUGH */
4509		case PMCOUT_STATUS_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED:
4510			DFM(nag, "Connection Rate Not Supported");
4511			/* FALLTHROUGH */
4512		case PMCOUT_STATUS_IO_XFER_OPEN_RETRY_TIMEOUT:
4513			DFM(nag, "Open Retry Timeout");
4514			/* FALLTHROUGH */
4515		case PMCOUT_STATUS_IO_OPEN_CNX_ERROR_HW_RESOURCE_BUSY:
4516			DFM(nag, "HW Resource Busy");
4517			/* FALLTHROUGH */
4518		case PMCOUT_STATUS_SMP_RESP_CONNECTION_ERROR:
4519			DFM(nag, "Response Connection Error");
4520			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
4521			    "%s: expander %s SMP operation failed (%s)",
4522			    __func__, pptr->path, nag);
4523			break;
4524		default:
4525			pmcs_print_entry(pwp, PMCS_PRT_DEBUG, buf, ptr);
4526			result = -EIO;
4527			break;
4528		}
4529		goto out;
4530	} else if (srf->srf_frame_type != SMP_FRAME_TYPE_RESPONSE) {
4531		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
4532		    "%s: bad response frame type 0x%x",
4533		    __func__, srf->srf_frame_type);
4534		result = -EINVAL;
4535		goto out;
4536	} else if (srf->srf_function != SMP_FUNC_DISCOVER) {
4537		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
4538		    "%s: bad response function 0x%x",
4539		    __func__, srf->srf_function);
4540		result = -EINVAL;
4541		goto out;
4542	} else if (srf->srf_result != SMP_RES_FUNCTION_ACCEPTED) {
4543		result = pmcs_smp_function_result(pwp, srf);
4544		/* Need not fail if PHY is Vacant */
4545		if (result != SMP_RES_PHY_VACANT) {
4546			result = -EINVAL;
4547			goto out;
4548		}
4549	}
4550
4551	/*
4552	 * Save off the DISCOVER response
4553	 */
4554	bcopy(sdr, &pptr->disc_resp, sizeof (smp_discover_resp_t));
4555
4556	ini_support = (sdr->sdr_attached_sata_host |
4557	    (sdr->sdr_attached_smp_initiator << 1) |
4558	    (sdr->sdr_attached_stp_initiator << 2) |
4559	    (sdr->sdr_attached_ssp_initiator << 3));
4560
4561	tgt_support = (sdr->sdr_attached_sata_device |
4562	    (sdr->sdr_attached_smp_target << 1) |
4563	    (sdr->sdr_attached_stp_target << 2) |
4564	    (sdr->sdr_attached_ssp_target << 3));
4565
4566	pmcs_wwn2barray(BE_64(sdr->sdr_sas_addr), sas_address);
4567	pmcs_wwn2barray(BE_64(sdr->sdr_attached_sas_addr), att_sas_address);
4568
4569	pptr->virtual = sdr->sdr_virtual_phy;
4570
4571	/*
4572	 * Set the routing attribute regardless of the PHY type.
4573	 */
4574	pptr->routing_attr = sdr->sdr_routing_attr;
4575
4576	switch (sdr->sdr_attached_device_type) {
4577	case SAS_IF_DTYPE_ENDPOINT:
4578		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
4579		    "exp_content: %s atdt=0x%x lr=%x is=%x ts=%x SAS="
4580		    SAS_ADDR_FMT " attSAS=" SAS_ADDR_FMT " atPHY=%x",
4581		    pptr->path,
4582		    sdr->sdr_attached_device_type,
4583		    sdr->sdr_negotiated_logical_link_rate,
4584		    ini_support,
4585		    tgt_support,
4586		    SAS_ADDR_PRT(sas_address),
4587		    SAS_ADDR_PRT(att_sas_address),
4588		    sdr->sdr_attached_phy_identifier);
4589
4590		if (sdr->sdr_attached_sata_device ||
4591		    sdr->sdr_attached_stp_target) {
4592			pptr->dtype = SATA;
4593		} else if (sdr->sdr_attached_ssp_target) {
4594			pptr->dtype = SAS;
4595		} else if (tgt_support || ini_support) {
4596			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
4597			    "%s: %s has tgt support=%x init support=(%x)",
4598			    __func__, pptr->path, tgt_support, ini_support);
4599		}
4600
4601		switch (pptr->routing_attr) {
4602		case SMP_ROUTING_SUBTRACTIVE:
4603		case SMP_ROUTING_TABLE:
4604		case SMP_ROUTING_DIRECT:
4605			pptr->routing_method = SMP_ROUTING_DIRECT;
4606			break;
4607		default:
4608			pptr->routing_method = 0xff;	/* Invalid method */
4609			break;
4610		}
4611		pmcs_update_phy_pm_props(pptr, (1ULL << pptr->phynum),
4612		    (1ULL << sdr->sdr_attached_phy_identifier), B_TRUE);
4613		break;
4614	case SAS_IF_DTYPE_EDGE:
4615	case SAS_IF_DTYPE_FANOUT:
4616		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
4617		    "exp_content: %s atdt=0x%x lr=%x is=%x ts=%x SAS="
4618		    SAS_ADDR_FMT " attSAS=" SAS_ADDR_FMT " atPHY=%x",
4619		    pptr->path,
4620		    sdr->sdr_attached_device_type,
4621		    sdr->sdr_negotiated_logical_link_rate,
4622		    ini_support,
4623		    tgt_support,
4624		    SAS_ADDR_PRT(sas_address),
4625		    SAS_ADDR_PRT(att_sas_address),
4626		    sdr->sdr_attached_phy_identifier);
4627		if (sdr->sdr_attached_smp_target) {
4628			/*
4629			 * Avoid configuring phys that just point back
4630			 * at a parent phy
4631			 */
4632			if (expander->parent &&
4633			    memcmp(expander->parent->sas_address,
4634			    att_sas_address,
4635			    sizeof (expander->parent->sas_address)) == 0) {
4636				pmcs_prt(pwp, PMCS_PRT_DEBUG3, pptr, NULL,
4637				    "%s: skipping port back to parent "
4638				    "expander (%s)", __func__, pptr->path);
4639				pptr->dtype = NOTHING;
4640				break;
4641			}
4642			pptr->dtype = EXPANDER;
4643
4644		} else if (tgt_support || ini_support) {
4645			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
4646			    "%s has tgt support=%x init support=(%x)",
4647			    pptr->path, tgt_support, ini_support);
4648			pptr->dtype = EXPANDER;
4649		}
4650		if (pptr->routing_attr == SMP_ROUTING_DIRECT) {
4651			pptr->routing_method = 0xff;	/* Invalid method */
4652		} else {
4653			pptr->routing_method = pptr->routing_attr;
4654		}
4655		pmcs_update_phy_pm_props(pptr, (1ULL << pptr->phynum),
4656		    (1ULL << sdr->sdr_attached_phy_identifier), B_TRUE);
4657		break;
4658	default:
4659		pptr->dtype = NOTHING;
4660		break;
4661	}
4662	if (pptr->dtype != NOTHING) {
4663		pmcs_phy_t *ctmp;
4664
4665		/*
4666		 * If the attached device is a SATA device and the expander
4667		 * is (possibly) a SAS2 compliant expander, check for whether
4668		 * there is a NAA=5 WWN field starting at this offset and
4669		 * use that for the SAS Address for this device.
4670		 */
4671		if (expander->tolerates_sas2 && pptr->dtype == SATA &&
4672		    (roff[SAS_ATTACHED_NAME_OFFSET] >> 8) == NAA_IEEE_REG) {
4673			(void) memcpy(pptr->sas_address,
4674			    &roff[SAS_ATTACHED_NAME_OFFSET], 8);
4675		} else {
4676			(void) memcpy(pptr->sas_address, att_sas_address, 8);
4677		}
4678		pptr->atdt = (sdr->sdr_attached_device_type);
4679		/*
4680		 * Now run up from the expander's parent up to the top to
4681		 * make sure we only use the least common link_rate.
4682		 */
4683		for (ctmp = expander->parent; ctmp; ctmp = ctmp->parent) {
4684			if (ctmp->link_rate <
4685			    sdr->sdr_negotiated_logical_link_rate) {
4686				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
4687				    "%s: derating link rate from %x to %x due "
4688				    "to %s being slower", pptr->path,
4689				    sdr->sdr_negotiated_logical_link_rate,
4690				    ctmp->link_rate,
4691				    ctmp->path);
4692				sdr->sdr_negotiated_logical_link_rate =
4693				    ctmp->link_rate;
4694			}
4695		}
4696		pptr->link_rate = sdr->sdr_negotiated_logical_link_rate;
4697		pptr->state.prog_min_rate = sdr->sdr_prog_min_phys_link_rate;
4698		pptr->state.hw_min_rate = sdr->sdr_hw_min_phys_link_rate;
4699		pptr->state.prog_max_rate = sdr->sdr_prog_max_phys_link_rate;
4700		pptr->state.hw_max_rate = sdr->sdr_hw_max_phys_link_rate;
4701		PHY_CHANGED(pwp, pptr);
4702	} else {
4703		pmcs_clear_phy(pwp, pptr);
4704	}
4705	result = 1;
4706out:
4707	return (result);
4708}
4709
4710/*
4711 * Get a work structure and assign it a tag with type and serial number
4712 * If a structure is returned, it is returned locked.
4713 */
4714pmcwork_t *
4715pmcs_gwork(pmcs_hw_t *pwp, uint32_t tag_type, pmcs_phy_t *phyp)
4716{
4717	pmcwork_t *p;
4718	uint16_t snum;
4719	uint32_t off;
4720
4721	mutex_enter(&pwp->wfree_lock);
4722	p = STAILQ_FIRST(&pwp->wf);
4723	if (p == NULL) {
4724		/*
4725		 * If we couldn't get a work structure, it's time to bite
4726		 * the bullet, grab the pfree_lock and copy over all the
4727		 * work structures from the pending free list to the actual
4728		 * free list (assuming it's not also empty).
4729		 */
4730		mutex_enter(&pwp->pfree_lock);
4731		if (STAILQ_FIRST(&pwp->pf) == NULL) {
4732			mutex_exit(&pwp->pfree_lock);
4733			mutex_exit(&pwp->wfree_lock);
4734			return (NULL);
4735		}
4736		pwp->wf.stqh_first = pwp->pf.stqh_first;
4737		pwp->wf.stqh_last = pwp->pf.stqh_last;
4738		STAILQ_INIT(&pwp->pf);
4739		mutex_exit(&pwp->pfree_lock);
4740
4741		p = STAILQ_FIRST(&pwp->wf);
4742		ASSERT(p != NULL);
4743	}
4744	STAILQ_REMOVE(&pwp->wf, p, pmcwork, next);
4745	snum = pwp->wserno++;
4746	mutex_exit(&pwp->wfree_lock);
4747
4748	off = p - pwp->work;
4749
4750	mutex_enter(&p->lock);
4751	ASSERT(p->state == PMCS_WORK_STATE_NIL);
4752	ASSERT(p->htag == PMCS_TAG_FREE);
4753	p->htag = (tag_type << PMCS_TAG_TYPE_SHIFT) & PMCS_TAG_TYPE_MASK;
4754	p->htag |= ((snum << PMCS_TAG_SERNO_SHIFT) & PMCS_TAG_SERNO_MASK);
4755	p->htag |= ((off << PMCS_TAG_INDEX_SHIFT) & PMCS_TAG_INDEX_MASK);
4756	p->start = gethrtime();
4757	p->state = PMCS_WORK_STATE_READY;
4758	p->ssp_event = 0;
4759	p->dead = 0;
4760	p->timer = 0;
4761
4762	if (phyp) {
4763		p->phy = phyp;
4764		pmcs_inc_phy_ref_count(phyp);
4765	}
4766
4767	return (p);
4768}
4769
4770/*
4771 * Called with pwrk lock held.  Returned with lock released.
4772 */
4773void
4774pmcs_pwork(pmcs_hw_t *pwp, pmcwork_t *p)
4775{
4776	ASSERT(p != NULL);
4777	ASSERT(mutex_owned(&p->lock));
4778
4779	p->last_ptr = p->ptr;
4780	p->last_arg = p->arg;
4781	p->last_phy = p->phy;
4782	p->last_xp = p->xp;
4783	p->last_htag = p->htag;
4784	p->last_state = p->state;
4785	p->finish = gethrtime();
4786
4787	if (p->phy) {
4788		pmcs_dec_phy_ref_count(p->phy);
4789	}
4790
4791	p->state = PMCS_WORK_STATE_NIL;
4792	p->htag = PMCS_TAG_FREE;
4793	p->xp = NULL;
4794	p->ptr = NULL;
4795	p->arg = NULL;
4796	p->phy = NULL;
4797	p->abt_htag = 0;
4798	p->timer = 0;
4799	p->onwire = 0;
4800	p->ssp_event = 0;
4801	mutex_exit(&p->lock);
4802
4803	if (mutex_tryenter(&pwp->wfree_lock) == 0) {
4804		mutex_enter(&pwp->pfree_lock);
4805		STAILQ_INSERT_TAIL(&pwp->pf, p, next);
4806		mutex_exit(&pwp->pfree_lock);
4807	} else {
4808		STAILQ_INSERT_TAIL(&pwp->wf, p, next);
4809		mutex_exit(&pwp->wfree_lock);
4810	}
4811}
4812
4813/*
4814 * Find a work structure based upon a tag and make sure that the tag
4815 * serial number matches the work structure we've found.
4816 * If a structure is found, its lock is held upon return.
4817 * If lock_phy is B_TRUE, then lock the phy also when returning the work struct
4818 */
4819pmcwork_t *
4820pmcs_tag2wp(pmcs_hw_t *pwp, uint32_t htag, boolean_t lock_phy)
4821{
4822	pmcwork_t *p;
4823	pmcs_phy_t *phyp;
4824	uint32_t idx = PMCS_TAG_INDEX(htag);
4825
4826	p = &pwp->work[idx];
4827
4828	mutex_enter(&p->lock);
4829	if (p->htag == htag) {
4830		if (lock_phy) {
4831			phyp = p->phy;
4832			if (phyp != NULL) {
4833				/* phy lock should be held before work lock */
4834				mutex_exit(&p->lock);
4835				mutex_enter(&phyp->phy_lock);
4836				mutex_enter(&p->lock);
4837			}
4838			/*
4839			 * Check htag again, in case the work got completed
4840			 * while we dropped the work lock and got the phy lock
4841			 */
4842			if (p->htag != htag) {
4843				if (phyp != NULL) {
4844					mutex_exit(&p->lock);
4845					mutex_exit(&phyp->phy_lock);
4846				}
4847				pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, NULL, "%s: "
4848				    "HTAG (0x%x) found, but work (0x%p) "
4849				    "is already complete", __func__, htag,
4850				    (void *)p);
4851				return (NULL);
4852			}
4853		}
4854		return (p);
4855	}
4856	mutex_exit(&p->lock);
4857	pmcs_prt(pwp, PMCS_PRT_DEBUG2, NULL, NULL,
4858	    "INDEX 0x%x HTAG 0x%x got p->htag 0x%x", idx, htag, p->htag);
4859	return (NULL);
4860}
4861
4862/*
4863 * Issue an abort for a command or for all commands.
4864 *
4865 * Since this can be called from interrupt context,
4866 * we don't wait for completion if wait is not set.
4867 *
4868 * Called with PHY lock held.
4869 */
int
pmcs_abort(pmcs_hw_t *pwp, pmcs_phy_t *pptr, uint32_t tag, int all_cmds,
    int wait)
{
	pmcwork_t *pwrk;
	pmcs_xscsi_t *tgt;
	uint32_t msg[PMCS_MSG_SIZE], *ptr;
	int result, abt_type;
	uint32_t abt_htag, status;

	/* Only one ABORT_ALL may be outstanding per PHY at a time. */
	if (pptr->abort_all_start) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL, "%s: ABORT_ALL for "
		    "(%s) already in progress.", __func__, pptr->path);
		return (EBUSY);
	}

	/* Pick the abort opcode matching the device protocol. */
	switch (pptr->dtype) {
	case SAS:
		abt_type = PMCIN_SSP_ABORT;
		break;
	case SATA:
		abt_type = PMCIN_SATA_ABORT;
		break;
	case EXPANDER:
		abt_type = PMCIN_SMP_ABORT;
		break;
	default:
		/* Nothing to abort for other device types. */
		return (0);
	}

	pwrk = pmcs_gwork(pwp, wait ? PMCS_TAG_TYPE_WAIT : PMCS_TAG_TYPE_NONE,
	    pptr);

	if (pwrk == NULL) {
		pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL, pmcs_nowrk, __func__);
		return (ENOMEM);
	}

	pwrk->dtype = pptr->dtype;
	pwrk->xp = pptr->target;
	pwrk->htag |= PMCS_TAG_NONIO_CMD;
	if (wait) {
		/* Completion will copy the IOMB into msg for us to inspect. */
		pwrk->arg = msg;
	}
	if (pptr->valid_device_id == 0) {
		pmcs_pwork(pwp, pwrk);
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
		    "%s: Invalid DeviceID", __func__);
		return (ENODEV);
	}
	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, abt_type));
	msg[1] = LE_32(pwrk->htag);
	msg[2] = LE_32(pptr->device_id);
	if (all_cmds) {
		/* Scope flag 1 = abort everything for this device. */
		msg[3] = 0;
		msg[4] = LE_32(1);
		pwrk->ptr = NULL;
		pwrk->abt_htag = PMCS_ABT_HTAG_ALL;
		pptr->abort_all_start = gethrtime();
	} else {
		/* Abort only the command identified by 'tag'. */
		msg[3] = LE_32(tag);
		msg[4] = 0;
		pwrk->abt_htag = tag;
	}
	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
	if (ptr == NULL) {
		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
		pmcs_pwork(pwp, pwrk);
		/* Undo the abort-all marker set above, if any. */
		pptr->abort_all_start = 0;
		pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL, pmcs_nomsg, __func__);
		return (ENOMEM);
	}

	COPY_MESSAGE(ptr, msg, 5);
	if (all_cmds) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
		    "%s: aborting all commands for %s device %s. (htag=0x%x)",
		    __func__, pmcs_get_typename(pptr->dtype), pptr->path,
		    msg[1]);
	} else {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL,
		    "%s: aborting tag 0x%x for %s device %s. (htag=0x%x)",
		    __func__, tag, pmcs_get_typename(pptr->dtype), pptr->path,
		    msg[1]);
	}
	pwrk->state = PMCS_WORK_STATE_ONCHIP;

	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
	if (!wait) {
		/* Fire-and-forget (e.g. interrupt context): done. */
		mutex_exit(&pwrk->lock);
		return (0);
	}

	/* Drop the PHY lock while we wait for the abort to complete. */
	abt_htag = pwrk->htag;
	pmcs_unlock_phy(pptr);
	WAIT_FOR(pwrk, 1000, result);
	pmcs_pwork(pwp, pwrk);
	pmcs_lock_phy(pptr);
	tgt = pptr->target;

	if (all_cmds) {
		/* Wake anyone waiting for the abort-all to finish. */
		pptr->abort_all_start = 0;
		cv_signal(&pptr->abort_all_cv);
	}

	if (result) {
		/*
		 * The abort itself timed out.  Try moving the target into
		 * IN_RECOVERY device state unless it's already there (or
		 * non-operational).
		 */
		if (all_cmds) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
			    "%s: Abort all request timed out", __func__);
		} else {
			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
			    "%s: Abort (htag 0x%08x) request timed out",
			    __func__, abt_htag);
		}
		if (tgt != NULL) {
			mutex_enter(&tgt->statlock);
			if ((tgt->dev_state != PMCS_DEVICE_STATE_IN_RECOVERY) &&
			    (tgt->dev_state !=
			    PMCS_DEVICE_STATE_NON_OPERATIONAL)) {
				pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
				    "%s: Trying DS error recovery for tgt 0x%p",
				    __func__, (void *)tgt);
				(void) pmcs_send_err_recovery_cmd(pwp,
				    PMCS_DEVICE_STATE_IN_RECOVERY, pptr, tgt);
			}
			mutex_exit(&tgt->statlock);
		}
		return (ETIMEDOUT);
	}

	status = LE_32(msg[2]);
	if (status != PMCOUT_STATUS_OK) {
		/*
		 * The only non-success status are IO_NOT_VALID &
		 * IO_ABORT_IN_PROGRESS.
		 * In case of IO_ABORT_IN_PROGRESS, the other ABORT cmd's
		 * status is of concern and this duplicate cmd status can
		 * be ignored.
		 * If IO_NOT_VALID, that's not an error per-se.
		 * For abort of single I/O complete the command anyway.
		 * If, however, we were aborting all, that is a problem
		 * as IO_NOT_VALID really means that the IO or device is
		 * not there. So, discovery process will take of the cleanup.
		 */
		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
		    "%s: abort result 0x%x", __func__, LE_32(msg[2]));
		if (all_cmds) {
			PHY_CHANGED(pwp, pptr);
			RESTART_DISCOVERY(pwp);
		} else {
			return (EINVAL);
		}

		return (0);
	}

	/*
	 * Abort succeeded.  If we previously put the target into
	 * IN_RECOVERY, ask for it to be restored to OPERATIONAL.
	 */
	if (tgt != NULL) {
		mutex_enter(&tgt->statlock);
		if (tgt->dev_state == PMCS_DEVICE_STATE_IN_RECOVERY) {
			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
			    "%s: Restoring OPERATIONAL dev_state for tgt 0x%p",
			    __func__, (void *)tgt);
			(void) pmcs_send_err_recovery_cmd(pwp,
			    PMCS_DEVICE_STATE_OPERATIONAL, pptr, tgt);
		}
		mutex_exit(&tgt->statlock);
	}

	return (0);
}
5041
5042/*
5043 * Issue a task management function to an SSP device.
5044 *
5045 * Called with PHY lock held.
5046 * statlock CANNOT be held upon entry.
5047 */
5048int
5049pmcs_ssp_tmf(pmcs_hw_t *pwp, pmcs_phy_t *pptr, uint8_t tmf, uint32_t tag,
5050    uint64_t lun, uint32_t *response)
5051{
5052	int result, ds;
5053	uint8_t local[PMCS_QENTRY_SIZE << 1], *xd;
5054	sas_ssp_rsp_iu_t *rptr = (void *)local;
5055	static const uint8_t ssp_rsp_evec[] = {
5056		0x58, 0x61, 0x56, 0x72, 0x00
5057	};
5058	uint32_t msg[PMCS_MSG_SIZE], *ptr, status;
5059	struct pmcwork *pwrk;
5060	pmcs_xscsi_t *xp;
5061
5062	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
5063	if (pwrk == NULL) {
5064		pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL, pmcs_nowrk, __func__);
5065		return (ENOMEM);
5066	}
5067	/*
5068	 * NB: We use the PMCS_OQ_GENERAL outbound queue
5069	 * NB: so as to not get entangled in normal I/O
5070	 * NB: processing.
5071	 */
5072	pwrk->htag |= PMCS_TAG_NONIO_CMD;
5073	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
5074	    PMCIN_SSP_INI_TM_START));
5075	msg[1] = LE_32(pwrk->htag);
5076	msg[2] = LE_32(pptr->device_id);
5077	if (tmf == SAS_ABORT_TASK || tmf == SAS_QUERY_TASK) {
5078		msg[3] = LE_32(tag);
5079	} else {
5080		msg[3] = 0;
5081	}
5082	msg[4] = LE_32(tmf);
5083	msg[5] = BE_32((uint32_t)lun);
5084	msg[6] = BE_32((uint32_t)(lun >> 32));
5085	msg[7] = LE_32(PMCIN_MESSAGE_REPORT);
5086
5087	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5088	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5089	if (ptr == NULL) {
5090		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5091		pmcs_pwork(pwp, pwrk);
5092		pmcs_prt(pwp, PMCS_PRT_ERR, pptr, NULL, pmcs_nomsg, __func__);
5093		return (ENOMEM);
5094	}
5095	COPY_MESSAGE(ptr, msg, 7);
5096	pwrk->arg = msg;
5097	pwrk->dtype = pptr->dtype;
5098	xp = pptr->target;
5099	pwrk->xp = xp;
5100
5101	if (xp != NULL) {
5102		mutex_enter(&xp->statlock);
5103		if (xp->dev_state == PMCS_DEVICE_STATE_NON_OPERATIONAL) {
5104			mutex_exit(&xp->statlock);
5105			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5106			pmcs_pwork(pwp, pwrk);
5107			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp, "%s: Not "
5108			    "sending '%s' because DS is '%s'", __func__,
5109			    pmcs_tmf2str(tmf), pmcs_status_str
5110			    (PMCOUT_STATUS_IO_DS_NON_OPERATIONAL));
5111			return (EIO);
5112		}
5113		mutex_exit(&xp->statlock);
5114	}
5115
5116	pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
5117	    "%s: sending '%s' to %s (lun %llu) tag 0x%x", __func__,
5118	    pmcs_tmf2str(tmf), pptr->path, (unsigned long long) lun, tag);
5119	pwrk->state = PMCS_WORK_STATE_ONCHIP;
5120	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5121
5122	pmcs_unlock_phy(pptr);
5123	/*
5124	 * This is a command sent to the target device, so it can take
5125	 * significant amount of time to complete when path & device is busy.
5126	 * Set a timeout to 20 seconds
5127	 */
5128	WAIT_FOR(pwrk, 20000, result);
5129	pmcs_pwork(pwp, pwrk);
5130	pmcs_lock_phy(pptr);
5131	xp = pptr->target;
5132
5133	if (result) {
5134		if (xp == NULL) {
5135			return (ETIMEDOUT);
5136		}
5137
5138		mutex_enter(&xp->statlock);
5139		pmcs_start_dev_state_recovery(xp, pptr);
5140		mutex_exit(&xp->statlock);
5141		return (ETIMEDOUT);
5142	}
5143
5144	status = LE_32(msg[2]);
5145	if (status != PMCOUT_STATUS_OK) {
5146		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
5147		    "%s: status %s for TMF %s action to %s, lun %llu",
5148		    __func__, pmcs_status_str(status),  pmcs_tmf2str(tmf),
5149		    pptr->path, (unsigned long long) lun);
5150		if ((status == PMCOUT_STATUS_IO_DS_NON_OPERATIONAL) ||
5151		    (status == PMCOUT_STATUS_OPEN_CNX_ERROR_BREAK) ||
5152		    (status == PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS)) {
5153			ds = PMCS_DEVICE_STATE_NON_OPERATIONAL;
5154		} else if (status == PMCOUT_STATUS_IO_DS_IN_RECOVERY) {
5155			/*
5156			 * If the status is IN_RECOVERY, it's an indication
5157			 * that it's now time for us to request to have the
5158			 * device state set to OPERATIONAL since we're the ones
5159			 * that requested recovery to begin with.
5160			 */
5161			ds = PMCS_DEVICE_STATE_OPERATIONAL;
5162		} else {
5163			ds = PMCS_DEVICE_STATE_IN_RECOVERY;
5164		}
5165		if (xp != NULL) {
5166			mutex_enter(&xp->statlock);
5167			if (xp->dev_state != ds) {
5168				pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
5169				    "%s: Sending err recovery cmd"
5170				    " for tgt 0x%p (status = %s)",
5171				    __func__, (void *)xp,
5172				    pmcs_status_str(status));
5173				(void) pmcs_send_err_recovery_cmd(pwp, ds,
5174				    pptr, xp);
5175			}
5176			mutex_exit(&xp->statlock);
5177		}
5178		return (EIO);
5179	} else {
5180		ds = PMCS_DEVICE_STATE_OPERATIONAL;
5181		if (xp != NULL) {
5182			mutex_enter(&xp->statlock);
5183			if (xp->dev_state != ds) {
5184				pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
5185				    "%s: Sending err recovery cmd"
5186				    " for tgt 0x%p (status = %s)",
5187				    __func__, (void *)xp,
5188				    pmcs_status_str(status));
5189				(void) pmcs_send_err_recovery_cmd(pwp, ds,
5190				    pptr, xp);
5191			}
5192			mutex_exit(&xp->statlock);
5193		}
5194	}
5195	if (LE_32(msg[3]) == 0) {
5196		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
5197		    "TMF completed with no response");
5198		return (EIO);
5199	}
5200	pmcs_endian_transform(pwp, local, &msg[5], ssp_rsp_evec);
5201	xd = (uint8_t *)(&msg[5]);
5202	xd += SAS_RSP_HDR_SIZE;
5203	if (rptr->datapres != SAS_RSP_DATAPRES_RESPONSE_DATA) {
5204		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
5205		    "%s: TMF response not RESPONSE DATA (0x%x)",
5206		    __func__, rptr->datapres);
5207		return (EIO);
5208	}
5209	if (rptr->response_data_length != 4) {
5210		pmcs_print_entry(pwp, PMCS_PRT_DEBUG,
5211		    "Bad SAS RESPONSE DATA LENGTH", msg);
5212		return (EIO);
5213	}
5214	(void) memcpy(&status, xd, sizeof (uint32_t));
5215	status = BE_32(status);
5216	if (response != NULL)
5217		*response = status;
5218	/*
5219	 * The status is actually in the low-order byte.  The upper three
5220	 * bytes contain additional information for the TMFs that support them.
5221	 * However, at this time we do not issue any of those.  In the other
5222	 * cases, the upper three bytes are supposed to be 0, but it appears
5223	 * they aren't always.  Just mask them off.
5224	 */
5225	switch (status & 0xff) {
5226	case SAS_RSP_TMF_COMPLETE:
5227		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
5228		    "%s: TMF complete", __func__);
5229		result = 0;
5230		break;
5231	case SAS_RSP_TMF_SUCCEEDED:
5232		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
5233		    "%s: TMF succeeded", __func__);
5234		result = 0;
5235		break;
5236	case SAS_RSP_INVALID_FRAME:
5237		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
5238		    "%s: TMF returned INVALID FRAME", __func__);
5239		result = EIO;
5240		break;
5241	case SAS_RSP_TMF_NOT_SUPPORTED:
5242		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
5243		    "%s: TMF returned TMF NOT SUPPORTED", __func__);
5244		result = EIO;
5245		break;
5246	case SAS_RSP_TMF_FAILED:
5247		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
5248		    "%s: TMF returned TMF FAILED", __func__);
5249		result = EIO;
5250		break;
5251	case SAS_RSP_TMF_INCORRECT_LUN:
5252		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
5253		    "%s: TMF returned INCORRECT LUN", __func__);
5254		result = EIO;
5255		break;
5256	case SAS_RSP_OVERLAPPED_OIPTTA:
5257		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
5258		    "%s: TMF returned OVERLAPPED INITIATOR PORT TRANSFER TAG "
5259		    "ATTEMPTED", __func__);
5260		result = EIO;
5261		break;
5262	default:
5263		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, xp,
5264		    "%s: TMF returned unknown code 0x%x", __func__, status);
5265		result = EIO;
5266		break;
5267	}
5268	return (result);
5269}
5270
5271/*
5272 * Called with PHY lock held and scratch acquired
5273 */
5274int
5275pmcs_sata_abort_ncq(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
5276{
5277	const char *utag_fail_fmt = "%s: untagged NCQ command failure";
5278	const char *tag_fail_fmt = "%s: NCQ command failure (tag 0x%x)";
5279	uint32_t msg[PMCS_QENTRY_SIZE], *ptr, result, status;
5280	uint8_t *fp = pwp->scratch, ds;
5281	fis_t fis;
5282	pmcwork_t *pwrk;
5283	pmcs_xscsi_t *tgt;
5284
5285	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
5286	if (pwrk == NULL) {
5287		return (ENOMEM);
5288	}
5289	pwrk->htag |= PMCS_TAG_NONIO_CMD;
5290	msg[0] = LE_32(PMCS_IOMB_IN_SAS(PMCS_OQ_IODONE,
5291	    PMCIN_SATA_HOST_IO_START));
5292	msg[1] = LE_32(pwrk->htag);
5293	msg[2] = LE_32(pptr->device_id);
5294	msg[3] = LE_32(512);
5295	msg[4] = LE_32(SATA_PROTOCOL_PIO | PMCIN_DATADIR_2_INI);
5296	msg[5] = LE_32((READ_LOG_EXT << 16) | (C_BIT << 8) | FIS_REG_H2DEV);
5297	msg[6] = LE_32(0x10);
5298	msg[8] = LE_32(1);
5299	msg[9] = 0;
5300	msg[10] = 0;
5301	msg[11] = 0;
5302	msg[12] = LE_32(DWORD0(pwp->scratch_dma));
5303	msg[13] = LE_32(DWORD1(pwp->scratch_dma));
5304	msg[14] = LE_32(512);
5305	msg[15] = 0;
5306
5307	pwrk->arg = msg;
5308	pwrk->dtype = pptr->dtype;
5309	pwrk->xp = pptr->target;
5310
5311	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5312	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5313	if (ptr == NULL) {
5314		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5315		pmcs_pwork(pwp, pwrk);
5316		return (ENOMEM);
5317	}
5318	COPY_MESSAGE(ptr, msg, PMCS_QENTRY_SIZE);
5319	pwrk->state = PMCS_WORK_STATE_ONCHIP;
5320	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5321
5322	pmcs_unlock_phy(pptr);
5323	WAIT_FOR(pwrk, 250, result);
5324	pmcs_pwork(pwp, pwrk);
5325	pmcs_lock_phy(pptr);
5326
5327	tgt = pptr->target;
5328	if (result) {
5329		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, pmcs_timeo, __func__);
5330		return (EIO);
5331	}
5332	status = LE_32(msg[2]);
5333	if (status != PMCOUT_STATUS_OK || LE_32(msg[3])) {
5334		if (tgt == NULL) {
5335			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
5336			    "%s: cannot find target for phy 0x%p for "
5337			    "dev state recovery", __func__, (void *)pptr);
5338			return (EIO);
5339		}
5340
5341		mutex_enter(&tgt->statlock);
5342
5343		pmcs_print_entry(pwp, PMCS_PRT_DEBUG, "READ LOG EXT", msg);
5344		if ((status == PMCOUT_STATUS_IO_DS_NON_OPERATIONAL) ||
5345		    (status == PMCOUT_STATUS_OPEN_CNX_ERROR_BREAK) ||
5346		    (status == PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS)) {
5347			ds = PMCS_DEVICE_STATE_NON_OPERATIONAL;
5348		} else {
5349			ds = PMCS_DEVICE_STATE_IN_RECOVERY;
5350		}
5351		if (tgt->dev_state != ds) {
5352			pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, "%s: Trying "
5353			    "SATA DS Recovery for tgt(0x%p) for status(%s)",
5354			    __func__, (void *)tgt, pmcs_status_str(status));
5355			(void) pmcs_send_err_recovery_cmd(pwp, ds, pptr, tgt);
5356		}
5357
5358		mutex_exit(&tgt->statlock);
5359		return (EIO);
5360	}
5361	fis[0] = (fp[4] << 24) | (fp[3] << 16) | (fp[2] << 8) | FIS_REG_D2H;
5362	fis[1] = (fp[8] << 24) | (fp[7] << 16) | (fp[6] << 8) | fp[5];
5363	fis[2] = (fp[12] << 24) | (fp[11] << 16) | (fp[10] << 8) | fp[9];
5364	fis[3] = (fp[16] << 24) | (fp[15] << 16) | (fp[14] << 8) | fp[13];
5365	fis[4] = 0;
5366	if (fp[0] & 0x80) {
5367		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
5368		    utag_fail_fmt, __func__);
5369	} else {
5370		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
5371		    tag_fail_fmt, __func__, fp[0] & 0x1f);
5372	}
5373	pmcs_fis_dump(pwp, fis);
5374	pptr->need_rl_ext = 0;
5375	return (0);
5376}
5377
5378/*
5379 * Transform a structure from CPU to Device endian format, or
5380 * vice versa, based upon a transformation vector.
5381 *
5382 * A transformation vector is an array of bytes, each byte
5383 * of which is defined thusly:
5384 *
5385 *  bit 7: from CPU to desired endian, otherwise from desired endian
5386 *	   to CPU format
5387 *  bit 6: Big Endian, else Little Endian
 *  bits 5-4:
 *       00 Undefined
 *       01 One Byte quantities
 *       10 Two Byte quantities
 *       11 Four Byte quantities
5393 *
5394 *  bits 3-0:
5395 *       00 Undefined
5396 *       Number of quantities to transform
5397 *
5398 * The vector is terminated by a 0 value.
5399 */
5400
5401void
5402pmcs_endian_transform(pmcs_hw_t *pwp, void *orig_out, void *orig_in,
5403    const uint8_t *xfvec)
5404{
5405	uint8_t c, *out = orig_out, *in = orig_in;
5406
5407	if (xfvec == NULL) {
5408		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
5409		    "%s: null xfvec", __func__);
5410		return;
5411	}
5412	if (out == NULL) {
5413		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
5414		    "%s: null out", __func__);
5415		return;
5416	}
5417	if (in == NULL) {
5418		pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
5419		    "%s: null in", __func__);
5420		return;
5421	}
5422	while ((c = *xfvec++) != 0) {
5423		int nbyt = (c & 0xf);
5424		int size = (c >> 4) & 0x3;
5425		int bige = (c >> 4) & 0x4;
5426
5427		switch (size) {
5428		case 1:
5429		{
5430			while (nbyt-- > 0) {
5431				*out++ = *in++;
5432			}
5433			break;
5434		}
5435		case 2:
5436		{
5437			uint16_t tmp;
5438			while (nbyt-- > 0) {
5439				(void) memcpy(&tmp, in, sizeof (uint16_t));
5440				if (bige) {
5441					tmp = BE_16(tmp);
5442				} else {
5443					tmp = LE_16(tmp);
5444				}
5445				(void) memcpy(out, &tmp, sizeof (uint16_t));
5446				out += sizeof (uint16_t);
5447				in += sizeof (uint16_t);
5448			}
5449			break;
5450		}
5451		case 3:
5452		{
5453			uint32_t tmp;
5454			while (nbyt-- > 0) {
5455				(void) memcpy(&tmp, in, sizeof (uint32_t));
5456				if (bige) {
5457					tmp = BE_32(tmp);
5458				} else {
5459					tmp = LE_32(tmp);
5460				}
5461				(void) memcpy(out, &tmp, sizeof (uint32_t));
5462				out += sizeof (uint32_t);
5463				in += sizeof (uint32_t);
5464			}
5465			break;
5466		}
5467		default:
5468			pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
5469			    "%s: bad size", __func__);
5470			return;
5471		}
5472	}
5473}
5474
5475const char *
5476pmcs_get_rate(unsigned int linkrt)
5477{
5478	const char *rate;
5479	switch (linkrt) {
5480	case SAS_LINK_RATE_1_5GBIT:
5481		rate = "1.5";
5482		break;
5483	case SAS_LINK_RATE_3GBIT:
5484		rate = "3.0";
5485		break;
5486	case SAS_LINK_RATE_6GBIT:
5487		rate = "6.0";
5488		break;
5489	default:
5490		rate = "???";
5491		break;
5492	}
5493	return (rate);
5494}
5495
5496const char *
5497pmcs_get_typename(pmcs_dtype_t type)
5498{
5499	switch (type) {
5500	case NOTHING:
5501		return ("NIL");
5502	case SATA:
5503		return ("SATA");
5504	case SAS:
5505		return ("SSP");
5506	case EXPANDER:
5507		return ("EXPANDER");
5508	}
5509	return ("????");
5510}
5511
5512const char *
5513pmcs_tmf2str(int tmf)
5514{
5515	switch (tmf) {
5516	case SAS_ABORT_TASK:
5517		return ("Abort Task");
5518	case SAS_ABORT_TASK_SET:
5519		return ("Abort Task Set");
5520	case SAS_CLEAR_TASK_SET:
5521		return ("Clear Task Set");
5522	case SAS_LOGICAL_UNIT_RESET:
5523		return ("Logical Unit Reset");
5524	case SAS_I_T_NEXUS_RESET:
5525		return ("I_T Nexus Reset");
5526	case SAS_CLEAR_ACA:
5527		return ("Clear ACA");
5528	case SAS_QUERY_TASK:
5529		return ("Query Task");
5530	case SAS_QUERY_TASK_SET:
5531		return ("Query Task Set");
5532	case SAS_QUERY_UNIT_ATTENTION:
5533		return ("Query Unit Attention");
5534	default:
5535		return ("Unknown");
5536	}
5537}
5538
/*
 * Map a PMCOUT_STATUS_* completion code to a printable string.
 *
 * NOTE(review): returns NULL for codes it does not recognize.  Callers
 * that pass the result straight to a "%s" conversion (as some pmcs_prt
 * call sites do) should be audited for a NULL check — TODO confirm.
 */
const char *
pmcs_status_str(uint32_t status)
{
	switch (status) {
	case PMCOUT_STATUS_OK:
		return ("OK");
	case PMCOUT_STATUS_ABORTED:
		return ("ABORTED");
	case PMCOUT_STATUS_OVERFLOW:
		return ("OVERFLOW");
	case PMCOUT_STATUS_UNDERFLOW:
		return ("UNDERFLOW");
	case PMCOUT_STATUS_FAILED:
		return ("FAILED");
	case PMCOUT_STATUS_ABORT_RESET:
		return ("ABORT_RESET");
	case PMCOUT_STATUS_IO_NOT_VALID:
		return ("IO_NOT_VALID");
	case PMCOUT_STATUS_NO_DEVICE:
		return ("NO_DEVICE");
	case PMCOUT_STATUS_ILLEGAL_PARAMETER:
		return ("ILLEGAL_PARAMETER");
	case PMCOUT_STATUS_LINK_FAILURE:
		return ("LINK_FAILURE");
	case PMCOUT_STATUS_PROG_ERROR:
		return ("PROG_ERROR");
	case PMCOUT_STATUS_EDC_IN_ERROR:
		return ("EDC_IN_ERROR");
	case PMCOUT_STATUS_EDC_OUT_ERROR:
		return ("EDC_OUT_ERROR");
	case PMCOUT_STATUS_ERROR_HW_TIMEOUT:
		return ("ERROR_HW_TIMEOUT");
	case PMCOUT_STATUS_XFER_ERR_BREAK:
		return ("XFER_ERR_BREAK");
	case PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY:
		return ("XFER_ERR_PHY_NOT_READY");
	case PMCOUT_STATUS_OPEN_CNX_PROTOCOL_NOT_SUPPORTED:
		return ("OPEN_CNX_PROTOCOL_NOT_SUPPORTED");
	case PMCOUT_STATUS_OPEN_CNX_ERROR_ZONE_VIOLATION:
		return ("OPEN_CNX_ERROR_ZONE_VIOLATION");
	case PMCOUT_STATUS_OPEN_CNX_ERROR_BREAK:
		return ("OPEN_CNX_ERROR_BREAK");
	case PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS:
		return ("OPEN_CNX_ERROR_IT_NEXUS_LOSS");
	case PMCOUT_STATUS_OPENCNX_ERROR_BAD_DESTINATION:
		return ("OPENCNX_ERROR_BAD_DESTINATION");
	case PMCOUT_STATUS_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED:
		return ("OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED");
	case PMCOUT_STATUS_OPEN_CNX_ERROR_STP_RESOURCES_BUSY:
		return ("OPEN_CNX_ERROR_STP_RESOURCES_BUSY");
	case PMCOUT_STATUS_OPEN_CNX_ERROR_WRONG_DESTINATION:
		return ("OPEN_CNX_ERROR_WRONG_DESTINATION");
	case PMCOUT_STATUS_OPEN_CNX_ERROR_UNKNOWN_ERROR:
		return ("OPEN_CNX_ERROR_UNKNOWN_ERROR");
	case PMCOUT_STATUS_IO_XFER_ERROR_NAK_RECEIVED:
		return ("IO_XFER_ERROR_NAK_RECEIVED");
	case PMCOUT_STATUS_XFER_ERROR_ACK_NAK_TIMEOUT:
		return ("XFER_ERROR_ACK_NAK_TIMEOUT");
	case PMCOUT_STATUS_XFER_ERROR_PEER_ABORTED:
		return ("XFER_ERROR_PEER_ABORTED");
	case PMCOUT_STATUS_XFER_ERROR_RX_FRAME:
		return ("XFER_ERROR_RX_FRAME");
	case PMCOUT_STATUS_IO_XFER_ERROR_DMA:
		return ("IO_XFER_ERROR_DMA");
	case PMCOUT_STATUS_XFER_ERROR_CREDIT_TIMEOUT:
		return ("XFER_ERROR_CREDIT_TIMEOUT");
	case PMCOUT_STATUS_XFER_ERROR_SATA_LINK_TIMEOUT:
		return ("XFER_ERROR_SATA_LINK_TIMEOUT");
	case PMCOUT_STATUS_XFER_ERROR_SATA:
		return ("XFER_ERROR_SATA");
	case PMCOUT_STATUS_XFER_ERROR_REJECTED_NCQ_MODE:
		return ("XFER_ERROR_REJECTED_NCQ_MODE");
	case PMCOUT_STATUS_XFER_ERROR_ABORTED_DUE_TO_SRST:
		return ("XFER_ERROR_ABORTED_DUE_TO_SRST");
	case PMCOUT_STATUS_XFER_ERROR_ABORTED_NCQ_MODE:
		return ("XFER_ERROR_ABORTED_NCQ_MODE");
	case PMCOUT_STATUS_IO_XFER_OPEN_RETRY_TIMEOUT:
		return ("IO_XFER_OPEN_RETRY_TIMEOUT");
	case PMCOUT_STATUS_SMP_RESP_CONNECTION_ERROR:
		return ("SMP_RESP_CONNECTION_ERROR");
	case PMCOUT_STATUS_XFER_ERROR_UNEXPECTED_PHASE:
		return ("XFER_ERROR_UNEXPECTED_PHASE");
	case PMCOUT_STATUS_XFER_ERROR_RDY_OVERRUN:
		return ("XFER_ERROR_RDY_OVERRUN");
	case PMCOUT_STATUS_XFER_ERROR_RDY_NOT_EXPECTED:
		return ("XFER_ERROR_RDY_NOT_EXPECTED");
	case PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT:
		return ("XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT");
	case PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_BREAK_BEFORE_ACK_NACK:
		return ("XFER_ERROR_CMD_ISSUE_BREAK_BEFORE_ACK_NACK");
	case PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_PHY_DOWN_BEFORE_ACK_NAK:
		return ("XFER_ERROR_CMD_ISSUE_PHY_DOWN_BEFORE_ACK_NAK");
	case PMCOUT_STATUS_XFER_ERROR_OFFSET_MISMATCH:
		return ("XFER_ERROR_OFFSET_MISMATCH");
	case PMCOUT_STATUS_XFER_ERROR_ZERO_DATA_LEN:
		return ("XFER_ERROR_ZERO_DATA_LEN");
	case PMCOUT_STATUS_XFER_CMD_FRAME_ISSUED:
		return ("XFER_CMD_FRAME_ISSUED");
	case PMCOUT_STATUS_ERROR_INTERNAL_SMP_RESOURCE:
		return ("ERROR_INTERNAL_SMP_RESOURCE");
	case PMCOUT_STATUS_IO_PORT_IN_RESET:
		return ("IO_PORT_IN_RESET");
	case PMCOUT_STATUS_IO_DS_NON_OPERATIONAL:
		return ("DEVICE STATE NON-OPERATIONAL");
	case PMCOUT_STATUS_IO_DS_IN_RECOVERY:
		return ("DEVICE STATE IN RECOVERY");
	case PMCOUT_STATUS_IO_OPEN_CNX_ERROR_HW_RESOURCE_BUSY:
		return ("OPEN CNX ERR HW RESOURCE BUSY");
	default:
		return (NULL);
	}
}
5651
/*
 * Pack an 8-byte, big-endian SAS address array into a 64-bit WWN.
 */
uint64_t
pmcs_barray2wwn(uint8_t ba[8])
{
	uint64_t wwn = 0;
	int idx;

	for (idx = 0; idx < 8; idx++) {
		wwn = (wwn << 8) | ba[idx];
	}
	return (wwn);
}
5664
/*
 * Unpack a 64-bit WWN into an 8-byte, big-endian SAS address array.
 */
void
pmcs_wwn2barray(uint64_t wwn, uint8_t ba[8])
{
	int idx;

	for (idx = 7; idx >= 0; idx--) {
		ba[idx] = (uint8_t)(wwn & 0xff);
		wwn >>= 8;
	}
}
5674
5675void
5676pmcs_report_fwversion(pmcs_hw_t *pwp)
5677{
5678	const char *fwsupport;
5679	switch (PMCS_FW_TYPE(pwp)) {
5680	case PMCS_FW_TYPE_RELEASED:
5681		fwsupport = "Released";
5682		break;
5683	case PMCS_FW_TYPE_DEVELOPMENT:
5684		fwsupport = "Development";
5685		break;
5686	case PMCS_FW_TYPE_ALPHA:
5687		fwsupport = "Alpha";
5688		break;
5689	case PMCS_FW_TYPE_BETA:
5690		fwsupport = "Beta";
5691		break;
5692	default:
5693		fwsupport = "Special";
5694		break;
5695	}
5696	pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL,
5697	    "Chip Revision: %c; F/W Revision %x.%x.%x %s (ILA rev %08x)",
5698	    'A' + pwp->chiprev, PMCS_FW_MAJOR(pwp), PMCS_FW_MINOR(pwp),
5699	    PMCS_FW_MICRO(pwp), fwsupport, pwp->ila_ver);
5700}
5701
5702void
5703pmcs_phy_name(pmcs_hw_t *pwp, pmcs_phy_t *pptr, char *obuf, size_t olen)
5704{
5705	if (pptr->parent) {
5706		pmcs_phy_name(pwp, pptr->parent, obuf, olen);
5707		(void) snprintf(obuf, olen, "%s.%02x", obuf, pptr->phynum);
5708	} else {
5709		(void) snprintf(obuf, olen, "pp%02x", pptr->phynum);
5710	}
5711}
5712
5713/*
5714 * This function is called as a sanity check to ensure that a newly registered
5715 * PHY doesn't have a device_id that exists with another registered PHY.
5716 */
static boolean_t
pmcs_validate_devid(pmcs_phy_t *parent, pmcs_phy_t *phyp, uint32_t device_id)
{
	pmcs_phy_t *pptr, *pchild;
	boolean_t rval;

	pptr = parent;

	/*
	 * Walk the sibling list starting at "parent", recursing into each
	 * PHY's children, looking for a different registered PHY that
	 * already holds device_id.
	 */
	while (pptr) {
		if (pptr->valid_device_id && (pptr != phyp) &&
		    (pptr->device_id == device_id)) {
			/*
			 * This can still be OK if both of these PHYs actually
			 * represent the same device (e.g. expander).  It could
			 * be a case of a new "primary" PHY.  If the SAS address
			 * is the same and they have the same parent, we'll
			 * accept this if the PHY to be registered is the
			 * primary.
			 */
			if ((phyp->parent == pptr->parent) &&
			    (memcmp(phyp->sas_address,
			    pptr->sas_address, 8) == 0) && (phyp->width > 1)) {
				/*
				 * Move children over to the new primary and
				 * update both PHYs
				 */
				pmcs_lock_phy(pptr);
				phyp->children = pptr->children;
				pchild = phyp->children;
				/* Re-parent every child onto the new primary */
				while (pchild) {
					pchild->parent = phyp;
					pchild = pchild->sibling;
				}
				phyp->subsidiary = 0;
				phyp->ncphy = pptr->ncphy;
				/*
				 * device_id, valid_device_id, and configured
				 * will be set by the caller
				 */
				pptr->children = NULL;
				pptr->subsidiary = 1;
				pptr->ncphy = 0;
				pmcs_unlock_phy(pptr);
				pmcs_prt(pptr->pwp, PMCS_PRT_DEBUG, pptr, NULL,
				    "%s: Moving device_id %d from PHY %s to %s",
				    __func__, device_id, pptr->path,
				    phyp->path);
				return (B_TRUE);
			}
			/* Genuine duplicate: reject the registration */
			pmcs_prt(pptr->pwp, PMCS_PRT_DEBUG, pptr, NULL,
			    "%s: phy %s already exists as %s with "
			    "device id 0x%x", __func__, phyp->path,
			    pptr->path, device_id);
			return (B_FALSE);
		}

		/* Depth-first: check this PHY's subtree before its siblings */
		if (pptr->children) {
			rval = pmcs_validate_devid(pptr->children, phyp,
			    device_id);
			if (rval == B_FALSE) {
				return (rval);
			}
		}

		pptr = pptr->sibling;
	}

	/* This PHY and device_id are valid */
	return (B_TRUE);
}
5787
5788/*
5789 * If the PHY is found, it is returned locked
5790 */
static pmcs_phy_t *
pmcs_find_phy_by_wwn_impl(pmcs_phy_t *phyp, uint8_t *wwn)
{
	pmcs_phy_t *matched_phy, *cphyp, *nphyp;

	/* Caller must not already hold the starting PHY's lock */
	ASSERT(!mutex_owned(&phyp->phy_lock));

	/*
	 * Walk the sibling list, recursing into children.  A match is
	 * returned with its phy_lock held; every non-matching PHY is
	 * unlocked before moving on.
	 */
	while (phyp) {
		pmcs_lock_phy(phyp);

		if (phyp->valid_device_id) {
			if (memcmp(phyp->sas_address, wwn, 8) == 0) {
				/* Match: return with phy_lock still held */
				return (phyp);
			}
		}

		if (phyp->children) {
			cphyp = phyp->children;
			/* Drop our lock before recursing into the subtree */
			pmcs_unlock_phy(phyp);
			matched_phy = pmcs_find_phy_by_wwn_impl(cphyp, wwn);
			if (matched_phy) {
				ASSERT(mutex_owned(&matched_phy->phy_lock));
				return (matched_phy);
			}
			pmcs_lock_phy(phyp);
		}

		/*
		 * Only iterate through non-root PHYs
		 */
		if (IS_ROOT_PHY(phyp)) {
			pmcs_unlock_phy(phyp);
			phyp = NULL;
		} else {
			/* Capture the sibling before dropping the lock */
			nphyp = phyp->sibling;
			pmcs_unlock_phy(phyp);
			phyp = nphyp;
		}
	}

	return (NULL);
}
5833
5834pmcs_phy_t *
5835pmcs_find_phy_by_wwn(pmcs_hw_t *pwp, uint64_t wwn)
5836{
5837	uint8_t ebstr[8];
5838	pmcs_phy_t *pptr, *matched_phy;
5839
5840	pmcs_wwn2barray(wwn, ebstr);
5841
5842	pptr = pwp->root_phys;
5843	while (pptr) {
5844		matched_phy = pmcs_find_phy_by_wwn_impl(pptr, ebstr);
5845		if (matched_phy) {
5846			ASSERT(mutex_owned(&matched_phy->phy_lock));
5847			return (matched_phy);
5848		}
5849
5850		pptr = pptr->sibling;
5851	}
5852
5853	return (NULL);
5854}
5855
5856
5857/*
5858 * pmcs_find_phy_by_sas_address
5859 *
5860 * Find a PHY that both matches "sas_addr" and is on "iport".
5861 * If a matching PHY is found, it is returned locked.
5862 */
pmcs_phy_t *
pmcs_find_phy_by_sas_address(pmcs_hw_t *pwp, pmcs_iport_t *iport,
    pmcs_phy_t *root, char *sas_addr)
{
	int ua_form = 1;
	uint64_t wwn;
	char addr[PMCS_MAX_UA_SIZE];
	pmcs_phy_t *pptr, *pnext, *pchild;

	/* NULL root means start from the HBA's root PHY list */
	if (root == NULL) {
		pptr = pwp->root_phys;
	} else {
		pptr = root;
	}

	/*
	 * Walk siblings, recursing into children; a match is returned
	 * with its phy_lock held, every other PHY is unlocked on the way.
	 */
	while (pptr) {
		pmcs_lock_phy(pptr);
		/*
		 * If the PHY is dead or does not have a valid device ID,
		 * skip it.
		 */
		if ((pptr->dead) || (!pptr->valid_device_id)) {
			goto next_phy;
		}

		/* Must be on the requested iport as well */
		if (pptr->iport != iport) {
			goto next_phy;
		}

		/* Compare unit-address strings for this PHY's SAS address */
		wwn = pmcs_barray2wwn(pptr->sas_address);
		(void *) scsi_wwn_to_wwnstr(wwn, ua_form, addr);
		if (strncmp(addr, sas_addr, strlen(addr)) == 0) {
			/* Match: return with phy_lock held */
			return (pptr);
		}

		if (pptr->children) {
			pchild = pptr->children;
			/* Drop our lock before descending into the subtree */
			pmcs_unlock_phy(pptr);
			pnext = pmcs_find_phy_by_sas_address(pwp, iport, pchild,
			    sas_addr);
			if (pnext) {
				return (pnext);
			}
			pmcs_lock_phy(pptr);
		}

next_phy:
		/* Capture the sibling before releasing this PHY's lock */
		pnext = pptr->sibling;
		pmcs_unlock_phy(pptr);
		pptr = pnext;
	}

	return (NULL);
}
5917
5918void
5919pmcs_fis_dump(pmcs_hw_t *pwp, fis_t fis)
5920{
5921	switch (fis[0] & 0xff) {
5922	case FIS_REG_H2DEV:
5923		pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL,
5924		    "FIS REGISTER HOST TO DEVICE: "
5925		    "OP=0x%02x Feature=0x%04x Count=0x%04x Device=0x%02x "
5926		    "LBA=%llu", BYTE2(fis[0]), BYTE3(fis[2]) << 8 |
5927		    BYTE3(fis[0]), WORD0(fis[3]), BYTE3(fis[1]),
5928		    (unsigned long long)
5929		    (((uint64_t)fis[2] & 0x00ffffff) << 24 |
5930		    ((uint64_t)fis[1] & 0x00ffffff)));
5931		break;
5932	case FIS_REG_D2H:
5933		pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL,
5934		    "FIS REGISTER DEVICE TO HOST: Status=0x%02x "
5935		    "Error=0x%02x Dev=0x%02x Count=0x%04x LBA=%llu",
5936		    BYTE2(fis[0]), BYTE3(fis[0]), BYTE3(fis[1]), WORD0(fis[3]),
5937		    (unsigned long long)(((uint64_t)fis[2] & 0x00ffffff) << 24 |
5938		    ((uint64_t)fis[1] & 0x00ffffff)));
5939		break;
5940	default:
5941		pmcs_prt(pwp, PMCS_PRT_INFO, NULL, NULL,
5942		    "FIS: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x",
5943		    fis[0], fis[1], fis[2], fis[3], fis[4]);
5944		break;
5945	}
5946}
5947
5948void
5949pmcs_print_entry(pmcs_hw_t *pwp, int level, char *msg, void *arg)
5950{
5951	uint32_t *mb = arg;
5952	size_t i;
5953
5954	pmcs_prt(pwp, level, NULL, NULL, msg);
5955	for (i = 0; i < (PMCS_QENTRY_SIZE / sizeof (uint32_t)); i += 4) {
5956		pmcs_prt(pwp, level, NULL, NULL,
5957		    "Offset %2lu: 0x%08x 0x%08x 0x%08x 0x%08x",
5958		    i * sizeof (uint32_t), LE_32(mb[i]),
5959		    LE_32(mb[i+1]), LE_32(mb[i+2]), LE_32(mb[i+3]));
5960	}
5961}
5962
5963/*
5964 * If phyp == NULL we're being called from the worker thread, in which
5965 * case we need to check all the PHYs.  In this case, the softstate lock
5966 * will be held.
5967 * If phyp is non-NULL, just issue the spinup release for the specified PHY
5968 * (which will already be locked).
5969 */
5970void
5971pmcs_spinup_release(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
5972{
5973	uint32_t *msg;
5974	struct pmcwork *pwrk;
5975	pmcs_phy_t *tphyp;
5976
5977	if (phyp != NULL) {
5978		ASSERT(mutex_owned(&phyp->phy_lock));
5979		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, phyp, NULL,
5980		    "%s: Issuing spinup release only for PHY %s", __func__,
5981		    phyp->path);
5982		mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5983		msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5984		if (msg == NULL || (pwrk =
5985		    pmcs_gwork(pwp, PMCS_TAG_TYPE_NONE, NULL)) == NULL) {
5986			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5987			SCHEDULE_WORK(pwp, PMCS_WORK_SPINUP_RELEASE);
5988			return;
5989		}
5990
5991		phyp->spinup_hold = 0;
5992		bzero(msg, PMCS_QENTRY_SIZE);
5993		pwrk->htag |= PMCS_TAG_NONIO_CMD;
5994		msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
5995		    PMCIN_LOCAL_PHY_CONTROL));
5996		msg[1] = LE_32(pwrk->htag);
5997		msg[2] = LE_32((0x10 << 8) | phyp->phynum);
5998
5999		pwrk->dtype = phyp->dtype;
6000		pwrk->state = PMCS_WORK_STATE_ONCHIP;
6001		pwrk->xp = phyp->target;
6002		mutex_exit(&pwrk->lock);
6003		INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
6004		return;
6005	}
6006
6007	ASSERT(mutex_owned(&pwp->lock));
6008
6009	tphyp = pwp->root_phys;
6010	while (tphyp) {
6011		pmcs_lock_phy(tphyp);
6012		if (tphyp->spinup_hold == 0) {
6013			pmcs_unlock_phy(tphyp);
6014			tphyp = tphyp->sibling;
6015			continue;
6016		}
6017
6018		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, tphyp, NULL,
6019		    "%s: Issuing spinup release for PHY %s", __func__,
6020		    tphyp->path);
6021
6022		mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
6023		msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
6024		if (msg == NULL || (pwrk =
6025		    pmcs_gwork(pwp, PMCS_TAG_TYPE_NONE, NULL)) == NULL) {
6026			pmcs_unlock_phy(tphyp);
6027			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
6028			SCHEDULE_WORK(pwp, PMCS_WORK_SPINUP_RELEASE);
6029			break;
6030		}
6031
6032		tphyp->spinup_hold = 0;
6033		bzero(msg, PMCS_QENTRY_SIZE);
6034		msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
6035		    PMCIN_LOCAL_PHY_CONTROL));
6036		msg[1] = LE_32(pwrk->htag);
6037		msg[2] = LE_32((0x10 << 8) | tphyp->phynum);
6038
6039		pwrk->dtype = tphyp->dtype;
6040		pwrk->state = PMCS_WORK_STATE_ONCHIP;
6041		pwrk->xp = tphyp->target;
6042		mutex_exit(&a