1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26/*
27 * PX Fault Management Architecture
28 */
29#include <sys/types.h>
30#include <sys/sunndi.h>
31#include <sys/sunddi.h>
32#include <sys/fm/protocol.h>
33#include <sys/fm/util.h>
34#include <sys/fm/io/pci.h>
35#include <sys/membar.h>
36#include "px_obj.h"
37
38extern uint_t px_ranges_phi_mask;
39
40#define	PX_PCIE_PANIC_BITS \
41	(PCIE_AER_UCE_DLP | PCIE_AER_UCE_FCP | PCIE_AER_UCE_TO | \
42	PCIE_AER_UCE_RO | PCIE_AER_UCE_MTLP | PCIE_AER_UCE_ECRC)
43#define	PX_PCIE_NO_PANIC_BITS \
44	(PCIE_AER_UCE_TRAINING | PCIE_AER_UCE_SD | PCIE_AER_UCE_CA | \
45	PCIE_AER_UCE_UC | PCIE_AER_UCE_UR)
46
47/*
 * Global panicking state variable used to control whether further error
 * handling
49 * should occur.  If the system is already panic'ing or if PX itself has
50 * recommended panic'ing the system, no further error handling should occur to
51 * prevent the system from hanging.
52 */
53boolean_t px_panicing = B_FALSE;
54
55static int px_pcie_ptlp(dev_info_t *dip, ddi_fm_error_t *derr,
56    px_err_pcie_t *regs);
57
58#if defined(DEBUG)
59static void px_pcie_log(dev_info_t *dip, px_err_pcie_t *regs);
60#else	/* DEBUG */
61#define	px_pcie_log 0 &&
62#endif	/* DEBUG */
63
64/*
65 * Initialize px FMA support
66 */
67int
68px_fm_attach(px_t *px_p)
69{
70	int		i;
71	dev_info_t	*dip = px_p->px_dip;
72	pcie_bus_t	*bus_p;
73
74	px_p->px_fm_cap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE |
75	    DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE;
76
77	/*
78	 * check parents' capability
79	 */
80	ddi_fm_init(dip, &px_p->px_fm_cap, &px_p->px_fm_ibc);
81
82	/*
83	 * parents need to be ereport and error handling capable
84	 */
85	ASSERT(px_p->px_fm_cap &&
86	    (DDI_FM_ERRCB_CAPABLE | DDI_FM_EREPORT_CAPABLE));
87
88	/*
89	 * Initialize lock to synchronize fabric error handling
90	 */
91	mutex_init(&px_p->px_fm_mutex, NULL, MUTEX_DRIVER,
92	    (void *)px_p->px_fm_ibc);
93
94	px_p->px_pfd_idx = 0;
95	for (i = 0; i < 5; i++)
96		pcie_rc_init_pfd(dip, &px_p->px_pfd_arr[i]);
97	PCIE_DIP2PFD(dip) = px_p->px_pfd_arr;
98
99	bus_p = PCIE_DIP2BUS(dip);
100	bus_p->bus_rp_bdf = px_p->px_bdf;
101	bus_p->bus_rp_dip = dip;
102
103	return (DDI_SUCCESS);
104}
105
106/*
107 * Deregister FMA
108 */
109void
110px_fm_detach(px_t *px_p)
111{
112	int i;
113
114	mutex_destroy(&px_p->px_fm_mutex);
115	ddi_fm_fini(px_p->px_dip);
116	for (i = 0; i < 5; i++)
117		pcie_rc_fini_pfd(&px_p->px_pfd_arr[i]);
118}
119
120/*
121 * register error callback in parent
122 */
123void
124px_fm_cb_enable(px_t *px_p)
125{
126	ddi_fm_handler_register(px_p->px_dip, px_fm_callback, px_p);
127}
128
129void
130px_fm_cb_disable(px_t *px_p)
131{
132	ddi_fm_handler_unregister(px_p->px_dip);
133}
134
135/*
136 * Function used to setup access functions depending on level of desired
137 * protection.
138 */
139void
140px_fm_acc_setup(ddi_map_req_t *mp, dev_info_t *rdip, pci_regspec_t *rp)
141{
142	uchar_t fflag;
143	ndi_err_t *errp;
144	ddi_acc_hdl_t *hp;
145	ddi_acc_impl_t *ap;
146
147	hp = mp->map_handlep;
148	ap = (ddi_acc_impl_t *)hp->ah_platform_private;
149	fflag = ap->ahi_common.ah_acc.devacc_attr_access;
150
151	if (mp->map_op == DDI_MO_MAP_LOCKED) {
152		ndi_fmc_insert(rdip, ACC_HANDLE, (void *)hp, NULL);
153		switch (fflag) {
154		case DDI_FLAGERR_ACC:
155			ap->ahi_get8 = i_ddi_prot_get8;
156			ap->ahi_get16 = i_ddi_prot_get16;
157			ap->ahi_get32 = i_ddi_prot_get32;
158			ap->ahi_get64 = i_ddi_prot_get64;
159			ap->ahi_put8 = i_ddi_prot_put8;
160			ap->ahi_put16 = i_ddi_prot_put16;
161			ap->ahi_put32 = i_ddi_prot_put32;
162			ap->ahi_put64 = i_ddi_prot_put64;
163			ap->ahi_rep_get8 = i_ddi_prot_rep_get8;
164			ap->ahi_rep_get16 = i_ddi_prot_rep_get16;
165			ap->ahi_rep_get32 = i_ddi_prot_rep_get32;
166			ap->ahi_rep_get64 = i_ddi_prot_rep_get64;
167			ap->ahi_rep_put8 = i_ddi_prot_rep_put8;
168			ap->ahi_rep_put16 = i_ddi_prot_rep_put16;
169			ap->ahi_rep_put32 = i_ddi_prot_rep_put32;
170			ap->ahi_rep_put64 = i_ddi_prot_rep_put64;
171			impl_acc_err_init(hp);
172			errp = ((ddi_acc_impl_t *)hp)->ahi_err;
173			if ((rp->pci_phys_hi & PCI_REG_ADDR_M) ==
174			    PCI_ADDR_CONFIG)
175				errp->err_cf = px_err_cfg_hdl_check;
176			else
177				errp->err_cf = px_err_pio_hdl_check;
178			break;
179		case DDI_CAUTIOUS_ACC :
180			ap->ahi_get8 = i_ddi_caut_get8;
181			ap->ahi_get16 = i_ddi_caut_get16;
182			ap->ahi_get32 = i_ddi_caut_get32;
183			ap->ahi_get64 = i_ddi_caut_get64;
184			ap->ahi_put8 = i_ddi_caut_put8;
185			ap->ahi_put16 = i_ddi_caut_put16;
186			ap->ahi_put32 = i_ddi_caut_put32;
187			ap->ahi_put64 = i_ddi_caut_put64;
188			ap->ahi_rep_get8 = i_ddi_caut_rep_get8;
189			ap->ahi_rep_get16 = i_ddi_caut_rep_get16;
190			ap->ahi_rep_get32 = i_ddi_caut_rep_get32;
191			ap->ahi_rep_get64 = i_ddi_caut_rep_get64;
192			ap->ahi_rep_put8 = i_ddi_caut_rep_put8;
193			ap->ahi_rep_put16 = i_ddi_caut_rep_put16;
194			ap->ahi_rep_put32 = i_ddi_caut_rep_put32;
195			ap->ahi_rep_put64 = i_ddi_caut_rep_put64;
196			impl_acc_err_init(hp);
197			errp = ((ddi_acc_impl_t *)hp)->ahi_err;
198			if ((rp->pci_phys_hi & PCI_REG_ADDR_M) ==
199			    PCI_ADDR_CONFIG)
200				errp->err_cf = px_err_cfg_hdl_check;
201			else
202				errp->err_cf = px_err_pio_hdl_check;
203			break;
204		default:
205			/* Illegal state, remove the handle from cache */
206			ndi_fmc_remove(rdip, ACC_HANDLE, (void *)hp);
207			break;
208		}
209	} else if (mp->map_op == DDI_MO_UNMAP) {
210		ndi_fmc_remove(rdip, ACC_HANDLE, (void *)hp);
211	}
212}
213
214/*
215 * Function used to initialize FMA for our children nodes. Called
216 * through pci busops when child node calls ddi_fm_init.
217 */
218/*ARGSUSED*/
219int
220px_fm_init_child(dev_info_t *dip, dev_info_t *cdip, int cap,
221    ddi_iblock_cookie_t *ibc_p)
222{
223	px_t *px_p = DIP_TO_STATE(dip);
224
225	ASSERT(ibc_p != NULL);
226	*ibc_p = px_p->px_fm_ibc;
227
228	return (px_p->px_fm_cap);
229}
230
231/*
232 * lock access for exclusive PCIe access
233 */
234void
235px_bus_enter(dev_info_t *dip, ddi_acc_handle_t handle)
236{
237	px_pec_t	*pec_p = ((px_t *)DIP_TO_STATE(dip))->px_pec_p;
238
239	/*
240	 * Exclusive access has been used for cautious put/get,
241	 * Both utilize i_ddi_ontrap which, on sparcv9, implements
242	 * similar protection as what on_trap() does, and which calls
243	 * membar  #Sync to flush out all cpu deferred errors
244	 * prior to get/put operation, so here we're not calling
245	 * membar  #Sync - a difference from what's in pci_bus_enter().
246	 */
247	mutex_enter(&pec_p->pec_pokefault_mutex);
248	pec_p->pec_acc_hdl = handle;
249}
250
251/*
252 * unlock access for exclusive PCIe access
253 */
254/* ARGSUSED */
255void
256px_bus_exit(dev_info_t *dip, ddi_acc_handle_t handle)
257{
258	px_t		*px_p = DIP_TO_STATE(dip);
259	px_pec_t	*pec_p = px_p->px_pec_p;
260
261	pec_p->pec_acc_hdl = NULL;
262	mutex_exit(&pec_p->pec_pokefault_mutex);
263}
264
265static uint64_t
266px_in_addr_range(dev_info_t *dip, pci_ranges_t *ranges_p, uint64_t addr)
267{
268	uint64_t	addr_low, addr_high;
269
270	addr_low = (uint64_t)(ranges_p->parent_high & px_ranges_phi_mask) << 32;
271	addr_low |= (uint64_t)ranges_p->parent_low;
272	addr_high = addr_low + ((uint64_t)ranges_p->size_high << 32) +
273	    (uint64_t)ranges_p->size_low;
274
275	DBG(DBG_ERR_INTR, dip, "Addr: 0x%llx high: 0x%llx low: 0x%llx\n",
276	    addr, addr_high, addr_low);
277
278	if ((addr < addr_high) && (addr >= addr_low))
279		return (addr_low);
280
281	return (0);
282}
283
284/*
285 * PCI error callback which is registered with our parent to call
286 * for PCIe logging when the CPU traps due to PCIe Uncorrectable Errors
287 * and PCI BERR/TO/UE on IO Loads.
288 */
/*ARGSUSED*/
int
px_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *impl_data)
{
	dev_info_t	*pdip = ddi_get_parent(dip);
	px_t		*px_p = (px_t *)impl_data;
	int		i, acc_type = 0;
	int		lookup, rc_err, fab_err;
	uint64_t	addr, base_addr;
	uint64_t	fault_addr = (uint64_t)derr->fme_bus_specific;
	pcie_req_id_t	bdf = PCIE_INVALID_BDF;
	pci_ranges_t	*ranges_p;
	int		range_len;
	pf_data_t	*pfd_p;

	/*
	 * If the current thread already owns the px_fm_mutex, then we
	 * have encountered an error while processing a previous
	 * error.  Attempting to take the mutex again will cause the
	 * system to deadlock.
	 */
	if (px_p->px_fm_mutex_owner == curthread)
		return (DDI_FM_FATAL);

	/*
	 * The parent's handler lock is held across this callback; drop it
	 * before taking px_fm_mutex and re-take it on every return path.
	 */
	i_ddi_fm_handler_exit(pdip);

	if (px_fm_enter(px_p) != DDI_SUCCESS) {
		i_ddi_fm_handler_enter(pdip);
		return (DDI_FM_FATAL);
	}

	/*
	 * Make sure this failed load came from this PCIe port.	 Check by
	 * matching the upper 32 bits of the address with the ranges property.
	 */
	range_len = px_p->px_ranges_length / sizeof (pci_ranges_t);
	i = 0;
	for (ranges_p = px_p->px_ranges_p; i < range_len; i++, ranges_p++) {
		base_addr = px_in_addr_range(dip, ranges_p, fault_addr);
		if (base_addr) {
			switch (ranges_p->child_high & PCI_ADDR_MASK) {
			case PCI_ADDR_CONFIG:
				acc_type = PF_ADDR_CFG;
				addr = NULL;
				/*
				 * NOTE(review): this takes bits <27:12> of the
				 * config-space fault address as the BDF --
				 * confirm against this RC's config address
				 * encoding.
				 */
				bdf = (pcie_req_id_t)((fault_addr >> 12) &
				    0xFFFF);
				break;
			case PCI_ADDR_IO:
			case PCI_ADDR_MEM64:
			case PCI_ADDR_MEM32:
				/* PIO fault: convert to bus-relative address */
				acc_type = PF_ADDR_PIO;
				addr = fault_addr - base_addr;
				bdf = PCIE_INVALID_BDF;
				break;
			}
			break;
		}
	}

	/* This address doesn't belong to this leaf, just return with OK */
	if (!acc_type) {
		px_fm_exit(px_p);
		i_ddi_fm_handler_enter(pdip);
		return (DDI_FM_OK);
	}

	/* Handle RC-internal errors, then see if any handle claims the fault */
	rc_err = px_err_cmn_intr(px_p, derr, PX_TRAP_CALL, PX_FM_BLOCK_ALL);
	lookup = pf_hdl_lookup(dip, derr->fme_ena, acc_type, (uint64_t)addr,
	    bdf);

	/*
	 * Queue a pfd so the fabric scan below covers the fault path.
	 * NOTE(review): px_rp_en_q() returns NULL when bdf is invalid and
	 * addr is 0 (possible for a PIO fault at exactly the range base);
	 * the dereference below would then trap -- verify this can't happen.
	 */
	pfd_p = px_rp_en_q(px_p, bdf, addr,
	    (PCI_STAT_R_MAST_AB | PCI_STAT_R_TARG_AB));
	PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_DATA;

	/* Update affected info, either addr or bdf is not NULL */
	if (addr) {
		PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_ADDR;
	} else if (PCIE_CHECK_VALID_BDF(bdf)) {
		PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_BDF;
		PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = bdf;
	}

	fab_err = px_scan_fabric(px_p, dip, derr);

	px_fm_exit(px_p);
	i_ddi_fm_handler_enter(pdip);

	/* px_die tunable suppresses panic recommendations when clear */
	if (!px_die)
		return (DDI_FM_OK);

	/*
	 * Fatal if the RC recommends panic/protected handling, the fabric
	 * scan found a fatal error, or no handle claimed the fault.
	 */
	if ((rc_err & (PX_PANIC | PX_PROTECTED)) ||
	    (fab_err & PF_ERR_FATAL_FLAGS) ||
	    (lookup == PF_HDL_NOTFOUND))
		return (DDI_FM_FATAL);
	else if ((rc_err == PX_NO_ERROR) && (fab_err == PF_ERR_NO_ERROR))
		return (DDI_FM_OK);

	return (DDI_FM_NONFATAL);
}
388
389/*
390 * px_err_fabric_intr:
391 * Interrupt handler for PCIE fabric block.
392 * o lock
393 * o create derr
394 * o px_err_cmn_intr(leaf, with jbc)
395 * o send ereport(fire fmri, derr, payload = BDF)
396 * o dispatch (leaf)
397 * o unlock
398 * o handle error: fatal? fm_panic() : return INTR_CLAIMED)
399 */
/* ARGSUSED */
uint_t
px_err_fabric_intr(px_t *px_p, msgcode_t msg_code, pcie_req_id_t rid)
{
	dev_info_t	*rpdip = px_p->px_dip;
	int		rc_err, fab_err;
	ddi_fm_error_t	derr;
	uint32_t	rp_status;
	uint16_t	ce_source, ue_source;
	pf_data_t	*pfd_p;

	if (px_fm_enter(px_p) != DDI_SUCCESS)
		goto done;

	/* Create the derr */
	bzero(&derr, sizeof (ddi_fm_error_t));
	derr.fme_version = DDI_FME_VERSION;
	derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
	derr.fme_flag = DDI_FM_ERR_UNEXPECTED;

	/* downgrade fme_flag if a safe (peek/poke/cautious) access is open */
	px_err_safeacc_check(px_p, &derr);

	/* Map the fabric message code into root-port AER status + source ids */
	if (msg_code == PCIE_MSG_CODE_ERR_COR) {
		rp_status = PCIE_AER_RE_STS_CE_RCVD;
		ce_source = rid;
		ue_source = 0;
	} else {
		rp_status = PCIE_AER_RE_STS_FE_NFE_RCVD;
		ce_source = 0;
		ue_source = rid;
		if (msg_code == PCIE_MSG_CODE_ERR_NONFATAL)
			rp_status |= PCIE_AER_RE_STS_NFE_MSGS_RCVD;
		else {
			rp_status |= PCIE_AER_RE_STS_FE_MSGS_RCVD;
			rp_status |= PCIE_AER_RE_STS_FIRST_UC_FATAL;
		}
	}

	/* Only post an ereport for genuinely unexpected errors */
	if (derr.fme_flag == DDI_FM_ERR_UNEXPECTED) {
		ddi_fm_ereport_post(rpdip, PCI_ERROR_SUBCLASS "." PCIEX_FABRIC,
		    derr.fme_ena,
		    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
		    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, B_TRUE,
		    "pcie_adv_rp_status", DATA_TYPE_UINT32, rp_status,
		    "pcie_adv_rp_command", DATA_TYPE_UINT32, 0,
		    "pcie_adv_rp_ce_src_id", DATA_TYPE_UINT16, ce_source,
		    "pcie_adv_rp_ue_src_id", DATA_TYPE_UINT16, ue_source,
		    NULL);
	}

	/* Ensure that the rid of the fabric message will get scanned. */
	pfd_p = px_rp_en_q(px_p, rid, NULL, NULL);
	PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_FABRIC;

	rc_err = px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_PCIE);

	/* call rootport dispatch */
	fab_err = px_scan_fabric(px_p, rpdip, &derr);

	/*
	 * Two-phase panic: the first call (isTest == B_TRUE) only latches
	 * px_panicing if a panic is warranted, blocking new error handling
	 * while we still hold the FM lock; after px_fm_exit() the second
	 * call performs the actual fm_panic().
	 */
	px_err_panic(rc_err, PX_RC, fab_err, B_TRUE);
	px_fm_exit(px_p);
	px_err_panic(rc_err, PX_RC, fab_err, B_FALSE);

done:
	return (DDI_INTR_CLAIMED);
}
466
467/*
468 * px_scan_fabric:
469 *
470 * Check for drain state and if there is anything to scan.
471 *
472 * Note on pfd: Different interrupts will populate the pfd's differently.  The
473 * px driver can have a total of 5 different error sources, so it has a queue of
474 * 5 pfds.  Each valid PDF is linked together and passed to pf_scan_fabric.
475 *
476 * Each error handling will populate the following info in the pfd
477 *
478 *			Root Fault	 Intr Src	 Affected BDF
479 *			----------------+---------------+------------
480 * Callback/CPU Trap	Address/BDF	|DATA		|Lookup Addr
481 * Mondo 62/63 (sun4u)	decode error	|N/A		|N/A
482 * EPKT (sun4v)		decode epkt	|INTERNAL	|decode epkt
483 * Fabric Message	fabric payload	|FABRIC		|NULL
484 * Peek/Poke		Address/BDF	|NULL		|NULL
485 *			----------------+---------------+------------
486 */
487int
488px_scan_fabric(px_t *px_p, dev_info_t *rpdip, ddi_fm_error_t *derr) {
489	int fab_err = 0;
490
491	ASSERT(MUTEX_HELD(&px_p->px_fm_mutex));
492
493	if (!px_lib_is_in_drain_state(px_p) && px_p->px_pfd_idx) {
494		fab_err = pf_scan_fabric(rpdip, derr, px_p->px_pfd_arr);
495	}
496
497	return (fab_err);
498}
499
500/*
501 * px_err_safeacc_check:
502 * Check to see if a peek/poke and cautious access is currently being
503 * done on a particular leaf.
504 *
505 * Safe access reads induced fire errors will be handled by cpu trap handler
506 * which will call px_fm_callback() which calls this function. In that
507 * case, the derr fields will be set by trap handler with the correct values.
508 *
509 * Safe access writes induced errors will be handled by px interrupt
510 * handlers, this function will fill in the derr fields.
511 *
512 * If a cpu trap does occur, it will quiesce all other interrupts allowing
513 * the cpu trap error handling to finish before Fire receives an interrupt.
514 *
515 * If fire does indeed have an error when a cpu trap occurs as a result of
516 * a safe access, a trap followed by a Mondo/Fabric interrupt will occur.
517 * In which case derr will be initialized as "UNEXPECTED" by the interrupt
 * handler and this function will need to find if this error occurred in the
519 * middle of a safe access operation.
520 *
521 * @param px_p		leaf in which to check access
522 * @param derr		fm err data structure to be updated
523 */
524void
525px_err_safeacc_check(px_t *px_p, ddi_fm_error_t *derr)
526{
527	px_pec_t 	*pec_p = px_p->px_pec_p;
528	int		acctype = pec_p->pec_safeacc_type;
529
530	ASSERT(MUTEX_HELD(&px_p->px_fm_mutex));
531
532	if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) {
533		return;
534	}
535
536	/* safe access checking */
537	switch (acctype) {
538	case DDI_FM_ERR_EXPECTED:
539		/*
540		 * cautious access protection, protected from all err.
541		 */
542		ddi_fm_acc_err_get(pec_p->pec_acc_hdl, derr,
543		    DDI_FME_VERSION);
544		derr->fme_flag = acctype;
545		derr->fme_acc_handle = pec_p->pec_acc_hdl;
546		break;
547	case DDI_FM_ERR_POKE:
548		/*
549		 * ddi_poke protection, check nexus and children for
550		 * expected errors.
551		 */
552		membar_sync();
553		derr->fme_flag = acctype;
554		break;
555	case DDI_FM_ERR_PEEK:
556		derr->fme_flag = acctype;
557		break;
558	}
559}
560
561/*
562 * Suggest panic if any EQ (except CE q) has overflown.
563 */
564int
565px_err_check_eq(dev_info_t *dip)
566{
567	px_t			*px_p = DIP_TO_STATE(dip);
568	px_msiq_state_t 	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
569	px_pec_t		*pec_p = px_p->px_pec_p;
570	msiqid_t		eq_no = msiq_state_p->msiq_1st_msiq_id;
571	pci_msiq_state_t	msiq_state;
572	int			i;
573
574	for (i = 0; i < msiq_state_p->msiq_cnt; i++) {
575		if (i + eq_no == pec_p->pec_corr_msg_msiq_id) /* skip CE q */
576			continue;
577		if ((px_lib_msiq_getstate(dip, i + eq_no, &msiq_state) !=
578		    DDI_SUCCESS) || msiq_state == PCI_MSIQ_STATE_ERROR)
579			return (PX_PANIC);
580	}
581	return (PX_NO_PANIC);
582}
583
/* ARGSUSED */
int
px_err_check_pcie(dev_info_t *dip, ddi_fm_error_t *derr, px_err_pcie_t *regs,
    pf_intr_type_t intr_type)
{
	px_t		*px_p = DIP_TO_STATE(dip);
	pf_data_t	*pfd_p = px_get_pfd(px_p);
	int		i;
	pf_pcie_adv_err_regs_t *adv_reg = PCIE_ADV_REG(pfd_p);

	PCIE_ROOT_EH_SRC(pfd_p)->intr_type = intr_type;

	/*
	 * set RC s_status in PCI term to coordinate with downstream fabric
	 * errors analysis.
	 */
	if (regs->primary_ue & PCIE_AER_UCE_UR)
		PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = PCI_STAT_R_MAST_AB;
	if (regs->primary_ue & PCIE_AER_UCE_CA)
		PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = PCI_STAT_R_TARG_AB;
	if (regs->primary_ue & (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_ECRC))
		PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = PCI_STAT_PERROR;

	/* Without a primary UE there is nothing further to decode. */
	if (!regs->primary_ue)
		goto done;

	/* Copy the raw AER state and RX TLP header log into the pfd. */
	adv_reg->pcie_ce_status = regs->ce_reg;
	adv_reg->pcie_ue_status = regs->ue_reg | regs->primary_ue;
	PCIE_ADV_HDR(pfd_p, 0) = regs->rx_hdr1;
	PCIE_ADV_HDR(pfd_p, 1) = regs->rx_hdr2;
	PCIE_ADV_HDR(pfd_p, 2) = regs->rx_hdr3;
	PCIE_ADV_HDR(pfd_p, 3) = regs->rx_hdr4;
	/*
	 * Record the bit position of the primary UE (floor(log2)) in
	 * pcie_adv_ctl; px_get_pfd() cleared pcie_adv_ctl, so the
	 * increments below start from zero.
	 */
	for (i = regs->primary_ue; i != 1; i = i >> 1)
		adv_reg->pcie_adv_ctl++;

	if (regs->primary_ue & (PCIE_AER_UCE_UR | PCIE_AER_UCE_CA)) {
		/* UR/CA: decode the RX TLP to find the target BDF to scan */
		if (pf_tlp_decode(PCIE_DIP2BUS(dip), adv_reg) == DDI_SUCCESS)
			PCIE_ROOT_FAULT(pfd_p)->scan_bdf =
			    adv_reg->pcie_ue_tgt_bdf;
	} else if (regs->primary_ue & PCIE_AER_UCE_PTLP) {
		/* Poisoned TLP: also record the PIO address if applicable */
		if (pf_tlp_decode(PCIE_DIP2BUS(dip), adv_reg) == DDI_SUCCESS) {
			PCIE_ROOT_FAULT(pfd_p)->scan_bdf =
			    adv_reg->pcie_ue_tgt_bdf;
			if (adv_reg->pcie_ue_tgt_trans ==
			    PF_ADDR_PIO)
				PCIE_ROOT_FAULT(pfd_p)->scan_addr =
				    adv_reg->pcie_ue_tgt_addr;
		}

		/*
		 * Normally for Poisoned Completion TLPs we can look at the
		 * transmit log header for the original request and the original
		 * address, however this doesn't seem to be working.  HW BUG.
		 */
	}

done:
	px_pcie_log(dip, regs);

	/* Return No Error here and let the pcie misc module analyse it */
	return (PX_NO_ERROR);
}
646
#if defined(DEBUG)
/*
 * DEBUG-only helper: dump the raw RC PCIe error registers and both TLP
 * header logs.  On non-DEBUG builds px_pcie_log is compiled away via the
 * "0 &&" macro defined near the top of this file.
 */
static void
px_pcie_log(dev_info_t *dip, px_err_pcie_t *regs)
{
	DBG(DBG_ERR_INTR, dip,
	    "A PCIe RC error has occured\n"
	    "\tCE: 0x%x UE: 0x%x Primary UE: 0x%x\n"
	    "\tTX Hdr: 0x%x 0x%x 0x%x 0x%x\n\tRX Hdr: 0x%x 0x%x 0x%x 0x%x\n",
	    regs->ce_reg, regs->ue_reg, regs->primary_ue,
	    regs->tx_hdr1, regs->tx_hdr2, regs->tx_hdr3, regs->tx_hdr4,
	    regs->rx_hdr1, regs->rx_hdr2, regs->rx_hdr3, regs->rx_hdr4);
}
#endif
660
661/*
662 * look through poisoned TLP cases and suggest panic/no panic depend on
663 * handle lookup.
664 */
static int
px_pcie_ptlp(dev_info_t *dip, ddi_fm_error_t *derr, px_err_pcie_t *regs)
{
	pf_pcie_adv_err_regs_t adv_reg;
	pcie_req_id_t	bdf;
	uint64_t	addr;
	uint32_t	trans_type;
	int		tlp_sts, tlp_cmd;
	int		lookup = PF_HDL_NOTFOUND;

	/* This analysis only applies to a pure Poisoned TLP error. */
	if (regs->primary_ue != PCIE_AER_UCE_PTLP)
		return (PX_PANIC);

	/* No RX header was logged: nothing to decode, suggest panic. */
	if (!regs->rx_hdr1)
		goto done;

	adv_reg.pcie_ue_hdr[0] = regs->rx_hdr1;
	adv_reg.pcie_ue_hdr[1] = regs->rx_hdr2;
	adv_reg.pcie_ue_hdr[2] = regs->rx_hdr3;
	adv_reg.pcie_ue_hdr[3] = regs->rx_hdr4;

	tlp_sts = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg);
	tlp_cmd = ((pcie_tlp_hdr_t *)(adv_reg.pcie_ue_hdr))->type;

	if (tlp_sts == DDI_FAILURE)
		goto done;

	bdf = adv_reg.pcie_ue_tgt_bdf;
	addr = adv_reg.pcie_ue_tgt_addr;
	trans_type = adv_reg.pcie_ue_tgt_trans;

	switch (tlp_cmd) {
	case PCIE_TLP_TYPE_CPL:
	case PCIE_TLP_TYPE_CPLLK:
		/*
		 * Usually a PTLP is a CPL with data.  Grab the completer BDF
		 * from the RX TLP, and the original address from the TX TLP.
		 */
		if (regs->tx_hdr1) {
			adv_reg.pcie_ue_hdr[0] = regs->tx_hdr1;
			adv_reg.pcie_ue_hdr[1] = regs->tx_hdr2;
			adv_reg.pcie_ue_hdr[2] = regs->tx_hdr3;
			adv_reg.pcie_ue_hdr[3] = regs->tx_hdr4;

			/*
			 * NOTE: lookup briefly holds a DDI_* status here; if
			 * the TX decode fails we break out, and any value
			 * other than PF_HDL_FOUND yields PX_PANIC below.
			 */
			lookup = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg);
			if (lookup != DDI_SUCCESS)
				break;
			addr = adv_reg.pcie_ue_tgt_addr;
			trans_type = adv_reg.pcie_ue_tgt_trans;
		} /* FALLTHRU */
	case PCIE_TLP_TYPE_IO:
	case PCIE_TLP_TYPE_MEM:
	case PCIE_TLP_TYPE_MEMLK:
		/* See if an access/DMA handle claims this address/BDF. */
		lookup = pf_hdl_lookup(dip, derr->fme_ena, trans_type, addr,
		    bdf);
		break;
	default:
		lookup = PF_HDL_NOTFOUND;
	}
done:
	/* Panic unless a handle was found to absorb the error. */
	return (lookup == PF_HDL_FOUND ? PX_NO_PANIC : PX_PANIC);
}
727
728/*
 * px_get_pfd automatically allocates a RC pf_data_t and returns a pointer to
730 * it.  This function should be used when an error requires a fabric scan.
731 */
732pf_data_t *
733px_get_pfd(px_t *px_p) {
734	int		idx = px_p->px_pfd_idx++;
735	pf_data_t	*pfd_p = &px_p->px_pfd_arr[idx];
736
737	/* Clear Old Data */
738	PCIE_ROOT_FAULT(pfd_p)->scan_bdf = PCIE_INVALID_BDF;
739	PCIE_ROOT_FAULT(pfd_p)->scan_addr = 0;
740	PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_NONE;
741	PCIE_ROOT_EH_SRC(pfd_p)->intr_data = NULL;
742	PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = NULL;
743	PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = PCIE_INVALID_BDF;
744	PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = 0;
745	PCIE_ADV_REG(pfd_p)->pcie_ce_status = 0;
746	PCIE_ADV_REG(pfd_p)->pcie_ue_status = 0;
747	PCIE_ADV_REG(pfd_p)->pcie_adv_ctl = 0;
748
749	pfd_p->pe_next = NULL;
750
751	if (idx > 0) {
752		px_p->px_pfd_arr[idx - 1].pe_next = pfd_p;
753		pfd_p->pe_prev = &px_p->px_pfd_arr[idx - 1];
754	} else {
755		pfd_p->pe_prev = NULL;
756	}
757
758	pfd_p->pe_severity_flags = 0;
759	pfd_p->pe_severity_mask = 0;
760	pfd_p->pe_orig_severity_flags = 0;
761	pfd_p->pe_valid = B_TRUE;
762
763	return (pfd_p);
764}
765
766/*
767 * This function appends a pf_data structure to the error q which is used later
768 * during PCIe fabric scan.  It signifies:
769 * o errs rcvd in RC, that may have been propagated to/from the fabric
770 * o the fabric scan code should scan the device path of fault bdf/addr
771 *
772 * scan_bdf: The bdf that caused the fault, which may have error bits set.
773 * scan_addr: The PIO addr that caused the fault, such as failed PIO, but not
774 *	       failed DMAs.
 * s_status: Secondary Status equivalent to why the fault occurred.
776 *	     (ie S-TA/MA, R-TA)
777 * Either the scan bdf or addr may be NULL, but not both.
778 */
779pf_data_t *
780px_rp_en_q(px_t *px_p, pcie_req_id_t scan_bdf, uint32_t scan_addr,
781    uint16_t s_status)
782{
783	pf_data_t	*pfd_p;
784
785	if (!PCIE_CHECK_VALID_BDF(scan_bdf) && !scan_addr)
786		return (NULL);
787
788	pfd_p = px_get_pfd(px_p);
789
790	PCIE_ROOT_FAULT(pfd_p)->scan_bdf = scan_bdf;
791	PCIE_ROOT_FAULT(pfd_p)->scan_addr = (uint64_t)scan_addr;
792	PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = s_status;
793
794	return (pfd_p);
795}
796
797
798/*
799 * Find and Mark CFG Handles as failed associated with the given BDF. We should
800 * always know the BDF for CFG accesses, since it is encoded in the address of
801 * the TLP.  Since there can be multiple cfg handles, mark them all as failed.
802 */
803/* ARGSUSED */
804int
805px_err_cfg_hdl_check(dev_info_t *dip, const void *handle, const void *arg1,
806    const void *arg2)
807{
808	int			status = DDI_FM_FATAL;
809	uint32_t		addr = *(uint32_t *)arg1;
810	uint16_t		bdf = *(uint16_t *)arg2;
811	pcie_bus_t		*bus_p;
812
813	DBG(DBG_ERR_INTR, dip, "Check CFG Hdl: dip 0x%p addr 0x%x bdf=0x%x\n",
814	    dip, addr, bdf);
815
816	bus_p = PCIE_DIP2BUS(dip);
817
818	/*
819	 * Because CFG and IO Acc Handlers are on the same cache list and both
820	 * types of hdls gets called for both types of errors.  For this checker
821	 * only mark the device as "Non-Fatal" if the addr == NULL and bdf !=
822	 * NULL.
823	 */
824	status = (!addr && (PCIE_CHECK_VALID_BDF(bdf) &&
825	    (bus_p->bus_bdf == bdf))) ? DDI_FM_NONFATAL : DDI_FM_FATAL;
826
827	return (status);
828}
829
830/*
831 * Find and Mark all ACC Handles associated with a give address and BDF as
832 * failed.  If the BDF != NULL, then check to see if the device has a ACC Handle
833 * associated with ADDR.  If the handle is not found, mark all the handles as
834 * failed.  If the BDF == NULL, mark the handle as failed if it is associated
835 * with ADDR.
836 */
int
px_err_pio_hdl_check(dev_info_t *dip, const void *handle, const void *arg1,
    const void *arg2)
{
	dev_info_t		*px_dip;
	px_t			*px_p;
	pci_ranges_t		*ranges_p;
	int			range_len;
	ddi_acc_handle_t	ap = (ddi_acc_handle_t)handle;
	ddi_acc_hdl_t		*hp = impl_acc_hdl_get(ap);
	int			i, status = DDI_FM_FATAL;
	uint64_t		fault_addr = *(uint64_t *)arg1;
	uint16_t		bdf = *(uint16_t *)arg2;
	uint64_t		base_addr, range_addr;
	uint_t			size;

	/*
	 * Find the correct px dip.  On system with a real Root Port, it's the
	 * node above the root port.  On systems without a real Root Port the px
	 * dip is the bus_rp_dip.
	 */
	px_dip = PCIE_DIP2BUS(dip)->bus_rp_dip;

	if (!PCIE_IS_RC(PCIE_DIP2BUS(px_dip)))
		px_dip = ddi_get_parent(px_dip);

	ASSERT(PCIE_IS_RC(PCIE_DIP2BUS(px_dip)));
	px_p = INST_TO_STATE(ddi_get_instance(px_dip));

	DBG(DBG_ERR_INTR, dip, "Check PIO Hdl: dip 0x%x addr 0x%x bdf=0x%x\n",
	    dip, fault_addr, bdf);

	/* Normalize the base addr to the addr and strip off the HB info. */
	base_addr = (hp->ah_pfn << MMU_PAGESHIFT) + hp->ah_offset;
	range_len = px_p->px_ranges_length / sizeof (pci_ranges_t);
	i = 0;
	/*
	 * Walk the ranges property; when the handle's system address falls
	 * inside a PIO (IO/MEM) range, rebase it to the bus-relative form
	 * that fault addresses are expressed in.
	 */
	for (ranges_p = px_p->px_ranges_p; i < range_len; i++, ranges_p++) {
		range_addr = px_in_addr_range(dip, ranges_p, base_addr);
		if (range_addr) {
			switch (ranges_p->child_high & PCI_ADDR_MASK) {
			case PCI_ADDR_IO:
			case PCI_ADDR_MEM64:
			case PCI_ADDR_MEM32:
				base_addr = base_addr - range_addr;
				break;
			}
			break;
		}
	}

	/*
	 * Mark the handle as failed if the ADDR is mapped, or if we
	 * know the BDF and ADDR == 0.
	 */
	size = hp->ah_len;
	if (((fault_addr >= base_addr) && (fault_addr < (base_addr + size))) ||
	    ((fault_addr == NULL) && (PCIE_CHECK_VALID_BDF(bdf) &&
	    (bdf == PCIE_DIP2BUS(dip)->bus_bdf))))
		status = DDI_FM_NONFATAL;

	return (status);
}
899
900/*
901 * Find and Mark all DNA Handles associated with a give address and BDF as
902 * failed.  If the BDF != NULL, then check to see if the device has a DMA Handle
903 * associated with ADDR.  If the handle is not found, mark all the handles as
904 * failed.  If the BDF == NULL, mark the handle as failed if it is associated
905 * with ADDR.
906 */
907int
908px_err_dma_hdl_check(dev_info_t *dip, const void *handle, const void *arg1,
909    const void *arg2)
910{
911	ddi_dma_impl_t		*pcie_dp;
912	int			status = DDI_FM_FATAL;
913	uint32_t		addr = *(uint32_t *)arg1;
914	uint16_t		bdf = *(uint16_t *)arg2;
915	uint32_t		base_addr;
916	uint_t			size;
917
918	DBG(DBG_ERR_INTR, dip, "Check PIO Hdl: dip 0x%x addr 0x%x bdf=0x%x\n",
919	    dip, addr, bdf);
920
921	pcie_dp = (ddi_dma_impl_t *)handle;
922	base_addr = (uint32_t)pcie_dp->dmai_mapping;
923	size = pcie_dp->dmai_size;
924
925	/*
926	 * Mark the handle as failed if the ADDR is mapped, or if we
927	 * know the BDF and ADDR == 0.
928	 */
929	if (((addr >= base_addr) && (addr < (base_addr + size))) ||
930	    ((addr == NULL) && PCIE_CHECK_VALID_BDF(bdf)))
931		status = DDI_FM_NONFATAL;
932
933	return (status);
934}
935
/*
 * Take ownership of fabric error handling for this leaf.  Returns
 * DDI_FAILURE when handling must not proceed: the system is panicking,
 * or an error is already being handled (recursively by this thread, or
 * by the rare racing path described below).  On DDI_SUCCESS the caller
 * holds px_fm_mutex and must call px_fm_exit() when done.
 */
int
px_fm_enter(px_t *px_p) {
	if (px_panicing || (px_p->px_fm_mutex_owner == curthread))
		return (DDI_FAILURE);

	mutex_enter(&px_p->px_fm_mutex);
	/*
	 * In rare cases when trap occurs and in the middle of scanning the
	 * fabric, a PIO will fail in the scan fabric.  The CPU error handling
	 * code will correctly panic the system, while a mondo for the failed
	 * PIO may also show up.  Normally the mondo will try to grab the mutex
	 * and wait until the callback finishes.  But in this rare case,
	 * mutex_enter actually suceeds also continues to scan the fabric.
	 *
	 * This code below is designed specifically to check for this case.  If
	 * we successfully grab the px_fm_mutex, the px_fm_mutex_owner better be
	 * NULL.  If it isn't that means we are in the rare corner case.  Return
	 * DDI_FAILURE, this should prevent PX from doing anymore error
	 * handling.
	 */
	/*
	 * Note: this path deliberately returns without releasing
	 * px_fm_mutex -- the system is expected to panic shortly.
	 */
	if (px_p->px_fm_mutex_owner) {
		return (DDI_FAILURE);
	}

	px_p->px_fm_mutex_owner = curthread;

	/* Re-check the panic flag now that the lock is held. */
	if (px_panicing) {
		px_fm_exit(px_p);
		return (DDI_FAILURE);
	}

	/* Signal the PCIe error handling module error handling is starting */
	pf_eh_enter(PCIE_DIP2BUS(px_p->px_dip));

	return (DDI_SUCCESS);
}
972
973static void
974px_guest_panic(px_t *px_p)
975{
976	pf_data_t *root_pfd_p = PCIE_DIP2PFD(px_p->px_dip);
977	pf_data_t *pfd_p;
978	pcie_bus_t *bus_p, *root_bus_p;
979	pcie_req_id_list_t *rl;
980
981	/*
982	 * check if all devices under the root device are unassigned.
983	 * this function should quickly return in non-IOV environment.
984	 */
985	root_bus_p = PCIE_PFD2BUS(root_pfd_p);
986	if (PCIE_BDG_IS_UNASSIGNED(root_bus_p))
987		return;
988
989	for (pfd_p = root_pfd_p; pfd_p; pfd_p = pfd_p->pe_next) {
990		bus_p = PCIE_PFD2BUS(pfd_p);
991
992		/* assume all affected devs were in the error Q */
993		if (!PCIE_BUS2DOM(bus_p)->nfma_panic)
994			continue;
995
996		if (PCIE_IS_BDG(bus_p)) {
997			rl = PCIE_BDF_LIST_GET(bus_p);
998			while (rl) {
999				px_panic_domain(px_p, rl->bdf);
1000				rl = rl->next;
1001			}
1002		} else {
1003			px_panic_domain(px_p, bus_p->bus_bdf);
1004		}
1005		/* clear panic flag */
1006		PCIE_BUS2DOM(bus_p)->nfma_panic = B_FALSE;
1007	}
1008}
1009
1010void
1011px_fm_exit(px_t *px_p) {
1012	px_p->px_fm_mutex_owner = NULL;
1013	if (px_p->px_pfd_idx == 0) {
1014		mutex_exit(&px_p->px_fm_mutex);
1015		return;
1016	}
1017	/* panic the affected domains that are non-fma-capable */
1018	px_guest_panic(px_p);
1019	/* Signal the PCIe error handling module error handling is ending */
1020	pf_eh_exit(PCIE_DIP2BUS(px_p->px_dip));
1021	px_p->px_pfd_idx = 0;
1022	mutex_exit(&px_p->px_fm_mutex);
1023}
1024
1025/*
1026 * Panic if the err tunable is set and that we are not already in the middle
1027 * of panic'ing.
1028 *
1029 * rc_err = Error severity of PX specific errors
1030 * msg = Where the error was detected
1031 * fabric_err = Error severity of PCIe Fabric errors
1032 * isTest = Test if error severity causes panic
1033 */
1034#define	MSZ (sizeof (fm_msg) -strlen(fm_msg) - 1)
void
px_err_panic(int rc_err, int msg, int fabric_err, boolean_t isTest)
{
	char fm_msg[96] = "";
	int ferr = PX_NO_ERROR;

	/* Already panicking: just latch the global flag and bail out. */
	if (panicstr) {
		px_panicing = B_TRUE;
		return;
	}

	/* RC severity only matters where it intersects the px_die tunable. */
	if (!(rc_err & px_die))
		goto fabric;
	if (msg & PX_RC)
		(void) strncat(fm_msg, px_panic_rc_msg, MSZ);
	if (msg & PX_RP)
		(void) strncat(fm_msg, px_panic_rp_msg, MSZ);
	if (msg & PX_HB)
		(void) strncat(fm_msg, px_panic_hb_msg, MSZ);

fabric:
	/* Translate fabric scan severity into a PX severity. */
	if (fabric_err & PF_ERR_FATAL_FLAGS)
		ferr = PX_PANIC;
	else if (fabric_err & ~(PF_ERR_FATAL_FLAGS | PF_ERR_NO_ERROR))
		ferr = PX_NO_PANIC;

	if (ferr & px_die) {
		if (strlen(fm_msg)) {
			(void) strncat(fm_msg, " and", MSZ);
		}
		(void) strncat(fm_msg, px_panic_fab_msg, MSZ);
	}

	/*
	 * A non-empty message means a panic is warranted.  px_panicing is
	 * latched even when isTest is set, so a caller can probe first
	 * (isTest == B_TRUE), drop its locks, then call again with B_FALSE
	 * to actually panic; see px_err_fabric_intr().
	 */
	if (strlen(fm_msg)) {
		px_panicing = B_TRUE;
		if (!isTest)
			fm_panic("Fatal error has occured in:%s.(0x%x)(0x%x)",
			    fm_msg, rc_err, fabric_err);
	}
}
1075