1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25/*
26 * PX nexus interrupt handling:
27 *	PX device interrupt handler wrapper
28 *	PIL lookup routine
29 *	PX device interrupt related initchild code
30 */
31
32#include <sys/types.h>
33#include <sys/kmem.h>
34#include <sys/async.h>
35#include <sys/spl.h>
36#include <sys/sunddi.h>
37#include <sys/fm/protocol.h>
38#include <sys/fm/util.h>
39#include <sys/machsystm.h>	/* e_ddi_nodeid_to_dip() */
40#include <sys/ddi_impldefs.h>
41#include <sys/sdt.h>
42#include <sys/atomic.h>
43#include "px_obj.h"
44#include <sys/ontrap.h>
45#include <sys/membar.h>
46#include <sys/clock.h>
47
48/*
49 * interrupt jabber:
50 *
51 * When an interrupt line is jabbering, every time the state machine for the
52 * associated ino is idled, a new mondo will be sent and the ino will go into
53 * the pending state again. The mondo will cause a new call to
54 * px_intr_wrapper() which normally idles the ino's state machine which would
55 * precipitate another trip round the loop.
56 *
57 * The loop can be broken by preventing the ino's state machine from being
58 * idled when an interrupt line is jabbering. See the comment at the
59 * beginning of px_intr_wrapper() explaining how the 'interrupt jabber
60 * protection' code does this.
61 */
62
63/*LINTLIBRARY*/
64
65/*
66 * If the unclaimed interrupt count has reached the limit set by
67 * pci_unclaimed_intr_max within the time limit, then all interrupts
68 * on this ino is blocked by not idling the interrupt state machine.
69 */
static int
px_spurintr(px_ino_pil_t *ipil_p)
{
	px_ino_t	*ino_p = ipil_p->ipil_ino_p;
	px_ih_t		*ih_p;
	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
	char		*err_fmt_str;
	boolean_t	blocked = B_FALSE;
	int		i;

	/*
	 * Ino already blocked: claim the interrupt without idling the
	 * state machine so no further mondos are generated for it.
	 */
	if (ino_p->ino_unclaimed_intrs > px_unclaimed_intr_max)
		return (DDI_INTR_CLAIMED);

	/* First unclaimed interrupt of a new burst: record its start time */
	if (!ino_p->ino_unclaimed_intrs)
		ino_p->ino_spurintr_begin = ddi_get_lbolt();

	ino_p->ino_unclaimed_intrs++;

	/* Still under the limit: warn, idle the ino and carry on */
	if (ino_p->ino_unclaimed_intrs <= px_unclaimed_intr_max)
		goto clear;

	/*
	 * The limit was just exceeded.  If it took longer than
	 * px_spurintr_duration to accumulate the unclaimed interrupts,
	 * the line is only intermittently noisy: restart the count
	 * instead of blocking the ino.
	 */
	if (drv_hztousec(ddi_get_lbolt() - ino_p->ino_spurintr_begin)
	    > px_spurintr_duration) {
		ino_p->ino_unclaimed_intrs = 0;
		goto clear;
	}
	err_fmt_str = "%s%d: ino 0x%x blocked";
	blocked = B_TRUE;
	goto warn;
clear:
	err_fmt_str = "!%s%d: spurious interrupt from ino 0x%x";
warn:
	cmn_err(CE_WARN, err_fmt_str, NAMEINST(px_p->px_dip), ino_p->ino_ino);
	/* List every handler registered on this ino, across all PILs */
	for (ipil_p = ino_p->ino_ipil_p; ipil_p;
	    ipil_p = ipil_p->ipil_next_p) {
		for (i = 0, ih_p = ipil_p->ipil_ih_start;
		    i < ipil_p->ipil_ih_size; i++, ih_p = ih_p->ih_next)
			cmn_err(CE_CONT, "!%s-%d#%x ", NAMEINST(ih_p->ih_dip),
			    ih_p->ih_inum);
	}
	cmn_err(CE_CONT, "!\n");

	/*
	 * Clear the pending state.  A blocked ino is deliberately left
	 * pending so it cannot fire again (the jabber protection).
	 */
	if (blocked == B_FALSE) {
		if (px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
		    INTR_IDLE_STATE) != DDI_SUCCESS)
			return (DDI_INTR_UNCLAIMED);
	}

	return (DDI_INTR_CLAIMED);
}
121
122extern uint64_t intr_get_time(void);
123
124/*
125 * px_intx_intr (INTx or legacy interrupt handler)
126 *
127 * This routine is used as wrapper around interrupt handlers installed by child
128 * device drivers.  This routine invokes the driver interrupt handlers and
129 * examines the return codes.
130 *
 * There is a count of unclaimed interrupts kept on a per-ino basis. If at
 * least one handler claims the interrupt then the counter is reset and the
 * interrupt state machine is idled. If no handler claims the interrupt then
 * the counter is incremented by one and the state machine is idled.
 * If the count ever reaches the limit value set by px_unclaimed_intr_max
 * then the interrupt state machine is not idled thus preventing any further
 * interrupts on that ino. The state machine will only be idled again if a
 * handler is subsequently added or removed.
139 *
140 * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
141 * DDI_INTR_UNCLAIMED otherwise.
142 */
uint_t
px_intx_intr(caddr_t arg)
{
	px_ino_pil_t	*ipil_p = (px_ino_pil_t *)arg;
	px_ino_t	*ino_p = ipil_p->ipil_ino_p;
	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
	px_ih_t		*ih_p = ipil_p->ipil_ih_start;
	ushort_t	pil = ipil_p->ipil_pil;
	uint_t		result = 0, r = DDI_INTR_UNCLAIMED;
	int		i;

	DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
	    "ino=%x sysino=%llx pil=%x ih_size=%x ih_lst=%x\n",
	    ino_p->ino_ino, ino_p->ino_sysino, ipil_p->ipil_pil,
	    ipil_p->ipil_ih_size, ipil_p->ipil_ih_head);

	/* Invoke every enabled handler registered on this ino at this PIL */
	for (i = 0; i < ipil_p->ipil_ih_size; i++, ih_p = ih_p->ih_next) {
		dev_info_t *dip = ih_p->ih_dip;
		uint_t (*handler)() = ih_p->ih_handler;
		caddr_t arg1 = ih_p->ih_handler_arg1;
		caddr_t arg2 = ih_p->ih_handler_arg2;

		if (ih_p->ih_intr_state == PX_INTR_STATE_DISABLE) {
			DBG(DBG_INTX_INTR, px_p->px_dip,
			    "px_intx_intr: %s%d interrupt %d is disabled\n",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    ino_p->ino_ino);

			continue;
		}

		DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
		    "ino=%x handler=%p arg1 =%p arg2 = %p\n",
		    ino_p->ino_ino, handler, arg1, arg2);

		DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
		    void *, handler, caddr_t, arg1, caddr_t, arg2);

		r = (*handler)(arg1, arg2);

		/*
		 * Account for time used by this interrupt. Protect against
		 * conflicting writes to ih_ticks from ib_intr_dist_all() by
		 * using atomic ops.
		 */

		if (pil <= LOCK_LEVEL)
			atomic_add_64(&ih_p->ih_ticks, intr_get_time());

		DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
		    void *, handler, caddr_t, arg1, int, r);

		/* Non-zero means at least one handler claimed the intr */
		result += r;

		/*
		 * px_check_all_handlers forces every handler to run even
		 * after one has claimed; otherwise stop at the first claim.
		 */
		if (px_check_all_handlers)
			continue;
		if (result)
			break;
	}

	/* Record that this PIL claimed the interrupt for this ino */
	if (result)
		ino_p->ino_claimed |= (1 << pil);

	/* Interrupt can only be cleared after all pil levels are handled */
	if (pil != ino_p->ino_lopil)
		return (DDI_INTR_CLAIMED);

	/* Lowest PIL: if no PIL claimed, apply jabber protection */
	if (!ino_p->ino_claimed) {
		if (px_unclaimed_intr_block)
			return (px_spurintr(ipil_p));
	}

	ino_p->ino_unclaimed_intrs = 0;
	ino_p->ino_claimed = 0;

	/* Clear the pending state */
	if (px_lib_intr_setstate(px_p->px_dip,
	    ino_p->ino_sysino, INTR_IDLE_STATE) != DDI_SUCCESS)
		return (DDI_INTR_UNCLAIMED);

	return (DDI_INTR_CLAIMED);
}
225
226/*
227 * px_msiq_intr (MSI/X or PCIe MSG interrupt handler)
228 *
229 * This routine is used as wrapper around interrupt handlers installed by child
230 * device drivers.  This routine invokes the driver interrupt handlers and
231 * examines the return codes.
232 *
 * There is a count of unclaimed interrupts kept on a per-ino basis. If at
 * least one handler claims the interrupt then the counter is reset and the
 * interrupt state machine is idled. If no handler claims the interrupt then
 * the counter is incremented by one and the state machine is idled.
 * If the count ever reaches the limit value set by px_unclaimed_intr_max
 * then the interrupt state machine is not idled thus preventing any further
 * interrupts on that ino. The state machine will only be idled again if a
 * handler is subsequently added or removed.
241 *
242 * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
243 * DDI_INTR_UNCLAIMED otherwise.
244 */
uint_t
px_msiq_intr(caddr_t arg)
{
	px_ino_pil_t	*ipil_p = (px_ino_pil_t *)arg;
	px_ino_t	*ino_p = ipil_p->ipil_ino_p;
	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
	px_msiq_t	*msiq_p = ino_p->ino_msiq_p;
	dev_info_t	*dip = px_p->px_dip;
	ushort_t	pil = ipil_p->ipil_pil;
	msiq_rec_t	msiq_rec, *msiq_rec_p = &msiq_rec;
	msiqhead_t	*curr_head_p;
	msiqtail_t	curr_tail_index;
	msgcode_t	msg_code;
	px_ih_t		*ih_p;
	uint_t		ret = DDI_INTR_UNCLAIMED;
	int		i, j;

	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: msiq_id =%x ino=%x pil=%x "
	    "ih_size=%x ih_lst=%x\n", msiq_p->msiq_id, ino_p->ino_ino,
	    ipil_p->ipil_pil, ipil_p->ipil_ih_size, ipil_p->ipil_ih_head);

	/*
	 * The px_msiq_intr() handles multiple interrupt priorities and it
	 * will set msiq->msiq_rec2process to the number of MSIQ records to
	 * process while handling the highest priority interrupt. Subsequent
	 * lower priority interrupts will just process any unprocessed MSIQ
	 * records or will just return immediately.
	 */
	if (msiq_p->msiq_recs2process == 0) {
		ASSERT(ino_p->ino_ipil_cntr == 0);
		ino_p->ino_ipil_cntr = ino_p->ino_ipil_size;

		/* Read current MSIQ tail index */
		px_lib_msiq_gettail(dip, msiq_p->msiq_id, &curr_tail_index);
		msiq_p->msiq_new_head_index = msiq_p->msiq_curr_head_index;

		/* Tail wrapped around the circular queue; unwrap distance */
		if (curr_tail_index < msiq_p->msiq_curr_head_index)
			curr_tail_index += msiq_state_p->msiq_rec_cnt;

		msiq_p->msiq_recs2process = curr_tail_index -
		    msiq_p->msiq_curr_head_index;
	}

	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: curr_head %x new_head %x "
	    "rec2process %x\n", msiq_p->msiq_curr_head_index,
	    msiq_p->msiq_new_head_index, msiq_p->msiq_recs2process);

	/* If all MSIQ records are already processed, just return immediately */
	if ((msiq_p->msiq_new_head_index - msiq_p->msiq_curr_head_index)
	    == msiq_p->msiq_recs2process)
		goto intr_done;

	curr_head_p = (msiqhead_t *)((caddr_t)msiq_p->msiq_base_p +
	    (msiq_p->msiq_curr_head_index * sizeof (msiq_rec_t)));

	/*
	 * Calculate the number of recs to process by taking the difference
	 * between the head and tail pointers. For all records we always
	 * verify that we have a valid record type before we do any processing.
	 * If triggered, we should always have at least one valid record.
	 */
	for (i = 0; i < msiq_p->msiq_recs2process; i++) {
		msiq_rec_type_t rec_type;

		/* Read next MSIQ record */
		px_lib_get_msiq_rec(dip, curr_head_p, msiq_rec_p);

		rec_type = msiq_rec_p->msiq_rec_type;

		DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSIQ RECORD, "
		    "msiq_rec_type 0x%llx msiq_rec_rid 0x%llx\n",
		    rec_type, msiq_rec_p->msiq_rec_rid);

		/* Zero record type: not yet written by hardware, skip it */
		if (!rec_type)
			goto next_rec;

		/* Check MSIQ record type */
		switch (rec_type) {
		case MSG_REC:
			msg_code = msiq_rec_p->msiq_rec_data.msg.msg_code;
			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: PCIE MSG "
			    "record, msg type 0x%x\n", msg_code);
			break;
		case MSI32_REC:
		case MSI64_REC:
			msg_code = msiq_rec_p->msiq_rec_data.msi.msi_data;
			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSI record, "
			    "msi 0x%x\n", msg_code);
			break;
		default:
			msg_code = 0;
			cmn_err(CE_WARN, "%s%d: px_msiq_intr: 0x%x MSIQ "
			    "record type is not supported",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    rec_type);

			goto next_rec;
		}

		/*
		 * Scan through px_ih_t linked list, searching for the
		 * right px_ih_t, matching MSIQ record data.
		 *
		 * NOTE(review): ih_p is dereferenced unconditionally after
		 * this loop; this assumes the list is circular (or that
		 * ih_p is never NULL when the loop exits) -- confirm
		 * against px_ib's list construction.
		 */
		for (j = 0, ih_p = ipil_p->ipil_ih_start;
		    ih_p && (j < ipil_p->ipil_ih_size) &&
		    ((ih_p->ih_msg_code != msg_code) ||
		    (ih_p->ih_rec_type != rec_type));
		    ih_p = ih_p->ih_next, j++)
			;

		if ((ih_p->ih_msg_code == msg_code) &&
		    (ih_p->ih_rec_type == rec_type)) {
			dev_info_t *ih_dip = ih_p->ih_dip;
			uint_t (*handler)() = ih_p->ih_handler;
			caddr_t arg1 = ih_p->ih_handler_arg1;
			caddr_t arg2 = ih_p->ih_handler_arg2;

			DBG(DBG_MSIQ_INTR, ih_dip, "px_msiq_intr: ino=%x "
			    "data=%x handler=%p arg1 =%p arg2=%p\n",
			    ino_p->ino_ino, msg_code, handler, arg1, arg2);

			DTRACE_PROBE4(interrupt__start, dev_info_t, ih_dip,
			    void *, handler, caddr_t, arg1, caddr_t, arg2);

			ih_p->ih_intr_flags = PX_INTR_PENDING;

			/*
			 * Special case for PCIE Error Messages.
			 * The current frame work doesn't fit PCIE Err Msgs
			 * This should be fixed when PCIE MESSAGES as a whole
			 * is architected correctly.
			 */
			if ((rec_type == MSG_REC) &&
			    ((msg_code == PCIE_MSG_CODE_ERR_COR) ||
			    (msg_code == PCIE_MSG_CODE_ERR_NONFATAL) ||
			    (msg_code == PCIE_MSG_CODE_ERR_FATAL))) {
				ret = px_err_fabric_intr(px_p, msg_code,
				    msiq_rec_p->msiq_rec_rid);
			} else {
				/* Clear MSI state */
				px_lib_msi_setstate(dip, (msinum_t)msg_code,
				    PCI_MSI_STATE_IDLE);

				ret = (*handler)(arg1, arg2);
			}

			/*
			 * Account for time used by this interrupt. Protect
			 * against conflicting writes to ih_ticks from
			 * ib_intr_dist_all() by using atomic ops.
			 */

			if (pil <= LOCK_LEVEL)
				atomic_add_64(&ih_p->ih_ticks, intr_get_time());

			DTRACE_PROBE4(interrupt__complete, dev_info_t, ih_dip,
			    void *, handler, caddr_t, arg1, int, ret);

			/* clear handler status flags */
			ih_p->ih_intr_flags = PX_INTR_IDLE;

			msiq_p->msiq_new_head_index++;
			px_lib_clr_msiq_rec(ih_dip, curr_head_p);
		} else {
			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: "
			    "No matching MSIQ record found\n");
		}
next_rec:
		/* Get the pointer to the next EQ record */
		curr_head_p = (msiqhead_t *)
		    ((caddr_t)curr_head_p + sizeof (msiq_rec_t));

		/* Check for overflow condition */
		if (curr_head_p >= (msiqhead_t *)((caddr_t)msiq_p->msiq_base_p
		    + (msiq_state_p->msiq_rec_cnt * sizeof (msiq_rec_t))))
			curr_head_p = (msiqhead_t *)msiq_p->msiq_base_p;
	}

	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: No of MSIQ recs processed %x\n",
	    (msiq_p->msiq_new_head_index - msiq_p->msiq_curr_head_index));

	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: curr_head %x new_head %x "
	    "rec2process %x\n", msiq_p->msiq_curr_head_index,
	    msiq_p->msiq_new_head_index, msiq_p->msiq_recs2process);

	/* ino_claimed used just for debugging purpose */
	if (ret)
		ino_p->ino_claimed |= (1 << pil);

intr_done:
	/* Interrupt can only be cleared after all pil levels are handled */
	if (--ino_p->ino_ipil_cntr != 0)
		return (DDI_INTR_CLAIMED);

	/* No record was consumed: possibly spurious, apply jabber checks */
	if (msiq_p->msiq_new_head_index <= msiq_p->msiq_curr_head_index)  {
		if (px_unclaimed_intr_block)
			return (px_spurintr(ipil_p));
	}

	/*  Update MSIQ head index with no of MSIQ records processed */
	if (msiq_p->msiq_new_head_index >= msiq_state_p->msiq_rec_cnt)
		msiq_p->msiq_new_head_index -= msiq_state_p->msiq_rec_cnt;

	msiq_p->msiq_curr_head_index = msiq_p->msiq_new_head_index;
	px_lib_msiq_sethead(dip, msiq_p->msiq_id, msiq_p->msiq_new_head_index);

	/* Reset per-burst bookkeeping for the next highest-PIL entry */
	msiq_p->msiq_new_head_index = 0;
	msiq_p->msiq_recs2process = 0;
	ino_p->ino_claimed = 0;

	/* Clear the pending state */
	if (px_lib_intr_setstate(dip, ino_p->ino_sysino,
	    INTR_IDLE_STATE) != DDI_SUCCESS)
		return (DDI_INTR_UNCLAIMED);

	return (DDI_INTR_CLAIMED);
}
463
464dev_info_t *
465px_get_my_childs_dip(dev_info_t *dip, dev_info_t *rdip)
466{
467	dev_info_t	*cdip = rdip;
468
469	for (; ddi_get_parent(cdip) != dip; cdip = ddi_get_parent(cdip))
470		;
471
472	return (cdip);
473}
474
475/* ARGSUSED */
476int
477px_intx_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
478    ddi_intr_handle_impl_t *hdlp, void *result)
479{
480	px_t	*px_p = DIP_TO_STATE(dip);
481	int	ret = DDI_SUCCESS;
482
483	DBG(DBG_INTROPS, dip, "px_intx_ops: dip=%x rdip=%x intr_op=%x "
484	    "handle=%p\n", dip, rdip, intr_op, hdlp);
485
486	switch (intr_op) {
487	case DDI_INTROP_GETCAP:
488		ret = pci_intx_get_cap(rdip, (int *)result);
489		break;
490	case DDI_INTROP_SETCAP:
491		DBG(DBG_INTROPS, dip, "px_intx_ops: SetCap is not supported\n");
492		ret = DDI_ENOTSUP;
493		break;
494	case DDI_INTROP_ALLOC:
495		*(int *)result = hdlp->ih_scratch1;
496		break;
497	case DDI_INTROP_FREE:
498		break;
499	case DDI_INTROP_GETPRI:
500		*(int *)result = hdlp->ih_pri ?
501		    hdlp->ih_pri : pci_class_to_pil(rdip);
502		break;
503	case DDI_INTROP_SETPRI:
504		break;
505	case DDI_INTROP_ADDISR:
506		ret = px_add_intx_intr(dip, rdip, hdlp);
507		break;
508	case DDI_INTROP_REMISR:
509		ret = px_rem_intx_intr(dip, rdip, hdlp);
510		break;
511	case DDI_INTROP_GETTARGET:
512		ret = px_ib_get_intr_target(px_p, hdlp->ih_vector,
513		    (cpuid_t *)result);
514		break;
515	case DDI_INTROP_SETTARGET:
516		ret = DDI_ENOTSUP;
517		break;
518	case DDI_INTROP_ENABLE:
519		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
520		    hdlp->ih_vector, hdlp->ih_pri, PX_INTR_STATE_ENABLE, 0, 0);
521		break;
522	case DDI_INTROP_DISABLE:
523		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
524		    hdlp->ih_vector, hdlp->ih_pri, PX_INTR_STATE_DISABLE, 0, 0);
525		break;
526	case DDI_INTROP_SETMASK:
527		ret = pci_intx_set_mask(rdip);
528		break;
529	case DDI_INTROP_CLRMASK:
530		ret = pci_intx_clr_mask(rdip);
531		break;
532	case DDI_INTROP_GETPENDING:
533		ret = pci_intx_get_pending(rdip, (int *)result);
534		break;
535	case DDI_INTROP_NINTRS:
536	case DDI_INTROP_NAVAIL:
537		*(int *)result = i_ddi_get_intx_nintrs(rdip);
538		break;
539	default:
540		ret = DDI_ENOTSUP;
541		break;
542	}
543
544	return (ret);
545}
546
/* ARGSUSED */
int
px_msix_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
    ddi_intr_handle_impl_t *hdlp, void *result)
{
	px_t			*px_p = DIP_TO_STATE(dip);
	px_msi_state_t		*msi_state_p = &px_p->px_ib_p->ib_msi_state;
	msiq_rec_type_t		msiq_rec_type;
	msi_type_t		msi_type;
	uint64_t		msi_addr;
	msinum_t		msi_num;
	msiqid_t		msiq_id;
	uint_t			nintrs;
	int			ret = DDI_SUCCESS;

	DBG(DBG_INTROPS, dip, "px_msix_ops: dip=%x rdip=%x intr_op=%x "
	    "handle=%p\n", dip, rdip, intr_op, hdlp);

	/* Check for MSI64 support */
	if ((hdlp->ih_cap & DDI_INTR_FLAG_MSI64) && msi_state_p->msi_addr64) {
		msiq_rec_type = MSI64_REC;
		msi_type = MSI64_TYPE;
		msi_addr = msi_state_p->msi_addr64;
	} else {
		msiq_rec_type = MSI32_REC;
		msi_type = MSI32_TYPE;
		msi_addr = msi_state_p->msi_addr32;
	}

	/*
	 * Resolve the msi number up front; for a DUP handle the number
	 * of the original (main) handle is used.
	 */
	(void) px_msi_get_msinum(px_p, hdlp->ih_dip,
	    (hdlp->ih_flags & DDI_INTR_MSIX_DUP) ? hdlp->ih_main->ih_inum :
	    hdlp->ih_inum, &msi_num);

	switch (intr_op) {
	case DDI_INTROP_GETCAP:
		ret = pci_msi_get_cap(rdip, hdlp->ih_type, (int *)result);
		break;
	case DDI_INTROP_SETCAP:
		DBG(DBG_INTROPS, dip, "px_msix_ops: SetCap is not supported\n");
		ret = DDI_ENOTSUP;
		break;
	case DDI_INTROP_ALLOC:
		/*
		 * We need to restrict this allocation in future
		 * based on Resource Management policies.
		 */
		if ((ret = px_msi_alloc(px_p, rdip, hdlp->ih_type,
		    hdlp->ih_inum, hdlp->ih_scratch1,
		    (uintptr_t)hdlp->ih_scratch2,
		    (int *)result)) != DDI_SUCCESS) {
			DBG(DBG_INTROPS, dip, "px_msix_ops: allocation "
			    "failed, rdip 0x%p type 0x%d inum 0x%x "
			    "count 0x%x\n", rdip, hdlp->ih_type, hdlp->ih_inum,
			    hdlp->ih_scratch1);

			return (ret);
		}

		/* First MSI-X alloc for this device: set up its table state */
		if ((hdlp->ih_type == DDI_INTR_TYPE_MSIX) &&
		    (i_ddi_get_msix(rdip) == NULL)) {
			ddi_intr_msix_t		*msix_p;

			if (msix_p = pci_msix_init(rdip)) {
				i_ddi_set_msix(rdip, msix_p);
				break;
			}

			DBG(DBG_INTROPS, dip, "px_msix_ops: MSI-X allocation "
			    "failed, rdip 0x%p inum 0x%x\n", rdip,
			    hdlp->ih_inum);

			/* Undo the px_msi_alloc() above before failing */
			(void) px_msi_free(px_p, rdip, hdlp->ih_inum,
			    hdlp->ih_scratch1);

			return (DDI_FAILURE);
		}

		break;
	case DDI_INTROP_FREE:
		(void) pci_msi_unconfigure(rdip, hdlp->ih_type, hdlp->ih_inum);

		/* MSI has no per-device table state to tear down */
		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
			goto msi_free;

		/* DUP handles do not own the underlying vector */
		if (hdlp->ih_flags & DDI_INTR_MSIX_DUP)
			break;

		/* Last MSI-X vector going away: release the table state */
		if (((i_ddi_intr_get_current_nintrs(hdlp->ih_dip) - 1) == 0) &&
		    (i_ddi_get_msix(rdip))) {
			pci_msix_fini(i_ddi_get_msix(rdip));
			i_ddi_set_msix(rdip, NULL);
		}
msi_free:
		(void) px_msi_free(px_p, rdip, hdlp->ih_inum,
		    hdlp->ih_scratch1);
		break;
	case DDI_INTROP_GETPRI:
		/* Fall back to the class-based PIL when none was set */
		*(int *)result = hdlp->ih_pri ?
		    hdlp->ih_pri : pci_class_to_pil(rdip);
		break;
	case DDI_INTROP_SETPRI:
		break;
	case DDI_INTROP_ADDISR:
		/*
		 * Register the handler, bind the msi to its msiq, idle
		 * its state, validate it, and finally enable it in the
		 * interrupt-block state -- unwinding on each failure.
		 */
		if ((ret = px_add_msiq_intr(dip, rdip, hdlp,
		    msiq_rec_type, msi_num, -1, &msiq_id)) != DDI_SUCCESS) {
			DBG(DBG_INTROPS, dip, "px_msix_ops: Add MSI handler "
			    "failed, rdip 0x%p msi 0x%x\n", rdip, msi_num);
			return (ret);
		}

		DBG(DBG_INTROPS, dip, "px_msix_ops: msiq used 0x%x\n", msiq_id);

		if ((ret = px_lib_msi_setmsiq(dip, msi_num,
		    msiq_id, msi_type)) != DDI_SUCCESS) {
			(void) px_rem_msiq_intr(dip, rdip,
			    hdlp, msiq_rec_type, msi_num, msiq_id);
			return (ret);
		}

		if ((ret = px_lib_msi_setstate(dip, msi_num,
		    PCI_MSI_STATE_IDLE)) != DDI_SUCCESS) {
			(void) px_rem_msiq_intr(dip, rdip,
			    hdlp, msiq_rec_type, msi_num, msiq_id);
			return (ret);
		}

		if ((ret = px_lib_msi_setvalid(dip, msi_num,
		    PCI_MSI_VALID)) != DDI_SUCCESS)
			return (ret);

		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
		    px_msiqid_to_devino(px_p, msiq_id), hdlp->ih_pri,
		    PX_INTR_STATE_ENABLE, msiq_rec_type, msi_num);

		break;
	case DDI_INTROP_DUPVEC:
		DBG(DBG_INTROPS, dip, "px_msix_ops: dupisr - inum: %x, "
		    "new_vector: %x\n", hdlp->ih_inum, hdlp->ih_scratch1);

		ret = pci_msix_dup(hdlp->ih_dip, hdlp->ih_inum,
		    hdlp->ih_scratch1);
		break;
	case DDI_INTROP_REMISR:
		/* Tear down in the reverse order of ADDISR */
		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
		    &msiq_id)) != DDI_SUCCESS)
			return (ret);

		if ((ret = px_ib_update_intr_state(px_p, rdip,
		    hdlp->ih_inum, px_msiqid_to_devino(px_p, msiq_id),
		    hdlp->ih_pri, PX_INTR_STATE_DISABLE, msiq_rec_type,
		    msi_num)) != DDI_SUCCESS)
			return (ret);

		if ((ret = px_lib_msi_setvalid(dip, msi_num,
		    PCI_MSI_INVALID)) != DDI_SUCCESS)
			return (ret);

		if ((ret = px_lib_msi_setstate(dip, msi_num,
		    PCI_MSI_STATE_IDLE)) != DDI_SUCCESS)
			return (ret);

		ret = px_rem_msiq_intr(dip, rdip,
		    hdlp, msiq_rec_type, msi_num, msiq_id);

		break;
	case DDI_INTROP_GETTARGET:
		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
		    &msiq_id)) != DDI_SUCCESS)
			return (ret);

		ret = px_ib_get_intr_target(px_p,
		    px_msiqid_to_devino(px_p, msiq_id), (cpuid_t *)result);
		break;
	case DDI_INTROP_SETTARGET:
		ret = px_ib_set_msix_target(px_p, hdlp, msi_num,
		    *(cpuid_t *)result);
		break;
	case DDI_INTROP_ENABLE:
		/*
		 * For MSI, just clear the mask bit and return if curr_nenables
		 * is > 1. For MSI-X, program MSI address and data for every
		 * MSI-X vector including dup vectors irrespective of current
		 * curr_nenables value.
		 */
		if ((pci_is_msi_enabled(rdip, hdlp->ih_type) != DDI_SUCCESS) ||
		    (hdlp->ih_type == DDI_INTR_TYPE_MSIX)) {
			nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);

			/* For MSI, align msi_num to the nintrs block base */
			if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
			    nintrs, hdlp->ih_inum, msi_addr,
			    hdlp->ih_type == DDI_INTR_TYPE_MSIX ?
			    msi_num : msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
				return (ret);

			if (i_ddi_intr_get_current_nenables(rdip) < 1) {
				if ((ret = pci_msi_enable_mode(rdip,
				    hdlp->ih_type)) != DDI_SUCCESS)
					return (ret);
			}
		}

		if ((ret = pci_msi_clr_mask(rdip, hdlp->ih_type,
		    hdlp->ih_inum)) != DDI_SUCCESS)
			return (ret);

		break;
	case DDI_INTROP_DISABLE:
		if ((ret = pci_msi_set_mask(rdip, hdlp->ih_type,
		    hdlp->ih_inum)) != DDI_SUCCESS)
			return (ret);

		/*
		 * curr_nenables will be greater than 1 if rdip is using
		 * MSI-X and also, if it is using DUP interface. If this
		 * curr_enables is > 1, return after setting the mask bit.
		 */
		if (i_ddi_intr_get_current_nenables(rdip) > 1)
			return (DDI_SUCCESS);

		if ((ret = pci_msi_disable_mode(rdip, hdlp->ih_type))
		    != DDI_SUCCESS)
			return (ret);

		break;
	case DDI_INTROP_BLOCKENABLE:
		/* Program and enable the whole block of vectors at once */
		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);

		if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
		    nintrs, hdlp->ih_inum, msi_addr,
		    msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
			return (ret);

		ret = pci_msi_enable_mode(rdip, hdlp->ih_type);
		break;
	case DDI_INTROP_BLOCKDISABLE:
		ret = pci_msi_disable_mode(rdip, hdlp->ih_type);
		break;
	case DDI_INTROP_SETMASK:
		ret = pci_msi_set_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
		break;
	case DDI_INTROP_CLRMASK:
		ret = pci_msi_clr_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
		break;
	case DDI_INTROP_GETPENDING:
		ret = pci_msi_get_pending(rdip, hdlp->ih_type,
		    hdlp->ih_inum, (int *)result);
		break;
	case DDI_INTROP_NINTRS:
		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
		break;
	case DDI_INTROP_NAVAIL:
		/* XXX - a new interface may be needed */
		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
		break;
	case DDI_INTROP_GETPOOL:
		if (msi_state_p->msi_pool_p == NULL) {
			*(ddi_irm_pool_t **)result = NULL;
			return (DDI_ENOTSUP);
		}
		*(ddi_irm_pool_t **)result = msi_state_p->msi_pool_p;
		ret = DDI_SUCCESS;
		break;
	default:
		ret = DDI_ENOTSUP;
		break;
	}

	return (ret);
}
816
/*
 * Template for the per-handler "pci_intrs" kstat.  A single shared
 * instance is (re)filled by px_ks_update() under
 * pxintr_ks_template_lock each time a kstat is read.
 */
static struct {
	kstat_named_t pxintr_ks_name;		/* leaf driver name+instance */
	kstat_named_t pxintr_ks_type;		/* msi / msix / fixed / disabled */
	kstat_named_t pxintr_ks_cpu;		/* CPU the ino is targeted at */
	kstat_named_t pxintr_ks_pil;		/* priority level of handler */
	kstat_named_t pxintr_ks_time;		/* accumulated handler time, ns */
	kstat_named_t pxintr_ks_ino;		/* device ino */
	kstat_named_t pxintr_ks_cookie;		/* system ino (sysino) */
	kstat_named_t pxintr_ks_devpath;	/* leaf device path */
	kstat_named_t pxintr_ks_buspath;	/* nexus (bus) device path */
} pxintr_ks_template = {
	{ "name",	KSTAT_DATA_CHAR },
	{ "type",	KSTAT_DATA_CHAR },
	{ "cpu",	KSTAT_DATA_UINT64 },
	{ "pil",	KSTAT_DATA_UINT64 },
	{ "time",	KSTAT_DATA_UINT64 },
	{ "ino",	KSTAT_DATA_UINT64 },
	{ "cookie",	KSTAT_DATA_UINT64 },
	{ "devpath",	KSTAT_DATA_STRING },
	{ "buspath",	KSTAT_DATA_STRING },
};
838
/* Monotonically increasing instance number for pci_intrs kstats */
static uint32_t pxintr_ks_instance;
/* Scratch buffers for device/bus paths, filled in px_ks_update() */
static char ih_devpath[MAXPATHLEN];
static char ih_buspath[MAXPATHLEN];
/* Serializes use of pxintr_ks_template and the path buffers above */
kmutex_t pxintr_ks_template_lock;
843
844int
845px_ks_update(kstat_t *ksp, int rw)
846{
847	px_ih_t *ih_p = ksp->ks_private;
848	int maxlen = sizeof (pxintr_ks_template.pxintr_ks_name.value.c);
849	px_ino_pil_t *ipil_p = ih_p->ih_ipil_p;
850	px_ino_t *ino_p = ipil_p->ipil_ino_p;
851	px_t *px_p = ino_p->ino_ib_p->ib_px_p;
852	devino_t ino;
853	sysino_t sysino;
854
855	ino = ino_p->ino_ino;
856	if (px_lib_intr_devino_to_sysino(px_p->px_dip, ino, &sysino) !=
857	    DDI_SUCCESS) {
858		cmn_err(CE_WARN, "px_ks_update: px_lib_intr_devino_to_sysino "
859		    "failed");
860	}
861
862	(void) snprintf(pxintr_ks_template.pxintr_ks_name.value.c, maxlen,
863	    "%s%d", ddi_driver_name(ih_p->ih_dip),
864	    ddi_get_instance(ih_p->ih_dip));
865
866	(void) ddi_pathname(ih_p->ih_dip, ih_devpath);
867	(void) ddi_pathname(px_p->px_dip, ih_buspath);
868	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_devpath, ih_devpath);
869	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_buspath, ih_buspath);
870
871	if (ih_p->ih_intr_state == PX_INTR_STATE_ENABLE) {
872
873		switch (i_ddi_intr_get_current_type(ih_p->ih_dip)) {
874		case DDI_INTR_TYPE_MSI:
875			(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
876			    "msi");
877			break;
878		case DDI_INTR_TYPE_MSIX:
879			(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
880			    "msix");
881			break;
882		default:
883			(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
884			    "fixed");
885			break;
886		}
887
888		pxintr_ks_template.pxintr_ks_cpu.value.ui64 = ino_p->ino_cpuid;
889		pxintr_ks_template.pxintr_ks_pil.value.ui64 = ipil_p->ipil_pil;
890		pxintr_ks_template.pxintr_ks_time.value.ui64 = ih_p->ih_nsec +
891		    (uint64_t)tick2ns((hrtime_t)ih_p->ih_ticks,
892		    ino_p->ino_cpuid);
893		pxintr_ks_template.pxintr_ks_ino.value.ui64 = ino;
894		pxintr_ks_template.pxintr_ks_cookie.value.ui64 = sysino;
895	} else {
896		(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
897		    "disabled");
898		pxintr_ks_template.pxintr_ks_cpu.value.ui64 = 0;
899		pxintr_ks_template.pxintr_ks_pil.value.ui64 = 0;
900		pxintr_ks_template.pxintr_ks_time.value.ui64 = 0;
901		pxintr_ks_template.pxintr_ks_ino.value.ui64 = 0;
902		pxintr_ks_template.pxintr_ks_cookie.value.ui64 = 0;
903	}
904	return (0);
905}
906
907void
908px_create_intr_kstats(px_ih_t *ih_p)
909{
910	msiq_rec_type_t rec_type = ih_p->ih_rec_type;
911
912	ASSERT(ih_p->ih_ksp == NULL);
913
914	/*
915	 * Create pci_intrs::: kstats for all ih types except messages,
916	 * which represent unusual conditions and don't need to be tracked.
917	 */
918	if (rec_type == 0 || rec_type == MSI32_REC || rec_type == MSI64_REC) {
919		ih_p->ih_ksp = kstat_create("pci_intrs",
920		    atomic_inc_32_nv(&pxintr_ks_instance), "config",
921		    "interrupts", KSTAT_TYPE_NAMED,
922		    sizeof (pxintr_ks_template) / sizeof (kstat_named_t),
923		    KSTAT_FLAG_VIRTUAL);
924	}
925	if (ih_p->ih_ksp != NULL) {
926		ih_p->ih_ksp->ks_data_size += MAXPATHLEN * 2;
927		ih_p->ih_ksp->ks_lock = &pxintr_ks_template_lock;
928		ih_p->ih_ksp->ks_data = &pxintr_ks_template;
929		ih_p->ih_ksp->ks_private = ih_p;
930		ih_p->ih_ksp->ks_update = px_ks_update;
931	}
932}
933
934/*
935 * px_add_intx_intr:
936 *
937 * This function is called to register INTx and legacy hardware
938 * interrupt pins interrupts.
939 */
940int
941px_add_intx_intr(dev_info_t *dip, dev_info_t *rdip,
942    ddi_intr_handle_impl_t *hdlp)
943{
944	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
945	px_ib_t		*ib_p = px_p->px_ib_p;
946	devino_t	ino;
947	px_ih_t		*ih_p;
948	px_ino_t	*ino_p;
949	px_ino_pil_t	*ipil_p, *ipil_list;
950	int32_t		weight;
951	int		ret = DDI_SUCCESS;
952	cpuid_t		curr_cpu;
953
954	ino = hdlp->ih_vector;
955
956	DBG(DBG_A_INTX, dip, "px_add_intx_intr: rdip=%s%d ino=%x "
957	    "handler=%x arg1=%x arg2=%x\n", ddi_driver_name(rdip),
958	    ddi_get_instance(rdip), ino, hdlp->ih_cb_func,
959	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
960
961	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum,
962	    hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, 0, 0);
963
964	mutex_enter(&ib_p->ib_ino_lst_mutex);
965
966	ino_p = px_ib_locate_ino(ib_p, ino);
967	ipil_list = ino_p ? ino_p->ino_ipil_p : NULL;
968
969	if (hdlp->ih_pri == 0)
970		hdlp->ih_pri = pci_class_to_pil(rdip);
971
972	/* Sharing the INO using a PIL that already exists */
973	if (ino_p && (ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri))) {
974		if (px_ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum, 0, 0)) {
975			DBG(DBG_A_INTX, dip, "px_add_intx_intr: "
976			    "dup intr #%d\n", hdlp->ih_inum);
977
978			ret = DDI_FAILURE;
979			goto fail1;
980		}
981
982		/* Save mondo value in hdlp */
983		hdlp->ih_vector = ino_p->ino_sysino;
984
985		if ((ret = px_ib_ino_add_intr(px_p, ipil_p,
986		    ih_p)) != DDI_SUCCESS)
987			goto fail1;
988
989		goto ino_done;
990	}
991
992	/* Sharing the INO using a new PIL */
993	if (ipil_list != NULL) {
994		/*
995		 * disable INO to avoid lopil race condition with
996		 * px_intx_intr
997		 */
998
999		if ((ret = px_lib_intr_gettarget(dip, ino_p->ino_sysino,
1000		    &curr_cpu)) != DDI_SUCCESS) {
1001			DBG(DBG_IB, dip,
1002			    "px_add_intx_intr px_intr_gettarget() failed\n");
1003
1004			goto fail1;
1005		}
1006
1007		/* Wait on pending interrupt */
1008		if ((ret = px_ib_intr_pend(dip, ino_p->ino_sysino)) !=
1009		    DDI_SUCCESS) {
1010			cmn_err(CE_WARN, "%s%d: px_add_intx_intr: "
1011			    "pending sysino 0x%lx(ino 0x%x) timeout",
1012			    ddi_driver_name(dip), ddi_get_instance(dip),
1013			    ino_p->ino_sysino, ino_p->ino_ino);
1014			goto fail1;
1015		}
1016	}
1017
1018	ipil_p = px_ib_new_ino_pil(ib_p, ino, hdlp->ih_pri, ih_p);
1019	ino_p = ipil_p->ipil_ino_p;
1020
1021	/* Save mondo value in hdlp */
1022	hdlp->ih_vector = ino_p->ino_sysino;
1023
1024	DBG(DBG_A_INTX, dip, "px_add_intx_intr: pil=0x%x mondo=0x%x\n",
1025	    hdlp->ih_pri, hdlp->ih_vector);
1026
1027	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1028	    (ddi_intr_handler_t *)px_intx_intr, (caddr_t)ipil_p, NULL);
1029
1030	ret = i_ddi_add_ivintr(hdlp);
1031
1032	/*
1033	 * Restore original interrupt handler
1034	 * and arguments in interrupt handle.
1035	 */
1036	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
1037	    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);
1038
1039	if (ret != DDI_SUCCESS)
1040		goto fail2;
1041
1042	/* Save the pil for this ino */
1043	ipil_p->ipil_pil = hdlp->ih_pri;
1044
1045	/* Select cpu, saving it for sharing and removal */
1046	if (ipil_list == NULL) {
1047		if (ino_p->ino_cpuid == -1)
1048			ino_p->ino_cpuid = intr_dist_cpuid();
1049
1050		/* Enable interrupt */
1051		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino);
1052	} else {
1053		/* Re-enable interrupt */
1054		PX_INTR_ENABLE(dip, ino_p->ino_sysino, curr_cpu);
1055	}
1056
1057ino_done:
1058	hdlp->ih_target = ino_p->ino_cpuid;
1059
1060	/* Add weight to the cpu that we are already targeting */
1061	weight = pci_class_to_intr_weight(rdip);
1062	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);
1063
1064	ih_p->ih_ipil_p = ipil_p;
1065	px_create_intr_kstats(ih_p);
1066	if (ih_p->ih_ksp)
1067		kstat_install(ih_p->ih_ksp);
1068	mutex_exit(&ib_p->ib_ino_lst_mutex);
1069
1070	DBG(DBG_A_INTX, dip, "px_add_intx_intr: done! Interrupt 0x%x pil=%x\n",
1071	    ino_p->ino_sysino, hdlp->ih_pri);
1072
1073	return (ret);
1074fail2:
1075	px_ib_delete_ino_pil(ib_p, ipil_p);
1076fail1:
1077	if (ih_p->ih_config_handle)
1078		pci_config_teardown(&ih_p->ih_config_handle);
1079
1080	mutex_exit(&ib_p->ib_ino_lst_mutex);
1081	kmem_free(ih_p, sizeof (px_ih_t));
1082
1083	DBG(DBG_A_INTX, dip, "px_add_intx_intr: Failed! Interrupt 0x%x "
1084	    "pil=%x\n", ino_p->ino_sysino, hdlp->ih_pri);
1085
1086	return (ret);
1087}
1088
1089/*
1090 * px_rem_intx_intr:
1091 *
1092 * This function is called to unregister INTx and legacy hardware
1093 * interrupt pins interrupts.
1094 */
1095int
1096px_rem_intx_intr(dev_info_t *dip, dev_info_t *rdip,
1097    ddi_intr_handle_impl_t *hdlp)
1098{
1099	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1100	px_ib_t		*ib_p = px_p->px_ib_p;
1101	devino_t	ino;
1102	cpuid_t		curr_cpu;
1103	px_ino_t	*ino_p;
1104	px_ino_pil_t	*ipil_p;
1105	px_ih_t		*ih_p;
1106	int		ret = DDI_SUCCESS;
1107
1108	ino = hdlp->ih_vector;
1109
1110	DBG(DBG_R_INTX, dip, "px_rem_intx_intr: rdip=%s%d ino=%x\n",
1111	    ddi_driver_name(rdip), ddi_get_instance(rdip), ino);
1112
1113	mutex_enter(&ib_p->ib_ino_lst_mutex);
1114
1115	ino_p = px_ib_locate_ino(ib_p, ino);
1116	ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri);
1117	ih_p = px_ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum, 0, 0);
1118
1119	/* Get the current cpu */
1120	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
1121	    &curr_cpu)) != DDI_SUCCESS)
1122		goto fail;
1123
1124	if ((ret = px_ib_ino_rem_intr(px_p, ipil_p, ih_p)) != DDI_SUCCESS)
1125		goto fail;
1126
1127	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
1128
1129	if (ipil_p->ipil_ih_size == 0) {
1130		hdlp->ih_vector = ino_p->ino_sysino;
1131		i_ddi_rem_ivintr(hdlp);
1132
1133		px_ib_delete_ino_pil(ib_p, ipil_p);
1134	}
1135
1136	if (ino_p->ino_ipil_size == 0) {
1137		kmem_free(ino_p, sizeof (px_ino_t));
1138	} else {
1139		/* Re-enable interrupt only if mapping register still shared */
1140		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
1141	}
1142
1143fail:
1144	mutex_exit(&ib_p->ib_ino_lst_mutex);
1145	return (ret);
1146}
1147
1148/*
1149 * px_add_msiq_intr:
1150 *
1151 * This function is called to register MSI/Xs and PCIe message interrupts.
1152 */
1153int
1154px_add_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
1155    ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
1156    msgcode_t msg_code, cpuid_t cpu_id, msiqid_t *msiq_id_p)
1157{
1158	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1159	px_ib_t		*ib_p = px_p->px_ib_p;
1160	px_msiq_state_t	*msiq_state_p = &ib_p->ib_msiq_state;
1161	devino_t	ino;
1162	px_ih_t		*ih_p;
1163	px_ino_t	*ino_p;
1164	px_ino_pil_t	*ipil_p, *ipil_list;
1165	int32_t		weight;
1166	int		ret = DDI_SUCCESS;
1167
1168	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: rdip=%s%d handler=0x%x "
1169	    "arg1=0x%x arg2=0x%x cpu=0x%x\n", ddi_driver_name(rdip),
1170	    ddi_get_instance(rdip), hdlp->ih_cb_func, hdlp->ih_cb_arg1,
1171	    hdlp->ih_cb_arg2, cpu_id);
1172
1173	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum, hdlp->ih_cb_func,
1174	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, rec_type, msg_code);
1175
1176	mutex_enter(&ib_p->ib_ino_lst_mutex);
1177
1178	ret = (cpu_id == -1) ? px_msiq_alloc(px_p, rec_type, msg_code,
1179	    msiq_id_p) : px_msiq_alloc_based_on_cpuid(px_p, rec_type,
1180	    cpu_id, msiq_id_p);
1181
1182	if (ret != DDI_SUCCESS) {
1183		DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
1184		    "msiq allocation failed\n");
1185		goto fail;
1186	}
1187
1188	ino = px_msiqid_to_devino(px_p, *msiq_id_p);
1189
1190	ino_p = px_ib_locate_ino(ib_p, ino);
1191	ipil_list = ino_p ? ino_p->ino_ipil_p : NULL;
1192
1193	if (hdlp->ih_pri == 0)
1194		hdlp->ih_pri = pci_class_to_pil(rdip);
1195
1196	/* Sharing ino */
1197	if (ino_p && (ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri))) {
1198		if (px_ib_intr_locate_ih(ipil_p, rdip,
1199		    hdlp->ih_inum, rec_type, msg_code)) {
1200			DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
1201			    "dup intr #%d\n", hdlp->ih_inum);
1202
1203			ret = DDI_FAILURE;
1204			goto fail1;
1205		}
1206
1207		/* Save mondo value in hdlp */
1208		hdlp->ih_vector = ino_p->ino_sysino;
1209
1210		if ((ret = px_ib_ino_add_intr(px_p, ipil_p,
1211		    ih_p)) != DDI_SUCCESS)
1212			goto fail1;
1213
1214		goto ino_done;
1215	}
1216
1217	ipil_p = px_ib_new_ino_pil(ib_p, ino, hdlp->ih_pri, ih_p);
1218	ino_p = ipil_p->ipil_ino_p;
1219
1220	ino_p->ino_msiq_p = msiq_state_p->msiq_p +
1221	    (*msiq_id_p - msiq_state_p->msiq_1st_msiq_id);
1222
1223	/* Save mondo value in hdlp */
1224	hdlp->ih_vector = ino_p->ino_sysino;
1225
1226	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: pil=0x%x mondo=0x%x\n",
1227	    hdlp->ih_pri, hdlp->ih_vector);
1228
1229	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1230	    (ddi_intr_handler_t *)px_msiq_intr, (caddr_t)ipil_p, NULL);
1231
1232	ret = i_ddi_add_ivintr(hdlp);
1233
1234	/*
1235	 * Restore original interrupt handler
1236	 * and arguments in interrupt handle.
1237	 */
1238	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
1239	    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);
1240
1241	if (ret != DDI_SUCCESS)
1242		goto fail2;
1243
1244	/* Save the pil for this ino */
1245	ipil_p->ipil_pil = hdlp->ih_pri;
1246
1247	/* Select cpu, saving it for sharing and removal */
1248	if (ipil_list == NULL) {
1249		/* Enable MSIQ */
1250		px_lib_msiq_setstate(dip, *msiq_id_p, PCI_MSIQ_STATE_IDLE);
1251		px_lib_msiq_setvalid(dip, *msiq_id_p, PCI_MSIQ_VALID);
1252
1253		if (ino_p->ino_cpuid == -1)
1254			ino_p->ino_cpuid = intr_dist_cpuid();
1255
1256		/* Enable interrupt */
1257		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino);
1258	}
1259
1260ino_done:
1261	hdlp->ih_target = ino_p->ino_cpuid;
1262
1263	/* Add weight to the cpu that we are already targeting */
1264	weight = pci_class_to_intr_weight(rdip);
1265	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);
1266
1267	ih_p->ih_ipil_p = ipil_p;
1268	px_create_intr_kstats(ih_p);
1269	if (ih_p->ih_ksp)
1270		kstat_install(ih_p->ih_ksp);
1271	mutex_exit(&ib_p->ib_ino_lst_mutex);
1272
1273	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: done! Interrupt 0x%x pil=%x\n",
1274	    ino_p->ino_sysino, hdlp->ih_pri);
1275
1276	return (ret);
1277fail2:
1278	px_ib_delete_ino_pil(ib_p, ipil_p);
1279fail1:
1280	(void) px_msiq_free(px_p, *msiq_id_p);
1281fail:
1282	if (ih_p->ih_config_handle)
1283		pci_config_teardown(&ih_p->ih_config_handle);
1284
1285	mutex_exit(&ib_p->ib_ino_lst_mutex);
1286	kmem_free(ih_p, sizeof (px_ih_t));
1287
1288	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: Failed! Interrupt 0x%x pil=%x\n",
1289	    ino_p->ino_sysino, hdlp->ih_pri);
1290
1291	return (ret);
1292}
1293
1294/*
1295 * px_rem_msiq_intr:
1296 *
1297 * This function is called to unregister MSI/Xs and PCIe message interrupts.
1298 */
1299int
1300px_rem_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
1301    ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
1302    msgcode_t msg_code, msiqid_t msiq_id)
1303{
1304	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1305	px_ib_t		*ib_p = px_p->px_ib_p;
1306	devino_t	ino = px_msiqid_to_devino(px_p, msiq_id);
1307	cpuid_t		curr_cpu;
1308	px_ino_t	*ino_p;
1309	px_ino_pil_t	*ipil_p;
1310	px_ih_t		*ih_p;
1311	int		ret = DDI_SUCCESS;
1312
1313	DBG(DBG_MSIQ, dip, "px_rem_msiq_intr: rdip=%s%d msiq_id=%x ino=%x\n",
1314	    ddi_driver_name(rdip), ddi_get_instance(rdip), msiq_id, ino);
1315
1316	mutex_enter(&ib_p->ib_ino_lst_mutex);
1317
1318	ino_p = px_ib_locate_ino(ib_p, ino);
1319	ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri);
1320	ih_p = px_ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum, rec_type,
1321	    msg_code);
1322
1323	/* Get the current cpu */
1324	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
1325	    &curr_cpu)) != DDI_SUCCESS)
1326		goto fail;
1327
1328	if ((ret = px_ib_ino_rem_intr(px_p, ipil_p, ih_p)) != DDI_SUCCESS)
1329		goto fail;
1330
1331	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
1332
1333	if (ipil_p->ipil_ih_size == 0) {
1334		hdlp->ih_vector = ino_p->ino_sysino;
1335		i_ddi_rem_ivintr(hdlp);
1336
1337		px_ib_delete_ino_pil(ib_p, ipil_p);
1338
1339		if (ino_p->ino_ipil_size == 0)
1340			px_lib_msiq_setvalid(dip,
1341			    px_devino_to_msiqid(px_p, ino), PCI_MSIQ_INVALID);
1342	}
1343
1344	(void) px_msiq_free(px_p, msiq_id);
1345
1346	if (ino_p->ino_ipil_size) {
1347		/* Re-enable interrupt only if mapping register still shared */
1348		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
1349	}
1350
1351fail:
1352	mutex_exit(&ib_p->ib_ino_lst_mutex);
1353	return (ret);
1354}
1355