xref: /illumos-gate/usr/src/uts/sun4/io/px/px_intr.c (revision 4bd2626c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * PX nexus interrupt handling:
28  *	PX device interrupt handler wrapper
29  *	PIL lookup routine
30  *	PX device interrupt related initchild code
31  */
32 
33 #include <sys/types.h>
34 #include <sys/kmem.h>
35 #include <sys/async.h>
36 #include <sys/spl.h>
37 #include <sys/sunddi.h>
38 #include <sys/fm/protocol.h>
39 #include <sys/fm/util.h>
40 #include <sys/machsystm.h>	/* e_ddi_nodeid_to_dip() */
41 #include <sys/ddi_impldefs.h>
42 #include <sys/sdt.h>
43 #include <sys/atomic.h>
44 #include "px_obj.h"
45 #include <sys/ontrap.h>
46 #include <sys/membar.h>
47 #include <sys/clock.h>
48 
49 /*
50  * interrupt jabber:
51  *
52  * When an interrupt line is jabbering, every time the state machine for the
53  * associated ino is idled, a new mondo will be sent and the ino will go into
54  * the pending state again. The mondo will cause a new call to
55  * px_intr_wrapper() which normally idles the ino's state machine which would
56  * precipitate another trip round the loop.
57  *
58  * The loop can be broken by preventing the ino's state machine from being
59  * idled when an interrupt line is jabbering. See the comment at the
60  * beginning of px_intr_wrapper() explaining how the 'interrupt jabber
61  * protection' code does this.
62  */
63 
64 /*LINTLIBRARY*/
65 
66 /*
67  * If the unclaimed interrupt count has reached the limit set by
68  * pci_unclaimed_intr_max within the time limit, then all interrupts
69  * on this ino is blocked by not idling the interrupt state machine.
70  */
static int
px_spurintr(px_ino_pil_t *ipil_p)
{
	px_ino_t	*ino_p = ipil_p->ipil_ino_p;
	px_ih_t		*ih_p;
	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
	char		*err_fmt_str;
	boolean_t	blocked = B_FALSE;
	int		i;

	/*
	 * Already past the limit on a previous pass: the ino has been
	 * blocked (state machine left pending), so just claim and return.
	 */
	if (ino_p->ino_unclaimed_intrs > px_unclaimed_intr_max)
		return (DDI_INTR_CLAIMED);

	/* First unclaimed interrupt of a run: start the timing window. */
	if (!ino_p->ino_unclaimed_intrs)
		ino_p->ino_spurintr_begin = ddi_get_lbolt();

	ino_p->ino_unclaimed_intrs++;

	/* Under the limit: warn, but still idle the state machine below. */
	if (ino_p->ino_unclaimed_intrs <= px_unclaimed_intr_max)
		goto clear;

	/*
	 * Limit just exceeded.  If it took longer than
	 * px_spurintr_duration (microseconds) to accumulate that many
	 * unclaimed interrupts, the rate is tolerable: restart the count
	 * and keep servicing.  Otherwise the line is jabbering: leave
	 * the state machine pending so no further mondos are generated.
	 */
	if (drv_hztousec(ddi_get_lbolt() - ino_p->ino_spurintr_begin)
	    > px_spurintr_duration) {
		ino_p->ino_unclaimed_intrs = 0;
		goto clear;
	}
	err_fmt_str = "%s%d: ino 0x%x blocked";
	blocked = B_TRUE;
	goto warn;
clear:
	err_fmt_str = "!%s%d: spurious interrupt from ino 0x%x";
warn:
	cmn_err(CE_WARN, err_fmt_str, NAMEINST(px_p->px_dip), ino_p->ino_ino);
	/*
	 * List every handler sharing this ino (across all PIL levels)
	 * to help identify the offending device.
	 */
	for (ipil_p = ino_p->ino_ipil_p; ipil_p;
	    ipil_p = ipil_p->ipil_next_p) {
		for (i = 0, ih_p = ipil_p->ipil_ih_start;
		    i < ipil_p->ipil_ih_size; i++, ih_p = ih_p->ih_next)
			cmn_err(CE_CONT, "!%s-%d#%x ", NAMEINST(ih_p->ih_dip),
			    ih_p->ih_inum);
	}
	cmn_err(CE_CONT, "!\n");

	/* Clear the pending state */
	if (blocked == B_FALSE) {
		if (px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
		    INTR_IDLE_STATE) != DDI_SUCCESS)
			return (DDI_INTR_UNCLAIMED);
	}

	return (DDI_INTR_CLAIMED);
}
122 
123 extern uint64_t intr_get_time(void);
124 
125 /*
126  * px_intx_intr (INTx or legacy interrupt handler)
127  *
128  * This routine is used as wrapper around interrupt handlers installed by child
129  * device drivers.  This routine invokes the driver interrupt handlers and
130  * examines the return codes.
131  *
132  * There is a count of unclaimed interrupts kept on a per-ino basis. If at
133  * least one handler claims the interrupt then the counter is halved and the
134  * interrupt state machine is idled. If no handler claims the interrupt then
135  * the counter is incremented by one and the state machine is idled.
136  * If the count ever reaches the limit value set by pci_unclaimed_intr_max
137  * then the interrupt state machine is not idled thus preventing any further
138  * interrupts on that ino. The state machine will only be idled again if a
139  * handler is subsequently added or removed.
140  *
141  * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
142  * DDI_INTR_UNCLAIMED otherwise.
143  */
uint_t
px_intx_intr(caddr_t arg)
{
	px_ino_pil_t	*ipil_p = (px_ino_pil_t *)arg;
	px_ino_t	*ino_p = ipil_p->ipil_ino_p;
	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
	px_ih_t		*ih_p = ipil_p->ipil_ih_start;
	ushort_t	pil = ipil_p->ipil_pil;
	uint_t		result = 0, r = DDI_INTR_UNCLAIMED;
	int		i;

	DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
	    "ino=%x sysino=%llx pil=%x ih_size=%x ih_lst=%x\n",
	    ino_p->ino_ino, ino_p->ino_sysino, ipil_p->ipil_pil,
	    ipil_p->ipil_ih_size, ipil_p->ipil_ih_head);

	/* Invoke the child-driver handlers registered at this ino/PIL. */
	for (i = 0; i < ipil_p->ipil_ih_size; i++, ih_p = ih_p->ih_next) {
		dev_info_t *dip = ih_p->ih_dip;
		uint_t (*handler)() = ih_p->ih_handler;
		caddr_t arg1 = ih_p->ih_handler_arg1;
		caddr_t arg2 = ih_p->ih_handler_arg2;

		/* Skip handlers whose interrupt is currently disabled. */
		if (ih_p->ih_intr_state == PX_INTR_STATE_DISABLE) {
			DBG(DBG_INTX_INTR, px_p->px_dip,
			    "px_intx_intr: %s%d interrupt %d is disabled\n",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    ino_p->ino_ino);

			continue;
		}

		DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
		    "ino=%x handler=%p arg1 =%p arg2 = %p\n",
		    ino_p->ino_ino, handler, arg1, arg2);

		DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
		    void *, handler, caddr_t, arg1, caddr_t, arg2);

		r = (*handler)(arg1, arg2);

		/*
		 * Account for time used by this interrupt. Protect against
		 * conflicting writes to ih_ticks from ib_intr_dist_all() by
		 * using atomic ops.
		 */

		if (pil <= LOCK_LEVEL)
			atomic_add_64(&ih_p->ih_ticks, intr_get_time());

		DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
		    void *, handler, caddr_t, arg1, int, r);

		result += r;

		/*
		 * Normally stop at the first handler that claims the
		 * interrupt; px_check_all_handlers forces every handler
		 * to be called regardless.
		 */
		if (px_check_all_handlers)
			continue;
		if (result)
			break;
	}

	/* Record (per-PIL bit) that this level claimed the interrupt. */
	if (result)
		ino_p->ino_claimed |= (1 << pil);

	/* Interrupt can only be cleared after all pil levels are handled */
	if (pil != ino_p->ino_lopil)
		return (DDI_INTR_CLAIMED);

	/*
	 * Lowest PIL for this ino: if no level claimed the interrupt,
	 * take the jabber-protection path (when enabled) instead of
	 * idling the state machine.
	 */
	if (!ino_p->ino_claimed) {
		if (px_unclaimed_intr_block)
			return (px_spurintr(ipil_p));
	}

	/* Claimed: reset the unclaimed counter and per-PIL claim bits. */
	ino_p->ino_unclaimed_intrs = 0;
	ino_p->ino_claimed = 0;

	/* Clear the pending state */
	if (px_lib_intr_setstate(px_p->px_dip,
	    ino_p->ino_sysino, INTR_IDLE_STATE) != DDI_SUCCESS)
		return (DDI_INTR_UNCLAIMED);

	return (DDI_INTR_CLAIMED);
}
226 
227 /*
228  * px_msiq_intr (MSI/X or PCIe MSG interrupt handler)
229  *
230  * This routine is used as wrapper around interrupt handlers installed by child
231  * device drivers.  This routine invokes the driver interrupt handlers and
232  * examines the return codes.
233  *
234  * There is a count of unclaimed interrupts kept on a per-ino basis. If at
235  * least one handler claims the interrupt then the counter is halved and the
236  * interrupt state machine is idled. If no handler claims the interrupt then
237  * the counter is incremented by one and the state machine is idled.
238  * If the count ever reaches the limit value set by pci_unclaimed_intr_max
239  * then the interrupt state machine is not idled thus preventing any further
240  * interrupts on that ino. The state machine will only be idled again if a
241  * handler is subsequently added or removed.
242  *
243  * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
244  * DDI_INTR_UNCLAIMED otherwise.
245  */
uint_t
px_msiq_intr(caddr_t arg)
{
	px_ino_pil_t	*ipil_p = (px_ino_pil_t *)arg;
	px_ino_t	*ino_p = ipil_p->ipil_ino_p;
	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
	px_msiq_t	*msiq_p = ino_p->ino_msiq_p;
	dev_info_t	*dip = px_p->px_dip;
	ushort_t	pil = ipil_p->ipil_pil;
	msiq_rec_t	msiq_rec, *msiq_rec_p = &msiq_rec;
	msiqhead_t	*curr_head_p;
	msiqtail_t	curr_tail_index;
	msgcode_t	msg_code;
	px_ih_t		*ih_p;
	uint_t		ret = DDI_INTR_UNCLAIMED;
	int		i, j;

	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: msiq_id =%x ino=%x pil=%x "
	    "ih_size=%x ih_lst=%x\n", msiq_p->msiq_id, ino_p->ino_ino,
	    ipil_p->ipil_pil, ipil_p->ipil_ih_size, ipil_p->ipil_ih_head);

	/*
	 * The px_msiq_intr() handles multiple interrupt priorities and it
	 * will set msiq->msiq_rec2process to the number of MSIQ records to
	 * process while handling the highest priority interrupt. Subsequent
	 * lower priority interrupts will just process any unprocessed MSIQ
	 * records or will just return immediately.
	 */
	if (msiq_p->msiq_recs2process == 0) {
		/* First (highest-PIL) entry: arm the per-PIL countdown. */
		ASSERT(ino_p->ino_ipil_cntr == 0);
		ino_p->ino_ipil_cntr = ino_p->ino_ipil_size;

		/* Read current MSIQ tail index */
		px_lib_msiq_gettail(dip, msiq_p->msiq_id, &curr_tail_index);
		msiq_p->msiq_new_head_index = msiq_p->msiq_curr_head_index;

		/* Unwrap the tail so head..tail arithmetic is linear. */
		if (curr_tail_index < msiq_p->msiq_curr_head_index)
			curr_tail_index += msiq_state_p->msiq_rec_cnt;

		msiq_p->msiq_recs2process = curr_tail_index -
		    msiq_p->msiq_curr_head_index;
	}

	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: curr_head %x new_head %x "
	    "rec2process %x\n", msiq_p->msiq_curr_head_index,
	    msiq_p->msiq_new_head_index, msiq_p->msiq_recs2process);

	/* If all MSIQ records are already processed, just return immediately */
	if ((msiq_p->msiq_new_head_index - msiq_p->msiq_curr_head_index)
	    == msiq_p->msiq_recs2process)
		goto intr_done;

	/* Virtual address of the record at the current head index. */
	curr_head_p = (msiqhead_t *)((caddr_t)msiq_p->msiq_base_p +
	    (msiq_p->msiq_curr_head_index * sizeof (msiq_rec_t)));

	/*
	 * Calculate the number of recs to process by taking the difference
	 * between the head and tail pointers. For all records we always
	 * verify that we have a valid record type before we do any processing.
	 * If triggered, we should always have at least one valid record.
	 */
	for (i = 0; i < msiq_p->msiq_recs2process; i++) {
		msiq_rec_type_t rec_type;

		/* Read next MSIQ record */
		px_lib_get_msiq_rec(dip, curr_head_p, msiq_rec_p);

		rec_type = msiq_rec_p->msiq_rec_type;

		DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSIQ RECORD, "
		    "msiq_rec_type 0x%llx msiq_rec_rid 0x%llx\n",
		    rec_type, msiq_rec_p->msiq_rec_rid);

		/* A zero record type means an empty/already-cleared slot. */
		if (!rec_type)
			goto next_rec;

		/* Check MSIQ record type */
		switch (rec_type) {
		case MSG_REC:
			msg_code = msiq_rec_p->msiq_rec_data.msg.msg_code;
			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: PCIE MSG "
			    "record, msg type 0x%x\n", msg_code);
			break;
		case MSI32_REC:
		case MSI64_REC:
			msg_code = msiq_rec_p->msiq_rec_data.msi.msi_data;
			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSI record, "
			    "msi 0x%x\n", msg_code);

			/* Clear MSI state */
			px_lib_msi_setstate(dip, (msinum_t)msg_code,
			    PCI_MSI_STATE_IDLE);
			break;
		default:
			msg_code = 0;
			cmn_err(CE_WARN, "%s%d: px_msiq_intr: 0x%x MSIQ "
			    "record type is not supported",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    rec_type);

			goto next_rec;
		}

		/*
		 * Scan through px_ih_t linked list, searching for the
		 * right px_ih_t, matching MSIQ record data.
		 */
		for (j = 0, ih_p = ipil_p->ipil_ih_start;
		    ih_p && (j < ipil_p->ipil_ih_size) &&
		    ((ih_p->ih_msg_code != msg_code) ||
		    (ih_p->ih_rec_type != rec_type));
		    ih_p = ih_p->ih_next, j++)
			;

		/*
		 * NOTE(review): the dereference below assumes ih_p is
		 * non-NULL after the search loop; this appears to rely on
		 * the handler list being circular (as px_intx_intr's
		 * bounded traversal suggests) — confirm that invariant.
		 */
		if ((ih_p->ih_msg_code == msg_code) &&
		    (ih_p->ih_rec_type == rec_type)) {
			/* NB: this 'dip' shadows the px nexus dip above. */
			dev_info_t *dip = ih_p->ih_dip;
			uint_t (*handler)() = ih_p->ih_handler;
			caddr_t arg1 = ih_p->ih_handler_arg1;
			caddr_t arg2 = ih_p->ih_handler_arg2;

			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: ino=%x data=%x "
			    "handler=%p arg1 =%p arg2=%p\n", ino_p->ino_ino,
			    msg_code, handler, arg1, arg2);

			DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
			    void *, handler, caddr_t, arg1, caddr_t, arg2);

			ih_p->ih_retarget_flag = B_FALSE;

			/*
			 * Special case for PCIE Error Messages.
			 * The current frame work doesn't fit PCIE Err Msgs
			 * This should be fixed when PCIE MESSAGES as a whole
			 * is architected correctly.
			 */
			if ((rec_type == MSG_REC) &&
			    ((msg_code == PCIE_MSG_CODE_ERR_COR) ||
			    (msg_code == PCIE_MSG_CODE_ERR_NONFATAL) ||
			    (msg_code == PCIE_MSG_CODE_ERR_FATAL))) {
				ret = px_err_fabric_intr(px_p, msg_code,
				    msiq_rec_p->msiq_rec_rid);
			} else
				ret = (*handler)(arg1, arg2);

			/*
			 * Account for time used by this interrupt. Protect
			 * against conflicting writes to ih_ticks from
			 * ib_intr_dist_all() by using atomic ops.
			 */

			if (pil <= LOCK_LEVEL)
				atomic_add_64(&ih_p->ih_ticks, intr_get_time());

			DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
			    void *, handler, caddr_t, arg1, int, ret);

			/* Record consumed: advance and clear the slot. */
			msiq_p->msiq_new_head_index++;
			px_lib_clr_msiq_rec(dip, curr_head_p);
		} else {
			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr:"
			    "No matching MSIQ record found\n");
		}
next_rec:
		/* Get the pointer next EQ record */
		curr_head_p = (msiqhead_t *)
		    ((caddr_t)curr_head_p + sizeof (msiq_rec_t));

		/* Check for overflow condition */
		if (curr_head_p >= (msiqhead_t *)((caddr_t)msiq_p->msiq_base_p
		    + (msiq_state_p->msiq_rec_cnt * sizeof (msiq_rec_t))))
			curr_head_p = (msiqhead_t *)msiq_p->msiq_base_p;
	}

	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: No of MSIQ recs processed %x\n",
	    (msiq_p->msiq_new_head_index - msiq_p->msiq_curr_head_index));

	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: curr_head %x new_head %x "
	    "rec2process %x\n", msiq_p->msiq_curr_head_index,
	    msiq_p->msiq_new_head_index, msiq_p->msiq_recs2process);

	/* ino_claimed used just for debugging purpose */
	if (ret)
		ino_p->ino_claimed |= (1 << pil);

intr_done:
	/* Interrupt can only be cleared after all pil levels are handled */
	if (--ino_p->ino_ipil_cntr != 0)
		return (DDI_INTR_CLAIMED);

	/*
	 * Last PIL level: if no records were consumed, take the
	 * jabber-protection path (when enabled) instead of idling.
	 */
	if (msiq_p->msiq_new_head_index <= msiq_p->msiq_curr_head_index)  {
		if (px_unclaimed_intr_block)
			return (px_spurintr(ipil_p));
	}

	/*  Update MSIQ head index with no of MSIQ records processed */
	if (msiq_p->msiq_new_head_index >= msiq_state_p->msiq_rec_cnt)
		msiq_p->msiq_new_head_index -= msiq_state_p->msiq_rec_cnt;

	msiq_p->msiq_curr_head_index = msiq_p->msiq_new_head_index;
	px_lib_msiq_sethead(dip, msiq_p->msiq_id, msiq_p->msiq_new_head_index);

	/* Reset per-batch bookkeeping for the next interrupt. */
	msiq_p->msiq_new_head_index = 0;
	msiq_p->msiq_recs2process = 0;
	ino_p->ino_claimed = 0;

	/* Clear the pending state */
	if (px_lib_intr_setstate(dip, ino_p->ino_sysino,
	    INTR_IDLE_STATE) != DDI_SUCCESS)
		return (DDI_INTR_UNCLAIMED);

	return (DDI_INTR_CLAIMED);
}
460 
461 dev_info_t *
462 px_get_my_childs_dip(dev_info_t *dip, dev_info_t *rdip)
463 {
464 	dev_info_t	*cdip = rdip;
465 
466 	for (; ddi_get_parent(cdip) != dip; cdip = ddi_get_parent(cdip))
467 		;
468 
469 	return (cdip);
470 }
471 
472 /* ARGSUSED */
473 int
474 px_intx_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
475     ddi_intr_handle_impl_t *hdlp, void *result)
476 {
477 	px_t	*px_p = DIP_TO_STATE(dip);
478 	int	ret = DDI_SUCCESS;
479 
480 	DBG(DBG_INTROPS, dip, "px_intx_ops: dip=%x rdip=%x intr_op=%x "
481 	    "handle=%p\n", dip, rdip, intr_op, hdlp);
482 
483 	switch (intr_op) {
484 	case DDI_INTROP_GETCAP:
485 		ret = pci_intx_get_cap(rdip, (int *)result);
486 		break;
487 	case DDI_INTROP_SETCAP:
488 		DBG(DBG_INTROPS, dip, "px_intx_ops: SetCap is not supported\n");
489 		ret = DDI_ENOTSUP;
490 		break;
491 	case DDI_INTROP_ALLOC:
492 		*(int *)result = hdlp->ih_scratch1;
493 		break;
494 	case DDI_INTROP_FREE:
495 		break;
496 	case DDI_INTROP_GETPRI:
497 		*(int *)result = hdlp->ih_pri ?
498 		    hdlp->ih_pri : pci_class_to_pil(rdip);
499 		break;
500 	case DDI_INTROP_SETPRI:
501 		break;
502 	case DDI_INTROP_ADDISR:
503 		ret = px_add_intx_intr(dip, rdip, hdlp);
504 		break;
505 	case DDI_INTROP_REMISR:
506 		ret = px_rem_intx_intr(dip, rdip, hdlp);
507 		break;
508 	case DDI_INTROP_GETTARGET:
509 		ret = px_ib_get_intr_target(px_p, hdlp->ih_vector,
510 		    (cpuid_t *)result);
511 		break;
512 	case DDI_INTROP_SETTARGET:
513 		ret = DDI_ENOTSUP;
514 		break;
515 	case DDI_INTROP_ENABLE:
516 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
517 		    hdlp->ih_vector, hdlp->ih_pri, PX_INTR_STATE_ENABLE, 0, 0);
518 		break;
519 	case DDI_INTROP_DISABLE:
520 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
521 		    hdlp->ih_vector, hdlp->ih_pri, PX_INTR_STATE_DISABLE, 0, 0);
522 		break;
523 	case DDI_INTROP_SETMASK:
524 		ret = pci_intx_set_mask(rdip);
525 		break;
526 	case DDI_INTROP_CLRMASK:
527 		ret = pci_intx_clr_mask(rdip);
528 		break;
529 	case DDI_INTROP_GETPENDING:
530 		ret = pci_intx_get_pending(rdip, (int *)result);
531 		break;
532 	case DDI_INTROP_NINTRS:
533 	case DDI_INTROP_NAVAIL:
534 		*(int *)result = i_ddi_get_intx_nintrs(rdip);
535 		break;
536 	default:
537 		ret = DDI_ENOTSUP;
538 		break;
539 	}
540 
541 	return (ret);
542 }
543 
/* ARGSUSED */
/*
 * Bus interrupt-op entry point for MSI and MSI-X interrupts.
 * Translates each DDI interrupt operation into the corresponding
 * px MSI/MSIQ library calls for rdip.
 */
int
px_msix_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
    ddi_intr_handle_impl_t *hdlp, void *result)
{
	px_t			*px_p = DIP_TO_STATE(dip);
	px_msi_state_t		*msi_state_p = &px_p->px_ib_p->ib_msi_state;
	msiq_rec_type_t		msiq_rec_type;
	msi_type_t		msi_type;
	uint64_t		msi_addr;
	msinum_t		msi_num;
	msiqid_t		msiq_id;
	uint_t			nintrs;
	int			ret = DDI_SUCCESS;

	DBG(DBG_INTROPS, dip, "px_msix_ops: dip=%x rdip=%x intr_op=%x "
	    "handle=%p\n", dip, rdip, intr_op, hdlp);

	/* Check for MSI64 support */
	if ((hdlp->ih_cap & DDI_INTR_FLAG_MSI64) && msi_state_p->msi_addr64) {
		msiq_rec_type = MSI64_REC;
		msi_type = MSI64_TYPE;
		msi_addr = msi_state_p->msi_addr64;
	} else {
		msiq_rec_type = MSI32_REC;
		msi_type = MSI32_TYPE;
		msi_addr = msi_state_p->msi_addr32;
	}

	/*
	 * Resolve the MSI number up front; for a DUP handle use the
	 * original (main) handle's inum.
	 */
	(void) px_msi_get_msinum(px_p, hdlp->ih_dip,
	    (hdlp->ih_flags & DDI_INTR_MSIX_DUP) ? hdlp->ih_main->ih_inum :
	    hdlp->ih_inum, &msi_num);

	switch (intr_op) {
	case DDI_INTROP_GETCAP:
		ret = pci_msi_get_cap(rdip, hdlp->ih_type, (int *)result);
		if (ret == DDI_SUCCESS)
			*(int *)result |= DDI_INTR_FLAG_RETARGETABLE;
		break;
	case DDI_INTROP_SETCAP:
		DBG(DBG_INTROPS, dip, "px_msix_ops: SetCap is not supported\n");
		ret = DDI_ENOTSUP;
		break;
	case DDI_INTROP_ALLOC:
		/*
		 * We need to restrict this allocation in future
		 * based on Resource Management policies.
		 */
		if ((ret = px_msi_alloc(px_p, rdip, hdlp->ih_type,
		    hdlp->ih_inum, hdlp->ih_scratch1,
		    (uintptr_t)hdlp->ih_scratch2,
		    (int *)result)) != DDI_SUCCESS) {
			DBG(DBG_INTROPS, dip, "px_msix_ops: allocation "
			    "failed, rdip 0x%p type 0x%d inum 0x%x "
			    "count 0x%x\n", rdip, hdlp->ih_type, hdlp->ih_inum,
			    hdlp->ih_scratch1);

			return (ret);
		}

		/*
		 * First MSI-X allocation for this device: set up its
		 * MSI-X table state; undo the allocation on failure.
		 */
		if ((hdlp->ih_type == DDI_INTR_TYPE_MSIX) &&
		    (i_ddi_get_msix(rdip) == NULL)) {
			ddi_intr_msix_t		*msix_p;

			if (msix_p = pci_msix_init(rdip)) {
				i_ddi_set_msix(rdip, msix_p);
				break;
			}

			DBG(DBG_INTROPS, dip, "px_msix_ops: MSI-X allocation "
			    "failed, rdip 0x%p inum 0x%x\n", rdip,
			    hdlp->ih_inum);

			(void) px_msi_free(px_p, rdip, hdlp->ih_inum,
			    hdlp->ih_scratch1);

			return (DDI_FAILURE);
		}

		break;
	case DDI_INTROP_FREE:
		(void) pci_msi_unconfigure(rdip, hdlp->ih_type, hdlp->ih_inum);

		/* MSI has no per-device MSI-X table state to tear down. */
		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
			goto msi_free;

		/* DUP handles don't own the underlying vector. */
		if (hdlp->ih_flags & DDI_INTR_MSIX_DUP)
			break;

		/* Last MSI-X interrupt going away: free the table state. */
		if (((i_ddi_intr_get_current_nintrs(hdlp->ih_dip) - 1) == 0) &&
		    (i_ddi_get_msix(rdip))) {
			pci_msix_fini(i_ddi_get_msix(rdip));
			i_ddi_set_msix(rdip, NULL);
		}
msi_free:
		(void) px_msi_free(px_p, rdip, hdlp->ih_inum,
		    hdlp->ih_scratch1);
		break;
	case DDI_INTROP_GETPRI:
		/* Fall back to the class-code PIL when none is set. */
		*(int *)result = hdlp->ih_pri ?
		    hdlp->ih_pri : pci_class_to_pil(rdip);
		break;
	case DDI_INTROP_SETPRI:
		break;
	case DDI_INTROP_ADDISR:
		/*
		 * Register the handler on an MSIQ, then bind the MSI to
		 * that MSIQ and idle it; unwind the registration if any
		 * step fails.
		 */
		if ((ret = px_add_msiq_intr(dip, rdip, hdlp,
		    msiq_rec_type, msi_num, -1, &msiq_id)) != DDI_SUCCESS) {
			DBG(DBG_INTROPS, dip, "px_msix_ops: Add MSI handler "
			    "failed, rdip 0x%p msi 0x%x\n", rdip, msi_num);
			return (ret);
		}

		DBG(DBG_INTROPS, dip, "px_msix_ops: msiq used 0x%x\n", msiq_id);

		if ((ret = px_lib_msi_setmsiq(dip, msi_num,
		    msiq_id, msi_type)) != DDI_SUCCESS) {
			(void) px_rem_msiq_intr(dip, rdip,
			    hdlp, msiq_rec_type, msi_num, msiq_id);
			return (ret);
		}

		if ((ret = px_lib_msi_setstate(dip, msi_num,
		    PCI_MSI_STATE_IDLE)) != DDI_SUCCESS) {
			(void) px_rem_msiq_intr(dip, rdip,
			    hdlp, msiq_rec_type, msi_num, msiq_id);
			return (ret);
		}

		if ((ret = px_lib_msi_setvalid(dip, msi_num,
		    PCI_MSI_VALID)) != DDI_SUCCESS)
			return (ret);

		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
		    px_msiqid_to_devino(px_p, msiq_id), hdlp->ih_pri,
		    PX_INTR_STATE_ENABLE, msiq_rec_type, msi_num);

		break;
	case DDI_INTROP_DUPVEC:
		DBG(DBG_INTROPS, dip, "px_msix_ops: dupisr - inum: %x, "
		    "new_vector: %x\n", hdlp->ih_inum, hdlp->ih_scratch1);

		ret = pci_msix_dup(hdlp->ih_dip, hdlp->ih_inum,
		    hdlp->ih_scratch1);
		break;
	case DDI_INTROP_REMISR:
		/*
		 * Teardown is the reverse of ADDISR: disable the intr
		 * state, invalidate and idle the MSI, then remove the
		 * handler from its MSIQ.
		 */
		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
		    &msiq_id)) != DDI_SUCCESS)
			return (ret);

		if ((ret = px_ib_update_intr_state(px_p, rdip,
		    hdlp->ih_inum, px_msiqid_to_devino(px_p, msiq_id),
		    hdlp->ih_pri, PX_INTR_STATE_DISABLE, msiq_rec_type,
		    msi_num)) != DDI_SUCCESS)
			return (ret);

		if ((ret = px_lib_msi_setvalid(dip, msi_num,
		    PCI_MSI_INVALID)) != DDI_SUCCESS)
			return (ret);

		if ((ret = px_lib_msi_setstate(dip, msi_num,
		    PCI_MSI_STATE_IDLE)) != DDI_SUCCESS)
			return (ret);

		ret = px_rem_msiq_intr(dip, rdip,
		    hdlp, msiq_rec_type, msi_num, msiq_id);

		break;
	case DDI_INTROP_GETTARGET:
		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
		    &msiq_id)) != DDI_SUCCESS)
			return (ret);

		ret = px_ib_get_intr_target(px_p,
		    px_msiqid_to_devino(px_p, msiq_id), (cpuid_t *)result);
		break;
	case DDI_INTROP_SETTARGET:
		ret = px_ib_set_msix_target(px_p, hdlp, msi_num,
		    *(cpuid_t *)result);
		break;
	case DDI_INTROP_ENABLE:
		/*
		 * For MSI, just clear the mask bit and return if curr_nenables
		 * is > 1. For MSI-X, program MSI address and data for every
		 * MSI-X vector including dup vectors irrespective of current
		 * curr_nenables value.
		 */
		if ((pci_is_msi_enabled(rdip, hdlp->ih_type) != DDI_SUCCESS) ||
		    (hdlp->ih_type == DDI_INTR_TYPE_MSIX)) {
			nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);

			/*
			 * MSI vectors are block-aligned; mask msi_num down
			 * to the base of the allocated block for MSI.
			 */
			if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
			    nintrs, hdlp->ih_inum, msi_addr,
			    hdlp->ih_type == DDI_INTR_TYPE_MSIX ?
			    msi_num : msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
				return (ret);

			if (i_ddi_intr_get_current_nenables(rdip) < 1) {
				if ((ret = pci_msi_enable_mode(rdip,
				    hdlp->ih_type)) != DDI_SUCCESS)
					return (ret);
			}
		}

		if ((ret = pci_msi_clr_mask(rdip, hdlp->ih_type,
		    hdlp->ih_inum)) != DDI_SUCCESS)
			return (ret);

		break;
	case DDI_INTROP_DISABLE:
		if ((ret = pci_msi_set_mask(rdip, hdlp->ih_type,
		    hdlp->ih_inum)) != DDI_SUCCESS)
			return (ret);

		/*
		 * curr_nenables will be greater than 1 if rdip is using
		 * MSI-X and also, if it is using DUP interface. If this
		 * curr_enables is > 1, return after setting the mask bit.
		 */
		if (i_ddi_intr_get_current_nenables(rdip) > 1)
			return (DDI_SUCCESS);

		if ((ret = pci_msi_disable_mode(rdip, hdlp->ih_type))
		    != DDI_SUCCESS)
			return (ret);

		break;
	case DDI_INTROP_BLOCKENABLE:
		/* Configure and enable the whole MSI block at once. */
		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);

		if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
		    nintrs, hdlp->ih_inum, msi_addr,
		    msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
			return (ret);

		ret = pci_msi_enable_mode(rdip, hdlp->ih_type);
		break;
	case DDI_INTROP_BLOCKDISABLE:
		ret = pci_msi_disable_mode(rdip, hdlp->ih_type);
		break;
	case DDI_INTROP_SETMASK:
		ret = pci_msi_set_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
		break;
	case DDI_INTROP_CLRMASK:
		ret = pci_msi_clr_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
		break;
	case DDI_INTROP_GETPENDING:
		ret = pci_msi_get_pending(rdip, hdlp->ih_type,
		    hdlp->ih_inum, (int *)result);
		break;
	case DDI_INTROP_NINTRS:
		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
		break;
	case DDI_INTROP_NAVAIL:
		/* XXX - a new interface may be needed */
		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
		break;
	case DDI_INTROP_GETPOOL:
		if (msi_state_p->msi_pool_p == NULL) {
			*(ddi_irm_pool_t **)result = NULL;
			return (DDI_ENOTSUP);
		}
		*(ddi_irm_pool_t **)result = msi_state_p->msi_pool_p;
		ret = DDI_SUCCESS;
		break;
	default:
		ret = DDI_ENOTSUP;
		break;
	}

	return (ret);
}
815 
/*
 * Template kstat data shared by every pci_intrs::: kstat created below.
 * Each kstat's ks_data points at this single structure; px_ks_update()
 * repopulates it on every read, serialized by pxintr_ks_template_lock.
 */
static struct {
	kstat_named_t pxintr_ks_name;		/* leaf driver name+instance */
	kstat_named_t pxintr_ks_type;		/* "msi"/"msix"/"fixed"/... */
	kstat_named_t pxintr_ks_cpu;		/* CPU the ino is bound to */
	kstat_named_t pxintr_ks_pil;		/* interrupt priority level */
	kstat_named_t pxintr_ks_time;		/* cumulative handler time, ns */
	kstat_named_t pxintr_ks_ino;		/* device ino */
	kstat_named_t pxintr_ks_cookie;		/* system ino (sysino) */
	kstat_named_t pxintr_ks_devpath;	/* leaf device path */
	kstat_named_t pxintr_ks_buspath;	/* px nexus path */
} pxintr_ks_template = {
	{ "name",	KSTAT_DATA_CHAR },
	{ "type",	KSTAT_DATA_CHAR },
	{ "cpu",	KSTAT_DATA_UINT64 },
	{ "pil",	KSTAT_DATA_UINT64 },
	{ "time",	KSTAT_DATA_UINT64 },
	{ "ino",	KSTAT_DATA_UINT64 },
	{ "cookie",	KSTAT_DATA_UINT64 },
	{ "devpath",	KSTAT_DATA_STRING },
	{ "buspath",	KSTAT_DATA_STRING },
};

/* Monotonic instance number for pci_intrs kstats. */
static uint32_t pxintr_ks_instance;
/* Scratch path buffers filled by px_ks_update() under the template lock. */
static char ih_devpath[MAXPATHLEN];
static char ih_buspath[MAXPATHLEN];
/* Serializes access to pxintr_ks_template and the path buffers. */
kmutex_t pxintr_ks_template_lock;
842 
/*
 * kstat ks_update callback: refresh the shared pxintr_ks_template with
 * this handler's current name, type, CPU binding, PIL, accumulated time,
 * ino and sysino.  The kstat framework calls this with ks_lock
 * (pxintr_ks_template_lock) held.  Always returns 0.
 */
int
px_ks_update(kstat_t *ksp, int rw)
{
	px_ih_t *ih_p = ksp->ks_private;
	int maxlen = sizeof (pxintr_ks_template.pxintr_ks_name.value.c);
	px_ino_pil_t *ipil_p = ih_p->ih_ipil_p;
	px_ino_t *ino_p = ipil_p->ipil_ino_p;
	px_t *px_p = ino_p->ino_ib_p->ib_px_p;
	devino_t ino;
	sysino_t sysino;

	ino = ino_p->ino_ino;
	/* On failure we only warn; sysino may then be stale/undefined. */
	if (px_lib_intr_devino_to_sysino(px_p->px_dip, ino, &sysino) !=
	    DDI_SUCCESS) {
		cmn_err(CE_WARN, "px_ks_update: px_lib_intr_devino_to_sysino "
		    "failed");
	}

	(void) snprintf(pxintr_ks_template.pxintr_ks_name.value.c, maxlen,
	    "%s%d", ddi_driver_name(ih_p->ih_dip),
	    ddi_get_instance(ih_p->ih_dip));

	(void) ddi_pathname(ih_p->ih_dip, ih_devpath);
	(void) ddi_pathname(px_p->px_dip, ih_buspath);
	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_devpath, ih_devpath);
	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_buspath, ih_buspath);

	if (ih_p->ih_intr_state == PX_INTR_STATE_ENABLE) {

		/* Report the interrupt type currently in use by the leaf. */
		switch (i_ddi_intr_get_current_type(ih_p->ih_dip)) {
		case DDI_INTR_TYPE_MSI:
			(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
			    "msi");
			break;
		case DDI_INTR_TYPE_MSIX:
			(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
			    "msix");
			break;
		default:
			(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
			    "fixed");
			break;
		}

		pxintr_ks_template.pxintr_ks_cpu.value.ui64 = ino_p->ino_cpuid;
		pxintr_ks_template.pxintr_ks_pil.value.ui64 = ipil_p->ipil_pil;
		/* Total time = banked nanoseconds + unconverted tick count. */
		pxintr_ks_template.pxintr_ks_time.value.ui64 = ih_p->ih_nsec +
		    (uint64_t)tick2ns((hrtime_t)ih_p->ih_ticks,
		    ino_p->ino_cpuid);
		pxintr_ks_template.pxintr_ks_ino.value.ui64 = ino;
		pxintr_ks_template.pxintr_ks_cookie.value.ui64 = sysino;
	} else {
		/* Disabled handler: report zeroed statistics. */
		(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
		    "disabled");
		pxintr_ks_template.pxintr_ks_cpu.value.ui64 = 0;
		pxintr_ks_template.pxintr_ks_pil.value.ui64 = 0;
		pxintr_ks_template.pxintr_ks_time.value.ui64 = 0;
		pxintr_ks_template.pxintr_ks_ino.value.ui64 = 0;
		pxintr_ks_template.pxintr_ks_cookie.value.ui64 = 0;
	}
	return (0);
}
905 
906 void
907 px_create_intr_kstats(px_ih_t *ih_p)
908 {
909 	msiq_rec_type_t rec_type = ih_p->ih_rec_type;
910 
911 	ASSERT(ih_p->ih_ksp == NULL);
912 
913 	/*
914 	 * Create pci_intrs::: kstats for all ih types except messages,
915 	 * which represent unusual conditions and don't need to be tracked.
916 	 */
917 	if (rec_type == 0 || rec_type == MSI32_REC || rec_type == MSI64_REC) {
918 		ih_p->ih_ksp = kstat_create("pci_intrs",
919 		    atomic_inc_32_nv(&pxintr_ks_instance), "config",
920 		    "interrupts", KSTAT_TYPE_NAMED,
921 		    sizeof (pxintr_ks_template) / sizeof (kstat_named_t),
922 		    KSTAT_FLAG_VIRTUAL);
923 	}
924 	if (ih_p->ih_ksp != NULL) {
925 		ih_p->ih_ksp->ks_data_size += MAXPATHLEN * 2;
926 		ih_p->ih_ksp->ks_lock = &pxintr_ks_template_lock;
927 		ih_p->ih_ksp->ks_data = &pxintr_ks_template;
928 		ih_p->ih_ksp->ks_private = ih_p;
929 		ih_p->ih_ksp->ks_update = px_ks_update;
930 	}
931 }
932 
933 /*
934  * px_add_intx_intr:
935  *
936  * This function is called to register INTx and legacy hardware
937  * interrupt pins interrupts.
938  */
int
px_add_intx_intr(dev_info_t *dip, dev_info_t *rdip,
    ddi_intr_handle_impl_t *hdlp)
{
	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
	px_ib_t		*ib_p = px_p->px_ib_p;
	devino_t	ino;
	px_ih_t		*ih_p;
	px_ino_t	*ino_p;
	px_ino_pil_t	*ipil_p, *ipil_list;
	int32_t		weight;
	int		ret = DDI_SUCCESS;

	/* For INTx the vector carried in the handle is the device ino. */
	ino = hdlp->ih_vector;

	DBG(DBG_A_INTX, dip, "px_add_intx_intr: rdip=%s%d ino=%x "
	    "handler=%x arg1=%x arg2=%x\n", ddi_driver_name(rdip),
	    ddi_get_instance(rdip), ino, hdlp->ih_cb_func,
	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);

	/* Bookkeeping entry for this handler; rec_type/msg_code 0 == INTx. */
	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum,
	    hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, 0, 0);

	/* ib_ino_lst_mutex is held across the whole add/fail sequence. */
	mutex_enter(&ib_p->ib_ino_lst_mutex);

	ino_p = px_ib_locate_ino(ib_p, ino);
	ipil_list = ino_p ? ino_p->ino_ipil_p : NULL;

	/* Sharing ino */
	if (ino_p && (ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri))) {
		/* Reject a duplicate registration of the same (rdip, inum). */
		if (px_ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum, 0, 0)) {
			DBG(DBG_A_INTX, dip, "px_add_intx_intr: "
			    "dup intr #%d\n", hdlp->ih_inum);

			ret = DDI_FAILURE;
			goto fail1;
		}

		/* Save mondo value in hdlp */
		hdlp->ih_vector = ino_p->ino_sysino;

		if ((ret = px_ib_ino_add_intr(px_p, ipil_p,
		    ih_p)) != DDI_SUCCESS)
			goto fail1;

		goto ino_done;
	}

	/* No ino/pil node yet: derive a default pil if caller gave none. */
	if (hdlp->ih_pri == 0)
		hdlp->ih_pri = pci_class_to_pil(rdip);

	ipil_p = px_ib_new_ino_pil(ib_p, ino, hdlp->ih_pri, ih_p);
	ino_p = ipil_p->ipil_ino_p;

	/* Save mondo value in hdlp */
	hdlp->ih_vector = ino_p->ino_sysino;

	DBG(DBG_A_INTX, dip, "px_add_intx_intr: pil=0x%x mondo=0x%x\n",
	    hdlp->ih_pri, hdlp->ih_vector);

	/* Register the px INTx wrapper as the system-level handler. */
	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
	    (ddi_intr_handler_t *)px_intx_intr, (caddr_t)ipil_p, NULL);

	ret = i_ddi_add_ivintr(hdlp);

	/*
	 * Restore original interrupt handler
	 * and arguments in interrupt handle.
	 */
	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
	    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);

	if (ret != DDI_SUCCESS)
		goto fail2;

	/* Save the pil for this ino */
	ipil_p->ipil_pil = hdlp->ih_pri;

	/* Select cpu, saving it for sharing and removal */
	if (ipil_list == NULL) {
		if (ino_p->ino_cpuid == -1)
			ino_p->ino_cpuid = intr_dist_cpuid();

		/* Enable interrupt */
		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino);
	}

ino_done:
	hdlp->ih_target = ino_p->ino_cpuid;

	/* Add weight to the cpu that we are already targeting */
	weight = pci_class_to_intr_weight(rdip);
	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);

	ih_p->ih_ipil_p = ipil_p;
	px_create_intr_kstats(ih_p);
	if (ih_p->ih_ksp)
		kstat_install(ih_p->ih_ksp);
	mutex_exit(&ib_p->ib_ino_lst_mutex);

	DBG(DBG_A_INTX, dip, "px_add_intx_intr: done! Interrupt 0x%x pil=%x\n",
	    ino_p->ino_sysino, hdlp->ih_pri);

	return (ret);
fail2:
	/* Undo the ino/pil node created above. */
	px_ib_delete_ino_pil(ib_p, ipil_p);
fail1:
	/* Common failure path: free the handler entry and drop the lock. */
	if (ih_p->ih_config_handle)
		pci_config_teardown(&ih_p->ih_config_handle);

	mutex_exit(&ib_p->ib_ino_lst_mutex);
	kmem_free(ih_p, sizeof (px_ih_t));

	DBG(DBG_A_INTX, dip, "px_add_intx_intr: Failed! Interrupt 0x%x "
	    "pil=%x\n", ino_p->ino_sysino, hdlp->ih_pri);

	return (ret);
}
1057 
1058 /*
1059  * px_rem_intx_intr:
1060  *
1061  * This function is called to unregister INTx and legacy hardware
1062  * interrupt pins interrupts.
1063  */
int
px_rem_intx_intr(dev_info_t *dip, dev_info_t *rdip,
    ddi_intr_handle_impl_t *hdlp)
{
	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
	px_ib_t		*ib_p = px_p->px_ib_p;
	devino_t	ino;
	cpuid_t		curr_cpu;
	px_ino_t	*ino_p;
	px_ino_pil_t	*ipil_p;
	px_ih_t		*ih_p;
	int		ret = DDI_SUCCESS;

	/* For INTx the vector carried in the handle is the device ino. */
	ino = hdlp->ih_vector;

	DBG(DBG_R_INTX, dip, "px_rem_intx_intr: rdip=%s%d ino=%x\n",
	    ddi_driver_name(rdip), ddi_get_instance(rdip), ino);

	mutex_enter(&ib_p->ib_ino_lst_mutex);

	/*
	 * NOTE(review): these lookups assume the interrupt was previously
	 * added via px_add_intx_intr(); the results are not NULL-checked.
	 */
	ino_p = px_ib_locate_ino(ib_p, ino);
	ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri);
	ih_p = px_ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum, 0, 0);

	/* Get the current cpu */
	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
	    &curr_cpu)) != DDI_SUCCESS)
		goto fail;

	if ((ret = px_ib_ino_rem_intr(px_p, ipil_p, ih_p)) != DDI_SUCCESS)
		goto fail;

	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);

	/* Last handler at this pil: unregister the ivintr and the pil node. */
	if (ipil_p->ipil_ih_size == 0) {
		hdlp->ih_vector = ino_p->ino_sysino;
		i_ddi_rem_ivintr(hdlp);

		px_ib_delete_ino_pil(ib_p, ipil_p);
	}

	/* Free the ino itself once no pil nodes remain on it. */
	if (ino_p->ino_ipil_size == 0) {
		kmem_free(ino_p, sizeof (px_ino_t));
	} else {
		/* Re-enable interrupt only if mapping register still shared */
		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
	}

fail:
	mutex_exit(&ib_p->ib_ino_lst_mutex);
	return (ret);
}
1116 
1117 /*
1118  * px_add_msiq_intr:
1119  *
1120  * This function is called to register MSI/Xs and PCIe message interrupts.
1121  */
int
px_add_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
    ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
    msgcode_t msg_code, cpuid_t cpu_id, msiqid_t *msiq_id_p)
{
	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
	px_ib_t		*ib_p = px_p->px_ib_p;
	px_msiq_state_t	*msiq_state_p = &ib_p->ib_msiq_state;
	devino_t	ino;
	px_ih_t		*ih_p;
	px_ino_t	*ino_p;
	px_ino_pil_t	*ipil_p, *ipil_list;
	int32_t		weight;
	int		ret = DDI_SUCCESS;

	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: rdip=%s%d handler=0x%x "
	    "arg1=0x%x arg2=0x%x cpu=0x%x\n", ddi_driver_name(rdip),
	    ddi_get_instance(rdip), hdlp->ih_cb_func, hdlp->ih_cb_arg1,
	    hdlp->ih_cb_arg2, cpu_id);

	/* Bookkeeping entry keyed by (rdip, inum, rec_type, msg_code). */
	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum, hdlp->ih_cb_func,
	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, rec_type, msg_code);

	/* ib_ino_lst_mutex is held across the whole add/fail sequence. */
	mutex_enter(&ib_p->ib_ino_lst_mutex);

	/* cpu_id == -1 means the caller has no cpu preference. */
	ret = (cpu_id == -1) ? px_msiq_alloc(px_p, rec_type, msiq_id_p) :
	    px_msiq_alloc_based_on_cpuid(px_p, rec_type, cpu_id, msiq_id_p);

	if (ret != DDI_SUCCESS) {
		DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
		    "msiq allocation failed\n");
		goto fail;
	}

	/* The MSIQ's devino is what gets wired to a cpu below. */
	ino = px_msiqid_to_devino(px_p, *msiq_id_p);

	ino_p = px_ib_locate_ino(ib_p, ino);
	ipil_list = ino_p ? ino_p->ino_ipil_p : NULL;

	/* Sharing ino */
	if (ino_p && (ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri))) {
		/* Reject a duplicate registration of the same handler. */
		if (px_ib_intr_locate_ih(ipil_p, rdip,
		    hdlp->ih_inum, rec_type, msg_code)) {
			DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
			    "dup intr #%d\n", hdlp->ih_inum);

			ret = DDI_FAILURE;
			goto fail1;
		}

		/* Save mondo value in hdlp */
		hdlp->ih_vector = ino_p->ino_sysino;

		if ((ret = px_ib_ino_add_intr(px_p, ipil_p,
		    ih_p)) != DDI_SUCCESS)
			goto fail1;

		goto ino_done;
	}

	/* No ino/pil node yet: derive a default pil if caller gave none. */
	if (hdlp->ih_pri == 0)
		hdlp->ih_pri = pci_class_to_pil(rdip);

	ipil_p = px_ib_new_ino_pil(ib_p, ino, hdlp->ih_pri, ih_p);
	ino_p = ipil_p->ipil_ino_p;

	/* Link the ino to its msiq state entry. */
	ino_p->ino_msiq_p = msiq_state_p->msiq_p +
	    (*msiq_id_p - msiq_state_p->msiq_1st_msiq_id);

	/* Save mondo value in hdlp */
	hdlp->ih_vector = ino_p->ino_sysino;

	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: pil=0x%x mondo=0x%x\n",
	    hdlp->ih_pri, hdlp->ih_vector);

	/* Register the px MSIQ wrapper as the system-level handler. */
	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
	    (ddi_intr_handler_t *)px_msiq_intr, (caddr_t)ipil_p, NULL);

	ret = i_ddi_add_ivintr(hdlp);

	/*
	 * Restore original interrupt handler
	 * and arguments in interrupt handle.
	 */
	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
	    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);

	if (ret != DDI_SUCCESS)
		goto fail2;

	/* Save the pil for this ino */
	ipil_p->ipil_pil = hdlp->ih_pri;

	/* Select cpu, saving it for sharing and removal */
	if (ipil_list == NULL) {
		/* Enable MSIQ */
		px_lib_msiq_setstate(dip, *msiq_id_p, PCI_MSIQ_STATE_IDLE);
		px_lib_msiq_setvalid(dip, *msiq_id_p, PCI_MSIQ_VALID);

		if (ino_p->ino_cpuid == -1)
			ino_p->ino_cpuid = intr_dist_cpuid();

		/* Enable interrupt */
		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino);
	}

ino_done:
	hdlp->ih_target = ino_p->ino_cpuid;

	/* Add weight to the cpu that we are already targeting */
	weight = pci_class_to_intr_weight(rdip);
	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);

	ih_p->ih_ipil_p = ipil_p;
	px_create_intr_kstats(ih_p);
	if (ih_p->ih_ksp)
		kstat_install(ih_p->ih_ksp);
	mutex_exit(&ib_p->ib_ino_lst_mutex);

	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: done! Interrupt 0x%x pil=%x\n",
	    ino_p->ino_sysino, hdlp->ih_pri);

	return (ret);
fail2:
	/* Undo the ino/pil node created above. */
	px_ib_delete_ino_pil(ib_p, ipil_p);
fail1:
	/* Release the msiq allocated earlier in this call. */
	(void) px_msiq_free(px_p, *msiq_id_p);
fail:
	/* Common failure path: free the handler entry and drop the lock. */
	if (ih_p->ih_config_handle)
		pci_config_teardown(&ih_p->ih_config_handle);

	mutex_exit(&ib_p->ib_ino_lst_mutex);
	kmem_free(ih_p, sizeof (px_ih_t));

	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: Failed! Interrupt 0x%x pil=%x\n",
	    ino_p->ino_sysino, hdlp->ih_pri);

	return (ret);
}
1261 
1262 /*
1263  * px_rem_msiq_intr:
1264  *
1265  * This function is called to unregister MSI/Xs and PCIe message interrupts.
1266  */
int
px_rem_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
    ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
    msgcode_t msg_code, msiqid_t msiq_id)
{
	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
	px_ib_t		*ib_p = px_p->px_ib_p;
	devino_t	ino = px_msiqid_to_devino(px_p, msiq_id);
	cpuid_t		curr_cpu;
	px_ino_t	*ino_p;
	px_ino_pil_t	*ipil_p;
	px_ih_t		*ih_p;
	int		ret = DDI_SUCCESS;

	DBG(DBG_MSIQ, dip, "px_rem_msiq_intr: rdip=%s%d msiq_id=%x ino=%x\n",
	    ddi_driver_name(rdip), ddi_get_instance(rdip), msiq_id, ino);

	mutex_enter(&ib_p->ib_ino_lst_mutex);

	/*
	 * NOTE(review): these lookups assume the interrupt was previously
	 * added via px_add_msiq_intr(); the results are not NULL-checked.
	 */
	ino_p = px_ib_locate_ino(ib_p, ino);
	ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri);
	ih_p = px_ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum, rec_type,
	    msg_code);

	/* Get the current cpu */
	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
	    &curr_cpu)) != DDI_SUCCESS)
		goto fail;

	if ((ret = px_ib_ino_rem_intr(px_p, ipil_p, ih_p)) != DDI_SUCCESS)
		goto fail;

	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);

	/* Last handler at this pil: unregister the ivintr and the pil node. */
	if (ipil_p->ipil_ih_size == 0) {
		hdlp->ih_vector = ino_p->ino_sysino;
		i_ddi_rem_ivintr(hdlp);

		px_ib_delete_ino_pil(ib_p, ipil_p);

		/* Invalidate the MSIQ once nothing is left on the ino. */
		if (ino_p->ino_ipil_size == 0)
			px_lib_msiq_setvalid(dip,
			    px_devino_to_msiqid(px_p, ino), PCI_MSIQ_INVALID);
	}

	(void) px_msiq_free(px_p, msiq_id);

	if (ino_p->ino_ipil_size) {
		/* Re-enable interrupt only if mapping register still shared */
		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
	}

fail:
	mutex_exit(&ib_p->ib_ino_lst_mutex);
	return (ret);
}
1323