xref: /illumos-gate/usr/src/uts/sun4/io/px/px_intr.c (revision 8c02a06e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * PX nexus interrupt handling:
30  *	PX device interrupt handler wrapper
31  *	PIL lookup routine
32  *	PX device interrupt related initchild code
33  */
34 
35 #include <sys/types.h>
36 #include <sys/kmem.h>
37 #include <sys/async.h>
38 #include <sys/spl.h>
39 #include <sys/sunddi.h>
40 #include <sys/fm/protocol.h>
41 #include <sys/fm/util.h>
42 #include <sys/machsystm.h>	/* e_ddi_nodeid_to_dip() */
43 #include <sys/ddi_impldefs.h>
44 #include <sys/sdt.h>
45 #include <sys/atomic.h>
46 #include "px_obj.h"
47 #include <sys/ontrap.h>
48 #include <sys/membar.h>
49 #include <sys/clock.h>
50 
51 /*
52  * interrupt jabber:
53  *
54  * When an interrupt line is jabbering, every time the state machine for the
55  * associated ino is idled, a new mondo will be sent and the ino will go into
56  * the pending state again. The mondo will cause a new call to
57  * px_intr_wrapper() which normally idles the ino's state machine which would
58  * precipitate another trip round the loop.
59  *
60  * The loop can be broken by preventing the ino's state machine from being
61  * idled when an interrupt line is jabbering. See the comment at the
62  * beginning of px_intr_wrapper() explaining how the 'interrupt jabber
63  * protection' code does this.
64  */
65 
66 /*LINTLIBRARY*/
67 
/*
 * If the unclaimed interrupt count has reached the limit set by
 * px_unclaimed_intr_max within the time limit, then all interrupts
 * on this ino are blocked by not idling the interrupt state machine.
 */
/*
 * Handle an interrupt for which no child handler claimed responsibility.
 *
 * Implements the 'interrupt jabber protection' described at the top of
 * this file: unclaimed interrupts are counted per ino, and once the
 * count exceeds px_unclaimed_intr_max within px_spurintr_duration, the
 * ino's state machine is deliberately left pending (not idled), which
 * blocks further interrupts on that ino.
 *
 * Returns DDI_INTR_CLAIMED in all cases except when idling the state
 * machine fails, in which case DDI_INTR_UNCLAIMED is returned.
 */
static int
px_spurintr(px_ib_ino_info_t *ino_p)
{
	px_ih_t	*ih_p = ino_p->ino_ih_start;
	px_t	*px_p = ino_p->ino_ib_p->ib_px_p;
	char	*err_fmt_str;
	int	i;

	/* Already over the limit: ino is blocked; swallow silently. */
	if (ino_p->ino_unclaimed > px_unclaimed_intr_max)
		return (DDI_INTR_CLAIMED);

	/* First unclaimed interrupt of a new burst: start the timer. */
	if (!ino_p->ino_unclaimed)
		ino_p->ino_spurintr_begin = ddi_get_lbolt();

	ino_p->ino_unclaimed++;

	/* Still under the limit: warn, idle the ino, and continue. */
	if (ino_p->ino_unclaimed <= px_unclaimed_intr_max)
		goto clear;

	/*
	 * Limit just exceeded.  If it took longer than
	 * px_spurintr_duration to get here, the line is merely noisy,
	 * not jabbering: restart the count and keep servicing.
	 */
	if (drv_hztousec(ddi_get_lbolt() - ino_p->ino_spurintr_begin)
	    > px_spurintr_duration) {
		ino_p->ino_unclaimed = 0;
		goto clear;
	}
	/* Jabbering: do NOT idle the ino, so it stays blocked. */
	err_fmt_str = "%s%d: ino 0x%x blocked";
	goto warn;
clear:
	/* Clear the pending state */
	if (px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
	    INTR_IDLE_STATE) != DDI_SUCCESS)
		return (DDI_INTR_UNCLAIMED);

	err_fmt_str = "!%s%d: spurious interrupt from ino 0x%x";
warn:
	cmn_err(CE_WARN, err_fmt_str, NAMEINST(px_p->px_dip), ino_p->ino_ino);
	/* List every handler registered on this ino to aid diagnosis. */
	for (i = 0; i < ino_p->ino_ih_size; i++, ih_p = ih_p->ih_next)
		cmn_err(CE_CONT, "!%s-%d#%x ", NAMEINST(ih_p->ih_dip),
		    ih_p->ih_inum);
	cmn_err(CE_CONT, "!\n");
	return (DDI_INTR_CLAIMED);
}
114 
115 extern uint64_t intr_get_time(void);
116 
117 /*
118  * px_intx_intr (INTx or legacy interrupt handler)
119  *
120  * This routine is used as wrapper around interrupt handlers installed by child
121  * device drivers.  This routine invokes the driver interrupt handlers and
122  * examines the return codes.
123  *
 * There is a count of unclaimed interrupts kept on a per-ino basis. If at
 * least one handler claims the interrupt then the counter is reset to zero
 * and the interrupt state machine is idled. If no handler claims the
 * interrupt then the counter is incremented by one and the state machine
 * is idled.
 * If the count ever reaches the limit value set by px_unclaimed_intr_max
129  * then the interrupt state machine is not idled thus preventing any further
130  * interrupts on that ino. The state machine will only be idled again if a
131  * handler is subsequently added or removed.
132  *
133  * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
134  * DDI_INTR_UNCLAIMED otherwise.
135  */
uint_t
px_intx_intr(caddr_t arg)
{
	px_ib_ino_info_t *ino_p = (px_ib_ino_info_t *)arg;
	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
	px_ih_t		*ih_p = ino_p->ino_ih_start;
	uint_t		result = 0, r;
	int		i;

	DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
	    "ino=%x sysino=%llx pil=%x ih_size=%x ih_lst=%x\n",
	    ino_p->ino_ino, ino_p->ino_sysino, ino_p->ino_pil,
	    ino_p->ino_ih_size, ino_p->ino_ih_head);

	/* Walk every handler registered on this (shared) ino. */
	for (i = 0; i < ino_p->ino_ih_size; i++, ih_p = ih_p->ih_next) {
		dev_info_t *dip = ih_p->ih_dip;
		uint_t (*handler)() = ih_p->ih_handler;
		caddr_t arg1 = ih_p->ih_handler_arg1;
		caddr_t arg2 = ih_p->ih_handler_arg2;

		/* Skip handlers whose interrupt is currently disabled. */
		if (ih_p->ih_intr_state == PX_INTR_STATE_DISABLE) {
			DBG(DBG_INTX_INTR, px_p->px_dip,
			    "px_intx_intr: %s%d interrupt %d is disabled\n",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    ino_p->ino_ino);

			continue;
		}

		DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
		    "ino=%x handler=%p arg1 =%p arg2 = %p\n",
		    ino_p->ino_ino, handler, arg1, arg2);

		DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
		    void *, handler, caddr_t, arg1, caddr_t, arg2);

		r = (*handler)(arg1, arg2);

		/*
		 * Account for time used by this interrupt. Protect against
		 * conflicting writes to ih_ticks from ib_intr_dist_all() by
		 * using atomic ops.
		 */

		if (ino_p->ino_pil <= LOCK_LEVEL)
			atomic_add_64(&ih_p->ih_ticks, intr_get_time());

		DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
		    void *, handler, caddr_t, arg1, int, r);

		result += r;

		/*
		 * px_check_all_handlers forces a call to every handler
		 * even after one has claimed; otherwise stop at the
		 * first claim.
		 */
		if (px_check_all_handlers)
			continue;
		if (result)
			break;
	}

	/* Nobody claimed it: apply the jabber-protection policy. */
	if (!result && px_unclaimed_intr_block)
		return (px_spurintr(ino_p));

	/* Claimed: restart the unclaimed-interrupt count. */
	ino_p->ino_unclaimed = 0;

	/* Clear the pending state */
	if (px_lib_intr_setstate(ino_p->ino_ib_p->ib_px_p->px_dip,
	    ino_p->ino_sysino, INTR_IDLE_STATE) != DDI_SUCCESS)
		return (DDI_INTR_UNCLAIMED);

	return (DDI_INTR_CLAIMED);
}
206 
207 /*
208  * px_msiq_intr (MSI/X or PCIe MSG interrupt handler)
209  *
210  * This routine is used as wrapper around interrupt handlers installed by child
211  * device drivers.  This routine invokes the driver interrupt handlers and
212  * examines the return codes.
213  *
214  * There is a count of unclaimed interrupts kept on a per-ino basis. If at
215  * least one handler claims the interrupt then the counter is halved and the
216  * interrupt state machine is idled. If no handler claims the interrupt then
217  * the counter is incremented by one and the state machine is idled.
 * If the count ever reaches the limit value set by px_unclaimed_intr_max
219  * then the interrupt state machine is not idled thus preventing any further
220  * interrupts on that ino. The state machine will only be idled again if a
221  * handler is subsequently added or removed.
222  *
223  * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
224  * DDI_INTR_UNCLAIMED otherwise.
225  */
uint_t
px_msiq_intr(caddr_t arg)
{
	px_ib_ino_info_t	*ino_p = (px_ib_ino_info_t *)arg;
	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
	px_msiq_t	*msiq_p = ino_p->ino_msiq_p;
	dev_info_t	*dip = px_p->px_dip;
	msiq_rec_t	msiq_rec, *msiq_rec_p = &msiq_rec;
	msiqhead_t	new_head_index = msiq_p->msiq_curr_head_idx;
	msiqhead_t	*curr_head_p;
	msiqtail_t	curr_tail_index;
	msgcode_t	msg_code;
	px_ih_t		*ih_p;
	int		i, j, ret;
	ushort_t	msiq_recs2process;

	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: msiq_id =%x ino=%x pil=%x "
	    "ih_size=%x ih_lst=%x\n", msiq_p->msiq_id, ino_p->ino_ino,
	    ino_p->ino_pil, ino_p->ino_ih_size, ino_p->ino_ih_head);

	/* Read current MSIQ tail index */
	px_lib_msiq_gettail(dip, msiq_p->msiq_id, &curr_tail_index);

	/* Unwrap the tail so the head/tail arithmetic below is linear. */
	if (curr_tail_index < new_head_index)
		curr_tail_index += msiq_state_p->msiq_rec_cnt;

	/*
	 * Calculate the number of recs to process by taking the difference
	 * between the head and tail pointers. For all records we always
	 * verify that we have a valid record type before we do any processing.
	 * If triggered, we should always have at least 1 valid record.
	 */
	msiq_recs2process = curr_tail_index - new_head_index;

	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: curr_head %x "
	    "rec2process %x\n", new_head_index, msiq_recs2process);

	/* Start at the record the head index currently points to. */
	curr_head_p = (msiqhead_t *)((caddr_t)msiq_p->msiq_base_p +
	    new_head_index * sizeof (msiq_rec_t));

	for (i = 0; i < msiq_recs2process; i++) {
		/* Read MSIQ record */
		px_lib_get_msiq_rec(dip, curr_head_p, msiq_rec_p);

		DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSIQ RECORD, "
		    "msiq_rec_type 0x%llx msiq_rec_rid 0x%llx\n",
		    msiq_rec_p->msiq_rec_type, msiq_rec_p->msiq_rec_rid);

		/* A zero type means the producer has not filled this slot. */
		if (!msiq_rec_p->msiq_rec_type)
			break;

		/* Check MSIQ record type */
		switch (msiq_rec_p->msiq_rec_type) {
		case MSG_REC:
			msg_code = msiq_rec_p->msiq_rec_data.msg.msg_code;
			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: PCIE MSG "
			    "record, msg type 0x%x\n", msg_code);
			break;
		case MSI32_REC:
		case MSI64_REC:
			msg_code = msiq_rec_p->msiq_rec_data.msi.msi_data;
			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSI record, "
			    "msi 0x%x\n", msg_code);

			/* Clear MSI state */
			px_lib_msi_setstate(dip, (msinum_t)msg_code,
			    PCI_MSI_STATE_IDLE);
			break;
		default:
			msg_code = 0;
			cmn_err(CE_WARN, "%s%d: px_msiq_intr: 0x%x MSIQ "
			    "record type is not supported",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    msiq_rec_p->msiq_rec_type);
			goto next_rec;
		}

		/*
		 * Scan through px_ih_t linked list, searching for the
		 * right px_ih_t, matching MSIQ record data.
		 */
		for (j = 0, ih_p = ino_p->ino_ih_start;
		    ih_p && (j < ino_p->ino_ih_size) &&
		    ((ih_p->ih_msg_code != msg_code) ||
		    (ih_p->ih_rec_type != msiq_rec_p->msiq_rec_type));
		    ih_p = ih_p->ih_next, j++);

		/*
		 * NOTE(review): the dereference below assumes the search
		 * always lands on a valid px_ih_t.  If ino_ih_start were
		 * NULL or the handler list were not circular, ih_p could
		 * be NULL at this point -- confirm the list invariants
		 * maintained by the px_ib code before relying on this.
		 */
		if ((ih_p->ih_msg_code == msg_code) &&
		    (ih_p->ih_rec_type == msiq_rec_p->msiq_rec_type)) {
			dev_info_t *dip = ih_p->ih_dip;
			uint_t (*handler)() = ih_p->ih_handler;
			caddr_t arg1 = ih_p->ih_handler_arg1;
			caddr_t arg2 = ih_p->ih_handler_arg2;

			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: ino=%x data=%x "
			    "handler=%p arg1 =%p arg2=%p\n", ino_p->ino_ino,
			    msg_code, handler, arg1, arg2);

			DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
			    void *, handler, caddr_t, arg1, caddr_t, arg2);

			/*
			 * Special case for PCIE Error Messages.
			 * The current frame work doesn't fit PCIE Err Msgs
			 * This should be fixed when PCIE MESSAGES as a whole
			 * is architected correctly.
			 */
			if ((msg_code == PCIE_MSG_CODE_ERR_COR) ||
			    (msg_code == PCIE_MSG_CODE_ERR_NONFATAL) ||
			    (msg_code == PCIE_MSG_CODE_ERR_FATAL)) {
				ret = px_err_fabric_intr(px_p, msg_code,
				    msiq_rec_p->msiq_rec_rid);
			} else
				ret = (*handler)(arg1, arg2);

			/*
			 * Account for time used by this interrupt. Protect
			 * against conflicting writes to ih_ticks from
			 * ib_intr_dist_all() by using atomic ops.
			 */

			if (ino_p->ino_pil <= LOCK_LEVEL)
				atomic_add_64(&ih_p->ih_ticks, intr_get_time());

			DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
			    void *, handler, caddr_t, arg1, int, ret);

			/* Record consumed: advance the (unwrapped) head. */
			new_head_index++;
		} else {
			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr:"
			    "No matching MSIQ record found\n");
		}
next_rec:
		/* Get the pointer next EQ record */
		curr_head_p = (msiqhead_t *)
		    ((caddr_t)curr_head_p + sizeof (msiq_rec_t));

		/* Check for overflow condition */
		if (curr_head_p >= (msiqhead_t *)((caddr_t)msiq_p->msiq_base_p
		    + msiq_state_p->msiq_rec_cnt * sizeof (msiq_rec_t)))
			curr_head_p = (msiqhead_t *)msiq_p->msiq_base_p;

		/* Zero out msiq_rec_type field */
		msiq_rec_p->msiq_rec_type = 0;
	}

	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: # of MSIQ recs processed %x\n",
	    (new_head_index - msiq_p->msiq_curr_head_idx));

	/* No records consumed: treat the interrupt as unclaimed. */
	if (new_head_index <= msiq_p->msiq_curr_head_idx) {
		if (px_unclaimed_intr_block) {
			return (px_spurintr(ino_p));
		}
	}

	/*  Update MSIQ head index with no of MSIQ records processed */
	if (new_head_index >= msiq_state_p->msiq_rec_cnt)
		new_head_index -= msiq_state_p->msiq_rec_cnt;

	msiq_p->msiq_curr_head_idx = new_head_index;
	px_lib_msiq_sethead(dip, msiq_p->msiq_id, new_head_index);

	/* Clear the pending state */
	if (px_lib_intr_setstate(dip, ino_p->ino_sysino,
	    INTR_IDLE_STATE) != DDI_SUCCESS)
		return (DDI_INTR_UNCLAIMED);

	return (DDI_INTR_CLAIMED);
}
396 
397 dev_info_t *
398 px_get_my_childs_dip(dev_info_t *dip, dev_info_t *rdip)
399 {
400 	dev_info_t	*cdip = rdip;
401 
402 	for (; ddi_get_parent(cdip) != dip; cdip = ddi_get_parent(cdip))
403 		;
404 
405 	return (cdip);
406 }
407 
408 /* Default class to pil value mapping */
/* Entries are {class code, class-code mask, pil}; first match wins. */
px_class_val_t px_default_pil [] = {
	{0x000000, 0xff0000, 0x1},	/* Class code for pre-2.0 devices */
	{0x010000, 0xff0000, 0x4},	/* Mass Storage Controller */
	{0x020000, 0xff0000, 0x6},	/* Network Controller */
	{0x030000, 0xff0000, 0x9},	/* Display Controller */
	{0x040000, 0xff0000, 0x9},	/* Multimedia Controller */
	{0x050000, 0xff0000, 0x9},	/* Memory Controller */
	{0x060000, 0xff0000, 0x9},	/* Bridge Controller */
	{0x0c0000, 0xffff00, 0x9},	/* Serial Bus, FireWire (IEEE 1394) */
	{0x0c0100, 0xffff00, 0x4},	/* Serial Bus, ACCESS.bus */
	{0x0c0200, 0xffff00, 0x4},	/* Serial Bus, SSA */
	{0x0c0300, 0xffff00, 0x9},	/* Serial Bus Universal Serial Bus */
	{0x0c0400, 0xffff00, 0x6},	/* Serial Bus, Fibre Channel */
	{0x0c0600, 0xffff00, 0x6}	/* Serial Bus, Infiniband */
};
424 
425 /*
426  * Default class to intr_weight value mapping (% of CPU).  A driver.conf
427  * entry on or above the pci node like
428  *
429  *	pci-class-intr-weights= 0x020000, 0xff0000, 30;
430  *
431  * can be used to augment or override entries in the default table below.
432  *
433  * NB: The values below give NICs preference on redistribution, and provide
434  * NICs some isolation from other interrupt sources. We need better interfaces
435  * that allow the NIC driver to identify a specific NIC instance as high
436  * bandwidth, and thus deserving of separation from other low bandwidth
437  * NICs additional isolation from other interrupt sources.
438  *
439  * NB: We treat Infiniband like a NIC.
440  */
/* Entries are {class code, class-code mask, weight (% of CPU)}. */
px_class_val_t px_default_intr_weight [] = {
	{0x020000, 0xff0000, 35},	/* Network Controller */
	{0x010000, 0xff0000, 10},	/* Mass Storage Controller */
	{0x0c0400, 0xffff00, 10},	/* Serial Bus, Fibre Channel */
	{0x0c0600, 0xffff00, 50}	/* Serial Bus, Infiniband */
};
447 
448 static uint32_t
449 px_match_class_val(uint32_t key, px_class_val_t *rec_p, int nrec,
450     uint32_t default_val)
451 {
452 	int	i;
453 
454 	for (i = 0; i < nrec; rec_p++, i++) {
455 		if ((rec_p->class_code & rec_p->class_mask) ==
456 		    (key & rec_p->class_mask))
457 			return (rec_p->class_val);
458 	}
459 
460 	return (default_val);
461 }
462 
463 /*
464  * px_class_to_val
465  *
466  * Return the configuration value, based on class code and sub class code,
467  * from the specified property based or default px_class_val_t table.
468  */
469 uint32_t
470 px_class_to_val(dev_info_t *rdip, char *property_name, px_class_val_t *rec_p,
471     int nrec, uint32_t default_val)
472 {
473 	int property_len;
474 	uint32_t class_code;
475 	px_class_val_t *conf;
476 	uint32_t val = default_val;
477 
478 	/*
479 	 * Use the "class-code" property to get the base and sub class
480 	 * codes for the requesting device.
481 	 */
482 	class_code = (uint32_t)ddi_prop_get_int(DDI_DEV_T_ANY, rdip,
483 	    DDI_PROP_DONTPASS, "class-code", -1);
484 
485 	if (class_code == -1)
486 		return (val);
487 
488 	/* look up the val from the default table */
489 	val = px_match_class_val(class_code, rec_p, nrec, val);
490 
491 	/* see if there is a more specific property specified value */
492 	if (ddi_getlongprop(DDI_DEV_T_ANY, rdip, DDI_PROP_NOTPROM,
493 	    property_name, (caddr_t)&conf, &property_len))
494 		return (val);
495 
496 	if ((property_len % sizeof (px_class_val_t)) == 0)
497 		val = px_match_class_val(class_code, conf,
498 		    property_len / sizeof (px_class_val_t), val);
499 	kmem_free(conf, property_len);
500 	return (val);
501 }
502 
503 /* px_class_to_pil: return the pil for a given device. */
504 uint32_t
505 px_class_to_pil(dev_info_t *rdip)
506 {
507 	uint32_t pil;
508 
509 	/* default pil is 0 (uninitialized) */
510 	pil = px_class_to_val(rdip,
511 	    "pci-class-priorities", px_default_pil,
512 	    sizeof (px_default_pil) / sizeof (px_class_val_t), 0);
513 
514 	/* range check the result */
515 	if (pil >= 0xf)
516 		pil = 0;
517 
518 	return (pil);
519 }
520 
521 /* px_class_to_intr_weight: return the intr_weight for a given device. */
522 static int32_t
523 px_class_to_intr_weight(dev_info_t *rdip)
524 {
525 	int32_t intr_weight;
526 
527 	/* default weight is 0% */
528 	intr_weight = px_class_to_val(rdip,
529 	    "pci-class-intr-weights", px_default_intr_weight,
530 	    sizeof (px_default_intr_weight) / sizeof (px_class_val_t), 0);
531 
532 	/* range check the result */
533 	if (intr_weight < 0)
534 		intr_weight = 0;
535 	if (intr_weight > 1000)
536 		intr_weight = 1000;
537 
538 	return (intr_weight);
539 }
540 
541 /* ARGSUSED */
542 int
543 px_intx_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
544     ddi_intr_handle_impl_t *hdlp, void *result)
545 {
546 	px_t	*px_p = DIP_TO_STATE(dip);
547 	int	ret = DDI_SUCCESS;
548 
549 	DBG(DBG_INTROPS, dip, "px_intx_ops: dip=%x rdip=%x intr_op=%x "
550 	    "handle=%p\n", dip, rdip, intr_op, hdlp);
551 
552 	switch (intr_op) {
553 	case DDI_INTROP_GETCAP:
554 		ret = pci_intx_get_cap(rdip, (int *)result);
555 		break;
556 	case DDI_INTROP_SETCAP:
557 		DBG(DBG_INTROPS, dip, "px_intx_ops: SetCap is not supported\n");
558 		ret = DDI_ENOTSUP;
559 		break;
560 	case DDI_INTROP_ALLOC:
561 		*(int *)result = hdlp->ih_scratch1;
562 		break;
563 	case DDI_INTROP_FREE:
564 		break;
565 	case DDI_INTROP_GETPRI:
566 		*(int *)result = hdlp->ih_pri ?
567 		    hdlp->ih_pri : px_class_to_pil(rdip);
568 		break;
569 	case DDI_INTROP_SETPRI:
570 		break;
571 	case DDI_INTROP_ADDISR:
572 		ret = px_add_intx_intr(dip, rdip, hdlp);
573 		break;
574 	case DDI_INTROP_REMISR:
575 		ret = px_rem_intx_intr(dip, rdip, hdlp);
576 		break;
577 	case DDI_INTROP_ENABLE:
578 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
579 		    hdlp->ih_vector, PX_INTR_STATE_ENABLE, 0, 0);
580 		break;
581 	case DDI_INTROP_DISABLE:
582 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
583 		    hdlp->ih_vector, PX_INTR_STATE_DISABLE, 0, 0);
584 		break;
585 	case DDI_INTROP_SETMASK:
586 		ret = pci_intx_set_mask(rdip);
587 		break;
588 	case DDI_INTROP_CLRMASK:
589 		ret = pci_intx_clr_mask(rdip);
590 		break;
591 	case DDI_INTROP_GETPENDING:
592 		ret = pci_intx_get_pending(rdip, (int *)result);
593 		break;
594 	case DDI_INTROP_NINTRS:
595 	case DDI_INTROP_NAVAIL:
596 		*(int *)result = i_ddi_get_intx_nintrs(rdip);
597 		break;
598 	default:
599 		ret = DDI_ENOTSUP;
600 		break;
601 	}
602 
603 	return (ret);
604 }
605 
606 /* ARGSUSED */
/*
 * Bus interrupt operation entry point for MSI and MSI-X interrupts.
 * Dispatches each DDI interrupt op to the appropriate pci_msi_* /
 * px_msi_* / px_lib_msi_* helper; unsupported ops return DDI_ENOTSUP.
 */
/* ARGSUSED */
int
px_msix_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
    ddi_intr_handle_impl_t *hdlp, void *result)
{
	px_t			*px_p = DIP_TO_STATE(dip);
	px_msi_state_t		*msi_state_p = &px_p->px_ib_p->ib_msi_state;
	msiq_rec_type_t		msiq_rec_type;
	msi_type_t		msi_type;
	uint64_t		msi_addr;
	msinum_t		msi_num;
	msiqid_t		msiq_id;
	uint_t			nintrs;
	int			i, ret = DDI_SUCCESS;

	DBG(DBG_INTROPS, dip, "px_msix_ops: dip=%x rdip=%x intr_op=%x "
	    "handle=%p\n", dip, rdip, intr_op, hdlp);

	/*
	 * Check for MSI64 support: use the 64-bit MSI address only when
	 * both the handle and the nexus advertise 64-bit capability.
	 */
	if ((hdlp->ih_cap & DDI_INTR_FLAG_MSI64) && msi_state_p->msi_addr64) {
		msiq_rec_type = MSI64_REC;
		msi_type = MSI64_TYPE;
		msi_addr = msi_state_p->msi_addr64;
	} else {
		msiq_rec_type = MSI32_REC;
		msi_type = MSI32_TYPE;
		msi_addr = msi_state_p->msi_addr32;
	}

	switch (intr_op) {
	case DDI_INTROP_GETCAP:
		ret = pci_msi_get_cap(rdip, hdlp->ih_type, (int *)result);
		break;
	case DDI_INTROP_SETCAP:
		DBG(DBG_INTROPS, dip, "px_msix_ops: SetCap is not supported\n");
		ret = DDI_ENOTSUP;
		break;
	case DDI_INTROP_ALLOC:
		/*
		 * We need to restrict this allocation in future
		 * based on Resource Management policies.
		 */
		if ((ret = px_msi_alloc(px_p, rdip, hdlp->ih_inum,
		    hdlp->ih_scratch1, (uintptr_t)hdlp->ih_scratch2, &msi_num,
		    (int *)result)) != DDI_SUCCESS) {
			DBG(DBG_INTROPS, dip, "px_msix_ops: allocation "
			    "failed, rdip 0x%p type 0x%d inum 0x%x "
			    "count 0x%x\n", rdip, hdlp->ih_type, hdlp->ih_inum,
			    hdlp->ih_scratch1);

			return (ret);
		}

		/*
		 * First MSI-X allocation for this child: set up its
		 * MSI-X table state; undo the MSI allocation on failure.
		 */
		if ((hdlp->ih_type == DDI_INTR_TYPE_MSIX) &&
		    (i_ddi_get_msix(rdip) == NULL)) {
			ddi_intr_msix_t		*msix_p;

			if (msix_p = pci_msix_init(rdip)) {
				i_ddi_set_msix(rdip, msix_p);
				break;
			}

			DBG(DBG_INTROPS, dip, "px_msix_ops: MSI-X allocation "
			    "failed, rdip 0x%p inum 0x%x\n", rdip,
			    hdlp->ih_inum);

			(void) px_msi_free(px_p, rdip, hdlp->ih_inum,
			    hdlp->ih_scratch1);

			return (DDI_FAILURE);
		}

		break;
	case DDI_INTROP_FREE:
		(void) pci_msi_disable_mode(rdip, hdlp->ih_type, NULL);
		(void) pci_msi_unconfigure(rdip, hdlp->ih_type, hdlp->ih_inum);

		/* MSI has no per-child MSI-X state to tear down. */
		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
			goto msi_free;

		/* Duplicated MSI-X vectors share state; don't free it. */
		if (hdlp->ih_flags & DDI_INTR_MSIX_DUP)
			break;

		/* Last MSI-X interrupt for the child: drop table state. */
		if (((i_ddi_intr_get_current_nintrs(hdlp->ih_dip) - 1) == 0) &&
		    (i_ddi_get_msix(rdip))) {
			pci_msix_fini(i_ddi_get_msix(rdip));
			i_ddi_set_msix(rdip, NULL);
		}
msi_free:
		(void) px_msi_free(px_p, rdip, hdlp->ih_inum,
		    hdlp->ih_scratch1);
		break;
	case DDI_INTROP_GETPRI:
		/* Use the handle's pri if set, else derive from class code. */
		*(int *)result = hdlp->ih_pri ?
		    hdlp->ih_pri : px_class_to_pil(rdip);
		break;
	case DDI_INTROP_SETPRI:
		break;
	case DDI_INTROP_ADDISR:
		if ((ret = px_msi_get_msinum(px_p, hdlp->ih_dip,
		    hdlp->ih_inum, &msi_num)) != DDI_SUCCESS)
			return (ret);

		if ((ret = px_add_msiq_intr(dip, rdip, hdlp,
		    msiq_rec_type, msi_num, &msiq_id)) != DDI_SUCCESS) {
			DBG(DBG_INTROPS, dip, "px_msix_ops: Add MSI handler "
			    "failed, rdip 0x%p msi 0x%x\n", rdip, msi_num);
			return (ret);
		}

		DBG(DBG_INTROPS, dip, "px_msix_ops: msiq used 0x%x\n", msiq_id);

		/* Bind the MSI to its MSIQ; unwind the add on failure. */
		if ((ret = px_lib_msi_setmsiq(dip, msi_num,
		    msiq_id, msi_type)) != DDI_SUCCESS) {
			(void) px_rem_msiq_intr(dip, rdip,
			    hdlp, msiq_rec_type, msi_num, msiq_id);
			return (ret);
		}

		if ((ret = px_lib_msi_setstate(dip, msi_num,
		    PCI_MSI_STATE_IDLE)) != DDI_SUCCESS) {
			(void) px_rem_msiq_intr(dip, rdip,
			    hdlp, msiq_rec_type, msi_num, msiq_id);
			return (ret);
		}

		hdlp->ih_vector = msi_num;
		break;
	case DDI_INTROP_DUPVEC:
		DBG(DBG_INTROPS, dip, "px_msix_ops: dupisr - inum: %x, "
		    "new_vector: %x\n", hdlp->ih_inum, hdlp->ih_scratch1);

		ret = pci_msix_dup(hdlp->ih_dip, hdlp->ih_inum,
		    hdlp->ih_scratch1);
		break;
	case DDI_INTROP_REMISR:
		msi_num = hdlp->ih_vector;

		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
		    &msiq_id)) != DDI_SUCCESS)
			return (ret);

		if ((ret = px_lib_msi_setstate(dip, msi_num,
		    PCI_MSI_STATE_IDLE)) != DDI_SUCCESS)
			return (ret);

		ret = px_rem_msiq_intr(dip, rdip,
		    hdlp, msiq_rec_type, msi_num, msiq_id);

		hdlp->ih_vector = 0;
		break;
	case DDI_INTROP_ENABLE:
		msi_num = hdlp->ih_vector;

		if ((ret = px_lib_msi_setvalid(dip, msi_num,
		    PCI_MSI_VALID)) != DDI_SUCCESS)
			return (ret);

		/*
		 * Configure and enable the capability if not already
		 * enabled (always reconfigure for MSI-X).
		 */
		if ((pci_is_msi_enabled(rdip, hdlp->ih_type) != DDI_SUCCESS) ||
		    (hdlp->ih_type == DDI_INTR_TYPE_MSIX)) {
			nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);

			if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
			    nintrs, hdlp->ih_inum, msi_addr,
			    hdlp->ih_type == DDI_INTR_TYPE_MSIX ?
			    msi_num : msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
				return (ret);

			if ((ret = pci_msi_enable_mode(rdip, hdlp->ih_type))
			    != DDI_SUCCESS)
				return (ret);
		}

		if ((ret = pci_msi_clr_mask(rdip, hdlp->ih_type,
		    hdlp->ih_inum)) != DDI_SUCCESS)
			return (ret);

		/* Duplicated vectors share ino state; skip the update. */
		if (hdlp->ih_flags & DDI_INTR_MSIX_DUP)
			break;

		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
		    &msiq_id)) != DDI_SUCCESS)
			return (ret);

		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
		    px_msiqid_to_devino(px_p, msiq_id), PX_INTR_STATE_ENABLE,
		    msiq_rec_type, msi_num);

		break;
	case DDI_INTROP_DISABLE:
		msi_num = hdlp->ih_vector;

		if ((ret = pci_msi_set_mask(rdip, hdlp->ih_type,
		    hdlp->ih_inum)) != DDI_SUCCESS)
			return (ret);

		if ((ret = px_lib_msi_setvalid(dip, msi_num,
		    PCI_MSI_INVALID)) != DDI_SUCCESS)
			return (ret);

		/* Duplicated vectors share ino state; skip the update. */
		if (hdlp->ih_flags & DDI_INTR_MSIX_DUP)
			break;

		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
		    &msiq_id)) != DDI_SUCCESS)
			return (ret);

		ret = px_ib_update_intr_state(px_p, rdip,
		    hdlp->ih_inum, px_msiqid_to_devino(px_p, msiq_id),
		    PX_INTR_STATE_DISABLE, msiq_rec_type, msi_num);

		break;
	case DDI_INTROP_BLOCKENABLE:
		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
		msi_num = hdlp->ih_vector;

		if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
		    nintrs, hdlp->ih_inum, msi_addr,
		    msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
			return (ret);

		/* Validate and enable every MSI in the block. */
		for (i = 0; i < nintrs; i++, msi_num++) {
			if ((ret = px_lib_msi_setvalid(dip, msi_num,
			    PCI_MSI_VALID)) != DDI_SUCCESS)
				return (ret);

			if ((ret = px_lib_msi_getmsiq(dip, msi_num,
			    &msiq_id)) != DDI_SUCCESS)
				return (ret);

			if ((ret = px_ib_update_intr_state(px_p, rdip,
			    hdlp->ih_inum + i, px_msiqid_to_devino(px_p,
			    msiq_id), PX_INTR_STATE_ENABLE, msiq_rec_type,
			    msi_num)) != DDI_SUCCESS)
				return (ret);
		}

		ret = pci_msi_enable_mode(rdip, hdlp->ih_type);
		break;
	case DDI_INTROP_BLOCKDISABLE:
		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
		msi_num = hdlp->ih_vector;

		if ((ret = pci_msi_disable_mode(rdip, hdlp->ih_type,
		    hdlp->ih_cap & DDI_INTR_FLAG_BLOCK)) != DDI_SUCCESS)
			return (ret);

		/* Invalidate and disable every MSI in the block. */
		for (i = 0; i < nintrs; i++, msi_num++) {
			if ((ret = px_lib_msi_setvalid(dip, msi_num,
			    PCI_MSI_INVALID)) != DDI_SUCCESS)
				return (ret);

			if ((ret = px_lib_msi_getmsiq(dip, msi_num,
			    &msiq_id)) != DDI_SUCCESS)
				return (ret);

			if ((ret = px_ib_update_intr_state(px_p, rdip,
			    hdlp->ih_inum + i, px_msiqid_to_devino(px_p,
			    msiq_id), PX_INTR_STATE_DISABLE, msiq_rec_type,
			    msi_num)) != DDI_SUCCESS)
				return (ret);
		}

		break;
	case DDI_INTROP_SETMASK:
		ret = pci_msi_set_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
		break;
	case DDI_INTROP_CLRMASK:
		ret = pci_msi_clr_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
		break;
	case DDI_INTROP_GETPENDING:
		ret = pci_msi_get_pending(rdip, hdlp->ih_type,
		    hdlp->ih_inum, (int *)result);
		break;
	case DDI_INTROP_NINTRS:
		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
		break;
	case DDI_INTROP_NAVAIL:
		/* XXX - a new interface may be needed */
		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
		break;
	default:
		ret = DDI_ENOTSUP;
		break;
	}

	return (ret);
}
894 
/*
 * pci_intrs kstat template and support state.  One static template is
 * shared by every interrupt kstat; px_ks_update() refills it before
 * each snapshot, serialized by pxintr_ks_template_lock, which
 * px_create_intr_kstats() installs as each kstat's ks_lock.
 */
static struct {
	kstat_named_t pxintr_ks_name;
	kstat_named_t pxintr_ks_type;
	kstat_named_t pxintr_ks_cpu;
	kstat_named_t pxintr_ks_pil;
	kstat_named_t pxintr_ks_time;
	kstat_named_t pxintr_ks_ino;
	kstat_named_t pxintr_ks_cookie;
	kstat_named_t pxintr_ks_devpath;
	kstat_named_t pxintr_ks_buspath;
} pxintr_ks_template = {
	{ "name",	KSTAT_DATA_CHAR },
	{ "type",	KSTAT_DATA_CHAR },
	{ "cpu",	KSTAT_DATA_UINT64 },
	{ "pil",	KSTAT_DATA_UINT64 },
	{ "time",	KSTAT_DATA_UINT64 },
	{ "ino",	KSTAT_DATA_UINT64 },
	{ "cookie",	KSTAT_DATA_UINT64 },
	{ "devpath",	KSTAT_DATA_STRING },
	{ "buspath",	KSTAT_DATA_STRING },
};

/* Next kstat instance number; bumped with atomic_inc_32_nv(). */
static uint32_t pxintr_ks_instance;
/* Scratch path buffers filled by px_ks_update(). */
static char ih_devpath[MAXPATHLEN];
static char ih_buspath[MAXPATHLEN];
kmutex_t pxintr_ks_template_lock;
921 
/*
 * kstat update callback for a single interrupt handler's pci_intrs
 * kstat.  Refreshes the shared pxintr_ks_template from the px_ih_t
 * stashed in ks_private (see px_create_intr_kstats()).  Always
 * returns 0; the rw argument is ignored.
 */
int
px_ks_update(kstat_t *ksp, int rw)
{
	px_ih_t *ih_p = ksp->ks_private;
	int maxlen = sizeof (pxintr_ks_template.pxintr_ks_name.value.c);
	px_ib_t *ib_p = ih_p->ih_ino_p->ino_ib_p;
	px_t *px_p = ib_p->ib_px_p;
	devino_t ino;
	sysino_t sysino;

	/* Translate the handler's devino to the system interrupt number. */
	ino = ih_p->ih_ino_p->ino_ino;
	(void) px_lib_intr_devino_to_sysino(px_p->px_dip, ino, &sysino);

	/* "name" is the child driver name and instance, e.g. "bge0". */
	(void) snprintf(pxintr_ks_template.pxintr_ks_name.value.c, maxlen,
	    "%s%d", ddi_driver_name(ih_p->ih_dip),
	    ddi_get_instance(ih_p->ih_dip));

	(void) ddi_pathname(ih_p->ih_dip, ih_devpath);
	(void) ddi_pathname(px_p->px_dip, ih_buspath);
	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_devpath, ih_devpath);
	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_buspath, ih_buspath);

	if (ih_p->ih_intr_state == PX_INTR_STATE_ENABLE) {

		(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
		    (ih_p->ih_rec_type == 0) ? "fixed" : "msi");
		pxintr_ks_template.pxintr_ks_cpu.value.ui64 =
		    ih_p->ih_ino_p->ino_cpuid;
		pxintr_ks_template.pxintr_ks_pil.value.ui64 =
		    ih_p->ih_ino_p->ino_pil;
		/*
		 * Total time = already-converted nsec plus the current
		 * tick count converted at the target CPU's tick rate.
		 */
		pxintr_ks_template.pxintr_ks_time.value.ui64 = ih_p->ih_nsec +
		    (uint64_t)tick2ns((hrtime_t)ih_p->ih_ticks,
			ih_p->ih_ino_p->ino_cpuid);
		pxintr_ks_template.pxintr_ks_ino.value.ui64 = ino;
		pxintr_ks_template.pxintr_ks_cookie.value.ui64 = sysino;
	} else {
		/* Disabled handler: report zeroed statistics. */
		(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
		    "disabled");
		pxintr_ks_template.pxintr_ks_cpu.value.ui64 = 0;
		pxintr_ks_template.pxintr_ks_pil.value.ui64 = 0;
		pxintr_ks_template.pxintr_ks_time.value.ui64 = 0;
		pxintr_ks_template.pxintr_ks_ino.value.ui64 = 0;
		pxintr_ks_template.pxintr_ks_cookie.value.ui64 = 0;
	}
	return (0);
}
968 
969 void
970 px_create_intr_kstats(px_ih_t *ih_p)
971 {
972 	msiq_rec_type_t rec_type = ih_p->ih_rec_type;
973 
974 	ASSERT(ih_p->ih_ksp == NULL);
975 
976 	/*
977 	 * Create pci_intrs::: kstats for all ih types except messages,
978 	 * which represent unusual conditions and don't need to be tracked.
979 	 */
980 	if (rec_type == 0 || rec_type == MSI32_REC || rec_type == MSI64_REC) {
981 		ih_p->ih_ksp = kstat_create("pci_intrs",
982 		    atomic_inc_32_nv(&pxintr_ks_instance), "config",
983 		    "interrupts", KSTAT_TYPE_NAMED,
984 		    sizeof (pxintr_ks_template) / sizeof (kstat_named_t),
985 		    KSTAT_FLAG_VIRTUAL);
986 	}
987 	if (ih_p->ih_ksp != NULL) {
988 		ih_p->ih_ksp->ks_data_size += MAXPATHLEN * 2;
989 		ih_p->ih_ksp->ks_lock = &pxintr_ks_template_lock;
990 		ih_p->ih_ksp->ks_data = &pxintr_ks_template;
991 		ih_p->ih_ksp->ks_private = ih_p;
992 		ih_p->ih_ksp->ks_update = px_ks_update;
993 	}
994 }
995 
996 /*
997  * px_add_intx_intr:
998  *
999  * This function is called to register INTx and legacy hardware
1000  * interrupt pins interrupts.
1001  */
1002 int
1003 px_add_intx_intr(dev_info_t *dip, dev_info_t *rdip,
1004     ddi_intr_handle_impl_t *hdlp)
1005 {
1006 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1007 	px_ib_t		*ib_p = px_p->px_ib_p;
1008 	devino_t	ino;
1009 	px_ih_t		*ih_p;
1010 	px_ib_ino_info_t *ino_p;
1011 	int32_t		weight;
1012 	int		ret = DDI_SUCCESS;
1013 
1014 	ino = hdlp->ih_vector;
1015 
1016 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: rdip=%s%d ino=%x "
1017 	    "handler=%x arg1=%x arg2=%x\n", ddi_driver_name(rdip),
1018 	    ddi_get_instance(rdip), ino, hdlp->ih_cb_func,
1019 	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
1020 
1021 	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum,
1022 	    hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, 0, 0);
1023 
1024 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1025 
1026 	if (ino_p = px_ib_locate_ino(ib_p, ino)) {	/* sharing ino */
1027 		uint32_t intr_index = hdlp->ih_inum;
1028 		if (px_ib_ino_locate_intr(ino_p, rdip, intr_index, 0, 0)) {
1029 			DBG(DBG_A_INTX, dip, "px_add_intx_intr: "
1030 			    "dup intr #%d\n", intr_index);
1031 
1032 			ret = DDI_FAILURE;
1033 			goto fail1;
1034 		}
1035 
1036 		/* Save mondo value in hdlp */
1037 		hdlp->ih_vector = ino_p->ino_sysino;
1038 
1039 		if ((ret = px_ib_ino_add_intr(px_p, ino_p, ih_p))
1040 		    != DDI_SUCCESS)
1041 			goto fail1;
1042 	} else {
1043 		ino_p = px_ib_new_ino(ib_p, ino, ih_p);
1044 
1045 		if (hdlp->ih_pri == 0)
1046 			hdlp->ih_pri = px_class_to_pil(rdip);
1047 
1048 		/* Save mondo value in hdlp */
1049 		hdlp->ih_vector = ino_p->ino_sysino;
1050 
1051 		DBG(DBG_A_INTX, dip, "px_add_intx_intr: pil=0x%x mondo=0x%x\n",
1052 		    hdlp->ih_pri, hdlp->ih_vector);
1053 
1054 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1055 		    (ddi_intr_handler_t *)px_intx_intr, (caddr_t)ino_p, NULL);
1056 
1057 		ret = i_ddi_add_ivintr(hdlp);
1058 
1059 		/*
1060 		 * Restore original interrupt handler
1061 		 * and arguments in interrupt handle.
1062 		 */
1063 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
1064 		    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);
1065 
1066 		if (ret != DDI_SUCCESS)
1067 			goto fail2;
1068 
1069 		/* Save the pil for this ino */
1070 		ino_p->ino_pil = hdlp->ih_pri;
1071 
1072 		/* select cpu, saving it for sharing and removal */
1073 		ino_p->ino_cpuid = intr_dist_cpuid();
1074 
1075 		/* Enable interrupt */
1076 		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino);
1077 	}
1078 
1079 	/* add weight to the cpu that we are already targeting */
1080 	weight = px_class_to_intr_weight(rdip);
1081 	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);
1082 
1083 	ih_p->ih_ino_p = ino_p;
1084 	px_create_intr_kstats(ih_p);
1085 	if (ih_p->ih_ksp)
1086 		kstat_install(ih_p->ih_ksp);
1087 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1088 
1089 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: done! Interrupt 0x%x pil=%x\n",
1090 	    ino_p->ino_sysino, hdlp->ih_pri);
1091 
1092 	return (ret);
1093 fail2:
1094 	px_ib_delete_ino(ib_p, ino_p);
1095 fail1:
1096 	if (ih_p->ih_config_handle)
1097 		pci_config_teardown(&ih_p->ih_config_handle);
1098 
1099 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1100 	kmem_free(ih_p, sizeof (px_ih_t));
1101 
1102 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: Failed! Interrupt 0x%x "
1103 	    "pil=%x\n", ino_p->ino_sysino, hdlp->ih_pri);
1104 
1105 	return (ret);
1106 }
1107 
1108 /*
1109  * px_rem_intx_intr:
1110  *
1111  * This function is called to unregister INTx and legacy hardware
1112  * interrupt pins interrupts.
1113  */
1114 int
1115 px_rem_intx_intr(dev_info_t *dip, dev_info_t *rdip,
1116     ddi_intr_handle_impl_t *hdlp)
1117 {
1118 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1119 	px_ib_t		*ib_p = px_p->px_ib_p;
1120 	devino_t	ino;
1121 	cpuid_t		curr_cpu;
1122 	px_ib_ino_info_t	*ino_p;
1123 	px_ih_t		*ih_p;
1124 	int		ret = DDI_SUCCESS;
1125 
1126 	ino = hdlp->ih_vector;
1127 
1128 	DBG(DBG_R_INTX, dip, "px_rem_intx_intr: rdip=%s%d ino=%x\n",
1129 	    ddi_driver_name(rdip), ddi_get_instance(rdip), ino);
1130 
1131 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1132 
1133 	ino_p = px_ib_locate_ino(ib_p, ino);
1134 	ih_p = px_ib_ino_locate_intr(ino_p, rdip, hdlp->ih_inum, 0, 0);
1135 
1136 	/* Get the current cpu */
1137 	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
1138 	    &curr_cpu)) != DDI_SUCCESS)
1139 		goto fail;
1140 
1141 	if ((ret = px_ib_ino_rem_intr(px_p, ino_p, ih_p)) != DDI_SUCCESS)
1142 		goto fail;
1143 
1144 	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
1145 
1146 	if (ino_p->ino_ih_size == 0) {
1147 		if ((ret = px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
1148 		    INTR_DELIVERED_STATE)) != DDI_SUCCESS)
1149 			goto fail;
1150 
1151 		hdlp->ih_vector = ino_p->ino_sysino;
1152 		i_ddi_rem_ivintr(hdlp);
1153 
1154 		px_ib_delete_ino(ib_p, ino_p);
1155 		kmem_free(ino_p, sizeof (px_ib_ino_info_t));
1156 	} else {
1157 		/* Re-enable interrupt only if mapping regsiter still shared */
1158 		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
1159 	}
1160 
1161 fail:
1162 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1163 	return (ret);
1164 }
1165 
1166 /*
1167  * px_add_msiq_intr:
1168  *
1169  * This function is called to register MSI/Xs and PCIe message interrupts.
1170  */
1171 int
1172 px_add_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
1173     ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
1174     msgcode_t msg_code, msiqid_t *msiq_id_p)
1175 {
1176 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1177 	px_ib_t		*ib_p = px_p->px_ib_p;
1178 	px_msiq_state_t	*msiq_state_p = &ib_p->ib_msiq_state;
1179 	devino_t	ino;
1180 	px_ih_t		*ih_p;
1181 	px_ib_ino_info_t	*ino_p;
1182 	int32_t		weight;
1183 	int		ret = DDI_SUCCESS;
1184 
1185 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: rdip=%s%d handler=%x "
1186 	    "arg1=%x arg2=%x\n", ddi_driver_name(rdip), ddi_get_instance(rdip),
1187 	    hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
1188 
1189 	if ((ret = px_msiq_alloc(px_p, rec_type, msiq_id_p)) != DDI_SUCCESS) {
1190 		DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
1191 		    "msiq allocation failed\n");
1192 		return (ret);
1193 	}
1194 
1195 	ino = px_msiqid_to_devino(px_p, *msiq_id_p);
1196 
1197 	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum, hdlp->ih_cb_func,
1198 	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, rec_type, msg_code);
1199 
1200 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1201 
1202 	if (ino_p = px_ib_locate_ino(ib_p, ino)) {	/* sharing ino */
1203 		uint32_t intr_index = hdlp->ih_inum;
1204 		if (px_ib_ino_locate_intr(ino_p, rdip,
1205 		    intr_index, rec_type, msg_code)) {
1206 			DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
1207 			    "dup intr #%d\n", intr_index);
1208 
1209 			ret = DDI_FAILURE;
1210 			goto fail1;
1211 		}
1212 
1213 		if ((ret = px_ib_ino_add_intr(px_p, ino_p, ih_p))
1214 		    != DDI_SUCCESS)
1215 			goto fail1;
1216 	} else {
1217 		ino_p = px_ib_new_ino(ib_p, ino, ih_p);
1218 
1219 		ino_p->ino_msiq_p = msiq_state_p->msiq_p +
1220 		    (*msiq_id_p - msiq_state_p->msiq_1st_msiq_id);
1221 
1222 		if (hdlp->ih_pri == 0)
1223 			hdlp->ih_pri = px_class_to_pil(rdip);
1224 
1225 		/* Save mondo value in hdlp */
1226 		hdlp->ih_vector = ino_p->ino_sysino;
1227 
1228 		DBG(DBG_MSIQ, dip, "px_add_msiq_intr: pil=0x%x mondo=0x%x\n",
1229 		    hdlp->ih_pri, hdlp->ih_vector);
1230 
1231 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1232 		    (ddi_intr_handler_t *)px_msiq_intr, (caddr_t)ino_p, NULL);
1233 
1234 		ret = i_ddi_add_ivintr(hdlp);
1235 
1236 		/*
1237 		 * Restore original interrupt handler
1238 		 * and arguments in interrupt handle.
1239 		 */
1240 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
1241 		    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);
1242 
1243 		if (ret != DDI_SUCCESS)
1244 			goto fail2;
1245 
1246 		/* Save the pil for this ino */
1247 		ino_p->ino_pil = hdlp->ih_pri;
1248 
1249 		/* Enable MSIQ */
1250 		px_lib_msiq_setstate(dip, *msiq_id_p, PCI_MSIQ_STATE_IDLE);
1251 		px_lib_msiq_setvalid(dip, *msiq_id_p, PCI_MSIQ_VALID);
1252 
1253 		/* select cpu, saving it for sharing and removal */
1254 		ino_p->ino_cpuid = intr_dist_cpuid();
1255 
1256 		/* Enable interrupt */
1257 		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino_p->ino_ino);
1258 	}
1259 
1260 	/* add weight to the cpu that we are already targeting */
1261 	weight = px_class_to_intr_weight(rdip);
1262 	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);
1263 
1264 	ih_p->ih_ino_p = ino_p;
1265 	px_create_intr_kstats(ih_p);
1266 	if (ih_p->ih_ksp)
1267 		kstat_install(ih_p->ih_ksp);
1268 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1269 
1270 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: done! Interrupt 0x%x pil=%x\n",
1271 	    ino_p->ino_sysino, hdlp->ih_pri);
1272 
1273 	return (ret);
1274 fail2:
1275 	px_ib_delete_ino(ib_p, ino_p);
1276 fail1:
1277 	if (ih_p->ih_config_handle)
1278 		pci_config_teardown(&ih_p->ih_config_handle);
1279 
1280 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1281 	kmem_free(ih_p, sizeof (px_ih_t));
1282 
1283 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: Failed! Interrupt 0x%x pil=%x\n",
1284 	    ino_p->ino_sysino, hdlp->ih_pri);
1285 
1286 	return (ret);
1287 }
1288 
1289 /*
1290  * px_rem_msiq_intr:
1291  *
1292  * This function is called to unregister MSI/Xs and PCIe message interrupts.
1293  */
1294 int
1295 px_rem_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
1296     ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
1297     msgcode_t msg_code, msiqid_t msiq_id)
1298 {
1299 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1300 	px_ib_t		*ib_p = px_p->px_ib_p;
1301 	devino_t	ino = px_msiqid_to_devino(px_p, msiq_id);
1302 	cpuid_t		curr_cpu;
1303 	px_ib_ino_info_t *ino_p;
1304 	px_ih_t		*ih_p;
1305 	int		ret = DDI_SUCCESS;
1306 
1307 	DBG(DBG_MSIQ, dip, "px_rem_msiq_intr: rdip=%s%d msiq_id=%x ino=%x\n",
1308 	    ddi_driver_name(rdip), ddi_get_instance(rdip), msiq_id, ino);
1309 
1310 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1311 
1312 	ino_p = px_ib_locate_ino(ib_p, ino);
1313 	ih_p = px_ib_ino_locate_intr(ino_p, rdip, hdlp->ih_inum,
1314 	    rec_type, msg_code);
1315 
1316 	/* Get the current cpu */
1317 	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
1318 	    &curr_cpu)) != DDI_SUCCESS)
1319 		goto fail;
1320 
1321 	if ((ret = px_ib_ino_rem_intr(px_p, ino_p, ih_p)) != DDI_SUCCESS)
1322 		goto fail;
1323 
1324 	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
1325 
1326 	if (ino_p->ino_ih_size == 0) {
1327 		if ((ret = px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
1328 		    INTR_DELIVERED_STATE)) != DDI_SUCCESS)
1329 			goto fail;
1330 
1331 		px_lib_msiq_setvalid(dip, px_devino_to_msiqid(px_p, ino),
1332 		    PCI_MSIQ_INVALID);
1333 
1334 		hdlp->ih_vector = ino_p->ino_sysino;
1335 		i_ddi_rem_ivintr(hdlp);
1336 
1337 		px_ib_delete_ino(ib_p, ino_p);
1338 
1339 		(void) px_msiq_free(px_p, msiq_id);
1340 		kmem_free(ino_p, sizeof (px_ib_ino_info_t));
1341 	} else {
1342 		/* Re-enable interrupt only if mapping regsiter still shared */
1343 		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
1344 	}
1345 
1346 fail:
1347 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1348 	return (ret);
1349 }
1350