xref: /illumos-gate/usr/src/uts/sun4/io/px/px_ib.c (revision 7c478bd9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * PX Interrupt Block implementation
31  */
32 
33 #include <sys/types.h>
34 #include <sys/kmem.h>
35 #include <sys/async.h>
36 #include <sys/systm.h>		/* panicstr */
37 #include <sys/spl.h>
38 #include <sys/sunddi.h>
39 #include <sys/machsystm.h>	/* intr_dist_add */
40 #include <sys/ddi_impldefs.h>
41 #include <sys/cpuvar.h>
42 #include "px_obj.h"
43 
44 /*LINTLIBRARY*/
45 
46 static void px_ib_intr_redist(void *arg, int32_t weight_max, int32_t weight);
47 static void px_ib_intr_dist_en(dev_info_t *dip, cpuid_t cpu_id, devino_t ino,
48     boolean_t wait_flag);
49 static uint_t px_ib_intr_reset(void *arg);
50 
51 int
52 px_ib_attach(px_t *px_p)
53 {
54 	dev_info_t	*dip = px_p->px_dip;
55 	px_ib_t		*ib_p;
56 	sysino_t	sysino;
57 	px_fault_t	*fault_p = &px_p->px_fault;
58 
59 	DBG(DBG_IB, dip, "px_ib_attach\n");
60 
61 	if (px_lib_intr_devino_to_sysino(px_p->px_dip,
62 	    px_p->px_inos[PX_FAULT_PEC], &sysino) != DDI_SUCCESS)
63 		return (DDI_FAILURE);
64 
65 	/*
66 	 * Allocate interrupt block state structure and link it to
67 	 * the px state structure.
68 	 */
69 	ib_p = kmem_zalloc(sizeof (px_ib_t), KM_SLEEP);
70 	px_p->px_ib_p = ib_p;
71 	ib_p->ib_px_p = px_p;
72 	ib_p->ib_ino_lst = (px_ib_ino_info_t *)NULL;
73 
74 	mutex_init(&ib_p->ib_intr_lock, NULL, MUTEX_DRIVER, NULL);
75 	mutex_init(&ib_p->ib_ino_lst_mutex, NULL, MUTEX_DRIVER, NULL);
76 
77 	bus_func_register(BF_TYPE_RESINTR, px_ib_intr_reset, ib_p);
78 
79 	intr_dist_add_weighted(px_ib_intr_redist, ib_p);
80 
81 	/*
82 	 * Initialize PEC fault data structure
83 	 */
84 	fault_p->px_fh_dip = dip;
85 	fault_p->px_fh_sysino = sysino;
86 	fault_p->px_fh_lst = NULL;
87 	mutex_init(&fault_p->px_fh_lock, NULL, MUTEX_DRIVER, NULL);
88 
89 	/* Register IMU error */
90 	px_err_add_fh(fault_p, PX_ERR_IMU,
91 	    (caddr_t)px_p->px_address[PX_REG_CSR]);
92 
93 	return (DDI_SUCCESS);
94 }
95 
96 void
97 px_ib_detach(px_t *px_p)
98 {
99 	px_ib_t		*ib_p = px_p->px_ib_p;
100 	dev_info_t	*dip = px_p->px_dip;
101 
102 	DBG(DBG_IB, dip, "px_ib_detach\n");
103 
104 	px_err_rem(&px_p->px_fault, PX_FAULT_PEC);
105 
106 	bus_func_unregister(BF_TYPE_RESINTR, px_ib_intr_reset, ib_p);
107 	intr_dist_rem_weighted(px_ib_intr_redist, ib_p);
108 
109 	mutex_destroy(&ib_p->ib_ino_lst_mutex);
110 	mutex_destroy(&ib_p->ib_intr_lock);
111 
112 	px_ib_free_ino_all(ib_p);
113 
114 	px_p->px_ib_p = NULL;
115 	kmem_free(ib_p, sizeof (px_ib_t));
116 }
117 
118 static struct {
119 	kstat_named_t ihks_name;
120 	kstat_named_t ihks_type;
121 	kstat_named_t ihks_cpu;
122 	kstat_named_t ihks_pil;
123 	kstat_named_t ihks_time;
124 	kstat_named_t ihks_ino;
125 	kstat_named_t ihks_cookie;
126 	kstat_named_t ihks_devpath;
127 	kstat_named_t ihks_buspath;
128 } px_ih_ks_template = {
129 	{ "name",	KSTAT_DATA_CHAR },
130 	{ "type",	KSTAT_DATA_CHAR },
131 	{ "cpu",	KSTAT_DATA_UINT64 },
132 	{ "pil",	KSTAT_DATA_UINT64 },
133 	{ "time",	KSTAT_DATA_UINT64 },
134 	{ "ino",	KSTAT_DATA_UINT64 },
135 	{ "cookie",	KSTAT_DATA_UINT64 },
136 	{ "devpath",	KSTAT_DATA_STRING },
137 	{ "buspath",	KSTAT_DATA_STRING },
138 };
139 
140 static uint32_t ih_instance;
141 static kmutex_t ih_ks_template_lock;
142 
143 int
144 ih_ks_update(kstat_t *ksp, int rw)
145 {
146 	px_ih_t *ih_p = ksp->ks_private;
147 	int maxlen = sizeof (px_ih_ks_template.ihks_name.value.c);
148 	px_ib_t *ib_p = ih_p->ih_ino_p->ino_ib_p;
149 	px_t *px_p = ib_p->ib_px_p;
150 	devino_t ino;
151 	sysino_t sysino;
152 	char ih_devpath[MAXPATHLEN];
153 	char ih_buspath[MAXPATHLEN];
154 
155 	ino = ih_p->ih_ino_p->ino_ino;
156 	(void) px_lib_intr_devino_to_sysino(px_p->px_dip, ino, &sysino);
157 
158 	(void) snprintf(px_ih_ks_template.ihks_name.value.c, maxlen, "%s%d",
159 	    ddi_driver_name(ih_p->ih_dip),
160 	    ddi_get_instance(ih_p->ih_dip));
161 
162 	(void) strcpy(px_ih_ks_template.ihks_type.value.c,
163 	    (ih_p->ih_rec_type == 0) ? "fixed" : "msi");
164 	px_ih_ks_template.ihks_cpu.value.ui64 = ih_p->ih_ino_p->ino_cpuid;
165 	px_ih_ks_template.ihks_pil.value.ui64 = ih_p->ih_ino_p->ino_pil;
166 	px_ih_ks_template.ihks_time.value.ui64 = ih_p->ih_nsec + (uint64_t)
167 	    tick2ns((hrtime_t)ih_p->ih_ticks, ih_p->ih_ino_p->ino_cpuid);
168 	px_ih_ks_template.ihks_ino.value.ui64 = ino;
169 	px_ih_ks_template.ihks_cookie.value.ui64 = sysino;
170 
171 	(void) ddi_pathname(ih_p->ih_dip, ih_devpath);
172 	(void) ddi_pathname(px_p->px_dip, ih_buspath);
173 	kstat_named_setstr(&px_ih_ks_template.ihks_devpath, ih_devpath);
174 	kstat_named_setstr(&px_ih_ks_template.ihks_buspath, ih_buspath);
175 
176 	return (0);
177 }
178 
179 void
180 px_ib_intr_enable(px_t *px_p, cpuid_t cpu_id, devino_t ino)
181 {
182 	px_ib_t		*ib_p = px_p->px_ib_p;
183 	sysino_t	sysino;
184 
185 	/*
186 	 * Determine the cpu for the interrupt
187 	 */
188 	mutex_enter(&ib_p->ib_intr_lock);
189 
190 	DBG(DBG_IB, px_p->px_dip,
191 	    "px_ib_intr_enable: ino=%x cpu_id=%x\n", ino, cpu_id);
192 
193 	if (px_lib_intr_devino_to_sysino(px_p->px_dip, ino,
194 	    &sysino) != DDI_SUCCESS) {
195 		DBG(DBG_IB, px_p->px_dip,
196 		    "px_ib_intr_enable: px_intr_devino_to_sysino() failed\n");
197 
198 		mutex_exit(&ib_p->ib_intr_lock);
199 		return;
200 	}
201 
202 	PX_INTR_ENABLE(px_p->px_dip, sysino, cpu_id);
203 
204 	mutex_exit(&ib_p->ib_intr_lock);
205 }
206 
207 /*ARGSUSED*/
208 void
209 px_ib_intr_disable(px_ib_t *ib_p, devino_t ino, int wait)
210 {
211 	sysino_t	sysino;
212 
213 	mutex_enter(&ib_p->ib_intr_lock);
214 
215 	DBG(DBG_IB, ib_p->ib_px_p->px_dip, "px_ib_intr_disable: ino=%x\n", ino);
216 
217 	/* Disable the interrupt */
218 	if (px_lib_intr_devino_to_sysino(ib_p->ib_px_p->px_dip, ino,
219 	    &sysino) != DDI_SUCCESS) {
220 		DBG(DBG_IB, ib_p->ib_px_p->px_dip,
221 		    "px_ib_intr_disable: px_intr_devino_to_sysino() failed\n");
222 
223 		mutex_exit(&ib_p->ib_intr_lock);
224 		return;
225 	}
226 
227 	PX_INTR_DISABLE(ib_p->ib_px_p->px_dip, sysino);
228 
229 	mutex_exit(&ib_p->ib_intr_lock);
230 }
231 
232 
233 static void
234 px_ib_intr_dist_en(dev_info_t *dip, cpuid_t cpu_id, devino_t ino,
235     boolean_t wait_flag)
236 {
237 	uint32_t	old_cpu_id;
238 	sysino_t	sysino;
239 	intr_valid_state_t	enabled = 0;
240 	hrtime_t	start_time;
241 	intr_state_t	intr_state;
242 	int		e;
243 
244 	DBG(DBG_IB, dip, "px_ib_intr_dist_en: ino=0x%x\n", ino);
245 
246 	if (px_lib_intr_devino_to_sysino(dip, ino, &sysino) != DDI_SUCCESS) {
247 		DBG(DBG_IB, dip, "px_ib_intr_dist_en: "
248 		    "px_intr_devino_to_sysino() failed, ino 0x%x\n", ino);
249 		return;
250 	}
251 
252 	/* Skip enabling disabled interrupts */
253 	if (px_lib_intr_getvalid(dip, sysino, &enabled) != DDI_SUCCESS) {
254 		DBG(DBG_IB, dip, "px_ib_intr_dist_en: px_intr_getvalid() "
255 		    "failed, sysino 0x%x\n", sysino);
256 		return;
257 	}
258 	if (!enabled)
259 		return;
260 
261 	/* Done if redistributed onto the same cpuid */
262 	if (px_lib_intr_gettarget(dip, sysino, &old_cpu_id) != DDI_SUCCESS) {
263 		DBG(DBG_IB, dip, "px_ib_intr_dist_en: "
264 		    "px_intr_gettarget() failed\n");
265 		return;
266 	}
267 	if (cpu_id == old_cpu_id)
268 		return;
269 
270 	if (!wait_flag)
271 		goto done;
272 
273 	/* Busy wait on pending interrupts */
274 	PX_INTR_DISABLE(dip, sysino);
275 
276 	for (start_time = gethrtime(); !panicstr &&
277 	    ((e = px_lib_intr_getstate(dip, sysino, &intr_state)) ==
278 		DDI_SUCCESS) &&
279 	    (intr_state == INTR_DELIVERED_STATE); /* */) {
280 		if (gethrtime() - start_time > px_intrpend_timeout) {
281 			cmn_err(CE_WARN,
282 			    "%s%d: px_ib_intr_dist_en: sysino 0x%x(ino 0x%x) "
283 			    "from cpu id 0x%x to 0x%x timeout",
284 			    ddi_driver_name(dip), ddi_get_instance(dip),
285 			    sysino, ino, old_cpu_id, cpu_id);
286 
287 			e = DDI_FAILURE;
288 			break;
289 		}
290 	}
291 
292 	if (e != DDI_SUCCESS)
293 		DBG(DBG_IB, dip, "px_ib_intr_dist_en: failed, "
294 		    "ino 0x%x sysino 0x%x\n", ino, sysino);
295 
296 done:
297 	PX_INTR_ENABLE(dip, sysino, cpu_id);
298 }
299 
300 
301 /*
302  * Redistribute interrupts of the specified weight. The first call has a weight
303  * of weight_max, which can be used to trigger initialization for
304  * redistribution. The inos with weight [weight_max, inf.) should be processed
305  * on the "weight == weight_max" call.  This first call is followed by calls
306  * of decreasing weights, inos of that weight should be processed.  The final
307  * call specifies a weight of zero, this can be used to trigger processing of
308  * stragglers.
309  */
310 static void
311 px_ib_intr_redist(void *arg, int32_t weight_max, int32_t weight)
312 {
313 	px_ib_t		*ib_p = (px_ib_t *)arg;
314 	px_t		*px_p = ib_p->ib_px_p;
315 	dev_info_t	*dip = px_p->px_dip;
316 	px_ib_ino_info_t *ino_p;
317 	px_ih_t		*ih_lst;
318 	int32_t		dweight = 0;
319 	int		i;
320 
321 	/* Redistribute internal interrupts */
322 	if (weight == 0) {
323 		devino_t	ino_pec = px_p->px_inos[PX_INTR_PEC];
324 		mutex_enter(&ib_p->ib_intr_lock);
325 		px_ib_intr_dist_en(dip, intr_dist_cpuid(), ino_pec, B_FALSE);
326 		mutex_exit(&ib_p->ib_intr_lock);
327 	}
328 
329 	/* Redistribute device interrupts */
330 	mutex_enter(&ib_p->ib_ino_lst_mutex);
331 
332 	for (ino_p = ib_p->ib_ino_lst; ino_p; ino_p = ino_p->ino_next) {
333 		uint32_t orig_cpuid;
334 
335 		/*
336 		 * Recomputes the sum of interrupt weights of devices that
337 		 * share the same ino upon first call marked by
338 		 * (weight == weight_max).
339 		 */
340 		if (weight == weight_max) {
341 			ino_p->ino_intr_weight = 0;
342 			for (i = 0, ih_lst = ino_p->ino_ih_head;
343 			    i < ino_p->ino_ih_size;
344 			    i++, ih_lst = ih_lst->ih_next) {
345 				dweight = i_ddi_get_intr_weight(ih_lst->ih_dip);
346 				if (dweight > 0)
347 					ino_p->ino_intr_weight += dweight;
348 			}
349 		}
350 
351 		/*
352 		 * As part of redistributing weighted interrupts over cpus,
353 		 * nexus redistributes device interrupts and updates
354 		 * cpu weight. The purpose is for the most light weighted
355 		 * cpu to take the next interrupt and gain weight, therefore
356 		 * attention demanding device gains more cpu attention by
357 		 * making itself heavy.
358 		 */
359 		if ((weight == ino_p->ino_intr_weight) ||
360 		    ((weight >= weight_max) &&
361 		    (ino_p->ino_intr_weight >= weight_max))) {
362 			orig_cpuid = ino_p->ino_cpuid;
363 			if (cpu[orig_cpuid] == NULL)
364 				orig_cpuid = CPU->cpu_id;
365 
366 			/* select cpuid to target and mark ino established */
367 			ino_p->ino_cpuid = intr_dist_cpuid();
368 
369 			/* Add device weight to targeted cpu. */
370 			for (i = 0, ih_lst = ino_p->ino_ih_head;
371 			    i < ino_p->ino_ih_size;
372 			    i++, ih_lst = ih_lst->ih_next) {
373 				hrtime_t ticks;
374 
375 				dweight = i_ddi_get_intr_weight(ih_lst->ih_dip);
376 				intr_dist_cpuid_add_device_weight(
377 				    ino_p->ino_cpuid, ih_lst->ih_dip, dweight);
378 
379 				/*
380 				 * different cpus may have different clock
381 				 * speeds. to account for this, whenever an
382 				 * interrupt is moved to a new CPU, we
383 				 * convert the accumulated ticks into nsec,
384 				 * based upon the clock rate of the prior
385 				 * CPU.
386 				 *
387 				 * It is possible that the prior CPU no longer
388 				 * exists. In this case, fall back to using
389 				 * this CPU's clock rate.
390 				 *
391 				 * Note that the value in ih_ticks has already
392 				 * been corrected for any power savings mode
393 				 * which might have been in effect.
394 				 *
395 				 * because we are updating two fields in
396 				 * ih_t we must lock ih_ks_template_lock to
397 				 * prevent someone from reading the kstats
398 				 * after we set ih_ticks to 0 and before we
399 				 * increment ih_nsec to compensate.
400 				 *
401 				 * we must also protect against the interrupt
402 				 * arriving and incrementing ih_ticks between
403 				 * the time we read it and when we reset it
404 				 * to 0. To do this we use atomic_swap.
405 				 */
406 
407 				mutex_enter(&ih_ks_template_lock);
408 				ticks = atomic_swap_64(&ih_lst->ih_ticks, 0);
409 				ih_lst->ih_nsec += (uint64_t)
410 				    tick2ns(ticks, orig_cpuid);
411 				mutex_exit(&ih_ks_template_lock);
412 			}
413 
414 			/* enable interrupt on new targeted cpu */
415 			px_ib_intr_dist_en(dip, ino_p->ino_cpuid,
416 			    ino_p->ino_ino, B_TRUE);
417 		}
418 	}
419 	mutex_exit(&ib_p->ib_ino_lst_mutex);
420 }
421 
422 /*
423  * Reset interrupts to IDLE.  This function is called during
424  * panic handling after redistributing interrupts; it's needed to
425  * support dumping to network devices after 'sync' from OBP.
426  *
427  * N.B.  This routine runs in a context where all other threads
428  * are permanently suspended.
429  */
430 static uint_t
431 px_ib_intr_reset(void *arg)
432 {
433 	px_ib_t		*ib_p = (px_ib_t *)arg;
434 
435 	DBG(DBG_IB, ib_p->ib_px_p->px_dip, "px_ib_intr_reset\n");
436 
437 	if (px_lib_intr_reset(ib_p->ib_px_p->px_dip) != DDI_SUCCESS)
438 		return (BF_FATAL);
439 
440 	return (BF_NONE);
441 }
442 
443 /*
444  * Locate ino_info structure on ib_p->ib_ino_lst according to ino#
445  * returns NULL if not found.
446  */
447 px_ib_ino_info_t *
448 px_ib_locate_ino(px_ib_t *ib_p, devino_t ino_num)
449 {
450 	px_ib_ino_info_t	*ino_p = ib_p->ib_ino_lst;
451 
452 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
453 
454 	for (; ino_p && ino_p->ino_ino != ino_num; ino_p = ino_p->ino_next);
455 
456 	return (ino_p);
457 }
458 
459 px_ib_ino_info_t *
460 px_ib_new_ino(px_ib_t *ib_p, devino_t ino_num, px_ih_t *ih_p)
461 {
462 	px_ib_ino_info_t	*ino_p = kmem_alloc(sizeof (px_ib_ino_info_t),
463 	    KM_SLEEP);
464 	sysino_t	sysino;
465 
466 	ino_p->ino_ino = ino_num;
467 	ino_p->ino_ib_p = ib_p;
468 	ino_p->ino_unclaimed = 0;
469 
470 	if (px_lib_intr_devino_to_sysino(ib_p->ib_px_p->px_dip, ino_p->ino_ino,
471 	    &sysino) != DDI_SUCCESS)
472 		return (NULL);
473 
474 	ino_p->ino_sysino = sysino;
475 
476 	/*
477 	 * Cannot disable interrupt since we might share slot
478 	 */
479 	ih_p->ih_next = ih_p;
480 	ino_p->ino_ih_head = ih_p;
481 	ino_p->ino_ih_tail = ih_p;
482 	ino_p->ino_ih_start = ih_p;
483 	ino_p->ino_ih_size = 1;
484 
485 	ino_p->ino_next = ib_p->ib_ino_lst;
486 	ib_p->ib_ino_lst = ino_p;
487 
488 	return (ino_p);
489 }
490 
491 /*
492  * The ino_p is retrieved by previous call to px_ib_locate_ino().
493  */
494 void
495 px_ib_delete_ino(px_ib_t *ib_p, px_ib_ino_info_t *ino_p)
496 {
497 	px_ib_ino_info_t	*list = ib_p->ib_ino_lst;
498 
499 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
500 
501 	if (list == ino_p)
502 		ib_p->ib_ino_lst = list->ino_next;
503 	else {
504 		for (; list->ino_next != ino_p; list = list->ino_next);
505 		list->ino_next = ino_p->ino_next;
506 	}
507 }
508 
509 /*
510  * Free all ino when we are detaching.
511  */
512 void
513 px_ib_free_ino_all(px_ib_t *ib_p)
514 {
515 	px_ib_ino_info_t	*tmp = ib_p->ib_ino_lst;
516 	px_ib_ino_info_t	*next = NULL;
517 
518 	while (tmp) {
519 		next = tmp->ino_next;
520 		kmem_free(tmp, sizeof (px_ib_ino_info_t));
521 		tmp = next;
522 	}
523 }
524 
525 int
526 px_ib_ino_add_intr(px_t *px_p, px_ib_ino_info_t *ino_p, px_ih_t *ih_p)
527 {
528 	px_ib_t		*ib_p = ino_p->ino_ib_p;
529 	devino_t	ino = ino_p->ino_ino;
530 	sysino_t	sysino = ino_p->ino_sysino;
531 	dev_info_t	*dip = px_p->px_dip;
532 	cpuid_t		curr_cpu;
533 	hrtime_t	start_time;
534 	intr_state_t	intr_state;
535 	int		ret = DDI_SUCCESS;
536 
537 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
538 	ASSERT(ib_p == px_p->px_ib_p);
539 
540 	DBG(DBG_IB, dip, "px_ib_ino_add_intr ino=%x\n", ino_p->ino_ino);
541 
542 	/* Disable the interrupt */
543 	if ((ret = px_lib_intr_gettarget(dip, sysino,
544 	    &curr_cpu)) != DDI_SUCCESS) {
545 		DBG(DBG_IB, dip,
546 		    "px_ib_ino_add_intr px_intr_gettarget() failed\n");
547 
548 		return (ret);
549 	}
550 
551 	PX_INTR_DISABLE(dip, sysino);
552 
553 	/* Busy wait on pending interrupt */
554 	for (start_time = gethrtime(); !panicstr &&
555 	    ((ret = px_lib_intr_getstate(dip, sysino, &intr_state))
556 	    == DDI_SUCCESS) && (intr_state == INTR_DELIVERED_STATE); /* */) {
557 		if (gethrtime() - start_time > px_intrpend_timeout) {
558 			cmn_err(CE_WARN, "%s%d: px_ib_ino_add_intr: pending "
559 			    "sysino 0x%x(ino 0x%x) timeout",
560 			    ddi_driver_name(dip), ddi_get_instance(dip),
561 			    sysino, ino);
562 
563 			ret = DDI_FAILURE;
564 			break;
565 		}
566 	}
567 
568 	if (ret != DDI_SUCCESS) {
569 		DBG(DBG_IB, dip, "px_ib_ino_add_intr: failed, "
570 		    "ino 0x%x sysino 0x%x\n", ino, sysino);
571 
572 		return (ret);
573 	}
574 
575 	/* Link up px_ispec_t portion of the ppd */
576 	ih_p->ih_next = ino_p->ino_ih_head;
577 	ino_p->ino_ih_tail->ih_next = ih_p;
578 	ino_p->ino_ih_tail = ih_p;
579 
580 	ino_p->ino_ih_start = ino_p->ino_ih_head;
581 	ino_p->ino_ih_size++;
582 
583 	/*
584 	 * If the interrupt was previously blocked (left in pending state)
585 	 * because of jabber we need to clear the pending state in case the
586 	 * jabber has gone away.
587 	 */
588 	if (ino_p->ino_unclaimed > px_unclaimed_intr_max) {
589 		cmn_err(CE_WARN,
590 		    "%s%d: px_ib_ino_add_intr: ino 0x%x has been unblocked",
591 		    ddi_driver_name(dip), ddi_get_instance(dip), ino);
592 
593 		ino_p->ino_unclaimed = 0;
594 		if ((ret = px_lib_intr_setstate(dip, sysino,
595 		    INTR_IDLE_STATE)) != DDI_SUCCESS) {
596 			DBG(DBG_IB, px_p->px_dip,
597 			    "px_ib_ino_add_intr px_intr_setstate failed\n");
598 
599 			return (ret);
600 		}
601 	}
602 
603 	/* Re-enable interrupt */
604 	PX_INTR_ENABLE(dip, sysino, curr_cpu);
605 
606 	return (ret);
607 }
608 
609 /*
610  * Removes px_ispec_t from the ino's link list.
611  * uses hardware mutex to lock out interrupt threads.
612  * Side effects: interrupt belongs to that ino is turned off on return.
613  * if we are sharing PX slot with other inos, the caller needs
614  * to turn it back on.
615  */
616 int
617 px_ib_ino_rem_intr(px_t *px_p, px_ib_ino_info_t *ino_p, px_ih_t *ih_p)
618 {
619 	devino_t	ino = ino_p->ino_ino;
620 	sysino_t	sysino = ino_p->ino_sysino;
621 	dev_info_t	*dip = px_p->px_dip;
622 	px_ih_t		*ih_lst = ino_p->ino_ih_head;
623 	hrtime_t	start_time;
624 	intr_state_t	intr_state;
625 	int		i, ret = DDI_SUCCESS;
626 
627 	ASSERT(MUTEX_HELD(&ino_p->ino_ib_p->ib_ino_lst_mutex));
628 
629 	DBG(DBG_IB, px_p->px_dip, "px_ib_ino_rem_intr ino=%x\n",
630 	    ino_p->ino_ino);
631 
632 	/* Disable the interrupt */
633 	PX_INTR_DISABLE(px_p->px_dip, sysino);
634 
635 	if (ino_p->ino_ih_size == 1) {
636 		if (ih_lst != ih_p)
637 			goto not_found;
638 
639 		/* No need to set head/tail as ino_p will be freed */
640 		goto reset;
641 	}
642 
643 	/* Busy wait on pending interrupt */
644 	for (start_time = gethrtime(); !panicstr &&
645 	    ((ret = px_lib_intr_getstate(dip, sysino, &intr_state))
646 	    == DDI_SUCCESS) && (intr_state == INTR_DELIVERED_STATE); /* */) {
647 		if (gethrtime() - start_time > px_intrpend_timeout) {
648 			cmn_err(CE_WARN, "%s%d: px_ib_ino_rem_intr: pending "
649 			    "sysino 0x%x(ino 0x%x) timeout",
650 			    ddi_driver_name(dip), ddi_get_instance(dip),
651 			    sysino, ino);
652 
653 			ret = DDI_FAILURE;
654 			break;
655 		}
656 	}
657 
658 	if (ret != DDI_SUCCESS) {
659 		DBG(DBG_IB, dip, "px_ib_ino_rem_intr: failed, "
660 		    "ino 0x%x sysino 0x%x\n", ino, sysino);
661 
662 		return (ret);
663 	}
664 
665 	/*
666 	 * If the interrupt was previously blocked (left in pending state)
667 	 * because of jabber we need to clear the pending state in case the
668 	 * jabber has gone away.
669 	 */
670 	if (ino_p->ino_unclaimed > px_unclaimed_intr_max) {
671 		cmn_err(CE_WARN, "%s%d: px_ib_ino_rem_intr: "
672 		    "ino 0x%x has been unblocked",
673 		    ddi_driver_name(dip), ddi_get_instance(dip), ino);
674 
675 		ino_p->ino_unclaimed = 0;
676 		if ((ret = px_lib_intr_setstate(dip, sysino,
677 		    INTR_IDLE_STATE)) != DDI_SUCCESS) {
678 			DBG(DBG_IB, px_p->px_dip,
679 			    "px_ib_ino_rem_intr px_intr_setstate failed\n");
680 
681 			return (ret);
682 		}
683 	}
684 
685 	/* Search the link list for ih_p */
686 	for (i = 0; (i < ino_p->ino_ih_size) &&
687 	    (ih_lst->ih_next != ih_p); i++, ih_lst = ih_lst->ih_next);
688 
689 	if (ih_lst->ih_next != ih_p)
690 		goto not_found;
691 
692 	/* Remove ih_p from the link list and maintain the head/tail */
693 	ih_lst->ih_next = ih_p->ih_next;
694 
695 	if (ino_p->ino_ih_head == ih_p)
696 		ino_p->ino_ih_head = ih_p->ih_next;
697 	if (ino_p->ino_ih_tail == ih_p)
698 		ino_p->ino_ih_tail = ih_lst;
699 
700 	ino_p->ino_ih_start = ino_p->ino_ih_head;
701 
702 reset:
703 	if (ih_p->ih_config_handle)
704 		pci_config_teardown(&ih_p->ih_config_handle);
705 	if (ih_p->ih_ksp != NULL)
706 		kstat_delete(ih_p->ih_ksp);
707 
708 	kmem_free(ih_p, sizeof (px_ih_t));
709 	ino_p->ino_ih_size--;
710 
711 	return (ret);
712 
713 not_found:
714 	DBG(DBG_R_INTX, ino_p->ino_ib_p->ib_px_p->px_dip,
715 		"ino_p=%x does not have ih_p=%x\n", ino_p, ih_p);
716 
717 	return (DDI_FAILURE);
718 }
719 
720 px_ih_t *
721 px_ib_ino_locate_intr(px_ib_ino_info_t *ino_p, dev_info_t *rdip,
722     uint32_t inum, msiq_rec_type_t rec_type, msgcode_t msg_code)
723 {
724 	px_ih_t	*ih_lst = ino_p->ino_ih_head;
725 	int	i;
726 
727 	for (i = 0; i < ino_p->ino_ih_size; i++, ih_lst = ih_lst->ih_next) {
728 		if ((ih_lst->ih_dip == rdip) && (ih_lst->ih_inum == inum) &&
729 		    (ih_lst->ih_rec_type == rec_type) &&
730 		    (ih_lst->ih_msg_code == msg_code))
731 			return (ih_lst);
732 	}
733 
734 	return ((px_ih_t *)NULL);
735 }
736 
737 px_ih_t *
738 px_ib_alloc_ih(dev_info_t *rdip, uint32_t inum,
739     uint_t (*int_handler)(caddr_t int_handler_arg1, caddr_t int_handler_arg2),
740     caddr_t int_handler_arg1, caddr_t int_handler_arg2,
741     msiq_rec_type_t rec_type, msgcode_t msg_code)
742 {
743 	px_ih_t	*ih_p;
744 
745 	ih_p = kmem_alloc(sizeof (px_ih_t), KM_SLEEP);
746 	ih_p->ih_dip = rdip;
747 	ih_p->ih_inum = inum;
748 	ih_p->ih_intr_state = PX_INTR_STATE_DISABLE;
749 	ih_p->ih_handler = int_handler;
750 	ih_p->ih_handler_arg1 = int_handler_arg1;
751 	ih_p->ih_handler_arg2 = int_handler_arg2;
752 	ih_p->ih_config_handle = NULL;
753 	ih_p->ih_rec_type = rec_type;
754 	ih_p->ih_msg_code = msg_code;
755 	ih_p->ih_nsec = 0;
756 	ih_p->ih_ticks = 0;
757 
758 	/*
759 	 * Create pci_intrs::: kstats for all ih types except messages,
760 	 * which represent unusual conditions and don't need to be tracked.
761 	 */
762 	ih_p->ih_ksp = NULL;
763 	if (rec_type == 0 || rec_type == MSI32_REC || rec_type == MSI64_REC) {
764 		ih_p->ih_ksp = kstat_create("pci_intrs",
765 		    atomic_inc_32_nv(&ih_instance), "config", "interrupts",
766 		    KSTAT_TYPE_NAMED,
767 		    sizeof (px_ih_ks_template) / sizeof (kstat_named_t),
768 		    KSTAT_FLAG_VIRTUAL);
769 	}
770 	if (ih_p->ih_ksp != NULL) {
771 		ih_p->ih_ksp->ks_data_size += MAXPATHLEN * 2;
772 		ih_p->ih_ksp->ks_lock = &ih_ks_template_lock;
773 		ih_p->ih_ksp->ks_data = &px_ih_ks_template;
774 		ih_p->ih_ksp->ks_private = ih_p;
775 		ih_p->ih_ksp->ks_update = ih_ks_update;
776 	}
777 
778 	return (ih_p);
779 }
780 
781 /*
782  * Only used for fixed or legacy interrupts.
783  */
784 int
785 px_ib_update_intr_state(px_t *px_p, dev_info_t *rdip,
786     uint_t inum, devino_t ino, uint_t new_intr_state)
787 {
788 	px_ib_t		*ib_p = px_p->px_ib_p;
789 	px_ib_ino_info_t *ino_p;
790 	px_ih_t		*ih_p;
791 	int		ret = DDI_FAILURE;
792 
793 	DBG(DBG_IB, px_p->px_dip, "ib_update_intr_state: %s%d "
794 	    "inum %x devino %x state %x\n", ddi_driver_name(rdip),
795 	    ddi_get_instance(rdip), inum, ino, new_intr_state);
796 
797 	mutex_enter(&ib_p->ib_ino_lst_mutex);
798 
799 	if (ino_p = px_ib_locate_ino(ib_p, ino)) {
800 		if (ih_p = px_ib_ino_locate_intr(ino_p, rdip, inum, 0, 0)) {
801 			ih_p->ih_intr_state = new_intr_state;
802 			ret = DDI_SUCCESS;
803 		}
804 	}
805 
806 	mutex_exit(&ib_p->ib_ino_lst_mutex);
807 	return (ret);
808 }
809 
810 int
811 px_imu_intr(dev_info_t *dip, px_fh_t *fh_p)
812 {
813 	uint32_t offset = px_fhd_tbl[fh_p->fh_err_id].fhd_st;
814 	uint64_t stat = fh_p->fh_stat;
815 	if (stat)
816 		LOG(DBG_ERR_INTR, dip, "[%x]=%16llx imu stat\n", offset, stat);
817 	return (stat ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
818 }
819