/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * PCI Interrupt Block (RISCx) implementation
 *	initialization
 *	interrupt enable/disable/clear and mapping register manipulation
 */

#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/async.h>
#include <sys/systm.h>		/* panicstr */
#include <sys/spl.h>
#include <sys/sunddi.h>
#include <sys/machsystm.h>	/* intr_dist_add */
#include <sys/ddi_impldefs.h>
#include <sys/clock.h>
#include <sys/cpuvar.h>
#include <sys/pci/pci_obj.h>

#ifdef _STARFIRE
#include <sys/starfire.h>
#endif /* _STARFIRE */

/*LINTLIBRARY*/
static uint_t ib_intr_reset(void *arg);

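/*
 * ib_create: allocate the interrupt block state structure for this pci
 * nexus instance, compute the addresses of the common interrupt
 * registers, and register ib_intr_reset() so interrupts can be idled
 * at panic time.
 */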
void
ib_create(pci_t *pci_p)
{
	dev_info_t *dip = pci_p->pci_dip;
	ib_t *ib_p;
	uintptr_t a;
	int i;

	/*
	 * Allocate interrupt block state structure and link it to
	 * the pci state structure.
	 */
	ib_p = kmem_zalloc(sizeof (ib_t), KM_SLEEP);
	pci_p->pci_ib_p = ib_p;
	ib_p->ib_pci_p = pci_p;

	a = pci_ib_setup(ib_p);

	/*
	 * Determine virtual addresses of interrupt mapping, clear and diag
	 * registers that have common offsets.
	 */
	ib_p->ib_slot_clear_intr_regs =
		a + COMMON_IB_SLOT_CLEAR_INTR_REG_OFFSET;
	ib_p->ib_intr_retry_timer_reg =
		(uint64_t *)(a + COMMON_IB_INTR_RETRY_TIMER_OFFSET);
	ib_p->ib_slot_intr_state_diag_reg =
		(uint64_t *)(a + COMMON_IB_SLOT_INTR_STATE_DIAG_REG);
	ib_p->ib_obio_intr_state_diag_reg =
		(uint64_t *)(a + COMMON_IB_OBIO_INTR_STATE_DIAG_REG);

	if (CHIP_TYPE(pci_p) != PCI_CHIP_XMITS) {
		ib_p->ib_upa_imr[0] = (volatile uint64_t *)
				(a + COMMON_IB_UPA0_INTR_MAP_REG_OFFSET);
		ib_p->ib_upa_imr[1] = (volatile uint64_t *)
				(a + COMMON_IB_UPA1_INTR_MAP_REG_OFFSET);
	}

	DEBUG2(DBG_ATTACH, dip, "ib_create: slot_imr=%x, obio_imr=%x\n",
		ib_p->ib_slot_intr_map_regs, ib_p->ib_obio_intr_map_regs);
	DEBUG2(DBG_ATTACH, dip, "ib_create: slot_cir=%x, obio_cir=%x\n",
		ib_p->ib_slot_clear_intr_regs, ib_p->ib_obio_clear_intr_regs);
	DEBUG2(DBG_ATTACH, dip, "ib_create: upa0_imr=%x, upa1_imr=%x\n",
		ib_p->ib_upa_imr[0], ib_p->ib_upa_imr[1]);
	DEBUG3(DBG_ATTACH, dip,
		"ib_create: retry_timer=%x, obio_diag=%x slot_diag=%x\n",
		ib_p->ib_intr_retry_timer_reg,
		ib_p->ib_obio_intr_state_diag_reg,
		ib_p->ib_slot_intr_state_diag_reg);

	ib_p->ib_ino_lst = (ib_ino_info_t *)NULL;
	mutex_init(&ib_p->ib_intr_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ib_p->ib_ino_lst_mutex, NULL, MUTEX_DRIVER, NULL);

	DEBUG1(DBG_ATTACH, dip, "ib_create: numproxy=%x\n",
		pci_p->pci_numproxy);
	for (i = 1; i <= pci_p->pci_numproxy; i++) {
		set_intr_mapping_reg(pci_p->pci_id,
			(uint64_t *)ib_p->ib_upa_imr[i - 1], i);
	}

	ib_configure(ib_p);
	bus_func_register(BF_TYPE_RESINTR, ib_intr_reset, ib_p);
}

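/*
 * ib_destroy: tear down the interrupt block state created by ib_create();
 * unregister the reset handler, remove this instance from weighted
 * interrupt distribution and free all per-ino state.
 */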
void
ib_destroy(pci_t *pci_p)
{
	ib_t *ib_p = pci_p->pci_ib_p;
	dev_info_t *dip = pci_p->pci_dip;

	DEBUG0(DBG_IB, dip, "ib_destroy\n");
	bus_func_unregister(BF_TYPE_RESINTR, ib_intr_reset, ib_p);

	intr_dist_rem_weighted(ib_intr_dist_all, ib_p);
	mutex_destroy(&ib_p->ib_ino_lst_mutex);
	mutex_destroy(&ib_p->ib_intr_lock);

	ib_free_ino_all(ib_p);

	kmem_free(ib_p, sizeof (ib_t));
	pci_p->pci_ib_p = NULL;
}

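/* program the interrupt retry timer with the global retry interval */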
void
ib_configure(ib_t *ib_p)
{
	/* XXX could be different between psycho and schizo */
	*ib_p->ib_intr_retry_timer_reg = pci_intr_retry_intv;
}

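/*
 * Template for the per-handler "pci_intrs" kstat.  A single static copy
 * is shared by all interrupt kstats; ih_ks_update() fills it in under
 * ih_ks_template_lock each time a kstat is read.
 */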
static struct {
	kstat_named_t ihks_name;
	kstat_named_t ihks_type;
	kstat_named_t ihks_cpu;
	kstat_named_t ihks_pil;
	kstat_named_t ihks_time;
	kstat_named_t ihks_ino;
	kstat_named_t ihks_cookie;
	kstat_named_t ihks_devpath;
	kstat_named_t ihks_buspath;
} ih_ks_template = {
	{ "name",	KSTAT_DATA_CHAR },
	{ "type",	KSTAT_DATA_CHAR },
	{ "cpu",	KSTAT_DATA_UINT64 },
	{ "pil",	KSTAT_DATA_UINT64 },
	{ "time",	KSTAT_DATA_UINT64 },
	{ "ino",	KSTAT_DATA_UINT64 },
	{ "cookie",	KSTAT_DATA_UINT64 },
	{ "devpath",	KSTAT_DATA_STRING },
	{ "buspath",	KSTAT_DATA_STRING },
};
static uint32_t ih_instance;

static kmutex_t ih_ks_template_lock;

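/*
 * Kstat update routine: snapshot the handler's name, cpu, pil,
 * accumulated interrupt time, ino, mondo cookie and device/bus paths
 * into the shared template.
 */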
int
ih_ks_update(kstat_t *ksp, int rw)
{
	ih_t *ih_p = ksp->ks_private;
	int maxlen = sizeof (ih_ks_template.ihks_name.value.c);
	ib_t *ib_p = ih_p->ih_ino_p->ino_ib_p;
	pci_t *pci_p = ib_p->ib_pci_p;
	ib_ino_t ino;
	char ih_devpath[MAXPATHLEN];
	char ih_buspath[MAXPATHLEN];

	ino = ih_p->ih_ino_p->ino_ino;

	(void) snprintf(ih_ks_template.ihks_name.value.c, maxlen, "%s%d",
	    ddi_driver_name(ih_p->ih_dip),
	    ddi_get_instance(ih_p->ih_dip));
	(void) strcpy(ih_ks_template.ihks_type.value.c, "fixed");
	ih_ks_template.ihks_cpu.value.ui64 = ih_p->ih_ino_p->ino_cpuid;
	ih_ks_template.ihks_pil.value.ui64 = ih_p->ih_ino_p->ino_pil;
	ih_ks_template.ihks_time.value.ui64 = ih_p->ih_nsec + (uint64_t)
	    tick2ns((hrtime_t)ih_p->ih_ticks, ih_p->ih_ino_p->ino_cpuid);
	ih_ks_template.ihks_ino.value.ui64 = ino;
	ih_ks_template.ihks_cookie.value.ui64 = IB_INO_TO_MONDO(ib_p, ino);

	(void) ddi_pathname(ih_p->ih_dip, ih_devpath);
	(void) ddi_pathname(pci_p->pci_dip, ih_buspath);
	kstat_named_setstr(&ih_ks_template.ihks_devpath, ih_devpath);
	kstat_named_setstr(&ih_ks_template.ihks_buspath, ih_buspath);

	return (0);
}

/*
 * Can only be used for psycho internal interrupts: thermal, power,
 * ue, ce, pbm.
 */
void
ib_intr_enable(pci_t *pci_p, ib_ino_t ino)
{
	ib_t *ib_p = pci_p->pci_ib_p;
	ib_mondo_t mondo = IB_INO_TO_MONDO(ib_p, ino);
	volatile uint64_t *imr_p = ib_intr_map_reg_addr(ib_p, ino);
	uint_t cpu_id;

	/*
	 * Determine the cpu for the interrupt.
	 */
	mutex_enter(&ib_p->ib_intr_lock);
	cpu_id = intr_dist_cpuid();
#ifdef _STARFIRE
	cpu_id = pc_translate_tgtid(IB2CB(ib_p)->cb_ittrans_cookie, cpu_id,
		IB_GET_MAPREG_INO(ino));
#endif /* _STARFIRE */
	DEBUG2(DBG_IB, pci_p->pci_dip,
		"ib_intr_enable: ino=%x cpu_id=%x\n", ino, cpu_id);

	*imr_p = ib_get_map_reg(mondo, cpu_id);
	IB_INO_INTR_CLEAR(ib_clear_intr_reg_addr(ib_p, ino));
	mutex_exit(&ib_p->ib_intr_lock);
}

/*
 * Disable the interrupt via its interrupt mapping register.
 * Can only be used for internal interrupts: thermal, power, ue, ce, pbm.
 * If called from interrupt context, wait should be set to 0.
 */
void
ib_intr_disable(ib_t *ib_p, ib_ino_t ino, int wait)
{
	volatile uint64_t *imr_p = ib_intr_map_reg_addr(ib_p, ino);
	volatile uint64_t *state_reg_p = IB_INO_INTR_STATE_REG(ib_p, ino);
	hrtime_t start_time;

	/* disable the interrupt */
	mutex_enter(&ib_p->ib_intr_lock);
	IB_INO_INTR_OFF(imr_p);
	*imr_p;	/* flush previous write */
	mutex_exit(&ib_p->ib_intr_lock);

	if (!wait)
		goto wait_done;

	start_time = gethrtime();
	/* busy-wait if an interrupt is being processed */
	while (IB_INO_INTR_PENDING(state_reg_p, ino) && !panicstr) {
		if (gethrtime() - start_time > pci_intrpend_timeout) {
			pbm_t *pbm_p = ib_p->ib_pci_p->pci_pbm_p;
			cmn_err(CE_WARN, "%s:%s: ib_intr_disable timeout %x",
				pbm_p->pbm_nameinst_str,
				pbm_p->pbm_nameaddr_str, ino);
			break;
		}
	}
wait_done:
	IB_INO_INTR_PEND(ib_clear_intr_reg_addr(ib_p, ino));
#ifdef _STARFIRE
	pc_ittrans_cleanup(IB2CB(ib_p)->cb_ittrans_cookie,
	    (volatile uint64_t *)ino);
#endif /* _STARFIRE */
}

/*
 * Can only be used for psycho internal interrupts: thermal, power, ue,
 * ce, pbm.
 */
void
ib_nintr_clear(ib_t *ib_p, ib_ino_t ino)
{
	uint64_t *clr_reg = ib_clear_intr_reg_addr(ib_p, ino);
	IB_INO_INTR_CLEAR(clr_reg);
}

/*
 * Distribute PBM and UPA interrupts.  The caller sets ino to 0 when we
 * are dealing with UPA interrupts (which have no inos).
 */
void
ib_intr_dist_nintr(ib_t *ib_p, ib_ino_t ino, volatile uint64_t *imr_p)
{
	volatile uint64_t imr = *imr_p;
	uint32_t cpu_id;

	if (!IB_INO_INTR_ISON(imr))
		return;

	cpu_id = intr_dist_cpuid();

#ifdef _STARFIRE
	if (ino) {
		cpu_id = pc_translate_tgtid(IB2CB(ib_p)->cb_ittrans_cookie,
			cpu_id, IB_GET_MAPREG_INO(ino));
	}
#else /* _STARFIRE */
	if (ib_map_reg_get_cpu(*imr_p) == cpu_id)
		return;
#endif /* _STARFIRE */

	*imr_p = ib_get_map_reg(IB_IMR2MONDO(imr), cpu_id);
	imr = *imr_p;	/* flush previous write */
}

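/*
 * Retarget a single ino's mapping register at the cpu selected during
 * redistribution: disable the interrupt, wait for any pending interrupt
 * to be serviced, then rewrite the mapping register with the new cpu.
 */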
static void
ib_intr_dist(ib_t *ib_p, ib_ino_info_t *ino_p)
{
	uint32_t cpu_id = ino_p->ino_cpuid;
	ib_ino_t ino = ino_p->ino_ino;
	volatile uint64_t imr, *imr_p, *state_reg;
	hrtime_t start_time;

	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
	imr_p = ib_intr_map_reg_addr(ib_p, ino);
	state_reg = IB_INO_INTR_STATE_REG(ib_p, ino);

#ifdef _STARFIRE
	/*
	 * For Starfire it is a pain to check the current target for
	 * the mondo since we have to read the PC ASIC's ITTR slot
	 * assigned to this mondo.  It is much easier to assume the
	 * current target is always different and reprogram the target
	 * every time.
	 */
	cpu_id = pc_translate_tgtid(IB2CB(ib_p)->cb_ittrans_cookie, cpu_id,
		IB_GET_MAPREG_INO(ino));
#else
	if (ib_map_reg_get_cpu(*imr_p) == cpu_id) /* same cpu, no reprog */
		return;
#endif /* _STARFIRE */

	/* disable interrupt, this could disrupt devices sharing our slot */
	IB_INO_INTR_OFF(imr_p);
	imr = *imr_p;	/* flush previous write */

	/* busy-wait if an interrupt is being processed */
	start_time = gethrtime();
	while (IB_INO_INTR_PENDING(state_reg, ino) && !panicstr) {
		if (gethrtime() - start_time > pci_intrpend_timeout) {
			pbm_t *pbm_p = ib_p->ib_pci_p->pci_pbm_p;
			cmn_err(CE_WARN, "%s:%s: ib_intr_dist(%p,%x) timeout",
				pbm_p->pbm_nameinst_str,
				pbm_p->pbm_nameaddr_str,
				imr_p, IB_INO_TO_MONDO(ib_p, ino));
			break;
		}
	}
	*imr_p = ib_get_map_reg(IB_IMR2MONDO(imr), cpu_id);
	imr = *imr_p;	/* flush previous write */
}

/*
 * Redistribute interrupts of the specified weight.  The first call has a
 * weight of weight_max, which can be used to trigger initialization for
 * redistribution.  The inos with weight [weight_max, inf.) should be
 * processed on the "weight == weight_max" call.  This first call is
 * followed by calls of decreasing weights, on which inos of that weight
 * should be processed.  The final call specifies a weight of zero, which
 * can be used to trigger processing of stragglers.
 */
void
ib_intr_dist_all(void *arg, int32_t weight_max, int32_t weight)
{
	ib_t *ib_p = (ib_t *)arg;
	pci_t *pci_p = ib_p->ib_pci_p;
	ib_ino_info_t *ino_p;
	ih_t *ih_lst;
	int32_t dweight;
	int i;

	if (weight == 0) {
		mutex_enter(&ib_p->ib_intr_lock);
		if (CHIP_TYPE(pci_p) != PCI_CHIP_XMITS) {
			for (i = 0; i < 2; i++)
				ib_intr_dist_nintr(ib_p, 0,
				    ib_p->ib_upa_imr[i]);
		}
		mutex_exit(&ib_p->ib_intr_lock);
	}

	mutex_enter(&ib_p->ib_ino_lst_mutex);

	/* Perform special processing for first call of a redistribution. */
	if (weight == weight_max) {
		for (ino_p = ib_p->ib_ino_lst; ino_p; ino_p = ino_p->ino_next) {

			/*
			 * Clear ino_established of each ino on first call.
			 * The ino_established field may be used by a pci
			 * nexus driver's pci_intr_dist_cpuid implementation
			 * when detecting an established pci slot-cpu binding
			 * for multi-function pci cards.
			 */
			ino_p->ino_established = 0;

			/*
			 * Recompute the ino_intr_weight based on the device
			 * weight of all devinfo nodes sharing the ino (this
			 * will allow us to pick up new weights established by
			 * i_ddi_set_intr_weight()).
			 */
			ino_p->ino_intr_weight = 0;
			for (i = 0, ih_lst = ino_p->ino_ih_head;
			    i < ino_p->ino_ih_size;
			    i++, ih_lst = ih_lst->ih_next) {
				dweight = i_ddi_get_intr_weight(ih_lst->ih_dip);
				if (dweight > 0)
					ino_p->ino_intr_weight += dweight;
			}
		}
	}

	for (ino_p = ib_p->ib_ino_lst; ino_p; ino_p = ino_p->ino_next) {
		uint32_t orig_cpuid;

		/*
		 * Get the weight of the ino and determine if we are going to
		 * process this call.  We wait until an ib_intr_dist_all call
		 * of the proper weight occurs to support redistribution of
		 * all heavy weighted interrupts first (across all nexus
		 * driver instances).  This is done to ensure optimal
		 * INTR_WEIGHTED_DIST behavior.
		 */
		if ((weight == ino_p->ino_intr_weight) ||
		    ((weight >= weight_max) &&
		    (ino_p->ino_intr_weight >= weight_max))) {
			/* select cpuid to target and mark ino established */
			orig_cpuid = ino_p->ino_cpuid;
			if (cpu[orig_cpuid] == NULL)
				orig_cpuid = CPU->cpu_id;
			ino_p->ino_cpuid = pci_intr_dist_cpuid(ib_p, ino_p);
			ino_p->ino_established = 1;

			/* Add device weight of ino devinfos to targeted cpu. */
			for (i = 0, ih_lst = ino_p->ino_ih_head;
			    i < ino_p->ino_ih_size;
			    i++, ih_lst = ih_lst->ih_next) {
				hrtime_t ticks;

				dweight = i_ddi_get_intr_weight(ih_lst->ih_dip);
				intr_dist_cpuid_add_device_weight(
				    ino_p->ino_cpuid, ih_lst->ih_dip, dweight);

				/*
				 * Different cpus may have different clock
				 * speeds.  To account for this, whenever an
				 * interrupt is moved to a new CPU, we
				 * convert the accumulated ticks into nsec,
				 * based upon the clock rate of the prior
				 * CPU.
				 *
				 * It is possible that the prior CPU no longer
				 * exists.  In this case, fall back to using
				 * this CPU's clock rate.
				 *
				 * Note that the value in ih_ticks has already
				 * been corrected for any power savings mode
				 * which might have been in effect.
				 *
				 * Because we are updating two fields in
				 * ih_t we must lock ih_ks_template_lock to
				 * prevent someone from reading the kstats
				 * after we set ih_ticks to 0 and before we
				 * increment ih_nsec to compensate.
				 *
				 * We must also protect against the interrupt
				 * arriving and incrementing ih_ticks between
				 * the time we read it and when we reset it
				 * to 0.  To do this we use atomic_swap.
				 */

				mutex_enter(&ih_ks_template_lock);
				ticks = atomic_swap_64(&ih_lst->ih_ticks, 0);
				ih_lst->ih_nsec += (uint64_t)
				    tick2ns(ticks, orig_cpuid);
				mutex_exit(&ih_ks_template_lock);
			}


			/* program the hardware */
			ib_intr_dist(ib_p, ino_p);
		}
	}
	mutex_exit(&ib_p->ib_ino_lst_mutex);
}

/*
 * Reset interrupts to IDLE.  This function is called during
 * panic handling after redistributing interrupts; it's needed to
 * support dumping to network devices after 'sync' from OBP.
 *
 * N.B.  This routine runs in a context where all other threads
 * are permanently suspended.
 */
static uint_t
ib_intr_reset(void *arg)
{
	ib_t *ib_p = (ib_t *)arg;
	ib_ino_t ino;
	uint64_t *clr_reg;

	/*
	 * Note that we only actually care about interrupts that are
	 * potentially from network devices.
	 */
	for (ino = 0; ino <= ib_p->ib_max_ino; ino++) {
		clr_reg = ib_clear_intr_reg_addr(ib_p, ino);
		IB_INO_INTR_CLEAR(clr_reg);
	}

	return (BF_NONE);
}

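/*
 * Save the interrupt mapping register contents so that ib_resume() can
 * restore them after a suspend/resume cycle.
 */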
void
ib_suspend(ib_t *ib_p)
{
	ib_ino_info_t *ip;
	pci_t *pci_p = ib_p->ib_pci_p;

	/* save ino_lst interrupts' mapping registers content */
	mutex_enter(&ib_p->ib_ino_lst_mutex);
	for (ip = ib_p->ib_ino_lst; ip; ip = ip->ino_next)
		ip->ino_map_reg_save = *ip->ino_map_reg;
	mutex_exit(&ib_p->ib_ino_lst_mutex);

	if (CHIP_TYPE(pci_p) != PCI_CHIP_XMITS) {
		ib_p->ib_upa_imr_state[0] = *ib_p->ib_upa_imr[0];
		ib_p->ib_upa_imr_state[1] = *ib_p->ib_upa_imr[1];
	}
}

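/*
 * Restore the mapping registers saved by ib_suspend(), idling each
 * interrupt before its IMR is rewritten.
 */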
void
ib_resume(ib_t *ib_p)
{
	ib_ino_info_t *ip;
	pci_t *pci_p = ib_p->ib_pci_p;

	/* restore ino_lst interrupts' mapping registers content */
	mutex_enter(&ib_p->ib_ino_lst_mutex);
	for (ip = ib_p->ib_ino_lst; ip; ip = ip->ino_next) {
		IB_INO_INTR_CLEAR(ip->ino_clr_reg);	 /* set intr to idle */
		*ip->ino_map_reg = ip->ino_map_reg_save; /* restore IMR */
	}
	mutex_exit(&ib_p->ib_ino_lst_mutex);

	if (CHIP_TYPE(pci_p) != PCI_CHIP_XMITS) {
		*ib_p->ib_upa_imr[0] = ib_p->ib_upa_imr_state[0];
		*ib_p->ib_upa_imr[1] = ib_p->ib_upa_imr_state[1];
	}
}

/*
 * Locate the ino_info structure on ib_p->ib_ino_lst by ino number.
 * Returns NULL if not found.
 */
ib_ino_info_t *
ib_locate_ino(ib_t *ib_p, ib_ino_t ino_num)
{
	ib_ino_info_t *ino_p = ib_p->ib_ino_lst;
	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));

	for (; ino_p && ino_p->ino_ino != ino_num; ino_p = ino_p->ino_next)
		;
	return (ino_p);
}

#define	IB_INO_TO_SLOT(ino) (IB_IS_OBIO_INO(ino) ? 0xff : ((ino) & 0x1f) >> 2)

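/*
 * Allocate and initialize an ino_info structure for ino_num with ih_p as
 * its first handler, and link it at the head of ib_p->ib_ino_lst.
 */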
ib_ino_info_t *
ib_new_ino(ib_t *ib_p, ib_ino_t ino_num, ih_t *ih_p)
{
	ib_ino_info_t *ino_p = kmem_alloc(sizeof (ib_ino_info_t), KM_SLEEP);
	ino_p->ino_ino = ino_num;
	ino_p->ino_slot_no = IB_INO_TO_SLOT(ino_num);
	ino_p->ino_ib_p = ib_p;
	ino_p->ino_clr_reg = ib_clear_intr_reg_addr(ib_p, ino_num);
	ino_p->ino_map_reg = ib_intr_map_reg_addr(ib_p, ino_num);
	ino_p->ino_unclaimed = 0;

	/*
	 * cannot disable interrupt since we might share slot
	 * IB_INO_INTR_OFF(ino_p->ino_map_reg);
	 */

	ih_p->ih_next = ih_p;
	ino_p->ino_ih_head = ih_p;
	ino_p->ino_ih_tail = ih_p;
	ino_p->ino_ih_start = ih_p;
	ino_p->ino_ih_size = 1;

	ino_p->ino_next = ib_p->ib_ino_lst;
	ib_p->ib_ino_lst = ino_p;
	return (ino_p);
}

/* ino_p was retrieved by a previous call to ib_locate_ino() */
void
ib_delete_ino(ib_t *ib_p, ib_ino_info_t *ino_p)
{
	ib_ino_info_t *list = ib_p->ib_ino_lst;
	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
	if (list == ino_p)
		ib_p->ib_ino_lst = list->ino_next;
	else {
		for (; list->ino_next != ino_p; list = list->ino_next)
			;
		list->ino_next = ino_p->ino_next;
	}
}

/* free all inos when we are detaching */
void
ib_free_ino_all(ib_t *ib_p)
{
	ib_ino_info_t *tmp = ib_p->ib_ino_lst;
	ib_ino_info_t *next = NULL;
	while (tmp) {
		next = tmp->ino_next;
		kmem_free(tmp, sizeof (ib_ino_info_t));
		tmp = next;
	}
}

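/*
 * Add a handler to an existing ino's circular handler list.  The
 * interrupt is masked while the list is updated and re-enabled on
 * return.
 */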
void
ib_ino_add_intr(pci_t *pci_p, ib_ino_info_t *ino_p, ih_t *ih_p)
{
	ib_ino_t ino = ino_p->ino_ino;
	ib_t *ib_p = ino_p->ino_ib_p;
	volatile uint64_t *state_reg = IB_INO_INTR_STATE_REG(ib_p, ino);
	hrtime_t start_time;

	ASSERT(ib_p == pci_p->pci_ib_p);
	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));

	/* disable interrupt, this could disrupt devices sharing our slot */
	IB_INO_INTR_OFF(ino_p->ino_map_reg);
	*ino_p->ino_map_reg;

	/* do NOT modify the linked list until after the busy wait */

	/*
	 * Busy-wait if an interrupt is being processed.
	 * Either the pending state will be cleared by the interrupt wrapper
	 * or the interrupt will be marked as blocked, indicating that it was
	 * jabbering.
	 */
	start_time = gethrtime();
	while ((ino_p->ino_unclaimed <= pci_unclaimed_intr_max) &&
		IB_INO_INTR_PENDING(state_reg, ino) && !panicstr) {
		if (gethrtime() - start_time > pci_intrpend_timeout) {
			pbm_t *pbm_p = pci_p->pci_pbm_p;
			cmn_err(CE_WARN, "%s:%s: ib_ino_add_intr %x timeout",
				pbm_p->pbm_nameinst_str,
				pbm_p->pbm_nameaddr_str, ino);
			break;
		}
	}

	/* link up pci_ispec_t portion of the ppd */
	ih_p->ih_next = ino_p->ino_ih_head;
	ino_p->ino_ih_tail->ih_next = ih_p;
	ino_p->ino_ih_tail = ih_p;

	ino_p->ino_ih_start = ino_p->ino_ih_head;
	ino_p->ino_ih_size++;

	/*
	 * If the interrupt was previously blocked (left in pending state)
	 * because of jabber, we need to clear the pending state in case the
	 * jabber has gone away.
	 */
	if (ino_p->ino_unclaimed > pci_unclaimed_intr_max) {
		cmn_err(CE_WARN,
		    "%s%d: ib_ino_add_intr: ino 0x%x has been unblocked",
		    ddi_driver_name(pci_p->pci_dip),
		    ddi_get_instance(pci_p->pci_dip),
		    ino_p->ino_ino);
		ino_p->ino_unclaimed = 0;
		IB_INO_INTR_CLEAR(ino_p->ino_clr_reg);
	}

	/* re-enable interrupt */
	IB_INO_INTR_ON(ino_p->ino_map_reg);
	*ino_p->ino_map_reg;
}

/*
 * Removes the pci_ispec_t from the ino's linked list.
 * Uses the hardware mutex to lock out interrupt threads.
 * Side effect: the interrupt belonging to that ino is turned off on
 * return; if we are sharing the PCI slot with other inos, the caller
 * needs to turn it back on.
 */
void
ib_ino_rem_intr(pci_t *pci_p, ib_ino_info_t *ino_p, ih_t *ih_p)
{
	int i;
	ib_ino_t ino = ino_p->ino_ino;
	ih_t *ih_lst = ino_p->ino_ih_head;
	volatile uint64_t *state_reg =
		IB_INO_INTR_STATE_REG(ino_p->ino_ib_p, ino);
	hrtime_t start_time;

	ASSERT(MUTEX_HELD(&ino_p->ino_ib_p->ib_ino_lst_mutex));
	/* disable interrupt, this could disrupt devices sharing our slot */
	IB_INO_INTR_OFF(ino_p->ino_map_reg);
	*ino_p->ino_map_reg;

	/* do NOT modify the linked list until after the busy wait */

	/*
	 * Busy-wait if an interrupt is being processed.
	 * Either the pending state will be cleared by the interrupt wrapper
	 * or the interrupt will be marked as blocked, indicating that it was
	 * jabbering.
	 */
	start_time = gethrtime();
	while ((ino_p->ino_unclaimed <= pci_unclaimed_intr_max) &&
		IB_INO_INTR_PENDING(state_reg, ino) && !panicstr) {
		if (gethrtime() - start_time > pci_intrpend_timeout) {
			pbm_t *pbm_p = pci_p->pci_pbm_p;
			cmn_err(CE_WARN, "%s:%s: ib_ino_rem_intr %x timeout",
				pbm_p->pbm_nameinst_str,
				pbm_p->pbm_nameaddr_str, ino);
			break;
		}
	}

	if (ino_p->ino_ih_size == 1) {
		if (ih_lst != ih_p)
			goto not_found;
		/* no need to set head/tail as ino_p will be freed */
		goto reset;
	}


	/*
	 * If the interrupt was previously blocked (left in pending state)
	 * because of jabber, we need to clear the pending state in case the
	 * jabber has gone away.
	 */
	if (ino_p->ino_unclaimed > pci_unclaimed_intr_max) {
		cmn_err(CE_WARN,
		    "%s%d: ib_ino_rem_intr: ino 0x%x has been unblocked",
		    ddi_driver_name(pci_p->pci_dip),
		    ddi_get_instance(pci_p->pci_dip),
		    ino_p->ino_ino);
		ino_p->ino_unclaimed = 0;
		IB_INO_INTR_CLEAR(ino_p->ino_clr_reg);
	}

	/* search the linked list for ih_p */
	for (i = 0;
		(i < ino_p->ino_ih_size) && (ih_lst->ih_next != ih_p);
		i++, ih_lst = ih_lst->ih_next)
		;
	if (ih_lst->ih_next != ih_p)
		goto not_found;

	/* remove ih_p from the linked list and maintain the head/tail */
	ih_lst->ih_next = ih_p->ih_next;
	if (ino_p->ino_ih_head == ih_p)
		ino_p->ino_ih_head = ih_p->ih_next;
	if (ino_p->ino_ih_tail == ih_p)
		ino_p->ino_ih_tail = ih_lst;
	ino_p->ino_ih_start = ino_p->ino_ih_head;
reset:
	if (ih_p->ih_config_handle)
		pci_config_teardown(&ih_p->ih_config_handle);
	if (ih_p->ih_ksp != NULL)
		kstat_delete(ih_p->ih_ksp);
	kmem_free(ih_p, sizeof (ih_t));
	ino_p->ino_ih_size--;

	return;
not_found:
	DEBUG2(DBG_R_INTX, ino_p->ino_ib_p->ib_pci_p->pci_dip,
		"ino_p=%x does not have ih_p=%x\n", ino_p, ih_p);
}

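/*
 * Find the handler on ino_p's list matching the given devinfo node and
 * interrupt number; returns NULL if no match is found.
 */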
ih_t *
ib_ino_locate_intr(ib_ino_info_t *ino_p, dev_info_t *rdip, uint32_t inum)
{
	ih_t *ih_lst = ino_p->ino_ih_head;
	int i;
	for (i = 0; i < ino_p->ino_ih_size; i++, ih_lst = ih_lst->ih_next) {
		if (ih_lst->ih_dip == rdip &&
		    ih_lst->ih_inum == inum)
			return (ih_lst);
	}
	return ((ih_t *)NULL);
}

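/*
 * Allocate and initialize a handler structure and create its
 * "pci_intrs" kstat.  kstat_create() failure is tolerated; the handler
 * simply has no kstat in that case.
 */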
ih_t *
ib_alloc_ih(dev_info_t *rdip, uint32_t inum,
    uint_t (*int_handler)(caddr_t int_handler_arg1, caddr_t int_handler_arg2),
    caddr_t int_handler_arg1,
    caddr_t int_handler_arg2)
{
	ih_t *ih_p;

	ih_p = kmem_alloc(sizeof (ih_t), KM_SLEEP);
	ih_p->ih_dip = rdip;
	ih_p->ih_inum = inum;
	ih_p->ih_intr_state = PCI_INTR_STATE_DISABLE;
	ih_p->ih_handler = int_handler;
	ih_p->ih_handler_arg1 = int_handler_arg1;
	ih_p->ih_handler_arg2 = int_handler_arg2;
	ih_p->ih_config_handle = NULL;
	ih_p->ih_nsec = 0;
	ih_p->ih_ticks = 0;
	ih_p->ih_ksp = kstat_create("pci_intrs",
	    atomic_inc_32_nv(&ih_instance), "config", "interrupts",
	    KSTAT_TYPE_NAMED, sizeof (ih_ks_template) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);
	if (ih_p->ih_ksp != NULL) {
		ih_p->ih_ksp->ks_data_size += MAXPATHLEN * 2;
		ih_p->ih_ksp->ks_lock = &ih_ks_template_lock;
		ih_p->ih_ksp->ks_data = &ih_ks_template;
		ih_p->ih_ksp->ks_private = ih_p;
		ih_p->ih_ksp->ks_update = ih_ks_update;
	}

	return (ih_p);
}

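/*
 * Record a new interrupt state (enabled/disabled) for the handler that
 * matches rdip and the handle's inum.  Returns DDI_FAILURE if the mondo
 * cannot be translated or no matching handler is found.
 */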
int
ib_update_intr_state(pci_t *pci_p, dev_info_t *rdip,
    ddi_intr_handle_impl_t *hdlp, uint_t new_intr_state)
{
	ib_t		*ib_p = pci_p->pci_ib_p;
	ddi_ispec_t	*ip = (ddi_ispec_t *)hdlp->ih_private;
	ib_ino_info_t	*ino_p;
	ib_mondo_t	mondo;
	ih_t		*ih_p;
	int		ret = DDI_FAILURE;

	mutex_enter(&ib_p->ib_ino_lst_mutex);

	if ((mondo = pci_xlate_intr(pci_p->pci_dip, rdip, pci_p->pci_ib_p,
	    IB_MONDO_TO_INO(*ip->is_intr))) == 0) {
		mutex_exit(&ib_p->ib_ino_lst_mutex);
		return (ret);
	}

	if ((ino_p = ib_locate_ino(ib_p, IB_MONDO_TO_INO(mondo))) != NULL) {
		if ((ih_p = ib_ino_locate_intr(ino_p, rdip,
		    hdlp->ih_inum)) != NULL) {
			ih_p->ih_intr_state = new_intr_state;
			ret = DDI_SUCCESS;
		}
	}

	mutex_exit(&ib_p->ib_ino_lst_mutex);
	return (ret);
}