xref: /illumos-gate/usr/src/uts/i86pc/os/x_call.c (revision 7c478bd9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Facilities for cross-processor subroutine calls using "mailbox" interrupts.
31  *
32  */
33 
34 #include <sys/types.h>
35 
36 #include <sys/param.h>
37 #include <sys/t_lock.h>
38 #include <sys/thread.h>
39 #include <sys/cpuvar.h>
40 #include <sys/x_call.h>
41 #include <sys/cpu.h>
42 #include <sys/psw.h>
43 #include <sys/sunddi.h>
44 #include <sys/mmu.h>
45 #include <sys/debug.h>
46 #include <sys/systm.h>
47 #include <sys/machsystm.h>
48 #include <sys/mutex_impl.h>
49 
50 static struct	xc_mbox xc_mboxes[X_CALL_LEVELS];
51 static kmutex_t xc_mbox_lock[X_CALL_LEVELS];
52 static uint_t 	xc_xlat_xcptoipl[X_CALL_LEVELS] = {
53 	XC_LO_PIL,
54 	XC_MED_PIL,
55 	XC_HI_PIL
56 };
57 
58 static void xc_common(xc_func_t, xc_arg_t, xc_arg_t, xc_arg_t,
59     int, cpuset_t, int);
60 
61 static int	xc_initialized = 0;
62 extern ulong_t	cpu_ready_set;
63 
64 void
65 xc_init()
66 {
67 	/*
68 	 * By making these mutexes type MUTEX_DRIVER, the ones below
69 	 * LOCK_LEVEL will be implemented as adaptive mutexes, and the
70 	 * ones above LOCK_LEVEL will be spin mutexes.
71 	 */
72 	mutex_init(&xc_mbox_lock[0], NULL, MUTEX_DRIVER,
73 	    (void *)ipltospl(XC_LO_PIL));
74 	mutex_init(&xc_mbox_lock[1], NULL, MUTEX_DRIVER,
75 	    (void *)ipltospl(XC_MED_PIL));
76 	mutex_init(&xc_mbox_lock[2], NULL, MUTEX_DRIVER,
77 	    (void *)ipltospl(XC_HI_PIL));
78 
79 	xc_initialized = 1;
80 }
81 
82 /*
83  * Used by the debugger to determine whether or not cross calls have been
84  * initialized and are safe to use.
85  */
86 int
87 kdi_xc_initialized(void)
88 {
89 	return (xc_initialized);
90 }
91 
/*
 * Marker value that xc_capture_cpus() stores in the X_CALL_MEDPRI mailbox's
 * arg2 for the duration of a capture session; xc_release_cpus() resets arg2
 * to 0.  xc_serv() compares arg2 against this value when servicing a
 * X_CALL_MEDPRI interrupt.
 */
#define	CAPTURE_CPU_ARG	0xffffffff
93 
94 /*
95  * X-call interrupt service routine.
96  *
97  * arg == X_CALL_MEDPRI	-  capture cpus.
98  *
99  * We're protected against changing CPUs by being a high-priority interrupt.
100  */
101 /*ARGSUSED*/
102 uint_t
103 xc_serv(caddr_t arg1, caddr_t arg2)
104 {
105 	int	op;
106 	int	pri = (int)(uintptr_t)arg1;
107 	struct cpu *cpup = CPU;
108 	xc_arg_t *argp;
109 	xc_arg_t arg2val;
110 	uint_t	tlbflush;
111 
112 	if (pri == X_CALL_MEDPRI) {
113 
114 		argp = &xc_mboxes[X_CALL_MEDPRI].arg2;
115 		arg2val = *argp;
116 		if (arg2val != CAPTURE_CPU_ARG &&
117 		    !(arg2val & (1 << cpup->cpu_id)))
118 			return (DDI_INTR_UNCLAIMED);
119 		ASSERT(arg2val == CAPTURE_CPU_ARG);
120 		if (cpup->cpu_m.xc_pend[pri] == 0)
121 			return (DDI_INTR_UNCLAIMED);
122 
123 		cpup->cpu_m.xc_pend[X_CALL_MEDPRI] = 0;
124 		cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 1;
125 
126 		for (;;) {
127 			if ((cpup->cpu_m.xc_state[X_CALL_MEDPRI] == XC_DONE) ||
128 				(cpup->cpu_m.xc_pend[X_CALL_MEDPRI]))
129 				break;
130 			ht_pause();
131 			return_instr();
132 		}
133 		return (DDI_INTR_CLAIMED);
134 	}
135 	if (cpup->cpu_m.xc_pend[pri] == 0)
136 		return (DDI_INTR_UNCLAIMED);
137 
138 	cpup->cpu_m.xc_pend[pri] = 0;
139 	op = cpup->cpu_m.xc_state[pri];
140 
141 	/*
142 	 * When invalidating TLB entries, wait until the initiator changes the
143 	 * memory PTE before doing any INVLPG. Otherwise, if the PTE in memory
144 	 * hasn't been changed, the processor's TLB Flush filter may ignore
145 	 * the INVLPG instruction.
146 	 */
147 	tlbflush = (cpup->cpu_m.xc_wait[pri] == 2);
148 
149 	/*
150 	 * Don't invoke a null function.
151 	 */
152 	if (xc_mboxes[pri].func != NULL) {
153 		if (!tlbflush)
154 			cpup->cpu_m.xc_retval[pri] = (*xc_mboxes[pri].func)
155 			    (xc_mboxes[pri].arg1, xc_mboxes[pri].arg2,
156 				xc_mboxes[pri].arg3);
157 	} else
158 		cpup->cpu_m.xc_retval[pri] = 0;
159 
160 	/*
161 	 * Acknowledge that we have completed the x-call operation.
162 	 */
163 	cpup->cpu_m.xc_ack[pri] = 1;
164 
165 	if (op == XC_CALL_OP)
166 		return (DDI_INTR_CLAIMED);
167 
168 	/*
169 	 * for (op == XC_SYNC_OP)
170 	 * Wait for the initiator of the x-call to indicate
171 	 * that all CPUs involved can proceed.
172 	 */
173 	while (cpup->cpu_m.xc_wait[pri]) {
174 		ht_pause();
175 		return_instr();
176 	}
177 
178 	while (cpup->cpu_m.xc_state[pri] != XC_DONE) {
179 		ht_pause();
180 		return_instr();
181 	}
182 
183 	/*
184 	 * Flush the TLB, if that's what is requested.
185 	 */
186 	if (xc_mboxes[pri].func != NULL && tlbflush) {
187 		cpup->cpu_m.xc_retval[pri] = (*xc_mboxes[pri].func)
188 		    (xc_mboxes[pri].arg1, xc_mboxes[pri].arg2,
189 			xc_mboxes[pri].arg3);
190 	}
191 
192 	/*
193 	 * Acknowledge that we have received the directive to continue.
194 	 */
195 	ASSERT(cpup->cpu_m.xc_ack[pri] == 0);
196 	cpup->cpu_m.xc_ack[pri] = 1;
197 
198 	return (DDI_INTR_CLAIMED);
199 }
200 
201 
202 /*
203  * xc_do_call:
204  */
205 static void
206 xc_do_call(
207 	xc_arg_t arg1,
208 	xc_arg_t arg2,
209 	xc_arg_t arg3,
210 	int pri,
211 	cpuset_t set,
212 	xc_func_t func,
213 	int sync)
214 {
215 	/*
216 	 * If the pri indicates a low priority lock (below LOCK_LEVEL),
217 	 * we must disable preemption to avoid migrating to another CPU
218 	 * during the call.
219 	 */
220 	if (pri == X_CALL_LOPRI) {
221 		kpreempt_disable();
222 	} else {
223 		pri = X_CALL_HIPRI;
224 	}
225 
226 	/* always grab highest mutex to avoid deadlock */
227 	mutex_enter(&xc_mbox_lock[X_CALL_HIPRI]);
228 	xc_common(func, arg1, arg2, arg3, pri, set, sync);
229 	mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
230 	if (pri == X_CALL_LOPRI)
231 		kpreempt_enable();
232 }
233 
234 
235 /*
236  * xc_call: call specified function on all processors
237  * remotes may continue after service
238  * we wait here until everybody has completed.
239  */
240 void
241 xc_call(
242 	xc_arg_t arg1,
243 	xc_arg_t arg2,
244 	xc_arg_t arg3,
245 	int pri,
246 	cpuset_t set,
247 	xc_func_t func)
248 {
249 	xc_do_call(arg1, arg2, arg3, pri, set, func, 0);
250 }
251 
252 /*
253  * xc_sync: call specified function on all processors
254  * after doing work, each remote waits until we let
255  * it continue; send the contiunue after everyone has
256  * informed us that they are done.
257  */
258 void
259 xc_sync(
260 	xc_arg_t arg1,
261 	xc_arg_t arg2,
262 	xc_arg_t arg3,
263 	int pri,
264 	cpuset_t set,
265 	xc_func_t func)
266 {
267 	xc_do_call(arg1, arg2, arg3, pri, set, func, 1);
268 }
269 
270 /*
271  * xc_sync_wait: similar to xc_sync(), except that the starting
272  * cpu waits for all other cpus to check in before running its
273  * service locally.
274  */
275 void
276 xc_wait_sync(
277 	xc_arg_t arg1,
278 	xc_arg_t arg2,
279 	xc_arg_t arg3,
280 	int pri,
281 	cpuset_t set,
282 	xc_func_t func)
283 {
284 	xc_do_call(arg1, arg2, arg3, pri, set, func, 2);
285 }
286 
287 
288 /*
289  * The routines xc_capture_cpus and xc_release_cpus
290  * can be used in place of xc_sync in order to implement a critical
291  * code section where all CPUs in the system can be controlled.
292  * xc_capture_cpus is used to start the critical code section, and
293  * xc_release_cpus is used to end the critical code section.
294  */
295 
296 /*
297  * Capture the CPUs specified in order to start a x-call session,
298  * and/or to begin a critical section.
299  */
300 void
301 xc_capture_cpus(cpuset_t set)
302 {
303 	int cix;
304 	int lcx;
305 	struct cpu *cpup;
306 	int	i;
307 	cpuset_t *cpus;
308 	cpuset_t c;
309 
310 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
311 
312 	/*
313 	 * Prevent deadlocks where we take an interrupt and are waiting
314 	 * for a mutex owned by one of the CPUs that is captured for
315 	 * the x-call, while that CPU is waiting for some x-call signal
316 	 * to be set by us.
317 	 *
318 	 * This mutex also prevents preemption, since it raises SPL above
319 	 * LOCK_LEVEL (it is a spin-type driver mutex).
320 	 */
321 	/* always grab highest mutex to avoid deadlock */
322 	mutex_enter(&xc_mbox_lock[X_CALL_HIPRI]);
323 	lcx = CPU->cpu_id;	/* now we're safe */
324 
325 	ASSERT(CPU->cpu_flags & CPU_READY);
326 
327 	/*
328 	 * Wait for all cpus
329 	 */
330 	cpus = (cpuset_t *)&xc_mboxes[X_CALL_MEDPRI].arg2;
331 	if (CPU_IN_SET(*cpus, CPU->cpu_id))
332 		CPUSET_ATOMIC_DEL(*cpus, CPU->cpu_id);
333 	for (;;) {
334 		c = *(volatile cpuset_t *)cpus;
335 		CPUSET_AND(c, cpu_ready_set);
336 		if (CPUSET_ISNULL(c))
337 			break;
338 		ht_pause();
339 	}
340 
341 	/*
342 	 * Store the set of CPUs involved in the x-call session, so that
343 	 * xc_release_cpus will know what CPUs to act upon.
344 	 */
345 	xc_mboxes[X_CALL_MEDPRI].set = set;
346 	xc_mboxes[X_CALL_MEDPRI].arg2 = CAPTURE_CPU_ARG;
347 
348 	/*
349 	 * Now capture each CPU in the set and cause it to go into a
350 	 * holding pattern.
351 	 */
352 	i = 0;
353 	for (cix = 0; cix < NCPU; cix++) {
354 		if ((cpup = cpu[cix]) == NULL ||
355 		    (cpup->cpu_flags & CPU_READY) == 0) {
356 			/*
357 			 * In case CPU wasn't ready, but becomes ready later,
358 			 * take the CPU out of the set now.
359 			 */
360 			CPUSET_DEL(set, cix);
361 			continue;
362 		}
363 		if (cix != lcx && CPU_IN_SET(set, cix)) {
364 			cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 0;
365 			cpup->cpu_m.xc_state[X_CALL_MEDPRI] = XC_HOLD;
366 			cpup->cpu_m.xc_pend[X_CALL_MEDPRI] = 1;
367 			send_dirint(cix, XC_MED_PIL);
368 		}
369 		i++;
370 		if (i >= ncpus)
371 			break;
372 	}
373 
374 	/*
375 	 * Wait here until all remote calls to complete.
376 	 */
377 	i = 0;
378 	for (cix = 0; cix < NCPU; cix++) {
379 		if (lcx != cix && CPU_IN_SET(set, cix)) {
380 			cpup = cpu[cix];
381 			while (cpup->cpu_m.xc_ack[X_CALL_MEDPRI] == 0) {
382 				ht_pause();
383 				return_instr();
384 			}
385 			cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 0;
386 		}
387 		i++;
388 		if (i >= ncpus)
389 			break;
390 	}
391 
392 }
393 
394 /*
395  * Release the CPUs captured by xc_capture_cpus, thus terminating the
396  * x-call session and exiting the critical section.
397  */
398 void
399 xc_release_cpus(void)
400 {
401 	int cix;
402 	int lcx = (int)(CPU->cpu_id);
403 	cpuset_t set = xc_mboxes[X_CALL_MEDPRI].set;
404 	struct cpu *cpup;
405 	int	i;
406 
407 	ASSERT(MUTEX_HELD(&xc_mbox_lock[X_CALL_HIPRI]));
408 
409 	/*
410 	 * Allow each CPU to exit its holding pattern.
411 	 */
412 	i = 0;
413 	for (cix = 0; cix < NCPU; cix++) {
414 		if ((cpup = cpu[cix]) == NULL)
415 			continue;
416 		if ((cpup->cpu_flags & CPU_READY) &&
417 		    (cix != lcx) && CPU_IN_SET(set, cix)) {
418 			/*
419 			 * Clear xc_ack since we will be waiting for it
420 			 * to be set again after we set XC_DONE.
421 			 */
422 			cpup->cpu_m.xc_state[X_CALL_MEDPRI] = XC_DONE;
423 		}
424 		i++;
425 		if (i >= ncpus)
426 			break;
427 	}
428 
429 	xc_mboxes[X_CALL_MEDPRI].arg2 = 0;
430 	mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
431 }
432 
433 /*
434  * Common code to call a specified function on a set of processors.
435  * sync specifies what kind of waiting is done.
436  *	-1 - no waiting, don't release remotes
437  *	0 - no waiting, release remotes immediately
438  *	1 - run service locally w/o waiting for remotes.
439  *	2 - wait for remotes before running locally
440  */
441 static void
442 xc_common(
443 	xc_func_t func,
444 	xc_arg_t arg1,
445 	xc_arg_t arg2,
446 	xc_arg_t arg3,
447 	int pri,
448 	cpuset_t set,
449 	int sync)
450 {
451 	int cix;
452 	int lcx = (int)(CPU->cpu_id);
453 	struct cpu *cpup;
454 
455 	ASSERT(panicstr == NULL);
456 
457 	ASSERT(MUTEX_HELD(&xc_mbox_lock[X_CALL_HIPRI]));
458 	ASSERT(CPU->cpu_flags & CPU_READY);
459 
460 	/*
461 	 * Set up the service definition mailbox.
462 	 */
463 	xc_mboxes[pri].func = func;
464 	xc_mboxes[pri].arg1 = arg1;
465 	xc_mboxes[pri].arg2 = arg2;
466 	xc_mboxes[pri].arg3 = arg3;
467 
468 	/*
469 	 * Request service on all remote processors.
470 	 */
471 	for (cix = 0; cix < NCPU; cix++) {
472 		if ((cpup = cpu[cix]) == NULL ||
473 		    (cpup->cpu_flags & CPU_READY) == 0) {
474 			/*
475 			 * In case CPU wasn't ready, but becomes ready later,
476 			 * take the CPU out of the set now.
477 			 */
478 			CPUSET_DEL(set, cix);
479 		} else if (cix != lcx && CPU_IN_SET(set, cix)) {
480 			CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
481 			cpup->cpu_m.xc_ack[pri] = 0;
482 			cpup->cpu_m.xc_wait[pri] = sync;
483 			if (sync > 0)
484 				cpup->cpu_m.xc_state[pri] = XC_SYNC_OP;
485 			else
486 				cpup->cpu_m.xc_state[pri] = XC_CALL_OP;
487 			cpup->cpu_m.xc_pend[pri] = 1;
488 			send_dirint(cix, xc_xlat_xcptoipl[pri]);
489 		}
490 	}
491 
492 	/*
493 	 * Run service locally if not waiting for remotes.
494 	 */
495 	if (sync != 2 && CPU_IN_SET(set, lcx) && func != NULL)
496 		CPU->cpu_m.xc_retval[pri] = (*func)(arg1, arg2, arg3);
497 
498 	if (sync == -1)
499 		return;
500 
501 	/*
502 	 * Wait here until all remote calls complete.
503 	 */
504 	for (cix = 0; cix < NCPU; cix++) {
505 		if (lcx != cix && CPU_IN_SET(set, cix)) {
506 			cpup = cpu[cix];
507 			while (cpup->cpu_m.xc_ack[pri] == 0) {
508 				ht_pause();
509 				return_instr();
510 			}
511 			cpup->cpu_m.xc_ack[pri] = 0;
512 		}
513 	}
514 
515 	/*
516 	 * Run service locally if waiting for remotes.
517 	 */
518 	if (sync == 2 && CPU_IN_SET(set, lcx) && func != NULL)
519 		CPU->cpu_m.xc_retval[pri] = (*func)(arg1, arg2, arg3);
520 
521 	if (sync == 0)
522 		return;
523 
524 	/*
525 	 * Release any waiting CPUs
526 	 */
527 	for (cix = 0; cix < NCPU; cix++) {
528 		if (lcx != cix && CPU_IN_SET(set, cix)) {
529 			cpup = cpu[cix];
530 			if (cpup != NULL && (cpup->cpu_flags & CPU_READY)) {
531 				cpup->cpu_m.xc_wait[pri] = 0;
532 				cpup->cpu_m.xc_state[pri] = XC_DONE;
533 			}
534 		}
535 	}
536 
537 	/*
538 	 * Wait for all CPUs to acknowledge completion before we continue.
539 	 * Without this check it's possible (on a VM or hyper-threaded CPUs
540 	 * or in the presence of Service Management Interrupts which can all
541 	 * cause delays) for the remote processor to still be waiting by
542 	 * the time xc_common() is next invoked with the sync flag set
543 	 * resulting in a deadlock.
544 	 */
545 	for (cix = 0; cix < NCPU; cix++) {
546 		if (lcx != cix && CPU_IN_SET(set, cix)) {
547 			cpup = cpu[cix];
548 			if (cpup != NULL && (cpup->cpu_flags & CPU_READY)) {
549 				while (cpup->cpu_m.xc_ack[pri] == 0) {
550 					ht_pause();
551 					return_instr();
552 				}
553 				cpup->cpu_m.xc_ack[pri] = 0;
554 			}
555 		}
556 	}
557 }
558 
559 /*
560  * xc_trycall: attempt to call specified function on all processors
561  * remotes may wait for a long time
562  * we continue immediately
563  */
564 void
565 xc_trycall(
566 	xc_arg_t arg1,
567 	xc_arg_t arg2,
568 	xc_arg_t arg3,
569 	cpuset_t set,
570 	xc_func_t func)
571 {
572 	int		save_kernel_preemption;
573 	extern int	IGNORE_KERNEL_PREEMPTION;
574 
575 	/*
576 	 * If we can grab the mutex, we'll do the cross-call.  If not -- if
577 	 * someone else is already doing a cross-call -- we won't.
578 	 */
579 
580 	save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
581 	IGNORE_KERNEL_PREEMPTION = 1;
582 	if (mutex_tryenter(&xc_mbox_lock[X_CALL_HIPRI])) {
583 		xc_common(func, arg1, arg2, arg3, X_CALL_HIPRI, set, -1);
584 		mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
585 	}
586 	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
587 }
588 
589 /*
590  * Used by the debugger to cross-call the other CPUs, thus causing them to
591  * enter the debugger.  We can't hold locks, so we spin on the cross-call
592  * lock until we get it.  When we get it, we send the cross-call, and assume
593  * that we successfully stopped the other CPUs.
594  */
595 void
596 kdi_xc_others(int this_cpu, void (*func)(void))
597 {
598 	extern int	IGNORE_KERNEL_PREEMPTION;
599 	int save_kernel_preemption;
600 	mutex_impl_t *lp;
601 	cpuset_t set;
602 	int x;
603 
604 	CPUSET_ALL_BUT(set, this_cpu);
605 
606 	save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
607 	IGNORE_KERNEL_PREEMPTION = 1;
608 
609 	lp = (mutex_impl_t *)&xc_mbox_lock[X_CALL_HIPRI];
610 	for (x = 0; x < 0x400000; x++) {
611 		if (lock_spin_try(&lp->m_spin.m_spinlock)) {
612 			xc_common((xc_func_t)func, 0, 0, 0, X_CALL_HIPRI,
613 			    set, -1);
614 			lp->m_spin.m_spinlock = 0; /* XXX */
615 			break;
616 		}
617 		(void) xc_serv((caddr_t)X_CALL_MEDPRI, NULL);
618 	}
619 	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
620 }
621