xref: /illumos-gate/usr/src/uts/i86pc/os/x_call.c (revision a563a037)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Facilities for cross-processor subroutine calls using "mailbox" interrupts.
30  *
31  */
32 
33 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/t_lock.h>
36 #include <sys/thread.h>
37 #include <sys/cpuvar.h>
38 #include <sys/x_call.h>
39 #include <sys/cpu.h>
40 #include <sys/psw.h>
41 #include <sys/sunddi.h>
42 #include <sys/debug.h>
43 #include <sys/systm.h>
44 #include <sys/archsystm.h>
45 #include <sys/machsystm.h>
46 #include <sys/mutex_impl.h>
47 #include <sys/traptrace.h>
48 
49 
50 static struct	xc_mbox xc_mboxes[X_CALL_LEVELS];
51 static kmutex_t xc_mbox_lock[X_CALL_LEVELS];
52 static uint_t 	xc_xlat_xcptoipl[X_CALL_LEVELS] = {
53 	XC_LO_PIL,
54 	XC_MED_PIL,
55 	XC_HI_PIL
56 };
57 
58 static void xc_common(xc_func_t, xc_arg_t, xc_arg_t, xc_arg_t,
59     int, cpuset_t, int);
60 
61 static int	xc_initialized = 0;
62 
63 void
64 xc_init()
65 {
66 	/*
67 	 * By making these mutexes type MUTEX_DRIVER, the ones below
68 	 * LOCK_LEVEL will be implemented as adaptive mutexes, and the
69 	 * ones above LOCK_LEVEL will be spin mutexes.
70 	 */
71 	mutex_init(&xc_mbox_lock[0], NULL, MUTEX_DRIVER,
72 	    (void *)ipltospl(XC_LO_PIL));
73 	mutex_init(&xc_mbox_lock[1], NULL, MUTEX_DRIVER,
74 	    (void *)ipltospl(XC_MED_PIL));
75 	mutex_init(&xc_mbox_lock[2], NULL, MUTEX_DRIVER,
76 	    (void *)ipltospl(XC_HI_PIL));
77 
78 	xc_initialized = 1;
79 }
80 
#if defined(TRAPTRACE)

/*
 * Non-zero enables logging of x-call records to the trap trace buffer.
 */
int xc_traptrace;

/*
 * Write one x-call record into the trap trace buffer.
 *
 * marker, pri and arg are stored verbatim.  When pri names a valid x-call
 * level, the current CPU's pend/ack/state/retval values for that level and
 * the function stored in that level's mailbox are captured as well.
 * Does nothing while xc_traptrace is zero.
 */
void
xc_make_trap_trace_entry(uint8_t marker, int pri, ulong_t arg)
{
	trap_trace_rec_t *rec;
	struct _xc_entry *ent;
	struct machcpu *mc;

	if (xc_traptrace == 0)
		return;

	rec = trap_trace_get_traceptr(TT_XCALL,
	    (ulong_t)caller(), (ulong_t)getfp());
	ent = &(rec->ttr_info.xc_entry);

	ent->xce_marker = marker;
	ent->xce_pri = pri;
	ent->xce_arg = arg;

	/* The unsigned compare also rejects negative levels. */
	if ((uint_t)pri >= X_CALL_LEVELS)
		return;

	mc = &CPU->cpu_m;

	ent->xce_pend = mc->xc_pend[pri];
	ent->xce_ack = mc->xc_ack[pri];
	ent->xce_state = mc->xc_state[pri];
	ent->xce_retval = mc->xc_retval[pri];
	ent->xce_func = (uintptr_t)xc_mboxes[pri].func;
}
#endif
116 
/*
 * Value stored in the medium-priority mailbox's arg2 slot while a CPU
 * capture is in progress.  Parenthesized so the expansion is safe in any
 * expression context.
 */
#define	CAPTURE_CPU_ARG	(~0UL)
118 
119 /*
120  * X-call interrupt service routine.
121  *
122  * arg == X_CALL_MEDPRI	-  capture cpus.
123  *
124  * We're protected against changing CPUs by being a high-priority interrupt.
125  */
126 /*ARGSUSED*/
127 uint_t
128 xc_serv(caddr_t arg1, caddr_t arg2)
129 {
130 	int op;
131 	int pri = (int)(uintptr_t)arg1;
132 	struct cpu *cpup = CPU;
133 	xc_arg_t arg2val;
134 
135 	XC_TRACE(TT_XC_SVC_BEGIN, pri, (ulong_t)arg2);
136 
137 	if (pri == X_CALL_MEDPRI) {
138 
139 		arg2val = xc_mboxes[X_CALL_MEDPRI].arg2;
140 
141 		if (arg2val != CAPTURE_CPU_ARG ||
142 		    !CPU_IN_SET(xc_mboxes[X_CALL_MEDPRI].set, cpup->cpu_id))
143 			goto unclaimed;
144 
145 		ASSERT(arg2val == CAPTURE_CPU_ARG);
146 
147 		if (cpup->cpu_m.xc_pend[pri] == 0)
148 			goto unclaimed;
149 
150 		cpup->cpu_m.xc_pend[X_CALL_MEDPRI] = 0;
151 		cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 1;
152 
153 		for (;;) {
154 			if ((cpup->cpu_m.xc_state[X_CALL_MEDPRI] == XC_DONE) ||
155 			    (cpup->cpu_m.xc_pend[X_CALL_MEDPRI]))
156 				break;
157 			SMT_PAUSE();
158 		}
159 		CPUSET_DEL(xc_mboxes[X_CALL_MEDPRI].set, cpup->cpu_id);
160 		XC_TRACE(TT_XC_SVC_END, pri, DDI_INTR_CLAIMED);
161 		return (DDI_INTR_CLAIMED);
162 	}
163 
164 	if (cpup->cpu_m.xc_pend[pri] == 0)
165 		goto unclaimed;
166 
167 	cpup->cpu_m.xc_pend[pri] = 0;
168 	op = cpup->cpu_m.xc_state[pri];
169 
170 	/*
171 	 * Don't invoke a null function.
172 	 */
173 	if (xc_mboxes[pri].func != NULL) {
174 		cpup->cpu_m.xc_retval[pri] =
175 		    (*xc_mboxes[pri].func)(xc_mboxes[pri].arg1,
176 		    xc_mboxes[pri].arg2, xc_mboxes[pri].arg3);
177 	} else
178 		cpup->cpu_m.xc_retval[pri] = 0;
179 
180 	/*
181 	 * Acknowledge that we have completed the x-call operation.
182 	 */
183 	cpup->cpu_m.xc_ack[pri] = 1;
184 
185 	if (op != XC_CALL_OP) {
186 		/*
187 		 * for (op == XC_SYNC_OP)
188 		 * Wait for the initiator of the x-call to indicate
189 		 * that all CPUs involved can proceed.
190 		 */
191 		while (cpup->cpu_m.xc_wait[pri])
192 			SMT_PAUSE();
193 
194 		while (cpup->cpu_m.xc_state[pri] != XC_DONE)
195 			SMT_PAUSE();
196 
197 		/*
198 		 * Acknowledge that we have received the directive to continue.
199 		 */
200 		ASSERT(cpup->cpu_m.xc_ack[pri] == 0);
201 		cpup->cpu_m.xc_ack[pri] = 1;
202 	}
203 
204 	XC_TRACE(TT_XC_SVC_END, pri, DDI_INTR_CLAIMED);
205 	return (DDI_INTR_CLAIMED);
206 
207 unclaimed:
208 	XC_TRACE(TT_XC_SVC_END, pri, DDI_INTR_UNCLAIMED);
209 	return (DDI_INTR_UNCLAIMED);
210 }
211 
212 
213 /*
214  * xc_do_call:
215  */
216 static void
217 xc_do_call(
218 	xc_arg_t arg1,
219 	xc_arg_t arg2,
220 	xc_arg_t arg3,
221 	int pri,
222 	cpuset_t set,
223 	xc_func_t func,
224 	int sync)
225 {
226 	/*
227 	 * If the pri indicates a low priority lock (below LOCK_LEVEL),
228 	 * we must disable preemption to avoid migrating to another CPU
229 	 * during the call.
230 	 */
231 	if (pri == X_CALL_LOPRI) {
232 		kpreempt_disable();
233 	} else {
234 		pri = X_CALL_HIPRI;
235 	}
236 
237 	/* always grab highest mutex to avoid deadlock */
238 	mutex_enter(&xc_mbox_lock[X_CALL_HIPRI]);
239 	xc_common(func, arg1, arg2, arg3, pri, set, sync);
240 	mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
241 	if (pri == X_CALL_LOPRI)
242 		kpreempt_enable();
243 }
244 
245 
246 /*
247  * xc_call: call specified function on all processors
248  * remotes may continue after service
249  * we wait here until everybody has completed.
250  */
251 void
252 xc_call(
253 	xc_arg_t arg1,
254 	xc_arg_t arg2,
255 	xc_arg_t arg3,
256 	int pri,
257 	cpuset_t set,
258 	xc_func_t func)
259 {
260 	xc_do_call(arg1, arg2, arg3, pri, set, func, 0);
261 }
262 
263 /*
264  * xc_sync: call specified function on all processors
265  * after doing work, each remote waits until we let
266  * it continue; send the contiunue after everyone has
267  * informed us that they are done.
268  */
269 void
270 xc_sync(
271 	xc_arg_t arg1,
272 	xc_arg_t arg2,
273 	xc_arg_t arg3,
274 	int pri,
275 	cpuset_t set,
276 	xc_func_t func)
277 {
278 	xc_do_call(arg1, arg2, arg3, pri, set, func, 1);
279 }
280 
281 /*
282  * The routines xc_capture_cpus and xc_release_cpus
283  * can be used in place of xc_sync in order to implement a critical
284  * code section where all CPUs in the system can be controlled.
285  * xc_capture_cpus is used to start the critical code section, and
286  * xc_release_cpus is used to end the critical code section.
287  */
288 
289 /*
290  * Capture the CPUs specified in order to start a x-call session,
291  * and/or to begin a critical section.
292  */
293 void
294 xc_capture_cpus(cpuset_t set)
295 {
296 	int cix;
297 	int lcx;
298 	struct cpu *cpup;
299 	int	i;
300 
301 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
302 
303 	/*
304 	 * Prevent deadlocks where we take an interrupt and are waiting
305 	 * for a mutex owned by one of the CPUs that is captured for
306 	 * the x-call, while that CPU is waiting for some x-call signal
307 	 * to be set by us.
308 	 *
309 	 * This mutex also prevents preemption, since it raises SPL above
310 	 * LOCK_LEVEL (it is a spin-type driver mutex).
311 	 */
312 	/* always grab highest mutex to avoid deadlock */
313 	mutex_enter(&xc_mbox_lock[X_CALL_HIPRI]);
314 	lcx = CPU->cpu_id;	/* now we're safe */
315 
316 	ASSERT(CPU->cpu_flags & CPU_READY);
317 
318 	/*
319 	 * Wait for all cpus.
320 	 */
321 
322 	/*
323 	 * First remove ourself.
324 	 */
325 	if (CPU_IN_SET(xc_mboxes[X_CALL_MEDPRI].set, CPU->cpu_id))
326 		CPUSET_ATOMIC_DEL(xc_mboxes[X_CALL_MEDPRI].set, CPU->cpu_id);
327 	/*
328 	 * We must wait for all cpus to clear their bit from
329 	 * xc_mboxes[X_CALL_MEDPRI].set before we write to this set.
330 	 */
331 	for (;;) {
332 		CPUSET_AND(xc_mboxes[X_CALL_MEDPRI].set, cpu_ready_set);
333 		if (CPUSET_ISNULL(xc_mboxes[X_CALL_MEDPRI].set))
334 			break;
335 		SMT_PAUSE();
336 	}
337 
338 	/*
339 	 * Store the set of CPUs involved in the x-call session, so that
340 	 * xc_release_cpus will know what CPUs to act upon.
341 	 */
342 	xc_mboxes[X_CALL_MEDPRI].set = set;
343 	xc_mboxes[X_CALL_MEDPRI].arg2 = CAPTURE_CPU_ARG;
344 
345 	/*
346 	 * Now capture each CPU in the set and cause it to go into a
347 	 * holding pattern.
348 	 */
349 	i = 0;
350 	for (cix = 0; cix < NCPU; cix++) {
351 		if ((cpup = cpu[cix]) == NULL ||
352 		    (cpup->cpu_flags & CPU_READY) == 0) {
353 			/*
354 			 * In case CPU wasn't ready, but becomes ready later,
355 			 * take the CPU out of the set now.
356 			 */
357 			CPUSET_DEL(set, cix);
358 			continue;
359 		}
360 		if (cix != lcx && CPU_IN_SET(set, cix)) {
361 			cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 0;
362 			cpup->cpu_m.xc_state[X_CALL_MEDPRI] = XC_HOLD;
363 			cpup->cpu_m.xc_pend[X_CALL_MEDPRI] = 1;
364 			XC_TRACE(TT_XC_CAPTURE, X_CALL_MEDPRI, cix);
365 			send_dirint(cix, XC_MED_PIL);
366 		}
367 		i++;
368 		if (i >= ncpus)
369 			break;
370 	}
371 
372 	/*
373 	 * Wait here until all remote calls to acknowledge.
374 	 */
375 	i = 0;
376 	for (cix = 0; cix < NCPU; cix++) {
377 		if (lcx != cix && CPU_IN_SET(set, cix)) {
378 			cpup = cpu[cix];
379 			while (cpup->cpu_m.xc_ack[X_CALL_MEDPRI] == 0)
380 				SMT_PAUSE();
381 			cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 0;
382 		}
383 		i++;
384 		if (i >= ncpus)
385 			break;
386 	}
387 
388 }
389 
390 /*
391  * Release the CPUs captured by xc_capture_cpus, thus terminating the
392  * x-call session and exiting the critical section.
393  */
394 void
395 xc_release_cpus(void)
396 {
397 	int cix;
398 	int lcx = (int)(CPU->cpu_id);
399 	cpuset_t set = xc_mboxes[X_CALL_MEDPRI].set;
400 	struct cpu *cpup;
401 	int	i;
402 
403 	ASSERT(MUTEX_HELD(&xc_mbox_lock[X_CALL_HIPRI]));
404 
405 	/*
406 	 * Allow each CPU to exit its holding pattern.
407 	 */
408 	i = 0;
409 	for (cix = 0; cix < NCPU; cix++) {
410 		if ((cpup = cpu[cix]) == NULL)
411 			continue;
412 		if ((cpup->cpu_flags & CPU_READY) &&
413 		    (cix != lcx) && CPU_IN_SET(set, cix)) {
414 			/*
415 			 * Clear xc_ack since we will be waiting for it
416 			 * to be set again after we set XC_DONE.
417 			 */
418 			XC_TRACE(TT_XC_RELEASE, X_CALL_MEDPRI, cix);
419 			cpup->cpu_m.xc_state[X_CALL_MEDPRI] = XC_DONE;
420 		}
421 		i++;
422 		if (i >= ncpus)
423 			break;
424 	}
425 
426 	xc_mboxes[X_CALL_MEDPRI].arg2 = 0;
427 	mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
428 }
429 
430 /*
431  * Common code to call a specified function on a set of processors.
432  * sync specifies what kind of waiting is done.
433  *	-1 - no waiting, don't release remotes
434  *	0 - no waiting, release remotes immediately
435  *	1 - run service locally w/o waiting for remotes.
436  */
437 static void
438 xc_common(
439 	xc_func_t func,
440 	xc_arg_t arg1,
441 	xc_arg_t arg2,
442 	xc_arg_t arg3,
443 	int pri,
444 	cpuset_t set,
445 	int sync)
446 {
447 	int cix;
448 	int do_local = 0;
449 	struct cpu *cpup;
450 	cpuset_t tset;
451 	int last_cpu = 0;
452 
453 	ASSERT(panicstr == NULL);
454 
455 	ASSERT(MUTEX_HELD(&xc_mbox_lock[X_CALL_HIPRI]));
456 	ASSERT(CPU->cpu_flags & CPU_READY);
457 
458 	/*
459 	 * Set up the service definition mailbox.
460 	 */
461 	xc_mboxes[pri].func = func;
462 	xc_mboxes[pri].arg1 = arg1;
463 	xc_mboxes[pri].arg2 = arg2;
464 	xc_mboxes[pri].arg3 = arg3;
465 
466 	if (CPU_IN_SET(set, CPU->cpu_id)) {
467 		do_local = 1;
468 		CPUSET_DEL(set, CPU->cpu_id);
469 	}
470 
471 	/*
472 	 * Request service on all remote processors.
473 	 */
474 	tset = set;
475 	for (cix = 0; cix < max_ncpus; cix++) {
476 		if (!CPU_IN_SET(tset, cix))
477 			continue;
478 
479 		if ((cpup = cpu[cix]) == NULL ||
480 		    (cpup->cpu_flags & CPU_READY) == 0) {
481 			/*
482 			 * In case the CPU is not ready but becomes
483 			 * ready later, take it out of the set now.
484 			 */
485 			CPUSET_DEL(set, cix);
486 		} else {
487 			CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
488 			cpup->cpu_m.xc_ack[pri] = 0;
489 			cpup->cpu_m.xc_wait[pri] = sync;
490 			if (sync > 0)
491 				cpup->cpu_m.xc_state[pri] = XC_SYNC_OP;
492 			else
493 				cpup->cpu_m.xc_state[pri] = XC_CALL_OP;
494 			cpup->cpu_m.xc_pend[pri] = 1;
495 			XC_TRACE(TT_XC_START, pri, cix);
496 			send_dirint(cix, xc_xlat_xcptoipl[pri]);
497 			last_cpu = cix;
498 		}
499 
500 		CPUSET_DEL(tset, cix);
501 		if (CPUSET_ISNULL(tset))
502 			break;
503 	}
504 
505 	/*
506 	 * Run service locally
507 	 */
508 	if (do_local && func != NULL) {
509 		XC_TRACE(TT_XC_START, pri, CPU->cpu_id);
510 		CPU->cpu_m.xc_retval[pri] = (*func)(arg1, arg2, arg3);
511 	}
512 
513 	if (sync == -1)
514 		return;
515 
516 	/*
517 	 * Wait here until all remote calls acknowledge.
518 	 */
519 	for (cix = 0; cix <= last_cpu; cix++) {
520 		if (CPU_IN_SET(set, cix)) {
521 			cpup = cpu[cix];
522 			while (cpup->cpu_m.xc_ack[pri] == 0)
523 				SMT_PAUSE();
524 			XC_TRACE(TT_XC_WAIT, pri, cix);
525 			cpup->cpu_m.xc_ack[pri] = 0;
526 		}
527 	}
528 
529 	if (sync == 0)
530 		return;
531 
532 	/*
533 	 * Release any waiting CPUs
534 	 */
535 	for (cix = 0; cix <= last_cpu; cix++) {
536 		if (CPU_IN_SET(set, cix)) {
537 			cpup = cpu[cix];
538 			if (cpup != NULL && (cpup->cpu_flags & CPU_READY)) {
539 				cpup->cpu_m.xc_wait[pri] = 0;
540 				cpup->cpu_m.xc_state[pri] = XC_DONE;
541 			}
542 		}
543 	}
544 
545 	/*
546 	 * Wait for all CPUs to acknowledge completion before we continue.
547 	 * Without this check it's possible (on a VM or hyper-threaded CPUs
548 	 * or in the presence of Service Management Interrupts which can all
549 	 * cause delays) for the remote processor to still be waiting by
550 	 * the time xc_common() is next invoked with the sync flag set
551 	 * resulting in a deadlock.
552 	 */
553 	for (cix = 0; cix <= last_cpu; cix++) {
554 		if (CPU_IN_SET(set, cix)) {
555 			cpup = cpu[cix];
556 			if (cpup != NULL && (cpup->cpu_flags & CPU_READY)) {
557 				while (cpup->cpu_m.xc_ack[pri] == 0)
558 					SMT_PAUSE();
559 				XC_TRACE(TT_XC_ACK, pri, cix);
560 				cpup->cpu_m.xc_ack[pri] = 0;
561 			}
562 		}
563 	}
564 }
565 
566 /*
567  * xc_trycall: attempt to call specified function on all processors
568  * remotes may wait for a long time
569  * we continue immediately
570  */
571 void
572 xc_trycall(
573 	xc_arg_t arg1,
574 	xc_arg_t arg2,
575 	xc_arg_t arg3,
576 	cpuset_t set,
577 	xc_func_t func)
578 {
579 	int		save_kernel_preemption;
580 	extern int	IGNORE_KERNEL_PREEMPTION;
581 
582 	/*
583 	 * If we can grab the mutex, we'll do the cross-call.  If not -- if
584 	 * someone else is already doing a cross-call -- we won't.
585 	 */
586 
587 	save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
588 	IGNORE_KERNEL_PREEMPTION = 1;
589 	if (mutex_tryenter(&xc_mbox_lock[X_CALL_HIPRI])) {
590 		xc_common(func, arg1, arg2, arg3, X_CALL_HIPRI, set, -1);
591 		mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
592 	}
593 	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
594 }
595 
596 /*
597  * Used by the debugger to cross-call the other CPUs, thus causing them to
598  * enter the debugger.  We can't hold locks, so we spin on the cross-call
599  * lock until we get it.  When we get it, we send the cross-call, and assume
600  * that we successfully stopped the other CPUs.
601  */
602 void
603 kdi_xc_others(int this_cpu, void (*func)(void))
604 {
605 	extern int	IGNORE_KERNEL_PREEMPTION;
606 	int save_kernel_preemption;
607 	mutex_impl_t *lp;
608 	cpuset_t set;
609 	int x;
610 
611 	if (!xc_initialized)
612 		return;
613 
614 	CPUSET_ALL_BUT(set, this_cpu);
615 
616 	save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
617 	IGNORE_KERNEL_PREEMPTION = 1;
618 
619 	lp = (mutex_impl_t *)&xc_mbox_lock[X_CALL_HIPRI];
620 	for (x = 0; x < 0x400000; x++) {
621 		if (lock_spin_try(&lp->m_spin.m_spinlock)) {
622 			xc_common((xc_func_t)func, 0, 0, 0, X_CALL_HIPRI,
623 			    set, -1);
624 			lp->m_spin.m_spinlock = 0; /* XXX */
625 			break;
626 		}
627 		SMT_PAUSE();
628 	}
629 	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
630 }
631