xref: /illumos-gate/usr/src/uts/i86pc/os/x_call.c (revision a8ea0c9d)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
541791439Sandrei  * Common Development and Distribution License (the "License").
641791439Sandrei  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22f34a7178SJoe Bonasera  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
25a3114836SGerry Liu /*
26a3114836SGerry Liu  * Copyright (c) 2010, Intel Corporation.
27a3114836SGerry Liu  * All rights reserved.
28*a8ea0c9dSJohn Levon  * Copyright 2018 Joyent, Inc.
29a3114836SGerry Liu  */
307c478bd9Sstevel@tonic-gate 
317c478bd9Sstevel@tonic-gate #include <sys/types.h>
327c478bd9Sstevel@tonic-gate #include <sys/param.h>
337c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
347c478bd9Sstevel@tonic-gate #include <sys/thread.h>
357c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
367c478bd9Sstevel@tonic-gate #include <sys/x_call.h>
37f34a7178SJoe Bonasera #include <sys/xc_levels.h>
387c478bd9Sstevel@tonic-gate #include <sys/cpu.h>
397c478bd9Sstevel@tonic-gate #include <sys/psw.h>
407c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
417c478bd9Sstevel@tonic-gate #include <sys/debug.h>
427c478bd9Sstevel@tonic-gate #include <sys/systm.h>
43ae115bc7Smrj #include <sys/archsystm.h>
447c478bd9Sstevel@tonic-gate #include <sys/machsystm.h>
457c478bd9Sstevel@tonic-gate #include <sys/mutex_impl.h>
46f34a7178SJoe Bonasera #include <sys/stack.h>
47f34a7178SJoe Bonasera #include <sys/promif.h>
48f34a7178SJoe Bonasera #include <sys/x86_archext.h>
49ae115bc7Smrj 
507c478bd9Sstevel@tonic-gate /*
51f34a7178SJoe Bonasera  * Implementation for cross-processor calls via interprocessor interrupts
52f34a7178SJoe Bonasera  *
53f34a7178SJoe Bonasera  * This implementation uses a message passing architecture to allow multiple
54f34a7178SJoe Bonasera  * concurrent cross calls to be in flight at any given time. We use the cmpxchg
5575d94465SJosef 'Jeff' Sipek  * instruction, aka atomic_cas_ptr(), to implement simple efficient work
5675d94465SJosef 'Jeff' Sipek  * queues for message passing between CPUs with almost no need for regular
5775d94465SJosef 'Jeff' Sipek  * locking.  See xc_extract() and xc_insert() below.
58f34a7178SJoe Bonasera  *
59f34a7178SJoe Bonasera  * The general idea is that initiating a cross call means putting a message
60f34a7178SJoe Bonasera  * on a target(s) CPU's work queue. Any synchronization is handled by passing
61f34a7178SJoe Bonasera  * the message back and forth between initiator and target(s).
62f34a7178SJoe Bonasera  *
63f34a7178SJoe Bonasera  * Every CPU has xc_work_cnt, which indicates it has messages to process.
64f34a7178SJoe Bonasera  * This value is incremented as message traffic is initiated and decremented
65f34a7178SJoe Bonasera  * with every message that finishes all processing.
66f34a7178SJoe Bonasera  *
67f34a7178SJoe Bonasera  * The code needs no mfence or other membar_*() calls. The uses of
6875d94465SJosef 'Jeff' Sipek  * atomic_cas_ptr(), atomic_cas_32() and atomic_dec_32() for the message
6975d94465SJosef 'Jeff' Sipek  * passing are implemented with LOCK prefix instructions which are
7075d94465SJosef 'Jeff' Sipek  * equivalent to mfence.
71f34a7178SJoe Bonasera  *
72f34a7178SJoe Bonasera  * One interesting aspect of this implementation is that it allows 2 or more
73f34a7178SJoe Bonasera  * CPUs to initiate cross calls to intersecting sets of CPUs at the same time.
74f34a7178SJoe Bonasera  * The cross call processing by the CPUs will happen in any order with only
75f34a7178SJoe Bonasera  * a guarantee, for xc_call() and xc_sync(), that an initiator won't return
76f34a7178SJoe Bonasera  * from cross calls before all slaves have invoked the function.
77f34a7178SJoe Bonasera  *
78f34a7178SJoe Bonasera  * The reason for this asynchronous approach is to allow for fast global
79f34a7178SJoe Bonasera  * TLB shootdowns. If all CPUs, say N, tried to do a global TLB invalidation
80f34a7178SJoe Bonasera  * on a different Virtual Address at the same time, the old code required
81f34a7178SJoe Bonasera  * N squared IPIs. With this method, depending on timing, it could happen
82f34a7178SJoe Bonasera  * with just N IPIs.
83*a8ea0c9dSJohn Levon  *
84*a8ea0c9dSJohn Levon  * Here are the normal transitions for XC_MSG_* values in ->xc_command. A
85*a8ea0c9dSJohn Levon  * transition of "->" happens in the slave cpu and "=>" happens in the master
86*a8ea0c9dSJohn Levon  * cpu as the messages are passed back and forth.
877c478bd9Sstevel@tonic-gate  *
88f34a7178SJoe Bonasera  * FREE => ASYNC ->                       DONE => FREE
89f34a7178SJoe Bonasera  * FREE => CALL ->                        DONE => FREE
90f34a7178SJoe Bonasera  * FREE => SYNC -> WAITING => RELEASED -> DONE => FREE
917c478bd9Sstevel@tonic-gate  *
92*a8ea0c9dSJohn Levon  * The interesting one above is ASYNC. You might ask, why not go directly
93*a8ea0c9dSJohn Levon  * to FREE, instead of DONE? If it did that, it might be possible to exhaust
94f34a7178SJoe Bonasera  * the master's xc_free list if a master can generate ASYNC messages faster
95f34a7178SJoe Bonasera  * than the slave can process them. That could be handled with more complicated
96f34a7178SJoe Bonasera  * handling. However since nothing important uses ASYNC, I've not bothered.
977c478bd9Sstevel@tonic-gate  */
98*a8ea0c9dSJohn Levon 
99*a8ea0c9dSJohn Levon /*
100*a8ea0c9dSJohn Levon  * The default is to not enable collecting counts of IPI information, since
101*a8ea0c9dSJohn Levon  * the updating of shared cachelines could cause excess bus traffic.
102*a8ea0c9dSJohn Levon  */
103*a8ea0c9dSJohn Levon uint_t xc_collect_enable = 0;
104*a8ea0c9dSJohn Levon uint64_t xc_total_cnt = 0;	/* total #IPIs sent for cross calls */
105*a8ea0c9dSJohn Levon uint64_t xc_multi_cnt = 0;	/* # times we piggy backed on another IPI */
1067c478bd9Sstevel@tonic-gate 
107f34a7178SJoe Bonasera /*
108f34a7178SJoe Bonasera  * We allow for one high priority message at a time to happen in the system.
109f34a7178SJoe Bonasera  * This is used for panic, kmdb, etc., so no locking is done.
110f34a7178SJoe Bonasera  */
111c03aa626SJoe Bonasera static volatile cpuset_t xc_priority_set_store;
112c03aa626SJoe Bonasera static volatile ulong_t *xc_priority_set = CPUSET2BV(xc_priority_set_store);
113f34a7178SJoe Bonasera static xc_data_t xc_priority_data;
1147c478bd9Sstevel@tonic-gate 
1157c478bd9Sstevel@tonic-gate /*
116f34a7178SJoe Bonasera  * Decrement a CPU's work count
1177c478bd9Sstevel@tonic-gate  */
1187c478bd9Sstevel@tonic-gate static void
xc_decrement(struct machcpu * mcpu)119f34a7178SJoe Bonasera xc_decrement(struct machcpu *mcpu)
1207c478bd9Sstevel@tonic-gate {
121f34a7178SJoe Bonasera 	atomic_dec_32(&mcpu->xc_work_cnt);
1227c478bd9Sstevel@tonic-gate }
1237c478bd9Sstevel@tonic-gate 
1247c478bd9Sstevel@tonic-gate /*
125f34a7178SJoe Bonasera  * Increment a CPU's work count and return the old value
1267c478bd9Sstevel@tonic-gate  */
127f34a7178SJoe Bonasera static int
xc_increment(struct machcpu * mcpu)128f34a7178SJoe Bonasera xc_increment(struct machcpu *mcpu)
1297c478bd9Sstevel@tonic-gate {
130f34a7178SJoe Bonasera 	int old;
131f34a7178SJoe Bonasera 	do {
132f34a7178SJoe Bonasera 		old = mcpu->xc_work_cnt;
13375d94465SJosef 'Jeff' Sipek 	} while (atomic_cas_32(&mcpu->xc_work_cnt, old, old + 1) != old);
134f34a7178SJoe Bonasera 	return (old);
1357c478bd9Sstevel@tonic-gate }
1367c478bd9Sstevel@tonic-gate 
1377c478bd9Sstevel@tonic-gate /*
138f34a7178SJoe Bonasera  * Put a message into a queue. The insertion is atomic no matter
139f34a7178SJoe Bonasera  * how many different inserts/extracts to the same queue happen.
1407c478bd9Sstevel@tonic-gate  */
141f34a7178SJoe Bonasera static void
xc_insert(void * queue,xc_msg_t * msg)142f34a7178SJoe Bonasera xc_insert(void *queue, xc_msg_t *msg)
1437c478bd9Sstevel@tonic-gate {
144f34a7178SJoe Bonasera 	xc_msg_t *old_head;
145bf73eaa5SJoe Bonasera 
146bf73eaa5SJoe Bonasera 	/*
147bf73eaa5SJoe Bonasera 	 * FREE messages should only ever be getting inserted into
148bf73eaa5SJoe Bonasera 	 * the xc_master CPUs xc_free queue.
149bf73eaa5SJoe Bonasera 	 */
150bf73eaa5SJoe Bonasera 	ASSERT(msg->xc_command != XC_MSG_FREE ||
151bf73eaa5SJoe Bonasera 	    cpu[msg->xc_master] == NULL || /* possible only during init */
152bf73eaa5SJoe Bonasera 	    queue == &cpu[msg->xc_master]->cpu_m.xc_free);
153bf73eaa5SJoe Bonasera 
154f34a7178SJoe Bonasera 	do {
155f34a7178SJoe Bonasera 		old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
156f34a7178SJoe Bonasera 		msg->xc_next = old_head;
15775d94465SJosef 'Jeff' Sipek 	} while (atomic_cas_ptr(queue, old_head, msg) != old_head);
1587c478bd9Sstevel@tonic-gate }
1597c478bd9Sstevel@tonic-gate 
1607c478bd9Sstevel@tonic-gate /*
161f34a7178SJoe Bonasera  * Extract a message from a queue. The extraction is atomic only
162f34a7178SJoe Bonasera  * when just one thread does extractions from the queue.
163f34a7178SJoe Bonasera  * If the queue is empty, NULL is returned.
1647c478bd9Sstevel@tonic-gate  */
165f34a7178SJoe Bonasera static xc_msg_t *
xc_extract(xc_msg_t ** queue)166f34a7178SJoe Bonasera xc_extract(xc_msg_t **queue)
167f34a7178SJoe Bonasera {
168f34a7178SJoe Bonasera 	xc_msg_t *old_head;
169f34a7178SJoe Bonasera 
170f34a7178SJoe Bonasera 	do {
171f34a7178SJoe Bonasera 		old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
172f34a7178SJoe Bonasera 		if (old_head == NULL)
173f34a7178SJoe Bonasera 			return (old_head);
17475d94465SJosef 'Jeff' Sipek 	} while (atomic_cas_ptr(queue, old_head, old_head->xc_next) !=
17575d94465SJosef 'Jeff' Sipek 	    old_head);
176f34a7178SJoe Bonasera 	old_head->xc_next = NULL;
177f34a7178SJoe Bonasera 	return (old_head);
178f34a7178SJoe Bonasera }
179f34a7178SJoe Bonasera 
180*a8ea0c9dSJohn Levon /*
181*a8ea0c9dSJohn Levon  * Extract the next message from the CPU's queue, and place the message in
182*a8ea0c9dSJohn Levon  * .xc_curmsg.  The latter is solely to make debugging (and ::xcall) more
183*a8ea0c9dSJohn Levon  * useful.
184*a8ea0c9dSJohn Levon  */
185*a8ea0c9dSJohn Levon static xc_msg_t *
xc_get(void)186*a8ea0c9dSJohn Levon xc_get(void)
187*a8ea0c9dSJohn Levon {
188*a8ea0c9dSJohn Levon 	struct machcpu *mcpup = &CPU->cpu_m;
189*a8ea0c9dSJohn Levon 	xc_msg_t *msg = xc_extract(&mcpup->xc_msgbox);
190*a8ea0c9dSJohn Levon 	mcpup->xc_curmsg = msg;
191*a8ea0c9dSJohn Levon 	return (msg);
192*a8ea0c9dSJohn Levon }
193*a8ea0c9dSJohn Levon 
1947c478bd9Sstevel@tonic-gate /*
195f34a7178SJoe Bonasera  * Initialize the machcpu fields used for cross calls
1967c478bd9Sstevel@tonic-gate  */
197f34a7178SJoe Bonasera static uint_t xc_initialized = 0;
198a3114836SGerry Liu 
1997c478bd9Sstevel@tonic-gate void
xc_init_cpu(struct cpu * cpup)200f34a7178SJoe Bonasera xc_init_cpu(struct cpu *cpup)
2017c478bd9Sstevel@tonic-gate {
202f34a7178SJoe Bonasera 	xc_msg_t *msg;
203f34a7178SJoe Bonasera 	int c;
2047c478bd9Sstevel@tonic-gate 
2057c478bd9Sstevel@tonic-gate 	/*
206a3114836SGerry Liu 	 * Allocate message buffers for the new CPU.
2077c478bd9Sstevel@tonic-gate 	 */
208a3114836SGerry Liu 	for (c = 0; c < max_ncpus; ++c) {
209a3114836SGerry Liu 		if (plat_dr_support_cpu()) {
210a3114836SGerry Liu 			/*
211a3114836SGerry Liu 			 * Allocate a message buffer for every CPU possible
212a3114836SGerry Liu 			 * in system, including our own, and add them to our xc
213a3114836SGerry Liu 			 * message queue.
214a3114836SGerry Liu 			 */
215a3114836SGerry Liu 			msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
216a3114836SGerry Liu 			msg->xc_command = XC_MSG_FREE;
217a3114836SGerry Liu 			msg->xc_master = cpup->cpu_id;
218a3114836SGerry Liu 			xc_insert(&cpup->cpu_m.xc_free, msg);
219a3114836SGerry Liu 		} else if (cpu[c] != NULL && cpu[c] != cpup) {
220a3114836SGerry Liu 			/*
221a3114836SGerry Liu 			 * Add a new message buffer to each existing CPU's free
222a3114836SGerry Liu 			 * list, as well as one for my list for each of them.
223a3114836SGerry Liu 			 * Note: cpu0 is statically inserted into cpu[] array,
224a3114836SGerry Liu 			 * so need to check cpu[c] isn't cpup itself to avoid
225a3114836SGerry Liu 			 * allocating extra message buffers for cpu0.
226a3114836SGerry Liu 			 */
227a3114836SGerry Liu 			msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
228a3114836SGerry Liu 			msg->xc_command = XC_MSG_FREE;
229a3114836SGerry Liu 			msg->xc_master = c;
230a3114836SGerry Liu 			xc_insert(&cpu[c]->cpu_m.xc_free, msg);
231a3114836SGerry Liu 
232a3114836SGerry Liu 			msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
233a3114836SGerry Liu 			msg->xc_command = XC_MSG_FREE;
234a3114836SGerry Liu 			msg->xc_master = cpup->cpu_id;
235a3114836SGerry Liu 			xc_insert(&cpup->cpu_m.xc_free, msg);
236a3114836SGerry Liu 		}
237a3114836SGerry Liu 	}
2387c478bd9Sstevel@tonic-gate 
239a3114836SGerry Liu 	if (!plat_dr_support_cpu()) {
240a3114836SGerry Liu 		/*
241a3114836SGerry Liu 		 * Add one for self messages if CPU hotplug is disabled.
242a3114836SGerry Liu 		 */
243f34a7178SJoe Bonasera 		msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
244f34a7178SJoe Bonasera 		msg->xc_command = XC_MSG_FREE;
245bf73eaa5SJoe Bonasera 		msg->xc_master = cpup->cpu_id;
246f34a7178SJoe Bonasera 		xc_insert(&cpup->cpu_m.xc_free, msg);
247f34a7178SJoe Bonasera 	}
2487c478bd9Sstevel@tonic-gate 
249a3114836SGerry Liu 	if (!xc_initialized)
250a3114836SGerry Liu 		xc_initialized = 1;
251a3114836SGerry Liu }
252a3114836SGerry Liu 
253a3114836SGerry Liu void
xc_fini_cpu(struct cpu * cpup)254a3114836SGerry Liu xc_fini_cpu(struct cpu *cpup)
255a3114836SGerry Liu {
256a3114836SGerry Liu 	xc_msg_t *msg;
257a3114836SGerry Liu 
258a3114836SGerry Liu 	ASSERT((cpup->cpu_flags & CPU_READY) == 0);
259a3114836SGerry Liu 	ASSERT(cpup->cpu_m.xc_msgbox == NULL);
260a3114836SGerry Liu 	ASSERT(cpup->cpu_m.xc_work_cnt == 0);
261a3114836SGerry Liu 
262a3114836SGerry Liu 	while ((msg = xc_extract(&cpup->cpu_m.xc_free)) != NULL) {
263a3114836SGerry Liu 		kmem_free(msg, sizeof (*msg));
264a3114836SGerry Liu 	}
265a3114836SGerry Liu }
266a3114836SGerry Liu 
267a3114836SGerry Liu #define	XC_FLUSH_MAX_WAITS		1000
268a3114836SGerry Liu 
269a3114836SGerry Liu /* Flush inflight message buffers. */
270a3114836SGerry Liu int
xc_flush_cpu(struct cpu * cpup)271a3114836SGerry Liu xc_flush_cpu(struct cpu *cpup)
272a3114836SGerry Liu {
273a3114836SGerry Liu 	int i;
274a3114836SGerry Liu 
275a3114836SGerry Liu 	ASSERT((cpup->cpu_flags & CPU_READY) == 0);
276a3114836SGerry Liu 
2777c478bd9Sstevel@tonic-gate 	/*
278a3114836SGerry Liu 	 * Pause all working CPUs, which ensures that there's no CPU in
279a3114836SGerry Liu 	 * function xc_common().
280a3114836SGerry Liu 	 * This is used to work around a race condition window in xc_common()
281a3114836SGerry Liu 	 * between checking CPU_READY flag and increasing working item count.
282a563a037Sbholler 	 */
2830ed5c46eSJosef 'Jeff' Sipek 	pause_cpus(cpup, NULL);
284a3114836SGerry Liu 	start_cpus();
285a563a037Sbholler 
286a3114836SGerry Liu 	for (i = 0; i < XC_FLUSH_MAX_WAITS; i++) {
287a3114836SGerry Liu 		if (cpup->cpu_m.xc_work_cnt == 0) {
288a3114836SGerry Liu 			break;
289a3114836SGerry Liu 		}
290a3114836SGerry Liu 		DELAY(1);
291a3114836SGerry Liu 	}
292a3114836SGerry Liu 	for (; i < XC_FLUSH_MAX_WAITS; i++) {
293a3114836SGerry Liu 		if (!BT_TEST(xc_priority_set, cpup->cpu_id)) {
294a3114836SGerry Liu 			break;
295a3114836SGerry Liu 		}
296a3114836SGerry Liu 		DELAY(1);
297a3114836SGerry Liu 	}
298a3114836SGerry Liu 
299a3114836SGerry Liu 	return (i >= XC_FLUSH_MAX_WAITS ? ETIME : 0);
300f34a7178SJoe Bonasera }
3017c478bd9Sstevel@tonic-gate 
302f34a7178SJoe Bonasera /*
303f34a7178SJoe Bonasera  * X-call message processing routine. Note that this is used by both
304f34a7178SJoe Bonasera  * senders and recipients of messages.
305f34a7178SJoe Bonasera  *
306f34a7178SJoe Bonasera  * We're protected against changing CPUs by either being in a high-priority
307f34a7178SJoe Bonasera  * interrupt, having preemption disabled or by having a raised SPL.
308f34a7178SJoe Bonasera  */
309f34a7178SJoe Bonasera /*ARGSUSED*/
310f34a7178SJoe Bonasera uint_t
xc_serv(caddr_t arg1,caddr_t arg2)311f34a7178SJoe Bonasera xc_serv(caddr_t arg1, caddr_t arg2)
312f34a7178SJoe Bonasera {
313f34a7178SJoe Bonasera 	struct machcpu *mcpup = &(CPU->cpu_m);
314f34a7178SJoe Bonasera 	xc_msg_t *msg;
315f34a7178SJoe Bonasera 	xc_data_t *data;
316f34a7178SJoe Bonasera 	xc_msg_t *xc_waiters = NULL;
317f34a7178SJoe Bonasera 	uint32_t num_waiting = 0;
318f34a7178SJoe Bonasera 	xc_func_t func;
319f34a7178SJoe Bonasera 	xc_arg_t a1;
320f34a7178SJoe Bonasera 	xc_arg_t a2;
321f34a7178SJoe Bonasera 	xc_arg_t a3;
322f34a7178SJoe Bonasera 	uint_t rc = DDI_INTR_UNCLAIMED;
323f34a7178SJoe Bonasera 
324f34a7178SJoe Bonasera 	while (mcpup->xc_work_cnt != 0) {
325f34a7178SJoe Bonasera 		rc = DDI_INTR_CLAIMED;
3267c478bd9Sstevel@tonic-gate 
327f34a7178SJoe Bonasera 		/*
328f34a7178SJoe Bonasera 		 * We may have to wait for a message to arrive.
329f34a7178SJoe Bonasera 		 */
330*a8ea0c9dSJohn Levon 		for (msg = NULL; msg == NULL; msg = xc_get()) {
331bf73eaa5SJoe Bonasera 
3327c478bd9Sstevel@tonic-gate 			/*
333c03aa626SJoe Bonasera 			 * Alway check for and handle a priority message.
3347c478bd9Sstevel@tonic-gate 			 */
335c03aa626SJoe Bonasera 			if (BT_TEST(xc_priority_set, CPU->cpu_id)) {
336f34a7178SJoe Bonasera 				func = xc_priority_data.xc_func;
337f34a7178SJoe Bonasera 				a1 = xc_priority_data.xc_a1;
338f34a7178SJoe Bonasera 				a2 = xc_priority_data.xc_a2;
339f34a7178SJoe Bonasera 				a3 = xc_priority_data.xc_a3;
340*a8ea0c9dSJohn Levon 				BT_ATOMIC_CLEAR(xc_priority_set, CPU->cpu_id);
341f34a7178SJoe Bonasera 				xc_decrement(mcpup);
342f34a7178SJoe Bonasera 				func(a1, a2, a3);
343f34a7178SJoe Bonasera 				if (mcpup->xc_work_cnt == 0)
344f34a7178SJoe Bonasera 					return (rc);
345f34a7178SJoe Bonasera 			}
3467c478bd9Sstevel@tonic-gate 
347f34a7178SJoe Bonasera 			/*
348f34a7178SJoe Bonasera 			 * wait for a message to arrive
349f34a7178SJoe Bonasera 			 */
350bf73eaa5SJoe Bonasera 			SMT_PAUSE();
3517c478bd9Sstevel@tonic-gate 		}
352f34a7178SJoe Bonasera 
353f34a7178SJoe Bonasera 
354f34a7178SJoe Bonasera 		/*
355f34a7178SJoe Bonasera 		 * process the message
356f34a7178SJoe Bonasera 		 */
357f34a7178SJoe Bonasera 		switch (msg->xc_command) {
358f34a7178SJoe Bonasera 
359f34a7178SJoe Bonasera 		/*
360f34a7178SJoe Bonasera 		 * ASYNC gives back the message immediately, then we do the
361f34a7178SJoe Bonasera 		 * function and return with no more waiting.
362f34a7178SJoe Bonasera 		 */
363f34a7178SJoe Bonasera 		case XC_MSG_ASYNC:
364f34a7178SJoe Bonasera 			data = &cpu[msg->xc_master]->cpu_m.xc_data;
365f34a7178SJoe Bonasera 			func = data->xc_func;
366f34a7178SJoe Bonasera 			a1 = data->xc_a1;
367f34a7178SJoe Bonasera 			a2 = data->xc_a2;
368f34a7178SJoe Bonasera 			a3 = data->xc_a3;
369f34a7178SJoe Bonasera 			msg->xc_command = XC_MSG_DONE;
370f34a7178SJoe Bonasera 			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
371f34a7178SJoe Bonasera 			if (func != NULL)
372f34a7178SJoe Bonasera 				(void) (*func)(a1, a2, a3);
373f34a7178SJoe Bonasera 			xc_decrement(mcpup);
3747c478bd9Sstevel@tonic-gate 			break;
3757c478bd9Sstevel@tonic-gate 
376f34a7178SJoe Bonasera 		/*
377f34a7178SJoe Bonasera 		 * SYNC messages do the call, then send it back to the master
378f34a7178SJoe Bonasera 		 * in WAITING mode
379f34a7178SJoe Bonasera 		 */
380f34a7178SJoe Bonasera 		case XC_MSG_SYNC:
381f34a7178SJoe Bonasera 			data = &cpu[msg->xc_master]->cpu_m.xc_data;
382f34a7178SJoe Bonasera 			if (data->xc_func != NULL)
383f34a7178SJoe Bonasera 				(void) (*data->xc_func)(data->xc_a1,
384f34a7178SJoe Bonasera 				    data->xc_a2, data->xc_a3);
385f34a7178SJoe Bonasera 			msg->xc_command = XC_MSG_WAITING;
386f34a7178SJoe Bonasera 			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
387f34a7178SJoe Bonasera 			break;
3887c478bd9Sstevel@tonic-gate 
389f34a7178SJoe Bonasera 		/*
390f34a7178SJoe Bonasera 		 * WAITING messsages are collected by the master until all
391f34a7178SJoe Bonasera 		 * have arrived. Once all arrive, we release them back to
392f34a7178SJoe Bonasera 		 * the slaves
393f34a7178SJoe Bonasera 		 */
394f34a7178SJoe Bonasera 		case XC_MSG_WAITING:
395f34a7178SJoe Bonasera 			xc_insert(&xc_waiters, msg);
396f34a7178SJoe Bonasera 			if (++num_waiting < mcpup->xc_wait_cnt)
397f34a7178SJoe Bonasera 				break;
398f34a7178SJoe Bonasera 			while ((msg = xc_extract(&xc_waiters)) != NULL) {
399f34a7178SJoe Bonasera 				msg->xc_command = XC_MSG_RELEASED;
400f34a7178SJoe Bonasera 				xc_insert(&cpu[msg->xc_slave]->cpu_m.xc_msgbox,
401f34a7178SJoe Bonasera 				    msg);
402f34a7178SJoe Bonasera 				--num_waiting;
403f34a7178SJoe Bonasera 			}
404f34a7178SJoe Bonasera 			if (num_waiting != 0)
405f34a7178SJoe Bonasera 				panic("wrong number waiting");
406f34a7178SJoe Bonasera 			mcpup->xc_wait_cnt = 0;
407f34a7178SJoe Bonasera 			break;
4087c478bd9Sstevel@tonic-gate 
409f34a7178SJoe Bonasera 		/*
410f34a7178SJoe Bonasera 		 * CALL messages do the function and then, like RELEASE,
411f34a7178SJoe Bonasera 		 * send the message is back to master as DONE.
412f34a7178SJoe Bonasera 		 */
413f34a7178SJoe Bonasera 		case XC_MSG_CALL:
414f34a7178SJoe Bonasera 			data = &cpu[msg->xc_master]->cpu_m.xc_data;
415f34a7178SJoe Bonasera 			if (data->xc_func != NULL)
416f34a7178SJoe Bonasera 				(void) (*data->xc_func)(data->xc_a1,
417f34a7178SJoe Bonasera 				    data->xc_a2, data->xc_a3);
418f34a7178SJoe Bonasera 			/*FALLTHROUGH*/
419f34a7178SJoe Bonasera 		case XC_MSG_RELEASED:
420f34a7178SJoe Bonasera 			msg->xc_command = XC_MSG_DONE;
421f34a7178SJoe Bonasera 			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
422f34a7178SJoe Bonasera 			xc_decrement(mcpup);
423f34a7178SJoe Bonasera 			break;
4247c478bd9Sstevel@tonic-gate 
425f34a7178SJoe Bonasera 		/*
426f34a7178SJoe Bonasera 		 * DONE means a slave has completely finished up.
427f34a7178SJoe Bonasera 		 * Once we collect all the DONE messages, we'll exit
428f34a7178SJoe Bonasera 		 * processing too.
429f34a7178SJoe Bonasera 		 */
430f34a7178SJoe Bonasera 		case XC_MSG_DONE:
431f34a7178SJoe Bonasera 			msg->xc_command = XC_MSG_FREE;
432f34a7178SJoe Bonasera 			xc_insert(&mcpup->xc_free, msg);
433f34a7178SJoe Bonasera 			xc_decrement(mcpup);
4347c478bd9Sstevel@tonic-gate 			break;
4357c478bd9Sstevel@tonic-gate 
436f34a7178SJoe Bonasera 		case XC_MSG_FREE:
437bf73eaa5SJoe Bonasera 			panic("free message 0x%p in msgbox", (void *)msg);
438f34a7178SJoe Bonasera 			break;
439f34a7178SJoe Bonasera 
440f34a7178SJoe Bonasera 		default:
441bf73eaa5SJoe Bonasera 			panic("bad message 0x%p in msgbox", (void *)msg);
442f34a7178SJoe Bonasera 			break;
443f34a7178SJoe Bonasera 		}
444*a8ea0c9dSJohn Levon 
445*a8ea0c9dSJohn Levon 		CPU->cpu_m.xc_curmsg = NULL;
446f34a7178SJoe Bonasera 	}
447f34a7178SJoe Bonasera 	return (rc);
4487c478bd9Sstevel@tonic-gate }
4497c478bd9Sstevel@tonic-gate 
4507c478bd9Sstevel@tonic-gate /*
451f34a7178SJoe Bonasera  * Initiate cross call processing.
4527c478bd9Sstevel@tonic-gate  */
4537c478bd9Sstevel@tonic-gate static void
xc_common(xc_func_t func,xc_arg_t arg1,xc_arg_t arg2,xc_arg_t arg3,ulong_t * set,uint_t command)4547c478bd9Sstevel@tonic-gate xc_common(
4557c478bd9Sstevel@tonic-gate 	xc_func_t func,
4567c478bd9Sstevel@tonic-gate 	xc_arg_t arg1,
4577c478bd9Sstevel@tonic-gate 	xc_arg_t arg2,
4587c478bd9Sstevel@tonic-gate 	xc_arg_t arg3,
459f34a7178SJoe Bonasera 	ulong_t *set,
460f34a7178SJoe Bonasera 	uint_t command)
4617c478bd9Sstevel@tonic-gate {
462f34a7178SJoe Bonasera 	int c;
4637c478bd9Sstevel@tonic-gate 	struct cpu *cpup;
464f34a7178SJoe Bonasera 	xc_msg_t *msg;
465f34a7178SJoe Bonasera 	xc_data_t *data;
466f34a7178SJoe Bonasera 	int cnt;
467f34a7178SJoe Bonasera 	int save_spl;
468f34a7178SJoe Bonasera 
469f34a7178SJoe Bonasera 	if (!xc_initialized) {
470f34a7178SJoe Bonasera 		if (BT_TEST(set, CPU->cpu_id) && (CPU->cpu_flags & CPU_READY) &&
471f34a7178SJoe Bonasera 		    func != NULL)
472f34a7178SJoe Bonasera 			(void) (*func)(arg1, arg2, arg3);
473f34a7178SJoe Bonasera 		return;
474f34a7178SJoe Bonasera 	}
4757c478bd9Sstevel@tonic-gate 
476f34a7178SJoe Bonasera 	save_spl = splr(ipltospl(XC_HI_PIL));
4777c478bd9Sstevel@tonic-gate 
4787c478bd9Sstevel@tonic-gate 	/*
479f34a7178SJoe Bonasera 	 * fill in cross call data
4807c478bd9Sstevel@tonic-gate 	 */
481f34a7178SJoe Bonasera 	data = &CPU->cpu_m.xc_data;
482f34a7178SJoe Bonasera 	data->xc_func = func;
483f34a7178SJoe Bonasera 	data->xc_a1 = arg1;
484f34a7178SJoe Bonasera 	data->xc_a2 = arg2;
485f34a7178SJoe Bonasera 	data->xc_a3 = arg3;
486a563a037Sbholler 
4877c478bd9Sstevel@tonic-gate 	/*
488f34a7178SJoe Bonasera 	 * Post messages to all CPUs involved that are CPU_READY
4897c478bd9Sstevel@tonic-gate 	 */
490f34a7178SJoe Bonasera 	CPU->cpu_m.xc_wait_cnt = 0;
491a3114836SGerry Liu 	for (c = 0; c < max_ncpus; ++c) {
492f34a7178SJoe Bonasera 		if (!BT_TEST(set, c))
493f34a7178SJoe Bonasera 			continue;
494f34a7178SJoe Bonasera 		cpup = cpu[c];
495f34a7178SJoe Bonasera 		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
496a563a037Sbholler 			continue;
497a563a037Sbholler 
498f34a7178SJoe Bonasera 		/*
499f34a7178SJoe Bonasera 		 * Fill out a new message.
500f34a7178SJoe Bonasera 		 */
501f34a7178SJoe Bonasera 		msg = xc_extract(&CPU->cpu_m.xc_free);
502f34a7178SJoe Bonasera 		if (msg == NULL)
503f34a7178SJoe Bonasera 			panic("Ran out of free xc_msg_t's");
504f34a7178SJoe Bonasera 		msg->xc_command = command;
505bf73eaa5SJoe Bonasera 		if (msg->xc_master != CPU->cpu_id)
506bf73eaa5SJoe Bonasera 			panic("msg %p has wrong xc_master", (void *)msg);
507f34a7178SJoe Bonasera 		msg->xc_slave = c;
508a563a037Sbholler 
509f34a7178SJoe Bonasera 		/*
510f34a7178SJoe Bonasera 		 * Increment my work count for all messages that I'll
511f34a7178SJoe Bonasera 		 * transition from DONE to FREE.
512f34a7178SJoe Bonasera 		 * Also remember how many XC_MSG_WAITINGs to look for
513f34a7178SJoe Bonasera 		 */
514f34a7178SJoe Bonasera 		(void) xc_increment(&CPU->cpu_m);
515f34a7178SJoe Bonasera 		if (command == XC_MSG_SYNC)
516f34a7178SJoe Bonasera 			++CPU->cpu_m.xc_wait_cnt;
517f34a7178SJoe Bonasera 
518f34a7178SJoe Bonasera 		/*
519f34a7178SJoe Bonasera 		 * Increment the target CPU work count then insert the message
520f34a7178SJoe Bonasera 		 * in the target msgbox. If I post the first bit of work
521f34a7178SJoe Bonasera 		 * for the target to do, send an IPI to the target CPU.
522f34a7178SJoe Bonasera 		 */
523f34a7178SJoe Bonasera 		cnt = xc_increment(&cpup->cpu_m);
524f34a7178SJoe Bonasera 		xc_insert(&cpup->cpu_m.xc_msgbox, msg);
525f34a7178SJoe Bonasera 		if (cpup != CPU) {
526f34a7178SJoe Bonasera 			if (cnt == 0) {
527f34a7178SJoe Bonasera 				CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
528f34a7178SJoe Bonasera 				send_dirint(c, XC_HI_PIL);
529f34a7178SJoe Bonasera 				if (xc_collect_enable)
530f34a7178SJoe Bonasera 					++xc_total_cnt;
531f34a7178SJoe Bonasera 			} else if (xc_collect_enable) {
532f34a7178SJoe Bonasera 				++xc_multi_cnt;
533f34a7178SJoe Bonasera 			}
534f34a7178SJoe Bonasera 		}
5357c478bd9Sstevel@tonic-gate 	}
5367c478bd9Sstevel@tonic-gate 
5377c478bd9Sstevel@tonic-gate 	/*
538f34a7178SJoe Bonasera 	 * Now drop into the message handler until all work is done
5397c478bd9Sstevel@tonic-gate 	 */
540f34a7178SJoe Bonasera 	(void) xc_serv(NULL, NULL);
541f34a7178SJoe Bonasera 	splx(save_spl);
542f34a7178SJoe Bonasera }
5437c478bd9Sstevel@tonic-gate 
544f34a7178SJoe Bonasera /*
545f34a7178SJoe Bonasera  * Push out a priority cross call.
546f34a7178SJoe Bonasera  */
547f34a7178SJoe Bonasera static void
xc_priority_common(xc_func_t func,xc_arg_t arg1,xc_arg_t arg2,xc_arg_t arg3,ulong_t * set)548f34a7178SJoe Bonasera xc_priority_common(
549f34a7178SJoe Bonasera 	xc_func_t func,
550f34a7178SJoe Bonasera 	xc_arg_t arg1,
551f34a7178SJoe Bonasera 	xc_arg_t arg2,
552f34a7178SJoe Bonasera 	xc_arg_t arg3,
553f34a7178SJoe Bonasera 	ulong_t *set)
554f34a7178SJoe Bonasera {
555f34a7178SJoe Bonasera 	int i;
556f34a7178SJoe Bonasera 	int c;
557f34a7178SJoe Bonasera 	struct cpu *cpup;
5587c478bd9Sstevel@tonic-gate 
5597c478bd9Sstevel@tonic-gate 	/*
560c03aa626SJoe Bonasera 	 * Wait briefly for any previous xc_priority to have finished.
5617c478bd9Sstevel@tonic-gate 	 */
562a3114836SGerry Liu 	for (c = 0; c < max_ncpus; ++c) {
563c03aa626SJoe Bonasera 		cpup = cpu[c];
564c03aa626SJoe Bonasera 		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
565c03aa626SJoe Bonasera 			continue;
566c03aa626SJoe Bonasera 
567c03aa626SJoe Bonasera 		/*
568c03aa626SJoe Bonasera 		 * The value of 40000 here is from old kernel code. It
569c03aa626SJoe Bonasera 		 * really should be changed to some time based value, since
570c03aa626SJoe Bonasera 		 * under a hypervisor, there's no guarantee a remote CPU
571c03aa626SJoe Bonasera 		 * is even scheduled.
572c03aa626SJoe Bonasera 		 */
573c03aa626SJoe Bonasera 		for (i = 0; BT_TEST(xc_priority_set, c) && i < 40000; ++i)
574c03aa626SJoe Bonasera 			SMT_PAUSE();
575c03aa626SJoe Bonasera 
576c03aa626SJoe Bonasera 		/*
577c03aa626SJoe Bonasera 		 * Some CPU did not respond to a previous priority request. It's
578c03aa626SJoe Bonasera 		 * probably deadlocked with interrupts blocked or some such
579c03aa626SJoe Bonasera 		 * problem. We'll just erase the previous request - which was
580c03aa626SJoe Bonasera 		 * most likely a kmdb_enter that has already expired - and plow
581c03aa626SJoe Bonasera 		 * ahead.
582c03aa626SJoe Bonasera 		 */
583c03aa626SJoe Bonasera 		if (BT_TEST(xc_priority_set, c)) {
584*a8ea0c9dSJohn Levon 			BT_ATOMIC_CLEAR(xc_priority_set, c);
585c03aa626SJoe Bonasera 			if (cpup->cpu_m.xc_work_cnt > 0)
586c03aa626SJoe Bonasera 				xc_decrement(&cpup->cpu_m);
587c03aa626SJoe Bonasera 		}
5887c478bd9Sstevel@tonic-gate 	}
5897c478bd9Sstevel@tonic-gate 
5907c478bd9Sstevel@tonic-gate 	/*
591f34a7178SJoe Bonasera 	 * fill in cross call data
5927c478bd9Sstevel@tonic-gate 	 */
593f34a7178SJoe Bonasera 	xc_priority_data.xc_func = func;
594f34a7178SJoe Bonasera 	xc_priority_data.xc_a1 = arg1;
595f34a7178SJoe Bonasera 	xc_priority_data.xc_a2 = arg2;
596f34a7178SJoe Bonasera 	xc_priority_data.xc_a3 = arg3;
5977c478bd9Sstevel@tonic-gate 
5987c478bd9Sstevel@tonic-gate 	/*
599f34a7178SJoe Bonasera 	 * Post messages to all CPUs involved that are CPU_READY
600f34a7178SJoe Bonasera 	 * We'll always IPI, plus bang on the xc_msgbox for i86_mwait()
6017c478bd9Sstevel@tonic-gate 	 */
602a3114836SGerry Liu 	for (c = 0; c < max_ncpus; ++c) {
603f34a7178SJoe Bonasera 		if (!BT_TEST(set, c))
604f34a7178SJoe Bonasera 			continue;
605f34a7178SJoe Bonasera 		cpup = cpu[c];
606f34a7178SJoe Bonasera 		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY) ||
607f34a7178SJoe Bonasera 		    cpup == CPU)
608f34a7178SJoe Bonasera 			continue;
609f34a7178SJoe Bonasera 		(void) xc_increment(&cpup->cpu_m);
610*a8ea0c9dSJohn Levon 		BT_ATOMIC_SET(xc_priority_set, c);
611f34a7178SJoe Bonasera 		send_dirint(c, XC_HI_PIL);
612f34a7178SJoe Bonasera 		for (i = 0; i < 10; ++i) {
61375d94465SJosef 'Jeff' Sipek 			(void) atomic_cas_ptr(&cpup->cpu_m.xc_msgbox,
614f34a7178SJoe Bonasera 			    cpup->cpu_m.xc_msgbox, cpup->cpu_m.xc_msgbox);
6157c478bd9Sstevel@tonic-gate 		}
6167c478bd9Sstevel@tonic-gate 	}
6177c478bd9Sstevel@tonic-gate }
6187c478bd9Sstevel@tonic-gate 
6197c478bd9Sstevel@tonic-gate /*
620f34a7178SJoe Bonasera  * Do cross call to all other CPUs with absolutely no waiting or handshaking.
621f34a7178SJoe Bonasera  * This should only be used for extraordinary operations, like panic(), which
622f34a7178SJoe Bonasera  * need to work, in some fashion, in a not completely functional system.
623f34a7178SJoe Bonasera  * All other uses that want minimal waiting should use xc_call_nowait().
6247c478bd9Sstevel@tonic-gate  */
6257c478bd9Sstevel@tonic-gate void
xc_priority(xc_arg_t arg1,xc_arg_t arg2,xc_arg_t arg3,ulong_t * set,xc_func_t func)626f34a7178SJoe Bonasera xc_priority(
6277c478bd9Sstevel@tonic-gate 	xc_arg_t arg1,
6287c478bd9Sstevel@tonic-gate 	xc_arg_t arg2,
6297c478bd9Sstevel@tonic-gate 	xc_arg_t arg3,
630f34a7178SJoe Bonasera 	ulong_t *set,
6317c478bd9Sstevel@tonic-gate 	xc_func_t func)
6327c478bd9Sstevel@tonic-gate {
633f34a7178SJoe Bonasera 	extern int IGNORE_KERNEL_PREEMPTION;
634f34a7178SJoe Bonasera 	int save_spl = splr(ipltospl(XC_HI_PIL));
635f34a7178SJoe Bonasera 	int save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
6367c478bd9Sstevel@tonic-gate 
6377c478bd9Sstevel@tonic-gate 	IGNORE_KERNEL_PREEMPTION = 1;
638f34a7178SJoe Bonasera 	xc_priority_common((xc_func_t)func, arg1, arg2, arg3, set);
6397c478bd9Sstevel@tonic-gate 	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
640f34a7178SJoe Bonasera 	splx(save_spl);
6417c478bd9Sstevel@tonic-gate }
6427c478bd9Sstevel@tonic-gate 
6437c478bd9Sstevel@tonic-gate /*
644f34a7178SJoe Bonasera  * Wrapper for kmdb to capture other CPUs, causing them to enter the debugger.
6457c478bd9Sstevel@tonic-gate  */
6467c478bd9Sstevel@tonic-gate void
kdi_xc_others(int this_cpu,void (* func)(void))6477c478bd9Sstevel@tonic-gate kdi_xc_others(int this_cpu, void (*func)(void))
6487c478bd9Sstevel@tonic-gate {
649f34a7178SJoe Bonasera 	extern int IGNORE_KERNEL_PREEMPTION;
6507c478bd9Sstevel@tonic-gate 	int save_kernel_preemption;
6517c478bd9Sstevel@tonic-gate 	cpuset_t set;
6527c478bd9Sstevel@tonic-gate 
653ae115bc7Smrj 	if (!xc_initialized)
654ae115bc7Smrj 		return;
655