xref: /illumos-gate/usr/src/uts/i86pc/os/x_call.c (revision bf73eaa5)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
541791439Sandrei  * Common Development and Distribution License (the "License").
641791439Sandrei  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22f34a7178SJoe Bonasera  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate #include <sys/types.h>
277c478bd9Sstevel@tonic-gate #include <sys/param.h>
287c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
297c478bd9Sstevel@tonic-gate #include <sys/thread.h>
307c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
317c478bd9Sstevel@tonic-gate #include <sys/x_call.h>
32f34a7178SJoe Bonasera #include <sys/xc_levels.h>
337c478bd9Sstevel@tonic-gate #include <sys/cpu.h>
347c478bd9Sstevel@tonic-gate #include <sys/psw.h>
357c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
367c478bd9Sstevel@tonic-gate #include <sys/debug.h>
377c478bd9Sstevel@tonic-gate #include <sys/systm.h>
38ae115bc7Smrj #include <sys/archsystm.h>
397c478bd9Sstevel@tonic-gate #include <sys/machsystm.h>
407c478bd9Sstevel@tonic-gate #include <sys/mutex_impl.h>
41f34a7178SJoe Bonasera #include <sys/stack.h>
42f34a7178SJoe Bonasera #include <sys/promif.h>
43f34a7178SJoe Bonasera #include <sys/x86_archext.h>
44ae115bc7Smrj 
457c478bd9Sstevel@tonic-gate /*
46f34a7178SJoe Bonasera  * Implementation for cross-processor calls via interprocessor interrupts
47f34a7178SJoe Bonasera  *
48f34a7178SJoe Bonasera  * This implementation uses a message passing architecture to allow multiple
49f34a7178SJoe Bonasera  * concurrent cross calls to be in flight at any given time. We use the cmpxchg
50f34a7178SJoe Bonasera  * instruction, aka casptr(), to implement simple efficient work queues for
51f34a7178SJoe Bonasera  * message passing between CPUs with almost no need for regular locking.
52f34a7178SJoe Bonasera  * See xc_extract() and xc_insert() below.
53f34a7178SJoe Bonasera  *
54f34a7178SJoe Bonasera  * The general idea is that initiating a cross call means putting a message
55f34a7178SJoe Bonasera  * on a target(s) CPU's work queue. Any synchronization is handled by passing
56f34a7178SJoe Bonasera  * the message back and forth between initiator and target(s).
57f34a7178SJoe Bonasera  *
58f34a7178SJoe Bonasera  * Every CPU has xc_work_cnt, which indicates it has messages to process.
59f34a7178SJoe Bonasera  * This value is incremented as message traffic is initiated and decremented
60f34a7178SJoe Bonasera  * with every message that finishes all processing.
61f34a7178SJoe Bonasera  *
62f34a7178SJoe Bonasera  * The code needs no mfence or other membar_*() calls. The uses of
63f34a7178SJoe Bonasera  * casptr(), cas32() and atomic_dec_32() for the message passing are
64f34a7178SJoe Bonasera  * implemented with LOCK prefix instructions which are equivalent to mfence.
65f34a7178SJoe Bonasera  *
 * One interesting aspect of this implementation is that it allows 2 or more
67f34a7178SJoe Bonasera  * CPUs to initiate cross calls to intersecting sets of CPUs at the same time.
68f34a7178SJoe Bonasera  * The cross call processing by the CPUs will happen in any order with only
69f34a7178SJoe Bonasera  * a guarantee, for xc_call() and xc_sync(), that an initiator won't return
70f34a7178SJoe Bonasera  * from cross calls before all slaves have invoked the function.
71f34a7178SJoe Bonasera  *
 * The reason for this asynchronous approach is to allow for fast global
 * TLB shootdowns. If all CPUs, say N, tried to do a global TLB invalidation
 * on a different Virtual Address at the same time, the old code required
 * N squared IPIs. With this method, depending on timing, it could happen
 * with just N IPIs.
777c478bd9Sstevel@tonic-gate  */
78ae115bc7Smrj 
79f34a7178SJoe Bonasera /*
80f34a7178SJoe Bonasera  * The default is to not enable collecting counts of IPI information, since
81f34a7178SJoe Bonasera  * the updating of shared cachelines could cause excess bus traffic.
82f34a7178SJoe Bonasera  */
83f34a7178SJoe Bonasera uint_t xc_collect_enable = 0;
84f34a7178SJoe Bonasera uint64_t xc_total_cnt = 0;	/* total #IPIs sent for cross calls */
85f34a7178SJoe Bonasera uint64_t xc_multi_cnt = 0;	/* # times we piggy backed on another IPI */
867c478bd9Sstevel@tonic-gate 
877c478bd9Sstevel@tonic-gate /*
88f34a7178SJoe Bonasera  * Values for message states. Here are the normal transitions. A transition
89f34a7178SJoe Bonasera  * of "->" happens in the slave cpu and "=>" happens in the master cpu as
90f34a7178SJoe Bonasera  * the messages are passed back and forth.
917c478bd9Sstevel@tonic-gate  *
92f34a7178SJoe Bonasera  * FREE => ASYNC ->                       DONE => FREE
93f34a7178SJoe Bonasera  * FREE => CALL ->                        DONE => FREE
94f34a7178SJoe Bonasera  * FREE => SYNC -> WAITING => RELEASED -> DONE => FREE
957c478bd9Sstevel@tonic-gate  *
 * The interesting one above is ASYNC. You might ask, why not go directly
 * to FREE, instead of DONE. If it did that, it might be possible to exhaust
 * the master's xc_free list if a master can generate ASYNC messages faster
 * than the slave can process them. That could be handled with more complicated
 * handling. However since nothing important uses ASYNC, I've not bothered.
1017c478bd9Sstevel@tonic-gate  */
102f34a7178SJoe Bonasera #define	XC_MSG_FREE	(0)	/* msg in xc_free queue */
103f34a7178SJoe Bonasera #define	XC_MSG_ASYNC	(1)	/* msg in slave xc_msgbox */
104f34a7178SJoe Bonasera #define	XC_MSG_CALL	(2)	/* msg in slave xc_msgbox */
105f34a7178SJoe Bonasera #define	XC_MSG_SYNC	(3)	/* msg in slave xc_msgbox */
106f34a7178SJoe Bonasera #define	XC_MSG_WAITING	(4)	/* msg in master xc_msgbox or xc_waiters */
107f34a7178SJoe Bonasera #define	XC_MSG_RELEASED	(5)	/* msg in slave xc_msgbox */
108f34a7178SJoe Bonasera #define	XC_MSG_DONE	(6)	/* msg in master xc_msgbox */
1097c478bd9Sstevel@tonic-gate 
110f34a7178SJoe Bonasera /*
111f34a7178SJoe Bonasera  * We allow for one high priority message at a time to happen in the system.
112f34a7178SJoe Bonasera  * This is used for panic, kmdb, etc., so no locking is done.
113f34a7178SJoe Bonasera  */
114c03aa626SJoe Bonasera static volatile cpuset_t xc_priority_set_store;
115c03aa626SJoe Bonasera static volatile ulong_t *xc_priority_set = CPUSET2BV(xc_priority_set_store);
116f34a7178SJoe Bonasera static xc_data_t xc_priority_data;
1177c478bd9Sstevel@tonic-gate 
118c03aa626SJoe Bonasera /*
119c03aa626SJoe Bonasera  * Wrappers to avoid C compiler warnings due to volatile. The atomic bit
120c03aa626SJoe Bonasera  * operations don't accept volatile bit vectors - which is a bit silly.
121c03aa626SJoe Bonasera  */
122c03aa626SJoe Bonasera #define	XC_BT_SET(vector, b)	BT_ATOMIC_SET((ulong_t *)(vector), (b))
123c03aa626SJoe Bonasera #define	XC_BT_CLEAR(vector, b)	BT_ATOMIC_CLEAR((ulong_t *)(vector), (b))
124c03aa626SJoe Bonasera 
/*
 * Decrement a CPU's count of outstanding cross call work.
 * atomic_dec_32() uses a LOCK-prefixed instruction, so no additional
 * memory barrier is needed (see the block comment at the top of this file).
 */
static void
xc_decrement(struct machcpu *mcpu)
{
	atomic_dec_32(&mcpu->xc_work_cnt);
}
1337c478bd9Sstevel@tonic-gate 
1347c478bd9Sstevel@tonic-gate /*
135f34a7178SJoe Bonasera  * Increment a CPU's work count and return the old value
1367c478bd9Sstevel@tonic-gate  */
137f34a7178SJoe Bonasera static int
138f34a7178SJoe Bonasera xc_increment(struct machcpu *mcpu)
1397c478bd9Sstevel@tonic-gate {
140f34a7178SJoe Bonasera 	int old;
141f34a7178SJoe Bonasera 	do {
142f34a7178SJoe Bonasera 		old = mcpu->xc_work_cnt;
143f34a7178SJoe Bonasera 	} while (cas32((uint32_t *)&mcpu->xc_work_cnt, old, old + 1) != old);
144f34a7178SJoe Bonasera 	return (old);
1457c478bd9Sstevel@tonic-gate }
1467c478bd9Sstevel@tonic-gate 
/*
 * Put a message at the head of a queue. The insertion is atomic no matter
 * how many different inserts/extracts to the same queue happen, since the
 * queue head is only ever replaced via casptr() (a LOCK-prefixed
 * compare-and-swap).
 */
static void
xc_insert(void *queue, xc_msg_t *msg)
{
	xc_msg_t *old_head;

	/*
	 * FREE messages should only ever be getting inserted into
	 * the xc_master CPUs xc_free queue.
	 */
	ASSERT(msg->xc_command != XC_MSG_FREE ||
	    cpu[msg->xc_master] == NULL || /* possible only during init */
	    queue == &cpu[msg->xc_master]->cpu_m.xc_free);

	do {
		old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
		msg->xc_next = old_head;
	} while (casptr(queue, old_head, msg) != old_head);
}
1697c478bd9Sstevel@tonic-gate 
/*
 * Extract a message from the head of a queue. The extraction is atomic only
 * when just one thread does extractions from the queue; concurrent
 * extractors are not supported by this casptr() loop.
 * If the queue is empty, NULL is returned.
 */
static xc_msg_t *
xc_extract(xc_msg_t **queue)
{
	xc_msg_t *old_head;

	do {
		old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
		if (old_head == NULL)
			return (old_head);
	} while (casptr(queue, old_head, old_head->xc_next) != old_head);
	old_head->xc_next = NULL;
	return (old_head);
}
188f34a7178SJoe Bonasera 
1897c478bd9Sstevel@tonic-gate 
1907c478bd9Sstevel@tonic-gate /*
191f34a7178SJoe Bonasera  * Initialize the machcpu fields used for cross calls
1927c478bd9Sstevel@tonic-gate  */
193f34a7178SJoe Bonasera static uint_t xc_initialized = 0;
1947c478bd9Sstevel@tonic-gate void
195f34a7178SJoe Bonasera xc_init_cpu(struct cpu *cpup)
1967c478bd9Sstevel@tonic-gate {
197f34a7178SJoe Bonasera 	xc_msg_t *msg;
198f34a7178SJoe Bonasera 	int c;
1997c478bd9Sstevel@tonic-gate 
2007c478bd9Sstevel@tonic-gate 	/*
201f34a7178SJoe Bonasera 	 * add a new msg to each existing CPU's free list, as well as one for
202*bf73eaa5SJoe Bonasera 	 * my list for each of them. ncpus has an inconsistent value when this
203*bf73eaa5SJoe Bonasera 	 * function is called, so use cpup->cpu_id.
2047c478bd9Sstevel@tonic-gate 	 */
205*bf73eaa5SJoe Bonasera 	for (c = 0; c < cpup->cpu_id; ++c) {
206f34a7178SJoe Bonasera 		if (cpu[c] == NULL)
207f34a7178SJoe Bonasera 			continue;
208f34a7178SJoe Bonasera 		msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
209f34a7178SJoe Bonasera 		msg->xc_command = XC_MSG_FREE;
210*bf73eaa5SJoe Bonasera 		msg->xc_master = c;
211f34a7178SJoe Bonasera 		xc_insert(&cpu[c]->cpu_m.xc_free, msg);
2127c478bd9Sstevel@tonic-gate 
213f34a7178SJoe Bonasera 		msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
214f34a7178SJoe Bonasera 		msg->xc_command = XC_MSG_FREE;
215*bf73eaa5SJoe Bonasera 		msg->xc_master = cpup->cpu_id;
216f34a7178SJoe Bonasera 		xc_insert(&cpup->cpu_m.xc_free, msg);
217f34a7178SJoe Bonasera 	}
2187c478bd9Sstevel@tonic-gate 
2197c478bd9Sstevel@tonic-gate 	/*
220f34a7178SJoe Bonasera 	 * Add one for self messages
221a563a037Sbholler 	 */
222f34a7178SJoe Bonasera 	msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
223f34a7178SJoe Bonasera 	msg->xc_command = XC_MSG_FREE;
224*bf73eaa5SJoe Bonasera 	msg->xc_master = cpup->cpu_id;
225f34a7178SJoe Bonasera 	xc_insert(&cpup->cpu_m.xc_free, msg);
226a563a037Sbholler 
227f34a7178SJoe Bonasera 	if (!xc_initialized)
228f34a7178SJoe Bonasera 		xc_initialized = 1;
229f34a7178SJoe Bonasera }
2307c478bd9Sstevel@tonic-gate 
/*
 * X-call message processing routine. Note that this is used by both
 * senders and recipients of messages.
 *
 * We're protected against changing CPUs by either being in a high-priority
 * interrupt, having preemption disabled or by having a raised SPL.
 *
 * arg1, arg2	unused (interrupt handler signature)
 *
 * Returns DDI_INTR_CLAIMED if any cross call work was found for this CPU,
 * DDI_INTR_UNCLAIMED otherwise.
 */
/*ARGSUSED*/
uint_t
xc_serv(caddr_t arg1, caddr_t arg2)
{
	struct machcpu *mcpup = &(CPU->cpu_m);
	xc_msg_t *msg;
	xc_data_t *data;
	xc_msg_t *xc_waiters = NULL;	/* WAITING msgs collected by master */
	uint32_t num_waiting = 0;
	xc_func_t func;
	xc_arg_t a1;
	xc_arg_t a2;
	xc_arg_t a3;
	uint_t rc = DDI_INTR_UNCLAIMED;

	while (mcpup->xc_work_cnt != 0) {
		rc = DDI_INTR_CLAIMED;

		/*
		 * We may have to wait for a message to arrive.
		 */
		for (msg = NULL; msg == NULL;
		    msg = xc_extract(&mcpup->xc_msgbox)) {

			/*
			 * Always check for and handle a priority message.
			 */
			if (BT_TEST(xc_priority_set, CPU->cpu_id)) {
				func = xc_priority_data.xc_func;
				a1 = xc_priority_data.xc_a1;
				a2 = xc_priority_data.xc_a2;
				a3 = xc_priority_data.xc_a3;
				XC_BT_CLEAR(xc_priority_set, CPU->cpu_id);
				xc_decrement(mcpup);
				func(a1, a2, a3);
				if (mcpup->xc_work_cnt == 0)
					return (rc);
			}

			/*
			 * wait for a message to arrive
			 */
			SMT_PAUSE();
		}


		/*
		 * process the message
		 */
		switch (msg->xc_command) {

		/*
		 * ASYNC gives back the message immediately, then we do the
		 * function and return with no more waiting.
		 */
		case XC_MSG_ASYNC:
			data = &cpu[msg->xc_master]->cpu_m.xc_data;
			func = data->xc_func;
			a1 = data->xc_a1;
			a2 = data->xc_a2;
			a3 = data->xc_a3;
			msg->xc_command = XC_MSG_DONE;
			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
			if (func != NULL)
				(void) (*func)(a1, a2, a3);
			xc_decrement(mcpup);
			break;

		/*
		 * SYNC messages do the call, then send it back to the master
		 * in WAITING mode
		 */
		case XC_MSG_SYNC:
			data = &cpu[msg->xc_master]->cpu_m.xc_data;
			if (data->xc_func != NULL)
				(void) (*data->xc_func)(data->xc_a1,
				    data->xc_a2, data->xc_a3);
			msg->xc_command = XC_MSG_WAITING;
			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
			break;

		/*
		 * WAITING messages are collected by the master until all
		 * have arrived. Once all arrive, we release them back to
		 * the slaves
		 */
		case XC_MSG_WAITING:
			xc_insert(&xc_waiters, msg);
			if (++num_waiting < mcpup->xc_wait_cnt)
				break;
			while ((msg = xc_extract(&xc_waiters)) != NULL) {
				msg->xc_command = XC_MSG_RELEASED;
				xc_insert(&cpu[msg->xc_slave]->cpu_m.xc_msgbox,
				    msg);
				--num_waiting;
			}
			if (num_waiting != 0)
				panic("wrong number waiting");
			mcpup->xc_wait_cnt = 0;
			break;

		/*
		 * CALL messages do the function and then, like RELEASE,
		 * send the message back to the master as DONE.
		 */
		case XC_MSG_CALL:
			data = &cpu[msg->xc_master]->cpu_m.xc_data;
			if (data->xc_func != NULL)
				(void) (*data->xc_func)(data->xc_a1,
				    data->xc_a2, data->xc_a3);
			/*FALLTHROUGH*/
		case XC_MSG_RELEASED:
			msg->xc_command = XC_MSG_DONE;
			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
			xc_decrement(mcpup);
			break;

		/*
		 * DONE means a slave has completely finished up.
		 * Once we collect all the DONE messages, we'll exit
		 * processing too.
		 */
		case XC_MSG_DONE:
			msg->xc_command = XC_MSG_FREE;
			xc_insert(&mcpup->xc_free, msg);
			xc_decrement(mcpup);
			break;

		case XC_MSG_FREE:
			panic("free message 0x%p in msgbox", (void *)msg);
			break;

		default:
			panic("bad message 0x%p in msgbox", (void *)msg);
			break;
		}
	}
	return (rc);
}
3777c478bd9Sstevel@tonic-gate 
/*
 * Initiate cross call processing.
 *
 * Fills in this CPU's xc_data, posts one message per target CPU in "set"
 * that is CPU_READY (sending an IPI only when the target had no pending
 * work), then drops into xc_serv() to process messages until all work —
 * including any handshaking required by "command" — is complete.
 *
 * Before xc_init_cpu() has run, simply invokes func locally if this CPU
 * is in the set.
 *
 * command is one of XC_MSG_ASYNC, XC_MSG_CALL or XC_MSG_SYNC.
 */
static void
xc_common(
	xc_func_t func,
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	uint_t command)
{
	int c;
	struct cpu *cpup;
	xc_msg_t *msg;
	xc_data_t *data;
	int cnt;
	int save_spl;

	if (!xc_initialized) {
		if (BT_TEST(set, CPU->cpu_id) && (CPU->cpu_flags & CPU_READY) &&
		    func != NULL)
			(void) (*func)(arg1, arg2, arg3);
		return;
	}

	/* raise SPL so we can't migrate or be preempted mid-operation */
	save_spl = splr(ipltospl(XC_HI_PIL));

	/*
	 * fill in cross call data
	 */
	data = &CPU->cpu_m.xc_data;
	data->xc_func = func;
	data->xc_a1 = arg1;
	data->xc_a2 = arg2;
	data->xc_a3 = arg3;

	/*
	 * Post messages to all CPUs involved that are CPU_READY
	 */
	CPU->cpu_m.xc_wait_cnt = 0;
	for (c = 0; c < ncpus; ++c) {
		if (!BT_TEST(set, c))
			continue;
		cpup = cpu[c];
		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
			continue;

		/*
		 * Fill out a new message.
		 */
		msg = xc_extract(&CPU->cpu_m.xc_free);
		if (msg == NULL)
			panic("Ran out of free xc_msg_t's");
		msg->xc_command = command;
		if (msg->xc_master != CPU->cpu_id)
			panic("msg %p has wrong xc_master", (void *)msg);
		msg->xc_slave = c;

		/*
		 * Increment my work count for all messages that I'll
		 * transition from DONE to FREE.
		 * Also remember how many XC_MSG_WAITINGs to look for
		 */
		(void) xc_increment(&CPU->cpu_m);
		if (command == XC_MSG_SYNC)
			++CPU->cpu_m.xc_wait_cnt;

		/*
		 * Increment the target CPU work count then insert the message
		 * in the target msgbox. If I post the first bit of work
		 * for the target to do, send an IPI to the target CPU.
		 */
		cnt = xc_increment(&cpup->cpu_m);
		xc_insert(&cpup->cpu_m.xc_msgbox, msg);
		if (cpup != CPU) {
			if (cnt == 0) {
				CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
				send_dirint(c, XC_HI_PIL);
				if (xc_collect_enable)
					++xc_total_cnt;
			} else if (xc_collect_enable) {
				++xc_multi_cnt;
			}
		}
	}

	/*
	 * Now drop into the message handler until all work is done
	 */
	(void) xc_serv(NULL, NULL);
	splx(save_spl);
}
4717c478bd9Sstevel@tonic-gate 
/*
 * Push out a priority cross call.
 *
 * There is only one xc_priority_data slot system-wide (see comment at its
 * definition), so any previous priority request is first waited for — and
 * forcibly cleared if a CPU never responded — before the new request is
 * posted. The initiating CPU is never sent a message. No waiting for
 * completion is done here; callers (panic, kmdb) cannot rely on remotes
 * having run func when this returns.
 */
static void
xc_priority_common(
	xc_func_t func,
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set)
{
	int i;
	int c;
	struct cpu *cpup;

	/*
	 * Wait briefly for any previous xc_priority to have finished.
	 */
	for (c = 0; c < ncpus; ++c) {
		cpup = cpu[c];
		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
			continue;

		/*
		 * The value of 40000 here is from old kernel code. It
		 * really should be changed to some time based value, since
		 * under a hypervisor, there's no guarantee a remote CPU
		 * is even scheduled.
		 */
		for (i = 0; BT_TEST(xc_priority_set, c) && i < 40000; ++i)
			SMT_PAUSE();

		/*
		 * Some CPU did not respond to a previous priority request. It's
		 * probably deadlocked with interrupts blocked or some such
		 * problem. We'll just erase the previous request - which was
		 * most likely a kmdb_enter that has already expired - and plow
		 * ahead.
		 */
		if (BT_TEST(xc_priority_set, c)) {
			XC_BT_CLEAR(xc_priority_set, c);
			if (cpup->cpu_m.xc_work_cnt > 0)
				xc_decrement(&cpup->cpu_m);
		}
	}

	/*
	 * fill in cross call data
	 */
	xc_priority_data.xc_func = func;
	xc_priority_data.xc_a1 = arg1;
	xc_priority_data.xc_a2 = arg2;
	xc_priority_data.xc_a3 = arg3;

	/*
	 * Post messages to all CPUs involved that are CPU_READY
	 * We'll always IPI, plus bang on the xc_msgbox for i86_mwait()
	 */
	for (c = 0; c < ncpus; ++c) {
		if (!BT_TEST(set, c))
			continue;
		cpup = cpu[c];
		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY) ||
		    cpup == CPU)
			continue;
		(void) xc_increment(&cpup->cpu_m);
		XC_BT_SET(xc_priority_set, c);
		send_dirint(c, XC_HI_PIL);
		for (i = 0; i < 10; ++i) {
			(void) casptr(&cpup->cpu_m.xc_msgbox,
			    cpup->cpu_m.xc_msgbox, cpup->cpu_m.xc_msgbox);
		}
	}
}
5467c478bd9Sstevel@tonic-gate 
5477c478bd9Sstevel@tonic-gate /*
548f34a7178SJoe Bonasera  * Do cross call to all other CPUs with absolutely no waiting or handshaking.
549f34a7178SJoe Bonasera  * This should only be used for extraordinary operations, like panic(), which
550f34a7178SJoe Bonasera  * need to work, in some fashion, in a not completely functional system.
551f34a7178SJoe Bonasera  * All other uses that want minimal waiting should use xc_call_nowait().
5527c478bd9Sstevel@tonic-gate  */
5537c478bd9Sstevel@tonic-gate void
554f34a7178SJoe Bonasera xc_priority(
5557c478bd9Sstevel@tonic-gate 	xc_arg_t arg1,
5567c478bd9Sstevel@tonic-gate 	xc_arg_t arg2,
5577c478bd9Sstevel@tonic-gate 	xc_arg_t arg3,
558f34a7178SJoe Bonasera 	ulong_t *set,
5597c478bd9Sstevel@tonic-gate 	xc_func_t func)
5607c478bd9Sstevel@tonic-gate {
561f34a7178SJoe Bonasera 	extern int IGNORE_KERNEL_PREEMPTION;
562f34a7178SJoe Bonasera 	int save_spl = splr(ipltospl(XC_HI_PIL));
563f34a7178SJoe Bonasera 	int save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
5647c478bd9Sstevel@tonic-gate 
5657c478bd9Sstevel@tonic-gate 	IGNORE_KERNEL_PREEMPTION = 1;
566f34a7178SJoe Bonasera 	xc_priority_common((xc_func_t)func, arg1, arg2, arg3, set);
5677c478bd9Sstevel@tonic-gate 	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
568f34a7178SJoe Bonasera 	splx(save_spl);
5697c478bd9Sstevel@tonic-gate }
5707c478bd9Sstevel@tonic-gate 
5717c478bd9Sstevel@tonic-gate /*
572f34a7178SJoe Bonasera  * Wrapper for kmdb to capture other CPUs, causing them to enter the debugger.
5737c478bd9Sstevel@tonic-gate  */
5747c478bd9Sstevel@tonic-gate void
5757c478bd9Sstevel@tonic-gate kdi_xc_others(int this_cpu, void (*func)(void))
5767c478bd9Sstevel@tonic-gate {
577f34a7178SJoe Bonasera 	extern int IGNORE_KERNEL_PREEMPTION;
5787c478bd9Sstevel@tonic-gate 	int save_kernel_preemption;
5797c478bd9Sstevel@tonic-gate 	cpuset_t set;
5807c478bd9Sstevel@tonic-gate 
581ae115bc7Smrj 	if (!xc_initialized)
582ae115bc7Smrj 		return;
583ae115bc7Smrj 
5847c478bd9Sstevel@tonic-gate 	save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
5857c478bd9Sstevel@tonic-gate 	IGNORE_KERNEL_PREEMPTION = 1;
586f34a7178SJoe Bonasera 	CPUSET_ALL_BUT(set, this_cpu);
587f34a7178SJoe Bonasera 	xc_priority_common((xc_func_t)func, 0, 0, 0, CPUSET2BV(set));
5887c478bd9Sstevel@tonic-gate 	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
5897c478bd9Sstevel@tonic-gate }
590f34a7178SJoe Bonasera 
591f34a7178SJoe Bonasera 
592f34a7178SJoe Bonasera 
/*
 * Invoke function on specified processors. Remotes may continue after
 * service with no waiting. xc_call_nowait() may return immediately too.
 *
 * arg1-3	arguments passed through to func on each target CPU
 * set		bit vector of target CPU ids (see CPUSET2BV())
 * func		function to invoke; may be NULL (message traffic only)
 */
void
xc_call_nowait(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	xc_common(func, arg1, arg2, arg3, set, XC_MSG_ASYNC);
}
607f34a7178SJoe Bonasera 
/*
 * Invoke function on specified processors. Remotes may continue after
 * service with no waiting. xc_call() returns only after remotes have finished.
 *
 * arg1-3	arguments passed through to func on each target CPU
 * set		bit vector of target CPU ids (see CPUSET2BV())
 * func		function to invoke; may be NULL (message traffic only)
 */
void
xc_call(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	xc_common(func, arg1, arg2, arg3, set, XC_MSG_CALL);
}
622f34a7178SJoe Bonasera 
/*
 * Invoke function on specified processors. Remotes wait until all have
 * finished. xc_sync() also waits until all remotes have finished.
 *
 * arg1-3	arguments passed through to func on each target CPU
 * set		bit vector of target CPU ids (see CPUSET2BV())
 * func		function to invoke; may be NULL (message traffic only)
 */
void
xc_sync(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	xc_common(func, arg1, arg2, arg3, set, XC_MSG_SYNC);
}
637