17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate * CDDL HEADER START
37c478bd9Sstevel@tonic-gate *
47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the
541791439Sandrei * Common Development and Distribution License (the "License").
641791439Sandrei * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate *
87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate * and limitations under the License.
127c478bd9Sstevel@tonic-gate *
137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate *
197c478bd9Sstevel@tonic-gate * CDDL HEADER END
207c478bd9Sstevel@tonic-gate */
217c478bd9Sstevel@tonic-gate /*
22f34a7178SJoe Bonasera * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
237c478bd9Sstevel@tonic-gate * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate */
25a3114836SGerry Liu /*
26a3114836SGerry Liu * Copyright (c) 2010, Intel Corporation.
27a3114836SGerry Liu * All rights reserved.
28*a8ea0c9dSJohn Levon * Copyright 2018 Joyent, Inc.
29a3114836SGerry Liu */
307c478bd9Sstevel@tonic-gate
317c478bd9Sstevel@tonic-gate #include <sys/types.h>
327c478bd9Sstevel@tonic-gate #include <sys/param.h>
337c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
347c478bd9Sstevel@tonic-gate #include <sys/thread.h>
357c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
367c478bd9Sstevel@tonic-gate #include <sys/x_call.h>
37f34a7178SJoe Bonasera #include <sys/xc_levels.h>
387c478bd9Sstevel@tonic-gate #include <sys/cpu.h>
397c478bd9Sstevel@tonic-gate #include <sys/psw.h>
407c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
417c478bd9Sstevel@tonic-gate #include <sys/debug.h>
427c478bd9Sstevel@tonic-gate #include <sys/systm.h>
43ae115bc7Smrj #include <sys/archsystm.h>
447c478bd9Sstevel@tonic-gate #include <sys/machsystm.h>
457c478bd9Sstevel@tonic-gate #include <sys/mutex_impl.h>
46f34a7178SJoe Bonasera #include <sys/stack.h>
47f34a7178SJoe Bonasera #include <sys/promif.h>
48f34a7178SJoe Bonasera #include <sys/x86_archext.h>
49ae115bc7Smrj
507c478bd9Sstevel@tonic-gate /*
51f34a7178SJoe Bonasera * Implementation for cross-processor calls via interprocessor interrupts
52f34a7178SJoe Bonasera *
53f34a7178SJoe Bonasera * This implementation uses a message passing architecture to allow multiple
54f34a7178SJoe Bonasera * concurrent cross calls to be in flight at any given time. We use the cmpxchg
5575d94465SJosef 'Jeff' Sipek * instruction, aka atomic_cas_ptr(), to implement simple efficient work
5675d94465SJosef 'Jeff' Sipek * queues for message passing between CPUs with almost no need for regular
5775d94465SJosef 'Jeff' Sipek * locking. See xc_extract() and xc_insert() below.
58f34a7178SJoe Bonasera *
59f34a7178SJoe Bonasera * The general idea is that initiating a cross call means putting a message
60f34a7178SJoe Bonasera * on a target(s) CPU's work queue. Any synchronization is handled by passing
61f34a7178SJoe Bonasera * the message back and forth between initiator and target(s).
62f34a7178SJoe Bonasera *
63f34a7178SJoe Bonasera * Every CPU has xc_work_cnt, which indicates it has messages to process.
64f34a7178SJoe Bonasera * This value is incremented as message traffic is initiated and decremented
65f34a7178SJoe Bonasera * with every message that finishes all processing.
66f34a7178SJoe Bonasera *
67f34a7178SJoe Bonasera * The code needs no mfence or other membar_*() calls. The uses of
6875d94465SJosef 'Jeff' Sipek * atomic_cas_ptr(), atomic_cas_32() and atomic_dec_32() for the message
6975d94465SJosef 'Jeff' Sipek * passing are implemented with LOCK prefix instructions which are
7075d94465SJosef 'Jeff' Sipek * equivalent to mfence.
71f34a7178SJoe Bonasera *
 * One interesting aspect of this implementation is that it allows 2 or more
73f34a7178SJoe Bonasera * CPUs to initiate cross calls to intersecting sets of CPUs at the same time.
74f34a7178SJoe Bonasera * The cross call processing by the CPUs will happen in any order with only
75f34a7178SJoe Bonasera * a guarantee, for xc_call() and xc_sync(), that an initiator won't return
76f34a7178SJoe Bonasera * from cross calls before all slaves have invoked the function.
77f34a7178SJoe Bonasera *
78f34a7178SJoe Bonasera * The reason for this asynchronous approach is to allow for fast global
 * TLB shootdowns. Suppose all CPUs, say N of them, each try to do a global
 * TLB invalidation on a different virtual address at the same time. The old
 * code required N squared IPIs. With this method, depending on timing, it
 * could happen with just N IPIs.
83*a8ea0c9dSJohn Levon *
84*a8ea0c9dSJohn Levon * Here are the normal transitions for XC_MSG_* values in ->xc_command. A
85*a8ea0c9dSJohn Levon * transition of "->" happens in the slave cpu and "=>" happens in the master
86*a8ea0c9dSJohn Levon * cpu as the messages are passed back and forth.
877c478bd9Sstevel@tonic-gate *
88f34a7178SJoe Bonasera * FREE => ASYNC -> DONE => FREE
89f34a7178SJoe Bonasera * FREE => CALL -> DONE => FREE
90f34a7178SJoe Bonasera * FREE => SYNC -> WAITING => RELEASED -> DONE => FREE
917c478bd9Sstevel@tonic-gate *
92*a8ea0c9dSJohn Levon * The interesting one above is ASYNC. You might ask, why not go directly
93*a8ea0c9dSJohn Levon * to FREE, instead of DONE? If it did that, it might be possible to exhaust
 * the master's xc_free list if a master can generate ASYNC messages faster
 * than the slave can process them. That could be handled with more complicated
 * handling. However since nothing important uses ASYNC, I've not bothered.
977c478bd9Sstevel@tonic-gate */
98*a8ea0c9dSJohn Levon
99*a8ea0c9dSJohn Levon /*
100*a8ea0c9dSJohn Levon * The default is to not enable collecting counts of IPI information, since
101*a8ea0c9dSJohn Levon * the updating of shared cachelines could cause excess bus traffic.
102*a8ea0c9dSJohn Levon */
103*a8ea0c9dSJohn Levon uint_t xc_collect_enable = 0;
104*a8ea0c9dSJohn Levon uint64_t xc_total_cnt = 0; /* total #IPIs sent for cross calls */
105*a8ea0c9dSJohn Levon uint64_t xc_multi_cnt = 0; /* # times we piggy backed on another IPI */
1067c478bd9Sstevel@tonic-gate
107f34a7178SJoe Bonasera /*
108f34a7178SJoe Bonasera * We allow for one high priority message at a time to happen in the system.
109f34a7178SJoe Bonasera * This is used for panic, kmdb, etc., so no locking is done.
110f34a7178SJoe Bonasera */
111c03aa626SJoe Bonasera static volatile cpuset_t xc_priority_set_store;
112c03aa626SJoe Bonasera static volatile ulong_t *xc_priority_set = CPUSET2BV(xc_priority_set_store);
113f34a7178SJoe Bonasera static xc_data_t xc_priority_data;
1147c478bd9Sstevel@tonic-gate
1157c478bd9Sstevel@tonic-gate /*
116f34a7178SJoe Bonasera * Decrement a CPU's work count
1177c478bd9Sstevel@tonic-gate */
1187c478bd9Sstevel@tonic-gate static void
xc_decrement(struct machcpu * mcpu)119f34a7178SJoe Bonasera xc_decrement(struct machcpu *mcpu)
1207c478bd9Sstevel@tonic-gate {
121f34a7178SJoe Bonasera atomic_dec_32(&mcpu->xc_work_cnt);
1227c478bd9Sstevel@tonic-gate }
1237c478bd9Sstevel@tonic-gate
1247c478bd9Sstevel@tonic-gate /*
125f34a7178SJoe Bonasera * Increment a CPU's work count and return the old value
1267c478bd9Sstevel@tonic-gate */
127f34a7178SJoe Bonasera static int
xc_increment(struct machcpu * mcpu)128f34a7178SJoe Bonasera xc_increment(struct machcpu *mcpu)
1297c478bd9Sstevel@tonic-gate {
130f34a7178SJoe Bonasera int old;
131f34a7178SJoe Bonasera do {
132f34a7178SJoe Bonasera old = mcpu->xc_work_cnt;
13375d94465SJosef 'Jeff' Sipek } while (atomic_cas_32(&mcpu->xc_work_cnt, old, old + 1) != old);
134f34a7178SJoe Bonasera return (old);
1357c478bd9Sstevel@tonic-gate }
1367c478bd9Sstevel@tonic-gate
1377c478bd9Sstevel@tonic-gate /*
138f34a7178SJoe Bonasera * Put a message into a queue. The insertion is atomic no matter
139f34a7178SJoe Bonasera * how many different inserts/extracts to the same queue happen.
1407c478bd9Sstevel@tonic-gate */
141f34a7178SJoe Bonasera static void
xc_insert(void * queue,xc_msg_t * msg)142f34a7178SJoe Bonasera xc_insert(void *queue, xc_msg_t *msg)
1437c478bd9Sstevel@tonic-gate {
144f34a7178SJoe Bonasera xc_msg_t *old_head;
145bf73eaa5SJoe Bonasera
146bf73eaa5SJoe Bonasera /*
147bf73eaa5SJoe Bonasera * FREE messages should only ever be getting inserted into
148bf73eaa5SJoe Bonasera * the xc_master CPUs xc_free queue.
149bf73eaa5SJoe Bonasera */
150bf73eaa5SJoe Bonasera ASSERT(msg->xc_command != XC_MSG_FREE ||
151bf73eaa5SJoe Bonasera cpu[msg->xc_master] == NULL || /* possible only during init */
152bf73eaa5SJoe Bonasera queue == &cpu[msg->xc_master]->cpu_m.xc_free);
153bf73eaa5SJoe Bonasera
154f34a7178SJoe Bonasera do {
155f34a7178SJoe Bonasera old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
156f34a7178SJoe Bonasera msg->xc_next = old_head;
15775d94465SJosef 'Jeff' Sipek } while (atomic_cas_ptr(queue, old_head, msg) != old_head);
1587c478bd9Sstevel@tonic-gate }
1597c478bd9Sstevel@tonic-gate
1607c478bd9Sstevel@tonic-gate /*
161f34a7178SJoe Bonasera * Extract a message from a queue. The extraction is atomic only
162f34a7178SJoe Bonasera * when just one thread does extractions from the queue.
163f34a7178SJoe Bonasera * If the queue is empty, NULL is returned.
1647c478bd9Sstevel@tonic-gate */
165f34a7178SJoe Bonasera static xc_msg_t *
xc_extract(xc_msg_t ** queue)166f34a7178SJoe Bonasera xc_extract(xc_msg_t **queue)
167f34a7178SJoe Bonasera {
168f34a7178SJoe Bonasera xc_msg_t *old_head;
169f34a7178SJoe Bonasera
170f34a7178SJoe Bonasera do {
171f34a7178SJoe Bonasera old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
172f34a7178SJoe Bonasera if (old_head == NULL)
173f34a7178SJoe Bonasera return (old_head);
17475d94465SJosef 'Jeff' Sipek } while (atomic_cas_ptr(queue, old_head, old_head->xc_next) !=
17575d94465SJosef 'Jeff' Sipek old_head);
176f34a7178SJoe Bonasera old_head->xc_next = NULL;
177f34a7178SJoe Bonasera return (old_head);
178f34a7178SJoe Bonasera }
179f34a7178SJoe Bonasera
180*a8ea0c9dSJohn Levon /*
181*a8ea0c9dSJohn Levon * Extract the next message from the CPU's queue, and place the message in
182*a8ea0c9dSJohn Levon * .xc_curmsg. The latter is solely to make debugging (and ::xcall) more
183*a8ea0c9dSJohn Levon * useful.
184*a8ea0c9dSJohn Levon */
185*a8ea0c9dSJohn Levon static xc_msg_t *
xc_get(void)186*a8ea0c9dSJohn Levon xc_get(void)
187*a8ea0c9dSJohn Levon {
188*a8ea0c9dSJohn Levon struct machcpu *mcpup = &CPU->cpu_m;
189*a8ea0c9dSJohn Levon xc_msg_t *msg = xc_extract(&mcpup->xc_msgbox);
190*a8ea0c9dSJohn Levon mcpup->xc_curmsg = msg;
191*a8ea0c9dSJohn Levon return (msg);
192*a8ea0c9dSJohn Levon }
193*a8ea0c9dSJohn Levon
1947c478bd9Sstevel@tonic-gate /*
195f34a7178SJoe Bonasera * Initialize the machcpu fields used for cross calls
1967c478bd9Sstevel@tonic-gate */
197f34a7178SJoe Bonasera static uint_t xc_initialized = 0;
198a3114836SGerry Liu
1997c478bd9Sstevel@tonic-gate void
xc_init_cpu(struct cpu * cpup)200f34a7178SJoe Bonasera xc_init_cpu(struct cpu *cpup)
2017c478bd9Sstevel@tonic-gate {
202f34a7178SJoe Bonasera xc_msg_t *msg;
203f34a7178SJoe Bonasera int c;
2047c478bd9Sstevel@tonic-gate
2057c478bd9Sstevel@tonic-gate /*
206a3114836SGerry Liu * Allocate message buffers for the new CPU.
2077c478bd9Sstevel@tonic-gate */
208a3114836SGerry Liu for (c = 0; c < max_ncpus; ++c) {
209a3114836SGerry Liu if (plat_dr_support_cpu()) {
210a3114836SGerry Liu /*
211a3114836SGerry Liu * Allocate a message buffer for every CPU possible
212a3114836SGerry Liu * in system, including our own, and add them to our xc
213a3114836SGerry Liu * message queue.
214a3114836SGerry Liu */
215a3114836SGerry Liu msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
216a3114836SGerry Liu msg->xc_command = XC_MSG_FREE;
217a3114836SGerry Liu msg->xc_master = cpup->cpu_id;
218a3114836SGerry Liu xc_insert(&cpup->cpu_m.xc_free, msg);
219a3114836SGerry Liu } else if (cpu[c] != NULL && cpu[c] != cpup) {
220a3114836SGerry Liu /*
221a3114836SGerry Liu * Add a new message buffer to each existing CPU's free
222a3114836SGerry Liu * list, as well as one for my list for each of them.
223a3114836SGerry Liu * Note: cpu0 is statically inserted into cpu[] array,
224a3114836SGerry Liu * so need to check cpu[c] isn't cpup itself to avoid
225a3114836SGerry Liu * allocating extra message buffers for cpu0.
226a3114836SGerry Liu */
227a3114836SGerry Liu msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
228a3114836SGerry Liu msg->xc_command = XC_MSG_FREE;
229a3114836SGerry Liu msg->xc_master = c;
230a3114836SGerry Liu xc_insert(&cpu[c]->cpu_m.xc_free, msg);
231a3114836SGerry Liu
232a3114836SGerry Liu msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
233a3114836SGerry Liu msg->xc_command = XC_MSG_FREE;
234a3114836SGerry Liu msg->xc_master = cpup->cpu_id;
235a3114836SGerry Liu xc_insert(&cpup->cpu_m.xc_free, msg);
236a3114836SGerry Liu }
237a3114836SGerry Liu }
2387c478bd9Sstevel@tonic-gate
239a3114836SGerry Liu if (!plat_dr_support_cpu()) {
240a3114836SGerry Liu /*
241a3114836SGerry Liu * Add one for self messages if CPU hotplug is disabled.
242a3114836SGerry Liu */
243f34a7178SJoe Bonasera msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
244f34a7178SJoe Bonasera msg->xc_command = XC_MSG_FREE;
245bf73eaa5SJoe Bonasera msg->xc_master = cpup->cpu_id;
246f34a7178SJoe Bonasera xc_insert(&cpup->cpu_m.xc_free, msg);
247f34a7178SJoe Bonasera }
2487c478bd9Sstevel@tonic-gate
249a3114836SGerry Liu if (!xc_initialized)
250a3114836SGerry Liu xc_initialized = 1;
251a3114836SGerry Liu }
252a3114836SGerry Liu
253a3114836SGerry Liu void
xc_fini_cpu(struct cpu * cpup)254a3114836SGerry Liu xc_fini_cpu(struct cpu *cpup)
255a3114836SGerry Liu {
256a3114836SGerry Liu xc_msg_t *msg;
257a3114836SGerry Liu
258a3114836SGerry Liu ASSERT((cpup->cpu_flags & CPU_READY) == 0);
259a3114836SGerry Liu ASSERT(cpup->cpu_m.xc_msgbox == NULL);
260a3114836SGerry Liu ASSERT(cpup->cpu_m.xc_work_cnt == 0);
261a3114836SGerry Liu
262a3114836SGerry Liu while ((msg = xc_extract(&cpup->cpu_m.xc_free)) != NULL) {
263a3114836SGerry Liu kmem_free(msg, sizeof (*msg));
264a3114836SGerry Liu }
265a3114836SGerry Liu }
266a3114836SGerry Liu
267a3114836SGerry Liu #define XC_FLUSH_MAX_WAITS 1000
268a3114836SGerry Liu
269a3114836SGerry Liu /* Flush inflight message buffers. */
270a3114836SGerry Liu int
xc_flush_cpu(struct cpu * cpup)271a3114836SGerry Liu xc_flush_cpu(struct cpu *cpup)
272a3114836SGerry Liu {
273a3114836SGerry Liu int i;
274a3114836SGerry Liu
275a3114836SGerry Liu ASSERT((cpup->cpu_flags & CPU_READY) == 0);
276a3114836SGerry Liu
2777c478bd9Sstevel@tonic-gate /*
278a3114836SGerry Liu * Pause all working CPUs, which ensures that there's no CPU in
279a3114836SGerry Liu * function xc_common().
280a3114836SGerry Liu * This is used to work around a race condition window in xc_common()
281a3114836SGerry Liu * between checking CPU_READY flag and increasing working item count.
282a563a037Sbholler */
2830ed5c46eSJosef 'Jeff' Sipek pause_cpus(cpup, NULL);
284a3114836SGerry Liu start_cpus();
285a563a037Sbholler
286a3114836SGerry Liu for (i = 0; i < XC_FLUSH_MAX_WAITS; i++) {
287a3114836SGerry Liu if (cpup->cpu_m.xc_work_cnt == 0) {
288a3114836SGerry Liu break;
289a3114836SGerry Liu }
290a3114836SGerry Liu DELAY(1);
291a3114836SGerry Liu }
292a3114836SGerry Liu for (; i < XC_FLUSH_MAX_WAITS; i++) {
293a3114836SGerry Liu if (!BT_TEST(xc_priority_set, cpup->cpu_id)) {
294a3114836SGerry Liu break;
295a3114836SGerry Liu }
296a3114836SGerry Liu DELAY(1);
297a3114836SGerry Liu }
298a3114836SGerry Liu
299a3114836SGerry Liu return (i >= XC_FLUSH_MAX_WAITS ? ETIME : 0);
300f34a7178SJoe Bonasera }
3017c478bd9Sstevel@tonic-gate
302f34a7178SJoe Bonasera /*
303f34a7178SJoe Bonasera * X-call message processing routine. Note that this is used by both
304f34a7178SJoe Bonasera * senders and recipients of messages.
305f34a7178SJoe Bonasera *
306f34a7178SJoe Bonasera * We're protected against changing CPUs by either being in a high-priority
307f34a7178SJoe Bonasera * interrupt, having preemption disabled or by having a raised SPL.
308f34a7178SJoe Bonasera */
309f34a7178SJoe Bonasera /*ARGSUSED*/
310f34a7178SJoe Bonasera uint_t
xc_serv(caddr_t arg1,caddr_t arg2)311f34a7178SJoe Bonasera xc_serv(caddr_t arg1, caddr_t arg2)
312f34a7178SJoe Bonasera {
313f34a7178SJoe Bonasera struct machcpu *mcpup = &(CPU->cpu_m);
314f34a7178SJoe Bonasera xc_msg_t *msg;
315f34a7178SJoe Bonasera xc_data_t *data;
316f34a7178SJoe Bonasera xc_msg_t *xc_waiters = NULL;
317f34a7178SJoe Bonasera uint32_t num_waiting = 0;
318f34a7178SJoe Bonasera xc_func_t func;
319f34a7178SJoe Bonasera xc_arg_t a1;
320f34a7178SJoe Bonasera xc_arg_t a2;
321f34a7178SJoe Bonasera xc_arg_t a3;
322f34a7178SJoe Bonasera uint_t rc = DDI_INTR_UNCLAIMED;
323f34a7178SJoe Bonasera
324f34a7178SJoe Bonasera while (mcpup->xc_work_cnt != 0) {
325f34a7178SJoe Bonasera rc = DDI_INTR_CLAIMED;
3267c478bd9Sstevel@tonic-gate
327f34a7178SJoe Bonasera /*
328f34a7178SJoe Bonasera * We may have to wait for a message to arrive.
329f34a7178SJoe Bonasera */
330*a8ea0c9dSJohn Levon for (msg = NULL; msg == NULL; msg = xc_get()) {
331bf73eaa5SJoe Bonasera
3327c478bd9Sstevel@tonic-gate /*
333c03aa626SJoe Bonasera * Alway check for and handle a priority message.
3347c478bd9Sstevel@tonic-gate */
335c03aa626SJoe Bonasera if (BT_TEST(xc_priority_set, CPU->cpu_id)) {
336f34a7178SJoe Bonasera func = xc_priority_data.xc_func;
337f34a7178SJoe Bonasera a1 = xc_priority_data.xc_a1;
338f34a7178SJoe Bonasera a2 = xc_priority_data.xc_a2;
339f34a7178SJoe Bonasera a3 = xc_priority_data.xc_a3;
340*a8ea0c9dSJohn Levon BT_ATOMIC_CLEAR(xc_priority_set, CPU->cpu_id);
341f34a7178SJoe Bonasera xc_decrement(mcpup);
342f34a7178SJoe Bonasera func(a1, a2, a3);
343f34a7178SJoe Bonasera if (mcpup->xc_work_cnt == 0)
344f34a7178SJoe Bonasera return (rc);
345f34a7178SJoe Bonasera }
3467c478bd9Sstevel@tonic-gate
347f34a7178SJoe Bonasera /*
348f34a7178SJoe Bonasera * wait for a message to arrive
349f34a7178SJoe Bonasera */
350bf73eaa5SJoe Bonasera SMT_PAUSE();
3517c478bd9Sstevel@tonic-gate }
352f34a7178SJoe Bonasera
353f34a7178SJoe Bonasera
354f34a7178SJoe Bonasera /*
355f34a7178SJoe Bonasera * process the message
356f34a7178SJoe Bonasera */
357f34a7178SJoe Bonasera switch (msg->xc_command) {
358f34a7178SJoe Bonasera
359f34a7178SJoe Bonasera /*
360f34a7178SJoe Bonasera * ASYNC gives back the message immediately, then we do the
361f34a7178SJoe Bonasera * function and return with no more waiting.
362f34a7178SJoe Bonasera */
363f34a7178SJoe Bonasera case XC_MSG_ASYNC:
364f34a7178SJoe Bonasera data = &cpu[msg->xc_master]->cpu_m.xc_data;
365f34a7178SJoe Bonasera func = data->xc_func;
366f34a7178SJoe Bonasera a1 = data->xc_a1;
367f34a7178SJoe Bonasera a2 = data->xc_a2;
368f34a7178SJoe Bonasera a3 = data->xc_a3;
369f34a7178SJoe Bonasera msg->xc_command = XC_MSG_DONE;
370f34a7178SJoe Bonasera xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
371f34a7178SJoe Bonasera if (func != NULL)
372f34a7178SJoe Bonasera (void) (*func)(a1, a2, a3);
373f34a7178SJoe Bonasera xc_decrement(mcpup);
3747c478bd9Sstevel@tonic-gate break;
3757c478bd9Sstevel@tonic-gate
376f34a7178SJoe Bonasera /*
377f34a7178SJoe Bonasera * SYNC messages do the call, then send it back to the master
378f34a7178SJoe Bonasera * in WAITING mode
379f34a7178SJoe Bonasera */
380f34a7178SJoe Bonasera case XC_MSG_SYNC:
381f34a7178SJoe Bonasera data = &cpu[msg->xc_master]->cpu_m.xc_data;
382f34a7178SJoe Bonasera if (data->xc_func != NULL)
383f34a7178SJoe Bonasera (void) (*data->xc_func)(data->xc_a1,
384f34a7178SJoe Bonasera data->xc_a2, data->xc_a3);
385f34a7178SJoe Bonasera msg->xc_command = XC_MSG_WAITING;
386f34a7178SJoe Bonasera xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
387f34a7178SJoe Bonasera break;
3887c478bd9Sstevel@tonic-gate
389f34a7178SJoe Bonasera /*
390f34a7178SJoe Bonasera * WAITING messsages are collected by the master until all
391f34a7178SJoe Bonasera * have arrived. Once all arrive, we release them back to
392f34a7178SJoe Bonasera * the slaves
393f34a7178SJoe Bonasera */
394f34a7178SJoe Bonasera case XC_MSG_WAITING:
395f34a7178SJoe Bonasera xc_insert(&xc_waiters, msg);
396f34a7178SJoe Bonasera if (++num_waiting < mcpup->xc_wait_cnt)
397f34a7178SJoe Bonasera break;
398f34a7178SJoe Bonasera while ((msg = xc_extract(&xc_waiters)) != NULL) {
399f34a7178SJoe Bonasera msg->xc_command = XC_MSG_RELEASED;
400f34a7178SJoe Bonasera xc_insert(&cpu[msg->xc_slave]->cpu_m.xc_msgbox,
401f34a7178SJoe Bonasera msg);
402f34a7178SJoe Bonasera --num_waiting;
403f34a7178SJoe Bonasera }
404f34a7178SJoe Bonasera if (num_waiting != 0)
405f34a7178SJoe Bonasera panic("wrong number waiting");
406f34a7178SJoe Bonasera mcpup->xc_wait_cnt = 0;
407f34a7178SJoe Bonasera break;
4087c478bd9Sstevel@tonic-gate
409f34a7178SJoe Bonasera /*
410f34a7178SJoe Bonasera * CALL messages do the function and then, like RELEASE,
411f34a7178SJoe Bonasera * send the message is back to master as DONE.
412f34a7178SJoe Bonasera */
413f34a7178SJoe Bonasera case XC_MSG_CALL:
414f34a7178SJoe Bonasera data = &cpu[msg->xc_master]->cpu_m.xc_data;
415f34a7178SJoe Bonasera if (data->xc_func != NULL)
416f34a7178SJoe Bonasera (void) (*data->xc_func)(data->xc_a1,
417f34a7178SJoe Bonasera data->xc_a2, data->xc_a3);
418f34a7178SJoe Bonasera /*FALLTHROUGH*/
419f34a7178SJoe Bonasera case XC_MSG_RELEASED:
420f34a7178SJoe Bonasera msg->xc_command = XC_MSG_DONE;
421f34a7178SJoe Bonasera xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
422f34a7178SJoe Bonasera xc_decrement(mcpup);
423f34a7178SJoe Bonasera break;
4247c478bd9Sstevel@tonic-gate
425f34a7178SJoe Bonasera /*
426f34a7178SJoe Bonasera * DONE means a slave has completely finished up.
427f34a7178SJoe Bonasera * Once we collect all the DONE messages, we'll exit
428f34a7178SJoe Bonasera * processing too.
429f34a7178SJoe Bonasera */
430f34a7178SJoe Bonasera case XC_MSG_DONE:
431f34a7178SJoe Bonasera msg->xc_command = XC_MSG_FREE;
432f34a7178SJoe Bonasera xc_insert(&mcpup->xc_free, msg);
433f34a7178SJoe Bonasera xc_decrement(mcpup);
4347c478bd9Sstevel@tonic-gate break;
4357c478bd9Sstevel@tonic-gate
436f34a7178SJoe Bonasera case XC_MSG_FREE:
437bf73eaa5SJoe Bonasera panic("free message 0x%p in msgbox", (void *)msg);
438f34a7178SJoe Bonasera break;
439f34a7178SJoe Bonasera
440f34a7178SJoe Bonasera default:
441bf73eaa5SJoe Bonasera panic("bad message 0x%p in msgbox", (void *)msg);
442f34a7178SJoe Bonasera break;
443f34a7178SJoe Bonasera }
444*a8ea0c9dSJohn Levon
445*a8ea0c9dSJohn Levon CPU->cpu_m.xc_curmsg = NULL;
446f34a7178SJoe Bonasera }
447f34a7178SJoe Bonasera return (rc);
4487c478bd9Sstevel@tonic-gate }
4497c478bd9Sstevel@tonic-gate
4507c478bd9Sstevel@tonic-gate /*
451f34a7178SJoe Bonasera * Initiate cross call processing.
4527c478bd9Sstevel@tonic-gate */
4537c478bd9Sstevel@tonic-gate static void
xc_common(xc_func_t func,xc_arg_t arg1,xc_arg_t arg2,xc_arg_t arg3,ulong_t * set,uint_t command)4547c478bd9Sstevel@tonic-gate xc_common(
4557c478bd9Sstevel@tonic-gate xc_func_t func,
4567c478bd9Sstevel@tonic-gate xc_arg_t arg1,
4577c478bd9Sstevel@tonic-gate xc_arg_t arg2,
4587c478bd9Sstevel@tonic-gate xc_arg_t arg3,
459f34a7178SJoe Bonasera ulong_t *set,
460f34a7178SJoe Bonasera uint_t command)
4617c478bd9Sstevel@tonic-gate {
462f34a7178SJoe Bonasera int c;
4637c478bd9Sstevel@tonic-gate struct cpu *cpup;
464f34a7178SJoe Bonasera xc_msg_t *msg;
465f34a7178SJoe Bonasera xc_data_t *data;
466f34a7178SJoe Bonasera int cnt;
467f34a7178SJoe Bonasera int save_spl;
468f34a7178SJoe Bonasera
469f34a7178SJoe Bonasera if (!xc_initialized) {
470f34a7178SJoe Bonasera if (BT_TEST(set, CPU->cpu_id) && (CPU->cpu_flags & CPU_READY) &&
471f34a7178SJoe Bonasera func != NULL)
472f34a7178SJoe Bonasera (void) (*func)(arg1, arg2, arg3);
473f34a7178SJoe Bonasera return;
474f34a7178SJoe Bonasera }
4757c478bd9Sstevel@tonic-gate
476f34a7178SJoe Bonasera save_spl = splr(ipltospl(XC_HI_PIL));
4777c478bd9Sstevel@tonic-gate
4787c478bd9Sstevel@tonic-gate /*
479f34a7178SJoe Bonasera * fill in cross call data
4807c478bd9Sstevel@tonic-gate */
481f34a7178SJoe Bonasera data = &CPU->cpu_m.xc_data;
482f34a7178SJoe Bonasera data->xc_func = func;
483f34a7178SJoe Bonasera data->xc_a1 = arg1;
484f34a7178SJoe Bonasera data->xc_a2 = arg2;
485f34a7178SJoe Bonasera data->xc_a3 = arg3;
486a563a037Sbholler
4877c478bd9Sstevel@tonic-gate /*
488f34a7178SJoe Bonasera * Post messages to all CPUs involved that are CPU_READY
4897c478bd9Sstevel@tonic-gate */
490f34a7178SJoe Bonasera CPU->cpu_m.xc_wait_cnt = 0;
491a3114836SGerry Liu for (c = 0; c < max_ncpus; ++c) {
492f34a7178SJoe Bonasera if (!BT_TEST(set, c))
493f34a7178SJoe Bonasera continue;
494f34a7178SJoe Bonasera cpup = cpu[c];
495f34a7178SJoe Bonasera if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
496a563a037Sbholler continue;
497a563a037Sbholler
498f34a7178SJoe Bonasera /*
499f34a7178SJoe Bonasera * Fill out a new message.
500f34a7178SJoe Bonasera */
501f34a7178SJoe Bonasera msg = xc_extract(&CPU->cpu_m.xc_free);
502f34a7178SJoe Bonasera if (msg == NULL)
503f34a7178SJoe Bonasera panic("Ran out of free xc_msg_t's");
504f34a7178SJoe Bonasera msg->xc_command = command;
505bf73eaa5SJoe Bonasera if (msg->xc_master != CPU->cpu_id)
506bf73eaa5SJoe Bonasera panic("msg %p has wrong xc_master", (void *)msg);
507f34a7178SJoe Bonasera msg->xc_slave = c;
508a563a037Sbholler
509f34a7178SJoe Bonasera /*
510f34a7178SJoe Bonasera * Increment my work count for all messages that I'll
511f34a7178SJoe Bonasera * transition from DONE to FREE.
512f34a7178SJoe Bonasera * Also remember how many XC_MSG_WAITINGs to look for
513f34a7178SJoe Bonasera */
514f34a7178SJoe Bonasera (void) xc_increment(&CPU->cpu_m);
515f34a7178SJoe Bonasera if (command == XC_MSG_SYNC)
516f34a7178SJoe Bonasera ++CPU->cpu_m.xc_wait_cnt;
517f34a7178SJoe Bonasera
518f34a7178SJoe Bonasera /*
519f34a7178SJoe Bonasera * Increment the target CPU work count then insert the message
520f34a7178SJoe Bonasera * in the target msgbox. If I post the first bit of work
521f34a7178SJoe Bonasera * for the target to do, send an IPI to the target CPU.
522f34a7178SJoe Bonasera */
523f34a7178SJoe Bonasera cnt = xc_increment(&cpup->cpu_m);
524f34a7178SJoe Bonasera xc_insert(&cpup->cpu_m.xc_msgbox, msg);
525f34a7178SJoe Bonasera if (cpup != CPU) {
526f34a7178SJoe Bonasera if (cnt == 0) {
527f34a7178SJoe Bonasera CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
528f34a7178SJoe Bonasera send_dirint(c, XC_HI_PIL);
529f34a7178SJoe Bonasera if (xc_collect_enable)
530f34a7178SJoe Bonasera ++xc_total_cnt;
531f34a7178SJoe Bonasera } else if (xc_collect_enable) {
532f34a7178SJoe Bonasera ++xc_multi_cnt;
533f34a7178SJoe Bonasera }
534f34a7178SJoe Bonasera }
5357c478bd9Sstevel@tonic-gate }
5367c478bd9Sstevel@tonic-gate
5377c478bd9Sstevel@tonic-gate /*
538f34a7178SJoe Bonasera * Now drop into the message handler until all work is done
5397c478bd9Sstevel@tonic-gate */
540f34a7178SJoe Bonasera (void) xc_serv(NULL, NULL);
541f34a7178SJoe Bonasera splx(save_spl);
542f34a7178SJoe Bonasera }
5437c478bd9Sstevel@tonic-gate
544f34a7178SJoe Bonasera /*
545f34a7178SJoe Bonasera * Push out a priority cross call.
546f34a7178SJoe Bonasera */
547f34a7178SJoe Bonasera static void
xc_priority_common(xc_func_t func,xc_arg_t arg1,xc_arg_t arg2,xc_arg_t arg3,ulong_t * set)548f34a7178SJoe Bonasera xc_priority_common(
549f34a7178SJoe Bonasera xc_func_t func,
550f34a7178SJoe Bonasera xc_arg_t arg1,
551f34a7178SJoe Bonasera xc_arg_t arg2,
552f34a7178SJoe Bonasera xc_arg_t arg3,
553f34a7178SJoe Bonasera ulong_t *set)
554f34a7178SJoe Bonasera {
555f34a7178SJoe Bonasera int i;
556f34a7178SJoe Bonasera int c;
557f34a7178SJoe Bonasera struct cpu *cpup;
5587c478bd9Sstevel@tonic-gate
5597c478bd9Sstevel@tonic-gate /*
560c03aa626SJoe Bonasera * Wait briefly for any previous xc_priority to have finished.
5617c478bd9Sstevel@tonic-gate */
562a3114836SGerry Liu for (c = 0; c < max_ncpus; ++c) {
563c03aa626SJoe Bonasera cpup = cpu[c];
564c03aa626SJoe Bonasera if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
565c03aa626SJoe Bonasera continue;
566c03aa626SJoe Bonasera
567c03aa626SJoe Bonasera /*
568c03aa626SJoe Bonasera * The value of 40000 here is from old kernel code. It
569c03aa626SJoe Bonasera * really should be changed to some time based value, since
570c03aa626SJoe Bonasera * under a hypervisor, there's no guarantee a remote CPU
571c03aa626SJoe Bonasera * is even scheduled.
572c03aa626SJoe Bonasera */
573c03aa626SJoe Bonasera for (i = 0; BT_TEST(xc_priority_set, c) && i < 40000; ++i)
574c03aa626SJoe Bonasera SMT_PAUSE();
575c03aa626SJoe Bonasera
576c03aa626SJoe Bonasera /*
577c03aa626SJoe Bonasera * Some CPU did not respond to a previous priority request. It's
578c03aa626SJoe Bonasera * probably deadlocked with interrupts blocked or some such
579c03aa626SJoe Bonasera * problem. We'll just erase the previous request - which was
580c03aa626SJoe Bonasera * most likely a kmdb_enter that has already expired - and plow
581c03aa626SJoe Bonasera * ahead.
582c03aa626SJoe Bonasera */
583c03aa626SJoe Bonasera if (BT_TEST(xc_priority_set, c)) {
584*a8ea0c9dSJohn Levon BT_ATOMIC_CLEAR(xc_priority_set, c);
585c03aa626SJoe Bonasera if (cpup->cpu_m.xc_work_cnt > 0)
586c03aa626SJoe Bonasera xc_decrement(&cpup->cpu_m);
587c03aa626SJoe Bonasera }
5887c478bd9Sstevel@tonic-gate }
5897c478bd9Sstevel@tonic-gate
5907c478bd9Sstevel@tonic-gate /*
591f34a7178SJoe Bonasera * fill in cross call data
5927c478bd9Sstevel@tonic-gate */
593f34a7178SJoe Bonasera xc_priority_data.xc_func = func;
594f34a7178SJoe Bonasera xc_priority_data.xc_a1 = arg1;
595f34a7178SJoe Bonasera xc_priority_data.xc_a2 = arg2;
596f34a7178SJoe Bonasera xc_priority_data.xc_a3 = arg3;
5977c478bd9Sstevel@tonic-gate
5987c478bd9Sstevel@tonic-gate /*
599f34a7178SJoe Bonasera * Post messages to all CPUs involved that are CPU_READY
600f34a7178SJoe Bonasera * We'll always IPI, plus bang on the xc_msgbox for i86_mwait()
6017c478bd9Sstevel@tonic-gate */
602a3114836SGerry Liu for (c = 0; c < max_ncpus; ++c) {
603f34a7178SJoe Bonasera if (!BT_TEST(set, c))
604f34a7178SJoe Bonasera continue;
605f34a7178SJoe Bonasera cpup = cpu[c];
606f34a7178SJoe Bonasera if (cpup == NULL || !(cpup->cpu_flags & CPU_READY) ||
607f34a7178SJoe Bonasera cpup == CPU)
608f34a7178SJoe Bonasera continue;
609f34a7178SJoe Bonasera (void) xc_increment(&cpup->cpu_m);
610*a8ea0c9dSJohn Levon BT_ATOMIC_SET(xc_priority_set, c);
611f34a7178SJoe Bonasera send_dirint(c, XC_HI_PIL);
612f34a7178SJoe Bonasera for (i = 0; i < 10; ++i) {
61375d94465SJosef 'Jeff' Sipek (void) atomic_cas_ptr(&cpup->cpu_m.xc_msgbox,
614f34a7178SJoe Bonasera cpup->cpu_m.xc_msgbox, cpup->cpu_m.xc_msgbox);
6157c478bd9Sstevel@tonic-gate }
6167c478bd9Sstevel@tonic-gate }
6177c478bd9Sstevel@tonic-gate }
6187c478bd9Sstevel@tonic-gate
6197c478bd9Sstevel@tonic-gate /*
620f34a7178SJoe Bonasera * Do cross call to all other CPUs with absolutely no waiting or handshaking.
621f34a7178SJoe Bonasera * This should only be used for extraordinary operations, like panic(), which
622f34a7178SJoe Bonasera * need to work, in some fashion, in a not completely functional system.
623f34a7178SJoe Bonasera * All other uses that want minimal waiting should use xc_call_nowait().
6247c478bd9Sstevel@tonic-gate */
6257c478bd9Sstevel@tonic-gate void
xc_priority(xc_arg_t arg1,xc_arg_t arg2,xc_arg_t arg3,ulong_t * set,xc_func_t func)626f34a7178SJoe Bonasera xc_priority(
6277c478bd9Sstevel@tonic-gate xc_arg_t arg1,
6287c478bd9Sstevel@tonic-gate xc_arg_t arg2,
6297c478bd9Sstevel@tonic-gate xc_arg_t arg3,
630f34a7178SJoe Bonasera ulong_t *set,
6317c478bd9Sstevel@tonic-gate xc_func_t func)
6327c478bd9Sstevel@tonic-gate {
633f34a7178SJoe Bonasera extern int IGNORE_KERNEL_PREEMPTION;
634f34a7178SJoe Bonasera int save_spl = splr(ipltospl(XC_HI_PIL));
635f34a7178SJoe Bonasera int save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
6367c478bd9Sstevel@tonic-gate
6377c478bd9Sstevel@tonic-gate IGNORE_KERNEL_PREEMPTION = 1;
638f34a7178SJoe Bonasera xc_priority_common((xc_func_t)func, arg1, arg2, arg3, set);
6397c478bd9Sstevel@tonic-gate IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
640f34a7178SJoe Bonasera splx(save_spl);
6417c478bd9Sstevel@tonic-gate }
6427c478bd9Sstevel@tonic-gate
6437c478bd9Sstevel@tonic-gate /*
644f34a7178SJoe Bonasera * Wrapper for kmdb to capture other CPUs, causing them to enter the debugger.
6457c478bd9Sstevel@tonic-gate */
6467c478bd9Sstevel@tonic-gate void
kdi_xc_others(int this_cpu,void (* func)(void))6477c478bd9Sstevel@tonic-gate kdi_xc_others(int this_cpu, void (*func)(void))
6487c478bd9Sstevel@tonic-gate {
649f34a7178SJoe Bonasera extern int IGNORE_KERNEL_PREEMPTION;
6507c478bd9Sstevel@tonic-gate int save_kernel_preemption;
6517c478bd9Sstevel@tonic-gate cpuset_t set;
6527c478bd9Sstevel@tonic-gate
653ae115bc7Smrj if (!xc_initialized)
654ae115bc7Smrj return;
655ae115bc7Smrj
6567c478bd9Sstevel@tonic-gate save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
6577c478bd9Sstevel@tonic-gate IGNORE_KERNEL_PREEMPTION = 1;
658f34a7178SJoe Bonasera CPUSET_ALL_BUT(set, this_cpu);
659f34a7178SJoe Bonasera xc_priority_common((xc_func_t)func, 0, 0, 0, CPUSET2BV(set));
6607c478bd9Sstevel@tonic-gate IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
6617c478bd9Sstevel@tonic-gate }
662f34a7178SJoe Bonasera
663f34a7178SJoe Bonasera
664f34a7178SJoe Bonasera
665f34a7178SJoe Bonasera /*
666f34a7178SJoe Bonasera * Invoke function on specified processors. Remotes may continue after
667f34a7178SJoe Bonasera * service with no waiting. xc_call_nowait() may return immediately too.
668f34a7178SJoe Bonasera */
669f34a7178SJoe Bonasera void
xc_call_nowait(xc_arg_t arg1,xc_arg_t arg2,xc_arg_t arg3,ulong_t * set,xc_func_t func)670f34a7178SJoe Bonasera xc_call_nowait(
671f34a7178SJoe Bonasera xc_arg_t arg1,
672f34a7178SJoe Bonasera xc_arg_t arg2,
673f34a7178SJoe Bonasera xc_arg_t arg3,
674f34a7178SJoe Bonasera ulong_t *set,
675f34a7178SJoe Bonasera xc_func_t func)
676f34a7178SJoe Bonasera {
677f34a7178SJoe Bonasera xc_common(func, arg1, arg2, arg3, set, XC_MSG_ASYNC);
678f34a7178SJoe Bonasera }
679f34a7178SJoe Bonasera
680f34a7178SJoe Bonasera /*
681f34a7178SJoe Bonasera * Invoke function on specified processors. Remotes may continue after
682f34a7178SJoe Bonasera * service with no waiting. xc_call() returns only after remotes have finished.
683f34a7178SJoe Bonasera */
684f34a7178SJoe Bonasera void
xc_call(xc_arg_t arg1,xc_arg_t arg2,xc_arg_t arg3,ulong_t * set,xc_func_t func)685f34a7178SJoe Bonasera xc_call(
686f34a7178SJoe Bonasera xc_arg_t arg1,
687f34a7178SJoe Bonasera xc_arg_t arg2,
688f34a7178SJoe Bonasera xc_arg_t arg3,
689f34a7178SJoe Bonasera ulong_t *set,
690f34a7178SJoe Bonasera xc_func_t func)
691f34a7178SJoe Bonasera {
692f34a7178SJoe Bonasera xc_common(func, arg1, arg2, arg3, set, XC_MSG_CALL);
693f34a7178SJoe Bonasera }
694f34a7178SJoe Bonasera
695f34a7178SJoe Bonasera /*
696f34a7178SJoe Bonasera * Invoke function on specified processors. Remotes wait until all have
697f34a7178SJoe Bonasera * finished. xc_sync() also waits until all remotes have finished.
698f34a7178SJoe Bonasera */
699f34a7178SJoe Bonasera void
xc_sync(xc_arg_t arg1,xc_arg_t arg2,xc_arg_t arg3,ulong_t * set,xc_func_t func)700f34a7178SJoe Bonasera xc_sync(
701f34a7178SJoe Bonasera xc_arg_t arg1,
702f34a7178SJoe Bonasera xc_arg_t arg2,
703f34a7178SJoe Bonasera xc_arg_t arg3,
704f34a7178SJoe Bonasera ulong_t *set,
705f34a7178SJoe Bonasera xc_func_t func)
706f34a7178SJoe Bonasera {
707f34a7178SJoe Bonasera xc_common(func, arg1, arg2, arg3, set, XC_MSG_SYNC);
708f34a7178SJoe Bonasera }
709