17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 541791439Sandrei * Common Development and Distribution License (the "License"). 641791439Sandrei * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22f34a7178SJoe Bonasera * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate #include <sys/types.h> 277c478bd9Sstevel@tonic-gate #include <sys/param.h> 287c478bd9Sstevel@tonic-gate #include <sys/t_lock.h> 297c478bd9Sstevel@tonic-gate #include <sys/thread.h> 307c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 317c478bd9Sstevel@tonic-gate #include <sys/x_call.h> 32f34a7178SJoe Bonasera #include <sys/xc_levels.h> 337c478bd9Sstevel@tonic-gate #include <sys/cpu.h> 347c478bd9Sstevel@tonic-gate #include <sys/psw.h> 357c478bd9Sstevel@tonic-gate #include <sys/sunddi.h> 367c478bd9Sstevel@tonic-gate #include <sys/debug.h> 377c478bd9Sstevel@tonic-gate #include <sys/systm.h> 38ae115bc7Smrj #include <sys/archsystm.h> 397c478bd9Sstevel@tonic-gate #include <sys/machsystm.h> 407c478bd9Sstevel@tonic-gate #include <sys/mutex_impl.h> 41f34a7178SJoe Bonasera #include <sys/stack.h> 42f34a7178SJoe Bonasera #include <sys/promif.h> 43f34a7178SJoe Bonasera #include <sys/x86_archext.h> 44ae115bc7Smrj 457c478bd9Sstevel@tonic-gate /* 46f34a7178SJoe Bonasera * Implementation for cross-processor calls via interprocessor interrupts 47f34a7178SJoe Bonasera * 48f34a7178SJoe Bonasera * This implementation uses a message passing architecture to allow multiple 49f34a7178SJoe Bonasera * concurrent cross calls to be in flight at any given time. We use the cmpxchg 50f34a7178SJoe Bonasera * instruction, aka casptr(), to implement simple efficient work queues for 51f34a7178SJoe Bonasera * message passing between CPUs with almost no need for regular locking. 52f34a7178SJoe Bonasera * See xc_extract() and xc_insert() below. 53f34a7178SJoe Bonasera * 54f34a7178SJoe Bonasera * The general idea is that initiating a cross call means putting a message 55f34a7178SJoe Bonasera * on a target(s) CPU's work queue. Any synchronization is handled by passing 56f34a7178SJoe Bonasera * the message back and forth between initiator and target(s). 
 *
 * Every CPU has xc_work_cnt, which indicates it has messages to process.
 * This value is incremented as message traffic is initiated and decremented
 * with every message that finishes all processing.
 *
 * The code needs no mfence or other membar_*() calls. The uses of
 * casptr(), cas32() and atomic_dec_32() for the message passing are
 * implemented with LOCK prefix instructions which are equivalent to mfence.
 *
 * One interesting aspect of this implementation is that it allows 2 or more
 * CPUs to initiate cross calls to intersecting sets of CPUs at the same time.
 * The cross call processing by the CPUs will happen in any order with only
 * a guarantee, for xc_call() and xc_sync(), that an initiator won't return
 * from cross calls before all slaves have invoked the function.
 *
 * The reason for this asynchronous approach is to allow for fast global
 * TLB shootdowns. If all CPUs, say N, tried to do a global TLB invalidation
 * on a different Virtual Address at the same time, the old code required
 * N squared IPIs. With this method, depending on timing, it could happen
 * with just N IPIs.
 */

/*
 * The default is to not enable collecting counts of IPI information, since
 * the updating of shared cachelines could cause excess bus traffic.
 */
uint_t xc_collect_enable = 0;
uint64_t xc_total_cnt = 0;	/* total #IPIs sent for cross calls */
uint64_t xc_multi_cnt = 0;	/* # times we piggy backed on another IPI */

/*
 * Values for message states. Here are the normal transitions. A transition
 * of "->" happens in the slave cpu and "=>" happens in the master cpu as
 * the messages are passed back and forth.
 *
 * FREE => ASYNC -> DONE => FREE
 * FREE => CALL -> DONE => FREE
 * FREE => SYNC -> WAITING => RELEASED -> DONE => FREE
 *
 * The interesting one above is ASYNC. You might ask, why not go directly
 * to FREE, instead of DONE. If it did that, it might be possible to exhaust
 * the master's xc_free list if a master can generate ASYNC messages faster
 * than the slave can process them. That could be handled with more complicated
 * handling. However since nothing important uses ASYNC, I've not bothered.
 */
#define	XC_MSG_FREE	(0)	/* msg in xc_free queue */
#define	XC_MSG_ASYNC	(1)	/* msg in slave xc_msgbox */
#define	XC_MSG_CALL	(2)	/* msg in slave xc_msgbox */
#define	XC_MSG_SYNC	(3)	/* msg in slave xc_msgbox */
#define	XC_MSG_WAITING	(4)	/* msg in master xc_msgbox or xc_waiters */
#define	XC_MSG_RELEASED	(5)	/* msg in slave xc_msgbox */
#define	XC_MSG_DONE	(6)	/* msg in master xc_msgbox */

/*
 * We allow for one high priority message at a time to happen in the system.
 * This is used for panic, kmdb, etc., so no locking is done.
 */
static volatile cpuset_t xc_priority_set_store;
static volatile ulong_t *xc_priority_set = CPUSET2BV(xc_priority_set_store);
static xc_data_t xc_priority_data;

/*
 * Wrappers to avoid C compiler warnings due to volatile. The atomic bit
 * operations don't accept volatile bit vectors - which is a bit silly.
 */
#define	XC_BT_SET(vector, b)	BT_ATOMIC_SET((ulong_t *)(vector), (b))
#define	XC_BT_CLEAR(vector, b)	BT_ATOMIC_CLEAR((ulong_t *)(vector), (b))

/*
 * Decrement a CPU's work count
 */
static void
xc_decrement(struct machcpu *mcpu)
{
	atomic_dec_32(&mcpu->xc_work_cnt);
}

/*
 * Increment a CPU's work count and return the old value.
 *
 * The lock-free cas32() loop makes the increment atomic; returning the
 * prior value lets a caller (see xc_common()) detect when it posted the
 * first piece of work for a CPU and hence must send an IPI.
 */
static int
xc_increment(struct machcpu *mcpu)
{
	int old;

	do {
		old = mcpu->xc_work_cnt;
	} while (cas32((uint32_t *)&mcpu->xc_work_cnt, old, old + 1) != old);
	return (old);
}

/*
 * Put a message into a queue. The insertion is atomic no matter
 * how many different inserts/extracts to the same queue happen.
 * The queue is a simple LIFO list linked through msg->xc_next,
 * with the head swapped in via casptr().
 */
static void
xc_insert(void *queue, xc_msg_t *msg)
{
	xc_msg_t *old_head;

	/*
	 * FREE messages should only ever be getting inserted into
	 * the xc_master CPUs xc_free queue.
	 */
	ASSERT(msg->xc_command != XC_MSG_FREE ||
	    cpu[msg->xc_master] == NULL || /* possible only during init */
	    queue == &cpu[msg->xc_master]->cpu_m.xc_free);

	do {
		old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
		msg->xc_next = old_head;
	} while (casptr(queue, old_head, msg) != old_head);
}

/*
 * Extract a message from a queue. The extraction is atomic only
 * when just one thread does extractions from the queue.
 * If the queue is empty, NULL is returned.
 */
static xc_msg_t *
xc_extract(xc_msg_t **queue)
{
	xc_msg_t *old_head;

	do {
		old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
		if (old_head == NULL)
			return (old_head);
	} while (casptr(queue, old_head, old_head->xc_next) != old_head);
	old_head->xc_next = NULL;
	return (old_head);
}


/*
 * Initialize the machcpu fields used for cross calls.
 *
 * Called once per CPU as it comes up; pre-allocates enough XC_MSG_FREE
 * messages (tagged with their owning master via xc_master) so that any
 * CPU can later cross call any other without allocating at IPI time.
 */
static uint_t xc_initialized = 0;
void
xc_init_cpu(struct cpu *cpup)
{
	xc_msg_t *msg;
	int c;

	/*
	 * add a new msg to each existing CPU's free list, as well as one for
	 * my list for each of them. ncpus has an inconsistent value when this
	 * function is called, so use cpup->cpu_id.
	 */
	for (c = 0; c < cpup->cpu_id; ++c) {
		if (cpu[c] == NULL)
			continue;
		msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
		msg->xc_command = XC_MSG_FREE;
		msg->xc_master = c;
		xc_insert(&cpu[c]->cpu_m.xc_free, msg);

		msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
		msg->xc_command = XC_MSG_FREE;
		msg->xc_master = cpup->cpu_id;
		xc_insert(&cpup->cpu_m.xc_free, msg);
	}

	/*
	 * Add one for self messages
	 */
	msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
	msg->xc_command = XC_MSG_FREE;
	msg->xc_master = cpup->cpu_id;
	xc_insert(&cpup->cpu_m.xc_free, msg);

	if (!xc_initialized)
		xc_initialized = 1;
}

/*
 * X-call message processing routine. Note that this is used by both
 * senders and recipients of messages.
 *
 * Runs until this CPU's xc_work_cnt drains to zero, processing messages
 * from the per-CPU xc_msgbox and any pending priority request. Returns
 * DDI_INTR_CLAIMED if any work was done, DDI_INTR_UNCLAIMED otherwise.
 *
 * We're protected against changing CPUs by either being in a high-priority
 * interrupt, having preemption disabled or by having a raised SPL.
 */
/*ARGSUSED*/
uint_t
xc_serv(caddr_t arg1, caddr_t arg2)
{
	struct machcpu *mcpup = &(CPU->cpu_m);
	xc_msg_t *msg;
	xc_data_t *data;
	xc_msg_t *xc_waiters = NULL;
	uint32_t num_waiting = 0;
	xc_func_t func;
	xc_arg_t a1;
	xc_arg_t a2;
	xc_arg_t a3;
	uint_t rc = DDI_INTR_UNCLAIMED;

	while (mcpup->xc_work_cnt != 0) {
		rc = DDI_INTR_CLAIMED;

		/*
		 * We may have to wait for a message to arrive.
		 */
		for (msg = NULL; msg == NULL;
		    msg = xc_extract(&mcpup->xc_msgbox)) {

			/*
			 * Always check for and handle a priority message.
			 */
			if (BT_TEST(xc_priority_set, CPU->cpu_id)) {
				func = xc_priority_data.xc_func;
				a1 = xc_priority_data.xc_a1;
				a2 = xc_priority_data.xc_a2;
				a3 = xc_priority_data.xc_a3;
				XC_BT_CLEAR(xc_priority_set, CPU->cpu_id);
				xc_decrement(mcpup);
				func(a1, a2, a3);
				if (mcpup->xc_work_cnt == 0)
					return (rc);
			}

			/*
			 * wait for a message to arrive
			 */
			SMT_PAUSE();
		}


		/*
		 * process the message
		 */
		switch (msg->xc_command) {

		/*
		 * ASYNC gives back the message immediately, then we do the
		 * function and return with no more waiting.
		 */
		case XC_MSG_ASYNC:
			data = &cpu[msg->xc_master]->cpu_m.xc_data;
			func = data->xc_func;
			a1 = data->xc_a1;
			a2 = data->xc_a2;
			a3 = data->xc_a3;
			msg->xc_command = XC_MSG_DONE;
			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
			if (func != NULL)
				(void) (*func)(a1, a2, a3);
			xc_decrement(mcpup);
			break;

		/*
		 * SYNC messages do the call, then send it back to the master
		 * in WAITING mode
		 */
		case XC_MSG_SYNC:
			data = &cpu[msg->xc_master]->cpu_m.xc_data;
			if (data->xc_func != NULL)
				(void) (*data->xc_func)(data->xc_a1,
				    data->xc_a2, data->xc_a3);
			msg->xc_command = XC_MSG_WAITING;
			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
			break;

		/*
		 * WAITING messages are collected by the master until all
		 * have arrived. Once all arrive, we release them back to
		 * the slaves
		 */
		case XC_MSG_WAITING:
			xc_insert(&xc_waiters, msg);
			if (++num_waiting < mcpup->xc_wait_cnt)
				break;
			while ((msg = xc_extract(&xc_waiters)) != NULL) {
				msg->xc_command = XC_MSG_RELEASED;
				xc_insert(&cpu[msg->xc_slave]->cpu_m.xc_msgbox,
				    msg);
				--num_waiting;
			}
			if (num_waiting != 0)
				panic("wrong number waiting");
			mcpup->xc_wait_cnt = 0;
			break;

		/*
		 * CALL messages do the function and then, like RELEASED,
		 * send the message back to master as DONE.
		 */
		case XC_MSG_CALL:
			data = &cpu[msg->xc_master]->cpu_m.xc_data;
			if (data->xc_func != NULL)
				(void) (*data->xc_func)(data->xc_a1,
				    data->xc_a2, data->xc_a3);
			/*FALLTHROUGH*/
		case XC_MSG_RELEASED:
			msg->xc_command = XC_MSG_DONE;
			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
			xc_decrement(mcpup);
			break;

		/*
		 * DONE means a slave has completely finished up.
		 * Once we collect all the DONE messages, we'll exit
		 * processing too.
		 */
		case XC_MSG_DONE:
			msg->xc_command = XC_MSG_FREE;
			xc_insert(&mcpup->xc_free, msg);
			xc_decrement(mcpup);
			break;

		case XC_MSG_FREE:
			panic("free message 0x%p in msgbox", (void *)msg);
			break;

		default:
			panic("bad message 0x%p in msgbox", (void *)msg);
			break;
		}
	}
	return (rc);
}

/*
 * Initiate cross call processing.
 *
 * Posts a message of the given command type (ASYNC, CALL or SYNC) to
 * every CPU_READY CPU in "set", raising SPL to XC_HI_PIL for the
 * duration, then drops into xc_serv() to handshake until this CPU's
 * work count drains.
 */
static void
xc_common(
	xc_func_t func,
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	uint_t command)
{
	int c;
	struct cpu *cpup;
	xc_msg_t *msg;
	xc_data_t *data;
	int cnt;
	int save_spl;

	/*
	 * Before xc_init_cpu() has run, just invoke the function locally
	 * if this CPU is in the set; there are no other CPUs to message.
	 */
	if (!xc_initialized) {
		if (BT_TEST(set, CPU->cpu_id) && (CPU->cpu_flags & CPU_READY) &&
		    func != NULL)
			(void) (*func)(arg1, arg2, arg3);
		return;
	}

	save_spl = splr(ipltospl(XC_HI_PIL));

	/*
	 * fill in cross call data
	 */
	data = &CPU->cpu_m.xc_data;
	data->xc_func = func;
	data->xc_a1 = arg1;
	data->xc_a2 = arg2;
	data->xc_a3 = arg3;

	/*
	 * Post messages to all CPUs involved that are CPU_READY
	 */
	CPU->cpu_m.xc_wait_cnt = 0;
	for (c = 0; c < ncpus; ++c) {
		if (!BT_TEST(set, c))
			continue;
		cpup = cpu[c];
		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
			continue;

		/*
		 * Fill out a new message.
		 */
		msg = xc_extract(&CPU->cpu_m.xc_free);
		if (msg == NULL)
			panic("Ran out of free xc_msg_t's");
		msg->xc_command = command;
		if (msg->xc_master != CPU->cpu_id)
			panic("msg %p has wrong xc_master", (void *)msg);
		msg->xc_slave = c;

		/*
		 * Increment my work count for all messages that I'll
		 * transition from DONE to FREE.
		 * Also remember how many XC_MSG_WAITINGs to look for
		 */
		(void) xc_increment(&CPU->cpu_m);
		if (command == XC_MSG_SYNC)
			++CPU->cpu_m.xc_wait_cnt;

		/*
		 * Increment the target CPU work count then insert the message
		 * in the target msgbox. If I post the first bit of work
		 * for the target to do, send an IPI to the target CPU.
		 */
		cnt = xc_increment(&cpup->cpu_m);
		xc_insert(&cpup->cpu_m.xc_msgbox, msg);
		if (cpup != CPU) {
			if (cnt == 0) {
				CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
				send_dirint(c, XC_HI_PIL);
				if (xc_collect_enable)
					++xc_total_cnt;
			} else if (xc_collect_enable) {
				++xc_multi_cnt;
			}
		}
	}

	/*
	 * Now drop into the message handler until all work is done
	 */
	(void) xc_serv(NULL, NULL);
	splx(save_spl);
}

/*
 * Push out a priority cross call.
 *
 * Priority calls bypass the message queues entirely: the single global
 * xc_priority_data slot is filled in and each target CPU's bit is set in
 * xc_priority_set. No completion handshake is performed. Callers (see
 * xc_priority()) raise SPL to XC_HI_PIL around this.
 */
static void
xc_priority_common(
	xc_func_t func,
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set)
{
	int i;
	int c;
	struct cpu *cpup;

	/*
	 * Wait briefly for any previous xc_priority to have finished.
	 */
	for (c = 0; c < ncpus; ++c) {
		cpup = cpu[c];
		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
			continue;

		/*
		 * The value of 40000 here is from old kernel code. It
		 * really should be changed to some time based value, since
		 * under a hypervisor, there's no guarantee a remote CPU
		 * is even scheduled.
		 */
		for (i = 0; BT_TEST(xc_priority_set, c) && i < 40000; ++i)
			SMT_PAUSE();

		/*
		 * Some CPU did not respond to a previous priority request. It's
		 * probably deadlocked with interrupts blocked or some such
		 * problem. We'll just erase the previous request - which was
		 * most likely a kmdb_enter that has already expired - and plow
		 * ahead.
		 */
		if (BT_TEST(xc_priority_set, c)) {
			XC_BT_CLEAR(xc_priority_set, c);
			if (cpup->cpu_m.xc_work_cnt > 0)
				xc_decrement(&cpup->cpu_m);
		}
	}

	/*
	 * fill in cross call data
	 */
	xc_priority_data.xc_func = func;
	xc_priority_data.xc_a1 = arg1;
	xc_priority_data.xc_a2 = arg2;
	xc_priority_data.xc_a3 = arg3;

	/*
	 * Post messages to all CPUs involved that are CPU_READY
	 * We'll always IPI, plus bang on the xc_msgbox for i86_mwait()
	 */
	for (c = 0; c < ncpus; ++c) {
		if (!BT_TEST(set, c))
			continue;
		cpup = cpu[c];
		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY) ||
		    cpup == CPU)
			continue;
		(void) xc_increment(&cpup->cpu_m);
		XC_BT_SET(xc_priority_set, c);
		send_dirint(c, XC_HI_PIL);
		for (i = 0; i < 10; ++i) {
			(void) casptr(&cpup->cpu_m.xc_msgbox,
			    cpup->cpu_m.xc_msgbox, cpup->cpu_m.xc_msgbox);
		}
	}
}

/*
 * Do cross call to all other CPUs with absolutely no waiting or handshaking.
 * This should only be used for extraordinary operations, like panic(), which
 * need to work, in some fashion, in a not completely functional system.
 * All other uses that want minimal waiting should use xc_call_nowait().
 */
void
xc_priority(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	extern int IGNORE_KERNEL_PREEMPTION;
	int save_spl = splr(ipltospl(XC_HI_PIL));
	int save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;

	IGNORE_KERNEL_PREEMPTION = 1;
	xc_priority_common((xc_func_t)func, arg1, arg2, arg3, set);
	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
	splx(save_spl);
}

/*
 * Wrapper for kmdb to capture other CPUs, causing them to enter the debugger.
 */
void
kdi_xc_others(int this_cpu, void (*func)(void))
{
	extern int IGNORE_KERNEL_PREEMPTION;
	int save_kernel_preemption;
	cpuset_t set;

	if (!xc_initialized)
		return;

	save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
	IGNORE_KERNEL_PREEMPTION = 1;
	CPUSET_ALL_BUT(set, this_cpu);
	xc_priority_common((xc_func_t)func, 0, 0, 0, CPUSET2BV(set));
	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
}



/*
 * Invoke function on specified processors. Remotes may continue after
 * service with no waiting. xc_call_nowait() may return immediately too.
 */
void
xc_call_nowait(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	xc_common(func, arg1, arg2, arg3, set, XC_MSG_ASYNC);
}

/*
 * Invoke function on specified processors. Remotes may continue after
 * service with no waiting. xc_call() returns only after remotes have finished.
 */
void
xc_call(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	xc_common(func, arg1, arg2, arg3, set, XC_MSG_CALL);
}

/*
 * Invoke function on specified processors. Remotes wait until all have
 * finished. xc_sync() also waits until all remotes have finished.
 */
void
xc_sync(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	xc_common(func, arg1, arg2, arg3, set, XC_MSG_SYNC);
}