17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 541791439Sandrei * Common Development and Distribution License (the "License"). 641791439Sandrei * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22f34a7178SJoe Bonasera * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 247c478bd9Sstevel@tonic-gate */ 25a3114836SGerry Liu /* 26a3114836SGerry Liu * Copyright (c) 2010, Intel Corporation. 27a3114836SGerry Liu * All rights reserved. 
28a3114836SGerry Liu */ 297c478bd9Sstevel@tonic-gate 307c478bd9Sstevel@tonic-gate #include <sys/types.h> 317c478bd9Sstevel@tonic-gate #include <sys/param.h> 327c478bd9Sstevel@tonic-gate #include <sys/t_lock.h> 337c478bd9Sstevel@tonic-gate #include <sys/thread.h> 347c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 357c478bd9Sstevel@tonic-gate #include <sys/x_call.h> 36f34a7178SJoe Bonasera #include <sys/xc_levels.h> 377c478bd9Sstevel@tonic-gate #include <sys/cpu.h> 387c478bd9Sstevel@tonic-gate #include <sys/psw.h> 397c478bd9Sstevel@tonic-gate #include <sys/sunddi.h> 407c478bd9Sstevel@tonic-gate #include <sys/debug.h> 417c478bd9Sstevel@tonic-gate #include <sys/systm.h> 42ae115bc7Smrj #include <sys/archsystm.h> 437c478bd9Sstevel@tonic-gate #include <sys/machsystm.h> 447c478bd9Sstevel@tonic-gate #include <sys/mutex_impl.h> 45f34a7178SJoe Bonasera #include <sys/stack.h> 46f34a7178SJoe Bonasera #include <sys/promif.h> 47f34a7178SJoe Bonasera #include <sys/x86_archext.h> 48ae115bc7Smrj 497c478bd9Sstevel@tonic-gate /* 50f34a7178SJoe Bonasera * Implementation for cross-processor calls via interprocessor interrupts 51f34a7178SJoe Bonasera * 52f34a7178SJoe Bonasera * This implementation uses a message passing architecture to allow multiple 53f34a7178SJoe Bonasera * concurrent cross calls to be in flight at any given time. We use the cmpxchg 5475d94465SJosef 'Jeff' Sipek * instruction, aka atomic_cas_ptr(), to implement simple efficient work 5575d94465SJosef 'Jeff' Sipek * queues for message passing between CPUs with almost no need for regular 5675d94465SJosef 'Jeff' Sipek * locking. See xc_extract() and xc_insert() below. 57f34a7178SJoe Bonasera * 58f34a7178SJoe Bonasera * The general idea is that initiating a cross call means putting a message 59f34a7178SJoe Bonasera * on a target(s) CPU's work queue. Any synchronization is handled by passing 60f34a7178SJoe Bonasera * the message back and forth between initiator and target(s). 
 *
 * Every CPU has xc_work_cnt, which indicates it has messages to process.
 * This value is incremented as message traffic is initiated and decremented
 * with every message that finishes all processing.
 *
 * The code needs no mfence or other membar_*() calls. The uses of
 * atomic_cas_ptr(), atomic_cas_32() and atomic_dec_32() for the message
 * passing are implemented with LOCK prefix instructions which are
 * equivalent to mfence.
 *
 * One interesting aspect of this implementation is that it allows 2 or more
 * CPUs to initiate cross calls to intersecting sets of CPUs at the same time.
 * The cross call processing by the CPUs will happen in any order with only
 * a guarantee, for xc_call() and xc_sync(), that an initiator won't return
 * from cross calls before all slaves have invoked the function.
 *
 * The reason for this asynchronous approach is to allow for fast global
 * TLB shootdowns. If all CPUs, say N, tried to do a global TLB invalidation
 * on a different Virtual Address at the same time, the old code required
 * N squared IPIs. With this method, depending on timing, it can happen
 * with as few as N IPIs.
 */

/*
 * The default is to not enable collecting counts of IPI information, since
 * the updating of shared cachelines could cause excess bus traffic.
 */
uint_t xc_collect_enable = 0;
uint64_t xc_total_cnt = 0;	/* total #IPIs sent for cross calls */
uint64_t xc_multi_cnt = 0;	/* # times we piggy backed on another IPI */

/*
 * Values for message states. Here are the normal transitions. A transition
 * of "->" happens in the slave cpu and "=>" happens in the master cpu as
 * the messages are passed back and forth.
 *
 * FREE => ASYNC -> DONE => FREE
 * FREE => CALL -> DONE => FREE
 * FREE => SYNC -> WAITING => RELEASED -> DONE => FREE
 *
 * The interesting one above is ASYNC. You might ask, why not go directly
 * to FREE, instead of DONE. If it did that, it might be possible to exhaust
 * the master's xc_free list if a master can generate ASYNC messages faster
 * then the slave can process them. That could be handled with more complicated
 * handling. However since nothing important uses ASYNC, I've not bothered.
 */
#define	XC_MSG_FREE	(0)	/* msg in xc_free queue */
#define	XC_MSG_ASYNC	(1)	/* msg in slave xc_msgbox */
#define	XC_MSG_CALL	(2)	/* msg in slave xc_msgbox */
#define	XC_MSG_SYNC	(3)	/* msg in slave xc_msgbox */
#define	XC_MSG_WAITING	(4)	/* msg in master xc_msgbox or xc_waiters */
#define	XC_MSG_RELEASED	(5)	/* msg in slave xc_msgbox */
#define	XC_MSG_DONE	(6)	/* msg in master xc_msgbox */

/*
 * We allow for one high priority message at a time to happen in the system.
 * This is used for panic, kmdb, etc., so no locking is done.
 */
static volatile cpuset_t xc_priority_set_store;
static volatile ulong_t *xc_priority_set = CPUSET2BV(xc_priority_set_store);
static xc_data_t xc_priority_data;

/*
 * Wrappers to avoid C compiler warnings due to volatile. The atomic bit
 * operations don't accept volatile bit vectors - which is a bit silly.
 */
#define	XC_BT_SET(vector, b)	BT_ATOMIC_SET((ulong_t *)(vector), (b))
#define	XC_BT_CLEAR(vector, b)	BT_ATOMIC_CLEAR((ulong_t *)(vector), (b))

/*
 * Decrement a CPU's work count
 */
static void
xc_decrement(struct machcpu *mcpu)
{
	atomic_dec_32(&mcpu->xc_work_cnt);
}

/*
 * Increment a CPU's work count and return the old value
 */
static int
xc_increment(struct machcpu *mcpu)
{
	int old;

	do {
		old = mcpu->xc_work_cnt;
	} while (atomic_cas_32(&mcpu->xc_work_cnt, old, old + 1) != old);
	return (old);
}

/*
 * Put a message into a queue. The insertion is atomic no matter
 * how many different inserts/extracts to the same queue happen.
 */
static void
xc_insert(void *queue, xc_msg_t *msg)
{
	xc_msg_t *old_head;

	/*
	 * FREE messages should only ever be getting inserted into
	 * the xc_master CPUs xc_free queue.
	 */
	ASSERT(msg->xc_command != XC_MSG_FREE ||
	    cpu[msg->xc_master] == NULL || /* possible only during init */
	    queue == &cpu[msg->xc_master]->cpu_m.xc_free);

	do {
		old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
		msg->xc_next = old_head;
	} while (atomic_cas_ptr(queue, old_head, msg) != old_head);
}

/*
 * Extract a message from a queue. The extraction is atomic only
 * when just one thread does extractions from the queue.
 * If the queue is empty, NULL is returned.
 */
static xc_msg_t *
xc_extract(xc_msg_t **queue)
{
	xc_msg_t *old_head;

	do {
		old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
		if (old_head == NULL)
			return (old_head);
	} while (atomic_cas_ptr(queue, old_head, old_head->xc_next) !=
	    old_head);
	old_head->xc_next = NULL;
	return (old_head);
}

/*
 * Initialize the machcpu fields used for cross calls
 */
static uint_t xc_initialized = 0;

void
xc_init_cpu(struct cpu *cpup)
{
	xc_msg_t *msg;
	int c;

	/*
	 * Allocate message buffers for the new CPU.
	 */
	for (c = 0; c < max_ncpus; ++c) {
		if (plat_dr_support_cpu()) {
			/*
			 * Allocate a message buffer for every CPU possible
			 * in system, including our own, and add them to our xc
			 * message queue.
			 */
			msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
			msg->xc_command = XC_MSG_FREE;
			msg->xc_master = cpup->cpu_id;
			xc_insert(&cpup->cpu_m.xc_free, msg);
		} else if (cpu[c] != NULL && cpu[c] != cpup) {
			/*
			 * Add a new message buffer to each existing CPU's free
			 * list, as well as one for my list for each of them.
			 * Note: cpu0 is statically inserted into cpu[] array,
			 * so need to check cpu[c] isn't cpup itself to avoid
			 * allocating extra message buffers for cpu0.
			 */
			msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
			msg->xc_command = XC_MSG_FREE;
			msg->xc_master = c;
			xc_insert(&cpu[c]->cpu_m.xc_free, msg);

			msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
			msg->xc_command = XC_MSG_FREE;
			msg->xc_master = cpup->cpu_id;
			xc_insert(&cpup->cpu_m.xc_free, msg);
		}
	}

	if (!plat_dr_support_cpu()) {
		/*
		 * Add one for self messages if CPU hotplug is disabled.
		 */
		msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
		msg->xc_command = XC_MSG_FREE;
		msg->xc_master = cpup->cpu_id;
		xc_insert(&cpup->cpu_m.xc_free, msg);
	}

	if (!xc_initialized)
		xc_initialized = 1;
}

/*
 * Tear down the cross call message buffers for a CPU that is being
 * removed. The CPU must no longer be CPU_READY and must have no
 * in-flight cross call work (see the ASSERTs below).
 */
void
xc_fini_cpu(struct cpu *cpup)
{
	xc_msg_t *msg;

	ASSERT((cpup->cpu_flags & CPU_READY) == 0);
	ASSERT(cpup->cpu_m.xc_msgbox == NULL);
	ASSERT(cpup->cpu_m.xc_work_cnt == 0);

	while ((msg = xc_extract(&cpup->cpu_m.xc_free)) != NULL) {
		kmem_free(msg, sizeof (*msg));
	}
}

#define	XC_FLUSH_MAX_WAITS	1000

/*
 * Flush inflight message buffers.
 * Returns 0 on success, ETIME if the CPU's work did not drain in time.
 */
int
xc_flush_cpu(struct cpu *cpup)
{
	int i;

	ASSERT((cpup->cpu_flags & CPU_READY) == 0);

	/*
	 * Pause all working CPUs, which ensures that there's no CPU in
	 * function xc_common().
	 * This is used to work around a race condition window in xc_common()
	 * between checking CPU_READY flag and increasing working item count.
	 */
	pause_cpus(cpup, NULL);
	start_cpus();

	for (i = 0; i < XC_FLUSH_MAX_WAITS; i++) {
		if (cpup->cpu_m.xc_work_cnt == 0) {
			break;
		}
		DELAY(1);
	}
	for (; i < XC_FLUSH_MAX_WAITS; i++) {
		if (!BT_TEST(xc_priority_set, cpup->cpu_id)) {
			break;
		}
		DELAY(1);
	}

	return (i >= XC_FLUSH_MAX_WAITS ? ETIME : 0);
}

/*
 * X-call message processing routine. Note that this is used by both
 * senders and recipients of messages.
 *
 * We're protected against changing CPUs by either being in a high-priority
 * interrupt, having preemption disabled or by having a raised SPL.
 */
/*ARGSUSED*/
uint_t
xc_serv(caddr_t arg1, caddr_t arg2)
{
	struct machcpu *mcpup = &(CPU->cpu_m);
	xc_msg_t *msg;
	xc_data_t *data;
	xc_msg_t *xc_waiters = NULL;
	uint32_t num_waiting = 0;
	xc_func_t func;
	xc_arg_t a1;
	xc_arg_t a2;
	xc_arg_t a3;
	uint_t rc = DDI_INTR_UNCLAIMED;

	while (mcpup->xc_work_cnt != 0) {
		rc = DDI_INTR_CLAIMED;

		/*
		 * We may have to wait for a message to arrive.
		 */
		for (msg = NULL; msg == NULL;
		    msg = xc_extract(&mcpup->xc_msgbox)) {

			/*
			 * Always check for and handle a priority message.
			 */
			if (BT_TEST(xc_priority_set, CPU->cpu_id)) {
				func = xc_priority_data.xc_func;
				a1 = xc_priority_data.xc_a1;
				a2 = xc_priority_data.xc_a2;
				a3 = xc_priority_data.xc_a3;
				XC_BT_CLEAR(xc_priority_set, CPU->cpu_id);
				xc_decrement(mcpup);
				func(a1, a2, a3);
				if (mcpup->xc_work_cnt == 0)
					return (rc);
			}

			/*
			 * wait for a message to arrive
			 */
			SMT_PAUSE();
		}


		/*
		 * process the message
		 */
		switch (msg->xc_command) {

		/*
		 * ASYNC gives back the message immediately, then we do the
		 * function and return with no more waiting.
		 */
		case XC_MSG_ASYNC:
			data = &cpu[msg->xc_master]->cpu_m.xc_data;
			func = data->xc_func;
			a1 = data->xc_a1;
			a2 = data->xc_a2;
			a3 = data->xc_a3;
			msg->xc_command = XC_MSG_DONE;
			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
			if (func != NULL)
				(void) (*func)(a1, a2, a3);
			xc_decrement(mcpup);
			break;

		/*
		 * SYNC messages do the call, then send it back to the master
		 * in WAITING mode
		 */
		case XC_MSG_SYNC:
			data = &cpu[msg->xc_master]->cpu_m.xc_data;
			if (data->xc_func != NULL)
				(void) (*data->xc_func)(data->xc_a1,
				    data->xc_a2, data->xc_a3);
			msg->xc_command = XC_MSG_WAITING;
			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
			break;

		/*
		 * WAITING messages are collected by the master until all
		 * have arrived. Once all arrive, we release them back to
		 * the slaves
		 */
		case XC_MSG_WAITING:
			xc_insert(&xc_waiters, msg);
			if (++num_waiting < mcpup->xc_wait_cnt)
				break;
			while ((msg = xc_extract(&xc_waiters)) != NULL) {
				msg->xc_command = XC_MSG_RELEASED;
				xc_insert(&cpu[msg->xc_slave]->cpu_m.xc_msgbox,
				    msg);
				--num_waiting;
			}
			if (num_waiting != 0)
				panic("wrong number waiting");
			mcpup->xc_wait_cnt = 0;
			break;

		/*
		 * CALL messages do the function and then, like RELEASE,
		 * send the message back to master as DONE.
		 */
		case XC_MSG_CALL:
			data = &cpu[msg->xc_master]->cpu_m.xc_data;
			if (data->xc_func != NULL)
				(void) (*data->xc_func)(data->xc_a1,
				    data->xc_a2, data->xc_a3);
			/*FALLTHROUGH*/
		case XC_MSG_RELEASED:
			msg->xc_command = XC_MSG_DONE;
			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
			xc_decrement(mcpup);
			break;

		/*
		 * DONE means a slave has completely finished up.
		 * Once we collect all the DONE messages, we'll exit
		 * processing too.
		 */
		case XC_MSG_DONE:
			msg->xc_command = XC_MSG_FREE;
			xc_insert(&mcpup->xc_free, msg);
			xc_decrement(mcpup);
			break;

		case XC_MSG_FREE:
			panic("free message 0x%p in msgbox", (void *)msg);
			break;

		default:
			panic("bad message 0x%p in msgbox", (void *)msg);
			break;
		}
	}
	return (rc);
}

/*
 * Initiate cross call processing.
 */
static void
xc_common(
	xc_func_t func,
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	uint_t command)
{
	int c;
	struct cpu *cpup;
	xc_msg_t *msg;
	xc_data_t *data;
	int cnt;
	int save_spl;

	if (!xc_initialized) {
		if (BT_TEST(set, CPU->cpu_id) && (CPU->cpu_flags & CPU_READY) &&
		    func != NULL)
			(void) (*func)(arg1, arg2, arg3);
		return;
	}

	save_spl = splr(ipltospl(XC_HI_PIL));

	/*
	 * fill in cross call data
	 */
	data = &CPU->cpu_m.xc_data;
	data->xc_func = func;
	data->xc_a1 = arg1;
	data->xc_a2 = arg2;
	data->xc_a3 = arg3;

	/*
	 * Post messages to all CPUs involved that are CPU_READY
	 */
	CPU->cpu_m.xc_wait_cnt = 0;
	for (c = 0; c < max_ncpus; ++c) {
		if (!BT_TEST(set, c))
			continue;
		cpup = cpu[c];
		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
			continue;

		/*
		 * Fill out a new message.
		 */
		msg = xc_extract(&CPU->cpu_m.xc_free);
		if (msg == NULL)
			panic("Ran out of free xc_msg_t's");
		msg->xc_command = command;
		if (msg->xc_master != CPU->cpu_id)
			panic("msg %p has wrong xc_master", (void *)msg);
		msg->xc_slave = c;

		/*
		 * Increment my work count for all messages that I'll
		 * transition from DONE to FREE.
		 * Also remember how many XC_MSG_WAITINGs to look for
		 */
		(void) xc_increment(&CPU->cpu_m);
		if (command == XC_MSG_SYNC)
			++CPU->cpu_m.xc_wait_cnt;

		/*
		 * Increment the target CPU work count then insert the message
		 * in the target msgbox. If I post the first bit of work
		 * for the target to do, send an IPI to the target CPU.
		 */
		cnt = xc_increment(&cpup->cpu_m);
		xc_insert(&cpup->cpu_m.xc_msgbox, msg);
		if (cpup != CPU) {
			if (cnt == 0) {
				CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
				send_dirint(c, XC_HI_PIL);
				if (xc_collect_enable)
					++xc_total_cnt;
			} else if (xc_collect_enable) {
				++xc_multi_cnt;
			}
		}
	}

	/*
	 * Now drop into the message handler until all work is done
	 */
	(void) xc_serv(NULL, NULL);
	splx(save_spl);
}

/*
 * Push out a priority cross call.
 */
static void
xc_priority_common(
	xc_func_t func,
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set)
{
	int i;
	int c;
	struct cpu *cpup;

	/*
	 * Wait briefly for any previous xc_priority to have finished.
	 */
	for (c = 0; c < max_ncpus; ++c) {
		cpup = cpu[c];
		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
			continue;

		/*
		 * The value of 40000 here is from old kernel code. It
		 * really should be changed to some time based value, since
		 * under a hypervisor, there's no guarantee a remote CPU
		 * is even scheduled.
		 */
		for (i = 0; BT_TEST(xc_priority_set, c) && i < 40000; ++i)
			SMT_PAUSE();

		/*
		 * Some CPU did not respond to a previous priority request. It's
		 * probably deadlocked with interrupts blocked or some such
		 * problem. We'll just erase the previous request - which was
		 * most likely a kmdb_enter that has already expired - and plow
		 * ahead.
		 */
		if (BT_TEST(xc_priority_set, c)) {
			XC_BT_CLEAR(xc_priority_set, c);
			if (cpup->cpu_m.xc_work_cnt > 0)
				xc_decrement(&cpup->cpu_m);
		}
	}

	/*
	 * fill in cross call data
	 */
	xc_priority_data.xc_func = func;
	xc_priority_data.xc_a1 = arg1;
	xc_priority_data.xc_a2 = arg2;
	xc_priority_data.xc_a3 = arg3;

	/*
	 * Post messages to all CPUs involved that are CPU_READY
	 * We'll always IPI, plus bang on the xc_msgbox for i86_mwait()
	 */
	for (c = 0; c < max_ncpus; ++c) {
		if (!BT_TEST(set, c))
			continue;
		cpup = cpu[c];
		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY) ||
		    cpup == CPU)
			continue;
		(void) xc_increment(&cpup->cpu_m);
		XC_BT_SET(xc_priority_set, c);
		send_dirint(c, XC_HI_PIL);
		for (i = 0; i < 10; ++i) {
			(void) atomic_cas_ptr(&cpup->cpu_m.xc_msgbox,
			    cpup->cpu_m.xc_msgbox, cpup->cpu_m.xc_msgbox);
		}
	}
}

/*
 * Do cross call to all other CPUs with absolutely no waiting or handshaking.
 * This should only be used for extraordinary operations, like panic(), which
 * need to work, in some fashion, in a not completely functional system.
 * All other uses that want minimal waiting should use xc_call_nowait().
 */
void
xc_priority(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	extern int IGNORE_KERNEL_PREEMPTION;
	int save_spl = splr(ipltospl(XC_HI_PIL));
	int save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;

	IGNORE_KERNEL_PREEMPTION = 1;
	xc_priority_common((xc_func_t)func, arg1, arg2, arg3, set);
	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
	splx(save_spl);
}

/*
 * Wrapper for kmdb to capture other CPUs, causing them to enter the debugger.
 */
void
kdi_xc_others(int this_cpu, void (*func)(void))
{
	extern int IGNORE_KERNEL_PREEMPTION;
	int save_kernel_preemption;
	cpuset_t set;

	if (!xc_initialized)
		return;

	save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
	IGNORE_KERNEL_PREEMPTION = 1;
	CPUSET_ALL_BUT(set, this_cpu);
	xc_priority_common((xc_func_t)func, 0, 0, 0, CPUSET2BV(set));
	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
}



/*
 * Invoke function on specified processors. Remotes may continue after
 * service with no waiting. xc_call_nowait() may return immediately too.
 */
void
xc_call_nowait(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	xc_common(func, arg1, arg2, arg3, set, XC_MSG_ASYNC);
}

/*
 * Invoke function on specified processors. Remotes may continue after
 * service with no waiting. xc_call() returns only after remotes have finished.
 */
void
xc_call(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	xc_common(func, arg1, arg2, arg3, set, XC_MSG_CALL);
}

/*
 * Invoke function on specified processors. Remotes wait until all have
 * finished. xc_sync() also waits until all remotes have finished.
 */
void
xc_sync(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	xc_common(func, arg1, arg2, arg3, set, XC_MSG_SYNC);
}