/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2010, Intel Corporation.
 * All rights reserved.
28*a3114836SGerry Liu */ 297c478bd9Sstevel@tonic-gate 307c478bd9Sstevel@tonic-gate #include <sys/types.h> 317c478bd9Sstevel@tonic-gate #include <sys/param.h> 327c478bd9Sstevel@tonic-gate #include <sys/t_lock.h> 337c478bd9Sstevel@tonic-gate #include <sys/thread.h> 347c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 357c478bd9Sstevel@tonic-gate #include <sys/x_call.h> 36f34a7178SJoe Bonasera #include <sys/xc_levels.h> 377c478bd9Sstevel@tonic-gate #include <sys/cpu.h> 387c478bd9Sstevel@tonic-gate #include <sys/psw.h> 397c478bd9Sstevel@tonic-gate #include <sys/sunddi.h> 407c478bd9Sstevel@tonic-gate #include <sys/debug.h> 417c478bd9Sstevel@tonic-gate #include <sys/systm.h> 42ae115bc7Smrj #include <sys/archsystm.h> 437c478bd9Sstevel@tonic-gate #include <sys/machsystm.h> 447c478bd9Sstevel@tonic-gate #include <sys/mutex_impl.h> 45f34a7178SJoe Bonasera #include <sys/stack.h> 46f34a7178SJoe Bonasera #include <sys/promif.h> 47f34a7178SJoe Bonasera #include <sys/x86_archext.h> 48ae115bc7Smrj 497c478bd9Sstevel@tonic-gate /* 50f34a7178SJoe Bonasera * Implementation for cross-processor calls via interprocessor interrupts 51f34a7178SJoe Bonasera * 52f34a7178SJoe Bonasera * This implementation uses a message passing architecture to allow multiple 53f34a7178SJoe Bonasera * concurrent cross calls to be in flight at any given time. We use the cmpxchg 54f34a7178SJoe Bonasera * instruction, aka casptr(), to implement simple efficient work queues for 55f34a7178SJoe Bonasera * message passing between CPUs with almost no need for regular locking. 56f34a7178SJoe Bonasera * See xc_extract() and xc_insert() below. 57f34a7178SJoe Bonasera * 58f34a7178SJoe Bonasera * The general idea is that initiating a cross call means putting a message 59f34a7178SJoe Bonasera * on a target(s) CPU's work queue. Any synchronization is handled by passing 60f34a7178SJoe Bonasera * the message back and forth between initiator and target(s). 
61f34a7178SJoe Bonasera * 62f34a7178SJoe Bonasera * Every CPU has xc_work_cnt, which indicates it has messages to process. 63f34a7178SJoe Bonasera * This value is incremented as message traffic is initiated and decremented 64f34a7178SJoe Bonasera * with every message that finishes all processing. 65f34a7178SJoe Bonasera * 66f34a7178SJoe Bonasera * The code needs no mfence or other membar_*() calls. The uses of 67f34a7178SJoe Bonasera * casptr(), cas32() and atomic_dec_32() for the message passing are 68f34a7178SJoe Bonasera * implemented with LOCK prefix instructions which are equivalent to mfence. 69f34a7178SJoe Bonasera * 70f34a7178SJoe Bonasera * One interesting aspect of this implmentation is that it allows 2 or more 71f34a7178SJoe Bonasera * CPUs to initiate cross calls to intersecting sets of CPUs at the same time. 72f34a7178SJoe Bonasera * The cross call processing by the CPUs will happen in any order with only 73f34a7178SJoe Bonasera * a guarantee, for xc_call() and xc_sync(), that an initiator won't return 74f34a7178SJoe Bonasera * from cross calls before all slaves have invoked the function. 75f34a7178SJoe Bonasera * 76f34a7178SJoe Bonasera * The reason for this asynchronous approach is to allow for fast global 77f34a7178SJoe Bonasera * TLB shootdowns. If all CPUs, say N, tried to do a global TLB invalidation 78f34a7178SJoe Bonasera * on a different Virtual Address at the same time. The old code required 79f34a7178SJoe Bonasera * N squared IPIs. With this method, depending on timing, it could happen 80f34a7178SJoe Bonasera * with just N IPIs. 817c478bd9Sstevel@tonic-gate */ 82ae115bc7Smrj 83f34a7178SJoe Bonasera /* 84f34a7178SJoe Bonasera * The default is to not enable collecting counts of IPI information, since 85f34a7178SJoe Bonasera * the updating of shared cachelines could cause excess bus traffic. 
86f34a7178SJoe Bonasera */ 87f34a7178SJoe Bonasera uint_t xc_collect_enable = 0; 88f34a7178SJoe Bonasera uint64_t xc_total_cnt = 0; /* total #IPIs sent for cross calls */ 89f34a7178SJoe Bonasera uint64_t xc_multi_cnt = 0; /* # times we piggy backed on another IPI */ 907c478bd9Sstevel@tonic-gate 917c478bd9Sstevel@tonic-gate /* 92f34a7178SJoe Bonasera * Values for message states. Here are the normal transitions. A transition 93f34a7178SJoe Bonasera * of "->" happens in the slave cpu and "=>" happens in the master cpu as 94f34a7178SJoe Bonasera * the messages are passed back and forth. 957c478bd9Sstevel@tonic-gate * 96f34a7178SJoe Bonasera * FREE => ASYNC -> DONE => FREE 97f34a7178SJoe Bonasera * FREE => CALL -> DONE => FREE 98f34a7178SJoe Bonasera * FREE => SYNC -> WAITING => RELEASED -> DONE => FREE 997c478bd9Sstevel@tonic-gate * 100f34a7178SJoe Bonasera * The interesing one above is ASYNC. You might ask, why not go directly 101f34a7178SJoe Bonasera * to FREE, instead of DONE. If it did that, it might be possible to exhaust 102f34a7178SJoe Bonasera * the master's xc_free list if a master can generate ASYNC messages faster 103f34a7178SJoe Bonasera * then the slave can process them. That could be handled with more complicated 104f34a7178SJoe Bonasera * handling. However since nothing important uses ASYNC, I've not bothered. 
1057c478bd9Sstevel@tonic-gate */ 106f34a7178SJoe Bonasera #define XC_MSG_FREE (0) /* msg in xc_free queue */ 107f34a7178SJoe Bonasera #define XC_MSG_ASYNC (1) /* msg in slave xc_msgbox */ 108f34a7178SJoe Bonasera #define XC_MSG_CALL (2) /* msg in slave xc_msgbox */ 109f34a7178SJoe Bonasera #define XC_MSG_SYNC (3) /* msg in slave xc_msgbox */ 110f34a7178SJoe Bonasera #define XC_MSG_WAITING (4) /* msg in master xc_msgbox or xc_waiters */ 111f34a7178SJoe Bonasera #define XC_MSG_RELEASED (5) /* msg in slave xc_msgbox */ 112f34a7178SJoe Bonasera #define XC_MSG_DONE (6) /* msg in master xc_msgbox */ 1137c478bd9Sstevel@tonic-gate 114f34a7178SJoe Bonasera /* 115f34a7178SJoe Bonasera * We allow for one high priority message at a time to happen in the system. 116f34a7178SJoe Bonasera * This is used for panic, kmdb, etc., so no locking is done. 117f34a7178SJoe Bonasera */ 118c03aa626SJoe Bonasera static volatile cpuset_t xc_priority_set_store; 119c03aa626SJoe Bonasera static volatile ulong_t *xc_priority_set = CPUSET2BV(xc_priority_set_store); 120f34a7178SJoe Bonasera static xc_data_t xc_priority_data; 1217c478bd9Sstevel@tonic-gate 122c03aa626SJoe Bonasera /* 123c03aa626SJoe Bonasera * Wrappers to avoid C compiler warnings due to volatile. The atomic bit 124c03aa626SJoe Bonasera * operations don't accept volatile bit vectors - which is a bit silly. 
125c03aa626SJoe Bonasera */ 126c03aa626SJoe Bonasera #define XC_BT_SET(vector, b) BT_ATOMIC_SET((ulong_t *)(vector), (b)) 127c03aa626SJoe Bonasera #define XC_BT_CLEAR(vector, b) BT_ATOMIC_CLEAR((ulong_t *)(vector), (b)) 128c03aa626SJoe Bonasera 1297c478bd9Sstevel@tonic-gate /* 130f34a7178SJoe Bonasera * Decrement a CPU's work count 1317c478bd9Sstevel@tonic-gate */ 1327c478bd9Sstevel@tonic-gate static void 133f34a7178SJoe Bonasera xc_decrement(struct machcpu *mcpu) 1347c478bd9Sstevel@tonic-gate { 135f34a7178SJoe Bonasera atomic_dec_32(&mcpu->xc_work_cnt); 1367c478bd9Sstevel@tonic-gate } 1377c478bd9Sstevel@tonic-gate 1387c478bd9Sstevel@tonic-gate /* 139f34a7178SJoe Bonasera * Increment a CPU's work count and return the old value 1407c478bd9Sstevel@tonic-gate */ 141f34a7178SJoe Bonasera static int 142f34a7178SJoe Bonasera xc_increment(struct machcpu *mcpu) 1437c478bd9Sstevel@tonic-gate { 144f34a7178SJoe Bonasera int old; 145f34a7178SJoe Bonasera do { 146f34a7178SJoe Bonasera old = mcpu->xc_work_cnt; 147f34a7178SJoe Bonasera } while (cas32((uint32_t *)&mcpu->xc_work_cnt, old, old + 1) != old); 148f34a7178SJoe Bonasera return (old); 1497c478bd9Sstevel@tonic-gate } 1507c478bd9Sstevel@tonic-gate 1517c478bd9Sstevel@tonic-gate /* 152f34a7178SJoe Bonasera * Put a message into a queue. The insertion is atomic no matter 153f34a7178SJoe Bonasera * how many different inserts/extracts to the same queue happen. 1547c478bd9Sstevel@tonic-gate */ 155f34a7178SJoe Bonasera static void 156f34a7178SJoe Bonasera xc_insert(void *queue, xc_msg_t *msg) 1577c478bd9Sstevel@tonic-gate { 158f34a7178SJoe Bonasera xc_msg_t *old_head; 159bf73eaa5SJoe Bonasera 160bf73eaa5SJoe Bonasera /* 161bf73eaa5SJoe Bonasera * FREE messages should only ever be getting inserted into 162bf73eaa5SJoe Bonasera * the xc_master CPUs xc_free queue. 
163bf73eaa5SJoe Bonasera */ 164bf73eaa5SJoe Bonasera ASSERT(msg->xc_command != XC_MSG_FREE || 165bf73eaa5SJoe Bonasera cpu[msg->xc_master] == NULL || /* possible only during init */ 166bf73eaa5SJoe Bonasera queue == &cpu[msg->xc_master]->cpu_m.xc_free); 167bf73eaa5SJoe Bonasera 168f34a7178SJoe Bonasera do { 169f34a7178SJoe Bonasera old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue; 170f34a7178SJoe Bonasera msg->xc_next = old_head; 171f34a7178SJoe Bonasera } while (casptr(queue, old_head, msg) != old_head); 1727c478bd9Sstevel@tonic-gate } 1737c478bd9Sstevel@tonic-gate 1747c478bd9Sstevel@tonic-gate /* 175f34a7178SJoe Bonasera * Extract a message from a queue. The extraction is atomic only 176f34a7178SJoe Bonasera * when just one thread does extractions from the queue. 177f34a7178SJoe Bonasera * If the queue is empty, NULL is returned. 1787c478bd9Sstevel@tonic-gate */ 179f34a7178SJoe Bonasera static xc_msg_t * 180f34a7178SJoe Bonasera xc_extract(xc_msg_t **queue) 181f34a7178SJoe Bonasera { 182f34a7178SJoe Bonasera xc_msg_t *old_head; 183f34a7178SJoe Bonasera 184f34a7178SJoe Bonasera do { 185f34a7178SJoe Bonasera old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue; 186f34a7178SJoe Bonasera if (old_head == NULL) 187f34a7178SJoe Bonasera return (old_head); 188f34a7178SJoe Bonasera } while (casptr(queue, old_head, old_head->xc_next) != old_head); 189f34a7178SJoe Bonasera old_head->xc_next = NULL; 190f34a7178SJoe Bonasera return (old_head); 191f34a7178SJoe Bonasera } 192f34a7178SJoe Bonasera 1937c478bd9Sstevel@tonic-gate /* 194f34a7178SJoe Bonasera * Initialize the machcpu fields used for cross calls 1957c478bd9Sstevel@tonic-gate */ 196f34a7178SJoe Bonasera static uint_t xc_initialized = 0; 197*a3114836SGerry Liu 1987c478bd9Sstevel@tonic-gate void 199f34a7178SJoe Bonasera xc_init_cpu(struct cpu *cpup) 2007c478bd9Sstevel@tonic-gate { 201f34a7178SJoe Bonasera xc_msg_t *msg; 202f34a7178SJoe Bonasera int c; 2037c478bd9Sstevel@tonic-gate 2047c478bd9Sstevel@tonic-gate /* 
205*a3114836SGerry Liu * Allocate message buffers for the new CPU. 2067c478bd9Sstevel@tonic-gate */ 207*a3114836SGerry Liu for (c = 0; c < max_ncpus; ++c) { 208*a3114836SGerry Liu if (plat_dr_support_cpu()) { 209*a3114836SGerry Liu /* 210*a3114836SGerry Liu * Allocate a message buffer for every CPU possible 211*a3114836SGerry Liu * in system, including our own, and add them to our xc 212*a3114836SGerry Liu * message queue. 213*a3114836SGerry Liu */ 214*a3114836SGerry Liu msg = kmem_zalloc(sizeof (*msg), KM_SLEEP); 215*a3114836SGerry Liu msg->xc_command = XC_MSG_FREE; 216*a3114836SGerry Liu msg->xc_master = cpup->cpu_id; 217*a3114836SGerry Liu xc_insert(&cpup->cpu_m.xc_free, msg); 218*a3114836SGerry Liu } else if (cpu[c] != NULL && cpu[c] != cpup) { 219*a3114836SGerry Liu /* 220*a3114836SGerry Liu * Add a new message buffer to each existing CPU's free 221*a3114836SGerry Liu * list, as well as one for my list for each of them. 222*a3114836SGerry Liu * Note: cpu0 is statically inserted into cpu[] array, 223*a3114836SGerry Liu * so need to check cpu[c] isn't cpup itself to avoid 224*a3114836SGerry Liu * allocating extra message buffers for cpu0. 225*a3114836SGerry Liu */ 226*a3114836SGerry Liu msg = kmem_zalloc(sizeof (*msg), KM_SLEEP); 227*a3114836SGerry Liu msg->xc_command = XC_MSG_FREE; 228*a3114836SGerry Liu msg->xc_master = c; 229*a3114836SGerry Liu xc_insert(&cpu[c]->cpu_m.xc_free, msg); 230*a3114836SGerry Liu 231*a3114836SGerry Liu msg = kmem_zalloc(sizeof (*msg), KM_SLEEP); 232*a3114836SGerry Liu msg->xc_command = XC_MSG_FREE; 233*a3114836SGerry Liu msg->xc_master = cpup->cpu_id; 234*a3114836SGerry Liu xc_insert(&cpup->cpu_m.xc_free, msg); 235*a3114836SGerry Liu } 236*a3114836SGerry Liu } 2377c478bd9Sstevel@tonic-gate 238*a3114836SGerry Liu if (!plat_dr_support_cpu()) { 239*a3114836SGerry Liu /* 240*a3114836SGerry Liu * Add one for self messages if CPU hotplug is disabled. 
241*a3114836SGerry Liu */ 242f34a7178SJoe Bonasera msg = kmem_zalloc(sizeof (*msg), KM_SLEEP); 243f34a7178SJoe Bonasera msg->xc_command = XC_MSG_FREE; 244bf73eaa5SJoe Bonasera msg->xc_master = cpup->cpu_id; 245f34a7178SJoe Bonasera xc_insert(&cpup->cpu_m.xc_free, msg); 246f34a7178SJoe Bonasera } 2477c478bd9Sstevel@tonic-gate 248*a3114836SGerry Liu if (!xc_initialized) 249*a3114836SGerry Liu xc_initialized = 1; 250*a3114836SGerry Liu } 251*a3114836SGerry Liu 252*a3114836SGerry Liu void 253*a3114836SGerry Liu xc_fini_cpu(struct cpu *cpup) 254*a3114836SGerry Liu { 255*a3114836SGerry Liu xc_msg_t *msg; 256*a3114836SGerry Liu 257*a3114836SGerry Liu ASSERT((cpup->cpu_flags & CPU_READY) == 0); 258*a3114836SGerry Liu ASSERT(cpup->cpu_m.xc_msgbox == NULL); 259*a3114836SGerry Liu ASSERT(cpup->cpu_m.xc_work_cnt == 0); 260*a3114836SGerry Liu 261*a3114836SGerry Liu while ((msg = xc_extract(&cpup->cpu_m.xc_free)) != NULL) { 262*a3114836SGerry Liu kmem_free(msg, sizeof (*msg)); 263*a3114836SGerry Liu } 264*a3114836SGerry Liu } 265*a3114836SGerry Liu 266*a3114836SGerry Liu #define XC_FLUSH_MAX_WAITS 1000 267*a3114836SGerry Liu 268*a3114836SGerry Liu /* Flush inflight message buffers. */ 269*a3114836SGerry Liu int 270*a3114836SGerry Liu xc_flush_cpu(struct cpu *cpup) 271*a3114836SGerry Liu { 272*a3114836SGerry Liu int i; 273*a3114836SGerry Liu 274*a3114836SGerry Liu ASSERT((cpup->cpu_flags & CPU_READY) == 0); 275*a3114836SGerry Liu 2767c478bd9Sstevel@tonic-gate /* 277*a3114836SGerry Liu * Pause all working CPUs, which ensures that there's no CPU in 278*a3114836SGerry Liu * function xc_common(). 279*a3114836SGerry Liu * This is used to work around a race condition window in xc_common() 280*a3114836SGerry Liu * between checking CPU_READY flag and increasing working item count. 
281a563a037Sbholler */ 282*a3114836SGerry Liu pause_cpus(cpup); 283*a3114836SGerry Liu start_cpus(); 284a563a037Sbholler 285*a3114836SGerry Liu for (i = 0; i < XC_FLUSH_MAX_WAITS; i++) { 286*a3114836SGerry Liu if (cpup->cpu_m.xc_work_cnt == 0) { 287*a3114836SGerry Liu break; 288*a3114836SGerry Liu } 289*a3114836SGerry Liu DELAY(1); 290*a3114836SGerry Liu } 291*a3114836SGerry Liu for (; i < XC_FLUSH_MAX_WAITS; i++) { 292*a3114836SGerry Liu if (!BT_TEST(xc_priority_set, cpup->cpu_id)) { 293*a3114836SGerry Liu break; 294*a3114836SGerry Liu } 295*a3114836SGerry Liu DELAY(1); 296*a3114836SGerry Liu } 297*a3114836SGerry Liu 298*a3114836SGerry Liu return (i >= XC_FLUSH_MAX_WAITS ? ETIME : 0); 299f34a7178SJoe Bonasera } 3007c478bd9Sstevel@tonic-gate 301f34a7178SJoe Bonasera /* 302f34a7178SJoe Bonasera * X-call message processing routine. Note that this is used by both 303f34a7178SJoe Bonasera * senders and recipients of messages. 304f34a7178SJoe Bonasera * 305f34a7178SJoe Bonasera * We're protected against changing CPUs by either being in a high-priority 306f34a7178SJoe Bonasera * interrupt, having preemption disabled or by having a raised SPL. 
307f34a7178SJoe Bonasera */ 308f34a7178SJoe Bonasera /*ARGSUSED*/ 309f34a7178SJoe Bonasera uint_t 310f34a7178SJoe Bonasera xc_serv(caddr_t arg1, caddr_t arg2) 311f34a7178SJoe Bonasera { 312f34a7178SJoe Bonasera struct machcpu *mcpup = &(CPU->cpu_m); 313f34a7178SJoe Bonasera xc_msg_t *msg; 314f34a7178SJoe Bonasera xc_data_t *data; 315f34a7178SJoe Bonasera xc_msg_t *xc_waiters = NULL; 316f34a7178SJoe Bonasera uint32_t num_waiting = 0; 317f34a7178SJoe Bonasera xc_func_t func; 318f34a7178SJoe Bonasera xc_arg_t a1; 319f34a7178SJoe Bonasera xc_arg_t a2; 320f34a7178SJoe Bonasera xc_arg_t a3; 321f34a7178SJoe Bonasera uint_t rc = DDI_INTR_UNCLAIMED; 322f34a7178SJoe Bonasera 323f34a7178SJoe Bonasera while (mcpup->xc_work_cnt != 0) { 324f34a7178SJoe Bonasera rc = DDI_INTR_CLAIMED; 3257c478bd9Sstevel@tonic-gate 326f34a7178SJoe Bonasera /* 327f34a7178SJoe Bonasera * We may have to wait for a message to arrive. 328f34a7178SJoe Bonasera */ 329bf73eaa5SJoe Bonasera for (msg = NULL; msg == NULL; 330bf73eaa5SJoe Bonasera msg = xc_extract(&mcpup->xc_msgbox)) { 331bf73eaa5SJoe Bonasera 3327c478bd9Sstevel@tonic-gate /* 333c03aa626SJoe Bonasera * Alway check for and handle a priority message. 
3347c478bd9Sstevel@tonic-gate */ 335c03aa626SJoe Bonasera if (BT_TEST(xc_priority_set, CPU->cpu_id)) { 336f34a7178SJoe Bonasera func = xc_priority_data.xc_func; 337f34a7178SJoe Bonasera a1 = xc_priority_data.xc_a1; 338f34a7178SJoe Bonasera a2 = xc_priority_data.xc_a2; 339f34a7178SJoe Bonasera a3 = xc_priority_data.xc_a3; 340c03aa626SJoe Bonasera XC_BT_CLEAR(xc_priority_set, CPU->cpu_id); 341f34a7178SJoe Bonasera xc_decrement(mcpup); 342f34a7178SJoe Bonasera func(a1, a2, a3); 343f34a7178SJoe Bonasera if (mcpup->xc_work_cnt == 0) 344f34a7178SJoe Bonasera return (rc); 345f34a7178SJoe Bonasera } 3467c478bd9Sstevel@tonic-gate 347f34a7178SJoe Bonasera /* 348f34a7178SJoe Bonasera * wait for a message to arrive 349f34a7178SJoe Bonasera */ 350bf73eaa5SJoe Bonasera SMT_PAUSE(); 3517c478bd9Sstevel@tonic-gate } 352f34a7178SJoe Bonasera 353f34a7178SJoe Bonasera 354f34a7178SJoe Bonasera /* 355f34a7178SJoe Bonasera * process the message 356f34a7178SJoe Bonasera */ 357f34a7178SJoe Bonasera switch (msg->xc_command) { 358f34a7178SJoe Bonasera 359f34a7178SJoe Bonasera /* 360f34a7178SJoe Bonasera * ASYNC gives back the message immediately, then we do the 361f34a7178SJoe Bonasera * function and return with no more waiting. 
362f34a7178SJoe Bonasera */ 363f34a7178SJoe Bonasera case XC_MSG_ASYNC: 364f34a7178SJoe Bonasera data = &cpu[msg->xc_master]->cpu_m.xc_data; 365f34a7178SJoe Bonasera func = data->xc_func; 366f34a7178SJoe Bonasera a1 = data->xc_a1; 367f34a7178SJoe Bonasera a2 = data->xc_a2; 368f34a7178SJoe Bonasera a3 = data->xc_a3; 369f34a7178SJoe Bonasera msg->xc_command = XC_MSG_DONE; 370f34a7178SJoe Bonasera xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg); 371f34a7178SJoe Bonasera if (func != NULL) 372f34a7178SJoe Bonasera (void) (*func)(a1, a2, a3); 373f34a7178SJoe Bonasera xc_decrement(mcpup); 3747c478bd9Sstevel@tonic-gate break; 3757c478bd9Sstevel@tonic-gate 376f34a7178SJoe Bonasera /* 377f34a7178SJoe Bonasera * SYNC messages do the call, then send it back to the master 378f34a7178SJoe Bonasera * in WAITING mode 379f34a7178SJoe Bonasera */ 380f34a7178SJoe Bonasera case XC_MSG_SYNC: 381f34a7178SJoe Bonasera data = &cpu[msg->xc_master]->cpu_m.xc_data; 382f34a7178SJoe Bonasera if (data->xc_func != NULL) 383f34a7178SJoe Bonasera (void) (*data->xc_func)(data->xc_a1, 384f34a7178SJoe Bonasera data->xc_a2, data->xc_a3); 385f34a7178SJoe Bonasera msg->xc_command = XC_MSG_WAITING; 386f34a7178SJoe Bonasera xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg); 387f34a7178SJoe Bonasera break; 3887c478bd9Sstevel@tonic-gate 389f34a7178SJoe Bonasera /* 390f34a7178SJoe Bonasera * WAITING messsages are collected by the master until all 391f34a7178SJoe Bonasera * have arrived. 
Once all arrive, we release them back to 392f34a7178SJoe Bonasera * the slaves 393f34a7178SJoe Bonasera */ 394f34a7178SJoe Bonasera case XC_MSG_WAITING: 395f34a7178SJoe Bonasera xc_insert(&xc_waiters, msg); 396f34a7178SJoe Bonasera if (++num_waiting < mcpup->xc_wait_cnt) 397f34a7178SJoe Bonasera break; 398f34a7178SJoe Bonasera while ((msg = xc_extract(&xc_waiters)) != NULL) { 399f34a7178SJoe Bonasera msg->xc_command = XC_MSG_RELEASED; 400f34a7178SJoe Bonasera xc_insert(&cpu[msg->xc_slave]->cpu_m.xc_msgbox, 401f34a7178SJoe Bonasera msg); 402f34a7178SJoe Bonasera --num_waiting; 403f34a7178SJoe Bonasera } 404f34a7178SJoe Bonasera if (num_waiting != 0) 405f34a7178SJoe Bonasera panic("wrong number waiting"); 406f34a7178SJoe Bonasera mcpup->xc_wait_cnt = 0; 407f34a7178SJoe Bonasera break; 4087c478bd9Sstevel@tonic-gate 409f34a7178SJoe Bonasera /* 410f34a7178SJoe Bonasera * CALL messages do the function and then, like RELEASE, 411f34a7178SJoe Bonasera * send the message is back to master as DONE. 412f34a7178SJoe Bonasera */ 413f34a7178SJoe Bonasera case XC_MSG_CALL: 414f34a7178SJoe Bonasera data = &cpu[msg->xc_master]->cpu_m.xc_data; 415f34a7178SJoe Bonasera if (data->xc_func != NULL) 416f34a7178SJoe Bonasera (void) (*data->xc_func)(data->xc_a1, 417f34a7178SJoe Bonasera data->xc_a2, data->xc_a3); 418f34a7178SJoe Bonasera /*FALLTHROUGH*/ 419f34a7178SJoe Bonasera case XC_MSG_RELEASED: 420f34a7178SJoe Bonasera msg->xc_command = XC_MSG_DONE; 421f34a7178SJoe Bonasera xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg); 422f34a7178SJoe Bonasera xc_decrement(mcpup); 423f34a7178SJoe Bonasera break; 4247c478bd9Sstevel@tonic-gate 425f34a7178SJoe Bonasera /* 426f34a7178SJoe Bonasera * DONE means a slave has completely finished up. 427f34a7178SJoe Bonasera * Once we collect all the DONE messages, we'll exit 428f34a7178SJoe Bonasera * processing too. 
429f34a7178SJoe Bonasera */ 430f34a7178SJoe Bonasera case XC_MSG_DONE: 431f34a7178SJoe Bonasera msg->xc_command = XC_MSG_FREE; 432f34a7178SJoe Bonasera xc_insert(&mcpup->xc_free, msg); 433f34a7178SJoe Bonasera xc_decrement(mcpup); 4347c478bd9Sstevel@tonic-gate break; 4357c478bd9Sstevel@tonic-gate 436f34a7178SJoe Bonasera case XC_MSG_FREE: 437bf73eaa5SJoe Bonasera panic("free message 0x%p in msgbox", (void *)msg); 438f34a7178SJoe Bonasera break; 439f34a7178SJoe Bonasera 440f34a7178SJoe Bonasera default: 441bf73eaa5SJoe Bonasera panic("bad message 0x%p in msgbox", (void *)msg); 442f34a7178SJoe Bonasera break; 443f34a7178SJoe Bonasera } 444f34a7178SJoe Bonasera } 445f34a7178SJoe Bonasera return (rc); 4467c478bd9Sstevel@tonic-gate } 4477c478bd9Sstevel@tonic-gate 4487c478bd9Sstevel@tonic-gate /* 449f34a7178SJoe Bonasera * Initiate cross call processing. 4507c478bd9Sstevel@tonic-gate */ 4517c478bd9Sstevel@tonic-gate static void 4527c478bd9Sstevel@tonic-gate xc_common( 4537c478bd9Sstevel@tonic-gate xc_func_t func, 4547c478bd9Sstevel@tonic-gate xc_arg_t arg1, 4557c478bd9Sstevel@tonic-gate xc_arg_t arg2, 4567c478bd9Sstevel@tonic-gate xc_arg_t arg3, 457f34a7178SJoe Bonasera ulong_t *set, 458f34a7178SJoe Bonasera uint_t command) 4597c478bd9Sstevel@tonic-gate { 460f34a7178SJoe Bonasera int c; 4617c478bd9Sstevel@tonic-gate struct cpu *cpup; 462f34a7178SJoe Bonasera xc_msg_t *msg; 463f34a7178SJoe Bonasera xc_data_t *data; 464f34a7178SJoe Bonasera int cnt; 465f34a7178SJoe Bonasera int save_spl; 466f34a7178SJoe Bonasera 467f34a7178SJoe Bonasera if (!xc_initialized) { 468f34a7178SJoe Bonasera if (BT_TEST(set, CPU->cpu_id) && (CPU->cpu_flags & CPU_READY) && 469f34a7178SJoe Bonasera func != NULL) 470f34a7178SJoe Bonasera (void) (*func)(arg1, arg2, arg3); 471f34a7178SJoe Bonasera return; 472f34a7178SJoe Bonasera } 4737c478bd9Sstevel@tonic-gate 474f34a7178SJoe Bonasera save_spl = splr(ipltospl(XC_HI_PIL)); 4757c478bd9Sstevel@tonic-gate 4767c478bd9Sstevel@tonic-gate /* 477f34a7178SJoe 
Bonasera * fill in cross call data 4787c478bd9Sstevel@tonic-gate */ 479f34a7178SJoe Bonasera data = &CPU->cpu_m.xc_data; 480f34a7178SJoe Bonasera data->xc_func = func; 481f34a7178SJoe Bonasera data->xc_a1 = arg1; 482f34a7178SJoe Bonasera data->xc_a2 = arg2; 483f34a7178SJoe Bonasera data->xc_a3 = arg3; 484a563a037Sbholler 4857c478bd9Sstevel@tonic-gate /* 486f34a7178SJoe Bonasera * Post messages to all CPUs involved that are CPU_READY 4877c478bd9Sstevel@tonic-gate */ 488f34a7178SJoe Bonasera CPU->cpu_m.xc_wait_cnt = 0; 489*a3114836SGerry Liu for (c = 0; c < max_ncpus; ++c) { 490f34a7178SJoe Bonasera if (!BT_TEST(set, c)) 491f34a7178SJoe Bonasera continue; 492f34a7178SJoe Bonasera cpup = cpu[c]; 493f34a7178SJoe Bonasera if (cpup == NULL || !(cpup->cpu_flags & CPU_READY)) 494a563a037Sbholler continue; 495a563a037Sbholler 496f34a7178SJoe Bonasera /* 497f34a7178SJoe Bonasera * Fill out a new message. 498f34a7178SJoe Bonasera */ 499f34a7178SJoe Bonasera msg = xc_extract(&CPU->cpu_m.xc_free); 500f34a7178SJoe Bonasera if (msg == NULL) 501f34a7178SJoe Bonasera panic("Ran out of free xc_msg_t's"); 502f34a7178SJoe Bonasera msg->xc_command = command; 503bf73eaa5SJoe Bonasera if (msg->xc_master != CPU->cpu_id) 504bf73eaa5SJoe Bonasera panic("msg %p has wrong xc_master", (void *)msg); 505f34a7178SJoe Bonasera msg->xc_slave = c; 506a563a037Sbholler 507f34a7178SJoe Bonasera /* 508f34a7178SJoe Bonasera * Increment my work count for all messages that I'll 509f34a7178SJoe Bonasera * transition from DONE to FREE. 510f34a7178SJoe Bonasera * Also remember how many XC_MSG_WAITINGs to look for 511f34a7178SJoe Bonasera */ 512f34a7178SJoe Bonasera (void) xc_increment(&CPU->cpu_m); 513f34a7178SJoe Bonasera if (command == XC_MSG_SYNC) 514f34a7178SJoe Bonasera ++CPU->cpu_m.xc_wait_cnt; 515f34a7178SJoe Bonasera 516f34a7178SJoe Bonasera /* 517f34a7178SJoe Bonasera * Increment the target CPU work count then insert the message 518f34a7178SJoe Bonasera * in the target msgbox. 
If I post the first bit of work 519f34a7178SJoe Bonasera * for the target to do, send an IPI to the target CPU. 520f34a7178SJoe Bonasera */ 521f34a7178SJoe Bonasera cnt = xc_increment(&cpup->cpu_m); 522f34a7178SJoe Bonasera xc_insert(&cpup->cpu_m.xc_msgbox, msg); 523f34a7178SJoe Bonasera if (cpup != CPU) { 524f34a7178SJoe Bonasera if (cnt == 0) { 525f34a7178SJoe Bonasera CPU_STATS_ADDQ(CPU, sys, xcalls, 1); 526f34a7178SJoe Bonasera send_dirint(c, XC_HI_PIL); 527f34a7178SJoe Bonasera if (xc_collect_enable) 528f34a7178SJoe Bonasera ++xc_total_cnt; 529f34a7178SJoe Bonasera } else if (xc_collect_enable) { 530f34a7178SJoe Bonasera ++xc_multi_cnt; 531f34a7178SJoe Bonasera } 532f34a7178SJoe Bonasera } 5337c478bd9Sstevel@tonic-gate } 5347c478bd9Sstevel@tonic-gate 5357c478bd9Sstevel@tonic-gate /* 536f34a7178SJoe Bonasera * Now drop into the message handler until all work is done 5377c478bd9Sstevel@tonic-gate */ 538f34a7178SJoe Bonasera (void) xc_serv(NULL, NULL); 539f34a7178SJoe Bonasera splx(save_spl); 540f34a7178SJoe Bonasera } 5417c478bd9Sstevel@tonic-gate 542f34a7178SJoe Bonasera /* 543f34a7178SJoe Bonasera * Push out a priority cross call. 544f34a7178SJoe Bonasera */ 545f34a7178SJoe Bonasera static void 546f34a7178SJoe Bonasera xc_priority_common( 547f34a7178SJoe Bonasera xc_func_t func, 548f34a7178SJoe Bonasera xc_arg_t arg1, 549f34a7178SJoe Bonasera xc_arg_t arg2, 550f34a7178SJoe Bonasera xc_arg_t arg3, 551f34a7178SJoe Bonasera ulong_t *set) 552f34a7178SJoe Bonasera { 553f34a7178SJoe Bonasera int i; 554f34a7178SJoe Bonasera int c; 555f34a7178SJoe Bonasera struct cpu *cpup; 5567c478bd9Sstevel@tonic-gate 5577c478bd9Sstevel@tonic-gate /* 558c03aa626SJoe Bonasera * Wait briefly for any previous xc_priority to have finished. 
5597c478bd9Sstevel@tonic-gate */ 560*a3114836SGerry Liu for (c = 0; c < max_ncpus; ++c) { 561c03aa626SJoe Bonasera cpup = cpu[c]; 562c03aa626SJoe Bonasera if (cpup == NULL || !(cpup->cpu_flags & CPU_READY)) 563c03aa626SJoe Bonasera continue; 564c03aa626SJoe Bonasera 565c03aa626SJoe Bonasera /* 566c03aa626SJoe Bonasera * The value of 40000 here is from old kernel code. It 567c03aa626SJoe Bonasera * really should be changed to some time based value, since 568c03aa626SJoe Bonasera * under a hypervisor, there's no guarantee a remote CPU 569c03aa626SJoe Bonasera * is even scheduled. 570c03aa626SJoe Bonasera */ 571c03aa626SJoe Bonasera for (i = 0; BT_TEST(xc_priority_set, c) && i < 40000; ++i) 572c03aa626SJoe Bonasera SMT_PAUSE(); 573c03aa626SJoe Bonasera 574c03aa626SJoe Bonasera /* 575c03aa626SJoe Bonasera * Some CPU did not respond to a previous priority request. It's 576c03aa626SJoe Bonasera * probably deadlocked with interrupts blocked or some such 577c03aa626SJoe Bonasera * problem. We'll just erase the previous request - which was 578c03aa626SJoe Bonasera * most likely a kmdb_enter that has already expired - and plow 579c03aa626SJoe Bonasera * ahead. 
580c03aa626SJoe Bonasera */ 581c03aa626SJoe Bonasera if (BT_TEST(xc_priority_set, c)) { 582c03aa626SJoe Bonasera XC_BT_CLEAR(xc_priority_set, c); 583c03aa626SJoe Bonasera if (cpup->cpu_m.xc_work_cnt > 0) 584c03aa626SJoe Bonasera xc_decrement(&cpup->cpu_m); 585c03aa626SJoe Bonasera } 5867c478bd9Sstevel@tonic-gate } 5877c478bd9Sstevel@tonic-gate 5887c478bd9Sstevel@tonic-gate /* 589f34a7178SJoe Bonasera * fill in cross call data 5907c478bd9Sstevel@tonic-gate */ 591f34a7178SJoe Bonasera xc_priority_data.xc_func = func; 592f34a7178SJoe Bonasera xc_priority_data.xc_a1 = arg1; 593f34a7178SJoe Bonasera xc_priority_data.xc_a2 = arg2; 594f34a7178SJoe Bonasera xc_priority_data.xc_a3 = arg3; 5957c478bd9Sstevel@tonic-gate 5967c478bd9Sstevel@tonic-gate /* 597f34a7178SJoe Bonasera * Post messages to all CPUs involved that are CPU_READY 598f34a7178SJoe Bonasera * We'll always IPI, plus bang on the xc_msgbox for i86_mwait() 5997c478bd9Sstevel@tonic-gate */ 600*a3114836SGerry Liu for (c = 0; c < max_ncpus; ++c) { 601f34a7178SJoe Bonasera if (!BT_TEST(set, c)) 602f34a7178SJoe Bonasera continue; 603f34a7178SJoe Bonasera cpup = cpu[c]; 604f34a7178SJoe Bonasera if (cpup == NULL || !(cpup->cpu_flags & CPU_READY) || 605f34a7178SJoe Bonasera cpup == CPU) 606f34a7178SJoe Bonasera continue; 607f34a7178SJoe Bonasera (void) xc_increment(&cpup->cpu_m); 608c03aa626SJoe Bonasera XC_BT_SET(xc_priority_set, c); 609f34a7178SJoe Bonasera send_dirint(c, XC_HI_PIL); 610f34a7178SJoe Bonasera for (i = 0; i < 10; ++i) { 611f34a7178SJoe Bonasera (void) casptr(&cpup->cpu_m.xc_msgbox, 612f34a7178SJoe Bonasera cpup->cpu_m.xc_msgbox, cpup->cpu_m.xc_msgbox); 6137c478bd9Sstevel@tonic-gate } 6147c478bd9Sstevel@tonic-gate } 6157c478bd9Sstevel@tonic-gate } 6167c478bd9Sstevel@tonic-gate 6177c478bd9Sstevel@tonic-gate /* 618f34a7178SJoe Bonasera * Do cross call to all other CPUs with absolutely no waiting or handshaking. 
619f34a7178SJoe Bonasera * This should only be used for extraordinary operations, like panic(), which 620f34a7178SJoe Bonasera * need to work, in some fashion, in a not completely functional system. 621f34a7178SJoe Bonasera * All other uses that want minimal waiting should use xc_call_nowait(). 6227c478bd9Sstevel@tonic-gate */ 6237c478bd9Sstevel@tonic-gate void 624f34a7178SJoe Bonasera xc_priority( 6257c478bd9Sstevel@tonic-gate xc_arg_t arg1, 6267c478bd9Sstevel@tonic-gate xc_arg_t arg2, 6277c478bd9Sstevel@tonic-gate xc_arg_t arg3, 628f34a7178SJoe Bonasera ulong_t *set, 6297c478bd9Sstevel@tonic-gate xc_func_t func) 6307c478bd9Sstevel@tonic-gate { 631f34a7178SJoe Bonasera extern int IGNORE_KERNEL_PREEMPTION; 632f34a7178SJoe Bonasera int save_spl = splr(ipltospl(XC_HI_PIL)); 633f34a7178SJoe Bonasera int save_kernel_preemption = IGNORE_KERNEL_PREEMPTION; 6347c478bd9Sstevel@tonic-gate 6357c478bd9Sstevel@tonic-gate IGNORE_KERNEL_PREEMPTION = 1; 636f34a7178SJoe Bonasera xc_priority_common((xc_func_t)func, arg1, arg2, arg3, set); 6377c478bd9Sstevel@tonic-gate IGNORE_KERNEL_PREEMPTION = save_kernel_preemption; 638f34a7178SJoe Bonasera splx(save_spl); 6397c478bd9Sstevel@tonic-gate } 6407c478bd9Sstevel@tonic-gate 6417c478bd9Sstevel@tonic-gate /* 642f34a7178SJoe Bonasera * Wrapper for kmdb to capture other CPUs, causing them to enter the debugger. 
6437c478bd9Sstevel@tonic-gate */ 6447c478bd9Sstevel@tonic-gate void 6457c478bd9Sstevel@tonic-gate kdi_xc_others(int this_cpu, void (*func)(void)) 6467c478bd9Sstevel@tonic-gate { 647f34a7178SJoe Bonasera extern int IGNORE_KERNEL_PREEMPTION; 6487c478bd9Sstevel@tonic-gate int save_kernel_preemption; 6497c478bd9Sstevel@tonic-gate cpuset_t set; 6507c478bd9Sstevel@tonic-gate 651ae115bc7Smrj if (!xc_initialized) 652ae115bc7Smrj return; 653ae115bc7Smrj 6547c478bd9Sstevel@tonic-gate save_kernel_preemption = IGNORE_KERNEL_PREEMPTION; 6557c478bd9Sstevel@tonic-gate IGNORE_KERNEL_PREEMPTION = 1; 656f34a7178SJoe Bonasera CPUSET_ALL_BUT(set, this_cpu); 657f34a7178SJoe Bonasera xc_priority_common((xc_func_t)func, 0, 0, 0, CPUSET2BV(set)); 6587c478bd9Sstevel@tonic-gate IGNORE_KERNEL_PREEMPTION = save_kernel_preemption; 6597c478bd9Sstevel@tonic-gate } 660f34a7178SJoe Bonasera 661f34a7178SJoe Bonasera 662f34a7178SJoe Bonasera 663f34a7178SJoe Bonasera /* 664f34a7178SJoe Bonasera * Invoke function on specified processors. Remotes may continue after 665f34a7178SJoe Bonasera * service with no waiting. xc_call_nowait() may return immediately too. 666f34a7178SJoe Bonasera */ 667f34a7178SJoe Bonasera void 668f34a7178SJoe Bonasera xc_call_nowait( 669f34a7178SJoe Bonasera xc_arg_t arg1, 670f34a7178SJoe Bonasera xc_arg_t arg2, 671f34a7178SJoe Bonasera xc_arg_t arg3, 672f34a7178SJoe Bonasera ulong_t *set, 673f34a7178SJoe Bonasera xc_func_t func) 674f34a7178SJoe Bonasera { 675f34a7178SJoe Bonasera xc_common(func, arg1, arg2, arg3, set, XC_MSG_ASYNC); 676f34a7178SJoe Bonasera } 677f34a7178SJoe Bonasera 678f34a7178SJoe Bonasera /* 679f34a7178SJoe Bonasera * Invoke function on specified processors. Remotes may continue after 680f34a7178SJoe Bonasera * service with no waiting. xc_call() returns only after remotes have finished. 
681f34a7178SJoe Bonasera */ 682f34a7178SJoe Bonasera void 683f34a7178SJoe Bonasera xc_call( 684f34a7178SJoe Bonasera xc_arg_t arg1, 685f34a7178SJoe Bonasera xc_arg_t arg2, 686f34a7178SJoe Bonasera xc_arg_t arg3, 687f34a7178SJoe Bonasera ulong_t *set, 688f34a7178SJoe Bonasera xc_func_t func) 689f34a7178SJoe Bonasera { 690f34a7178SJoe Bonasera xc_common(func, arg1, arg2, arg3, set, XC_MSG_CALL); 691f34a7178SJoe Bonasera } 692f34a7178SJoe Bonasera 693f34a7178SJoe Bonasera /* 694f34a7178SJoe Bonasera * Invoke function on specified processors. Remotes wait until all have 695f34a7178SJoe Bonasera * finished. xc_sync() also waits until all remotes have finished. 696f34a7178SJoe Bonasera */ 697f34a7178SJoe Bonasera void 698f34a7178SJoe Bonasera xc_sync( 699f34a7178SJoe Bonasera xc_arg_t arg1, 700f34a7178SJoe Bonasera xc_arg_t arg2, 701f34a7178SJoe Bonasera xc_arg_t arg3, 702f34a7178SJoe Bonasera ulong_t *set, 703f34a7178SJoe Bonasera xc_func_t func) 704f34a7178SJoe Bonasera { 705f34a7178SJoe Bonasera xc_common(func, arg1, arg2, arg3, set, XC_MSG_SYNC); 706f34a7178SJoe Bonasera } 707