1*843e1988Sjohnlev /* 2*843e1988Sjohnlev * CDDL HEADER START 3*843e1988Sjohnlev * 4*843e1988Sjohnlev * The contents of this file are subject to the terms of the 5*843e1988Sjohnlev * Common Development and Distribution License (the "License"). 6*843e1988Sjohnlev * You may not use this file except in compliance with the License. 7*843e1988Sjohnlev * 8*843e1988Sjohnlev * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*843e1988Sjohnlev * or http://www.opensolaris.org/os/licensing. 10*843e1988Sjohnlev * See the License for the specific language governing permissions 11*843e1988Sjohnlev * and limitations under the License. 12*843e1988Sjohnlev * 13*843e1988Sjohnlev * When distributing Covered Code, include this CDDL HEADER in each 14*843e1988Sjohnlev * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*843e1988Sjohnlev * If applicable, add the following below this CDDL HEADER, with the 16*843e1988Sjohnlev * fields enclosed by brackets "[]" replaced with your own identifying 17*843e1988Sjohnlev * information: Portions Copyright [yyyy] [name of copyright owner] 18*843e1988Sjohnlev * 19*843e1988Sjohnlev * CDDL HEADER END 20*843e1988Sjohnlev */ 21*843e1988Sjohnlev 22*843e1988Sjohnlev /* 23*843e1988Sjohnlev * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24*843e1988Sjohnlev * Use is subject to license terms. 
25*843e1988Sjohnlev */ 26*843e1988Sjohnlev 27*843e1988Sjohnlev #pragma ident "%Z%%M% %I% %E% SMI" 28*843e1988Sjohnlev 29*843e1988Sjohnlev #include <sys/types.h> 30*843e1988Sjohnlev #include <sys/systm.h> 31*843e1988Sjohnlev #include <sys/param.h> 32*843e1988Sjohnlev #include <sys/taskq.h> 33*843e1988Sjohnlev #include <sys/cmn_err.h> 34*843e1988Sjohnlev #include <sys/archsystm.h> 35*843e1988Sjohnlev #include <sys/machsystm.h> 36*843e1988Sjohnlev #include <sys/segments.h> 37*843e1988Sjohnlev #include <sys/cpuvar.h> 38*843e1988Sjohnlev #include <sys/psw.h> 39*843e1988Sjohnlev #include <sys/x86_archext.h> 40*843e1988Sjohnlev #include <sys/controlregs.h> 41*843e1988Sjohnlev #include <vm/as.h> 42*843e1988Sjohnlev #include <vm/hat.h> 43*843e1988Sjohnlev #include <vm/hat_i86.h> 44*843e1988Sjohnlev #include <sys/mman.h> 45*843e1988Sjohnlev #include <sys/hypervisor.h> 46*843e1988Sjohnlev #include <xen/sys/xenbus_impl.h> 47*843e1988Sjohnlev #include <sys/xpv_panic.h> 48*843e1988Sjohnlev #include <util/sscanf.h> 49*843e1988Sjohnlev #include <sys/cpu.h> 50*843e1988Sjohnlev #include <asm/cpu.h> 51*843e1988Sjohnlev 52*843e1988Sjohnlev #include <xen/public/vcpu.h> 53*843e1988Sjohnlev #include <xen/public/io/xs_wire.h> 54*843e1988Sjohnlev 55*843e1988Sjohnlev struct xen_evt_data cpu0_evt_data; /* cpu0's pending event data */ 56*843e1988Sjohnlev 57*843e1988Sjohnlev static taskq_t *cpu_config_tq; 58*843e1988Sjohnlev static void vcpu_config_event(struct xenbus_watch *, const char **, uint_t); 59*843e1988Sjohnlev static int xen_vcpu_initialize(processorid_t, vcpu_guest_context_t *); 60*843e1988Sjohnlev 61*843e1988Sjohnlev /* 62*843e1988Sjohnlev * These routines allocate any global state that might be needed 63*843e1988Sjohnlev * while starting cpus. For virtual cpus, there is no such state. 
64*843e1988Sjohnlev */ 65*843e1988Sjohnlev int 66*843e1988Sjohnlev mach_cpucontext_init(void) 67*843e1988Sjohnlev { 68*843e1988Sjohnlev return (0); 69*843e1988Sjohnlev } 70*843e1988Sjohnlev 71*843e1988Sjohnlev void 72*843e1988Sjohnlev do_cpu_config_watch(int state) 73*843e1988Sjohnlev { 74*843e1988Sjohnlev static struct xenbus_watch cpu_config_watch; 75*843e1988Sjohnlev 76*843e1988Sjohnlev if (state != XENSTORE_UP) 77*843e1988Sjohnlev return; 78*843e1988Sjohnlev cpu_config_watch.node = "cpu"; 79*843e1988Sjohnlev cpu_config_watch.callback = vcpu_config_event; 80*843e1988Sjohnlev if (register_xenbus_watch(&cpu_config_watch)) { 81*843e1988Sjohnlev taskq_destroy(cpu_config_tq); 82*843e1988Sjohnlev cmn_err(CE_WARN, "do_cpu_config_watch: " 83*843e1988Sjohnlev "failed to set vcpu config watch"); 84*843e1988Sjohnlev } 85*843e1988Sjohnlev 86*843e1988Sjohnlev } 87*843e1988Sjohnlev 88*843e1988Sjohnlev /* 89*843e1988Sjohnlev * This routine is called after all the "normal" MP startup has 90*843e1988Sjohnlev * been done; a good place to start watching xen store for virtual 91*843e1988Sjohnlev * cpu hot plug events. 92*843e1988Sjohnlev */ 93*843e1988Sjohnlev void 94*843e1988Sjohnlev mach_cpucontext_fini(void) 95*843e1988Sjohnlev { 96*843e1988Sjohnlev 97*843e1988Sjohnlev cpu_config_tq = taskq_create("vcpu config taskq", 1, 98*843e1988Sjohnlev maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE); 99*843e1988Sjohnlev 100*843e1988Sjohnlev (void) xs_register_xenbus_callback(do_cpu_config_watch); 101*843e1988Sjohnlev } 102*843e1988Sjohnlev 103*843e1988Sjohnlev /* 104*843e1988Sjohnlev * Fill in the remaining CPU context and initialize it. 
 */
static int
mp_set_cpu_context(vcpu_guest_context_t *vgc, cpu_t *cp)
{
	uint_t vec, iopl;

	vgc->flags = VGCF_IN_KERNEL;

	/*
	 * fpu_ctx we leave as zero; on first fault we'll store
	 * sse_initial into it anyway.
	 */

#if defined(__amd64)
	vgc->user_regs.cs = KCS_SEL | SEL_KPL;	/* force to ring 3 */
#else
	vgc->user_regs.cs = KCS_SEL;
#endif
	vgc->user_regs.ds = KDS_SEL;
	vgc->user_regs.es = KDS_SEL;
	vgc->user_regs.ss = KDS_SEL;
	vgc->kernel_ss = KDS_SEL;

	/*
	 * Allow I/O privilege level for Dom0 kernel.
	 */
	if (DOMAIN_IS_INITDOMAIN(xen_info))
		iopl = (PS_IOPL & 0x1000);	/* ring 1 */
	else
		iopl = 0;

#if defined(__amd64)
	vgc->user_regs.fs = 0;
	vgc->user_regs.gs = 0;
	vgc->user_regs.rflags = F_OFF | iopl;
#elif defined(__i386)
	vgc->user_regs.fs = KFS_SEL;
	vgc->user_regs.gs = KGS_SEL;
	vgc->user_regs.eflags = F_OFF | iopl;
	/* 32-bit guests also tell the hypervisor which cs to call back on */
	vgc->event_callback_cs = vgc->user_regs.cs;
	vgc->failsafe_callback_cs = vgc->user_regs.cs;
#endif

	/*
	 * Initialize the trap_info_t from the IDT
	 */
#if !defined(__lint)
	ASSERT(NIDT == sizeof (vgc->trap_ctxt) / sizeof (vgc->trap_ctxt[0]));
#endif
	for (vec = 0; vec < NIDT; vec++) {
		trap_info_t *ti = &vgc->trap_ctxt[vec];

		/*
		 * xen_idt_to_trap_info() returning 0 appears to mean "this
		 * IDT entry translated"; only then do we fill in cs/vector.
		 * Untranslated entries stay zeroed from the caller's bzero.
		 */
		if (xen_idt_to_trap_info(vec,
		    &cp->cpu_m.mcpu_idt[vec], ti) == 0) {
			ti->cs = KCS_SEL;
			ti->vector = vec;
		}
	}

	/*
	 * No LDT
	 */

	/*
	 * (We assert in various places that the GDT is (a) aligned on a
	 * page boundary and (b) one page long, so this really should fit..)
	 */
#ifdef CRASH_XEN
	vgc->gdt_frames[0] = pa_to_ma(mmu_btop(cp->cpu_m.mcpu_gdtpa));
#else
	vgc->gdt_frames[0] = pfn_to_mfn(mmu_btop(cp->cpu_m.mcpu_gdtpa));
#endif
	vgc->gdt_ents = NGDT;

	vgc->ctrlreg[0] = CR0_ENABLE_FPU_FLAGS(getcr0());

#if defined(__i386)
	/* PAE guests hand cr3 to Xen in a different encoding */
	if (mmu.pae_hat)
		vgc->ctrlreg[3] =
		    xen_pfn_to_cr3(pfn_to_mfn(kas.a_hat->hat_htable->ht_pfn));
	else
#endif
		vgc->ctrlreg[3] =
		    pa_to_ma(mmu_ptob(kas.a_hat->hat_htable->ht_pfn));

	vgc->ctrlreg[4] = getcr4();

	vgc->event_callback_eip = (uintptr_t)xen_callback;
	vgc->failsafe_callback_eip = (uintptr_t)xen_failsafe_callback;
	vgc->flags |= VGCF_failsafe_disables_events;

#if defined(__amd64)
	/*
	 * XXPV should this be moved to init_cpu_syscall?
	 */
	vgc->syscall_callback_eip = (uintptr_t)sys_syscall;
	vgc->flags |= VGCF_syscall_disables_events;

	ASSERT(vgc->user_regs.gs == 0);
	vgc->gs_base_kernel = (uintptr_t)cp;	/* %gs kernel base -> cpu_t */
#endif

	/* Hand the completed context to the hypervisor. */
	return (xen_vcpu_initialize(cp->cpu_id, vgc));
}

/*
 * Create a guest virtual cpu context so that the virtual cpu
 * springs into life in the domain just about to call mp_startup()
 *
 * Virtual CPUs must be initialized once in the lifetime of the domain;
 * after that subsequent attempts to start them will fail with X_EEXIST.
 *
 * Thus 'alloc' -really- creates and initializes the virtual
 * CPU context just once. Once the initialisation succeeds, we never
 * free it, nor the regular cpu_t to which it refers.
 */
void *
mach_cpucontext_alloc(struct cpu *cp)
{
	kthread_t *tp = cp->cpu_thread;
	vcpu_guest_context_t vgc;

	int err = 1;	/* assume failure until proven otherwise */

	/*
	 * First, augment the incoming cpu structure
	 * - vcpu pointer reference
	 * - pending event storage area
	 * - physical address of GDT
	 */
	cp->cpu_m.mcpu_vcpu_info =
	    &HYPERVISOR_shared_info->vcpu_info[cp->cpu_id];
	cp->cpu_m.mcpu_evt_pend = kmem_zalloc(
	    sizeof (struct xen_evt_data), KM_SLEEP);
	cp->cpu_m.mcpu_gdtpa =
	    mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)cp->cpu_gdt));

	/* Xen requires the GDT pages be read-only before it will accept them */
	if ((err = xen_gdt_setprot(cp, PROT_READ)) != 0)
		goto done;

	/*
	 * Now set up the vcpu context so that we can start this vcpu
	 * in the kernel at tp->t_pc (mp_startup). Note that the
	 * thread will thread_exit() shortly after performing the
	 * initialization; in particular, we will *never* take a
	 * privilege transition on this thread.
	 */

	bzero(&vgc, sizeof (vgc));

#ifdef __amd64
	vgc.user_regs.rip = tp->t_pc;
	vgc.user_regs.rsp = tp->t_sp;
	vgc.user_regs.rbp = tp->t_sp - 2 * sizeof (greg_t);
#else
	vgc.user_regs.eip = tp->t_pc;
	vgc.user_regs.esp = tp->t_sp;
	vgc.user_regs.ebp = tp->t_sp - 2 * sizeof (greg_t);
#endif
	/*
	 * XXPV	Fix resume, if Russ didn't already fix it.
	 *
	 * Note that resume unconditionally puts t->t_stk + sizeof (regs)
	 * into kernel_sp via HYPERVISOR_stack_switch. This anticipates
	 * that only lwps take traps that switch to the kernel stack;
	 * part of creating an lwp adjusts the stack by subtracting
	 * sizeof (struct regs) off t_stk.
	 *
	 * The more interesting question is, why do we do all the work
	 * of a fully fledged lwp for a plain thread? In particular
	 * we don't have to call HYPERVISOR_stack_switch for lwp-less threads
	 * or futz with the LDT. This should probably all be done with
	 * an lwp context operator to keep pure thread context switch fast.
	 */
	vgc.kernel_sp = (ulong_t)tp->t_stk;

	err = mp_set_cpu_context(&vgc, cp);

done:
	if (err) {
		/* On any failure, undo what we can and signal failure */
		mach_cpucontext_free(cp, NULL, err);
		return (NULL);
	}
	return (cp);
}

/*
 * By the time we are called either we have successfully started
 * the cpu, or our attempt to start it has failed.
 */

/*ARGSUSED*/
void
mach_cpucontext_free(struct cpu *cp, void *arg, int err)
{
	switch (err) {
	case 0:
		/* Success: the context is owned by the hypervisor now */
		break;
	case ETIMEDOUT:
		/*
		 * The vcpu context is loaded into the hypervisor, and
		 * we've tried to start it, but the vcpu has not been set
		 * running yet, for whatever reason. We arrange to -not-
		 * free any data structures it may be referencing. In
		 * particular, we've already told the hypervisor about
		 * the GDT, and so we can't map it read-write again.
		 */
		break;
	default:
		/* Genuine failure: restore GDT protections, free event area */
		(void) xen_gdt_setprot(cp, PROT_READ | PROT_WRITE);
		kmem_free(cp->cpu_m.mcpu_evt_pend,
		    sizeof (struct xen_evt_data));
		break;
	}
}

/*
 * Reset this CPU's context. Clear out any pending evtchn data, since event
 * channel numbers will all change when we resume.
 */
void
mach_cpucontext_reset(cpu_t *cp)
{
	/* Drop any events recorded against the old (pre-suspend) channels */
	bzero(cp->cpu_m.mcpu_evt_pend, sizeof (struct xen_evt_data));
	/* mcpu_intr_pending ? */
}

/*
 * Translate a saved setjmp()-style pcb (label_t) into the register
 * portion of a vcpu guest context, for the appropriate architecture.
 * Only the registers preserved by setjmp are available here.
 */
static void
pcb_to_user_regs(label_t *pcb, vcpu_guest_context_t *vgc)
{
#ifdef __amd64
	vgc->user_regs.rip = pcb->val[REG_LABEL_PC];
	vgc->user_regs.rsp = pcb->val[REG_LABEL_SP];
	vgc->user_regs.rbp = pcb->val[REG_LABEL_BP];
	vgc->user_regs.rbx = pcb->val[REG_LABEL_RBX];
	vgc->user_regs.r12 = pcb->val[REG_LABEL_R12];
	vgc->user_regs.r13 = pcb->val[REG_LABEL_R13];
	vgc->user_regs.r14 = pcb->val[REG_LABEL_R14];
	vgc->user_regs.r15 = pcb->val[REG_LABEL_R15];
#else /* __amd64 */
	vgc->user_regs.eip = pcb->val[REG_LABEL_PC];
	vgc->user_regs.esp = pcb->val[REG_LABEL_SP];
	vgc->user_regs.ebp = pcb->val[REG_LABEL_BP];
	vgc->user_regs.ebx = pcb->val[REG_LABEL_EBX];
	vgc->user_regs.esi = pcb->val[REG_LABEL_ESI];
	vgc->user_regs.edi = pcb->val[REG_LABEL_EDI];
#endif /* __amd64 */
}

/*
 * Restore the context of a CPU during resume. The CPU must either
 * have been blocked in cpu_idle() (running the idle thread), if it was
 * offline, or inside cpu_pause_thread(). Either way we can restore safely
 * from the t_pcb.
 */
void
mach_cpucontext_restore(cpu_t *cp)
{
	vcpu_guest_context_t vgc;
	int err;

	ASSERT(cp->cpu_thread == cp->cpu_pause_thread ||
	    cp->cpu_thread == cp->cpu_idle_thread);

	bzero(&vgc, sizeof (vgc));

	pcb_to_user_regs(&cp->cpu_thread->t_pcb, &vgc);

	/*
	 * We're emulating a longjmp() here: in particular, we need to bump the
	 * stack pointer to account for the pop of xIP that returning from
	 * longjmp() normally would do, and set the return value in xAX to 1.
	 */
#ifdef __amd64
	vgc.user_regs.rax = 1;
	vgc.user_regs.rsp += sizeof (ulong_t);
#else
	vgc.user_regs.eax = 1;
	vgc.user_regs.esp += sizeof (ulong_t);
#endif

	vgc.kernel_sp = cp->cpu_thread->t_sp;

	err = mp_set_cpu_context(&vgc, cp);

	ASSERT(err == 0);
}

/*
 * Idle a virtual CPU: record it in the suspend set and block in the
 * hypervisor until an event arrives (or halt outright if we are in the
 * middle of an xpv panic).
 */
void
mach_cpu_idle(void)
{
	if (IN_XPV_PANIC()) {
		xpv_panic_halt();
	} else {
		/* setjmp saves the pcb that mach_cpucontext_restore uses */
		(void) setjmp(&curthread->t_pcb);
		CPUSET_ATOMIC_ADD(cpu_suspend_set, CPU->cpu_id);
		(void) HYPERVISOR_block();
		CPUSET_ATOMIC_DEL(cpu_suspend_set, CPU->cpu_id);
	}
}

/*
 * Halt this virtual CPU permanently, optionally printing a message first.
 */
void
mach_cpu_halt(char *msg)
{
	if (msg)
		prom_printf("%s\n", msg);
	/* Take the vcpu down; it can no longer receive interrupts */
	(void) xen_vcpu_down(CPU->cpu_id);
}

/*
 * Spin with interrupts disabled until *safe returns to PAUSE_IDLE.
 * While paused the CPU sits in the suspend set so that poweroff_vcpu()
 * can tell it has reached a quiescent point.
 */
void
mach_cpu_pause(volatile char *safe)
{
	ulong_t flags;

	flags = intr_clear();

	if (setjmp(&curthread->t_pcb) == 0) {
		CPUSET_ATOMIC_ADD(cpu_suspend_set, CPU->cpu_id);
		/*
		 * This cpu is now safe.
		 */
		*safe = PAUSE_WAIT;
		membar_enter();	/* make PAUSE_WAIT visible before spinning */
	}

	while (*safe != PAUSE_IDLE)
		SMT_PAUSE();

	CPUSET_ATOMIC_DEL(cpu_suspend_set, CPU->cpu_id);

	intr_restore(flags);
}

/*
 * Virtual CPU management.
 *
 * VCPUs can be controlled in one of two ways; through the domain itself
 * (psradm, p_online(), etc.), and via changes in xenstore (vcpu_config()).
 * Unfortunately, the terminology is used in different ways; they work out as
 * follows:
 *
 * P_ONLINE: the VCPU is up and running, taking interrupts and running threads
 *
 * P_OFFLINE: the VCPU is up and running, but quiesced (i.e. blocked in the
 * hypervisor on the idle thread). It must be up since a downed VCPU cannot
 * receive interrupts, and we require this for offline CPUs in Solaris.
451*843e1988Sjohnlev * 452*843e1988Sjohnlev * P_POWEROFF: the VCPU is down (we never called xen_vcpu_up(), or called 453*843e1988Sjohnlev * xen_vcpu_down() for it). It can't take interrupts or run anything, though 454*843e1988Sjohnlev * if it has run previously, its software state (cpu_t, machcpu structures, IPI 455*843e1988Sjohnlev * event channels, etc.) will still exist. 456*843e1988Sjohnlev * 457*843e1988Sjohnlev * The hypervisor has two notions of CPU states as represented in the store: 458*843e1988Sjohnlev * 459*843e1988Sjohnlev * "offline": the VCPU is down. Corresponds to P_POWEROFF. 460*843e1988Sjohnlev * 461*843e1988Sjohnlev * "online": the VCPU is running. Corresponds to a CPU state other than 462*843e1988Sjohnlev * P_POWEROFF. 463*843e1988Sjohnlev * 464*843e1988Sjohnlev * Currently, only a notification via xenstore can bring a CPU into a 465*843e1988Sjohnlev * P_POWEROFF state, and only the domain can change between P_ONLINE, P_NOINTR, 466*843e1988Sjohnlev * P_OFFLINE, etc. We need to be careful to treat xenstore notifications 467*843e1988Sjohnlev * idempotently, as we'll get 'duplicate' entries when we resume a domain. 468*843e1988Sjohnlev * 469*843e1988Sjohnlev * Note that the xenstore configuration is strictly advisory, in that a domain 470*843e1988Sjohnlev * can choose to ignore it and still power up a VCPU in the offline state. To 471*843e1988Sjohnlev * play nice, we don't allow it. Thus, any attempt to power on/off a CPU is 472*843e1988Sjohnlev * ENOTSUP from within Solaris. 
 */

/*
 * Powering a virtual CPU on or off from within the domain is not
 * supported; it can only be driven externally via xenstore (see the
 * block comment above).
 */
/*ARGSUSED*/
int
mp_cpu_poweron(struct cpu *cp)
{
	return (ENOTSUP);
}

/*ARGSUSED*/
int
mp_cpu_poweroff(struct cpu *cp)
{
	return (ENOTSUP);
}

/*
 * Bring a powered-off VCPU back up and mark its cpu_t runnable again.
 * Caller holds cpu_lock.
 */
static int
poweron_vcpu(struct cpu *cp)
{
	int error;

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (HYPERVISOR_vcpu_op(VCPUOP_is_up, cp->cpu_id, NULL) != 0) {
		printf("poweron_vcpu: vcpu%d is not available!\n",
		    cp->cpu_id);
		return (ENXIO);
	}

	if ((error = xen_vcpu_up(cp->cpu_id)) == 0) {
		CPUSET_ADD(cpu_ready_set, cp->cpu_id);
		cp->cpu_flags |= CPU_EXISTS | CPU_READY | CPU_RUNNING;
		cp->cpu_flags &= ~CPU_POWEROFF;
		/*
		 * There are some nasty races possible here.
		 * Tell the vcpu it's up one more time.
		 * XXPV	Is this enough? Is this safe?
		 */
		(void) xen_vcpu_up(cp->cpu_id);

		cpu_set_state(cp);
	}
	return (error);
}

/*
 * xcall target: remove the calling CPU from the suspend set; it will
 * only re-add itself once it is safely back in mach_cpu_idle().
 */
static int
poweroff_poke(void)
{
	CPUSET_ATOMIC_DEL(cpu_suspend_set, CPU->cpu_id);
	return (0);
}

/*
 * We must ensure that the VCPU reaches a safe state (in the suspend set, and
 * thus is not going to change) before we can power it off. The VCPU could
 * still be in mach_cpu_pause() and about to head back out; so just checking
 * cpu_suspend_set() isn't sufficient to make sure the VCPU has stopped moving.
 * Instead, we xcall it to delete itself from the set; whichever way it comes
 * back from that xcall, it won't mark itself in the set until it's safely back
 * in mach_cpu_idle().
 */
static int
poweroff_vcpu(struct cpu *cp)
{
	int error;
	cpuset_t set;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/* We cannot power off the CPU we are running on */
	ASSERT(CPU->cpu_id != cp->cpu_id);
	ASSERT(cp->cpu_flags & CPU_QUIESCED);

	CPUSET_ONLY(set, cp->cpu_id);

	/* Knock the target out of the suspend set (see comment above) */
	xc_sync(0, 0, 0, X_CALL_HIPRI, set, (xc_func_t)poweroff_poke);

	/* Wait until it re-enters the set from mach_cpu_idle() */
	while (!CPU_IN_SET(cpu_suspend_set, cp->cpu_id))
		SMT_PAUSE();

	if ((error = xen_vcpu_down(cp->cpu_id)) == 0) {
		ASSERT(CPU_IN_SET(cpu_suspend_set, cp->cpu_id));
		CPUSET_DEL(cpu_ready_set, cp->cpu_id);
		cp->cpu_flags |= CPU_POWEROFF | CPU_OFFLINE;
		cp->cpu_flags &=
		    ~(CPU_RUNNING | CPU_READY | CPU_EXISTS | CPU_ENABLE);

		cpu_set_state(cp);
	}
	return (error);
}

/*
 * Take CPU 'id' through P_OFFLINE and then power off its vcpu, retrying
 * if another thread un-quiesces the CPU while we have cpu_lock dropped.
 */
static int
vcpu_config_poweroff(processorid_t id)
{
	int oldstate;
	int error;
	cpu_t *cp;

	mutex_enter(&cpu_lock);

	if ((cp = cpu_get(id)) == NULL) {
		mutex_exit(&cpu_lock);
		return (ESRCH);
	}

	if (cpu_get_state(cp) == P_POWEROFF) {
		/* Already powered off: treat the request as idempotent */
		mutex_exit(&cpu_lock);
		return (0);
	}

	mutex_exit(&cpu_lock);

	do {
		error = p_online_internal(id, P_OFFLINE,
		    &oldstate);

		if (error != 0)
			break;

		/*
		 * So we just changed it to P_OFFLINE. But then we dropped
		 * cpu_lock, so now it is possible for another thread to change
		 * the cpu back to a different, non-quiesced state e.g.
		 * P_ONLINE.
		 */
		mutex_enter(&cpu_lock);
		if ((cp = cpu_get(id)) == NULL)
			error = ESRCH;
		else {
			if (cp->cpu_flags & CPU_QUIESCED)
				error = poweroff_vcpu(cp);
			else
				error = EBUSY;
		}
		mutex_exit(&cpu_lock);
	} while (error == EBUSY);

	return (error);
}

/*
 * Add a new virtual cpu to the domain.
 */
static int
vcpu_config_new(processorid_t id)
{
	extern int start_cpu(processorid_t);
	int error;

	if (ncpus == 1) {
		printf("cannot (yet) add cpus to a single-cpu domain\n");
		return (ENOTSUP);
	}

	/* Stay on this CPU while the new one is being started */
	affinity_set(CPU_CURRENT);
	error = start_cpu(id);
	affinity_clear();
	return (error);
}

/*
 * Power CPU 'id' on, creating it first if it has never existed in this
 * domain, and finish by bringing it P_ONLINE.
 */
static int
vcpu_config_poweron(processorid_t id)
{
	cpu_t *cp;
	int oldstate;
	int error;

	/* Ids beyond the current count mean a brand-new vcpu */
	if (id >= ncpus)
		return (vcpu_config_new(id));

	mutex_enter(&cpu_lock);

	if ((cp = cpu_get(id)) == NULL) {
		mutex_exit(&cpu_lock);
		return (ESRCH);
	}

	if (cpu_get_state(cp) != P_POWEROFF) {
		/* Already powered on: treat the request as idempotent */
		mutex_exit(&cpu_lock);
		return (0);
	}

	if ((error = poweron_vcpu(cp)) != 0) {
		mutex_exit(&cpu_lock);
		return (error);
	}

	mutex_exit(&cpu_lock);

	return (p_online_internal(id, P_ONLINE, &oldstate));
}

/* Size of the buffer used to build externally-initiated state reports */
#define	REPORT_LEN	128

/*
 * Log the outcome of an externally (xenstore) initiated state change,
 * decoding the more common error codes into readable text.
 */
static void
vcpu_config_report(processorid_t id, uint_t newstate, int error)
{
	char *report = kmem_alloc(REPORT_LEN, KM_SLEEP);
	size_t len;
	char *ps;

	switch (newstate) {
	case P_ONLINE:
		ps = PS_ONLINE;
		break;
	case P_POWEROFF:
		ps = PS_POWEROFF;
		break;
	default:
		cmn_err(CE_PANIC, "unknown state %u\n", newstate);
		break;
	}

	len = snprintf(report, REPORT_LEN,
	    "cpu%d: externally initiated %s", id, ps);

	if (!error) {
		/* Success: just note the transition and we're done */
		cmn_err(CE_CONT, "!%s\n", report);
		kmem_free(report, REPORT_LEN);
		return;
	}

	/* Failure: append a human-readable explanation of the error */
	len += snprintf(report + len, REPORT_LEN - len,
	    " failed, error %d: ", error);
	switch (error) {
	case EEXIST:
		len += snprintf(report + len, REPORT_LEN - len,
		    "cpu already %s", ps ? ps : "?");
		break;
	case ESRCH:
		len += snprintf(report + len, REPORT_LEN - len,
		    "cpu not found");
		break;
	case EINVAL:
	case EALREADY:
		break;
	case EPERM:
		len += snprintf(report + len, REPORT_LEN - len,
		    "insufficient privilege (0x%x)", id);
		break;
	case EBUSY:
		switch (newstate) {
		case P_ONLINE:
			/*
			 * This return comes from mp_cpu_start -
			 * we cannot 'start' the boot CPU.
			 */
			len += snprintf(report + len, REPORT_LEN - len,
			    "already running");
			break;
		case P_POWEROFF:
			len += snprintf(report + len, REPORT_LEN - len,
			    "bound lwps?");
			break;
		default:
			break;
		}
		/* FALLTHROUGH */
	default:
		break;
	}

	cmn_err(CE_CONT, "%s\n", report);
	kmem_free(report, REPORT_LEN);
}

/*
 * taskq callback: read the desired availability of CPU 'arg' from
 * xenstore and drive it to the matching power state.
 */
static void
vcpu_config(void *arg)
{
	int id = (int)(uintptr_t)arg;
	int error;
	char dir[16];
	char *state;

	if ((uint_t)id >= max_ncpus) {
		cmn_err(CE_WARN,
		    "vcpu_config: cpu%d does not fit in this domain", id);
		return;
	}

	(void) snprintf(dir, sizeof (dir), "cpu/%d", id);
753*843e1988Sjohnlev state = kmem_alloc(MAXPATHLEN, KM_SLEEP); 754*843e1988Sjohnlev if (xenbus_scanf(XBT_NULL, dir, "availability", "%s", state) == 0) { 755*843e1988Sjohnlev if (strcmp(state, "online") == 0) { 756*843e1988Sjohnlev error = vcpu_config_poweron(id); 757*843e1988Sjohnlev vcpu_config_report(id, P_ONLINE, error); 758*843e1988Sjohnlev } else if (strcmp(state, "offline") == 0) { 759*843e1988Sjohnlev error = vcpu_config_poweroff(id); 760*843e1988Sjohnlev vcpu_config_report(id, P_POWEROFF, error); 761*843e1988Sjohnlev } else { 762*843e1988Sjohnlev cmn_err(CE_WARN, 763*843e1988Sjohnlev "cpu%d: unknown target state '%s'", id, state); 764*843e1988Sjohnlev } 765*843e1988Sjohnlev } else 766*843e1988Sjohnlev cmn_err(CE_WARN, 767*843e1988Sjohnlev "cpu%d: unable to read target state from xenstore", id); 768*843e1988Sjohnlev 769*843e1988Sjohnlev kmem_free(state, MAXPATHLEN); 770*843e1988Sjohnlev } 771*843e1988Sjohnlev 772*843e1988Sjohnlev /*ARGSUSED*/ 773*843e1988Sjohnlev static void 774*843e1988Sjohnlev vcpu_config_event(struct xenbus_watch *watch, const char **vec, uint_t len) 775*843e1988Sjohnlev { 776*843e1988Sjohnlev const char *path = vec[XS_WATCH_PATH]; 777*843e1988Sjohnlev processorid_t id; 778*843e1988Sjohnlev char *s; 779*843e1988Sjohnlev 780*843e1988Sjohnlev if ((s = strstr(path, "cpu/")) != NULL && 781*843e1988Sjohnlev sscanf(s, "cpu/%d", &id) == 1) { 782*843e1988Sjohnlev /* 783*843e1988Sjohnlev * Run the virtual CPU configuration on a separate thread to 784*843e1988Sjohnlev * avoid blocking on this event for too long (and for now, 785*843e1988Sjohnlev * to ensure configuration requests are serialized.) 
786*843e1988Sjohnlev */ 787*843e1988Sjohnlev (void) taskq_dispatch(cpu_config_tq, 788*843e1988Sjohnlev vcpu_config, (void *)(uintptr_t)id, 0); 789*843e1988Sjohnlev } 790*843e1988Sjohnlev } 791*843e1988Sjohnlev 792*843e1988Sjohnlev static int 793*843e1988Sjohnlev xen_vcpu_initialize(processorid_t id, vcpu_guest_context_t *vgc) 794*843e1988Sjohnlev { 795*843e1988Sjohnlev int err; 796*843e1988Sjohnlev 797*843e1988Sjohnlev if ((err = HYPERVISOR_vcpu_op(VCPUOP_initialise, id, vgc)) != 0) { 798*843e1988Sjohnlev char *str; 799*843e1988Sjohnlev int level = CE_WARN; 800*843e1988Sjohnlev 801*843e1988Sjohnlev switch (err) { 802*843e1988Sjohnlev case -X_EINVAL: 803*843e1988Sjohnlev /* 804*843e1988Sjohnlev * This interface squashes multiple error sources 805*843e1988Sjohnlev * to one error code. In particular, an X_EINVAL 806*843e1988Sjohnlev * code can mean: 807*843e1988Sjohnlev * 808*843e1988Sjohnlev * - the vcpu id is out of range 809*843e1988Sjohnlev * - cs or ss are in ring 0 810*843e1988Sjohnlev * - cr3 is wrong 811*843e1988Sjohnlev * - an entry in the new gdt is above the 812*843e1988Sjohnlev * reserved entry 813*843e1988Sjohnlev * - a frame underneath the new gdt is bad 814*843e1988Sjohnlev */ 815*843e1988Sjohnlev str = "something is wrong :("; 816*843e1988Sjohnlev break; 817*843e1988Sjohnlev case -X_ENOENT: 818*843e1988Sjohnlev str = "no such cpu"; 819*843e1988Sjohnlev break; 820*843e1988Sjohnlev case -X_ENOMEM: 821*843e1988Sjohnlev str = "no mem to copy ctxt"; 822*843e1988Sjohnlev break; 823*843e1988Sjohnlev case -X_EFAULT: 824*843e1988Sjohnlev str = "bad address"; 825*843e1988Sjohnlev break; 826*843e1988Sjohnlev case -X_EEXIST: 827*843e1988Sjohnlev /* 828*843e1988Sjohnlev * Hmm. This error is returned if the vcpu has already 829*843e1988Sjohnlev * been initialized once before in the lifetime of this 830*843e1988Sjohnlev * domain. This is a logic error in the kernel. 
831*843e1988Sjohnlev */ 832*843e1988Sjohnlev level = CE_PANIC; 833*843e1988Sjohnlev str = "already initialized"; 834*843e1988Sjohnlev break; 835*843e1988Sjohnlev default: 836*843e1988Sjohnlev level = CE_PANIC; 837*843e1988Sjohnlev str = "<unexpected>"; 838*843e1988Sjohnlev break; 839*843e1988Sjohnlev } 840*843e1988Sjohnlev 841*843e1988Sjohnlev cmn_err(level, "vcpu%d: failed to init: error %d: %s", 842*843e1988Sjohnlev id, -err, str); 843*843e1988Sjohnlev } 844*843e1988Sjohnlev return (err); 845*843e1988Sjohnlev } 846*843e1988Sjohnlev 847*843e1988Sjohnlev long 848*843e1988Sjohnlev xen_vcpu_up(processorid_t id) 849*843e1988Sjohnlev { 850*843e1988Sjohnlev long err; 851*843e1988Sjohnlev 852*843e1988Sjohnlev if ((err = HYPERVISOR_vcpu_op(VCPUOP_up, id, NULL)) != 0) { 853*843e1988Sjohnlev char *str; 854*843e1988Sjohnlev 855*843e1988Sjohnlev switch (err) { 856*843e1988Sjohnlev case -X_ENOENT: 857*843e1988Sjohnlev str = "no such cpu"; 858*843e1988Sjohnlev break; 859*843e1988Sjohnlev case -X_EINVAL: 860*843e1988Sjohnlev /* 861*843e1988Sjohnlev * Perhaps this is diagnostic overkill. 
862*843e1988Sjohnlev */ 863*843e1988Sjohnlev if (HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL) < 0) 864*843e1988Sjohnlev str = "bad cpuid"; 865*843e1988Sjohnlev else 866*843e1988Sjohnlev str = "not initialized"; 867*843e1988Sjohnlev break; 868*843e1988Sjohnlev default: 869*843e1988Sjohnlev str = "<unexpected>"; 870*843e1988Sjohnlev break; 871*843e1988Sjohnlev } 872*843e1988Sjohnlev 873*843e1988Sjohnlev printf("vcpu%d: failed to start: error %d: %s\n", 874*843e1988Sjohnlev id, -(int)err, str); 875*843e1988Sjohnlev return (EBFONT); /* deliberately silly */ 876*843e1988Sjohnlev } 877*843e1988Sjohnlev return (err); 878*843e1988Sjohnlev } 879*843e1988Sjohnlev 880*843e1988Sjohnlev long 881*843e1988Sjohnlev xen_vcpu_down(processorid_t id) 882*843e1988Sjohnlev { 883*843e1988Sjohnlev long err; 884*843e1988Sjohnlev 885*843e1988Sjohnlev if ((err = HYPERVISOR_vcpu_op(VCPUOP_down, id, NULL)) != 0) { 886*843e1988Sjohnlev /* 887*843e1988Sjohnlev * X_ENOENT: no such cpu 888*843e1988Sjohnlev * X_EINVAL: bad cpuid 889*843e1988Sjohnlev */ 890*843e1988Sjohnlev panic("vcpu%d: failed to stop: error %d", id, -(int)err); 891*843e1988Sjohnlev } 892*843e1988Sjohnlev 893*843e1988Sjohnlev return (err); 894*843e1988Sjohnlev } 895