xref: /illumos-gate/usr/src/uts/intel/os/hma.c (revision e869dbac)
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019 Joyent, Inc.
 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
 */

#include <sys/cpuvar.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/machsystm.h>
#include <sys/archsystm.h>
#include <sys/controlregs.h>
#include <sys/x86_archext.h>
#include <sys/id_space.h>
#include <sys/hma.h>
#include <sys/cmn_err.h>
#include <vm/hat.h>
#include <vm/as.h>

struct hma_reg {
	const char	*hr_name;
	list_node_t	hr_node;
};

static kmutex_t hma_lock;
static list_t hma_registrations;
static boolean_t hma_exclusive = B_FALSE;
int hma_disable = 0;

static boolean_t hma_vmx_ready = B_FALSE;
static const char *hma_vmx_error = NULL;
static id_space_t *hma_vmx_vpid;

/*
 * The bulk of HMA state (VMX & SVM) is protected by cpu_lock, rather than a
 * mutex specific to the module.  It (cpu_lock) is already required for the
 * state needed to perform setup on all CPUs, so it was a natural fit to
 * protect this data too.
 */
typedef enum hma_cpu_state {
	HCS_UNINITIALIZED = 0,
	HCS_READY,
	HCS_ERROR
} hma_cpu_state_t;
static hma_cpu_state_t hma_cpu_status[NCPU];

/* HMA-internal tracking of optional VMX capabilities */
typedef enum {
	HVC_EPT		= (1 << 0),
	HVC_VPID	= (1 << 1),
	HVC_INVEPT_ONE	= (1 << 2),
	HVC_INVEPT_ALL	= (1 << 3),
} hma_vmx_capab_t;

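/*
 * Per-CPU VMXON state: the VMXON region (VA and PA) for each CPU, along with
 * the VMX revision identifier and optional capabilities detected at init.
 */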
static void *hma_vmx_vmxon_page[NCPU];
static uintptr_t hma_vmx_vmxon_pa[NCPU];
static uint32_t hma_vmx_revision;
static hma_vmx_capab_t hma_vmx_capabs = 0;

static boolean_t hma_svm_ready = B_FALSE;
static const char *hma_svm_error = NULL;
static uint32_t hma_svm_features;
static uint32_t hma_svm_max_asid;

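/*
 * Per-CPU host-save area (VA and PA), programmed into MSR_AMD_VM_HSAVE_PA
 * when SVM is activated on a CPU.
 */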
static void *hma_svm_hsave_page[NCPU];
static uintptr_t hma_svm_hsave_pa[NCPU];

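/*
 * Per-CPU tracking of the most recently issued ASID and its generation, used
 * by hma_svm_asid_update() to hand out fresh ASIDs to vCPUs.
 */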
static hma_svm_asid_t hma_svm_cpu_asid[NCPU];


static int hma_vmx_init(void);
static int hma_svm_init(void);

/* Helpers from ml/hma_asm.s */
int hma_vmx_do_invept(int, uintptr_t);
int hma_vmx_vmxon(uintptr_t);

void
hma_init(void)
{
	mutex_init(&hma_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&hma_registrations, sizeof (struct hma_reg),
	    offsetof(struct hma_reg, hr_node));

	if (hma_disable != 0) {
		cmn_err(CE_CONT, "?hma_init: disabled");
		return;
	}

	switch (cpuid_getvendor(CPU)) {
	case X86_VENDOR_Intel:
		(void) hma_vmx_init();
		break;
	case X86_VENDOR_AMD:
	case X86_VENDOR_HYGON:
		(void) hma_svm_init();
		break;
	default:
		break;
	}
}

static hma_reg_t *
hma_register_backend(const char *name)
{
	struct hma_reg *reg;
	boolean_t is_ready;

	ASSERT(MUTEX_HELD(&hma_lock));

	switch (cpuid_getvendor(CPU)) {
	case X86_VENDOR_Intel:
		is_ready = hma_vmx_ready;
		break;
	case X86_VENDOR_AMD:
	case X86_VENDOR_HYGON:
		is_ready = hma_svm_ready;
		break;
	default:
		is_ready = B_FALSE;
		break;
	}

	if (!is_ready)
		return (NULL);

	reg = kmem_zalloc(sizeof (*reg), KM_SLEEP);
	reg->hr_name = name;
	list_insert_tail(&hma_registrations, reg);

	return (reg);
}

hma_reg_t *
hma_register(const char *name)
{
	struct hma_reg *reg = NULL;

	VERIFY(name != NULL);

	mutex_enter(&hma_lock);

	if (!hma_exclusive)
		reg = hma_register_backend(name);

	mutex_exit(&hma_lock);

	return (reg);
}

hma_reg_t *
hma_register_exclusive(const char *name)
{
	struct hma_reg *reg = NULL;

	VERIFY(name != NULL);

	mutex_enter(&hma_lock);

	if (list_is_empty(&hma_registrations)) {
		reg = hma_register_backend(name);
		if (reg != NULL)
			hma_exclusive = B_TRUE;
	}

	mutex_exit(&hma_lock);

	return (reg);
}

void
hma_unregister(hma_reg_t *reg)
{
	VERIFY(reg != NULL);
	VERIFY(!list_is_empty(&hma_registrations));

	mutex_enter(&hma_lock);
	list_remove(&hma_registrations, reg);
	if (hma_exclusive && list_is_empty(&hma_registrations))
		hma_exclusive = B_FALSE;
	mutex_exit(&hma_lock);
	kmem_free(reg, sizeof (*reg));
}
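
/*
 * Illustrative (hypothetical) consumer usage of the registration interface: a
 * hypervisor driver registers before making use of VMX/SVM and unregisters
 * when it is done.  The name and error handling below are examples only, not
 * taken from any in-gate consumer:
 *
 *	hma_reg_t *reg = hma_register("myhvm");
 *	if (reg == NULL)
 *		return (ENXIO);
 *	...
 *	hma_unregister(reg);
 */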

/*
 * VPID 0 is reserved for instances where VPID is disabled.  Some hypervisors
 * (read: bhyve) reserve lower-order VPIDs for use in fallback behavior if
 * unique VPIDs could not be allocated for all the vCPUs belonging to a VM.
 */
#define	HMA_VPID_RESERVED	NCPU

uint16_t
hma_vmx_vpid_alloc(void)
{
	id_t res;

	/* Do not bother if the CPU lacks support */
	if ((hma_vmx_capabs & HVC_VPID) == 0) {
		return (0);
	}

	res = id_alloc_nosleep(hma_vmx_vpid);
	if (res == -1) {
		return (0);
	} else {
		ASSERT(res > HMA_VPID_RESERVED && res <= UINT16_MAX);
		return (res);
	}
}

void
hma_vmx_vpid_free(uint16_t vpid)
{
	VERIFY(vpid > HMA_VPID_RESERVED);
	id_free(hma_vmx_vpid, (id_t)vpid);
}

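/*
 * INVEPT invalidation types, as defined by the Intel SDM: a single-context
 * invalidation (type 1) flushes mappings associated with one EPTP, while an
 * all-contexts invalidation (type 2) flushes mappings for every EPTP.
 */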
#define	INVEPT_SINGLE_CONTEXT	1
#define	INVEPT_ALL_CONTEXTS	2

static int
hma_vmx_invept_xcall(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3 __unused)
{
	int flag = (int)arg1;
	uintptr_t eptp = (uintptr_t)arg2;

	ASSERT(flag == INVEPT_SINGLE_CONTEXT || flag == INVEPT_ALL_CONTEXTS);

	VERIFY0(hma_vmx_do_invept(flag, eptp));
	return (0);
}

void
hma_vmx_invept_allcpus(uintptr_t eptp)
{
	int flag = -1;
	cpuset_t set;

	if ((hma_vmx_capabs & HVC_INVEPT_ONE) != 0) {
		flag = INVEPT_SINGLE_CONTEXT;
	} else if ((hma_vmx_capabs & HVC_INVEPT_ALL) != 0) {
		flag = INVEPT_ALL_CONTEXTS;
		eptp = 0;
	} else {
		return;
	}

	cpuset_zero(&set);
	mutex_enter(&cpu_lock);

	cpuset_or(&set, &cpu_active_set);
	xc_call((xc_arg_t)flag, (xc_arg_t)eptp, 0, CPUSET2BV(set),
	    hma_vmx_invept_xcall);

	mutex_exit(&cpu_lock);
}

static int
hma_vmx_cpu_vmxon(xc_arg_t arg1 __unused, xc_arg_t arg2 __unused,
    xc_arg_t arg3 __unused)
{
	uint64_t fctrl;
	processorid_t id = CPU->cpu_seqid;
	void *vmxon_region = hma_vmx_vmxon_page[id];
	uintptr_t vmxon_pa = hma_vmx_vmxon_pa[id];

	VERIFY(vmxon_region != NULL && vmxon_pa != 0);

	/*
	 * Ensure that the VMX support and lock bits are enabled in the
	 * feature-control MSR.
	 */
	fctrl = rdmsr(MSR_IA32_FEAT_CTRL);
	if ((fctrl & IA32_FEAT_CTRL_LOCK) == 0 ||
	    (fctrl & IA32_FEAT_CTRL_VMX_EN) == 0) {
		fctrl = fctrl | IA32_FEAT_CTRL_VMX_EN | IA32_FEAT_CTRL_LOCK;
		wrmsr(MSR_IA32_FEAT_CTRL, fctrl);
	}

	setcr4(getcr4() | CR4_VMXE);

	if (hma_vmx_vmxon(vmxon_pa) == 0) {
		hma_cpu_status[id] = HCS_READY;
	} else {
		hma_cpu_status[id] = HCS_ERROR;

		/*
		 * If VMX has already been marked active and available for the
		 * system, then failure to perform VMXON on a newly-onlined CPU
		 * represents a fatal problem.  Continuing on would mean
		 * failure for any hypervisor thread which landed here.
		 */
		if (hma_vmx_ready) {
			panic("VMXON failure after VMX marked ready");
		}
	}
	return (0);
}

static int
hma_vmx_cpu_setup(cpu_setup_t what, int id, void *arg __unused)
{
	hma_cpu_state_t state;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(id >= 0 && id < NCPU);

	if (what != CPU_ON) {
		/*
		 * For the purposes of VMX setup, only the CPU_ON event is of
		 * interest.  Letting VMX state linger on an offline CPU should
		 * not cause any harm.
		 *
		 * This logic assumes that any offlining activity is strictly
		 * administrative in nature and will not alter any existing
		 * configuration (such as %cr4 bits previously set).
		 */
		return (0);
	}

	state = hma_cpu_status[id];
	if (state == HCS_ERROR) {
		return (-1);
	}

	/* Allocate the VMXON page for this CPU, if not already done */
	if (hma_vmx_vmxon_page[id] == NULL) {
		caddr_t va;
		pfn_t pfn;

		va = kmem_alloc(PAGESIZE, KM_SLEEP);
		VERIFY0((uintptr_t)va & PAGEOFFSET);
		hma_vmx_vmxon_page[id] = va;

		/* Initialize the VMX revision field as expected */
		bcopy(&hma_vmx_revision, va, sizeof (hma_vmx_revision));

		/*
		 * Cache the physical address of the VMXON page rather than
		 * looking it up later when the potential blocking of
		 * hat_getpfnum would be less acceptable.
		 */
		pfn = hat_getpfnum(kas.a_hat, va);
		hma_vmx_vmxon_pa[id] = (pfn << PAGESHIFT);
	} else {
		VERIFY(hma_vmx_vmxon_pa[id] != 0);
	}

	if (state == HCS_UNINITIALIZED) {
		cpuset_t set;

		/* Activate VMX on this CPU */
		cpuset_zero(&set);
		cpuset_add(&set, id);
		xc_call(0, 0, 0, CPUSET2BV(set), hma_vmx_cpu_vmxon);
	} else {
		VERIFY3U(state, ==, HCS_READY);

		/*
		 * If an already-initialized CPU is going back online, perform
		 * an all-contexts invept to eliminate the possibility of
		 * cached EPT state causing issues.
		 */
		if ((hma_vmx_capabs & HVC_INVEPT_ALL) != 0) {
			cpuset_t set;

			cpuset_zero(&set);
			cpuset_add(&set, id);
			xc_call((xc_arg_t)INVEPT_ALL_CONTEXTS, 0, 0,
			    CPUSET2BV(set), hma_vmx_invept_xcall);
		}
	}

	return (hma_cpu_status[id] != HCS_READY);
}

/*
 * Determining the availability of VM execution controls is somewhat different
 * from conventional means, where one simply checks for asserted bits in the
 * MSR value.  Instead, these execution control MSRs are split into two halves:
 * the lower 32-bits indicating capabilities which can be zeroed in the VMCS
 * field and the upper 32-bits indicating capabilities which can be set to one.
 *
 * It is described in detail in Appendix A.3 of SDM volume 3.
 */
#define	VMX_CTL_ONE_SETTING(val, flag)	\
	(((val) & ((uint64_t)(flag) << 32)) != 0)

static const char *
hma_vmx_query_details(void)
{
	boolean_t query_true_ctl = B_FALSE;
	uint64_t msr;

	/* The basic INS/OUTS functionality is cited as a necessary prereq */
	msr = rdmsr(MSR_IA32_VMX_BASIC);
	if ((msr & IA32_VMX_BASIC_INS_OUTS) == 0) {
		return ("VMX does not support INS/OUTS");
	}

	/* Record the VMX revision for later VMXON usage */
	hma_vmx_revision = (uint32_t)msr;

	/*
	 * Bit 55 in the VMX_BASIC MSR determines how VMX control information
	 * can be queried.
	 */
	query_true_ctl = (msr & IA32_VMX_BASIC_TRUE_CTRLS) != 0;

	/* Check for EPT and VPID support */
	msr = rdmsr(query_true_ctl ?
	    MSR_IA32_VMX_TRUE_PROCBASED_CTLS : MSR_IA32_VMX_PROCBASED_CTLS);
	if (VMX_CTL_ONE_SETTING(msr, IA32_VMX_PROCBASED_2ND_CTLS)) {
		msr = rdmsr(MSR_IA32_VMX_PROCBASED2_CTLS);
		if (VMX_CTL_ONE_SETTING(msr, IA32_VMX_PROCBASED2_EPT)) {
			hma_vmx_capabs |= HVC_EPT;
		}
		if (VMX_CTL_ONE_SETTING(msr, IA32_VMX_PROCBASED2_VPID)) {
			hma_vmx_capabs |= HVC_VPID;
		}
	}

	/* Check for INVEPT support */
	if ((hma_vmx_capabs & HVC_EPT) != 0) {
		msr = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
		if ((msr & IA32_VMX_EPT_VPID_INVEPT) != 0) {
			if ((msr & IA32_VMX_EPT_VPID_INVEPT_SINGLE) != 0) {
				hma_vmx_capabs |= HVC_INVEPT_ONE;
			}
			if ((msr & IA32_VMX_EPT_VPID_INVEPT_ALL) != 0) {
				hma_vmx_capabs |= HVC_INVEPT_ALL;
			}
		}
	}

	return (NULL);
}

static int
hma_vmx_init(void)
{
	cpu_t *cp;
	uint64_t msr;
	int err = 0;
	const char *msg = NULL;

	if (!is_x86_feature(x86_featureset, X86FSET_VMX)) {
		msg = "CPU does not support VMX";
		goto bail;
	}

	/* Has the BIOS set the feature-control lock bit without VMX enabled? */
	msr = rdmsr(MSR_IA32_FEAT_CTRL);
	if ((msr & IA32_FEAT_CTRL_LOCK) != 0 &&
	    (msr & IA32_FEAT_CTRL_VMX_EN) == 0) {
		msg = "VMX support disabled by BIOS";
		goto bail;
	}

	msg = hma_vmx_query_details();
	if (msg != NULL) {
		goto bail;
	}

	mutex_enter(&cpu_lock);
	/* Perform VMX configuration for already-online CPUs. */
	cp = cpu_active;
	do {
		err = hma_vmx_cpu_setup(CPU_ON, cp->cpu_seqid, NULL);
		if (err != 0) {
			msg = "failure during VMXON setup";
			mutex_exit(&cpu_lock);
			goto bail;
		}
	} while ((cp = cp->cpu_next_onln) != cpu_active);

	/*
	 * Register callback for later-onlined CPUs and perform other remaining
	 * resource allocation.
	 */
	register_cpu_setup_func(hma_vmx_cpu_setup, NULL);
	mutex_exit(&cpu_lock);

	hma_vmx_vpid = id_space_create("hma_vmx_vpid", HMA_VPID_RESERVED + 1,
	    UINT16_MAX);
	hma_vmx_ready = B_TRUE;

	return (0);

bail:
	hma_vmx_error = msg;
	cmn_err(CE_NOTE, "!hma_vmx_init: %s", msg);
	return (-1);
}

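/*
 * TLB-control values handed back to the caller for placement in the VMCB.
 * Per the AMD APM, 0 requests no flush, 1 flushes the entire TLB on VMRUN,
 * and 3 flushes only the entries belonging to the guest's current ASID.
 */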
#define	VMCB_FLUSH_NOTHING	0x0
#define	VMCB_FLUSH_ALL		0x1
#define	VMCB_FLUSH_ASID		0x3

void
hma_svm_asid_init(hma_svm_asid_t *vcp)
{
	/*
	 * Initialize the generation to 0, forcing an ASID allocation on first
	 * entry.  Leave the ASID at 0, so if the host forgoes the call to
	 * hma_svm_asid_update(), SVM will bail on the invalid vcpu state.
	 */
	vcp->hsa_gen = 0;
	vcp->hsa_asid = 0;
}

uint8_t
hma_svm_asid_update(hma_svm_asid_t *vcp, boolean_t flush_by_asid,
    boolean_t npt_flush)
{
	hma_svm_asid_t *hcp;
	ulong_t iflag;
	uint8_t res = VMCB_FLUSH_NOTHING;

	/*
	 * If NPT changes dictate a TLB flush and by-ASID flushing is not
	 * supported/used, force a fresh ASID allocation.
	 */
	if (npt_flush && !flush_by_asid) {
		vcp->hsa_gen = 0;
	}

	/*
	 * It is expected that ASID resource updates will commonly be done
	 * inside a VMM critical section where the GIF is already cleared,
	 * preventing any possibility of interruption.  Since that cannot be
	 * checked (there is no easy way to read the GIF), %rflags.IF is also
	 * cleared for edge cases where an ASID update is performed outside of
	 * such a GIF-safe critical section.
	 */
	iflag = intr_clear();

	hcp = &hma_svm_cpu_asid[CPU->cpu_seqid];
	if (vcp->hsa_gen != hcp->hsa_gen) {
		hcp->hsa_asid++;

		if (hcp->hsa_asid >= hma_svm_max_asid) {
			/* Keep the ASID properly constrained */
			hcp->hsa_asid = 1;
			hcp->hsa_gen++;
			if (hcp->hsa_gen == 0) {
				/*
				 * Stay clear of the '0' sentinel value for
				 * generation, if wrapping around.
				 */
				hcp->hsa_gen = 1;
			}
		}
		vcp->hsa_gen = hcp->hsa_gen;
		vcp->hsa_asid = hcp->hsa_asid;

		ASSERT(vcp->hsa_asid != 0);
		ASSERT3U(vcp->hsa_asid, <, hma_svm_max_asid);

		if (flush_by_asid) {
			res = VMCB_FLUSH_ASID;
		} else {
			res = VMCB_FLUSH_ALL;
		}
	} else if (npt_flush) {
		ASSERT(flush_by_asid);
		res = VMCB_FLUSH_ASID;
	}

	intr_restore(iflag);
	return (res);
}

static int
hma_svm_cpu_activate(xc_arg_t arg1 __unused, xc_arg_t arg2 __unused,
    xc_arg_t arg3 __unused)
{
	const processorid_t id = CPU->cpu_seqid;
	const uintptr_t hsave_pa = hma_svm_hsave_pa[id];
	uint64_t efer;

	VERIFY(hsave_pa != 0);

	/* Enable SVM via EFER */
	efer = rdmsr(MSR_AMD_EFER);
	efer |= AMD_EFER_SVME;
	wrmsr(MSR_AMD_EFER, efer);

	/* Setup hsave area */
	wrmsr(MSR_AMD_VM_HSAVE_PA, hsave_pa);

	hma_cpu_status[id] = HCS_READY;
	return (0);
}

static int
hma_svm_cpu_setup(cpu_setup_t what, int id, void *arg __unused)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(id >= 0 && id < NCPU);

	switch (what) {
	case CPU_CONFIG:
	case CPU_ON:
	case CPU_INIT:
		break;
	default:
		/*
		 * Other events, such as CPU offlining, are of no interest.
		 * Letting the SVM state linger should not cause any harm.
		 *
		 * This logic assumes that any offlining activity is strictly
		 * administrative in nature and will not alter any existing
		 * configuration (such as EFER bits previously set).
		 */
		return (0);
	}

	/* Perform initialization if it has not been previously attempted. */
	if (hma_cpu_status[id] != HCS_UNINITIALIZED) {
		return ((hma_cpu_status[id] == HCS_READY) ? 0 : -1);
	}

	/* Allocate the hsave page for this CPU */
	if (hma_svm_hsave_page[id] == NULL) {
		caddr_t va;
		pfn_t pfn;

		va = kmem_alloc(PAGESIZE, KM_SLEEP);
		VERIFY0((uintptr_t)va & PAGEOFFSET);
		hma_svm_hsave_page[id] = va;

		/*
		 * Cache the physical address of the hsave page rather than
		 * looking it up later when the potential blocking of
		 * hat_getpfnum would be less acceptable.
		 */
		pfn = hat_getpfnum(kas.a_hat, va);
		hma_svm_hsave_pa[id] = (pfn << PAGESHIFT);
	} else {
		VERIFY(hma_svm_hsave_pa[id] != 0);
	}

	kpreempt_disable();
	if (CPU->cpu_seqid == id) {
		/* Perform svm setup directly if this CPU is the target */
		(void) hma_svm_cpu_activate(0, 0, 0);
		kpreempt_enable();
	} else {
		cpuset_t set;

		/* Use a cross-call if a remote CPU is the target */
		kpreempt_enable();
		cpuset_zero(&set);
		cpuset_add(&set, id);
		xc_call(0, 0, 0, CPUSET2BV(set), hma_svm_cpu_activate);
	}

	return (hma_cpu_status[id] != HCS_READY);
}

static int
hma_svm_init(void)
{
	uint64_t msr;
	const char *msg = NULL;
	struct cpuid_regs regs;
	cpu_t *cp;

	if (!is_x86_feature(x86_featureset, X86FSET_SVM)) {
		msg = "CPU does not support SVM";
		goto bail;
	}

	msr = rdmsr(MSR_AMD_VM_CR);
	if ((msr & AMD_VM_CR_SVMDIS) != 0) {
		msg = "SVM disabled by BIOS";
		goto bail;
	}

	regs.cp_eax = 0x8000000a;
	(void) cpuid_insn(NULL, &regs);
	const uint32_t nasid = regs.cp_ebx;
	const uint32_t feat = regs.cp_edx;

	if (nasid == 0) {
		msg = "Not enough ASIDs for guests";
		goto bail;
	}
	if ((feat & CPUID_AMD_EDX_NESTED_PAGING) == 0) {
		msg = "CPU does not support nested paging";
		goto bail;
	}
	if ((feat & CPUID_AMD_EDX_NRIPS) == 0) {
		msg = "CPU does not support NRIP save";
		goto bail;
	}

	hma_svm_features = feat;
	hma_svm_max_asid = nasid;

	mutex_enter(&cpu_lock);
	/* Perform SVM configuration for already-online CPUs. */
	cp = cpu_active;
	do {
		int err = hma_svm_cpu_setup(CPU_ON, cp->cpu_seqid, NULL);
		if (err != 0) {
			msg = "failure during SVM setup";
			mutex_exit(&cpu_lock);
			goto bail;
		}
	} while ((cp = cp->cpu_next_onln) != cpu_active);

	/*
	 * Register callback for later-onlined CPUs and perform other remaining
	 * resource allocation.
	 */
	register_cpu_setup_func(hma_svm_cpu_setup, NULL);
	mutex_exit(&cpu_lock);

	/* Initialize per-CPU ASID state. */
	for (uint_t i = 0; i < NCPU; i++) {
		/*
		 * Skip past sentinel 0 value for generation.  Doing so for
		 * ASID is unneeded, since it will be incremented during the
		 * first allocation.
		 */
		hma_svm_cpu_asid[i].hsa_gen = 1;
		hma_svm_cpu_asid[i].hsa_asid = 0;
	}

	hma_svm_ready = B_TRUE;
	return (0);

bail:
	hma_svm_error = msg;
	cmn_err(CE_NOTE, "!hma_svm_init: %s", msg);
	return (-1);
}
750