/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_SYS_MACHPRIVREGS_H
#define	_SYS_MACHPRIVREGS_H

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/hypervisor.h>

/*
 * Platform dependent instruction sequences for manipulating
 * privileged state
 */

#ifdef __cplusplus
extern "C" {
#endif

/*
 * CLI and STI are quite complex to virtualize!
 */

#if defined(__amd64)

#define	CURVCPU(r)					\
	movq	%gs:CPU_VCPU_INFO, r

#define	CURTHREAD(r)					\
	movq	%gs:CPU_THREAD, r

#elif defined(__i386)

#define	CURVCPU(r)					\
	movl	%gs:CPU_VCPU_INFO, r

#define	CURTHREAD(r)					\
	movl	%gs:CPU_THREAD, r

#endif	/* __i386 */
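
/*
 * Note: CURVCPU and CURTHREAD read per-CPU state through the %gs-based
 * cpu_t.  CPU_VCPU_INFO and CPU_THREAD are assumed here to be generated
 * assembler offsets (assym) for the cpu's vcpu_info pointer and current
 * thread pointer; the register argument 'r' receives the vcpu_info_t or
 * kthread_t pointer respectively.
 */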

#define	XEN_TEST_EVENT_PENDING(r)			\
	testb	$0xff, VCPU_INFO_EVTCHN_UPCALL_PENDING(r)

#define	XEN_SET_UPCALL_MASK(r)				\
	movb	$1, VCPU_INFO_EVTCHN_UPCALL_MASK(r)

#define	XEN_GET_UPCALL_MASK(r, mask)			\
	movb	VCPU_INFO_EVTCHN_UPCALL_MASK(r), mask

#define	XEN_TEST_UPCALL_MASK(r)				\
	testb	$1, VCPU_INFO_EVTCHN_UPCALL_MASK(r)

#define	XEN_CLEAR_UPCALL_MASK(r)			\
	ASSERT_UPCALL_MASK_IS_SET;			\
	movb	$0, VCPU_INFO_EVTCHN_UPCALL_MASK(r)
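
/*
 * The evtchn_upcall_mask byte in the vcpu_info structure is Xen's
 * analogue of the interrupt flag: while it is non-zero the hypervisor
 * will not deliver event upcalls to this vcpu, and any events arriving
 * in the meantime are noted in evtchn_upcall_pending.  Setting the mask
 * therefore corresponds to CLI, clearing it to STI.
 */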

#ifdef DEBUG

/*
 * Much logic depends on the upcall mask being set at
 * various points in the code; use this macro to validate.
 *
 * Need to use CURVCPU(r) to establish the vcpu pointer.
 */
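/*
 * Beyond the assertion itself, the DEBUG variants record the address of
 * the most recent STI and CLI site for each CPU.  stistipanic, stistimsg,
 * laststi and lastcli are assumed to be defined in the platform's Xen
 * support code: stistipanic > 0 requests a panic (with stistimsg) when
 * the invariant is violated, and laststi/lastcli are arrays indexed by
 * CPU_ID holding the code address of the last STI/CLI on that CPU.
 */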
#if defined(__amd64)

#define	ASSERT_UPCALL_MASK_IS_SET			\
	pushq	%r11;					\
	CURVCPU(%r11);					\
	XEN_TEST_UPCALL_MASK(%r11);			\
	jne	6f;					\
	cmpl	$0, stistipanic(%rip);			\
	jle	6f;					\
	movl	$-1, stistipanic(%rip);			\
	movq	stistimsg(%rip), %rdi;			\
	xorl	%eax, %eax;				\
	call	panic;					\
6:	pushq	%rax;					\
	pushq	%rbx;					\
	movl	%gs:CPU_ID, %eax;			\
	leaq	.+0(%rip), %r11;			\
	leaq	laststi(%rip), %rbx;			\
	movq	%r11, (%rbx, %rax, 8);			\
	popq	%rbx;					\
	popq	%rax;					\
	popq	%r11

#define	SAVE_CLI_LOCATION				\
	pushq	%rax;					\
	pushq	%rbx;					\
	pushq	%rcx;					\
	movl	%gs:CPU_ID, %eax;			\
	leaq	.+0(%rip), %rcx;			\
	leaq	lastcli, %rbx;				\
	movq	%rcx, (%rbx, %rax, 8);			\
	popq	%rcx;					\
	popq	%rbx;					\
	popq	%rax;					\

#elif defined(__i386)

#define	ASSERT_UPCALL_MASK_IS_SET			\
	pushl	%ecx;					\
	CURVCPU(%ecx);					\
	XEN_TEST_UPCALL_MASK(%ecx);			\
	jne	6f;					\
	cmpl	$0, stistipanic;			\
	jle	6f;					\
	movl	$-1, stistipanic;			\
	movl	stistimsg, %ecx;			\
	pushl	%ecx;					\
	call	panic;					\
6:	pushl	%eax;					\
	pushl	%ebx;					\
	movl	%gs:CPU_ID, %eax;			\
	leal	.+0, %ecx;				\
	leal	laststi, %ebx;				\
	movl	%ecx, (%ebx, %eax, 4);			\
	popl	%ebx;					\
	popl	%eax;					\
	popl	%ecx

#define	SAVE_CLI_LOCATION				\
	pushl	%eax;					\
	pushl	%ebx;					\
	pushl	%ecx;					\
	movl	%gs:CPU_ID, %eax;			\
	leal	.+0, %ecx;				\
	leal	lastcli, %ebx;				\
	movl	%ecx, (%ebx, %eax, 4);			\
	popl	%ecx;					\
	popl	%ebx;					\
	popl	%eax;					\

#endif	/* __i386 */

#else	/* DEBUG */

#define	ASSERT_UPCALL_MASK_IS_SET	/* empty */
#define	SAVE_CLI_LOCATION		/* empty */

#endif	/* DEBUG */

#define	KPREEMPT_DISABLE(t)				\
	addb	$1, T_PREEMPT(t)

#define	KPREEMPT_ENABLE_NOKP(t)				\
	subb	$1, T_PREEMPT(t)
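
/*
 * T_PREEMPT(t) is the thread's preemption-disable count (t_preempt).
 * Bumping it prevents kernel preemption; KPREEMPT_ENABLE_NOKP drops the
 * count again without calling kpreempt(), leaving any deferred
 * preemption to be noticed later (e.g. when events are re-enabled).
 */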

#define	CLI(r)						\
	CURTHREAD(r);					\
	KPREEMPT_DISABLE(r);				\
	CURVCPU(r);					\
	XEN_SET_UPCALL_MASK(r);				\
	SAVE_CLI_LOCATION;				\
	CURTHREAD(r);					\
	KPREEMPT_ENABLE_NOKP(r)

#define	CLIRET(r, ret)					\
	CURTHREAD(r);					\
	KPREEMPT_DISABLE(r);				\
	CURVCPU(r);					\
	XEN_GET_UPCALL_MASK(r, ret);			\
	XEN_SET_UPCALL_MASK(r);				\
	SAVE_CLI_LOCATION;				\
	CURTHREAD(r);					\
	KPREEMPT_ENABLE_NOKP(r)
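
/*
 * CLI and CLIRET bracket the mask update with a preemption-disable of
 * the current thread so that we cannot be preempted and migrated to a
 * different CPU between fetching this CPU's vcpu_info pointer and
 * setting its upcall mask.  CLIRET additionally returns the previous
 * mask value in 'ret' so the caller can restore it later.
 */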

/*
 * HYPERVISOR_block clears the upcall mask for us and then delivers an
 * upcall if an event is pending.  We rely on this to get a callback on
 * this cpu without the danger of being preempted and migrating to
 * another cpu between enabling upcalls and delivery of the callback.
 */
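/*
 * The fast path below relies on evtchn_upcall_pending and
 * evtchn_upcall_mask being adjacent bytes in vcpu_info, pending first.
 * Loading $0x100 into %ax describes the expected state (pending == 0,
 * mask == 1); the 16-bit cmpxchg atomically clears both bytes only if
 * that is still the case.  If an event snuck in (pending != 0), the
 * compare fails and we fall into the SCHEDOP_block hypercall, which
 * clears the mask and immediately delivers the pending upcall.
 */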
#if defined(__amd64)

#define	STI_CLOBBER		/* clobbers %rax, %rdi, %r11 */		\
	CURVCPU(%r11);							\
	ASSERT_UPCALL_MASK_IS_SET;					\
	movw	$0x100, %ax;	/* assume mask set, pending clear */	\
	movw	$0, %di;	/* clear mask and pending */		\
	lock;								\
	cmpxchgw %di, VCPU_INFO_EVTCHN_UPCALL_PENDING(%r11);		\
	jz	7f;		/* xchg worked, we're done */		\
	movl	$__HYPERVISOR_sched_op, %eax; /* have pending upcall */	\
	movl	$SCHEDOP_block, %edi;					\
	pushq	%rsi;	/* hypercall clobbers C param regs plus r10 */	\
	pushq	%rcx;							\
	pushq	%rdx;							\
	pushq	%r8;							\
	pushq	%r9;							\
	pushq	%r10;							\
	TRAP_INSTR;	/* clear upcall mask, force upcall */		\
	popq	%r10;							\
	popq	%r9;							\
	popq	%r8;							\
	popq	%rdx;							\
	popq	%rcx;							\
	popq	%rsi;							\
7:

#define	STI								\
	pushq	%r11;							\
	pushq	%rdi;							\
	pushq	%rax;							\
	STI_CLOBBER;	/* clobbers %r11, %rax, %rdi */			\
	popq	%rax;							\
	popq	%rdi;							\
	popq	%r11

#elif defined(__i386)

#define	STI_CLOBBER		/* clobbers %eax, %ebx, %ecx */		\
	CURVCPU(%ecx);							\
	ASSERT_UPCALL_MASK_IS_SET;					\
	movw	$0x100, %ax;	/* assume mask set, pending clear */	\
	movw	$0, %bx;	/* clear mask and pending */		\
	lock;								\
	cmpxchgw %bx, VCPU_INFO_EVTCHN_UPCALL_PENDING(%ecx);		\
	jz	7f;		/* xchg worked, we're done */		\
	movl	$__HYPERVISOR_sched_op, %eax; /* have pending upcall */	\
	movl	$SCHEDOP_block, %ebx;					\
	TRAP_INSTR;		/* clear upcall mask, force upcall */	\
7:

#define	STI						\
	pushl	%eax;					\
	pushl	%ebx;					\
	pushl	%ecx;					\
	STI_CLOBBER;	/* clobbers %eax, %ebx, %ecx */	\
	popl	%ecx;					\
	popl	%ebx;					\
	popl	%eax

#endif	/* __i386 */

/*
 * Map the PS_IE bit to the hypervisor's event mask bit
 * To -set- the event mask, we have to do a CLI
 * To -clear- the event mask, we have to do a STI
 * (with all the accompanying pre-emption and callbacks, ick)
 *
 * And vice versa.
 */

#if defined(__amd64)

#define	IE_TO_EVENT_MASK(rtmp, rfl)		\
	testq	$PS_IE, rfl;			\
	jnz	4f;				\
	CLI(rtmp);				\
	jmp	5f;				\
4:	STI;					\
5:

#define	EVENT_MASK_TO_IE(rtmp, rfl)		\
	andq	$_BITNOT(PS_IE), rfl;		\
	CURVCPU(rtmp);				\
	XEN_TEST_UPCALL_MASK(rtmp);		\
	jnz	1f;				\
	orq	$PS_IE, rfl;			\
1:

#elif defined(__i386)

#define	IE_TO_EVENT_MASK(rtmp, rfl)		\
	testl	$PS_IE, rfl;			\
	jnz	4f;				\
	CLI(rtmp);				\
	jmp	5f;				\
4:	STI;					\
5:

#define	EVENT_MASK_TO_IE(rtmp, rfl)		\
	andl	$_BITNOT(PS_IE), rfl;		\
	CURVCPU(rtmp);				\
	XEN_TEST_UPCALL_MASK(rtmp);		\
	jnz	1f;				\
	orl	$PS_IE, rfl;			\
1:

#endif	/* __i386 */
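
/*
 * IE_TO_EVENT_MASK applies a flags image to the hypervisor: PS_IE clear
 * means mask events (CLI), PS_IE set means unmask them (STI).
 * EVENT_MASK_TO_IE does the reverse, synthesizing a flags image in 'rfl'
 * whose PS_IE bit is set only when the upcall mask is clear, i.e. when
 * events are currently deliverable.
 */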

/*
 * Used to re-enable interrupts in the body of exception handlers
 */

#if defined(__amd64)

#define	ENABLE_INTR_FLAGS		\
	pushq	$F_ON;			\
	popfq;				\
	STI

#elif defined(__i386)

#define	ENABLE_INTR_FLAGS		\
	pushl	$F_ON;			\
	popfl;				\
	STI

#endif	/* __i386 */
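
/*
 * F_ON is the "interrupts enabled" flags image (PS_IE set).  Under a
 * paravirtualized guest the popf of PS_IE does not actually unmask
 * event delivery, so the explicit STI is what re-enables events; the
 * popf just keeps the architectural flags image consistent.
 */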

/*
 * Virtualize IRET and SYSRET
 */

#if defined(__amd64)

#if defined(DEBUG)

/*
 * Die nastily with a #ud trap if we are about to switch to user
 * mode in HYPERVISOR_IRET and RUPDATE_PENDING is set.
 */
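/*
 * After the pushq %r15 below, the return frame starts at 8(%rsp), so
 * 0x10(%rsp) is the saved %cs; if it is KCS_SEL we are returning to the
 * kernel and skip the check.  PCB_RUPDATE being non-zero is assumed to
 * mean the lwp's saved user registers have been modified (e.g. via
 * /proc) and still need to be written back before returning to user
 * mode, which would make this return unsafe.
 */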
#define	__ASSERT_NO_RUPDATE_PENDING			\
	pushq	%r15;					\
	cmpw	$KCS_SEL, 0x10(%rsp);			\
	je	1f;					\
	movq	%gs:CPU_THREAD, %r15;			\
	movq	T_LWP(%r15), %r15;			\
	testb	$0x1, PCB_RUPDATE(%r15);		\
	je	1f;					\
	ud2;						\
1:	popq	%r15

#else	/* DEBUG */

#define	__ASSERT_NO_RUPDATE_PENDING

#endif	/* DEBUG */

/*
 * Switching from guest kernel to user mode.
 * flag == VGCF_IN_SYSCALL => return via sysret
 * flag == 0 => return via iretq
 *
 * See definition in public/arch-x86_64.h. Stack going in must be:
 * rax, r11, rcx, flags, rip, cs, rflags, rsp, ss.
 */
#define	HYPERVISOR_IRET(flag)			\
	__ASSERT_NO_RUPDATE_PENDING;		\
	pushq	$flag;				\
	pushq	%rcx;				\
	pushq	%r11;				\
	pushq	%rax;				\
	movl	$__HYPERVISOR_iret, %eax;	\
	syscall;				\
	ud2	/* die nastily if we return! */

#define	IRET	HYPERVISOR_IRET(0)
#define	SYSRETQ	HYPERVISOR_IRET(VGCF_IN_SYSCALL)
#define	SYSRETL	ud2		/* 32-bit syscall/sysret not supported */
#define	SWAPGS	/* empty - handled in hypervisor */

#elif defined(__i386)

/*
 * Switching from guest kernel to user mode.
 * See definition in public/arch-x86_32.h. Stack going in must be:
 * eax, flags, eip, cs, eflags, esp, ss.
 */
#define	HYPERVISOR_IRET				\
	pushl	%eax;				\
	movl	$__HYPERVISOR_iret, %eax;	\
	int	$0x82;				\
	ud2	/* die nastily if we return! */

#define	IRET	HYPERVISOR_IRET
#define	SYSRET	ud2		/* 32-bit syscall/sysret not supported */

#endif	/* __i386 */


/*
 * Xen 3.x wedges the current value of upcall_mask into an unused byte of
 * the saved %cs on the stack at the time of passing through a trap or
 * interrupt gate.  Since Xen also updates PS_IE in %[e,r]flags, we always
 * mask off the saved upcall mask so the kernel and/or tools like debuggers
 * will not be confused by bits set in reserved portions of the %cs slot.
 *
 * See xen/include/public/arch-x86_[32,64].h:cpu_user_regs_t for details.
 */
#if defined(__amd64)

#define	CLEAN_CS	movb	$0, REGOFF_CS+4(%rsp)

#elif defined(__i386)

#define	CLEAN_CS	movb	$0, REGOFF_CS+2(%esp)

#endif	/* __i386 */
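
/*
 * The byte offsets above point at the saved_upcall_mask byte that Xen
 * packs in next to the 16-bit %cs selector: byte 4 of the 8-byte %cs
 * slot on amd64 and byte 2 of the 4-byte slot on i386 (see the pad
 * bytes around saved_upcall_mask in cpu_user_regs_t).
 */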

/*
 * All exceptions for amd64 have %r11 and %rcx on the stack.
 * Just pop them back into their appropriate registers and
 * let the frame be saved as if running native.
 */
#if defined(__amd64)

#define	XPV_TRAP_POP	\
	popq	%rcx;	\
	popq	%r11

#define	XPV_TRAP_PUSH	\
	pushq	%r11;	\
	pushq	%rcx

#endif	/* __amd64 */


/*
 * Macros for saving the original segment registers and restoring them
 * for fast traps.
 */
#if defined(__amd64)

/*
 * Smaller versions of INTR_PUSH and INTR_POP for fast traps.
 * The following registers have been pushed onto the stack by
 * hardware at this point:
 *
 *	greg_t	r_rip;
 *	greg_t	r_cs;
 *	greg_t	r_rfl;
 *	greg_t	r_rsp;
 *	greg_t	r_ss;
 *
 * This handler is executed both by 32-bit and 64-bit applications.
 * 64-bit applications allow us to treat the set (%rdi, %rsi, %rdx,
 * %rcx, %r8, %r9, %r10, %r11, %rax) as volatile across function calls.
 * However, 32-bit applications only expect (%eax, %edx, %ecx) to be volatile
 * across a function call -- in particular, %esi and %edi MUST be saved!
 *
 * We could do this differently by making a FAST_INTR_PUSH32 for 32-bit
 * programs, and FAST_INTR_PUSH for 64-bit programs, but it doesn't seem
 * particularly worth it.
 *
 */
#define	FAST_INTR_PUSH			\
	INTGATE_INIT_KERNEL_FLAGS;	\
	popq	%rcx;			\
	popq	%r11;			\
	subq    $REGOFF_RIP, %rsp;	\
	movq    %rsi, REGOFF_RSI(%rsp);	\
	movq    %rdi, REGOFF_RDI(%rsp);	\
	CLEAN_CS
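
/*
 * After popping the Xen-pushed %rcx and %r11, the subq of REGOFF_RIP
 * positions %rsp as though a full struct regs were on the stack, with
 * the hardware-pushed rip/cs/rfl/rsp/ss already sitting at the right
 * offsets; only %rsi and %rdi are then saved explicitly, since they are
 * the extra registers a 32-bit caller expects to be preserved.
 */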

#define	FAST_INTR_POP			\
	movq    REGOFF_RSI(%rsp), %rsi;	\
	movq    REGOFF_RDI(%rsp), %rdi;	\
	addq    $REGOFF_RIP, %rsp

#define	FAST_INTR_RETURN		\
	ASSERT_UPCALL_MASK_IS_SET;	\
	HYPERVISOR_IRET(0)

#elif defined(__i386)

#define	FAST_INTR_PUSH			\
	cld;				\
	__SEGREGS_PUSH			\
	__SEGREGS_LOAD_KERNEL		\

#define	FAST_INTR_POP			\
	__SEGREGS_POP

#define	FAST_INTR_RETURN		\
	IRET

#endif	/* __i386 */
/*
 * Managing the CR0.TS bit for floating point handling.
 *
 * When the TS bit is *set*, attempts to touch the floating
 * point hardware will result in a #nm trap.
 */
#if defined(__amd64)

#define	STTS(rtmp)				\
	pushq	%rdi;				\
	movl	$1, %edi;			\
	call	HYPERVISOR_fpu_taskswitch;	\
	popq	%rdi

#define	CLTS					\
	pushq	%rdi;				\
	xorl	%edi, %edi;			\
	call	HYPERVISOR_fpu_taskswitch;	\
	popq	%rdi

#elif defined(__i386)

#define	STTS(r)					\
	pushl	$1;				\
	call	HYPERVISOR_fpu_taskswitch;	\
	addl	$4, %esp

#define	CLTS					\
	pushl	$0;				\
	call	HYPERVISOR_fpu_taskswitch;	\
	addl	$4, %esp

#endif	/* __i386 */
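
/*
 * A paravirtualized guest cannot write %cr0 directly, so both macros go
 * through HYPERVISOR_fpu_taskswitch(set), with set == 1 to set CR0.TS
 * and set == 0 to clear it.  The register argument to STTS is unused
 * here; it is presumably kept only so callers can share the native
 * macro's signature.
 */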

#ifdef __cplusplus
}
#endif

#endif	/* _SYS_MACHPRIVREGS_H */