1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef	_SYS_MACHPRIVREGS_H
28 #define	_SYS_MACHPRIVREGS_H
29 
30 #include <sys/hypervisor.h>
31 
32 /*
33  * Platform dependent instruction sequences for manipulating
34  * privileged state
35  */
36 
37 #ifdef __cplusplus
38 extern "C" {
39 #endif
40 
41 /*
42  * CLI and STI are quite complex to virtualize!
43  */
44 
#if defined(__amd64)

/*
 * Per-CPU loads through %gs.  CURVCPU(r) puts the CPU_VCPU_INFO slot
 * (the vcpu pointer consumed by the XEN_* macros) into register r;
 * CURTHREAD(r) puts the CPU_THREAD slot into register r.
 */
#define	CURVCPU(r)	movq	%gs:CPU_VCPU_INFO, r
#define	CURTHREAD(r)	movq	%gs:CPU_THREAD, r

#elif defined(__i386)

/* 32-bit flavours of the same two per-CPU loads. */
#define	CURVCPU(r)	movl	%gs:CPU_VCPU_INFO, r
#define	CURTHREAD(r)	movl	%gs:CPU_THREAD, r

#endif	/* __i386 */
62 
/*
 * Accessors for the event-channel upcall bytes of a vcpu.  In every macro
 * `r' is a register that already holds the vcpu pointer (see CURVCPU()).
 * The test macros only set the condition codes; ZF is set when the tested
 * bits are all zero.
 */

/* ZF clear iff any bit of the upcall-pending byte is set. */
#define	XEN_TEST_EVENT_PENDING(r) \
	testb	$0xff, VCPU_INFO_EVTCHN_UPCALL_PENDING(r)

/* Store 1 into the upcall-mask byte. */
#define	XEN_SET_UPCALL_MASK(r) \
	movb	$1, VCPU_INFO_EVTCHN_UPCALL_MASK(r)

/* Copy the upcall-mask byte into the byte-sized operand `mask'. */
#define	XEN_GET_UPCALL_MASK(r, mask) \
	movb	VCPU_INFO_EVTCHN_UPCALL_MASK(r), mask

/* ZF clear iff bit 0 of the upcall-mask byte is set. */
#define	XEN_TEST_UPCALL_MASK(r) \
	testb	$1, VCPU_INFO_EVTCHN_UPCALL_MASK(r)

/*
 * Store 0 into the upcall-mask byte.  ASSERT_UPCALL_MASK_IS_SET runs
 * first; it expands to nothing unless DEBUG is defined.
 */
#define	XEN_CLEAR_UPCALL_MASK(r) \
	ASSERT_UPCALL_MASK_IS_SET; \
	movb	$0, VCPU_INFO_EVTCHN_UPCALL_MASK(r)
78 
79 #ifdef DEBUG
80 
81 /*
82  * Much logic depends on the upcall mask being set at
83  * various points in the code; use this macro to validate.
84  *
85  * Need to use CURVCPU(r) to establish the vcpu pointer.
86  */
87 #if defined(__amd64)
88 
/*
 * DEBUG check (amd64) that the current vcpu's upcall mask is set.
 *
 * If the mask is clear and stistipanic is greater than zero, stistipanic
 * is set to -1 and panic() is called with stistimsg as its argument.
 * In every non-panicking case the address of this expansion is stored in
 * laststi[], indexed by the current CPU id.
 *
 * All general registers are preserved (three stack slots are used
 * transiently); the condition codes are clobbered.  Uses local label 6.
 */
#define	ASSERT_UPCALL_MASK_IS_SET					\
	pushq	%r11;							\
	CURVCPU(%r11);							\
	XEN_TEST_UPCALL_MASK(%r11);					\
	jnz	6f;		/* mask is set: no complaint */		\
	cmpl	$0, stistipanic(%rip);					\
	jle	6f;		/* stistipanic <= 0: do not panic */	\
	movl	$-1, stistipanic(%rip);					\
	movq	stistimsg(%rip), %rdi;	/* first argument to panic */	\
	xorl	%eax, %eax;	/* zero %eax before the call */		\
	call	panic;							\
6:	pushq	%rax; pushq %rbx;					\
	movl	%gs:CPU_ID, %eax;	/* zero-extends into %rax */	\
	leaq	.+0(%rip), %r11;	/* address of this site */	\
	leaq	laststi(%rip), %rbx;					\
	movq	%r11, (%rbx, %rax, 8);	/* laststi[cpu id] = site */	\
	popq	%rbx; popq %rax;					\
	popq	%r11
109 
/*
 * DEBUG aid (amd64): store the address of this expansion in lastcli[],
 * indexed by the current CPU id (8-byte entries).
 *
 * %rax, %rbx and %rcx are saved and restored around the store, and none
 * of the instructions used modifies the condition codes.
 *
 * lastcli is addressed %rip-relative, matching how laststi is addressed
 * by ASSERT_UPCALL_MASK_IS_SET.  The expansion deliberately still ends
 * in `;' so that it is unchanged for existing users, but the definition
 * no longer ends in a line continuation that would swallow whatever
 * line happens to follow it.
 */
#define	SAVE_CLI_LOCATION				\
	pushq	%rax;					\
	pushq	%rbx;					\
	pushq	%rcx;					\
	movl	%gs:CPU_ID, %eax;			\
	leaq	.+0(%rip), %rcx;			\
	leaq	lastcli(%rip), %rbx;			\
	movq	%rcx, (%rbx, %rax, 8);			\
	popq	%rcx;					\
	popq	%rbx;					\
	popq	%rax;
121 
122 #elif defined(__i386)
123 
/*
 * DEBUG check (i386) that the current vcpu's upcall mask is set.
 *
 * If the mask is clear and stistipanic is greater than zero, stistipanic
 * is set to -1 and panic() is called with stistimsg pushed as its
 * argument.  In every non-panicking case the address of this expansion
 * is stored in laststi[], indexed by the current CPU id.
 *
 * All general registers are preserved (three stack slots are used
 * transiently); the condition codes are clobbered.  Uses local label 6.
 */
#define	ASSERT_UPCALL_MASK_IS_SET					\
	pushl	%ecx;							\
	CURVCPU(%ecx);							\
	XEN_TEST_UPCALL_MASK(%ecx);					\
	jnz	6f;		/* mask is set: no complaint */		\
	cmpl	$0, stistipanic;					\
	jle	6f;		/* stistipanic <= 0: do not panic */	\
	movl	$-1, stistipanic;					\
	movl	stistimsg, %ecx;					\
	pushl	%ecx;		/* argument to panic */			\
	call	panic;							\
6:	pushl	%eax; pushl %ebx;					\
	movl	%gs:CPU_ID, %eax;					\
	leal	.+0, %ecx;		/* address of this site */	\
	leal	laststi, %ebx;						\
	movl	%ecx, (%ebx, %eax, 4);	/* laststi[cpu id] = site */	\
	popl	%ebx; popl %eax;					\
	popl	%ecx
144 
/*
 * DEBUG aid (i386): store the address of this expansion in lastcli[],
 * indexed by the current CPU id (4-byte entries).
 *
 * %eax, %ebx and %ecx are saved and restored around the store, and none
 * of the instructions used modifies the condition codes.
 *
 * The expansion deliberately still ends in `;' so that it is unchanged
 * for existing users, but the definition no longer ends in a line
 * continuation that would swallow whatever line happens to follow it.
 */
#define	SAVE_CLI_LOCATION				\
	pushl	%eax;					\
	pushl	%ebx;					\
	pushl	%ecx;					\
	movl	%gs:CPU_ID, %eax;			\
	leal	.+0, %ecx;				\
	leal	lastcli, %ebx;				\
	movl	%ecx, (%ebx, %eax, 4);			\
	popl	%ecx;					\
	popl	%ebx;					\
	popl	%eax;
156 
157 #endif	/* __i386 */
158 
159 #else	/* DEBUG */
160 
/* Without DEBUG both debugging hooks expand to nothing at all. */
#define	ASSERT_UPCALL_MASK_IS_SET
#define	SAVE_CLI_LOCATION
163 
164 #endif	/* DEBUG */
165 
/*
 * Bump or drop the byte at T_PREEMPT in the thread whose pointer is in
 * register t.  KPREEMPT_ENABLE_NOKP is nothing more than the decrement:
 * it performs no other work after dropping the count.
 */
#define	KPREEMPT_DISABLE(t)	addb	$1, T_PREEMPT(t)
#define	KPREEMPT_ENABLE_NOKP(t)	subb	$1, T_PREEMPT(t)
171 
/*
 * Virtual "cli": set the upcall mask of the current vcpu.
 *
 * The T_PREEMPT count of the current thread is raised before the vcpu
 * pointer is fetched and lowered again once the mask has been written
 * (presumably so the thread cannot move to another cpu in between).
 * Register r is scratch and ends up holding the current thread pointer;
 * the condition codes are clobbered.  On DEBUG kernels the call site is
 * also recorded via SAVE_CLI_LOCATION.
 */
#define	CLI(r)							\
	CURTHREAD(r); KPREEMPT_DISABLE(r);			\
	CURVCPU(r); XEN_SET_UPCALL_MASK(r);			\
	SAVE_CLI_LOCATION;					\
	CURTHREAD(r); KPREEMPT_ENABLE_NOKP(r)
180 
/*
 * Same as CLI(r), but the previous value of the upcall-mask byte is
 * copied into `ret' before the mask is set.
 *
 * `ret' is written with a byte move and `r' is used as a pointer right
 * afterwards, so `ret' must not be any part of register r.  Register r
 * ends up holding the current thread pointer; the condition codes are
 * clobbered.
 */
#define	CLIRET(r, ret)						\
	CURTHREAD(r); KPREEMPT_DISABLE(r);			\
	CURVCPU(r);						\
	XEN_GET_UPCALL_MASK(r, ret);	/* old mask -> ret */	\
	XEN_SET_UPCALL_MASK(r);					\
	SAVE_CLI_LOCATION;					\
	CURTHREAD(r); KPREEMPT_ENABLE_NOKP(r)
190 
191 /*
192  * We use the fact that HYPERVISOR_block will clear the upcall mask
193  * for us and then give us an upcall if there is a pending event
194  * to achieve getting a callback on this cpu without the danger of
195  * being preempted and migrating to another cpu between the upcall
196  * enable and the callback delivery.
197  */
198 #if defined(__amd64)
199 
/*
 * Virtual "sti" (amd64) for callers that can afford to lose %rax, %rdi
 * and %r11.
 *
 * A single locked 16-bit compare-and-exchange at the upcall-pending
 * offset replaces "mask set, nothing pending" (0x100) with zero.  If the
 * word held anything else, the SCHEDOP_block hypercall is issued instead,
 * with the remaining C argument registers and %r10 saved around it.
 * Uses local label 7; the condition codes are clobbered.
 */
#define	STI_CLOBBER		/* clobbers %rax, %rdi, %r11 */		\
	CURVCPU(%r11);							\
	ASSERT_UPCALL_MASK_IS_SET;					\
	movw	$0x100, %ax;	/* expect: mask set, pending clear */	\
	movw	$0, %di;	/* new value: neither mask nor pending */ \
	lock;								\
	cmpxchgw %di, VCPU_INFO_EVTCHN_UPCALL_PENDING(%r11);		\
	je	7f;		/* exchange succeeded, we're done */	\
	movl	$__HYPERVISOR_sched_op, %eax; /* have pending upcall */	\
	movl	$SCHEDOP_block, %edi;					\
	pushq	%rsi; pushq %rcx; pushq %rdx;	/* hypercall clobbers */ \
	pushq	%r8; pushq %r9; pushq %r10;	/* C param regs + r10 */ \
	TRAP_INSTR;	/* clear upcall mask, force upcall */		\
	popq	%r10; popq %r9; popq %r8;				\
	popq	%rdx; popq %rcx; popq %rsi;				\
7:
224 
/*
 * Register-preserving virtual "sti" (amd64): wraps STI_CLOBBER with a
 * save and restore of the three registers it destroys.  The condition
 * codes are still clobbered.
 */
#define	STI								\
	pushq	%r11; pushq %rdi; pushq %rax;				\
	STI_CLOBBER;	/* clobbers %r11, %rax, %rdi */			\
	popq	%rax; popq %rdi; popq %r11
233 
234 #elif defined(__i386)
235 
/*
 * Virtual "sti" (i386) for callers that can afford to lose %eax, %ebx
 * and %ecx.
 *
 * A single locked 16-bit compare-and-exchange at the upcall-pending
 * offset replaces "mask set, nothing pending" (0x100) with zero.  If the
 * word held anything else, the SCHEDOP_block hypercall is issued instead.
 * Uses local label 7; the condition codes are clobbered.
 */
#define	STI_CLOBBER		/* clobbers %eax, %ebx, %ecx */		\
	CURVCPU(%ecx);							\
	ASSERT_UPCALL_MASK_IS_SET;					\
	movw	$0x100, %ax;	/* expect: mask set, pending clear */	\
	movw	$0, %bx;	/* new value: neither mask nor pending */ \
	lock;								\
	cmpxchgw %bx, VCPU_INFO_EVTCHN_UPCALL_PENDING(%ecx);		\
	je	7f;		/* exchange succeeded, we're done */	\
	movl	$__HYPERVISOR_sched_op, %eax; /* have pending upcall */	\
	movl	$SCHEDOP_block, %ebx;					\
	TRAP_INSTR;		/* clear upcall mask, force upcall */	\
7:
248 
/*
 * Register-preserving virtual "sti" (i386): wraps STI_CLOBBER with a
 * save and restore of the three registers it destroys.  The condition
 * codes are still clobbered.
 */
#define	STI							\
	pushl	%eax; pushl %ebx; pushl %ecx;			\
	STI_CLOBBER;	/* clobbers %eax, %ebx, %ecx */		\
	popl	%ecx; popl %ebx; popl %eax
257 
258 #endif	/* __i386 */
259 
260 /*
261  * Map the PS_IE bit to the hypervisor's event mask bit
262  * To -set- the event mask, we have to do a CLI
263  * To -clear- the event mask, we have to do a STI
264  * (with all the accompanying pre-emption and callbacks, ick)
265  *
266  * And vice versa.
267  */
268 
269 #if defined(__amd64)
270 
/*
 * Make the event mask follow the PS_IE bit of `rfl' (amd64): PS_IE set
 * does an STI, PS_IE clear does a CLI(rtmp).  rtmp is only written on
 * the CLI path.  Uses local labels 4 and 5; clobbers the condition codes.
 */
#define	IE_TO_EVENT_MASK(rtmp, rfl)			\
	testq	$PS_IE, rfl;				\
	jne	4f;		/* PS_IE set: go STI */	\
	CLI(rtmp);					\
	jmp	5f;					\
4:	STI;						\
5:
278 
/*
 * Make the PS_IE bit of `rfl' follow the event mask (amd64): PS_IE is
 * cleared, then set again only if the current vcpu's upcall mask is
 * clear.  rtmp is left holding the vcpu pointer.  Uses local label 1;
 * clobbers the condition codes.
 */
#define	EVENT_MASK_TO_IE(rtmp, rfl)			\
	andq	$_BITNOT(PS_IE), rfl;			\
	CURVCPU(rtmp);					\
	XEN_TEST_UPCALL_MASK(rtmp);			\
	jne	1f;		/* masked: leave PS_IE clear */ \
	orq	$PS_IE, rfl;				\
1:
286 
287 #elif defined(__i386)
288 
/*
 * Make the event mask follow the PS_IE bit of `rfl' (i386): PS_IE set
 * does an STI, PS_IE clear does a CLI(rtmp).  rtmp is only written on
 * the CLI path.  Uses local labels 4 and 5; clobbers the condition codes.
 */
#define	IE_TO_EVENT_MASK(rtmp, rfl)			\
	testl	$PS_IE, rfl;				\
	jne	4f;		/* PS_IE set: go STI */	\
	CLI(rtmp);					\
	jmp	5f;					\
4:	STI;						\
5:
296 
/*
 * Make the PS_IE bit of `rfl' follow the event mask (i386): PS_IE is
 * cleared, then set again only if the current vcpu's upcall mask is
 * clear.  rtmp is left holding the vcpu pointer.  Uses local label 1;
 * clobbers the condition codes.
 */
#define	EVENT_MASK_TO_IE(rtmp, rfl)			\
	andl	$_BITNOT(PS_IE), rfl;			\
	CURVCPU(rtmp);					\
	XEN_TEST_UPCALL_MASK(rtmp);			\
	jne	1f;		/* masked: leave PS_IE clear */ \
	orl	$PS_IE, rfl;				\
1:
304 
305 #endif	/* __i386 */
306 
307 /*
308  * Used to re-enable interrupts in the body of exception handlers
309  */
310 
/*
 * Load the flags register with F_ON (through the stack), then perform
 * the virtual STI.
 */
#if defined(__amd64)

#define	ENABLE_INTR_FLAGS	pushq	$F_ON; popfq; STI

#elif defined(__i386)

#define	ENABLE_INTR_FLAGS	pushl	$F_ON; popfl; STI

#endif	/* __i386 */
326 
327 /*
328  * Virtualize IRET and SYSRET
329  */
330 
331 #if defined(__amd64)
332 
#if defined(DEBUG)

/*
 * Used at the top of HYPERVISOR_IRET: take a #ud trap if the frame being
 * returned to is not a kernel one (saved %cs differs from KCS_SEL) while
 * bit 0 of PCB_RUPDATE is set for the current thread's lwp.
 *
 * %r15 is saved and restored; the condition codes are clobbered.  The
 * saved %cs is expected at 0x10(%rsp) once %r15 has been pushed, i.e.
 * directly above the saved %rip.  Uses local label 1.
 */
#define	__ASSERT_NO_RUPDATE_PENDING				\
	pushq	%r15;						\
	cmpw	$KCS_SEL, 0x10(%rsp);				\
	jz	1f;		/* kernel %cs: nothing to check */ \
	movq	%gs:CPU_THREAD, %r15;				\
	movq	T_LWP(%r15), %r15;				\
	testb	$0x1, PCB_RUPDATE(%r15);			\
	jz	1f;		/* no update pending */		\
	ud2;							\
1:	popq	%r15

#else	/* DEBUG */

/* Without DEBUG the check expands to nothing. */
#define	__ASSERT_NO_RUPDATE_PENDING

#endif	/* DEBUG */
355 
/*
 * Switching from guest kernel to user mode through the iret hypercall.
 * flag == VGCF_IN_SYSCALL => return via sysret
 * flag == 0 => return via iretq
 *
 * See definition in public/arch-x86_64.h.  When the hypercall is made the
 * stack must hold, from the top down:
 *	rax, r11, rcx, flags, rip, cs, rflags, rsp, ss.
 * This macro pushes the first four of those itself (flags being `flag'),
 * so on entry %rsp must point at the rip, cs, rflags, rsp, ss frame.
 */
#define	HYPERVISOR_IRET(flag)				\
	__ASSERT_NO_RUPDATE_PENDING;			\
	pushq	$flag; pushq %rcx;			\
	pushq	%r11; pushq %rax;			\
	movl	$__HYPERVISOR_iret, %eax;		\
	syscall;					\
	ud2	/* die nastily if we return! */
373 
/* A plain return from trap or interrupt is the iret hypercall, flag 0. */
#define	IRET		HYPERVISOR_IRET(0)

/*
 * XXPV: Normally we would expect to use sysret to return from kernel to
 *	 user mode when using the syscall instruction. The iret hypercall
 *	 does support both iret and sysret semantics. For us to use sysret
 *	 style would require that we use the hypervisor's private descriptors
 *	 that obey syscall instruction's imposed segment selector ordering.
 *	 With iret we can use whatever %cs value we choose. We should fix
 *	 this to use sysret one day.
 */
#define	SYSRETQ		HYPERVISOR_IRET(0)
#define	SYSRETL		ud2	/* 32-bit syscall/sysret not supported */
#define	SWAPGS			/* empty - handled in hypervisor */

/*
 * As of GNU binutils 2.37, the assembler has split the 'sysexit'
 * instruction into 'sysexitl' and 'sysexitq'.  A plain 'sysexit' is taken
 * to mean 'sysexitl', but with a warning about that assumption, and since
 * all warnings are treated as errors in the kernel build the result is a
 * build failure.  The desired 'sysexitl' cannot be spelled out either,
 * because older versions of the GNU assembler do not understand it.
 * The following macro therefore emits the byte sequence for 'sysexitl'
 * on this platform directly.
 */
#define	SYSEXITL	.byte 0x0f, 0x35
400 
401 #elif defined(__i386)
402 
/*
 * Switching from guest kernel to user mode through the iret hypercall.
 * See definition in public/arch-x86_32.h.  When the hypercall is made the
 * stack must hold, from the top down:
 *	eax, flags, eip, cs, eflags, esp, ss.
 * This macro pushes only %eax itself, so everything from flags downwards
 * must already be on the stack when it is used.
 */
#define	HYPERVISOR_IRET					\
	pushl	%eax;					\
	movl	$__HYPERVISOR_iret, %eax;		\
	int	$0x82;					\
	ud2	/* die nastily if we return! */

#define	IRET		HYPERVISOR_IRET
#define	SYSRET		ud2	/* 32-bit syscall/sysret not supported */
416 
417 #endif	/* __i386 */
418 
419 
/*
 * Xen 3.x wedges the current value of upcall_mask into an unused byte of
 * the saved %cs on the stack at the time of passing through a trap or
 * interrupt gate.  Since Xen also updates PS_IE in %[e,r]flags, we always
 * mask off the saved upcall mask so that the kernel and tools such as
 * debuggers are not confused by bits set in reserved portions of the %cs
 * slot.
 *
 * See xen/include/public/arch-x86_[32,64].h:cpu_user_regs_t for details.
 *
 * CLEAN_CS zeroes that single byte: offset 4 of the REGOFF_CS slot on
 * amd64, offset 2 on i386, relative to the stack pointer.
 */
#if defined(__amd64)
#define	CLEAN_CS	movb	$0, REGOFF_CS+4(%rsp)
#elif defined(__i386)
#define	CLEAN_CS	movb	$0, REGOFF_CS+2(%esp)
#endif	/* __i386 */
438 
/*
 * All exceptions for amd64 arrive with %rcx and %r11 on the stack
 * (%rcx on top).  XPV_TRAP_POP pops them back into their registers so
 * that the state can then be saved as if running native; XPV_TRAP_PUSH
 * is the exact inverse.
 */
#if defined(__amd64)

#define	XPV_TRAP_POP	popq	%rcx; popq	%r11
#define	XPV_TRAP_PUSH	pushq	%r11; pushq	%rcx

#endif	/* __amd64 */
455 
456 
457 /*
458  * Macros for saving the original segment registers and restoring them
459  * for fast traps.
460  */
461 #if defined(__amd64)
462 
463 /*
464  * Smaller versions of INTR_PUSH and INTR_POP for fast traps.
465  * The following registers have been pushed onto the stack by
466  * hardware at this point:
467  *
468  *	greg_t	r_rip;
469  *	greg_t	r_cs;
470  *	greg_t	r_rfl;
471  *	greg_t	r_rsp;
472  *	greg_t	r_ss;
473  *
474  * This handler is executed both by 32-bit and 64-bit applications.
475  * 64-bit applications allow us to treat the set (%rdi, %rsi, %rdx,
476  * %rcx, %r8, %r9, %r10, %r11, %rax) as volatile across function calls.
477  * However, 32-bit applications only expect (%eax, %edx, %ecx) to be volatile
478  * across a function call -- in particular, %esi and %edi MUST be saved!
479  *
480  * We could do this differently by making a FAST_INTR_PUSH32 for 32-bit
481  * programs, and FAST_INTR_PUSH for 64-bit programs, but it doesn't seem
482  * particularly worth it.
483  *
484  */
/*
 * Fast-trap entry (amd64): after INTGATE_INIT_KERNEL_FLAGS, recover %rcx
 * and %r11 from the top of the stack, open a register save area reaching
 * down to offset 0 (REGOFF_RIP bytes), store only %rsi and %rdi in it,
 * and scrub the saved %cs slot with CLEAN_CS.
 */
#define	FAST_INTR_PUSH				\
	INTGATE_INIT_KERNEL_FLAGS;		\
	XPV_TRAP_POP;	/* popq %rcx; popq %r11 */ \
	subq	$REGOFF_RIP, %rsp;		\
	movq	%rsi, REGOFF_RSI(%rsp);		\
	movq	%rdi, REGOFF_RDI(%rsp);		\
	CLEAN_CS
493 
/*
 * Fast-trap exit (amd64): reload the two registers FAST_INTR_PUSH saved
 * and drop the save area again.
 */
#define	FAST_INTR_POP				\
	movq	REGOFF_RSI(%rsp), %rsi;		\
	movq	REGOFF_RDI(%rsp), %rdi;		\
	addq	$REGOFF_RIP, %rsp

/*
 * Leave a fast trap through the iret hypercall (flag 0).  DEBUG kernels
 * first verify that the upcall mask is set.
 */
#define	FAST_INTR_RETURN	ASSERT_UPCALL_MASK_IS_SET; HYPERVISOR_IRET(0)
502 
503 #elif defined(__i386)
504 
/*
 * Fast-trap entry (i386): clear the direction flag, then expand
 * __SEGREGS_PUSH followed by __SEGREGS_LOAD_KERNEL.
 *
 * The two __SEGREGS_* macros are expanded back to back with no separator
 * of our own, exactly as before.  The definition no longer ends in a
 * line continuation, which used to pull the following line into the
 * macro body.
 */
#define	FAST_INTR_PUSH			\
	cld;				\
	__SEGREGS_PUSH			\
	__SEGREGS_LOAD_KERNEL
509 
/* Fast-trap exit (i386): expands to __SEGREGS_POP. */
#define	FAST_INTR_POP		__SEGREGS_POP

/* Leave a fast trap (i386) through IRET, i.e. the iret hypercall. */
#define	FAST_INTR_RETURN	IRET
515 
516 #endif	/* __i386 */
517 
518 /*
519  * Handling the CR0.TS bit for floating point handling.
520  *
521  * When the TS bit is *set*, attempts to touch the floating
522  * point hardware will result in a #nm trap.
523  */
524 #if defined(__amd64)
525 
/*
 * Set (STTS) or clear (CLTS) the virtual CR0.TS bit on amd64 by calling
 * HYPERVISOR_fpu_taskswitch with 1 or 0 in %edi.
 *
 * Only %rdi is saved and restored here; `rtmp' is accepted but unused.
 * NOTE(review): the call may clobber other caller-saved registers and
 * the condition codes -- confirm against the callee before relying on
 * anything but %rdi surviving.
 */
#define	STTS(rtmp)					\
	pushq	%rdi;					\
	movl	$1, %edi;	/* argument: set TS */	\
	call	HYPERVISOR_fpu_taskswitch;		\
	popq	%rdi

#define	CLTS						\
	pushq	%rdi;					\
	xorl	%edi, %edi;	/* argument: clear TS */ \
	call	HYPERVISOR_fpu_taskswitch;		\
	popq	%rdi
537 
538 #elif defined(__i386)
539 
/*
 * Set (STTS) or clear (CLTS) the virtual CR0.TS bit on i386 by calling
 * HYPERVISOR_fpu_taskswitch with 1 or 0 pushed as its stack argument;
 * the argument is discarded again after the call.  `r' is accepted but
 * unused.
 */
#define	STTS(r)						\
	pushl	$1;		/* argument: set TS */	\
	call	HYPERVISOR_fpu_taskswitch;		\
	addl	$4, %esp

#define	CLTS						\
	pushl	$0;		/* argument: clear TS */ \
	call	HYPERVISOR_fpu_taskswitch;		\
	addl	$4, %esp
549 
550 #endif	/* __i386 */
551 
552 #ifdef __cplusplus
553 }
554 #endif
555 
556 #endif	/* _SYS_MACHPRIVREGS_H */
557