xref: /gfx-drm/usr/src/uts/intel/ml/ia32.il (revision 6132907e)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
21*6132907eSGordon Ross
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
26*6132907eSGordon Ross
/
/ Inline functions for i386 kernels.
/	Shared between all x86 platform variants.
/
31*6132907eSGordon Ross
/
/ threadp: return the current thread pointer in %eax.
/
/ The pointer is loaded from %gs:0x10.  The literal 0x10 stands in for
/ the offset of "cpu_thread" from the beginning of the struct cpu; it
/ ought to be the computed value, but including "assym.h" does not work
/ because that header is PSM-specific and only visible to the 'unix'
/ build.  The current cpu pointer has the same problem: its "0xc" stands
/ in for the computed offset of "cpu_self".  Neither literal follows a
/ change to the struct cpu layout automatically.
/
	.inline	threadp,0
	movl	%gs:0x10, %eax		/ %eax = cpu_thread (hard-coded offset)
	.end
46*6132907eSGordon Ross
/
/ curcpup: return the current cpu pointer in %eax.
/
/ Loaded from %gs:0xc, where 0xc is a hard-coded stand-in for the
/ offset of "cpu_self" within the struct cpu.
/
	.inline	curcpup,0
	movl	%gs:0xc, %eax		/ %eax = cpu_self (hard-coded offset)
	.end
53*6132907eSGordon Ross
/
/ caller: return the caller of the function this template expands in.
/
/ Loads the word stored at 4(%ebp) into %eax.  NOTE(review): this is the
/ return address only when the enclosing function has set up a
/ conventional %ebp frame -- confirm frame pointers are always in use
/ where this is expanded.
/
	.inline caller,0
	movl	4(%ebp), %eax		/ %eax = word just above the saved %ebp
	.end
60*6132907eSGordon Ross
/
/ ipltospl: convert ipl to spl.  This is the identity function for i86,
/ so the single 32-bit argument is returned unchanged in %eax.
/
/ The argument size is declared as 4 because the body reads one 4-byte
/ stack slot, in line with the other one-argument templates in this
/ file (it was previously declared as 0).
/
	.inline	ipltospl,4
	movl	(%esp), %eax		/ %eax = ipl argument, returned as-is
	.end
67*6132907eSGordon Ross
/
/ lowbit: find the low order bit in a word.
/
/ Returns in %eax the 1-based position of the least significant set bit
/ of the 32-bit argument, or 0 when the argument is 0.
/
/ bsfl leaves its destination architecturally undefined when the source
/ is zero, so the result is not taken from a preloaded %eax.  Instead a
/ mask (all ones for a non-zero argument, zero otherwise) is built with
/ negl/sbbl; it both discards the bsfl result for a zero argument and
/ supplies the final +1.  Only i386 instructions are used.
/ Clobbers %edx and the flags.
/
	.inline lowbit,4
	movl	(%esp), %edx		/ %edx = argument
	bsfl	%edx, %eax		/ %eax = bit index (undefined if 0)
	negl	%edx			/ CF = (argument != 0)
	sbbl	%edx, %edx		/ %edx = argument ? -1 : 0
	andl	%edx, %eax		/ force 0 when the argument was 0
	subl	%edx, %eax		/ add 1 when the argument was non-zero
	.end
76*6132907eSGordon Ross
/
/ highbit: find the high order bit in a word.
/
/ Returns in %eax the 1-based position of the most significant set bit
/ of the 32-bit argument, or 0 when the argument is 0.
/
/ bsrl leaves its destination architecturally undefined when the source
/ is zero, so the result is not taken from a preloaded %eax.  Instead a
/ mask (all ones for a non-zero argument, zero otherwise) is built with
/ negl/sbbl; it both discards the bsrl result for a zero argument and
/ supplies the final +1.  Only i386 instructions are used.
/ Clobbers %edx and the flags.
/
	.inline highbit,4
	movl	(%esp), %edx		/ %edx = argument
	bsrl	%edx, %eax		/ %eax = bit index (undefined if 0)
	negl	%edx			/ CF = (argument != 0)
	sbbl	%edx, %edx		/ %edx = argument ? -1 : 0
	andl	%edx, %eax		/ force 0 when the argument was 0
	subl	%edx, %eax		/ add 1 when the argument was non-zero
	.end
85*6132907eSGordon Ross
/
/ Networking byte order functions (too bad, Intel has the wrong byte
/ order, so every one of them has to swap bytes).
/
/ htonll: byte-reverse a 64-bit value.  The argument occupies two stack
/ slots: the low word at (%esp) and the high word at 4(%esp).  Each word
/ is byte-swapped and the two trade places, the result being returned in
/ %edx:%eax.  The argument size is declared as 8 to match the eight
/ bytes actually read (it was previously declared as 4).
/
	.inline	htonll,8
	movl	(%esp), %edx		/ low word becomes the high result word
	movl	4(%esp), %eax		/ high word becomes the low result word
	bswap	%edx
	bswap	%eax
	.end
95*6132907eSGordon Ross
/
/ ntohll: byte-reverse a 64-bit value (the same operation as htonll).
/ The low argument word at (%esp) and the high word at 4(%esp) are each
/ byte-swapped and exchanged; the result is returned in %edx:%eax.
/ The argument size is declared as 8 to match the eight bytes actually
/ read (it was previously declared as 4).
/
	.inline	ntohll,8
	movl	(%esp), %edx		/ low word becomes the high result word
	movl	4(%esp), %eax		/ high word becomes the low result word
	bswap	%edx
	bswap	%eax
	.end
102*6132907eSGordon Ross
/
/ htonl: byte-reverse the 32-bit argument and return it in %eax.
/
	.inline	htonl,4
	movl	(%esp), %eax		/ %eax = argument
	bswap	%eax			/ reverse all four bytes
	.end
107*6132907eSGordon Ross
/
/ ntohl: byte-reverse the 32-bit argument and return it in %eax.
/
	.inline	ntohl,4
	movl	(%esp), %eax		/ %eax = argument
	bswap	%eax			/ reverse all four bytes
	.end
112*6132907eSGordon Ross
/
/ htons: swap the two low-order bytes of the argument.  The full 32-bit
/ stack slot is byte-reversed and then shifted right by 16, so the
/ swapped pair ends up in the low half of %eax with the upper half zero.
/
	.inline	htons,4
	movl	(%esp), %eax		/ %eax = argument slot
	bswap	%eax			/ low two bytes now on top, swapped
	shrl	$16, %eax		/ bring them down; upper half = 0
	.end
118*6132907eSGordon Ross
/
/ ntohs: swap the two low-order bytes of the argument.  The full 32-bit
/ stack slot is byte-reversed and then shifted right by 16, so the
/ swapped pair ends up in the low half of %eax with the upper half zero.
/
	.inline	ntohs,4
	movl	(%esp), %eax		/ %eax = argument slot
	bswap	%eax			/ low two bytes now on top, swapped
	shrl	$16, %eax		/ bring them down; upper half = 0
	.end
124*6132907eSGordon Ross
/*
 * mul32: multiply two long numbers and yield a u_longlong_t result.
 * Provided to manipulate hrtime_t values.
 *
 * The two 32-bit arguments sit at (%esp) and 4(%esp).  The unsigned
 * 64-bit product is left in %edx:%eax by mull.
 */
	.inline mul32, 8
	movl	(%esp), %eax		/ %eax = first operand
	mull	4(%esp)			/ %edx:%eax = %eax * second operand
	.end
134*6132907eSGordon Ross
/*
 * unlock_hres_lock: unlock hres_lock and increment the count value
 * (see clock.h).  Both happen in a single locked 32-bit increment of
 * the hres_lock word; nothing is returned.
 */
	.inline unlock_hres_lock, 0
	lock				/ make the increment atomic
	incl	hres_lock
	.end
142*6132907eSGordon Ross
/*
 * atomic_orb: atomically OR a byte value into the byte at an address.
 * The first argument (at (%esp)) is the address, the second (at
 * 4(%esp)) supplies the bits in its low byte.  Nothing is returned.
 */
	.inline	atomic_orb,8
	movl	4(%esp), %edx		/ %dl = bits to set
	movl	(%esp), %eax		/ %eax = target address
	lock
	orb	%dl,(%eax)
	.end
149*6132907eSGordon Ross
/*
 * atomic_andb: atomically AND a byte value into the byte at an address.
 * The first argument (at (%esp)) is the address, the second (at
 * 4(%esp)) supplies the mask in its low byte.  Nothing is returned.
 */
	.inline	atomic_andb,8
	movl	4(%esp), %edx		/ %dl = mask of bits to keep
	movl	(%esp), %eax		/ %eax = target address
	lock
	andb	%dl,(%eax)
	.end
156*6132907eSGordon Ross
/*
 * Atomic 16-bit increment/decrement operations; the C equivalents are
 *	void atomic_inc16(uint16_t *addr) { ++*addr; }
 *	void atomic_dec16(uint16_t *addr) { --*addr; }
 *
 * atomic_inc16: add one to the 16-bit word at addr under a lock prefix.
 */
	.inline	atomic_inc16,4
	movl	(%esp), %eax		/ %eax = addr
	lock
	incw	(%eax)			/ ++*addr
	.end
167*6132907eSGordon Ross
/*
 * atomic_dec16: subtract one from the 16-bit word at addr under a lock
 * prefix; the C equivalent is
 *	void atomic_dec16(uint16_t *addr) { --*addr; }
 */
	.inline	atomic_dec16,4
	movl	(%esp), %eax		/ %eax = addr
	lock
	decw	(%eax)			/ --*addr
	.end
173*6132907eSGordon Ross
/*
 * ht_pause: call the pause instruction.
 *
 * To the Pentium 4 Xeon processor, pause acts as a hint that the code
 * sequence is a busy spin-wait loop.  Without a pause instruction in
 * such loops, the P4 Xeon processor may suffer a severe penalty when
 * exiting the loop because the processor detects a possible memory
 * order violation.  Inserting the pause instruction significantly
 * reduces the likelihood of a memory order violation, improving
 * performance.  The pause instruction is a NOP on all other IA-32
 * processors.
 */
	.inline ht_pause, 0
	rep			/ our compiler doesn't support "pause" yet,
	nop			/ so "rep; nop" emits its "F3 90" opcode
	.end
187*6132907eSGordon Ross
/*
 * prefetch 64 bytes
 *
 * prefetch is an SSE extension which is not supported on older 32-bit
 * processors, so the prefetch templates are defined as no-ops for now.
 *
 * prefetch_read_many: expands to nothing.  The intended instruction
 * sequence is kept below, commented out.
 */
	.inline	prefetch_read_many,4
/	movl		(%esp), %eax
/	prefetcht0	(%eax)
/	prefetcht0	32(%eax)
	.end
200*6132907eSGordon Ross
/*
 * prefetch_read_once: expands to nothing.  The intended instruction
 * sequence is kept below, commented out.
 */
	.inline	prefetch_read_once,4
/	movl		(%esp), %eax
/	prefetchnta	(%eax)
/	prefetchnta	32(%eax)
	.end
206*6132907eSGordon Ross
/*
 * prefetch_write_many: expands to nothing.  The intended instruction
 * sequence is kept below, commented out.
 */
	.inline	prefetch_write_many,4
/	movl		(%esp), %eax
/	prefetcht0	(%eax)
/	prefetcht0	32(%eax)
	.end
212*6132907eSGordon Ross
/*
 * prefetch_write_once: expands to nothing.  The intended instruction
 * sequence is kept below, commented out.
 */
	.inline	prefetch_write_once,4
/	movl		(%esp), %eax
/	prefetcht0	(%eax)
/	prefetcht0	32(%eax)
	.end
218*6132907eSGordon Ross
219