/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/
/ Inline functions for i386 kernels.
/	Shared between all x86 platform variants.
/
/ Conventions visible in the templates below: arguments are read from
/ the stack starting at (%esp), 32-bit results are left in %eax, and
/ 64-bit results are left in %edx:%eax.
/

/
/ return current thread pointer
/
/ NOTE: the "0x10" should be replaced by the computed value of the
/	offset of "cpu_thread" from the beginning of the struct cpu.
/	Including "assym.h" does not work, however, since that stuff
/	is PSM-specific and is only visible to the 'unix' build anyway.
/	Same with current cpu pointer, where "0xc" should be replaced
/	by the computed value of the offset of "cpu_self".
/	Ugh -- what a disaster.
/
	.inline	threadp,0
	movl	%gs:0x10, %eax
	.end

/
/ return current cpu pointer
/
	.inline	curcpup,0
	movl	%gs:0xc, %eax
	.end

/
/ return caller
/
/ Loads the word at 4(%ebp).  NOTE(review): this is the enclosing
/ function's return address only if that function keeps %ebp as a
/ frame pointer -- confirm for the compiler flags in use.
/
	.inline	caller,0
	movl	4(%ebp), %eax
	.end

/
/ convert ipl to spl.  This is the identity function for i86
/
/ NOTE(review): the declared argument size is 0 although the body
/ reads one word from (%esp); confirm whether the size field matters
/ to the compiler before changing it.
/
	.inline	ipltospl,0
	movl	(%esp), %eax
	.end

/
/ find the low order bit in a word
/
/ %eax is preloaded with -1 so that the final incl yields 0 when the
/ argument is zero.  This relies on bsfl leaving its destination
/ untouched for a zero source.  NOTE(review): verify that guarantee
/ against the processor documentation for all supported CPUs.
/ For a non-zero argument the result is the 1-based bit index.
/
	.inline	lowbit,4
	movl	$-1, %eax
	bsfl	(%esp), %eax
	incl	%eax
	.end

/
/ find the high order bit in a word
/
/ Same scheme as lowbit, using bsrl to scan from the top; the same
/ zero-source caveat applies.
/
	.inline	highbit,4
	movl	$-1, %eax
	bsrl	(%esp), %eax
	incl	%eax
	.end

/
/ Networking byte order functions (too bad, Intel has the wrong byte order)
/
/ htonll/ntohll: the word at (%esp) is byte-swapped into %edx and the
/ word at 4(%esp) is byte-swapped into %eax, i.e. the two halves trade
/ places as well as having their bytes reversed.
/ NOTE(review): the declared argument size is 4 although 8 bytes are
/ read; confirm whether the size field matters to the compiler.
/
	.inline	htonll,4
	movl	(%esp), %edx
	movl	4(%esp), %eax
	bswap	%edx
	bswap	%eax
	.end

	.inline	ntohll,4
	movl	(%esp), %edx
	movl	4(%esp), %eax
	bswap	%edx
	bswap	%eax
	.end

/
/ htonl/ntohl: reverse the four bytes of the argument.
/
	.inline	htonl,4
	movl	(%esp), %eax
	bswap	%eax
	.end

	.inline	ntohl,4
	movl	(%esp), %eax
	bswap	%eax
	.end

/
/ htons/ntohs: reverse all four bytes, then shift the swapped low
/ half down into bits 0-15; the upper 16 bits of %eax end up zero.
/
	.inline	htons,4
	movl	(%esp), %eax
	bswap	%eax
	shrl	$16, %eax
	.end

	.inline	ntohs,4
	movl	(%esp), %eax
	bswap	%eax
	shrl	$16, %eax
	.end

/*
 * multiply two long numbers and yield a u_longlong_t result
 * Provided to manipulate hrtime_t values.
 *
 * mull is an unsigned multiply; the 64-bit product is left in
 * %edx:%eax.  %ecx is used as scratch.
 */
	.inline	mul32, 8
	movl	4(%esp), %eax
	movl	(%esp), %ecx
	mull	%ecx
	.end

/*
 * Unlock hres_lock and increment the count value. (See clock.h)
 */
	.inline	unlock_hres_lock, 0
	lock
	incl	hres_lock
	.end

/*
 * Atomic byte-wide OR / AND.
 * The first argument is used as the target address and the low byte
 * of the second argument is the mask applied to the byte stored there.
 */
	.inline	atomic_orb,8
	movl	(%esp), %eax
	movl	4(%esp), %edx
	lock
	orb	%dl,(%eax)
	.end

	.inline	atomic_andb,8
	movl	(%esp), %eax
	movl	4(%esp), %edx
	lock
	andb	%dl,(%eax)
	.end

/*
 * atomic inc/dec operations.
 *	void atomic_inc16(uint16_t *addr) { ++*addr; }
 *	void atomic_dec16(uint16_t *addr) { --*addr; }
 */
	.inline	atomic_inc16,4
	movl	(%esp), %eax
	lock
	incw	(%eax)
	.end

	.inline	atomic_dec16,4
	movl	(%esp), %eax
	lock
	decw	(%eax)
	.end

/*
 * Call the pause instruction.  To the Pentium 4 Xeon processor, it acts as
 * a hint that the code sequence is a busy spin-wait loop.  Without a pause
 * instruction in these loops, the P4 Xeon processor may suffer a severe
 * penalty when exiting the loop because the processor detects a possible
 * memory order violation.  Inserting the pause instruction significantly
 * reduces the likelihood of a memory order violation, improving performance.
 * The pause instruction is a NOP on all other IA-32 processors.
 */
	.inline	ht_pause, 0
	rep			/ our compiler doesn't support "pause" yet,
	nop			/ so we're using "F3 90" opcode directly
	.end

/*
 * prefetch 64 bytes
 *
 * prefetch is an SSE extension which is not supported on older 32-bit
 * processors so define this as a no-op for now
 *
 * The intended instruction sequences are kept below as comments; each
 * template currently expands to nothing.
 */

	.inline	prefetch_read_many,4
/	movl	(%esp), %eax
/	prefetcht0	(%eax)
/	prefetcht0	32(%eax)
	.end

	.inline	prefetch_read_once,4
/	movl	(%esp), %eax
/	prefetchnta	(%eax)
/	prefetchnta	32(%eax)
	.end

	.inline	prefetch_write_many,4
/	movl	(%esp), %eax
/	prefetcht0	(%eax)
/	prefetcht0	32(%eax)
	.end

	.inline	prefetch_write_once,4
/	movl	(%esp), %eax
/	prefetcht0	(%eax)
/	prefetcht0	32(%eax)
	.end