/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

	.file "exp.s"

#include "libm.h"
LIBM_ANSI_PRAGMA_WEAK(exp,function)
#include "libm_protos.h"

/*
 * exp(x) on the x87 FPU, AT&T syntax.
 *
 * As used by the code below: the 8-byte argument x sits at 4(%esp) (low
 * word) and 8(%esp) (high word) on entry, and the result is left in %st(0).
 *
 * Paths:
 *   |x| <= ln(2)       2**(x*log2(e)) is formed directly with f2xm1.
 *   ln(2) < |x| < INF  z = x*log2(e) is split into [z] (the frndint result)
 *                      and z-[z]; 2**(z-[z]) is scaled by 2**[z] with fscale
 *                      under 64-bit precision control, then rounded to
 *                      double.  An infinite or zero rounded result is handed
 *                      to _SVID_libm_err with code 6 or 7 respectively.
 *   x = -INF           returns +0.
 *   x = +INF or NaN    returns x itself.
 *
 * Scratch: %eax, %ecx, %edx, the flags and the x87 stack.
 * NOTE(review): PIC_SETUP, PIC_F and PIC_WRAPUP come from an included
 * header that is not visible here; their exact register effects and the
 * value left in %st(0) by _SVID_libm_err should be confirmed there.
 */
	ENTRY(exp)
	movl	8(%esp),%ecx		/ ecx = high word of x
	andl	$0x7fffffff,%ecx	/ drop the sign: high word of |x|
	cmpl	$0x3fe62e42,%ecx	/ against the high word of ln(2)
	jb	.small_arg		/ below: |x| < ln(2)
	je	.check_low_word		/ equal: the low word decides
	cmpl	$0x7ff00000,%ecx	/ exponent field all ones?
	jae	.inf_or_nan		/ yes (flags are reused at the target)
.general_case:				/ ln(2) < |x| < INF
	fldl	4(%esp)			/ st: x
	subl	$8,%esp			/ scratch: (%esp) work, 4(%esp) saved CW
	fstcw	(%esp)			/ fetch the current control word
	movw	(%esp),%ax
	movw	%ax,4(%esp)		/ keep a copy of the entry control word
	orw	$0x0300,%ax		/ precision-control field = 64-bit
	movw	%ax,(%esp)
	fldcw	(%esp)
	fldl2e				/ st: log2(e), x
	fmulp	%st,%st(1)		/ st: z = x*log2(e)
	fld	%st(0)			/ st: z, z
	frndint				/ st: [z], z
	fucom				/ is [z] equal to z?
	fstsw	%ax
	sahf
	jz	.integral_z		/ yes: no need for f2xm1
	fxch				/ st: z, [z]
	fsub	%st(1),%st		/ st: z-[z], [z]
	f2xm1				/ st: 2**(z-[z])-1, [z]
	fld1				/ st: 1, 2**(z-[z])-1, [z]
	faddp	%st,%st(1)		/ st: 2**(z-[z]), [z]
.scale_result:
	fscale				/ st: exp(x), [z]
	fstp	%st(1)			/ st: exp(x)
	movw	4(%esp),%cx		/ entry control word ...
	andw	$0x0300,%cx		/ ... precision-control field only
	fstcw	(%esp)			/ current control word ...
	movw	(%esp),%dx
	andw	$0xfcff,%dx		/ ... everything except that field
	orw	%dx,%cx
	movw	%cx,(%esp)
	fldcw	(%esp)			/ entry precision control is back
	fstpl	(%esp)			/ store as double (rounds the result)
	fldl	(%esp)			/ st: exp(x) rounded to double
	fxam				/ classify the rounded result
	addl	$8,%esp			/ release scratch; x87 status untouched
	fstsw	%ax
	andw	$0x4500,%ax		/ keep condition bits C3, C2, C0
	cmpw	$0x0500,%ax		/ C2 and C0 set: infinity
	je	.report_overflow
	cmpw	$0x4000,%ax		/ only C3 set: zero
	je	.report_underflow
	ret

.report_overflow:
	fstp	%st(0)			/ discard the result; x87 stack empty
	pushl	%ebp
	movl	%esp,%ebp		/ frame: 8(%ebp) low x, 12(%ebp) high x
	PIC_SETUP(1)
	pushl	$6			/ code passed on the overflow path
	jmp	.call_libm_err

.report_underflow:
	fstp	%st(0)			/ discard the result; x87 stack empty
	pushl	%ebp
	movl	%esp,%ebp		/ frame: 8(%ebp) low x, 12(%ebp) high x
	PIC_SETUP(2)
	pushl	$7			/ code passed on the underflow path
					/ falls through
.call_libm_err:
	pushl	12(%ebp)		/ second copy of x, high word
	pushl	8(%ebp)			/ second copy of x, low word
	pushl	12(%ebp)		/ first copy of x, high word
	pushl	8(%ebp)			/ first copy of x, low word
	call	PIC_F(_SVID_libm_err)
	addl	$20,%esp		/ pop the five pushed words
	PIC_WRAPUP
	leave
	ret

.integral_z:				/ st: [z], z with [z] equal to z
	fstp	%st(0)			/ st: z
	fld1				/ st: 1, z
	jmp	.scale_result		/ fscale then yields 1 * 2**z

.check_low_word:			/ high word of |x| matches that of ln(2)
	movl	4(%esp),%edx		/ edx = low word of x
	cmpl	$0xfefa39ef,%edx	/ against the low word of ln(2)
	ja	.general_case		/ above: |x| > ln(2)
					/ otherwise falls through
.small_arg:
	/ Here |x| <= ln(2), so |z| = |x*log2(e)| does not exceed 1 by more
	/ than rounding and z can be given to f2xm1 directly.
	fldl	4(%esp)			/ st: x
	fldl2e				/ st: log2(e), x
	fmulp	%st,%st(1)		/ st: z = x*log2(e)
	f2xm1				/ st: 2**z - 1
	fld1				/ st: 1, 2**z - 1
	faddp	%st,%st(1)		/ st: 2**z = e**x
	ret

.inf_or_nan:
	/ The flags still reflect the comparison of the high word of |x|
	/ with 0x7ff00000 made before the branch to this label.
	ja	.return_x		/ strictly above: x is NaN
	movl	4(%esp),%edx		/ edx = low word of x
	testl	%edx,%edx		/ nonzero low word?
	jnz	.return_x		/ yes: x is NaN
	movl	8(%esp),%eax		/ eax = high word of x
	andl	$0x80000000,%eax	/ isolate the sign of the infinity
	jz	.return_x		/ clear: x is +INF
	fldz				/ x is -INF: result is +0
	ret

.return_x:
	/ x is NaN or +INF: the result is x itself.
	fldl	4(%esp)
	fwait
	ret
	.align	4
	SET_SIZE(exp)