xref: /illumos-gate/usr/src/lib/libm/i386/src/exp.S (revision 55fea89d)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

2925c28e83SPiotr Jasiukajtis        .file "exp.s"
3025c28e83SPiotr Jasiukajtis
3125c28e83SPiotr Jasiukajtis#include "libm.h"
3225c28e83SPiotr JasiukajtisLIBM_ANSI_PRAGMA_WEAK(exp,function)
3325c28e83SPiotr Jasiukajtis#include "libm_protos.h"
3425c28e83SPiotr Jasiukajtis
/*
 * double exp(double x)
 *
 * i386 x87 implementation; the result is returned in %st(0).
 *
 * In:     4(%esp) = lo_32(x), 8(%esp) = hi_32(x)
 * Out:    %st(0)  = exp(x)
 * Uses:   %eax, %ecx, %edx, flags, x87 stack; 8 bytes of scratch stack
 *         on the main path.  The x87 control word is modified
 *         temporarily on the main path and its precision-control field
 *         is put back before the result is rounded to double.
 *
 * Paths:
 *   .shortcut            |x| at or below the double 0x3fe62e42fefa39ef
 *                        (the ln(2) threshold): z = x*log2(e) is fed
 *                        straight to f2xm1.
 *   .finite_non_special  larger finite |x|: split z into [z] and z-[z],
 *                        compute 2**(z-[z]) with f2xm1, scale by 2**[z]
 *                        with fscale, round to double, then classify
 *                        the result with fxam.
 *   .overflow/.underflow the rounded result is infinite/zero: hand off
 *                        to _SVID_libm_err(x, x, 6 or 7) and return
 *                        whatever it leaves in %st(0).
 *   .not_finite          x is NaN or +/-INF: return x itself, except
 *                        that -INF returns +0.
 */
	ENTRY(exp)
	movl	8(%esp),%ecx		/ ecx <-- hi_32(x)
	andl	$0x7fffffff,%ecx	/ ecx <-- hi_32(|x|)
	cmpl	$0x3fe62e42,%ecx	/ compare with hi_32(ln(2))
	jb	.shortcut		/ |x| < ln(2): take the shortcut
	je	.check_tail		/ high words equal: look at low word
	cmpl	$0x7ff00000,%ecx	/ hi_32(|x|) >= hi_32(INF)?
	jae	.not_finite		/ if so, x is INF or NaN
.finite_non_special:			/ Here, ln(2) < |x| < INF
	fldl	4(%esp)			/ push x
	subl	$8,%esp			/ scratch: (%esp) work, 4(%esp) saved CW
	/ Save the control word and force 64-bit rounding precision (RP).
	fstcw	(%esp)			/ current control word
	movw	(%esp),%ax
	movw	%ax,4(%esp)		/ keep a copy holding the old RP
	orw	$0x0300,%ax		/ precision-control field = 64-bit
	movw	%ax,(%esp)
	fldcw	(%esp)			/ RP is now 64-bit
	fldl2e				/ log2e,x
	fmulp	%st,%st(1)		/ z = x*log2e
	fld	%st(0)			/ z,z
	frndint				/ [z],z
	/ Detect integral z early so that f2xm1 can be skipped for it.
	fucom				/ compare [z] with z
	fstsw	%ax			/ condition codes -> ax
	sahf				/ C3 -> ZF
	je	.z_integral		/ [z] == z
	/ [z] != z, compute exp(x)
	fxch				/ z,[z]
	fsub	%st(1),%st		/ z-[z],[z]
	f2xm1				/ 2**(z-[z])-1,[z]
	fld1				/ 1,2**(z-[z])-1,[z]
	faddp	%st,%st(1)		/ 2**(z-[z]),[z]
.merge:
	fscale				/ exp(x),[z]
	fstp	%st(1)			/ exp(x)
	/ Restore the old RP: take the current control word, clear its
	/ precision-control field and splice in the two saved RP bits.
	/ Every other control-word field keeps its current value.
	fstcw	(%esp)
	movw	(%esp),%dx
	andw	$0xfcff,%dx		/ current CW with the RP field cleared
	movw	4(%esp),%cx
	andw	$0x0300,%cx		/ saved RP bits only
	orw	%dx,%cx
	movw	%cx,(%esp)
	fldcw	(%esp)			/ old RP is back in effect
	fstpl	(%esp)			/ round to double (reuses the scratch)
	fldl	(%esp)			/ exp(x) rounded to double
	fxam				/ determine class of exp(x)
	addl	$8,%esp			/ release scratch; x87 status untouched
	fstsw	%ax			/ store status in ax
	andw	$0x4500,%ax		/ keep C3, C2 and C0
	cmpw	$0x0500,%ax		/ C2 and C0 set: infinity
	je	.overflow
	cmpw	$0x4000,%ax		/ C3 set alone: zero
	je	.underflow
	ret

.overflow:
	fstp	%st(0)			/ stack empty
	pushl	%ebp
	movl	%esp,%ebp		/ 8(%ebp) = lo_32(x), 12(%ebp) = hi_32(x)
	PIC_SETUP(1)
	pushl	$6			/ error code passed for overflow
	jmp	.error

.underflow:
	fstp	%st(0)			/ stack empty
	pushl	%ebp
	movl	%esp,%ebp		/ 8(%ebp) = lo_32(x), 12(%ebp) = hi_32(x)
	PIC_SETUP(2)
	pushl	$7			/ error code passed for underflow

.error:
	/ Call _SVID_libm_err(x, x, code); the code is already pushed.
	pushl	12(%ebp)		/ second double: high x
	pushl	8(%ebp)			/ second double: low x
	pushl	12(%ebp)		/ first double: high x
	pushl	8(%ebp)			/ first double: low x
	call	PIC_F(_SVID_libm_err)
	addl	$20,%esp		/ drop the code and both doubles
	PIC_WRAPUP
	leave
	ret

.z_integral:				/ here, z is integral
	fstp	%st(0)			/ z
	fld1				/ 1,z
	jmp	.merge			/ fscale then yields 2**z

.check_tail:
	/ Here, hi_32(|x|) equals hi_32(ln(2)); the low word decides.
	movl	4(%esp),%edx		/ edx <-- lo_32(x)
	cmpl	$0xfefa39ef,%edx	/ compare with lo_32(ln(2))
	ja	.finite_non_special	/ branch if |x| slightly > ln(2)
.shortcut:
	/ Here, |x| is at most the ln(2) threshold, so
	/ |z| = |x*log2(e)| < 1, whence z is in the domain of f2xm1.
	/ The control word is left alone on this path.
	fldl	4(%esp)			/ push x
	fldl2e				/ log2e,x
	fmulp	%st,%st(1)		/ z = x*log2e
	f2xm1				/ 2**z - 1 = e**x - 1
	fld1				/ 1,2**z - 1
	faddp	%st,%st(1)		/ 2**z = e**x
	ret

.not_finite:
	/ Here, flags still have settings from execution of
	/	cmpl	$0x7ff00000,%ecx
	/ No flag-changing instruction may be placed before the ja.
	ja	.NaN_or_pinf		/ hi_32(|x|) > hi_32(INF): x is NaN
	movl	4(%esp),%edx		/ edx <-- lo_32(x)
	testl	%edx,%edx		/ lo_32(x) = 0?
	jne	.NaN_or_pinf		/ if not, x is NaN
	movl	8(%esp),%eax		/ eax <-- hi_32(x)
	andl	$0x80000000,%eax	/ here, x is infinite; isolate the sign
	jz	.NaN_or_pinf		/ branch if x = +INF
	fldz				/ Here, x = -INF, so return 0
	ret

.NaN_or_pinf:
	/ Here, x = NaN or +INF, so load x and return immediately.
	fldl	4(%esp)
	fwait
	ret
	.align	4
	SET_SIZE(exp)
156