xref: /illumos-gate/usr/src/lib/libm/i386/src/expm1f.S (revision 55fea89d)
125c28e83SPiotr Jasiukajtis/*
225c28e83SPiotr Jasiukajtis * CDDL HEADER START
325c28e83SPiotr Jasiukajtis *
425c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the
525c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License").
625c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License.
725c28e83SPiotr Jasiukajtis *
825c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
925c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing.
1025c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions
1125c28e83SPiotr Jasiukajtis * and limitations under the License.
1225c28e83SPiotr Jasiukajtis *
1325c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each
1425c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1525c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the
1625c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying
1725c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner]
1825c28e83SPiotr Jasiukajtis *
1925c28e83SPiotr Jasiukajtis * CDDL HEADER END
2025c28e83SPiotr Jasiukajtis */
2125c28e83SPiotr Jasiukajtis/*
2225c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
2325c28e83SPiotr Jasiukajtis */
2425c28e83SPiotr Jasiukajtis/*
2525c28e83SPiotr Jasiukajtis * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
2625c28e83SPiotr Jasiukajtis * Use is subject to license terms.
2725c28e83SPiotr Jasiukajtis */
2825c28e83SPiotr Jasiukajtis
2925c28e83SPiotr Jasiukajtis        .file "expm1f.s"
3025c28e83SPiotr Jasiukajtis
3125c28e83SPiotr Jasiukajtis#include "libm.h"
3225c28e83SPiotr JasiukajtisLIBM_ANSI_PRAGMA_WEAK(expm1f,function)
3325c28e83SPiotr Jasiukajtis
3425c28e83SPiotr Jasiukajtis	.data
3525c28e83SPiotr Jasiukajtis	.align	4
3625c28e83SPiotr Jasiukajtis.mhundred:	.float	-100.0
3725c28e83SPiotr Jasiukajtis
3825c28e83SPiotr Jasiukajtis	ENTRY(expm1f)
3925c28e83SPiotr Jasiukajtis	movl	4(%esp),%ecx		/ ecx <-- x
4025c28e83SPiotr Jasiukajtis	andl	$0x7fffffff,%ecx	/ ecx <-- |x|
4125c28e83SPiotr Jasiukajtis	cmpl	$0x3f317217,%ecx	/ Is |x| < ln(2)?
4225c28e83SPiotr Jasiukajtis	jbe	.shortcut		/ If so, take a shortcut.
4325c28e83SPiotr Jasiukajtis	cmpl	$0x7f800000,%ecx	/ |x| >= INF?
4425c28e83SPiotr Jasiukajtis	jae	.not_finite		/ if so, x is not finite
4525c28e83SPiotr Jasiukajtis	flds	4(%esp)			/ push x
4625c28e83SPiotr Jasiukajtis
4725c28e83SPiotr Jasiukajtis	subl	$8,%esp			/ save RP and set round-to-64-bits
4825c28e83SPiotr Jasiukajtis	fstcw	(%esp)
4925c28e83SPiotr Jasiukajtis	movw	(%esp),%ax
5025c28e83SPiotr Jasiukajtis	movw	%ax,4(%esp)
5125c28e83SPiotr Jasiukajtis	orw	$0x0300,%ax
5225c28e83SPiotr Jasiukajtis	movw	%ax,(%esp)
5325c28e83SPiotr Jasiukajtis	fldcw	(%esp)
5425c28e83SPiotr Jasiukajtis
5525c28e83SPiotr Jasiukajtis	fldl2e				/ push log2e   }not for xtndd_dbl
5625c28e83SPiotr Jasiukajtis	fmulp	%st,%st(1)		/ z = x*log2e  }not for xtndd_dbl
5725c28e83SPiotr Jasiukajtis	fld	%st(0)			/ duplicate stack top
5825c28e83SPiotr Jasiukajtis	frndint				/ [z],z
5925c28e83SPiotr Jasiukajtis	fucom				/ This and the next 3 instructions
6025c28e83SPiotr Jasiukajtis	fstsw	%ax			/ add 10 clocks to runtime of the
6125c28e83SPiotr Jasiukajtis	sahf				/ main branch, but save about 265
6225c28e83SPiotr Jasiukajtis	je      .z_integral		/ upon detection of integral z.
6325c28e83SPiotr Jasiukajtis	/ [z] != 0, compute exp(x) and then subtract one to get expm1(x)
6425c28e83SPiotr Jasiukajtis	fxch				/ z,[z]
6525c28e83SPiotr Jasiukajtis	fsub    %st(1),%st		/ z-[z],[z]
6625c28e83SPiotr Jasiukajtis	f2xm1				/ 2**(z-[z])-1,[z]
67*55fea89dSDan Cross	/ avoid spurious underflow when scaling to compute exp(x)
6825c28e83SPiotr Jasiukajtis	PIC_SETUP(1)
6925c28e83SPiotr Jasiukajtis	flds	PIC_L(.mhundred)
7025c28e83SPiotr Jasiukajtis	PIC_WRAPUP
7125c28e83SPiotr Jasiukajtis	fucom	%st(2)			/ if -100 !< [z], then use -100
7225c28e83SPiotr Jasiukajtis	fstsw	%ax
7325c28e83SPiotr Jasiukajtis	sahf
7425c28e83SPiotr Jasiukajtis	jb	.got_int_part
7525c28e83SPiotr Jasiukajtis	fxch	%st(2)
7625c28e83SPiotr Jasiukajtis.got_int_part:
7725c28e83SPiotr Jasiukajtis	fstp	%st(0)			/   2**(z-[z])-1,max([z],-100)
7825c28e83SPiotr Jasiukajtis	fld1				/ 1,2**(z-[z])-1,max([z],-100)
7925c28e83SPiotr Jasiukajtis	faddp	%st,%st(1)		/   2**(z-[z])  ,max([z],-100)
8025c28e83SPiotr Jasiukajtis	fscale				/   exp(x)      ,max([z],-100)
8125c28e83SPiotr Jasiukajtis	fld1				/ 1,exp(x)      ,max([z],-100)
8225c28e83SPiotr Jasiukajtis	fsubrp	%st,%st(1)		/   exp(x)-1    ,max([z],-100)
8325c28e83SPiotr Jasiukajtis	fstp	%st(1)
8425c28e83SPiotr Jasiukajtis
8525c28e83SPiotr Jasiukajtis	fstcw	(%esp)			/ restore old RP
8625c28e83SPiotr Jasiukajtis	movw	(%esp),%dx
8725c28e83SPiotr Jasiukajtis	andw	$0xfcff,%dx
8825c28e83SPiotr Jasiukajtis	movw	4(%esp),%cx
8925c28e83SPiotr Jasiukajtis	andw	$0x0300,%cx
9025c28e83SPiotr Jasiukajtis	orw	%dx,%cx
9125c28e83SPiotr Jasiukajtis	movw	%cx,(%esp)
9225c28e83SPiotr Jasiukajtis	fldcw	(%esp)
9325c28e83SPiotr Jasiukajtis	add	$8,%esp
9425c28e83SPiotr Jasiukajtis
9525c28e83SPiotr Jasiukajtis	ret
9625c28e83SPiotr Jasiukajtis
9725c28e83SPiotr Jasiukajtis.z_integral:				/ here, z is integral
9825c28e83SPiotr Jasiukajtis	fstp	%st(0)			/ ,z
99*55fea89dSDan Cross	/ avoid spurious underflow when scaling to compute exp(x)
10025c28e83SPiotr Jasiukajtis	PIC_SETUP(2)
10125c28e83SPiotr Jasiukajtis	flds	PIC_L(.mhundred)
10225c28e83SPiotr Jasiukajtis	PIC_WRAPUP
10325c28e83SPiotr Jasiukajtis	fucom	%st(1)			/ if -100 !< [z], then use -100
10425c28e83SPiotr Jasiukajtis	fstsw	%ax
10525c28e83SPiotr Jasiukajtis	sahf
10625c28e83SPiotr Jasiukajtis	jb	.scale_wont_ovfl
10725c28e83SPiotr Jasiukajtis	fxch	%st(1)
10825c28e83SPiotr Jasiukajtis.scale_wont_ovfl:
10925c28e83SPiotr Jasiukajtis	fstp	%st(0)			/   max([z],-100)
11025c28e83SPiotr Jasiukajtis	fld1				/ 1,max([z],-100)
11125c28e83SPiotr Jasiukajtis	fscale				/   exp(x)      ,max([z],-100)
11225c28e83SPiotr Jasiukajtis	fld1				/ 1,exp(x)      ,max([z],-100)
11325c28e83SPiotr Jasiukajtis	fsubrp	%st,%st(1)		/   exp(x)-1    ,max([z],-100)
11425c28e83SPiotr Jasiukajtis	fstp	%st(1)
11525c28e83SPiotr Jasiukajtis
11625c28e83SPiotr Jasiukajtis	fstcw	(%esp)			/ restore old RP
11725c28e83SPiotr Jasiukajtis	movw	(%esp),%dx
11825c28e83SPiotr Jasiukajtis	andw	$0xfcff,%dx
11925c28e83SPiotr Jasiukajtis	movw	4(%esp),%cx
12025c28e83SPiotr Jasiukajtis	andw	$0x0300,%cx
12125c28e83SPiotr Jasiukajtis	orw	%dx,%cx
12225c28e83SPiotr Jasiukajtis	movw	%cx,(%esp)
12325c28e83SPiotr Jasiukajtis	fldcw	(%esp)
12425c28e83SPiotr Jasiukajtis	add	$8,%esp
12525c28e83SPiotr Jasiukajtis
12625c28e83SPiotr Jasiukajtis	ret
12725c28e83SPiotr Jasiukajtis
12825c28e83SPiotr Jasiukajtis.shortcut:
12925c28e83SPiotr Jasiukajtis	/ Here, |x| < ln(2), so |z| = |x*log2(e)| < 1,
13025c28e83SPiotr Jasiukajtis	/ whence z is in f2xm1's domain.
13125c28e83SPiotr Jasiukajtis	flds	4(%esp)			/ push x
13225c28e83SPiotr Jasiukajtis	fldl2e				/ push log2e  }not for xtndd_dbl
13325c28e83SPiotr Jasiukajtis	fmulp	%st,%st(1)		/ z = x*log2e }not for xtndd_dbl
13425c28e83SPiotr Jasiukajtis	f2xm1				/ 2**(x*log2(e))-1 = e**x - 1
13525c28e83SPiotr Jasiukajtis	ret
13625c28e83SPiotr Jasiukajtis
13725c28e83SPiotr Jasiukajtis.not_finite:
138*55fea89dSDan Cross	ja	.NaN_or_pinf		/ branch if x is NaN
13925c28e83SPiotr Jasiukajtis	movl	4(%esp),%eax		/ eax <-- x
14025c28e83SPiotr Jasiukajtis	andl	$0x80000000,%eax	/ here, x is infinite, but +/-?
14125c28e83SPiotr Jasiukajtis	jz	.NaN_or_pinf		/ branch if x = +INF
14225c28e83SPiotr Jasiukajtis	fld1				/ Here, x = -inf, so return -1
14325c28e83SPiotr Jasiukajtis	fchs
14425c28e83SPiotr Jasiukajtis	ret
14525c28e83SPiotr Jasiukajtis
14625c28e83SPiotr Jasiukajtis.NaN_or_pinf:
14725c28e83SPiotr Jasiukajtis	/ Here, x = NaN or +inf, so load x and return immediately.
14825c28e83SPiotr Jasiukajtis	flds	4(%esp)
14925c28e83SPiotr Jasiukajtis	fwait
15025c28e83SPiotr Jasiukajtis	ret
15125c28e83SPiotr Jasiukajtis	.align	4
15225c28e83SPiotr Jasiukajtis	SET_SIZE(expm1f)
153