xref: /illumos-gate/usr/src/lib/libm/i386/src/exp.S (revision 55fea89d)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23 */
24/*
25 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26 * Use is subject to license terms.
27 */
28
29        .file "exp.s"
30
31#include "libm.h"
32LIBM_ANSI_PRAGMA_WEAK(exp,function)
33#include "libm_protos.h"
34
35	ENTRY(exp)
36	movl	8(%esp),%ecx		/ ecx <-- hi_32(x)
37	andl	$0x7fffffff,%ecx	/ ecx <-- hi_32(|x|)
38	cmpl	$0x3fe62e42,%ecx	/ Is |x| < ln(2)?
39	jb	.shortcut		/ If so, take a shortcut.
40	je	.check_tail		/ |x| may be only slightly < ln(2)
41	cmpl	$0x7ff00000,%ecx	/ hi_32(|x|) >= hi_32(INF)?
42	jae	.not_finite		/ if so, x is not finite
43.finite_non_special:			/ Here, ln(2) < |x| < INF
44	fldl	4(%esp)			/ push x
45	subl	$8,%esp
46					/// overhead of RP save/restore; 63/15
47	fstcw	(%esp)			/// ; 15/3
48	movw	(%esp),%ax		/// ; 4/1
49	movw	%ax,4(%esp)		/// save old RP; 2/1
50	orw	$0x0300,%ax		/// force 64-bit RP; 2/1
51	movw	%ax,(%esp)		/// ; 2/1
52	fldcw	(%esp)			/// ; 19/4
53	fldl2e				/ push log2e   }not for xtndd_dbl
54	fmulp	%st,%st(1)		/ z = x*log2e  }not for xtndd_dbl
55	fld	%st(0)			/ duplicate stack top
56	frndint				/ [z],z
57	fucom				/ This and the next 3 instructions
58	fstsw  %ax			/ add 10 clocks to runtime of the
59	sahf				/ main branch, but save about 265
60	je      .z_integral		/ upon detection of integral z.
61	/ [z] != z, compute exp(x)
62	fxch				/ z,[z]
63	fsub    %st(1),%st		/ z-[z],[z]
64	f2xm1				/ 2**(z-[z])-1,[z]
65	fld1				/ 1,2**(z-[z])-1,[z]
66	faddp	%st,%st(1)		/   2**(z-[z])  ,[z]
67.merge:
68	fscale				/   exp(x)      ,[z]
69	fstp	%st(1)
70	fstcw	(%esp)			/ restore RD
71	movw	(%esp),%dx
72	andw	$0xfcff,%dx
73	movw	4(%esp),%cx
74	andw	$0x0300,%cx
75	orw	%dx,%cx
76	movw	%cx,(%esp)
77	fldcw	(%esp)			/// restore old RP; 19/4
78	fstpl	(%esp)			/ round to double
79	fldl	(%esp)			/ exp(x) rounded to double
80	fxam				/ determine class of exp(x)
81	add	$8,%esp
82	fstsw	%ax			/ store status in ax
83	andw	$0x4500,%ax
84	cmpw	$0x0500,%ax
85	je	.overflow
86	cmpw	$0x4000,%ax
87	je	.underflow
88	ret
89
90.overflow:
91	fstp	%st(0)			/ stack empty
92	push	%ebp
93	mov	%esp,%ebp
94	PIC_SETUP(1)
95	pushl	$6
96	jmp	.error
97
98.underflow:
99	fstp	%st(0)			/ stack empty
100	push	%ebp
101	mov	%esp,%ebp
102	PIC_SETUP(2)
103	pushl	$7
104
105.error:
106	pushl	12(%ebp)		/ high x
107	pushl	8(%ebp)			/ low x
108	pushl	12(%ebp)		/ high x
109	pushl	8(%ebp)			/ low x
110	call	PIC_F(_SVID_libm_err)
111	addl	$20,%esp
112	PIC_WRAPUP
113	leave
114	ret
115
116.z_integral:				/ here, z is integral
117	fstp	%st(0)			/ ,z
118	fld1				/ 1,z
119	jmp	.merge
120
121.check_tail:
122	movl	4(%esp),%edx		/ edx <-- lo_32(x)
123	cmpl	$0xfefa39ef,%edx	/ Is |x| slightly < ln(2)?
124	ja	.finite_non_special	/ branch if |x| slightly > ln(2)
125.shortcut:
126	/ Here, |x| < ln(2), so |z| = |x*log2(e)| < 1,
127	/ whence z is in f2xm1's domain.
128	fldl	4(%esp)			/ push x
129	fldl2e				/ push log2e  }not for xtndd_dbl
130	fmulp	%st,%st(1)		/ z = x*log2e }not for xtndd_dbl
131	f2xm1				/ 2**(x*log2(e))-1 = e**x - 1
132	fld1				/ 1,2**(z)-1
133	faddp	%st,%st(1)		/   2**(z) = e**x
134	ret
135
136.not_finite:
137	/ Here, flags still have settings from execution of
138	/	cmpl	$0x7ff00000,%ecx	/ hi_32(|x|) > hi_32(INF)?
139	ja	.NaN_or_pinf		/ if not, x may be +/- INF
140	movl	4(%esp),%edx		/ edx <-- lo_32(x)
141	cmpl	$0,%edx			/ lo_32(x) = 0?
142	jne	.NaN_or_pinf		/ if not, x is NaN
143	movl	8(%esp),%eax		/ eax <-- hi_32(x)
144	andl	$0x80000000,%eax	/ here, x is infinite, but +/-?
145	jz	.NaN_or_pinf		/ branch if x = +INF
146	fldz				/ Here, x = -inf, so return 0
147	ret
148
149.NaN_or_pinf:
150	/ Here, x = NaN or +inf, so load x and return immediately.
151	fldl	4(%esp)
152	fwait
153	ret
154	.align	4
155	SET_SIZE(exp)
156