xref: /illumos-gate/usr/src/lib/libm/i386/src/pow.S (revision 5d9d9091)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23 */
24/*
25 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
26 * Use is subject to license terms.
27 */
28
29        .file "pow.s"
30
31/ Note: 0^NaN should not signal "invalid" but this implementation
32/ does because y is placed on the NPX stack.
33
34/ Special cases:
35/
36/ x ** 0 is 1				_SVID_libm_err if x is 0 or NaN
37/ 1 ** y is 1				(C99)
38/ x ** NaN is NaN
39/ NaN ** y (except 0) is NaN
40/ x ** 1 is x
41/ +-(|x| > 1) **  +inf is +inf
42/ +-(|x| > 1) **  -inf is +0
43/ +-(|x| < 1) **  +inf is +0
44/ +-(|x| < 1) **  -inf is +inf
45/ (-1) ** +-inf is +1			(C99)
46/ +0 ** +y (except 0, NaN)		is +0
47/ -0 ** +y (except 0, NaN, odd int)	is +0
48/ -0 ** +y (odd int)			is -0
49/ +-0 ** -y (except 0, NaN)		_SVID_libm_err
50/ +inf ** +y (except 0, NaN)		is +inf
51/ +inf ** -y (except 0, NaN)		is +0
52/ -inf ** +-y (except 0, NaN)		is -0 ** -+y (NO z flag)
53/ x ** -1 is 1/x
54/ x ** 2 is x*x
55/ -x ** y (an integer) is (-1)**(y) * (+x)**(y)
56/ x ** y (x negative & y not integer)	_SVID_libm_err
57/ if x and y are finite and x**y = 0	_SVID_libm_err (underflow)
58/ if x and y are finite and x**y = inf	_SVID_libm_err (overflow)
59
60#include "libm.h"
61LIBM_ANSI_PRAGMA_WEAK(pow,function)
62#include "libm_protos.h"
63#include "xpg6.h"
64
65	.data
66	.align	4
67negzero:
68	.float	-0.0
69one:
70	.float	1.0
71negone:
72	.float	-1.0
73two:
74	.float	2.0
75Snan:
76	.long	0x7f800001
77pinfinity:
78	.long	0x7f800000
79ninfinity:
80	.long	0xff800000
81
82
83	ENTRY(pow)
84	pushl	%ebp
85	movl	%esp,%ebp
86	PIC_SETUP(1)
87
88	fldl	8(%ebp)			/ x
89	fxam				/ determine class of x
90	fnstsw	%ax			/ store status in %ax
91	movb	%ah,%dh			/ %dh <- condition code of x
92
93	fldl	16(%ebp)		/ y , x
94	fxam				/ determine class of y
95	fnstsw	%ax			/ store status in %ax
96	movb	%ah,%dl			/ %dl <- condition code of y
97
98	call	.pow_main		/// LOCAL
99	PIC_WRAPUP
100	leave
101	ret
102
103.pow_main:
104	/ x ** 0 is 1 unless x is 0 or a NaN
105	movb	%dl,%cl
106	andb	$0x45,%cl
107	cmpb	$0x40,%cl		/ C3=1 C2=0 C1=? C0=0 when +-0
108	jne	1f
109	movb	%dh,%cl
110	andb	$0x45,%cl
111	cmpb	$0x40,%cl		/ C3=1 C2=0 C1=? C0=0 when +-0
112	jne	2f
113	/ 0^0
114	pushl	$20
115	jmp	.SVIDerr		/ SVID error handler
1162:
117	cmpb	$0x01,%cl		/// C3=0 C2=0 C1=? C0=1 when +-NaN
118	jne	2f
119	/ NaN^0
120	pushl	$42
121	jmp	.SVIDerr
1222:
123	/ (not 0 or NaN)^0
124	fstp	%st(0)			/ x
125	fstp	%st(0)			/ stack empty
126	fld1				/ 1
127	ret
128
1291:	/ y is not zero
130	PIC_G_LOAD(movzwl,__xpg6,eax)
131	andl	$_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax
132	cmpl	$0,%eax
133	je	1f
134
135	/ C99: 1 ** anything is 1
136	fld1				/ 1, y, x
137	fucomp	%st(2)			/ y, x
138	fnstsw	%ax			/ store status in %ax
139	sahf				/ 80387 flags in %ax to 80386 flags
140	jp	1f			/ so that pow(NaN1,NaN2) returns NaN2
141	jne	1f
142	fstp	%st(0)			/ x
143	ret
144
1451:
146	/ x ** NaN is NaN
147	movb	%dl,%cl
148	andb	$0x45,%cl
149	cmpb	$0x01,%cl		/ C3=0 C2=0 C1=? C0=1 when +-NaN
150	jne	1f
151	fstp	%st(1)			/ y
152	ret
153
1541:	/ y is not NaN
155	/ NaN ** y (except 0) is NaN
156	movb	%dh,%cl
157	andb	$0x45,%cl
158	cmpb	$0x01,%cl		/ C3=0 C2=0 C1=? C0=1 when +-NaN
159	jne	1f
160	fstp	%st(0)			/ x
161	ret
162
1631:	/ x is not NaN
164	/ x ** 1 is x
165	fcoms	PIC_L(one)		/ y , x
166	fnstsw	%ax			/ store status in %ax
167	sahf				/ 80387 flags in %ax to 80386 flags
168	jne	1f
169	fstp	%st(0)			/ x
170	ret
171
1721:	/ y is not 1
173	/ +-(|x| > 1) **  +inf is +inf
174	/ +-(|x| > 1) **  -inf is +0
175	/ +-(|x| < 1) **  +inf is +0
176	/ +-(|x| < 1) **  -inf is +inf
177	/ +-(|x| = 1) ** +-inf is NaN
178	movb	%dl,%cl
179	andb	$0x47,%cl
180	cmpb	$0x05,%cl		/ C3=0 C2=1 C1=0 C0=1 when +inf
181	je	.yispinf
182	cmpb	$0x07,%cl		/ C3=0 C2=1 C1=1 C0=1 when -inf
183	je	.yisninf
184
185	/ +0 ** +y (except 0, NaN)		is +0
186	/ -0 ** +y (except 0, NaN, odd int)	is +0
187	/ +0 ** -y (except 0, NaN)		is +inf (z flag)
188	/ -0 ** -y (except 0, NaN, odd int)	is +inf (z flag)
189	/ -0 ** y (odd int)			is - (+0 ** x)
190	movb	%dh,%cl
191	andb	$0x47,%cl
192	cmpb	$0x40,%cl		/ C3=1 C2=0 C1=0 C0=0 when +0
193	je	.xispzero
194	cmpb	$0x42,%cl		/ C3=1 C2=0 C1=1 C0=0 when -0
195	je	.xisnzero
196
197	/ +inf ** +y (except 0, NaN)	is +inf
198	/ +inf ** -y (except 0, NaN)	is +0
199	/ -inf ** +-y (except 0, NaN)	is -0 ** -+y (NO z flag)
200	movb	%dh,%cl
201	andb	$0x47,%cl
202	cmpb	$0x05,%cl		/ C3=0 C2=1 C1=0 C0=1 when +inf
203	je	.xispinf
204	cmpb	$0x07,%cl		/ C3=0 C2=1 C1=1 C0=1 when -inf
205	je	.xisninf
206
207	/ x ** -1 is 1/x
208	fcoms	PIC_L(negone)		/ y , x
209	fnstsw	%ax			/ store status in %ax
210	sahf				/ 80387 flags in %ax to 80386 flags
211	jne	1f
212	fld	%st(1)			/ x , y , x
213	fdivrs	PIC_L(one)		/ 1/x , y , x
214	jmp	.signok			/ check for over/underflow
215
2161:	/ y is not -1
217	/ x ** 2 is x*x
218	fcoms	PIC_L(two)		/ y , x
219	fnstsw	%ax			/ store status in %ax
220	sahf				/ 80387 flags in %ax to 80386 flags
221	jne	1f
222	fld	%st(1)			/ x , y , x
223	fld	%st(0)			/ x , x , y , x
224	fmulp				/ x^2 , y , x
225	jmp	.signok			/ check for over/underflow
226
2271:	/ y is not 2
228	/ make copies of x & y
229	fld	%st(1)			/ x , y , x
230	fld	%st(1)			/ y , x , y , x
231
232	/ -x ** y (an integer) is (-1)**(y) * (+x)**(y)
233	/ x ** y (x negative & y not integer) is  NaN
234	movl	$0,%ecx			/ track whether to flip sign of result
235	fld	%st(1)			/ x , y , x , y , x
236	ftst				/ compare %st(0) with 0
237	fnstsw	%ax			/ store status in %ax
238	sahf				/ 80387 flags in %ax to 80386 flags
239	fstp	%st(0)			/ y , x , y , x
240	ja	.merge			/ x > 0
241	/ x < 0
242	call	.y_is_int
243	cmpl	$0,%ecx
244	jne	1f
245	/ x < 0, y is non-integral
246	fstp	%st(0)			/ x , y , x
247	fstp	%st(0)			/ y , x
248	pushl	$24
249	jmp	.SVIDerr		/ SVID error handler
250
2511:	/ x < 0 & y = int
252	fxch				/ x , y , y , x
253	fchs				/ px = -x , y , y , x
254	fxch				/ y , px , y , x
255.merge:
256	/ px > 0
257	fxch				/ px , y , y , x
258
259	/ x**y   =   exp(y*ln(x))
260	fyl2x				/ t=y*log2(px) , y , x
261	fld	%st(0)			/ t , t , y , x
262	frndint				/ [t] , t , y , x
263	fxch				/ t , [t] , y , x
264	fucom
265	fnstsw	%ax			/ store status in %ax
266	sahf				/ 80387 flags in %ax to 80386 flags
267	je	1f			/ t is integral
268	fsub    %st(1),%st		/ t-[t] , [t] , y , x
269	f2xm1				/ 2**(t-[t])-1 , [t] , y , x
270	fadds	PIC_L(one)		/ 2**(t-[t]) , [t] , y , x
271	fscale				/ 2**t = px**y , [t] , y , x
272	jmp	2f
2731:
274	fstp    %st(0)                  / t=[t] , y , x
275	fld1                            / 1 , t , y , x
276	fscale                          / 1*2**t = x**y , t , y , x
2772:
278	fstp	%st(1)			/ x**y , y , x
279	cmpl	$1,%ecx
280	jne	.signok
281	fchs				/ change sign since x<0 & y=-int
282.signok:
283	subl	$8,%esp
284	fstpl	(%esp)			/ round to double precision
285	fldl	(%esp)			/ place result on NPX stack
286	addl	$8,%esp
287
288	fxam				/ determine class of x**y
289	fnstsw	%ax			/ store status in %ax
290	andw	$0x4500,%ax
291	/ check for overflow
292	cmpw	$0x0500,%ax		/ C0=0 C1=1 C2=? C3=1 then +-inf
293	jne	1f
294	/ x^y overflows
295	fstp	%st(0)			/ y , x
296	pushl	$21
297	jmp	.SVIDerr
2981:
299	/ check for underflow
300	cmpw	$0x4000,%ax		/ C0=1 C1=0 C2=? C3=0 then +-0
301	jne	1f
302	/ x^y underflows
303	fstp	%st(0)			/ y , x
304	pushl	$22
305	jmp	.SVIDerr
3061:
307	fstp	%st(2)			/ y , x**y
308	fstp	%st(0)			/ x**y
309	ret
310
311/ ------------------------------------------------------------------------
312
313.xispinf:
314	ftst				/ compare %st(0) with 0
315	fnstsw	%ax			/ store status in %ax
316	sahf				/ 80387 flags in %ax to 80386 flags
317	ja	.retpinf		/ y > 0
318	jmp	.retpzero		/ y < 0
319
320.xisninf:
321	/ -inf ** +-y is -0 ** -+y
322	fchs				/ -y , x
323	flds	PIC_L(negzero)		/ -0 , -y , x
324	fstp	%st(2)			/ -y , -0
325	jmp	.xisnzero
326
327.yispinf:
328	fld	%st(1)			/ x , y , x
329	fabs				/ |x| , y , x
330	fcomps	PIC_L(one)		/ y , x
331	fnstsw	%ax			/ store status in %ax
332	sahf				/ 80387 flags in %ax to 80386 flags
333	je	.retponeorinvalid	/ x == -1	C99
334	ja	.retpinf		/ |x| > 1
335	jmp	.retpzero		/ |x| < 1
336
337.yisninf:
338	fld	%st(1)			/ x , y , x
339	fabs				/ |x| , y , x
340	fcomps	PIC_L(one)		/ y , x
341	fnstsw	%ax			/ store status in %ax
342	sahf				/ 80387 flags in %ax to 80386 flags
343	je	.retponeorinvalid	/ x == -1	C99
344	ja	.retpzero		/ |x| > 1
345	jmp	.retpinf		/ |x| < 1
346
347.xispzero:
348	/ y cannot be 0 or NaN ; stack has	y , x
349	ftst				/ compare %st(0) with 0
350	fnstsw	%ax			/ store status in %ax
351	sahf				/ 80387 flags in %ax to 80386 flags
352	ja	.retpzero		/ y > 0
353	/ x = +0 & y < 0
354	jmp	.SVIDzerotoneg
355
356.xisnzero:
357	/ y cannot be 0 or NaN ; stack has	y , x
358	call	.y_is_int
359	cmpl	$1,%ecx
360	jne	1f			/ y is not an odd integer
361	/ y is an odd integer
362	ftst				/ compare %st(0) with 0
363	fnstsw	%ax			/ store status in %ax
364	sahf				/ 80387 flags in %ax to 80386 flags
365	ja	.retnzero		/ y > 0
366	/ x = -0 & y < 0 (odd int)	return -inf (z flag)
367	/ x = -inf & y != 0 or NaN	return -inf (NO z flag)
368	movb	%dh,%cl
369	andb	$0x45,%cl
370	cmpb	$0x05,%cl		/ C3=0 C2=1 C1=? C0=1 when +-inf
371	jne	.SVIDzerotoneg
372	fstp	%st(0)			/ x
373	fstp	%st(0)			/ stack empty
374	flds	PIC_L(ninfinity)	/ -inf
375	ret
376
3771:	/ y is not an odd integer
378	ftst				/ compare %st(0) with 0
379	fnstsw	%ax			/ store status in %ax
380	sahf				/ 80387 flags in %ax to 80386 flags
381	ja	.retpzero		/ y > 0
382	/ x = -0 & y < 0 (not odd int)	return +inf (z flag)
383	/ x = -inf & y not 0 or NaN 	return +inf (NO z flag)
384	movb	%dh,%cl
385	andb	$0x45,%cl
386	cmpb	$0x05,%cl		/ C3=0 C2=1 C1=? C0=1 when +-inf
387	jne	.SVIDzerotoneg
388	jmp	.retpinf		/ return +inf (NO z flag)
389
390.retpzero:
391	fstp	%st(0)			/ x
392	fstp	%st(0)			/ stack empty
393	fldz				/ +0
394	ret
395
396.retnzero:
397	fstp	%st(0)			/ x
398	fstp	%st(0)			/ stack empty
399	flds	PIC_L(negzero)		/ -0
400	ret
401
402.retponeorinvalid:
403	PIC_G_LOAD(movzwl,__xpg6,eax)
404	andl	$_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax
405	cmpl	$0,%eax
406	je	1f
407	fstp	%st(0)			/ x
408	fstp	%st(0)			/ stack empty
409	fld1				/ 1
410	ret
411
4121:
413	fstp	%st(0)			/ x
414	fstp	%st(0)			/ stack empty
415	flds	PIC_L(Snan)		/ Q NaN (i flag)
416	fwait
417	ret
418
419.retpinf:
420	fstp	%st(0)			/ x
421	fstp	%st(0)			/ stack empty
422	flds	PIC_L(pinfinity)	/ +inf
423	ret
424
425.SVIDzerotoneg:
426	pushl	$23
427.SVIDerr:
428	/ At this point the fp stack contains  y , x  and the number
429	/ of the error case has been pushed on the memory stack.
430	subl	$16,%esp
431	fstpl	8(%esp)			/ push y
432	fstpl	(%esp)			/ push x; NPX stack empty
433	call	PIC_F(_SVID_libm_err)	/ report result/error according to SVID
434	addl	$20,%esp
435	ret
436
437/ Set %ecx to 2 if y is an even integer, 1 if y is an odd integer,
438/ 0 otherwise.  Assume y is not zero.  Do not raise inexact or modify
439/ %edx.
440.y_is_int:
441	movl	20(%ebp),%eax
442	andl	$0x7fffffff,%eax	/ |y|
443	cmpl	$0x43400000,%eax
444	jae	1f			/ |y| >= 2^53, an even int
445	cmpl	$0x3ff00000,%eax
446	jb	2f			/ |y| < 1, can't be an int
447	movl	%eax,%ecx
448	sarl	$20,%ecx
449	subl	$0x433,%ecx
450	negl	%ecx			/ 52 - unbiased exponent of y
451	movl	16(%ebp),%eax
452	bsfl	%eax,%eax		/ index of least sig. 1 bit
453	jne	3f			/ jump if 1 bit found
454	movl	20(%ebp),%eax
455	bsfl	%eax,%eax
456	addl	$32,%eax		/ 32 + index of least sig. 1 bit
4573:
458	cmpl	%ecx,%eax
459	jb	2f
460	ja	1f
461	movl	$1,%ecx
462	ret
4631:
464	movl	$2,%ecx
465	ret
4662:
467	xorl	%ecx,%ecx
468	ret
469	.align	4
470	SET_SIZE(pow)
471