xref: /illumos-gate/usr/src/lib/libm/common/m9x/fmaxf.c (revision 1ec68d33)
1*25c28e83SPiotr Jasiukajtis /*
2*25c28e83SPiotr Jasiukajtis  * CDDL HEADER START
3*25c28e83SPiotr Jasiukajtis  *
4*25c28e83SPiotr Jasiukajtis  * The contents of this file are subject to the terms of the
5*25c28e83SPiotr Jasiukajtis  * Common Development and Distribution License (the "License").
6*25c28e83SPiotr Jasiukajtis  * You may not use this file except in compliance with the License.
7*25c28e83SPiotr Jasiukajtis  *
8*25c28e83SPiotr Jasiukajtis  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25c28e83SPiotr Jasiukajtis  * or http://www.opensolaris.org/os/licensing.
10*25c28e83SPiotr Jasiukajtis  * See the License for the specific language governing permissions
11*25c28e83SPiotr Jasiukajtis  * and limitations under the License.
12*25c28e83SPiotr Jasiukajtis  *
13*25c28e83SPiotr Jasiukajtis  * When distributing Covered Code, include this CDDL HEADER in each
14*25c28e83SPiotr Jasiukajtis  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25c28e83SPiotr Jasiukajtis  * If applicable, add the following below this CDDL HEADER, with the
16*25c28e83SPiotr Jasiukajtis  * fields enclosed by brackets "[]" replaced with your own identifying
17*25c28e83SPiotr Jasiukajtis  * information: Portions Copyright [yyyy] [name of copyright owner]
18*25c28e83SPiotr Jasiukajtis  *
19*25c28e83SPiotr Jasiukajtis  * CDDL HEADER END
20*25c28e83SPiotr Jasiukajtis  */
21*25c28e83SPiotr Jasiukajtis 
22*25c28e83SPiotr Jasiukajtis /*
23*25c28e83SPiotr Jasiukajtis  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24*25c28e83SPiotr Jasiukajtis  */
25*25c28e83SPiotr Jasiukajtis /*
26*25c28e83SPiotr Jasiukajtis  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27*25c28e83SPiotr Jasiukajtis  * Use is subject to license terms.
28*25c28e83SPiotr Jasiukajtis  */
29*25c28e83SPiotr Jasiukajtis 
/* Make the public name fmaxf a weak alias for the implementation __fmaxf. */
30*25c28e83SPiotr Jasiukajtis #pragma weak fmaxf = __fmaxf
31*25c28e83SPiotr Jasiukajtis 
32*25c28e83SPiotr Jasiukajtis /*
33*25c28e83SPiotr Jasiukajtis  * fmax(x,y) returns the larger of x and y.  If just one of the
34*25c28e83SPiotr Jasiukajtis  * arguments is NaN, fmax returns the other argument.  If both
35*25c28e83SPiotr Jasiukajtis  * arguments are NaN, fmax returns NaN (ideally, one of the
36*25c28e83SPiotr Jasiukajtis  * argument NaNs).
37*25c28e83SPiotr Jasiukajtis  *
38*25c28e83SPiotr Jasiukajtis  * C99 does not require that fmax(-0,+0) = fmax(+0,-0) = +0, but
39*25c28e83SPiotr Jasiukajtis  * ideally fmax should satisfy this.
40*25c28e83SPiotr Jasiukajtis  *
41*25c28e83SPiotr Jasiukajtis  * C99 makes no mention of exceptions for fmax.  I suppose ideally
42*25c28e83SPiotr Jasiukajtis  * either fmax never raises any exceptions or else it raises the
43*25c28e83SPiotr Jasiukajtis  * invalid operation exception if and only if some argument is a
44*25c28e83SPiotr Jasiukajtis  * signaling NaN.  In the former case, fmax should always return
45*25c28e83SPiotr Jasiukajtis  * one of its arguments.  In the latter, fmax shouldn't return a
46*25c28e83SPiotr Jasiukajtis  * signaling NaN, although when both arguments are signaling NaNs,
47*25c28e83SPiotr Jasiukajtis  * this ideal is at odds with the stipulation that fmax should
48*25c28e83SPiotr Jasiukajtis  * always return one of its arguments.
49*25c28e83SPiotr Jasiukajtis  *
50*25c28e83SPiotr Jasiukajtis  * Commutativity of fmax follows from the properties listed above
51*25c28e83SPiotr Jasiukajtis  * except when both arguments are NaN.  In that case, fmax may be
52*25c28e83SPiotr Jasiukajtis  * declared commutative by fiat because there is no portable way
53*25c28e83SPiotr Jasiukajtis  * to tell different NaNs apart.  Ideally fmax would be truly com-
54*25c28e83SPiotr Jasiukajtis  * mutative for all arguments.
55*25c28e83SPiotr Jasiukajtis  *
56*25c28e83SPiotr Jasiukajtis  * On SPARC V8, fmax must involve tests and branches.  Ideally,
57*25c28e83SPiotr Jasiukajtis  * an implementation on SPARC V9 should avoid branching, using
58*25c28e83SPiotr Jasiukajtis  * conditional moves instead where necessary, and be as efficient
59*25c28e83SPiotr Jasiukajtis  * as possible in its use of other resources.
60*25c28e83SPiotr Jasiukajtis  *
61*25c28e83SPiotr Jasiukajtis  * It appears to be impossible to attain all of the aforementioned
62*25c28e83SPiotr Jasiukajtis  * ideals simultaneously.  The implementation below satisfies the
63*25c28e83SPiotr Jasiukajtis  * following (on SPARC):
64*25c28e83SPiotr Jasiukajtis  *
65*25c28e83SPiotr Jasiukajtis  * 1. fmax(x,y) returns the larger of x and y if neither x nor y
66*25c28e83SPiotr Jasiukajtis  *    is NaN and the non-NaN argument if just one of x or y is NaN.
67*25c28e83SPiotr Jasiukajtis  *    If both x and y are NaN, fmax(x,y) returns x unchanged.
68*25c28e83SPiotr Jasiukajtis  * 2. fmax(-0,+0) = fmax(+0,-0) = +0.
69*25c28e83SPiotr Jasiukajtis  * 3. If either argument is a signaling NaN, fmax raises the invalid
70*25c28e83SPiotr Jasiukajtis  *    operation exception.  Otherwise, it raises no exceptions.
71*25c28e83SPiotr Jasiukajtis  */
72*25c28e83SPiotr Jasiukajtis 
73*25c28e83SPiotr Jasiukajtis #include "libm.h"	/* for isgreaterequal macro */
74*25c28e83SPiotr Jasiukajtis 
/*
 * __fmaxf(x, y) - single-precision maximum.
 *
 * Returns the larger of x and y.  If exactly one argument is NaN, the
 * other (numeric) argument is returned; if both are NaN, the result is
 * NaN.  When the arguments are zeros of opposite sign, +0 is returned.
 */
float
__fmaxf(float x, float y)
{
	/*
	 * On SPARC v8plus/v9, this could be implemented as follows
	 * (assuming %f0 = x, %f1 = y, return value left in %f0):
	 *
	 * fcmps	%fcc0,%f1,%f1
	 * fmovsu	%fcc0,%f0,%f1
	 * fcmps	%fcc0,%f0,%f1
	 * fmovsul	%fcc0,%f1,%f0
	 * st		%f0,[x]
	 * st		%f1,[y]
	 * ld		[x],%l0
	 * ld		[y],%l1
	 * and		%l0,%l1,%l2
	 * sethi	%hi(0x80000000),%l3
	 * andn		%l3,%l2,%l2
	 * andn		%l0,%l2,%l0
	 * st		%l0,[x]
	 * ld		[x],%f0
	 *
	 * If VIS instructions are available, use this code instead:
	 *
	 * fcmps	%fcc0,%f1,%f1
	 * fmovsu	%fcc0,%f0,%f1
	 * fcmps	%fcc0,%f0,%f1
	 * fmovsul	%fcc0,%f1,%f0
	 * fands	%f0,%f1,%f2
	 * fzeros	%f3
	 * fnegs	%f3,%f3
	 * fandnot2s %f3,%f2,%f2
	 * fandnot2s %f0,%f2,%f0
	 *
	 * If VIS 3.0 instructions are available, use this:
	 *
	 * flcmps	%fcc0,%f0,%f1
	 * fmovslg	%fcc0,%f1,%f0	! move if %fcc0 is 1 or 2
	 */

	/*
	 * Overlay used to inspect and edit the raw bits of a float.  The
	 * sign mask below assumes float and unsigned are both 32 bits wide.
	 */
	union {
		unsigned i;
		float f;
	} xx, yy;
	unsigned s;

	/* if y is nan, replace it by x */
	if (y != y)
		y = x;

	/* if x is nan, replace it by y */
	if (x != x)
		x = y;

	/*
	 * At this point, x and y are either both numeric, or both NaN.
	 * isgreaterequal is a quiet comparison; -0 and +0 compare equal,
	 * so x may still be -0 here when y is +0 (handled below).
	 */
	if (!isnan(x) && !isgreaterequal(x, y))
		x = y;

	/*
	 * clear the sign of the result if either x or y has its sign clear;
	 * s is the sign bit unless both operands have their sign bit set
	 */
	xx.f = x;
	yy.f = y;
	s = ~(xx.i & yy.i) & 0x80000000u;
	xx.i &= ~s;

	return (xx.f);
}
142