/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
26
/*
 * _D_cplx_div(z, w) returns z / w with infinities handled according
 * to C99.
 *
 * If z and w are both finite and w is nonzero, _D_cplx_div(z, w)
 * delivers the complex quotient q according to the usual formula:
 * let a = Re(z), b = Im(z), c = Re(w), and d = Im(w); then q = x +
 * I * y where x = (a * c + b * d) / r and y = (b * c - a * d) / r
 * with r = c * c + d * d.  This implementation scales to avoid
 * premature underflow or overflow.
 *
 * If z is neither NaN nor zero and w is zero, or if z is infinite
 * and w is finite and nonzero, _D_cplx_div delivers an infinite
 * result.  If z is finite and w is infinite, _D_cplx_div delivers
 * a zero result.
 *
 * If z and w are both zero or both infinite, or if either z or w is
 * a complex NaN, _D_cplx_div delivers NaN + I * NaN.  C99 doesn't
 * specify these cases.
 *
 * This implementation can raise spurious underflow, overflow, in-
 * valid operation, inexact, and division-by-zero exceptions.  C99
 * allows this.
 *
 * Warning: Do not attempt to "optimize" this code by removing multi-
 * plications by zero.
 */
54
55 #if !defined(sparc) && !defined(__sparc)
56 #error This code is for SPARC only
57 #endif
58
/*
 * Bit-level view of a double.
 *
 * The encoding is read and written as one 64-bit integer instead of
 * a pair of 32-bit words, so "high-order word" below always means
 * bits 63..32 of the encoding regardless of how the two halves are
 * ordered in memory.  This assumes that double and unsigned long long
 * are both 64 bits wide.
 */
union dblbits {
	unsigned long long	u;
	double			d;
};

/* +Inf: exponent field all ones, sign and significand zero */
static const union dblbits inf = {
	0x7ff0000000000000ULL
};

/*
 * Return the raw 64-bit encoding of x
 */
static unsigned long long
dbl_bits(double x)
{
	union dblbits	t;

	t.d = x;
	return (t.u);
}

/*
 * Return the high-order word of x with the sign bit cleared; the
 * result is always nonnegative, so it can be compared as a plain int
 */
static int
dbl_hiabs(double x)
{
	return ((int)((dbl_bits(x) >> 32) & 0x7fffffff));
}

/*
 * Return the low-order word of x
 */
static unsigned int
dbl_lo(double x)
{
	return ((unsigned int)(dbl_bits(x) & 0xffffffffU));
}

/*
 * Return nonzero if the sign bit of x is set (this includes -0.0
 * and NaNs whose sign bit is set)
 */
static int
dbl_isneg(double x)
{
	return ((dbl_bits(x) >> 63) != 0);
}

/*
 * Return +1 if x is +Inf, -1 if x is -Inf, and 0 otherwise
 *
 * The test is done on unsigned bits only: x is infinite exactly when
 * its encoding with the sign bit masked off equals that of +Inf.  No
 * signed shifts are involved.
 */
static int
testinf(double x)
{
	unsigned long long	u = dbl_bits(x);

	if ((u & 0x7fffffffffffffffULL) != 0x7ff0000000000000ULL)
		return (0);
	return ((u >> 63) ? -1 : 1);
}

/*
 * Return the complex quotient z / w.
 *
 * With a = Re(z), b = Im(z), c = Re(w), d = Im(w), the cases are
 * tried in this order:
 *
 *   1. w has an infinite or NaN part: the result is
 *	(a * c + b * d) * 0 + I * (b * c - a * d) * 0, where, if w is
 *	infinite, c and d have first been replaced by +/-1 (infinite
 *	part) or +/-0 (other part).  Finite z thus gives zeros, and
 *	NaN or infinite operands propagate NaN through the products.
 *   2. w is zero: z is multiplied by 1/c - I * d, with an infinite
 *	z first reduced to +/-1 and 0 parts.
 *   3. z has an infinite or NaN part: z * conj(w) is formed, with an
 *	infinite z reduced to +/-1 and 0 parts and the products then
 *	multiplied by +Inf.
 *   4. otherwise: w is scaled by a power of two s (twice) so that
 *	neither 1 / (c'^2 + d'^2) nor the numerators overflow or
 *	underflow harmfully; see the long comment below.
 *
 * The multiplications by zero and one are deliberate: they are what
 * produces the NaN and signed-zero results, so they must not be
 * removed.
 */
double _Complex
_D_cplx_div(double _Complex z, double _Complex w)
{
	double _Complex	v;
	union dblbits	ss;
	double		a, b, c, d, r;
	int		ha, hb, hc, hd, hz, hw, hs, i, j;

	/*
	 * The following is equivalent to
	 *
	 *	a = creal(z); b = cimag(z);
	 *	c = creal(w); d = cimag(w);
	 */
	a = ((double *)&z)[0];
	b = ((double *)&z)[1];
	c = ((double *)&w)[0];
	d = ((double *)&w)[1];

	/* extract high-order words to estimate |z| and |w| */
	ha = dbl_hiabs(a);
	hb = dbl_hiabs(b);
	hz = (ha > hb)? ha : hb;

	hc = dbl_hiabs(c);
	hd = dbl_hiabs(d);
	hw = (hc > hd)? hc : hd;

	/* check for special cases */
	if (hw >= 0x7ff00000) { /* w is inf or nan */
		r = 0.0;
		i = testinf(c);
		j = testinf(d);
		if (i | j) { /* w is infinite */
			/*
			 * "factor out" infinity, being careful to preserve
			 * signs of finite values
			 */
			c = i? i : (dbl_isneg(c)? -0.0 : 0.0);
			d = j? j : (dbl_isneg(d)? -0.0 : 0.0);
			if (hz >= 0x7fe00000) {
				/* scale to avoid overflow below */
				c *= 0.5;
				d *= 0.5;
			}
		}
		((double *)&v)[0] = (a * c + b * d) * r;
		((double *)&v)[1] = (b * c - a * d) * r;
		return (v);
	}

	if (hw < 0x00100000) {
		/*
		 * This nonsense is needed to work around some SPARC
		 * implementations of nonstandard mode; if both parts
		 * of w are subnormal, multiply them by one to force
		 * them to be flushed to zero when nonstandard mode
		 * is enabled.  Sheesh.
		 */
		c = c * 1.0;
		d = d * 1.0;
		hc = dbl_hiabs(c);
		hd = dbl_hiabs(d);
		hw = (hc > hd)? hc : hd;
	}

	/* both high words (sign aside) and both low words are zero */
	if (hw == 0 && (dbl_lo(c) | dbl_lo(d)) == 0) {
		/* w is zero; multiply z by 1/Re(w) - I * Im(w) */
		c = 1.0 / c;
		i = testinf(a);
		j = testinf(b);
		if (i | j) { /* z is infinite */
			a = i;
			b = j;
		}
		((double *)&v)[0] = a * c + b * d;
		((double *)&v)[1] = b * c - a * d;
		return (v);
	}

	if (hz >= 0x7ff00000) { /* z is inf or nan */
		r = 1.0;
		i = testinf(a);
		j = testinf(b);
		if (i | j) { /* z is infinite */
			a = i;
			b = j;
			r = inf.d;
		}
		((double *)&v)[0] = (a * c + b * d) * r;
		((double *)&v)[1] = (b * c - a * d) * r;
		return (v);
	}

	/*
	 * Scale c and d to compute 1/|w|^2 and the real and imaginary
	 * parts of the quotient.
	 *
	 * Note that for any s, if we let c' = sc, d' = sd, c'' = sc',
	 * and d'' = sd', then
	 *
	 *  (ac'' + bd'') / (c'^2 + d'^2) = (ac + bd) / (c^2 + d^2)
	 *
	 * and similarly for the imaginary part of the quotient.  We want
	 * to choose s such that (i) r := 1/(c'^2 + d'^2) can be computed
	 * without overflow or harmful underflow, and (ii) (ac'' + bd'')
	 * and (bc'' - ad'') can be computed without spurious overflow or
	 * harmful underflow.  To avoid unnecessary rounding, we restrict
	 * s to a power of two.
	 *
	 * To satisfy (i), we need to choose s such that max(|c'|,|d'|)
	 * is not too far from one.  To satisfy (ii), we need to choose
	 * s such that max(|c''|,|d''|) is also not too far from one.
	 * There is some leeway in our choice, but to keep the logic
	 * from getting overly complicated, we simply attempt to roughly
	 * balance these constraints by choosing s so as to make r about
	 * the same size as max(|c''|,|d''|).  This corresponds to choos-
	 * ing s to be a power of two near |w|^(-3/4).
	 *
	 * Regarding overflow, observe that if max(|c''|,|d''|) <= 1/2,
	 * then the computation of (ac'' + bd'') and (bc'' - ad'') can-
	 * not overflow; otherwise, the computation of either of these
	 * values can only incur overflow if the true result would be
	 * within a factor of two of the overflow threshold.  In other
	 * words, if we bias the choice of s such that at least one of
	 *
	 *  max(|c''|,|d''|) <= 1/2   or   r >= 2
	 *
	 * always holds, then no undeserved overflow can occur.
	 *
	 * To cope with underflow, note that if r < 2^-53, then any
	 * intermediate results that underflow are insignificant; either
	 * they will be added to normal results, rendering the under-
	 * flow no worse than ordinary roundoff, or they will contribute
	 * to a final result that is smaller than the smallest subnormal
	 * number.  Therefore, we need only modify the preceding logic
	 * when z is very small and w is not too far from one.  In that
	 * case, we can reduce the effect of any intermediate underflow
	 * to no worse than ordinary roundoff error by choosing s so as
	 * to make max(|c''|,|d''|) large enough that at least one of
	 * (ac'' + bd'') or (bc'' - ad'') is normal.
	 */
	hs = (((hw >> 2) - hw) + 0x6fd7ffff) & 0xfff00000;
	if (hz < 0x07200000) { /* |z| < 2^-909 */
		/* true iff 0x32800000 <= hw <= 0x47100000 */
		if (((hw - 0x32800000) | (0x47100000 - hw)) >= 0)
			hs = (((0x47100000 - hw) >> 1) & 0xfff00000)
			    + 0x3ff00000;
	}

	/*
	 * hs holds only sign and exponent bits, so placing it in the
	 * high-order word over a zero low-order word yields the power
	 * of two s.
	 */
	ss.u = (unsigned long long)(unsigned int)hs << 32;

	c *= ss.d;
	d *= ss.d;
	r = 1.0 / (c * c + d * d);

	c *= ss.d;
	d *= ss.d;
	((double *)&v)[0] = (a * c + b * d) * r;
	((double *)&v)[1] = (b * c - a * d) * r;
	return (v);
}
250