/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

	.file	"__vexp.S"

#include "libm.h"

	RO_DATA

/********************************************************************
 * vexp() algorithm is from mopt:f_exp.c.  Basics are included here
 * to supplement comments within this file.  vexp() has been unrolled
 * to a depth of 3.  Only element 0 is documented.
 *
 * Note 1: INVLN2_256, LN2_256H, and LN2_256L were originally scaled by
 *	   2^44 to allow *2^k w/o shifting within the FP registers.  These
 *	   had to be removed for CHEETAH to avoid the fdtox of a very large
 *	   number, which would trap to kernel (2^52).
 *
 * Let x = (k + j/256)*ln2 + r
 *	then exp(x) = exp((k + j/256)*ln2) * exp(r)
 *		    = 2^k * 2^(j/256) * exp(r)
 *	where exp(r) is approximated by the polynomial
 *	     exp(r) = 1 + r + r^2*B1 + r^3*B2 + r^4*B3
 *		    = 1 + r*(1+r*(B1+r*(B2+r*B3)))
 *	let
 *	     p = r*(1+r*(B1+r*(B2+r*B3)))	! notice, not quite exp(r)
 *	     q = 2^(j/256) (high 64 bits)
 *	     t = 2^(j/256) (extra precision)	! both from _TBL_exp_z[]
 *						! (TBL in this file)
 *	then
 *	     2^(j/256) * exp(r) = (q+t)(1+p) ~ q + ( t + q*p )
 *	then actual computation is 2^k * ( q + ( t + q*p ) )
 *
 ********************************************************************/

! TBL is indexed by the code below with a byte offset masked by 0xff0,
! i.e. 256 entries of 16 bytes.  Each entry is two doubles (given as
! high word, low word): first q, the leading part of 2^(j/256), then t,
! the extra-precision correction.

	.align	16
TBL:
	.word	0x3ff00000,0x00000000
	.word	0x00000000,0x00000000
	.word	0x3ff00b1a,0xfa5abcbf
	.word	0xbc84f6b2,0xa7609f71
	.word	0x3ff0163d,0xa9fb3335
	.word	0x3c9b6129,0x9ab8cdb7
	.word	0x3ff02168,0x143b0281
	.word	0xbc82bf31,0x0fc54eb6
	.word	0x3ff02c9a,0x3e778061
	.word	0xbc719083,0x535b085d
	.word	0x3ff037d4,0x2e11bbcc
	.word	0x3c656811,0xeeade11a
	.word	0x3ff04315,0xe86e7f85
	.word	0xbc90a31c,0x1977c96e
	.word	0x3ff04e5f,0x72f654b1
	.word	0x3c84c379,0x3aa0d08c
	.word	0x3ff059b0,0xd3158574
	.word	0x3c8d73e2,0xa475b465
	.word	0x3ff0650a,0x0e3c1f89
	.word	0xbc95cb7b,0x5799c396
	.word	0x3ff0706b,0x29ddf6de
	.word	0xbc8c91df,0xe2b13c26
	.word	0x3ff07bd4,0x2b72a836
	.word	0x3c832334,0x54458700
	.word	0x3ff08745,0x18759bc8
	.word	0x3c6186be,0x4bb284ff
	.word	0x3ff092bd,0xf66607e0
	.word	0xbc968063,0x800a3fd1
	.word	0x3ff09e3e,0xcac6f383
	.word	0x3c914878,0x18316136
	.word	0x3ff0a9c7,0x9b1f3919
	.word	0x3c85d16c,0x873d1d38
	.word	0x3ff0b558,0x6cf9890f
	.word	0x3c98a62e,0x4adc610a
	.word	0x3ff0c0f1,0x45e46c85
	.word	0x3c94f989,0x06d21cef
	.word	0x3ff0cc92,0x2b7247f7
	.word	0x3c901edc,0x16e24f71
	.word	0x3ff0d83b,0x23395dec
	.word	0xbc9bc14d,0xe43f316a
	.word	0x3ff0e3ec,0x32d3d1a2
	.word	0x3c403a17,0x27c57b53
	.word	0x3ff0efa5,0x5fdfa9c5
	.word	0xbc949db9,0xbc54021b
	.word	0x3ff0fb66,0xaffed31b
	.word	0xbc6b9bed,0xc44ebd7b
	.word	0x3ff10730,0x28d7233e
	.word	0x3c8d46eb,0x1692fdd5
	.word	0x3ff11301,0xd0125b51
	.word	0xbc96c510,0x39449b3a
	.word	0x3ff11edb,0xab5e2ab6
	.word	0xbc9ca454,0xf703fb72
	.word	0x3ff12abd,0xc06c31cc
	.word	0xbc51b514,0xb36ca5c7
	.word	0x3ff136a8,0x14f204ab
	.word	0xbc67108f,0xba48dcf0
	.word	0x3ff1429a,0xaea92de0
	.word	0xbc932fbf,0x9af1369e
	.word	0x3ff14e95,0x934f312e
	.word	0xbc8b91e8,0x39bf44ab
	.word	0x3ff15a98,0xc8a58e51
	.word	0x3c82406a,0xb9eeab0a
	.word	0x3ff166a4,0x5471c3c2
	.word	0x3c58f23b,0x82ea1a32
	.word	0x3ff172b8,0x3c7d517b
	.word	0xbc819041,0xb9d78a76
	.word	0x3ff17ed4,0x8695bbc0
	.word	0x3c709e3f,0xe2ac5a64
	.word	0x3ff18af9,0x388c8dea
	.word	0xbc911023,0xd1970f6c
	.word	0x3ff19726,0x58375d2f
	.word	0x3c94aadd,0x85f17e08
	.word	0x3ff1a35b,0xeb6fcb75
	.word	0x3c8e5b4c,0x7b4968e4
	.word	0x3ff1af99,0xf8138a1c
	.word	0x3c97bf85,0xa4b69280
	.word	0x3ff1bbe0,0x84045cd4
	.word	0xbc995386,0x352ef607
	.word	0x3ff1c82f,0x95281c6b
	.word	0x3c900977,0x8010f8c9
	.word	0x3ff1d487,0x3168b9aa
	.word	0x3c9e016e,0x00a2643c
	.word	0x3ff1e0e7,0x5eb44027
	.word	0xbc96fdd8,0x088cb6de
	.word	0x3ff1ed50,0x22fcd91d
	.word	0xbc91df98,0x027bb78c
	.word	0x3ff1f9c1,0x8438ce4d
	.word	0xbc9bf524,0xa097af5c
	.word	0x3ff2063b,0x88628cd6
	.word	0x3c8dc775,0x814a8494
	.word	0x3ff212be,0x3578a819
	.word	0x3c93592d,0x2cfcaac9
	.word	0x3ff21f49,0x917ddc96
	.word	0x3c82a97e,0x9494a5ee
	.word	0x3ff22bdd,0xa27912d1
	.word	0x3c8d34fb,0x5577d69e
	.word	0x3ff2387a,0x6e756238
	.word	0x3c99b07e,0xb6c70573
	.word	0x3ff2451f,0xfb82140a
	.word	0x3c8acfcc,0x911ca996
	.word	0x3ff251ce,0x4fb2a63f
	.word	0x3c8ac155,0xbef4f4a4
	.word	0x3ff25e85,0x711ece75
	.word	0x3c93e1a2,0x4ac31b2c
	.word	0x3ff26b45,0x65e27cdd
	.word	0x3c82bd33,0x9940e9d9
	.word	0x3ff2780e,0x341ddf29
	.word	0x3c9e067c,0x05f9e76c
	.word	0x3ff284df,0xe1f56381
	.word	0xbc9a4c3a,0x8c3f0d7e
	.word	0x3ff291ba,0x7591bb70
	.word	0xbc82cc72,0x28401cbc
	.word	0x3ff29e9d,0xf51fdee1
	.word	0x3c8612e8,0xafad1255
	.word	0x3ff2ab8a,0x66d10f13
	.word	0xbc995743,0x191690a7
	.word	0x3ff2b87f,0xd0dad990
	.word	0xbc410adc,0xd6381aa4
	.word	0x3ff2c57e,0x39771b2f
	.word	0xbc950145,0xa6eb5124
	.word	0x3ff2d285,0xa6e4030b
	.word	0x3c900247,0x54db41d5
	.word	0x3ff2df96,0x1f641589
	.word	0x3c9d16cf,0xfbbce198
	.word	0x3ff2ecaf,0xa93e2f56
	.word	0x3c71ca0f,0x45d52383
	.word	0x3ff2f9d2,0x4abd886b
	.word	0xbc653c55,0x532bda93
	.word	0x3ff306fe,0x0a31b715
	.word	0x3c86f46a,0xd23182e4
	.word	0x3ff31432,0xedeeb2fd
	.word	0x3c8959a3,0xf3f3fcd0
	.word	0x3ff32170,0xfc4cd831
	.word	0x3c8a9ce7,0x8e18047c
	.word	0x3ff32eb8,0x3ba8ea32
	.word	0xbc9c45e8,0x3cb4f318
	.word	0x3ff33c08,0xb26416ff
	.word	0x3c932721,0x843659a6
	.word	0x3ff34962,0x66e3fa2d
	.word	0xbc835a75,0x930881a4
	.word	0x3ff356c5,0x5f929ff1
	.word	0xbc8b5cee,0x5c4e4628
	.word	0x3ff36431,0xa2de883b
	.word	0xbc8c3144,0xa06cb85e
	.word	0x3ff371a7,0x373aa9cb
	.word	0xbc963aea,0xbf42eae2
	.word	0x3ff37f26,0x231e754a
	.word	0xbc99f5ca,0x9eceb23c
	.word	0x3ff38cae,0x6d05d866
	.word	0xbc9e958d,0x3c9904bd
	.word	0x3ff39a40,0x1b7140ef
	.word	0xbc99a9a5,0xfc8e2934
	.word	0x3ff3a7db,0x34e59ff7
	.word	0xbc75e436,0xd661f5e3
	.word	0x3ff3b57f,0xbfec6cf4
	.word	0x3c954c66,0xe26fff18
	.word	0x3ff3c32d,0xc313a8e5
	.word	0xbc9efff8,0x375d29c3
	.word	0x3ff3d0e5,0x44ede173
	.word	0x3c7fe8d0,0x8c284c71
	.word	0x3ff3dea6,0x4c123422
	.word	0x3c8ada09,0x11f09ebc
	.word	0x3ff3ec70,0xdf1c5175
	.word	0xbc8af663,0x7b8c9bca
	.word	0x3ff3fa45,0x04ac801c
	.word	0xbc97d023,0xf956f9f3
	.word	0x3ff40822,0xc367a024
	.word	0x3c8bddf8,0xb6f4d048
	.word	0x3ff4160a,0x21f72e2a
	.word	0xbc5ef369,0x1c309278
	.word	0x3ff423fb,0x2709468a
	.word	0xbc98462d,0xc0b314dd
	.word	0x3ff431f5,0xd950a897
	.word	0xbc81c7dd,0xe35f7998
	.word	0x3ff43ffa,0x3f84b9d4
	.word	0x3c8880be,0x9704c002
	.word	0x3ff44e08,0x6061892d
	.word	0x3c489b7a,0x04ef80d0
	.word	0x3ff45c20,0x42a7d232
	.word	0xbc686419,0x82fb1f8e
	.word	0x3ff46a41,0xed1d0057
	.word	0x3c9c944b,0xd1648a76
	.word	0x3ff4786d,0x668b3237
	.word	0xbc9c20f0,0xed445733
	.word	0x3ff486a2,0xb5c13cd0
	.word	0x3c73c1a3,0xb69062f0
	.word	0x3ff494e1,0xe192aed2
	.word	0xbc83b289,0x5e499ea0
	.word	0x3ff4a32a,0xf0d7d3de
	.word	0x3c99cb62,0xf3d1be56
	.word	0x3ff4b17d,0xea6db7d7
	.word	0xbc8125b8,0x7f2897f0
	.word	0x3ff4bfda,0xd5362a27
	.word	0x3c7d4397,0xafec42e2
	.word	0x3ff4ce41,0xb817c114
	.word	0x3c905e29,0x690abd5d
	.word	0x3ff4dcb2,0x99fddd0d
	.word	0x3c98ecdb,0xbc6a7833
	.word	0x3ff4eb2d,0x81d8abff
	.word	0xbc95257d,0x2e5d7a52
	.word	0x3ff4f9b2,0x769d2ca7
	.word	0xbc94b309,0xd25957e3
	.word	0x3ff50841,0x7f4531ee
	.word	0x3c7a249b,0x49b7465f
	.word	0x3ff516da,0xa2cf6642
	.word	0xbc8f7685,0x69bd93ee
	.word	0x3ff5257d,0xe83f4eef
	.word	0xbc7c998d,0x43efef71
	.word	0x3ff5342b,0x569d4f82
	.word	0xbc807abe,0x1db13cac
	.word	0x3ff542e2,0xf4f6ad27
	.word	0x3c87926d,0x192d5f7e
	.word	0x3ff551a4,0xca5d920f
	.word	0xbc8d689c,0xefede59a
	.word	0x3ff56070,0xdde910d2
	.word	0xbc90fb6e,0x168eebf0
	.word	0x3ff56f47,0x36b527da
	.word	0x3c99bb2c,0x011d93ad
	.word	0x3ff57e27,0xdbe2c4cf
	.word	0xbc90b98c,0x8a57b9c4
	.word	0x3ff58d12,0xd497c7fd
	.word	0x3c8295e1,0x5b9a1de8
	.word	0x3ff59c08,0x27ff07cc
	.word	0xbc97e2ce,0xe467e60f
	.word	0x3ff5ab07,0xdd485429
	.word	0x3c96324c,0x054647ad
	.word	0x3ff5ba11,0xfba87a03
	.word	0xbc9b77a1,0x4c233e1a
	.word	0x3ff5c926,0x8a5946b7
	.word	0x3c3c4b1b,0x816986a2
	.word	0x3ff5d845,0x90998b93
	.word	0xbc9cd6a7,0xa8b45642
	.word	0x3ff5e76f,0x15ad2148
	.word	0x3c9ba6f9,0x3080e65e
	.word	0x3ff5f6a3,0x20dceb71
	.word	0xbc89eadd,0xe3cdcf92
	.word	0x3ff605e1,0xb976dc09
	.word	0xbc93e242,0x9b56de47
	.word	0x3ff6152a,0xe6cdf6f4
	.word	0x3c9e4b3e,0x4ab84c27
	.word	0x3ff6247e,0xb03a5585
	.word	0xbc9383c1,0x7e40b497
	.word	0x3ff633dd,0x1d1929fd
	.word	0x3c984710,0xbeb964e5
	.word	0x3ff64346,0x34ccc320
	.word	0xbc8c483c,0x759d8932
	.word	0x3ff652b9,0xfebc8fb7
	.word	0xbc9ae3d5,0xc9a73e08
	.word	0x3ff66238,0x82552225
	.word	0xbc9bb609,0x87591c34
	.word	0x3ff671c1,0xc70833f6
	.word	0xbc8e8732,0x586c6134
	.word	0x3ff68155,0xd44ca973
	.word	0x3c6038ae,0x44f73e65
	.word	0x3ff690f4,0xb19e9538
	.word	0x3c8804bd,0x9aeb445c
	.word	0x3ff6a09e,0x667f3bcd
	.word	0xbc9bdd34,0x13b26456
	.word	0x3ff6b052,0xfa75173e
	.word	0x3c7a38f5,0x2c9a9d0e
	.word	0x3ff6c012,0x750bdabf
	.word	0xbc728956,0x67ff0b0d
	.word	0x3ff6cfdc,0xddd47645
	.word	0x3c9c7aa9,0xb6f17309
	.word	0x3ff6dfb2,0x3c651a2f
	.word	0xbc6bbe3a,0x683c88ab
	.word	0x3ff6ef92,0x98593ae5
	.word	0xbc90b974,0x9e1ac8b2
	.word	0x3ff6ff7d,0xf9519484
	.word	0xbc883c0f,0x25860ef6
	.word	0x3ff70f74,0x66f42e87
	.word	0x3c59d644,0xd45aa65f
	.word	0x3ff71f75,0xe8ec5f74
	.word	0xbc816e47,0x86887a99
	.word	0x3ff72f82,0x86ead08a
	.word	0xbc920aa0,0x2cd62c72
	.word	0x3ff73f9a,0x48a58174
	.word	0xbc90a8d9,0x6c65d53c
	.word	0x3ff74fbd,0x35d7cbfd
	.word	0x3c9047fd,0x618a6e1c
	.word	0x3ff75feb,0x564267c9
	.word	0xbc902459,0x57316dd3
	.word	0x3ff77024,0xb1ab6e09
	.word	0x3c9b7877,0x169147f8
	.word	0x3ff78069,0x4fde5d3f
	.word	0x3c9866b8,0x0a02162c
	.word	0x3ff790b9,0x38ac1cf6
	.word	0x3c9349a8,0x62aadd3e
	.word	0x3ff7a114,0x73eb0187
	.word	0xbc841577,0xee04992f
	.word	0x3ff7b17b,0x0976cfdb
	.word	0xbc9bebb5,0x8468dc88
	.word	0x3ff7c1ed,0x0130c132
	.word	0x3c9f124c,0xd1164dd6
	.word	0x3ff7d26a,0x62ff86f0
	.word	0x3c91bddb,0xfb72b8b4
	.word	0x3ff7e2f3,0x36cf4e62
	.word	0x3c705d02,0xba15797e
	.word	0x3ff7f387,0x8491c491
	.word	0xbc807f11,0xcf9311ae
	.word	0x3ff80427,0x543e1a12
	.word	0xbc927c86,0x626d972b
	.word	0x3ff814d2,0xadd106d9
	.word	0x3c946437,0x0d151d4d
	.word	0x3ff82589,0x994cce13
	.word	0xbc9d4c1d,0xd41532d8
	.word	0x3ff8364c,0x1eb941f7
	.word	0x3c999b9a,0x31df2bd5
	.word	0x3ff8471a,0x4623c7ad
	.word	0xbc88d684,0xa341cdfb
	.word	0x3ff857f4,0x179f5b21
	.word	0xbc5ba748,0xf8b216d0
	.word	0x3ff868d9,0x9b4492ec
	.word	0x3ca01c83,0xb21584a3
	.word	0x3ff879ca,0xd931a436
	.word	0x3c85d2d7,0xd2db47bc
	.word	0x3ff88ac7,0xd98a6699
	.word	0x3c9994c2,0xf37cb53a
	.word	0x3ff89bd0,0xa478580f
	.word	0x3c9d5395,0x4475202a
	.word	0x3ff8ace5,0x422aa0db
	.word	0x3c96e9f1,0x56864b27
	.word	0x3ff8be05,0xbad61778
	.word	0x3c9ecb5e,0xfc43446e
	.word	0x3ff8cf32,0x16b5448c
	.word	0xbc70d55e,0x32e9e3aa
	.word	0x3ff8e06a,0x5e0866d9
	.word	0xbc97114a,0x6fc9b2e6
	.word	0x3ff8f1ae,0x99157736
	.word	0x3c85cc13,0xa2e3976c
	.word	0x3ff902fe,0xd0282c8a
	.word	0x3c9592ca,0x85fe3fd2
	.word	0x3ff9145b,0x0b91ffc6
	.word	0xbc9dd679,0x2e582524
	.word	0x3ff925c3,0x53aa2fe2
	.word	0xbc83455f,0xa639db7f
	.word	0x3ff93737,0xb0cdc5e5
	.word	0xbc675fc7,0x81b57ebc
	.word	0x3ff948b8,0x2b5f98e5
	.word	0xbc8dc3d6,0x797d2d99
	.word	0x3ff95a44,0xcbc8520f
	.word	0xbc764b7c,0x96a5f039
	.word	0x3ff96bdd,0x9a7670b3
	.word	0xbc5ba596,0x7f19c896
	.word	0x3ff97d82,0x9fde4e50
	.word	0xbc9d185b,0x7c1b85d0
	.word	0x3ff98f33,0xe47a22a2
	.word	0x3c7cabda,0xa24c78ed
	.word	0x3ff9a0f1,0x70ca07ba
	.word	0xbc9173bd,0x91cee632
	.word	0x3ff9b2bb,0x4d53fe0d
	.word	0xbc9dd84e,0x4df6d518
	.word	0x3ff9c491,0x82a3f090
	.word	0x3c7c7c46,0xb071f2be
	.word	0x3ff9d674,0x194bb8d5
	.word	0xbc9516be,0xa3dd8233
	.word	0x3ff9e863,0x19e32323
	.word	0x3c7824ca,0x78e64c6e
	.word	0x3ff9fa5e,0x8d07f29e
	.word	0xbc84a9ce,0xaaf1face
	.word	0x3ffa0c66,0x7b5de565
	.word	0xbc935949,0x5d1cd533
	.word	0x3ffa1e7a,0xed8eb8bb
	.word	0x3c9c6618,0xee8be70e
	.word	0x3ffa309b,0xec4a2d33
	.word	0x3c96305c,0x7ddc36ab
	.word	0x3ffa42c9,0x80460ad8
	.word	0xbc9aa780,0x589fb120
	.word	0x3ffa5503,0xb23e255d
	.word	0xbc9d2f6e,0xdb8d41e1
	.word	0x3ffa674a,0x8af46052
	.word	0x3c650f56,0x30670366
	.word	0x3ffa799e,0x1330b358
	.word	0x3c9bcb7e,0xcac563c6
	.word	0x3ffa8bfe,0x53c12e59
	.word	0xbc94f867,0xb2ba15a8
	.word	0x3ffa9e6b,0x5579fdbf
	.word	0x3c90fac9,0x0ef7fd31
	.word	0x3ffab0e5,0x21356eba
	.word	0x3c889c31,0xdae94544
	.word	0x3ffac36b,0xbfd3f37a
	.word	0xbc8f9234,0xcae76cd0
	.word	0x3ffad5ff,0x3a3c2774
	.word	0x3c97ef3b,0xb6b1b8e4
	.word	0x3ffae89f,0x995ad3ad
	.word	0x3c97a1cd,0x345dcc81
	.word	0x3ffafb4c,0xe622f2ff
	.word	0xbc94b2fc,0x0f315ecc
	.word	0x3ffb0e07,0x298db666
	.word	0xbc9bdef5,0x4c80e425
	.word	0x3ffb20ce,0x6c9a8952
	.word	0x3c94dd02,0x4a0756cc
	.word	0x3ffb33a2,0xb84f15fb
	.word	0xbc62805e,0x3084d708
	.word	0x3ffb4684,0x15b749b1
	.word	0xbc7f763d,0xe9df7c90
	.word	0x3ffb5972,0x8de5593a
	.word	0xbc9c71df,0xbbba6de3
	.word	0x3ffb6c6e,0x29f1c52a
	.word	0x3c92a8f3,0x52883f6e
	.word	0x3ffb7f76,0xf2fb5e47
	.word	0xbc75584f,0x7e54ac3b
	.word	0x3ffb928c,0xf22749e4
	.word	0xbc9b7216,0x54cb65c6
	.word	0x3ffba5b0,0x30a1064a
	.word	0xbc9efcd3,0x0e54292e
	.word	0x3ffbb8e0,0xb79a6f1f
	.word	0xbc3f52d1,0xc9696205
	.word	0x3ffbcc1e,0x904bc1d2
	.word	0x3c823dd0,0x7a2d9e84
	.word	0x3ffbdf69,0xc3f3a207
	.word	0xbc3c2623,0x60ea5b52
	.word	0x3ffbf2c2,0x5bd71e09
	.word	0xbc9efdca,0x3f6b9c73
	.word	0x3ffc0628,0x6141b33d
	.word	0xbc8d8a5a,0xa1fbca34
	.word	0x3ffc199b,0xdd85529c
	.word	0x3c811065,0x895048dd
	.word	0x3ffc2d1c,0xd9fa652c
	.word	0xbc96e516,0x17c8a5d7
	.word	0x3ffc40ab,0x5fffd07a
	.word	0x3c9b4537,0xe083c60a
	.word	0x3ffc5447,0x78fafb22
	.word	0x3c912f07,0x2493b5af
	.word	0x3ffc67f1,0x2e57d14b
	.word	0x3c92884d,0xff483cad
	.word	0x3ffc7ba8,0x8988c933
	.word	0xbc8e76bb,0xbe255559
	.word	0x3ffc8f6d,0x9406e7b5
	.word	0x3c71acbc,0x48805c44
	.word	0x3ffca340,0x5751c4db
	.word	0xbc87f2be,0xd10d08f4
	.word	0x3ffcb720,0xdcef9069
	.word	0x3c7503cb,0xd1e949db
	.word	0x3ffccb0f,0x2e6d1675
	.word	0xbc7d220f,0x86009093
	.word	0x3ffcdf0b,0x555dc3fa
	.word	0xbc8dd83b,0x53829d72
	.word	0x3ffcf315,0x5b5bab74
	.word	0xbc9a08e9,0xb86dff57
	.word	0x3ffd072d,0x4a07897c
	.word	0xbc9cbc37,0x43797a9c
	.word	0x3ffd1b53,0x2b08c968
	.word	0x3c955636,0x219a36ee
	.word	0x3ffd2f87,0x080d89f2
	.word	0xbc9d487b,0x719d8578
	.word	0x3ffd43c8,0xeacaa1d6
	.word	0x3c93db53,0xbf5a1614
	.word	0x3ffd5818,0xdcfba487
	.word	0x3c82ed02,0xd75b3706
	.word	0x3ffd6c76,0xe862e6d3
	.word	0x3c5fe87a,0x4a8165a0
	.word	0x3ffd80e3,0x16c98398
	.word	0xbc911ec1,0x8beddfe8
	.word	0x3ffd955d,0x71ff6075
	.word	0x3c9a052d,0xbb9af6be
	.word	0x3ffda9e6,0x03db3285
	.word	0x3c9c2300,0x696db532
	.word	0x3ffdbe7c,0xd63a8315
	.word	0xbc9b76f1,0x926b8be4
	.word	0x3ffdd321,0xf301b460
	.word	0x3c92da57,0x78f018c2
	.word	0x3ffde7d5,0x641c0658
	.word	0xbc9ca552,0x8e79ba8f
	.word	0x3ffdfc97,0x337b9b5f
	.word	0xbc91a5cd,0x4f184b5c
	.word	0x3ffe1167,0x6b197d17
	.word	0xbc72b529,0xbd5c7f44
	.word	0x3ffe2646,0x14f5a129
	.word	0xbc97b627,0x817a1496
	.word	0x3ffe3b33,0x3b16ee12
	.word	0xbc99f4a4,0x31fdc68a
	.word	0x3ffe502e,0xe78b3ff6
	.word	0x3c839e89,0x80a9cc8f
	.word	0x3ffe6539,0x24676d76
	.word	0xbc863ff8,0x7522b734
	.word	0x3ffe7a51,0xfbc74c83
	.word	0x3c92d522,0xca0c8de2
	.word	0x3ffe8f79,0x77cdb740
	.word	0xbc910894,0x80b054b1
	.word	0x3ffea4af,0xa2a490da
	.word	0xbc9e9c23,0x179c2893
	.word	0x3ffeb9f4,0x867cca6e
	.word	0x3c94832f,0x2293e4f2
	.word	0x3ffecf48,0x2d8e67f1
	.word	0xbc9c93f3,0xb411ad8c
	.word	0x3ffee4aa,0xa2188510
	.word	0x3c91c68d,0xa487568d
	.word	0x3ffefa1b,0xee615a27
	.word	0x3c9dc7f4,0x86a4b6b0
	.word	0x3fff0f9c,0x1cb6412a
	.word	0xbc932200,0x65181d45
	.word	0x3fff252b,0x376bba97
	.word	0x3c93a1a5,0xbf0d8e43
	.word	0x3fff3ac9,0x48dd7274
	.word	0xbc795a5a,0x3ed837de
	.word	0x3fff5076,0x5b6e4540
	.word	0x3c99d3e1,0x2dd8a18b
	.word	0x3fff6632,0x798844f8
	.word	0x3c9fa37b,0x3539343e
	.word	0x3fff7bfd,0xad9cbe14
	.word	0xbc9dbb12,0xd006350a
	.word	0x3fff91d8,0x02243c89
	.word	0xbc612ea8,0xa779f689
	.word	0x3fffa7c1,0x819e90d8
	.word	0x3c874853,0xf3a5931e
	.word	0x3fffbdba,0x3692d514
	.word	0xbc796773,0x15098eb6
	.word	0x3fffd3c2,0x2b8f71f1
	.word	0x3c62eb74,0x966579e7
	.word	0x3fffe9d9,0x6b2a23d9
	.word	0x3c74a603,0x7442fde3

! Constant pool.  Each entry is one double (high word, low word); the
! names on the right are the offset #defines / register aliases below.

	.align	16
constants:
	.word	0x3ef00000,0x00000000	! ox3ef      (BOUNDRY)
	.word	0x40862e42,0xfefa39ef	! thresh     (THRESH)
	.word	0x01000000,0x00000000	! tiny       (TINY)
	.word	0x7f000000,0x00000000	! huge       (HUGE)
	.word	0x80000000,0x00000000	! signbit    (SIGNBIT)
	.word	0x43f00000,0x00000000	! scaling 2^12 two96
	.word	0xfff00000,0x00000000	! neginf     (NEGINF)
	.word	0x3ff00000,0x00000000	! one        (ONE)
	.word	0x3fdfffff,0xfffffff6	! B1
	.word	0x3fc55555,0x721a1d14	! B2
	.word	0x3fa55555,0x6e0896af	! B3
	.word	0x41371547,0x652b82fe	! scaling 2^12 invln2_256
	.word	0x3ea62e42,0xfee00000	! scaling 2^(-12) ln2_256h
	.word	0x3caa39ef,0x35793c76	! scaling 2^(-12) ln2_256l

	! base set w/o scaling
	! .word	0x43300000,0x00000000	! scaling two96
	! .word	0x40771547,0x652b82fe	! scaling invln2_256
	! .word	0x3f662e42,0xfee00000	! scaling ln2_256h
	! .word	0x3d6a39ef,0x35793c76	! scaling ln2_256l

! byte offsets of the entries in the constants pool above

#define ox3ef		0x0
#define thresh		0x8
#define tiny		0x10
#define huge		0x18
#define signbit		0x20
#define two96		0x28
#define neginf		0x30
#define one		0x38
#define B1OFF		0x40
#define B2OFF		0x48
#define B3OFF		0x50
#define invln2_256	0x58
#define ln2_256h	0x60
#define ln2_256l	0x68

! local storage indices

#define m2		STACK_BIAS-0x4
#define m1		STACK_BIAS-0x8
#define m0		STACK_BIAS-0xc
#define jnk		STACK_BIAS-0x20

! sizeof temp storage - must be a multiple of 16 for V9

#define tmps		0x20

! register use

! i0  n
! i1  x
! i2  stridex
! i3  y
! i4  stridey
! i5  0x80000000

! g1  TBL

! l0  m0
! l1  m1
! l2  m2
! l3  j0,oy0
! l4  j1,oy1
! l5  j2,oy2
! l6  0x3e300000
! l7  0x40862e41

! o0  py0
! o1  py1
! o2  py2
! o3  scratch
! o4  scratch
! o5  0x40874910
! o7  0x7ff00000

! f0  x0
! f2
! f4
! f6
! f8
! f10 x1
! f12
! f14
! f16
! f18
! f20 x2
! f22
! f24
! f26
! f28
! f30
! f32
! f34
! f36 0x3ef0...
! f38 thresh
! f40 tiny
! f42 huge
! f44 signbit
! f46 two96
! f48 neginf
! f50 one
! f52 B1
! f54 B2
! f56 B3
! f58 invln2_256
! f60 ln2_256h
! f62 ln2_256l

#define BOUNDRY		%f36
#define THRESH		%f38
#define TINY		%f40
#define HUGE		%f42
#define SIGNBIT		%f44
#define TWO96		%f46
#define NEGINF		%f48
#define ONE		%f50
#define B1		%f52
#define B2		%f54
#define B3		%f56
#define INVLN2_256	%f58
#define LN2_256H	%f60
#define LN2_256L	%f62

!
! __vexp: vector exp().  Per the register table above the incoming
! arguments are %i0 = n, %i1 = x, %i2 = stridex, %i3 = y, %i4 = stridey.
! NOTE(review): presumably the C prototype is
!	void __vexp(int n, double *x, int stridex, double *y, int stridey)
! -- confirm against the caller/header.
!
! Both strides are shifted left by 3 on entry, i.e. they are counted in
! 8-byte elements.
!
! The main loop handles three elements per pass (.loop0/.loop1/.loop2).
! Results of one pass are stored at the top of the next pass through
! %l3/%l4/%l5; before the first pass those point at the scratch slot
! jnk, so the first three stores land in scratch.  The remaining
! results are flushed at .endloop0.
!
! Arguments after the first are preloaded with lda through %asi before
! the remaining count is tested (presumably why non-faulting loads are
! used -- the preload can touch the element past the last one).
!
! Arguments with |x| high word below 0x3e300000 or above 0x40862e41 are
! diverted to .range0/.range1/.range2.
!

	ENTRY(__vexp)
	save	%sp,-SA(MINFRAME)-tmps,%sp
	PIC_SETUP(l7)
	PIC_SET(l7,constants,o3)
	PIC_SET(l7,TBL,o0)
	mov	%o0,%g1
	wr	%g0,0x82,%asi		! set %asi for non-faulting loads

	sethi	%hi(0x80000000),%i5
	sethi	%hi(0x3e300000),%l6
	sethi	%hi(0x40862e41),%l7
	or	%l7,%lo(0x40862e41),%l7
	sethi	%hi(0x40874910),%o5
	or	%o5,%lo(0x40874910),%o5
	sethi	%hi(0x7ff00000),%o7
	ldd	[%o3+ox3ef],BOUNDRY
	ldd	[%o3+thresh],THRESH
	ldd	[%o3+tiny],TINY
	ldd	[%o3+huge],HUGE
	ldd	[%o3+signbit],SIGNBIT
	ldd	[%o3+two96],TWO96
	ldd	[%o3+neginf],NEGINF
	ldd	[%o3+one],ONE
	ldd	[%o3+B1OFF],B1
	ldd	[%o3+B2OFF],B2
	ldd	[%o3+B3OFF],B3
	ldd	[%o3+invln2_256],INVLN2_256
	ldd	[%o3+ln2_256h],LN2_256H
	ldd	[%o3+ln2_256l],LN2_256L
	sll	%i2,3,%i2		! scale strides
	sll	%i4,3,%i4
	add	%fp,jnk,%l3		! precondition loop
	add	%fp,jnk,%l4
	add	%fp,jnk,%l5
	ld	[%i1],%l0		! hx = *x
	ld	[%i1],%f0
	ld	[%i1+4],%f1
	andn	%l0,%i5,%l0		! hx &= ~0x80000000
	ba	.loop0
	add	%i1,%i2,%i1		! x += stridex

	.align	16
! -- 16 byte aligned
.loop0:
	lda	[%i1]%asi,%l1		! preload next argument
	sub	%l0,%l6,%o3
	sub	%l7,%l0,%o4
	fand	%f0,SIGNBIT,%f2		! get sign bit

	lda	[%i1]%asi,%f10
	orcc	%o3,%o4,%g0
	mov	%i3,%o0			! py0 = y
	bl,pn	%icc,.range0		! if hx < 0x3e300000 or > 0x40862e41

! delay slot
	lda	[%i1+4]%asi,%f11
	addcc	%i0,-1,%i0
	add	%i3,%i4,%i3		! y += stridey
	ble,pn	%icc,.endloop1

! delay slot
	andn	%l1,%i5,%l1
	add	%i1,%i2,%i1		! x += stridex
	for	%f2,TWO96,%f2		! used to strip least sig bits
	fmuld	%f0,INVLN2_256,%f4	! x/ (ln2/256) , creating k

.loop1:
	lda	[%i1]%asi,%l2		! preload next argument
	sub	%l1,%l6,%o3
	sub	%l7,%l1,%o4
	fand	%f10,SIGNBIT,%f12

	lda	[%i1]%asi,%f20
	orcc	%o3,%o4,%g0
	mov	%i3,%o1			! py1 = y
	bl,pn	%icc,.range1		! if hx < 0x3e300000 or > 0x40862e41

! delay slot
	lda	[%i1+4]%asi,%f21
	addcc	%i0,-1,%i0
	add	%i3,%i4,%i3		! y += stridey
	ble,pn	%icc,.endloop2

! delay slot
	andn	%l2,%i5,%l2
	add	%i1,%i2,%i1		! x += stridex
	for	%f12,TWO96,%f12
	fmuld	%f10,INVLN2_256,%f14

.loop2:
	sub	%l2,%l6,%o3
	sub	%l7,%l2,%o4
	fand	%f20,SIGNBIT,%f22
	fmuld	%f20,INVLN2_256,%f24	! okay to put this here; for alignment

	orcc	%o3,%o4,%g0
	bl,pn	%icc,.range2		! if hx < 0x3e300000 or > 0x40862e41
! delay slot
	for	%f22,TWO96,%f22
	faddd	%f4,%f2,%f4		! creating k+j/256, sra to zero bits

.cont:
	faddd	%f14,%f12,%f14
	mov	%i3,%o2			! py2 = y

	faddd	%f24,%f22,%f24
	add	%i3,%i4,%i3		! y += stridey

	! BUBBLE USIII

	fsubd	%f4,%f2,%f8		! creating k+j/256: sll
	st	%f6,[%l3]		! store previous loop x0

	fsubd	%f14,%f12,%f18
	st	%f7,[%l3+4]		! store previous loop x0

	fsubd	%f24,%f22,%f28
	st	%f16,[%l4]

	! BUBBLE USIII

	fmuld	%f8,LN2_256H,%f2	! closest LN2_256 to x
	st	%f17,[%l4+4]

	fmuld	%f18,LN2_256H,%f12
	st	%f26,[%l5]

	fmuld	%f28,LN2_256H,%f22
	st	%f27,[%l5+4]

	! BUBBLE USIII

	fsubd	%f0,%f2,%f0		! r = x - p*LN2_256H
	fmuld	%f8,LN2_256L,%f4	! closest LN2_256 to x , added prec

	fsubd	%f10,%f12,%f10
	fmuld	%f18,LN2_256L,%f14

	fsubd	%f20,%f22,%f20
	fmuld	%f28,LN2_256L,%f24

	! BUBBLE USIII

	fsubd	%f0,%f4,%f0		! r -= p*LN2_256L

	fsubd	%f10,%f14,%f10

	fsubd	%f20,%f24,%f20

!!!!!!!!!!!!!!!!!!! New polynomial reorder starts here

	! Alternate polynomial grouping allowing non-sequential calc of p
	! OLD : p = r * ( 1 + r * ( B1 + r * ( B2 + r * B3) ) )
	! NEW : p = r * [ (1+r*B1) + (r*r) * ( B2 + r * B3) ) ]
	!
	! let SLi Ri SRi be accumulators

	fmuld	%f0,B3,%f2		! SR1 = r1 * B3
	fdtoi	%f8,%f8			! convert k+j/256 to int
	st	%f8,[%fp+m0]		! store k, to shift return/use

	fmuld	%f10,B3,%f12		! SR2 = r2 * B3
	fdtoi	%f18,%f18		! convert k+j/256 to int
	st	%f18,[%fp+m1]		! store k, to shift return/use

	fmuld	%f20,B3,%f22		! SR3 = r3 * B3
	fdtoi	%f28,%f28		! convert k+j/256 to int
	st	%f28,[%fp+m2]		! store k, to shift return/use

	fmuld	%f0,%f0,%f4		! R1 = r1 * r1

	fmuld	%f10,%f10,%f14		! R2 = r2 * r2
	faddd	%f2,B2,%f2		! SR1 += B2

	fmuld	%f20,%f20,%f24		! R3 = r3 * r3
	faddd	%f12,B2,%f12		! SR2 += B2

	faddd	%f22,B2,%f22		! SR3 += B2
	fmuld	%f0,B1,%f6		! SL1 = r1 * B1

	fmuld	%f10,B1,%f32		! SL2 = r2 * B1
	fand	%f8,NEGINF,%f8		! best here for RAW BYPASS
					! (keeps only bits under the
					! 0xfff00000,0 mask, for the
					! later fpadd32 onto the result)
	ld	[%fp+m0],%l0		! get nonshifted k into intreg

	fmuld	%f20,B1,%f34		! SL3 = r3 * B1
	fand	%f18,NEGINF,%f18
	ld	[%fp+m1],%l1		! get nonshifted k into intreg

	fmuld	%f4,%f2,%f4		! R1 = R1 * SR1
	fand	%f28,NEGINF,%f28
	ld	[%fp+m2],%l2		! get nonshifted k into intreg

	fmuld	%f14,%f12,%f14		! R2 = R2 * SR2
	faddd	%f6,ONE,%f6		! SL1 += 1

	fmuld	%f24,%f22,%f24		! R3 = R3 * SR3
	faddd	%f32,ONE,%f32		! SL2 += 1
	sra	%l0,8,%l3		! shift k tobe offset 256-8byte

	faddd	%f34,ONE,%f34		! SL3 += 1
	sra	%l1,8,%l4		! shift k tobe offset 256-8byte
	sra	%l2,8,%l5		! shift k tobe offset 256-8byte

	! BUBBLE in USIII
	and	%l3,0xff0,%l3		! byte offset of TBL entry j (16 bytes each)
	and	%l4,0xff0,%l4


	faddd	%f6,%f4,%f6		! R1 = SL1 + R1
	ldd	[%g1+%l3],%f4		! tbl[j] high part (q)
	add	%l3,8,%l3		! step to the low part of the same entry
	and	%l5,0xff0,%l5

	faddd	%f32,%f14,%f32		! R2 = SL2 + R2
	ldd	[%g1+%l4],%f14		! tbl[j] high part (q)
	add	%l4,8,%l4		! step to the low part of the same entry
	sra	%l0,20,%o3

	faddd	%f34,%f24,%f34		! R3 = SL3 + R3
	ldd	[%g1+%l5],%f24		! tbl[j] high part (q)
	add	%l5,8,%l5		! step to the low part of the same entry
	sra	%l1,20,%l1

	! BUBBLE in USIII
	ldd	[%g1+%l4],%f16		! tbl[j] low part (t)
	add	%o3,1021,%o3		! (%l0 >> 20) + 1021; sign tested below

	fmuld	%f0,%f6,%f0		! p1 = r1 * R1
	ldd	[%g1+%l3],%f6		! tbl[j] low part (t)
	add	%l1,1021,%l1		! (%l1 >> 20) + 1021; sign tested below
	sra	%l2,20,%l2

	fmuld	%f10,%f32,%f10		! p2 = r2 * R2
	ldd	[%g1+%l5],%f26		! tbl[j] low part (t)
	add	%l2,1021,%l2		! (%l2 >> 20) + 1021; sign tested below

	fmuld	%f20,%f34,%f20		! p3 = r3 * R3





!!!!!!!!!!!!!!!!!!! poly-reorder - ends here

	fmuld	%f0,%f4,%f0		! start exp(x) = exp(r) * tbl[j]
	mov	%o0,%l3			! %l3/%l4/%l5 now hold py0/py1/py2
					! for the deferred stores

	fmuld	%f10,%f14,%f10
	mov	%o1,%l4

	fmuld	%f20,%f24,%f20
	mov	%o2,%l5

	faddd	%f0,%f6,%f6		! cont exp(x) : t + q*p
	lda	[%i1]%asi,%l0		! preload next argument

	faddd	%f10,%f16,%f16
	lda	[%i1]%asi,%f0

	faddd	%f20,%f26,%f26
	lda	[%i1+4]%asi,%f1

	faddd	%f6,%f4,%f6		! cont exp(x) : q + (t + q*p)
	add	%i1,%i2,%i1		! x += stridex

	faddd	%f16,%f14,%f16
	andn	%l0,%i5,%l0
	or	%o3,%l1,%o4

! -- 16 byte aligned
	orcc	%o4,%l2,%o4		! negative if any of the three is negative
	bl,pn	%icc,.small
! delay slot
	faddd	%f26,%f24,%f26

	fpadd32	%f6,%f8,%f6		! done exp(x) : apply 2^k
	fpadd32	%f16,%f18,%f16


	addcc	%i0,-1,%i0
	bg,pn	%icc,.loop0
! delay slot
	fpadd32	%f26,%f28,%f26

	ba,pt	%icc,.endloop0
! delay slot
	nop


! .small: at least one of %o3/%l1/%l2 is negative.  For each such
! element the result additionally gets BOUNDRY added with fpadd32
! (high word 0x3ef00000) and is then multiplied by TINY
! (0x01000000,0 = 2^-1007); the other elements just get the plain
! fpadd32 of the masked k.

	.align	16
.small:
	tst	%o3
	bge,pt	%icc,1f
! delay slot
	fpadd32	%f6,%f8,%f6
	fpadd32	%f6,BOUNDRY,%f6
	fmuld	%f6,TINY,%f6
1:
	tst	%l1
	bge,pt	%icc,1f
! delay slot
	fpadd32	%f16,%f18,%f16
	fpadd32	%f16,BOUNDRY,%f16
	fmuld	%f16,TINY,%f16
1:
	tst	%l2
	bge,pt	%icc,1f
! delay slot
	fpadd32	%f26,%f28,%f26
	fpadd32	%f26,BOUNDRY,%f26
	fmuld	%f26,TINY,%f26
1:
	addcc	%i0,-1,%i0
	bg,pn	%icc,.loop0
! delay slot
	nop
	ba,pt	%icc,.endloop0
! delay slot
	nop


! .endloop2: the count ran out with elements 0 and 1 still pending.
! Compute element 1 (x1 in %f10) non-pipelined, store it through %o1,
! then fall into .endloop1.

.endloop2:
	for	%f12,TWO96,%f12
	fmuld	%f10,INVLN2_256,%f14
	faddd	%f14,%f12,%f14
	fsubd	%f14,%f12,%f18
	fmuld	%f18,LN2_256H,%f12
	fsubd	%f10,%f12,%f10
	fmuld	%f18,LN2_256L,%f14
	fsubd	%f10,%f14,%f10
	fmuld	%f10,B3,%f12
	fdtoi	%f18,%f18
	st	%f18,[%fp+m1]
	fmuld	%f10,%f10,%f14
	faddd	%f12,B2,%f12
	fmuld	%f10,B1,%f32
	fand	%f18,NEGINF,%f18
	ld	[%fp+m1],%l1
	fmuld	%f14,%f12,%f14
	faddd	%f32,ONE,%f32
	sra	%l1,8,%o4
	and	%o4,0xff0,%o4
	faddd	%f32,%f14,%f32
	ldd	[%g1+%o4],%f14
	add	%o4,8,%o4
	sra	%l1,20,%l1
	ldd	[%g1+%o4],%f30
	addcc	%l1,1021,%l1
	fmuld	%f10,%f32,%f10
	fmuld	%f10,%f14,%f10
	faddd	%f10,%f30,%f30
	faddd	%f30,%f14,%f30
	bge,pt	%icc,1f
! delay slot
	fpadd32	%f30,%f18,%f30
	fpadd32	%f30,BOUNDRY,%f30
	fmuld	%f30,TINY,%f30
1:
	st	%f30,[%o1]
	st	%f31,[%o1+4]

! .endloop1: compute the pending element 0 (x0 in %f0) non-pipelined,
! store it through %o0, then fall into .endloop0.

.endloop1:
	for	%f2,TWO96,%f2
	fmuld	%f0,INVLN2_256,%f4
	faddd	%f4,%f2,%f4
	fsubd	%f4,%f2,%f8
	fmuld	%f8,LN2_256H,%f2
	fsubd	%f0,%f2,%f0
	fmuld	%f8,LN2_256L,%f4
	fsubd	%f0,%f4,%f0
	fmuld	%f0,B3,%f2
	fdtoi	%f8,%f8
	st	%f8,[%fp+m0]
	fmuld	%f0,%f0,%f4
	faddd	%f2,B2,%f2
	fmuld	%f0,B1,%f32
	fand	%f8,NEGINF,%f8
	ld	[%fp+m0],%l0
	fmuld	%f4,%f2,%f4
	faddd	%f32,ONE,%f32
	sra	%l0,8,%o4
	and	%o4,0xff0,%o4
	faddd	%f32,%f4,%f32
	ldd	[%g1+%o4],%f4
	add	%o4,8,%o4
	sra	%l0,20,%o3
	ldd	[%g1+%o4],%f30
	addcc	%o3,1021,%o3
	fmuld	%f0,%f32,%f0
	fmuld	%f0,%f4,%f0
	faddd	%f0,%f30,%f30
	faddd	%f30,%f4,%f30
	bge,pt	%icc,1f
! delay slot
	fpadd32	%f30,%f8,%f30
	fpadd32	%f30,BOUNDRY,%f30
	fmuld	%f30,TINY,%f30
1:
	st	%f30,[%o0]
	st	%f31,[%o0+4]

! .endloop0: flush the three results still held from the last full
! pass (or write to the jnk scratch slot if there was none) and return.

.endloop0:
	st	%f6,[%l3]
	st	%f7,[%l3+4]
	st	%f16,[%l4]
	st	%f17,[%l4+4]
	st	%f26,[%l5]
	st	%f27,[%l5+4]
	ret
	restore


! .range0: element 0 failed the fast-path range test.  The result is
! built in %f4 and stored through %o0 at 3:, after which x1 is moved
! into the x0 slot and the loop restarts at .loop0 -- unless x turns
! out to be usable after all, in which case the code rejoins .loop1.

.range0:
	cmp	%l0,%l6
	bl,a,pt	%icc,3f			! if x is tiny
! delay slot, annulled if branch not taken
	faddd	%f0,ONE,%f4

	cmp	%l0,%o5
	bg,pt	%icc,1f			! if x is huge, inf, nan
! delay slot
	nop

	fcmpd	%fcc0,%f0,THRESH
	fbg,a,pt %fcc0,3f		! if x is huge and positive
! delay slot, annulled if branch not taken
	fmuld	HUGE,HUGE,%f4

! x is near the extremes but within range; return to the loop
	addcc	%i0,-1,%i0
	add	%i3,%i4,%i3		! y += stridey
	ble,pn	%icc,.endloop1
! delay slot
	andn	%l1,%i5,%l1
	add	%i1,%i2,%i1		! x += stridex
	for	%f2,TWO96,%f2
	ba,pt	%icc,.loop1
! delay slot
	fmuld	%f0,INVLN2_256,%f4

1:
	cmp	%l0,%o7
	bl,pn	%icc,2f			! if x is finite
! delay slot
	nop
	fzero	%f4
	fcmpd	%fcc0,%f0,NEGINF
	fmovdne	%fcc0,%f0,%f4
	ba,pt	%icc,3f
	fmuld	%f4,%f4,%f4		! x*x or zero*zero

2:
	fmovd	HUGE,%f4
	fcmpd	%fcc0,%f0,ONE
	fmovdl	%fcc0,TINY,%f4
	fmuld	%f4,%f4,%f4		! huge*huge or tiny*tiny

3:
	st	%f4,[%o0]
	andn	%l1,%i5,%l0
	add	%i1,%i2,%i1		! x += stridex
	fmovd	%f10,%f0
	st	%f5,[%o0+4]
	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	add	%i3,%i4,%i3		! y += stridey
	ba,pt	%icc,.endloop0
! delay slot
	nop


! .range1: same as .range0 for element 1 (x1 in %f10, result in %f14,
! stored through %o1); continues at .loop1 or rejoins at .loop2.

.range1:
	cmp	%l1,%l6
	bl,a,pt	%icc,3f			! if x is tiny
! delay slot, annulled if branch not taken
	faddd	%f10,ONE,%f14

	cmp	%l1,%o5
	bg,pt	%icc,1f			! if x is huge, inf, nan
! delay slot
	nop

	fcmpd	%fcc0,%f10,THRESH
	fbg,a,pt %fcc0,3f		! if x is huge and positive
! delay slot, annulled if branch not taken
	fmuld	HUGE,HUGE,%f14

! x is near the extremes but within range; return to the loop
	addcc	%i0,-1,%i0
	add	%i3,%i4,%i3		! y += stridey
	ble,pn	%icc,.endloop2
! delay slot
	andn	%l2,%i5,%l2
	add	%i1,%i2,%i1		! x += stridex
	for	%f12,TWO96,%f12
	ba,pt	%icc,.loop2
! delay slot
	fmuld	%f10,INVLN2_256,%f14

1:
	cmp	%l1,%o7
	bl,pn	%icc,2f			! if x is finite
! delay slot
	nop
	fzero	%f14
	fcmpd	%fcc0,%f10,NEGINF
	fmovdne	%fcc0,%f10,%f14
	ba,pt	%icc,3f
	fmuld	%f14,%f14,%f14		! x*x or zero*zero

2:
	fmovd	HUGE,%f14
	fcmpd	%fcc0,%f10,ONE
	fmovdl	%fcc0,TINY,%f14
	fmuld	%f14,%f14,%f14		! huge*huge or tiny*tiny

3:
	st	%f14,[%o1]
	andn	%l2,%i5,%l1
	add	%i1,%i2,%i1		! x += stridex
	fmovd	%f20,%f10
	st	%f15,[%o1+4]
	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop1
! delay slot
	add	%i3,%i4,%i3		! y += stridey
	ba,pt	%icc,.endloop1
! delay slot
	nop


! .range2: same as .range0 for element 2 (x2 in %f20, result in %f24).
! py2 has not been set yet at this point, so the result is stored
! directly through %i3, and the next argument is loaded here before
! retrying at .loop2.

.range2:
	cmp	%l2,%l6
	bl,a,pt	%icc,3f			! if x is tiny
! delay slot, annulled if branch not taken
	faddd	%f20,ONE,%f24

	cmp	%l2,%o5
	bg,pt	%icc,1f			! if x is huge, inf, nan
! delay slot
	nop

	fcmpd	%fcc0,%f20,THRESH
	fbg,a,pt %fcc0,3f		! if x is huge and positive
! delay slot, annulled if branch not taken
	fmuld	HUGE,HUGE,%f24

! x is near the extremes but within range; return to the loop
	ba,pt	%icc,.cont
! delay slot
	faddd	%f4,%f2,%f4

1:
	cmp	%l2,%o7
	bl,pn	%icc,2f			! if x is finite
! delay slot
	nop
	fzero	%f24
	fcmpd	%fcc0,%f20,NEGINF
	fmovdne	%fcc0,%f20,%f24
	ba,pt	%icc,3f
	fmuld	%f24,%f24,%f24		! x*x or zero*zero

2:
	fmovd	HUGE,%f24
	fcmpd	%fcc0,%f20,ONE
	fmovdl	%fcc0,TINY,%f24
	fmuld	%f24,%f24,%f24		! huge*huge or tiny*tiny

3:
	st	%f24,[%i3]
	st	%f25,[%i3+4]
	lda	[%i1]%asi,%l2		! preload next argument
	lda	[%i1]%asi,%f20
	lda	[%i1+4]%asi,%f21
	andn	%l2,%i5,%l2
	add	%i1,%i2,%i1		! x += stridex
	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop2
! delay slot
	add	%i3,%i4,%i3		! y += stridey
	ba,pt	%icc,.endloop2
! delay slot
	nop

	SET_SIZE(__vexp)