[444] | 1 | /* -------------------------------------------------------------- */ |
---|
| 2 | /* (C)Copyright 2007,2008, */ |
---|
| 3 | /* International Business Machines Corporation */ |
---|
| 4 | /* All Rights Reserved. */ |
---|
| 5 | /* */ |
---|
| 6 | /* Redistribution and use in source and binary forms, with or */ |
---|
| 7 | /* without modification, are permitted provided that the */ |
---|
| 8 | /* following conditions are met: */ |
---|
| 9 | /* */ |
---|
| 10 | /* - Redistributions of source code must retain the above copyright*/ |
---|
| 11 | /* notice, this list of conditions and the following disclaimer. */ |
---|
| 12 | /* */ |
---|
| 13 | /* - Redistributions in binary form must reproduce the above */ |
---|
| 14 | /* copyright notice, this list of conditions and the following */ |
---|
| 15 | /* disclaimer in the documentation and/or other materials */ |
---|
| 16 | /* provided with the distribution. */ |
---|
| 17 | /* */ |
---|
| 18 | /* - Neither the name of IBM Corporation nor the names of its */ |
---|
| 19 | /* contributors may be used to endorse or promote products */ |
---|
| 20 | /* derived from this software without specific prior written */ |
---|
| 21 | /* permission. */ |
---|
| 22 | /* */ |
---|
| 23 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ |
---|
| 24 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ |
---|
| 25 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ |
---|
| 26 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ |
---|
| 27 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */ |
---|
| 28 | /* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */ |
---|
| 29 | /* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */ |
---|
| 30 | /* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */ |
---|
| 31 | /* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ |
---|
| 32 | /* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ |
---|
| 33 | /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */ |
---|
| 34 | /* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */ |
---|
| 35 | /* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ |
---|
| 36 | /* -------------------------------------------------------------- */ |
---|
| 37 | /* PROLOG END TAG zYx */ |
---|
| 38 | #ifdef __SPU__ |
---|
| 39 | #ifndef _ATANHD2_H_ |
---|
| 40 | #define _ATANHD2_H_ 1 |
---|
| 41 | |
---|
| 42 | #include <spu_intrinsics.h> |
---|
| 43 | #include "logd2.h" |
---|
| 44 | |
---|
| 45 | /* |
---|
| 46 | * FUNCTION |
---|
| 47 | * vector double _atanhd2(vector double x) |
---|
| 48 | * |
---|
| 49 | * DESCRIPTION |
---|
| 50 | * The atanhd2 function returns a vector containing the hyperbolic |
---|
| 51 | * arctangents of the corresponding elements of the input vector. |
---|
| 52 | * |
---|
| 53 | * We are using the formula: |
---|
| 54 | * atanh x = 1/2 * ln((1 + x)/(1 - x)) = 1/2 * [ln(1+x) - ln(1-x)] |
---|
| 55 | * and the anti-symmetry of atanh. |
---|
| 56 | * |
---|
| 57 | * For x near 0, we use the Taylor series: |
---|
| 58 | * atanh x = x + x^3/3 + x^5/5 + x^7/7 + x^9/9 + ... |
---|
| 59 | * |
---|
| 60 | * Special Cases: |
---|
| 61 | * - atanh(1) = Infinity |
---|
| 62 | * - atanh(-1) = -Infinity |
---|
| 63 | * - atanh(x) for |x| > 1 = Undefined |
---|
| 64 | * |
---|
| 65 | */ |
---|
| 66 | |
---|
/*
 * Coefficients of the Maclaurin expansion of atanh about x = 0:
 *
 *     atanh x = x + x^3/3 + x^5/5 + x^7/7 + ...
 *
 * SMD_DP_ATANH_MACkk is 1/kk, i.e. the coefficient of the x^kk term.
 */
#define SMD_DP_ATANH_MAC01  1.000000000000000000000000000000E0    /* 1/1  */
#define SMD_DP_ATANH_MAC03  3.333333333333333333333333333333E-1   /* 1/3  */
#define SMD_DP_ATANH_MAC05  2.000000000000000000000000000000E-1   /* 1/5  */
#define SMD_DP_ATANH_MAC07  1.428571428571428571428571428571E-1   /* 1/7  */
#define SMD_DP_ATANH_MAC09  1.111111111111111111111111111111E-1   /* 1/9  */
#define SMD_DP_ATANH_MAC11  9.090909090909090909090909090909E-2   /* 1/11 */
#define SMD_DP_ATANH_MAC13  7.692307692307692307692307692308E-2   /* 1/13 */
#define SMD_DP_ATANH_MAC15  6.666666666666666666666666666667E-2   /* 1/15 */
#define SMD_DP_ATANH_MAC17  5.882352941176470588235294117647E-2   /* 1/17 */

/*
 * Terms x^19 through x^39.  These are deliberately compiled out; the
 * polynomial evaluated in _atanhd2 ends at the x^17 term.
 */
#if 0
#define SMD_DP_ATANH_MAC19  5.263157894736842105263157894737E-2   /* 1/19 */
#define SMD_DP_ATANH_MAC21  4.761904761904761904761904761905E-2   /* 1/21 */
#define SMD_DP_ATANH_MAC23  4.347826086956521739130434782609E-2   /* 1/23 */
#define SMD_DP_ATANH_MAC25  4.000000000000000000000000000000E-2   /* 1/25 */
#define SMD_DP_ATANH_MAC27  3.703703703703703703703703703704E-2   /* 1/27 */
#define SMD_DP_ATANH_MAC29  3.448275862068965517241379310345E-2   /* 1/29 */
#define SMD_DP_ATANH_MAC31  3.225806451612903225806451612903E-2   /* 1/31 */
#define SMD_DP_ATANH_MAC33  3.030303030303030303030303030303E-2   /* 1/33 */
#define SMD_DP_ATANH_MAC35  2.857142857142857142857142857143E-2   /* 1/35 */
#define SMD_DP_ATANH_MAC37  2.702702702702702702702702702703E-2   /* 1/37 */
#define SMD_DP_ATANH_MAC39  2.564102564102564102564102564103E-2   /* 1/39 */
#endif
---|
| 93 | |
---|
| 94 | |
---|
| 95 | static __inline vector double _atanhd2(vector double x) |
---|
| 96 | { |
---|
| 97 | vec_uchar16 dup_even = ((vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }); |
---|
| 98 | vec_double2 sign_mask = spu_splats(-0.0); |
---|
| 99 | vec_double2 oned = spu_splats(1.0); |
---|
| 100 | vec_double2 onehalfd = spu_splats(0.5); |
---|
| 101 | vec_double2 xabs, xsqu; |
---|
| 102 | /* Where we switch from maclaurin to formula */ |
---|
| 103 | vec_float4 switch_approx = spu_splats(0.125f); |
---|
| 104 | vec_uint4 use_form; |
---|
| 105 | vec_float4 xf; |
---|
| 106 | vec_double2 result, fresult, mresult;; |
---|
| 107 | |
---|
| 108 | xabs = spu_andc(x, sign_mask); |
---|
| 109 | xsqu = spu_mul(x, x); |
---|
| 110 | |
---|
| 111 | xf = spu_roundtf(xabs); |
---|
| 112 | xf = spu_shuffle(xf, xf, dup_even); |
---|
| 113 | |
---|
| 114 | /* |
---|
| 115 | * Formula: |
---|
| 116 | * atanh = 1/2 * ln((1 + x)/(1 - x)) = 1/2 * [ln(1+x) - ln(1-x)] |
---|
| 117 | */ |
---|
| 118 | fresult = spu_sub(_logd2(spu_add(oned, xabs)), _logd2(spu_sub(oned, xabs))); |
---|
| 119 | fresult = spu_mul(fresult, onehalfd); |
---|
| 120 | |
---|
| 121 | |
---|
| 122 | /* |
---|
| 123 | * Taylor Series |
---|
| 124 | */ |
---|
| 125 | mresult = spu_madd(xsqu, spu_splats(SMD_DP_ATANH_MAC17), spu_splats(SMD_DP_ATANH_MAC15)); |
---|
| 126 | mresult = spu_madd(xsqu, mresult, spu_splats(SMD_DP_ATANH_MAC13)); |
---|
| 127 | mresult = spu_madd(xsqu, mresult, spu_splats(SMD_DP_ATANH_MAC11)); |
---|
| 128 | mresult = spu_madd(xsqu, mresult, spu_splats(SMD_DP_ATANH_MAC09)); |
---|
| 129 | mresult = spu_madd(xsqu, mresult, spu_splats(SMD_DP_ATANH_MAC07)); |
---|
| 130 | mresult = spu_madd(xsqu, mresult, spu_splats(SMD_DP_ATANH_MAC05)); |
---|
| 131 | mresult = spu_madd(xsqu, mresult, spu_splats(SMD_DP_ATANH_MAC03)); |
---|
| 132 | mresult = spu_madd(xsqu, mresult, spu_splats(SMD_DP_ATANH_MAC01)); |
---|
| 133 | mresult = spu_mul(xabs, mresult); |
---|
| 134 | |
---|
| 135 | |
---|
| 136 | /* |
---|
| 137 | * Choose between series and formula |
---|
| 138 | */ |
---|
| 139 | use_form = spu_cmpgt(xf, switch_approx); |
---|
| 140 | result = spu_sel(mresult, fresult, (vec_ullong2)use_form); |
---|
| 141 | |
---|
| 142 | /* |
---|
| 143 | * Spec says results are undefined for |x| > 1, so |
---|
| 144 | * no boundary tests needed here. |
---|
| 145 | */ |
---|
| 146 | |
---|
| 147 | /* Restore sign - atanh is an anti-symmetric */ |
---|
| 148 | result = spu_sel(result, x, (vec_ullong2)sign_mask); |
---|
| 149 | |
---|
| 150 | return result; |
---|
| 151 | } |
---|
| 152 | |
---|
| 153 | #endif /* _ATANHD2_H_ */ |
---|
| 154 | #endif /* __SPU__ */ |
---|