1 | /* -------------------------------------------------------------- */ |
---|
2 | /* (C)Copyright 2007,2008, */ |
---|
3 | /* International Business Machines Corporation */ |
---|
4 | /* All Rights Reserved. */ |
---|
5 | /* */ |
---|
6 | /* Redistribution and use in source and binary forms, with or */ |
---|
7 | /* without modification, are permitted provided that the */ |
---|
8 | /* following conditions are met: */ |
---|
9 | /* */ |
---|
10 | /* - Redistributions of source code must retain the above copyright*/ |
---|
11 | /* notice, this list of conditions and the following disclaimer. */ |
---|
12 | /* */ |
---|
13 | /* - Redistributions in binary form must reproduce the above */ |
---|
14 | /* copyright notice, this list of conditions and the following */ |
---|
15 | /* disclaimer in the documentation and/or other materials */ |
---|
16 | /* provided with the distribution. */ |
---|
17 | /* */ |
---|
18 | /* - Neither the name of IBM Corporation nor the names of its */ |
---|
19 | /* contributors may be used to endorse or promote products */ |
---|
20 | /* derived from this software without specific prior written */ |
---|
21 | /* permission. */ |
---|
22 | /* */ |
---|
23 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ |
---|
24 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ |
---|
25 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ |
---|
26 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ |
---|
27 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */ |
---|
28 | /* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */ |
---|
29 | /* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */ |
---|
30 | /* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */ |
---|
31 | /* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ |
---|
32 | /* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ |
---|
33 | /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */ |
---|
34 | /* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */ |
---|
35 | /* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ |
---|
36 | /* -------------------------------------------------------------- */ |
---|
37 | /* PROLOG END TAG zYx */ |
---|
38 | #ifdef __SPU__ |
---|
39 | #ifndef _ATANHD2_H_ |
---|
40 | #define _ATANHD2_H_ 1 |
---|
41 | |
---|
42 | #include <spu_intrinsics.h> |
---|
43 | #include "logd2.h" |
---|
44 | |
---|
45 | /* |
---|
46 | * FUNCTION |
---|
47 | * vector double _atanhd2(vector double x) |
---|
48 | * |
---|
49 | * DESCRIPTION |
---|
50 | * The atanhd2 function returns a vector containing the hyperbolic |
---|
51 | * arctangents of the corresponding elements of the input vector. |
---|
52 | * |
---|
53 | * We are using the formula: |
---|
54 | * atanh x = 1/2 * ln((1 + x)/(1 - x)) = 1/2 * [ln(1+x) - ln(1-x)] |
---|
55 | * and the anti-symmetry of atanh. |
---|
56 | * |
---|
57 | * For x near 0, we use the Taylor series: |
---|
58 | * atanh x = x + x^3/3 + x^5/5 + x^7/7 + x^9/9 + ... |
---|
59 | * |
---|
60 | * Special Cases: |
---|
61 | * - atanh(1) = Infinity |
---|
62 | * - atanh(-1) = -Infinity |
---|
63 | * - atanh(x) for |x| > 1 = Undefined |
---|
64 | * |
---|
65 | */ |
---|
66 | |
---|
67 | /* |
---|
68 | * Maclaurin Series Coefficients |
---|
69 | * for x near 0. |
---|
70 | */ |
---|
71 | #define SMD_DP_ATANH_MAC01 1.000000000000000000000000000000E0 |
---|
72 | #define SMD_DP_ATANH_MAC03 3.333333333333333333333333333333E-1 |
---|
73 | #define SMD_DP_ATANH_MAC05 2.000000000000000000000000000000E-1 |
---|
74 | #define SMD_DP_ATANH_MAC07 1.428571428571428571428571428571E-1 |
---|
75 | #define SMD_DP_ATANH_MAC09 1.111111111111111111111111111111E-1 |
---|
76 | #define SMD_DP_ATANH_MAC11 9.090909090909090909090909090909E-2 |
---|
77 | #define SMD_DP_ATANH_MAC13 7.692307692307692307692307692308E-2 |
---|
78 | #define SMD_DP_ATANH_MAC15 6.666666666666666666666666666667E-2 |
---|
79 | #define SMD_DP_ATANH_MAC17 5.882352941176470588235294117647E-2 |
---|
80 | #if 0 |
---|
81 | #define SMD_DP_ATANH_MAC19 5.263157894736842105263157894737E-2 |
---|
82 | #define SMD_DP_ATANH_MAC21 4.761904761904761904761904761905E-2 |
---|
83 | #define SMD_DP_ATANH_MAC23 4.347826086956521739130434782609E-2 |
---|
84 | #define SMD_DP_ATANH_MAC25 4.000000000000000000000000000000E-2 |
---|
85 | #define SMD_DP_ATANH_MAC27 3.703703703703703703703703703704E-2 |
---|
86 | #define SMD_DP_ATANH_MAC29 3.448275862068965517241379310345E-2 |
---|
87 | #define SMD_DP_ATANH_MAC31 3.225806451612903225806451612903E-2 |
---|
88 | #define SMD_DP_ATANH_MAC33 3.030303030303030303030303030303E-2 |
---|
89 | #define SMD_DP_ATANH_MAC35 2.857142857142857142857142857143E-2 |
---|
90 | #define SMD_DP_ATANH_MAC37 2.702702702702702702702702702703E-2 |
---|
91 | #define SMD_DP_ATANH_MAC39 2.564102564102564102564102564103E-2 |
---|
92 | #endif |
---|
93 | |
---|
94 | |
---|
95 | static __inline vector double _atanhd2(vector double x) |
---|
96 | { |
---|
97 | vec_uchar16 dup_even = ((vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }); |
---|
98 | vec_double2 sign_mask = spu_splats(-0.0); |
---|
99 | vec_double2 oned = spu_splats(1.0); |
---|
100 | vec_double2 onehalfd = spu_splats(0.5); |
---|
101 | vec_double2 xabs, xsqu; |
---|
102 | /* Where we switch from maclaurin to formula */ |
---|
103 | vec_float4 switch_approx = spu_splats(0.125f); |
---|
104 | vec_uint4 use_form; |
---|
105 | vec_float4 xf; |
---|
106 | vec_double2 result, fresult, mresult;; |
---|
107 | |
---|
108 | xabs = spu_andc(x, sign_mask); |
---|
109 | xsqu = spu_mul(x, x); |
---|
110 | |
---|
111 | xf = spu_roundtf(xabs); |
---|
112 | xf = spu_shuffle(xf, xf, dup_even); |
---|
113 | |
---|
114 | /* |
---|
115 | * Formula: |
---|
116 | * atanh = 1/2 * ln((1 + x)/(1 - x)) = 1/2 * [ln(1+x) - ln(1-x)] |
---|
117 | */ |
---|
118 | fresult = spu_sub(_logd2(spu_add(oned, xabs)), _logd2(spu_sub(oned, xabs))); |
---|
119 | fresult = spu_mul(fresult, onehalfd); |
---|
120 | |
---|
121 | |
---|
122 | /* |
---|
123 | * Taylor Series |
---|
124 | */ |
---|
125 | mresult = spu_madd(xsqu, spu_splats(SMD_DP_ATANH_MAC17), spu_splats(SMD_DP_ATANH_MAC15)); |
---|
126 | mresult = spu_madd(xsqu, mresult, spu_splats(SMD_DP_ATANH_MAC13)); |
---|
127 | mresult = spu_madd(xsqu, mresult, spu_splats(SMD_DP_ATANH_MAC11)); |
---|
128 | mresult = spu_madd(xsqu, mresult, spu_splats(SMD_DP_ATANH_MAC09)); |
---|
129 | mresult = spu_madd(xsqu, mresult, spu_splats(SMD_DP_ATANH_MAC07)); |
---|
130 | mresult = spu_madd(xsqu, mresult, spu_splats(SMD_DP_ATANH_MAC05)); |
---|
131 | mresult = spu_madd(xsqu, mresult, spu_splats(SMD_DP_ATANH_MAC03)); |
---|
132 | mresult = spu_madd(xsqu, mresult, spu_splats(SMD_DP_ATANH_MAC01)); |
---|
133 | mresult = spu_mul(xabs, mresult); |
---|
134 | |
---|
135 | |
---|
136 | /* |
---|
137 | * Choose between series and formula |
---|
138 | */ |
---|
139 | use_form = spu_cmpgt(xf, switch_approx); |
---|
140 | result = spu_sel(mresult, fresult, (vec_ullong2)use_form); |
---|
141 | |
---|
142 | /* |
---|
143 | * Spec says results are undefined for |x| > 1, so |
---|
144 | * no boundary tests needed here. |
---|
145 | */ |
---|
146 | |
---|
147 | /* Restore sign - atanh is an anti-symmetric */ |
---|
148 | result = spu_sel(result, x, (vec_ullong2)sign_mask); |
---|
149 | |
---|
150 | return result; |
---|
151 | } |
---|
152 | |
---|
153 | #endif /* _ATANHD2_H_ */ |
---|
154 | #endif /* __SPU__ */ |
---|