1 | /* |
---|
2 | * Copyright (c) 2014 |
---|
3 | * Imagination Technologies Limited. |
---|
4 | * |
---|
5 | * Redistribution and use in source and binary forms, with or without |
---|
6 | * modification, are permitted provided that the following conditions |
---|
7 | * are met: |
---|
8 | * 1. Redistributions of source code must retain the above copyright |
---|
9 | * notice, this list of conditions and the following disclaimer. |
---|
10 | * 2. Redistributions in binary form must reproduce the above copyright |
---|
11 | * notice, this list of conditions and the following disclaimer in the |
---|
12 | * documentation and/or other materials provided with the distribution. |
---|
13 | * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its |
---|
14 | * contributors may be used to endorse or promote products derived from |
---|
15 | * this software without specific prior written permission. |
---|
16 | * |
---|
17 | * THIS SOFTWARE IS PROVIDED BY IMAGINATION TECHNOLOGIES LIMITED ``AS IS'' AND |
---|
18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
---|
19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
---|
20 | * ARE DISCLAIMED. IN NO EVENT SHALL IMAGINATION TECHNOLOGIES LIMITED BE LIABLE |
---|
21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
---|
22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
---|
23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
---|
24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
---|
25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
---|
26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
---|
27 | * SUCH DAMAGE. |
---|
28 | */ |
---|
29 | |
---|
30 | #ifdef ANDROID_CHANGES |
---|
31 | # include "machine/asm.h" |
---|
32 | # include "machine/regdef.h" |
---|
33 | #elif _LIBC |
---|
34 | # include <sysdep.h> |
---|
35 | # include <regdef.h> |
---|
36 | # include <sys/asm.h> |
---|
37 | #elif _COMPILING_NEWLIB |
---|
38 | # include "machine/asm.h" |
---|
39 | # include "machine/regdef.h" |
---|
40 | #else |
---|
41 | # include <regdef.h> |
---|
42 | # include <sys/asm.h> |
---|
43 | #endif |
---|
44 | |
---|
45 | /* DELAY_READ is placed after each word comparison in the aligned loop. |
---|
46 | By default it is a nop, so a whole word is loaded (possibly including |
---|
47 | bytes past the NUL terminator) but the word following the end of the |
---|
48 | string is never touched. Defining ENABLE_READAHEAD makes it empty, which |
---|
49 | is faster but lets the next word be loaded past the end of the string; |
---|
50 | strictly speaking strcmp is not allowed to do that. */ |
---|
51 | #ifndef ENABLE_READAHEAD |
---|
52 | # define DELAY_READ nop |
---|
53 | #else |
---|
54 | # define DELAY_READ |
---|
55 | #endif |
---|
56 | |
---|
57 | /* Locating the mismatch with CLZ is opt-in (ENABLE_CLZ) and needs |
---|
58 | __mips_isa_rev > 1; it measured slower on a little endian machine. */ |
---|
59 | #if (__mips_isa_rev > 1) && defined(ENABLE_CLZ) |
---|
60 | # define USE_CLZ |
---|
61 | #endif |
---|
62 | |
---|
63 | /* Supply L() for local labels when asm.h does not: $L prefix on O32, .L prefix on every other ABI. */ |
---|
64 | #if !defined(L) |
---|
65 | # if _MIPS_SIM != _ABIO32 |
---|
66 | # define L(name) .L ## name |
---|
67 | # else |
---|
68 | # define L(name) $L ## name |
---|
69 | # endif |
---|
70 | #endif |
---|
71 | |
---|
72 | /* Some asm.h files do not have the PTR_ADDIU macro definition. The fallback picks daddiu when USE_DOUBLE is defined or when the compiler reports 64-bit pointers (_MIPS_SZPTR == 64), so pointer increments are not done with a 32-bit add on 64-bit-pointer ABIs; otherwise it picks addiu. */ |
---|
73 | #ifndef PTR_ADDIU |
---|
74 | # if defined(USE_DOUBLE) || (defined(_MIPS_SZPTR) && _MIPS_SZPTR == 64) |
---|
75 | # define PTR_ADDIU daddiu |
---|
76 | # else |
---|
77 | # define PTR_ADDIU addiu |
---|
78 | # endif |
---|
79 | #endif |
---|
80 | |
---|
81 | /* The entry point is named strcmp unless the build defines STRCMP_NAME. */ |
---|
82 | #if !defined(STRCMP_NAME) |
---|
83 | # define STRCMP_NAME strcmp |
---|
84 | #endif |
---|
85 | /* STRCMP_NAME: a0 and a1 point to the two strings. Returns in v0 zero when the strings match, otherwise the difference (byte of the first string minus byte of the second, both zero-extended) at the first position where they differ. */ |
---|
86 | #ifdef ANDROID_CHANGES |
---|
87 | LEAF(STRCMP_NAME, 0) |
---|
88 | #else |
---|
89 | LEAF(STRCMP_NAME) |
---|
90 | #endif |
---|
91 | .set nomips16 |
---|
92 | .set noreorder /* delay slots are filled by hand: the instruction after every branch or jump below always executes */ |
---|
93 | /* Scratch registers: v0/v1 hold the current word or byte of the first/second string; t0, t1, t8 and t9 are temporaries. */ |
---|
94 | or t0, a0, a1 |
---|
95 | andi t0,0x3 /* nonzero if either pointer is not 4-byte aligned */ |
---|
96 | bne t0, zero, L(byteloop) |
---|
97 | /* The 'lui t8' below sits in the delay slot of the branch above; it only writes t8, which the byte loop reloads before using. */ |
---|
98 | /* Both strings are 4 byte aligned at this point. */ |
---|
99 | /* t8 = 0x01010101 and t9 = 0x7f7f7f7f: constants for the zero-byte test in STRCMP32. */ |
---|
100 | lui t8, 0x0101 |
---|
101 | ori t8, t8, 0x0101 |
---|
102 | lui t9, 0x7f7f |
---|
103 | ori t9, 0x7f7f |
---|
104 | /* STRCMP32(OFFSET): load the words at OFFSET(a0) and OFFSET(a1). Branch to L(worddiff) if they differ; otherwise branch to L(returnzero) if (v0 - 0x01010101) & ~(v0 | 0x7f7f7f7f) is nonzero, i.e. the word contains a zero byte. The last bne has no delay-slot filler of its own: the instruction following the macro invocation supplies it. */ |
---|
105 | #define STRCMP32(OFFSET) \ |
---|
106 | lw v0, OFFSET(a0); \ |
---|
107 | lw v1, OFFSET(a1); \ |
---|
108 | subu t0, v0, t8; \ |
---|
109 | bne v0, v1, L(worddiff); \ |
---|
110 | nor t1, v0, t9; \ |
---|
111 | and t0, t0, t1; \ |
---|
112 | bne t0, zero, L(returnzero) |
---|
113 | /* Aligned loop: eight words (32 bytes) per iteration. DELAY_READ follows each STRCMP32 and fills its delay slot when it is a nop; when it is empty the next word's load lands in the slot instead. */ |
---|
114 | L(wordloop): |
---|
115 | STRCMP32(0) |
---|
116 | DELAY_READ |
---|
117 | STRCMP32(4) |
---|
118 | DELAY_READ |
---|
119 | STRCMP32(8) |
---|
120 | DELAY_READ |
---|
121 | STRCMP32(12) |
---|
122 | DELAY_READ |
---|
123 | STRCMP32(16) |
---|
124 | DELAY_READ |
---|
125 | STRCMP32(20) |
---|
126 | DELAY_READ |
---|
127 | STRCMP32(24) |
---|
128 | DELAY_READ |
---|
129 | STRCMP32(28) |
---|
130 | PTR_ADDIU a0, a0, 32 /* delay slot of the last STRCMP32 */ |
---|
131 | b L(wordloop) |
---|
132 | PTR_ADDIU a1, a1, 32 /* delay slot of the loop branch */ |
---|
133 | /* Reached when two equal words contain a terminator: the strings match. */ |
---|
134 | L(returnzero): |
---|
135 | j ra |
---|
136 | move v0, zero |
---|
137 | L(worddiff): /* v0 != v1: return the difference at the first byte (in string order) that is zero in v0 or differs from v1 */ |
---|
138 | #ifdef USE_CLZ /* An exact zero-byte mask is built here: the borrow-based test used in STRCMP32 can also flag a 0x01 byte lying just above a zero byte, and on big endian that byte comes earlier in the string, which would make CLZ pick the wrong position. */ |
---|
139 | and t0, v0, t9 /* t0 = low seven bits of each byte of v0 */ |
---|
140 | addu t0, t0, t9 /* bit 7 of each byte set iff those low bits are nonzero; the sum never carries across a byte */ |
---|
141 | or t0, t0, v0 /* bit 7 of each byte set iff that byte of v0 is nonzero */ |
---|
142 | nor t1, t0, t9 /* t1 = 0x80 in exactly the zero bytes of v0 */ |
---|
143 | xor t0, v0, v1 |
---|
144 | or t0, t0, t1 /* t0 has bits set in every byte that differs or is a terminator */ |
---|
145 | # if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ |
---|
146 | wsbh t0, t0 |
---|
147 | rotr t0, t0, 16 /* wsbh + rotr 16 reverse the four bytes, so the first string byte becomes the most significant */ |
---|
148 | # endif |
---|
149 | clz t1, t0 |
---|
150 | and t1, 0xf8 /* round the leading-zero count down to a multiple of 8 */ |
---|
151 | # if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ |
---|
152 | neg t1 |
---|
153 | addu t1, 24 /* t1 = 24 - t1: right-rotate count that brings the marked byte to bits 7..0 */ |
---|
154 | # endif |
---|
155 | rotrv v0, v0, t1 |
---|
156 | rotrv v1, v1, t1 |
---|
157 | and v0, v0, 0xff |
---|
158 | and v1, v1, 0xff |
---|
159 | j ra |
---|
160 | subu v0, v0, v1 /* delay slot: return value */ |
---|
161 | #else /* USE_CLZ */ |
---|
162 | # if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ |
---|
163 | andi t0, v0, 0xff |
---|
164 | beq t0, zero, L(wexit01) |
---|
165 | andi t1, v1, 0xff |
---|
166 | bne t0, t1, L(wexit01) |
---|
167 | /* In this ladder the delay slot of each 'beq' extracts the byte of v1 and the delay slot of each 'bne' starts the next test; byte pairs alternate between t0/t1 and t8/t9. */ |
---|
168 | srl t8, v0, 8 |
---|
169 | srl t9, v1, 8 |
---|
170 | andi t8, t8, 0xff |
---|
171 | beq t8, zero, L(wexit89) |
---|
172 | andi t9, t9, 0xff |
---|
173 | bne t8, t9, L(wexit89) |
---|
174 | |
---|
175 | srl t0, v0, 16 |
---|
176 | srl t1, v1, 16 |
---|
177 | andi t0, t0, 0xff |
---|
178 | beq t0, zero, L(wexit01) |
---|
179 | andi t1, t1, 0xff |
---|
180 | bne t0, t1, L(wexit01) |
---|
181 | |
---|
182 | srl t8, v0, 24 |
---|
183 | srl t9, v1, 24 /* last byte: falls through to L(wexit89) */ |
---|
184 | # else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */ |
---|
185 | srl t0, v0, 24 |
---|
186 | beq t0, zero, L(wexit01) |
---|
187 | srl t1, v1, 24 |
---|
188 | bne t0, t1, L(wexit01) |
---|
189 | /* Same ladder as the little endian case, starting from the most significant byte. */ |
---|
190 | srl t8, v0, 16 |
---|
191 | srl t9, v1, 16 |
---|
192 | andi t8, t8, 0xff |
---|
193 | beq t8, zero, L(wexit89) |
---|
194 | andi t9, t9, 0xff |
---|
195 | bne t8, t9, L(wexit89) |
---|
196 | |
---|
197 | srl t0, v0, 8 |
---|
198 | srl t1, v1, 8 |
---|
199 | andi t0, t0, 0xff |
---|
200 | beq t0, zero, L(wexit01) |
---|
201 | andi t1, t1, 0xff |
---|
202 | bne t0, t1, L(wexit01) |
---|
203 | |
---|
204 | andi t8, v0, 0xff |
---|
205 | andi t9, v1, 0xff /* last byte: falls through to L(wexit89) */ |
---|
206 | # endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */ |
---|
207 | /* Exit stubs: the subtraction executes in the delay slot of the return jump. */ |
---|
208 | L(wexit89): |
---|
209 | j ra |
---|
210 | subu v0, t8, t9 |
---|
211 | L(wexit01): |
---|
212 | j ra |
---|
213 | subu v0, t0, t1 |
---|
214 | #endif /* USE_CLZ */ |
---|
215 | |
---|
216 | /* It might seem better to do the 'beq' instruction between the two 'lbu' |
---|
217 | instructions so that the nop is not needed but testing showed that this |
---|
218 | code is actually faster (based on glibc strcmp test). */ |
---|
219 | #define BYTECMP01(OFFSET) \ |
---|
220 | lbu v0, OFFSET(a0); \ |
---|
221 | lbu v1, OFFSET(a1); \ |
---|
222 | beq v0, zero, L(bexit01); \ |
---|
223 | nop; \ |
---|
224 | bne v0, v1, L(bexit01) |
---|
225 | /* BYTECMP89 is the same test using t8/t9, so consecutive byte tests alternate register pairs. */ |
---|
226 | #define BYTECMP89(OFFSET) \ |
---|
227 | lbu t8, OFFSET(a0); \ |
---|
228 | lbu t9, OFFSET(a1); \ |
---|
229 | beq t8, zero, L(bexit89); \ |
---|
230 | nop; \ |
---|
231 | bne t8, t9, L(bexit89) |
---|
232 | /* Byte loop, used when either pointer is unaligned: eight bytes per iteration. The delay slot of the final 'bne' of each BYTECMP is filled by the first load of the next one, or by the PTR_ADDIU after the last one. */ |
---|
233 | L(byteloop): |
---|
234 | BYTECMP01(0) |
---|
235 | BYTECMP89(1) |
---|
236 | BYTECMP01(2) |
---|
237 | BYTECMP89(3) |
---|
238 | BYTECMP01(4) |
---|
239 | BYTECMP89(5) |
---|
240 | BYTECMP01(6) |
---|
241 | BYTECMP89(7) |
---|
242 | PTR_ADDIU a0, a0, 8 |
---|
243 | b L(byteloop) |
---|
244 | PTR_ADDIU a1, a1, 8 /* delay slot of the loop branch */ |
---|
245 | /* Exit stubs for the byte loop: the subtraction executes in the delay slot of the return jump. */ |
---|
246 | L(bexit01): |
---|
247 | j ra |
---|
248 | subu v0, v0, v1 |
---|
249 | L(bexit89): |
---|
250 | j ra |
---|
251 | subu v0, t8, t9 |
---|
252 | |
---|
253 | .set at |
---|
254 | .set reorder |
---|
255 | |
---|
256 | END(STRCMP_NAME) |
---|
257 | #ifndef ANDROID_CHANGES |
---|
258 | # ifdef _LIBC |
---|
259 | libc_hidden_builtin_def (STRCMP_NAME) |
---|
260 | # endif |
---|
261 | #endif |
---|