1 | /* |
---|
2 | * Copyright (c) 2012-2014 ARM Ltd |
---|
3 | * All rights reserved. |
---|
4 | * |
---|
5 | * Redistribution and use in source and binary forms, with or without |
---|
6 | * modification, are permitted provided that the following conditions |
---|
7 | * are met: |
---|
8 | * 1. Redistributions of source code must retain the above copyright |
---|
9 | * notice, this list of conditions and the following disclaimer. |
---|
10 | * 2. Redistributions in binary form must reproduce the above copyright |
---|
11 | * notice, this list of conditions and the following disclaimer in the |
---|
12 | * documentation and/or other materials provided with the distribution. |
---|
13 | * 3. The name of the company may not be used to endorse or promote |
---|
14 | * products derived from this software without specific prior written |
---|
15 | * permission. |
---|
16 | * |
---|
17 | * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED |
---|
18 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF |
---|
19 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
---|
20 | * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
---|
21 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED |
---|
22 | * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
---|
23 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
---|
24 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
---|
25 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
---|
26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
---|
27 | */ |
---|
28 | |
---|
29 | /* Implementation of strcmp for ARMv6. Use ldrd to support wider |
---|
30 | loads, provided the data is sufficiently aligned. Use |
---|
31 | saturating arithmetic to optimize the compares. */ |
---|
32 | |
---|
33 | /* Build Options: |
---|
34 | STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first |
---|
35 | byte in the string. If comparing completely random strings |
---|
36 | the pre-check will save time, since there is a very high |
---|
37 | probability of a mismatch in the first character: we save |
---|
38 | significant overhead if this is the common case. However, |
---|
39 | if strings are likely to be identical (e.g. because we're |
---|
40 | verifying a hit in a hash table), then this check is largely |
---|
41 | redundant. */ |
---|
42 | |
---|
43 | .arm |
---|
44 | |
---|
45 | /* Parameters and result. */ |
---|
46 | #define src1 r0 |
---|
47 | #define src2 r1 |
---|
48 | #define result r0 /* Overlaps src1. */ |
---|
49 | |
---|
50 | /* Internal variables. */ |
---|
51 | #define tmp1 r4 |
---|
52 | #define tmp2 r5 |
---|
53 | #define const_m1 r12 |
---|
54 | |
---|
55 | /* Additional internal variables for 64-bit aligned data. */ |
---|
56 | #define data1a r2 |
---|
57 | #define data1b r3 |
---|
58 | #define data2a r6 |
---|
59 | #define data2b r7 |
---|
60 | #define syndrome_a tmp1 |
---|
61 | #define syndrome_b tmp2 |
---|
62 | |
---|
63 | /* Additional internal variables for 32-bit aligned data. */ |
---|
64 | #define data1 r2 |
---|
65 | #define data2 r3 |
---|
66 | #define syndrome tmp2 |
---|
67 | |
---|
68 | |
---|
69 | /* Macro to compute and return the result value for word-aligned |
---|
70 | cases. */ |
---|
71 | .macro strcmp_epilogue_aligned synd d1 d2 restore_r6 |
---|
72 | #ifdef __ARM_BIG_ENDIAN |
---|
73 | /* If data1 contains a zero byte, then syndrome will contain a 1 in |
---|
74 | bit 7 of that byte. Otherwise, the highest set bit in the |
---|
75 | syndrome will highlight the first different bit. It is therefore |
---|
76 | sufficient to extract the eight bits starting with the syndrome |
---|
77 | bit. */ |
---|
78 | clz tmp1, \synd |
---|
79 | lsl r1, \d2, tmp1 |
---|
80 | .if \restore_r6 |
---|
81 | ldrd r6, r7, [sp, #8] |
---|
82 | .endif |
---|
83 | .cfi_restore 6 |
---|
84 | .cfi_restore 7 |
---|
85 | lsl \d1, \d1, tmp1 |
---|
86 | .cfi_remember_state |
---|
87 | lsr result, \d1, #24 |
---|
88 | ldrd r4, r5, [sp], #16 |
---|
89 | .cfi_restore 4 |
---|
90 | .cfi_restore 5 |
---|
91 | sub result, result, r1, lsr #24 |
---|
92 | bx lr |
---|
93 | #else |
---|
94 | /* To use the big-endian trick we'd have to reverse all three words. |
---|
95 | that's slower than this approach. */ |
---|
96 | rev \synd, \synd |
---|
97 | clz tmp1, \synd |
---|
98 | bic tmp1, tmp1, #7 |
---|
99 | lsr r1, \d2, tmp1 |
---|
100 | .cfi_remember_state |
---|
101 | .if \restore_r6 |
---|
102 | ldrd r6, r7, [sp, #8] |
---|
103 | .endif |
---|
104 | .cfi_restore 6 |
---|
105 | .cfi_restore 7 |
---|
106 | lsr \d1, \d1, tmp1 |
---|
107 | and result, \d1, #255 |
---|
108 | and r1, r1, #255 |
---|
109 | ldrd r4, r5, [sp], #16 |
---|
110 | .cfi_restore 4 |
---|
111 | .cfi_restore 5 |
---|
112 | sub result, result, r1 |
---|
113 | |
---|
114 | bx lr |
---|
115 | #endif |
---|
116 | .endm |
---|
117 | |
---|
/* strcmp

   In:   r0 (src1), r1 (src2) - pointers to the two strings.
   Out:  r0 (result) - zero if the strings are equal; otherwise the
	 zero-extended byte from src1 minus the zero-extended byte from
	 src2 at the first position where they differ or where src1 ends.
   Uses: r1-r3, r12 and the flags as scratch.  r4-r7 are stored in a
	 16-byte stack frame on entry and reloaded on every exit path
	 that modifies them.

   Strategy, chosen from the low bits of the two pointers:
     - both 8-byte aligned:          ldrd loop, 16 bytes per pass;
     - same offset within 8 bytes:   align both down, force the bytes
				     before the strings to 0xff in both
				     words, then join the ldrd loop;
     - same offset within 4 bytes:   the same idea with ldr, 8 bytes
				     per pass;
     - otherwise:                    byte-compare until src1 is word
				     aligned, then compare each src1 word
				     with bytes taken from two adjacent
				     src2 words (.Loverlap1/2/3).

   NUL and difference detection: uadd8 of a word with 0xffffffff sets
   the GE flag of every byte lane whose byte is non-zero; sel then keeps
   the XOR of the two strings in those lanes and substitutes 0xff in the
   lanes where src1 has a NUL.  A non-zero syndrome therefore means
   "difference or end of string in this word".

   Because whole aligned words are loaded, bytes that lie outside the
   strings but inside the same aligned word or doubleword are read.

   def_fn, S2HI, S2LO, MSB, LSB and BYTEn_OFFSET are not defined in this
   file; they must be provided by whatever includes it.  The shift
   comments below assume S2LO moves data towards the byte that comes
   first in memory and S2HI towards the byte that comes last, with
   LSB/MSB masking the first/last byte - confirm against the includer.  */

	.text
	.p2align 5
.Lstrcmp_start_addr:
#ifndef STRCMP_NO_PRECHECK
	/* Exit stub for the first-byte pre-check; it sits just ahead of
	   the entry point.  r2/r3 hold the first byte of each string.  */
.Lfastpath_exit:
	sub	r0, r2, r3
	bx	lr
#endif
def_fn	strcmp
#ifndef STRCMP_NO_PRECHECK
	/* Quick check of the first byte.  "cmp r2, #1" clears C and Z when
	   r2 is NUL, so the conditional compare is skipped and the branch
	   is taken; otherwise the branch is taken on a mismatch.  */
	ldrb	r2, [src1]
	ldrb	r3, [src2]
	cmp	r2, #1
	cmpcs	r2, r3
	bne	.Lfastpath_exit
#endif
	.cfi_sections .debug_frame
	.cfi_startproc
	strd	r4, r5, [sp, #-16]!
	.cfi_def_cfa_offset 16
	.cfi_offset 4, -16
	.cfi_offset 5, -12
	orr	tmp1, src1, src2
	strd	r6, r7, [sp, #8]
	.cfi_offset 6, -8
	.cfi_offset 7, -4
	mvn	const_m1, #0		/* const_m1 = 0xffffffff.  */
	tst	tmp1, #7
	beq	.Lloop_aligned8		/* Both pointers 8-byte aligned.  */

.Lnot_aligned:
	eor	tmp1, src1, src2
	tst	tmp1, #7
	bne	.Lmisaligned8		/* Offsets within 8 bytes differ.  */

	/* Deal with mutual misalignment by aligning downwards and then
	   masking off the unwanted loaded data to prevent a difference.  */
	and	tmp1, src1, #7
	bic	src1, src1, #7
	and	tmp2, tmp1, #3
	bic	src2, src2, #7
	lsl	tmp2, tmp2, #3	/* Bytes -> bits.  */
	ldrd	data1a, data1b, [src1], #16
	tst	tmp1, #4	/* Z set: the strings start in the first word.
				   Nothing below touches the flags until the
				   beq.  */
	ldrd	data2a, data2b, [src2], #16
	/* In ARM code we can't use ORN, but we do have MVN with a
	   register shift.  */
	mvn	tmp1, const_m1, S2HI tmp2
	orr	data1a, data1a, tmp1
	orr	data2a, data2a, tmp1
	beq	.Lstart_realigned8
	/* The strings start in the second word: mask that one instead and
	   force the whole first word to all-ones in both strings.  */
	orr	data1b, data1b, tmp1
	mov	data1a, const_m1
	orr	data2b, data2b, tmp1
	mov	data2a, const_m1
	b	.Lstart_realigned8

	/* Unroll the inner loop by a factor of 2, giving 16 bytes per
	   pass.  */
	.p2align 5,,12	/* Don't start in the tail bytes of a cache line.  */
	.p2align 2	/* Always word aligned.  */
.Lloop_aligned8:
	ldrd	data1a, data1b, [src1], #16
	ldrd	data2a, data2b, [src2], #16
.Lstart_realigned8:
	uadd8	syndrome_b, data1a, const_m1	/* Only want GE bits.  */
	eor	syndrome_a, data1a, data2a
	sel	syndrome_a, syndrome_a, const_m1
	uadd8	syndrome_b, data1b, const_m1	/* Only want GE bits.  */
	eor	syndrome_b, data1b, data2b
	sel	syndrome_b, syndrome_b, const_m1
	orrs	syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */
	bne	.Ldiff_found

	/* Second half of the pass: the pointers already advanced by 16, so
	   the other doubleword is at offset -8.  */
	ldrd	data1a, data1b, [src1, #-8]
	ldrd	data2a, data2b, [src2, #-8]
	uadd8	syndrome_b, data1a, const_m1	/* Only want GE bits.  */
	eor	syndrome_a, data1a, data2a
	sel	syndrome_a, syndrome_a, const_m1
	uadd8	syndrome_b, data1b, const_m1	/* Only want GE bits.  */
	eor	syndrome_b, data1b, data2b
	sel	syndrome_b, syndrome_b, const_m1
	orrs	syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */
	beq	.Lloop_aligned8

.Ldiff_found:
	cmp	syndrome_a, #0
	bne	.Ldiff_in_a

	/* syndrome_a is zero here, so the ORR above left syndrome_b
	   unchanged.  */
.Ldiff_in_b:
	strcmp_epilogue_aligned syndrome_b, data1b, data2b, 1

.Ldiff_in_a:
	.cfi_restore_state
	strcmp_epilogue_aligned syndrome_a, data1a, data2a, 1

	.cfi_restore_state
.Lmisaligned8:
	/* tmp1 still holds src1 ^ src2.  */
	tst	tmp1, #3
	bne	.Lmisaligned4
	ands	tmp1, src1, #3
	bne	.Lmutual_align4

	/* Unrolled by a factor of 2, to reduce the number of post-increment
	   operations.  */
.Lloop_aligned4:
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
.Lstart_realigned4:
	uadd8	syndrome, data1, const_m1	/* Only need GE bits.  */
	eor	syndrome, data1, data2
	sel	syndrome, syndrome, const_m1
	cmp	syndrome, #0
	bne	.Laligned4_done

	ldr	data1, [src1, #-4]
	ldr	data2, [src2, #-4]
	uadd8	syndrome, data1, const_m1
	eor	syndrome, data1, data2
	sel	syndrome, syndrome, const_m1
	cmp	syndrome, #0
	beq	.Lloop_aligned4

.Laligned4_done:
	/* r6/r7 have not been written on this path, hence restore_r6 = 0.  */
	strcmp_epilogue_aligned syndrome, data1, data2, 0

.Lmutual_align4:
	.cfi_restore_state
	/* Deal with mutual misalignment by aligning downwards and then
	   masking off the unwanted loaded data to prevent a difference.  */
	lsl	tmp1, tmp1, #3	/* Bytes -> bits.  */
	bic	src1, src1, #3
	ldr	data1, [src1], #8
	bic	src2, src2, #3
	ldr	data2, [src2], #8

	/* In ARM code we can't use ORN, but we do have MVN with a
	   register shift.  */
	mvn	tmp1, const_m1, S2HI tmp1
	orr	data1, data1, tmp1
	orr	data2, data2, tmp1
	b	.Lstart_realigned4

.Lmisaligned4:
	ands	tmp1, src1, #3
	beq	.Lsrc1_aligned
	/* src1 is k = 1..3 bytes into a word.  Move src2 back by k and
	   align src1 down, so that byte j of the src1 word pairs with
	   [src2, #j] for j = k..3.  src1 is post-incremented past the word;
	   src2 must be advanced by 4 once the head bytes have matched.  */
	sub	src2, src2, tmp1
	bic	src1, src1, #3
	lsls	tmp1, tmp1, #31	/* C = k[1], Z = !k[0].  */
	ldr	data1, [src1], #4
	beq	.Laligned_m2	/* k == 2.  */
	bcs	.Laligned_m1	/* k == 3.  */

	/* k == 1.  Each step: a NUL in src1 or a mismatch leaves NE set.  */
#ifdef STRCMP_NO_PRECHECK
	ldrb	data2, [src2, #1]
	uxtb	tmp1, data1, ror #BYTE1_OFFSET
	cmp	tmp1, #1
	cmpcs	tmp1, data2
	bne	.Lmisaligned_exit

.Laligned_m2:
	ldrb	data2, [src2, #2]
	uxtb	tmp1, data1, ror #BYTE2_OFFSET
	cmp	tmp1, #1
	cmpcs	tmp1, data2
	bne	.Lmisaligned_exit

.Laligned_m1:
	ldrb	data2, [src2, #3]
	uxtb	tmp1, data1, ror #BYTE3_OFFSET
	cmp	tmp1, #1
	cmpcs	tmp1, data2
	/* Head matched: src2 still has to be stepped past this word, so go
	   via .Lsrc2_advance rather than straight to .Lsrc1_aligned.  */
	beq	.Lsrc2_advance

#else  /* STRCMP_NO_PRECHECK */
	/* If we've done the pre-check, then we don't need to check the
	   first byte again here.  */
	ldrb	data2, [src2, #2]
	uxtb	tmp1, data1, ror #BYTE2_OFFSET
	cmp	tmp1, #1
	cmpcs	tmp1, data2
	bne	.Lmisaligned_exit

.Laligned_m2:
	ldrb	data2, [src2, #3]
	uxtb	tmp1, data1, ror #BYTE3_OFFSET
	cmp	tmp1, #1
	cmpcs	tmp1, data2
	beq	.Laligned_m1
#endif

.Lmisaligned_exit:
	.cfi_remember_state
	sub	result, tmp1, data2
	ldr	r4, [sp], #16		/* Reload r4 and pop the frame.  */
	.cfi_def_cfa_offset 0
	.cfi_restore 4
	/* R5/6/7 not written on any path that reaches this exit.  */
	.cfi_restore 5
	.cfi_restore 6
	.cfi_restore 7
	bx	lr

	.cfi_restore_state
#ifndef STRCMP_NO_PRECHECK
.Laligned_m1:
#endif
.Lsrc2_advance:
	add	src2, src2, #4
.Lsrc1_aligned:
	/* src1 is word aligned, but src2 has no common alignment
	   with it.  */
	ldr	data1, [src1], #4
	lsls	tmp1, src2, #31		/* C = src2[1], Z = !src2[0].  */

	bic	src2, src2, #3
	ldr	data2, [src2], #4
	bhi	.Loverlap1		/* C=1, Z=0 => src2[1:0] = 0b11.  */
	bcs	.Loverlap2		/* C=1, Z=1 => src2[1:0] = 0b10.  */

	/* (overlap3) C=0, Z=0 => src2[1:0] = 0b01.  Three bytes of each
	   src1 word pair with the current src2 word and one byte with the
	   next.  The same scheme is used, with different byte counts, by
	   .Loverlap2 and .Loverlap1.  */
.Loverlap3:
	bic	tmp1, data1, #MSB
	uadd8	syndrome, data1, const_m1
	eors	syndrome, tmp1, data2, S2LO #8
	sel	syndrome, syndrome, const_m1
	bne	4f			/* Difference in the shared bytes.  */
	cmp	syndrome, #0
	ldreq	data2, [src2], #4	/* No NUL in data1: fetch next src2.  */
	bne	5f			/* NUL somewhere in data1.  */

	eor	tmp1, tmp1, data1	/* Isolate the remaining data1 byte.  */
	cmp	tmp1, data2, S2HI #24
	bne	6f
	ldr	data1, [src1], #4
	b	.Loverlap3
4:
	S2LO	data2, data2, #8
	b	.Lstrcmp_tail

5:
	bics	syndrome, syndrome, #MSB
	bne	.Lstrcmp_done_equal	/* NUL within the bytes that matched.  */

	/* We can only get here if the MSB of data1 contains 0, so
	   fast-path the exit.  */
	ldrb	result, [src2]
	.cfi_remember_state
	ldrd	r4, r5, [sp], #16	/* Reload r4/r5 and pop the frame.  */
	.cfi_def_cfa_offset 0
	.cfi_restore 4
	.cfi_restore 5
	/* R6/7 Not used in this sequence.  */
	.cfi_restore 6
	.cfi_restore 7
	neg	result, result
	bx	lr

6:
	.cfi_restore_state
	S2LO	data1, data1, #24
	and	data2, data2, #LSB
	b	.Lstrcmp_tail

	.p2align 5,,12	/* Ensure at least 3 instructions in cache line.  */
.Loverlap2:
	and	tmp1, data1, const_m1, S2LO #16
	uadd8	syndrome, data1, const_m1
	eors	syndrome, tmp1, data2, S2LO #16
	sel	syndrome, syndrome, const_m1
	bne	4f
	cmp	syndrome, #0
	ldreq	data2, [src2], #4
	bne	5f
	eor	tmp1, tmp1, data1
	cmp	tmp1, data2, S2HI #16
	bne	6f
	ldr	data1, [src1], #4
	b	.Loverlap2
4:
	S2LO	data2, data2, #16
	b	.Lstrcmp_tail
5:
	ands	syndrome, syndrome, const_m1, S2LO #16
	bne	.Lstrcmp_done_equal

	ldrh	data2, [src2]
	S2LO	data1, data1, #16
#ifdef __ARM_BIG_ENDIAN
	lsl	data2, data2, #16
#endif
	b	.Lstrcmp_tail

6:
	S2LO	data1, data1, #16
	and	data2, data2, const_m1, S2LO #16
	b	.Lstrcmp_tail

	.p2align 5,,12	/* Ensure at least 3 instructions in cache line.  */
.Loverlap1:
	and	tmp1, data1, #LSB
	uadd8	syndrome, data1, const_m1
	eors	syndrome, tmp1, data2, S2LO #24
	sel	syndrome, syndrome, const_m1
	bne	4f
	cmp	syndrome, #0
	ldreq	data2, [src2], #4
	bne	5f
	eor	tmp1, tmp1, data1
	cmp	tmp1, data2, S2HI #8
	bne	6f
	ldr	data1, [src1], #4
	b	.Loverlap1
4:
	S2LO	data2, data2, #24
	b	.Lstrcmp_tail
5:
	tst	syndrome, #LSB
	bne	.Lstrcmp_done_equal
	ldr	data2, [src2]
6:
	S2LO	data1, data1, #8
	bic	data2, data2, #MSB
	b	.Lstrcmp_tail

.Lstrcmp_done_equal:
	mov	result, #0
	.cfi_remember_state
	ldrd	r4, r5, [sp], #16	/* Reload r4/r5 and pop the frame.  */
	.cfi_def_cfa_offset 0
	.cfi_restore 4
	.cfi_restore 5
	/* R6/7 not used in this sequence.  */
	.cfi_restore 6
	.cfi_restore 7
	bx	lr

	/* Common exit for the overlap paths: data1/data2 hold the bytes
	   still to be compared, lined up with each other.  */
.Lstrcmp_tail:
	.cfi_restore_state
#ifndef __ARM_BIG_ENDIAN
	rev	data1, data1
	rev	data2, data2
	/* Now everything looks big-endian...  */
#endif
	uadd8	tmp1, data1, const_m1
	eor	tmp1, data1, data2
	sel	syndrome, tmp1, const_m1
	clz	tmp1, syndrome
	lsl	data1, data1, tmp1
	lsl	data2, data2, tmp1
	lsr	result, data1, #24
	ldrd	r4, r5, [sp], #16	/* Reload r4/r5 and pop the frame.  */
	.cfi_def_cfa_offset 0
	.cfi_restore 4
	.cfi_restore 5
	/* R6/7 not used in this sequence.  */
	.cfi_restore 6
	.cfi_restore 7
	sub	result, result, data2, lsr #24
	bx	lr
	.cfi_endproc
	/* The size is measured from .Lstrcmp_start_addr, i.e. it counts the
	   pre-check exit stub placed ahead of the entry point.  */
	.size strcmp, . - .Lstrcmp_start_addr
---|