source: trunk/libs/newlib/src/newlib/libc/machine/arm/strcmp-armv6.S @ 444

/*
 * Copyright (c) 2012-2014 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

        /* Implementation of strcmp for ARMv6.  Use ldrd to support wider
           loads, provided the data is sufficiently aligned.  Use
           saturating arithmetic to optimize the compares.  */

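        /* The zero/difference detection used throughout relies on the
           UADD8/SEL pair.  A rough C model of one aligned compare step
           (the function name and the use of C are purely illustrative,
           not part of this file):

             uint32_t step (uint32_t w1, uint32_t w2)
             {
               uint32_t ge = 0;             // GE flags from UADD8 w1, -1
               for (int i = 0; i < 4; i++)  // GE[i] = 1 iff byte i of w1 != 0
                 if ((w1 >> (8 * i)) & 0xff)
                   ge |= 0xffu << (8 * i);
               uint32_t diff = w1 ^ w2;     // EOR
               return (diff & ge) | ~ge;    // SEL picks diff bytes where GE
             }                              // is set, 0xff where w1 had a NUL

           The returned "syndrome" is zero iff the words are equal and w1
           contains no NUL byte, so a single compare-and-branch detects
           both a difference and the end of the string.  */
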
        /* Build Options:
           STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first
           byte in the string.  If comparing completely random strings
           the pre-check will save time, since there is a very high
           probability of a mismatch in the first character: we save
           significant overhead if this is the common case.  However,
           if strings are likely to be identical (e.g. because we're
           verifying a hit in a hash table), then this check is largely
           redundant.  */

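        /* With the pre-check enabled, the entry sequence behaves roughly
           like this C fragment (illustrative only):

             unsigned char c1 = *s1, c2 = *s2;
             if (c1 == 0 || c1 != c2)
               return c1 - c2;

           i.e. return straight away on a first-byte mismatch or an empty
           first string, before paying for the register saves below.  */
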
        .arm

/* Parameters and result.  */
#define src1            r0
#define src2            r1
#define result          r0      /* Overlaps src1.  */

/* Internal variables.  */
#define tmp1            r4
#define tmp2            r5
#define const_m1        r12

/* Additional internal variables for 64-bit aligned data.  */
#define data1a          r2
#define data1b          r3
#define data2a          r6
#define data2b          r7
#define syndrome_a      tmp1
#define syndrome_b      tmp2

/* Additional internal variables for 32-bit aligned data.  */
#define data1           r2
#define data2           r3
#define syndrome        tmp2


        /* Macro to compute and return the result value for word-aligned
           cases.  */
        .macro strcmp_epilogue_aligned synd d1 d2 restore_r6
#ifdef __ARM_BIG_ENDIAN
        /* If data1 contains a zero byte, then syndrome will contain a 1 in
           bit 7 of that byte.  Otherwise, the highest set bit in the
           syndrome will highlight the first different bit.  It is therefore
           sufficient to extract the eight bits starting with the syndrome
           bit.  */
        clz     tmp1, \synd
        lsl     r1, \d2, tmp1
        .if \restore_r6
        ldrd    r6, r7, [sp, #8]
        .endif
        .cfi_restore 6
        .cfi_restore 7
        lsl     \d1, \d1, tmp1
        .cfi_remember_state
        lsr     result, \d1, #24
        ldrd    r4, r5, [sp], #16
        .cfi_restore 4
        .cfi_restore 5
        sub     result, result, r1, lsr #24
        bx      lr
#else
        /* To use the big-endian trick we'd have to reverse all three words;
           that's slower than this approach.  */
        rev     \synd, \synd
        clz     tmp1, \synd
        bic     tmp1, tmp1, #7
        lsr     r1, \d2, tmp1
        .cfi_remember_state
        .if \restore_r6
        ldrd    r6, r7, [sp, #8]
        .endif
        .cfi_restore 6
        .cfi_restore 7
        lsr     \d1, \d1, tmp1
        and     result, \d1, #255
        and     r1, r1, #255
        ldrd    r4, r5, [sp], #16
        .cfi_restore 4
        .cfi_restore 5
        sub     result, result, r1

        bx      lr
#endif
        .endm
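
        /* A worked example of the little-endian path (values are
           illustrative): comparing "abc" with "abd" gives
           data1 = 0x00636261, data2 = 0x00646261 and a syndrome of
           0xff070000 (0xff flags the NUL, 0x07 the differing byte).
           rev produces 0x000007ff, clz gives 21, bic #7 rounds that
           down to 16, so both words are shifted right by 16 and the
           extracted bytes give 'c' - 'd' < 0, as required.  */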

        .text
        .p2align        5
.Lstrcmp_start_addr:
#ifndef STRCMP_NO_PRECHECK
.Lfastpath_exit:
        sub     r0, r2, r3
        bx      lr
#endif
def_fn  strcmp
#ifndef STRCMP_NO_PRECHECK
        ldrb    r2, [src1]
        ldrb    r3, [src2]
        cmp     r2, #1
        cmpcs   r2, r3
        bne     .Lfastpath_exit
#endif
        .cfi_sections .debug_frame
        .cfi_startproc
        strd    r4, r5, [sp, #-16]!
        .cfi_def_cfa_offset 16
        .cfi_offset 4, -16
        .cfi_offset 5, -12
        orr     tmp1, src1, src2
        strd    r6, r7, [sp, #8]
        .cfi_offset 6, -8
        .cfi_offset 7, -4
        mvn     const_m1, #0
        tst     tmp1, #7
        beq     .Lloop_aligned8

.Lnot_aligned:
        eor     tmp1, src1, src2
        tst     tmp1, #7
        bne     .Lmisaligned8

        /* Deal with mutual misalignment by aligning downwards and then
           masking off the unwanted loaded data to prevent a difference.  */
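        /* A hypothetical example: with src1 = 0x1005 and src2 = 0x2005,
           both pointers are rounded down to a doubleword boundary; the
           low two bits of the offset (here 1) select an 8-bit shift, so
           mvn builds a mask (0x000000ff on little-endian) that forces
           the stray byte below the true start of each string to compare
           equal.  Since bit 2 of the offset is set, the mask is applied
           to the second word of each pair and the first words are forced
           to all-ones.  */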
        and     tmp1, src1, #7
        bic     src1, src1, #7
        and     tmp2, tmp1, #3
        bic     src2, src2, #7
        lsl     tmp2, tmp2, #3  /* Bytes -> bits.  */
        ldrd    data1a, data1b, [src1], #16
        tst     tmp1, #4
        ldrd    data2a, data2b, [src2], #16
        /* In ARM code we can't use ORN, but we do have MVN with a
           register shift.  */
        mvn     tmp1, const_m1, S2HI tmp2
        orr     data1a, data1a, tmp1
        orr     data2a, data2a, tmp1
        beq     .Lstart_realigned8
        orr     data1b, data1b, tmp1
        mov     data1a, const_m1
        orr     data2b, data2b, tmp1
        mov     data2a, const_m1
        b       .Lstart_realigned8

        /* Unwind the inner loop by a factor of 2, giving 16 bytes per
           pass.  */
        .p2align 5,,12  /* Don't start in the tail bytes of a cache line.  */
        .p2align 2      /* Always word aligned.  */
.Lloop_aligned8:
        ldrd    data1a, data1b, [src1], #16
        ldrd    data2a, data2b, [src2], #16
.Lstart_realigned8:
        uadd8   syndrome_b, data1a, const_m1    /* Only want GE bits.  */
        eor     syndrome_a, data1a, data2a
        sel     syndrome_a, syndrome_a, const_m1
        uadd8   syndrome_b, data1b, const_m1    /* Only want GE bits.  */
        eor     syndrome_b, data1b, data2b
        sel     syndrome_b, syndrome_b, const_m1
        orrs    syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */
        bne     .Ldiff_found

        ldrd    data1a, data1b, [src1, #-8]
        ldrd    data2a, data2b, [src2, #-8]
        uadd8   syndrome_b, data1a, const_m1    /* Only want GE bits.  */
        eor     syndrome_a, data1a, data2a
        sel     syndrome_a, syndrome_a, const_m1
        uadd8   syndrome_b, data1b, const_m1    /* Only want GE bits.  */
        eor     syndrome_b, data1b, data2b
        sel     syndrome_b, syndrome_b, const_m1
        orrs    syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */
        beq     .Lloop_aligned8

.Ldiff_found:
        cmp     syndrome_a, #0
        bne     .Ldiff_in_a

.Ldiff_in_b:
        strcmp_epilogue_aligned syndrome_b, data1b, data2b 1

.Ldiff_in_a:
        .cfi_restore_state
        strcmp_epilogue_aligned syndrome_a, data1a, data2a 1

        .cfi_restore_state
.Lmisaligned8:
        tst     tmp1, #3
        bne     .Lmisaligned4
        ands    tmp1, src1, #3
        bne     .Lmutual_align4

        /* Unrolled by a factor of 2, to reduce the number of post-increment
           operations.  */
.Lloop_aligned4:
        ldr     data1, [src1], #8
        ldr     data2, [src2], #8
.Lstart_realigned4:
        uadd8   syndrome, data1, const_m1       /* Only need GE bits.  */
        eor     syndrome, data1, data2
        sel     syndrome, syndrome, const_m1
        cmp     syndrome, #0
        bne     .Laligned4_done

        ldr     data1, [src1, #-4]
        ldr     data2, [src2, #-4]
        uadd8   syndrome, data1, const_m1
        eor     syndrome, data1, data2
        sel     syndrome, syndrome, const_m1
        cmp     syndrome, #0
        beq     .Lloop_aligned4

.Laligned4_done:
        strcmp_epilogue_aligned syndrome, data1, data2, 0

.Lmutual_align4:
        .cfi_restore_state
        /* Deal with mutual misalignment by aligning downwards and then
           masking off the unwanted loaded data to prevent a difference.  */
        lsl     tmp1, tmp1, #3  /* Bytes -> bits.  */
        bic     src1, src1, #3
        ldr     data1, [src1], #8
        bic     src2, src2, #3
        ldr     data2, [src2], #8

        /* In ARM code we can't use ORN, but we do have MVN with a
           register shift.  */
        mvn     tmp1, const_m1, S2HI tmp1
        orr     data1, data1, tmp1
        orr     data2, data2, tmp1
        b       .Lstart_realigned4

.Lmisaligned4:
        ands    tmp1, src1, #3
        beq     .Lsrc1_aligned
        sub     src2, src2, tmp1
        bic     src1, src1, #3
        lsls    tmp1, tmp1, #31
        ldr     data1, [src1], #4
        beq     .Laligned_m2
        bcs     .Laligned_m1

#ifdef STRCMP_NO_PRECHECK
        ldrb    data2, [src2, #1]
        uxtb    tmp1, data1, ror #BYTE1_OFFSET
        cmp     tmp1, #1
        cmpcs   tmp1, data2
        bne     .Lmisaligned_exit

.Laligned_m2:
        ldrb    data2, [src2, #2]
        uxtb    tmp1, data1, ror #BYTE2_OFFSET
        cmp     tmp1, #1
        cmpcs   tmp1, data2
        bne     .Lmisaligned_exit

.Laligned_m1:
        ldrb    data2, [src2, #3]
        uxtb    tmp1, data1, ror #BYTE3_OFFSET
        cmp     tmp1, #1
        cmpcs   tmp1, data2
        beq     .Lsrc1_aligned

#else  /* STRCMP_NO_PRECHECK */
        /* If we've done the pre-check, then we don't need to check the
           first byte again here.  */
        ldrb    data2, [src2, #2]
        uxtb    tmp1, data1, ror #BYTE2_OFFSET
        cmp     tmp1, #1
        cmpcs   tmp1, data2
        bne     .Lmisaligned_exit

.Laligned_m2:
        ldrb    data2, [src2, #3]
        uxtb    tmp1, data1, ror #BYTE3_OFFSET
        cmp     tmp1, #1
        cmpcs   tmp1, data2
        beq     .Laligned_m1
#endif

.Lmisaligned_exit:
        .cfi_remember_state
        sub     result, tmp1, data2
        ldr     r4, [sp], #16
        .cfi_restore 4
        bx      lr

#ifndef STRCMP_NO_PRECHECK
.Laligned_m1:
        add     src2, src2, #4
#endif
.Lsrc1_aligned:
        .cfi_restore_state
        /* src1 is word aligned, but src2 has no common alignment
           with it.  */
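        /* The three .LoverlapN cases below are selected by the low two
           bits of src2; N is how many bytes of each loaded src2 word
           overlap the current src1 word (a reading inferred from the
           code, not stated by the original comments):

             src2 & 3 == 1  (C=0, Z=0)  ->  .Loverlap3
             src2 & 3 == 2  (C=1, Z=1)  ->  .Loverlap2
             src2 & 3 == 3  (C=1, Z=0)  ->  .Loverlap1  */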
        ldr     data1, [src1], #4
        lsls    tmp1, src2, #31         /* C=src2[1], Z=src2[0].  */

        bic     src2, src2, #3
        ldr     data2, [src2], #4
        bhi     .Loverlap1              /* C=1, Z=0 => src2[1:0] = 0b11.  */
        bcs     .Loverlap2              /* C=1, Z=1 => src2[1:0] = 0b10.  */

        /* (overlap3) C=0, Z=0 => src2[1:0] = 0b01.  */
.Loverlap3:
        bic     tmp1, data1, #MSB
        uadd8   syndrome, data1, const_m1
        eors    syndrome, tmp1, data2, S2LO #8
        sel     syndrome, syndrome, const_m1
        bne     4f
        cmp     syndrome, #0
        ldreq   data2, [src2], #4
        bne     5f

        eor     tmp1, tmp1, data1
        cmp     tmp1, data2, S2HI #24
        bne     6f
        ldr     data1, [src1], #4
        b       .Loverlap3
4:
        S2LO    data2, data2, #8
        b       .Lstrcmp_tail

5:
        bics    syndrome, syndrome, #MSB
        bne     .Lstrcmp_done_equal

        /* We can only get here if the MSB of data1 contains 0, so
           fast-path the exit.  */
        ldrb    result, [src2]
        .cfi_remember_state
        ldrd    r4, r5, [sp], #16
        .cfi_restore 4
        .cfi_restore 5
        /* R6/7 not used in this sequence.  */
        .cfi_restore 6
        .cfi_restore 7
        neg     result, result
        bx      lr

6:
        .cfi_restore_state
        S2LO    data1, data1, #24
        and     data2, data2, #LSB
        b       .Lstrcmp_tail

        .p2align 5,,12  /* Ensure at least 3 instructions in cache line.  */
.Loverlap2:
        and     tmp1, data1, const_m1, S2LO #16
        uadd8   syndrome, data1, const_m1
        eors    syndrome, tmp1, data2, S2LO #16
        sel     syndrome, syndrome, const_m1
        bne     4f
        cmp     syndrome, #0
        ldreq   data2, [src2], #4
        bne     5f
        eor     tmp1, tmp1, data1
        cmp     tmp1, data2, S2HI #16
        bne     6f
        ldr     data1, [src1], #4
        b       .Loverlap2
4:
        S2LO    data2, data2, #16
        b       .Lstrcmp_tail
5:
        ands    syndrome, syndrome, const_m1, S2LO #16
        bne     .Lstrcmp_done_equal

        ldrh    data2, [src2]
        S2LO    data1, data1, #16
#ifdef __ARM_BIG_ENDIAN
        lsl     data2, data2, #16
#endif
        b       .Lstrcmp_tail

6:
        S2LO    data1, data1, #16
        and     data2, data2, const_m1, S2LO #16
        b       .Lstrcmp_tail

        .p2align 5,,12  /* Ensure at least 3 instructions in cache line.  */
.Loverlap1:
        and     tmp1, data1, #LSB
        uadd8   syndrome, data1, const_m1
        eors    syndrome, tmp1, data2, S2LO #24
        sel     syndrome, syndrome, const_m1
        bne     4f
        cmp     syndrome, #0
        ldreq   data2, [src2], #4
        bne     5f
        eor     tmp1, tmp1, data1
        cmp     tmp1, data2, S2HI #8
        bne     6f
        ldr     data1, [src1], #4
        b       .Loverlap1
4:
        S2LO    data2, data2, #24
        b       .Lstrcmp_tail
5:
        tst     syndrome, #LSB
        bne     .Lstrcmp_done_equal
        ldr     data2, [src2]
6:
        S2LO    data1, data1, #8
        bic     data2, data2, #MSB
        b       .Lstrcmp_tail

.Lstrcmp_done_equal:
        mov     result, #0
        .cfi_remember_state
        ldrd    r4, r5, [sp], #16
        .cfi_restore 4
        .cfi_restore 5
        /* R6/7 not used in this sequence.  */
        .cfi_restore 6
        .cfi_restore 7
        bx      lr

.Lstrcmp_tail:
        .cfi_restore_state
#ifndef __ARM_BIG_ENDIAN
        rev     data1, data1
        rev     data2, data2
        /* Now everything looks big-endian...  */
#endif
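        /* With both words byte-reversed, the big-endian extraction used
           in strcmp_epilogue_aligned applies unchanged: recompute the
           syndrome, shift the first differing (or NUL) byte of each word
           up to bit 31, and subtract the top bytes.  Only the sign of
           the result is significant to strcmp's callers.  */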
        uadd8   tmp1, data1, const_m1
        eor     tmp1, data1, data2
        sel     syndrome, tmp1, const_m1
        clz     tmp1, syndrome
        lsl     data1, data1, tmp1
        lsl     data2, data2, tmp1
        lsr     result, data1, #24
        ldrd    r4, r5, [sp], #16
        .cfi_restore 4
        .cfi_restore 5
        /* R6/7 not used in this sequence.  */
        .cfi_restore 6
        .cfi_restore 7
        sub     result, result, data2, lsr #24
        bx      lr
        .cfi_endproc
        .size strcmp, . - .Lstrcmp_start_addr