source: trunk/libs/newlib/src/newlib/libc/machine/aarch64/strcmp.S @ 444

Last change on this file since 444 was 444, checked in by satin@…, 6 years ago

add newlib,libalmos-mkh, restructure shared_syscalls.h and mini-libc

File size: 5.5 KB
Line 
1/* Copyright (c) 2012-2013, Linaro Limited
2   All rights reserved.
3
4   Redistribution and use in source and binary forms, with or without
5   modification, are permitted provided that the following conditions are met:
6       * Redistributions of source code must retain the above copyright
7         notice, this list of conditions and the following disclaimer.
8       * Redistributions in binary form must reproduce the above copyright
9         notice, this list of conditions and the following disclaimer in the
10         documentation and/or other materials provided with the distribution.
11       * Neither the name of the Linaro nor the
12         names of its contributors may be used to endorse or promote products
13         derived from this software without specific prior written permission.
14
15   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
26
27/* Assumptions:
28 *
29 * ARMv8-a, AArch64
30 */
31
32#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
33/* See strcmp-stub.c  */
34#else
35
/* def_fn NAME [p2align=N]
   Open the definition of a global function called NAME: switch to the
   .text section, align the location counter to 2^N bytes (N defaults to
   0, i.e. no extra alignment is requested), export NAME, set its symbol
   type to %function and emit the NAME: label.
   The macro does not emit a .size directive; the code that follows the
   label has to provide one itself.  */
36        .macro def_fn f p2align=0
37        .text
38        .p2align \p2align
39        .global \f
40        .type \f, %function
41\f:
42        .endm
43
/* Byte patterns replicated into each of the eight bytes of a 64-bit word.
   REP8_01 (loaded into zeroones) and REP8_7f are the two constants used by
   the word-at-a-time NUL detection in strcmp.  REP8_80 is defined next to
   them but is not referenced by the strcmp code shown in this file.  */
44#define REP8_01 0x0101010101010101
45#define REP8_7f 0x7f7f7f7f7f7f7f7f
46#define REP8_80 0x8080808080808080
47
48/* Parameters and result.  src1 and result are both aliases of x0: the
   first pointer argument is overwritten by the return value, so src1 must
   not be read after result has been written.  */
49#define src1            x0
50#define src2            x1
51#define result          x0
52
53/* Internal variables.
   data1/data2    current chunk loaded from the src1/src2 string (eight
                  bytes on the word paths, one byte on the byte path).
   data1w/data2w  32-bit views of the same registers, used by the byte loop.
   has_nul        non-zero when data1 contains a NUL byte.
   diff           data1 XOR data2; non-zero when the chunks differ.
   syndrome       diff OR has_nul; non-zero when the word loop must stop.
   tmp1..tmp3     scratch (tmp3 is only used by the big-endian tail).
   zeroones       holds REP8_01 for the whole function.
   pos            leading-zero count of syndrome, used as a shift amount.
   Everything lives in x2-x11, which are caller-saved under AAPCS64, so
   nothing has to be preserved across the call.  */
54#define data1           x2
55#define data1w          w2
56#define data2           x3
57#define data2w          w3
58#define has_nul         x4
59#define diff            x5
60#define syndrome        x6
61#define tmp1            x7
62#define tmp2            x8
63#define tmp3            x9
64#define zeroones        x10
65#define pos             x11
66
/* strcmp: compare the NUL-terminated byte strings at src1 (x0) and
   src2 (x1).

   Result (x0): zero when the two strings match up to and including the
   terminating NUL; otherwise a negative or positive value according to
   whether the first differing byte, taken as an unsigned value, is
   smaller or larger in the src1 string.  Only the sign is meaningful:
   the word paths do not always return the plain byte difference.

   Three cases are distinguished on entry:
     - both pointers are 8-byte aligned: fall into the word-at-a-time
       loop at .Lloop_aligned;
     - both pointers have the same non-zero offset within an 8-byte word:
       .Lmutual_align rounds them down, neutralises the bytes in front of
       the start point and joins the word loop at .Lstart_realigned;
     - the offsets differ: .Lmisaligned8 compares one byte at a time.

   Clobbers x2-x11 (x9 only in big-endian builds) and the condition flags.
   No stack is used.  */
67        /* Start of performance-critical section  -- one 64B cache line.
           The entry point is aligned to 64 bytes (p2align=6) and the code
           up to and including the cbz below is 14 instructions, i.e. 56
           bytes.  */
68def_fn strcmp p2align=6
69        eor     tmp1, src1, src2        /* Low 3 bits set => offsets differ.  */
70        mov     zeroones, #REP8_01
71        tst     tmp1, #7
72        b.ne    .Lmisaligned8
73        ands    tmp1, src1, #7          /* tmp1 = offset within 8-byte word.  */
74        b.ne    .Lmutual_align
75        /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
76           (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
77           can be done in parallel across the entire word.
           Only data1 is tested for NUL: a NUL that is present in data2 but
           not in data1 necessarily shows up in diff instead.  */
78.Lloop_aligned:
79        ldr     data1, [src1], #8       /* Post-index: advance by 8 bytes.  */
80        ldr     data2, [src2], #8
81.Lstart_realigned:
82        sub     tmp1, data1, zeroones
83        orr     tmp2, data1, #REP8_7f
84        eor     diff, data1, data2      /* Non-zero if differences found.  */
85        bic     has_nul, tmp1, tmp2     /* Non-zero if NUL terminator.  */
86        orr     syndrome, diff, has_nul
87        cbz     syndrome, .Lloop_aligned
88        /* End of performance-critical section  -- one 64B cache line.  */
89
90#ifndef __AARCH64EB__
        /* Little-endian: the byte at the lowest address is the least
           significant one, so byte-reverse syndrome and both data words to
           put the earliest string byte at the most significant end.  */
91        rev     syndrome, syndrome
92        rev     data1, data1
93        /* The MS-non-zero bit of the syndrome marks either the first bit
94           that is different, or the top bit of the first zero byte.
95           Shifting left now will bring the critical information into the
96           top bits.  */
97        clz     pos, syndrome
98        rev     data2, data2
99        lsl     data1, data1, pos
100        lsl     data2, data2, pos
101        /* But we need to zero-extend (char is unsigned) the value and then
102           perform a signed 32-bit subtraction.
           The sub below is encoded on the 64-bit registers; both operands
           are in the range 0..255 once shifted right by 56, so the low 32
           bits of the result hold the signed difference.  */
103        lsr     data1, data1, #56
104        sub     result, data1, data2, lsr #56
105        ret
106#else
107        /* For big-endian we cannot use the trick with the syndrome value
108           as carry-propagation can corrupt the upper bits if the trailing
109           bytes in the string contain 0x01.  */
110        /* However, if there is no NUL byte in the dword, we can generate
111           the result directly.  We can't just subtract the bytes as the
112           MSB might be significant.  */
113        cbnz    has_nul, 1f
114        cmp     data1, data2
115        cset    result, ne              /* diff != 0 here, so result = 1.  */
116        cneg    result, result, lo      /* -1 if data1 < data2 (unsigned).  */
117        ret
1181:
119        /* Re-compute the NUL-byte detection, using a byte-reversed value.  */
120        rev     tmp3, data1
121        sub     tmp1, tmp3, zeroones
122        orr     tmp2, tmp3, #REP8_7f
123        bic     has_nul, tmp1, tmp2
124        rev     has_nul, has_nul        /* Back to the byte order of diff.  */
125        orr     syndrome, diff, has_nul
126        clz     pos, syndrome
127        /* The MS-non-zero bit of the syndrome marks either the first bit
128           that is different, or the top bit of the first zero byte.
129           Shifting left now will bring the critical information into the
130           top bits.  */
131        lsl     data1, data1, pos
132        lsl     data2, data2, pos
133        /* But we need to zero-extend (char is unsigned) the value and then
134           perform a signed 32-bit subtraction.
           The sub below is encoded on the 64-bit registers; both operands
           are in the range 0..255 once shifted right by 56, so the low 32
           bits of the result hold the signed difference.  */
135        lsr     data1, data1, #56
136        sub     result, data1, data2, lsr #56
137        ret
138#endif
139
140.Lmutual_align:
141        /* Sources are mutually aligned, but are not currently at an
142           alignment boundary.  Round down the addresses and then mask off
143           the bytes that precede the start point.
           On entry tmp1 holds the byte offset (1..7) computed by the ands
           at the top of the function.  */
144        bic     src1, src1, #7
145        bic     src2, src2, #7
146        lsl     tmp1, tmp1, #3          /* Bytes beyond alignment -> bits.  */
147        ldr     data1, [src1], #8
148        neg     tmp1, tmp1              /* Bits to alignment -64.  */
149        ldr     data2, [src2], #8
150        mov     tmp2, #~0
151#ifdef __AARCH64EB__
152        /* Big-endian.  Early bytes are at MSB.  */
153        lsl     tmp2, tmp2, tmp1        /* Shift (tmp1 & 63).  */
154#else
155        /* Little-endian.  Early bytes are at LSB.  */
156        lsr     tmp2, tmp2, tmp1        /* Shift (tmp1 & 63).  */
157#endif
        /* tmp2 now has all-ones exactly in the bytes in front of the start
           point.  OR-ing it into both words makes those bytes 0xff on both
           sides, so they compare equal and are never taken for a NUL.  */
158        orr     data1, data1, tmp2
159        orr     data2, data2, tmp2
160        b       .Lstart_realigned
161
162.Lmisaligned8:
163        /* We can do better than this.  */
        /* Byte loop: keep going while the two bytes are equal and the
           src1 byte is not NUL.  */
164        ldrb    data1w, [src1], #1
165        ldrb    data2w, [src2], #1
166        cmp     data1w, #1              /* Carry set iff data1w != 0.  */
167        ccmp    data1w, data2w, #0, cs  /* NZCV = 0b0000.  */
168        b.eq    .Lmisaligned8
169        sub     result, data1, data2    /* ldrb zero-extended both bytes.  */
170        ret
171        .size   strcmp, .-strcmp
172
173#endif
Note: See TracBrowser for help on using the repository browser.