1 | /* |
---|
2 | Copyright (c) 2015, Synopsys, Inc. All rights reserved. |
---|
3 | |
---|
4 | Redistribution and use in source and binary forms, with or without |
---|
5 | modification, are permitted provided that the following conditions are met: |
---|
6 | |
---|
7 | 1) Redistributions of source code must retain the above copyright notice, |
---|
8 | this list of conditions and the following disclaimer. |
---|
9 | |
---|
10 | 2) Redistributions in binary form must reproduce the above copyright notice, |
---|
11 | this list of conditions and the following disclaimer in the documentation |
---|
12 | and/or other materials provided with the distribution. |
---|
13 | |
---|
14 | 3) Neither the name of the Synopsys, Inc., nor the names of its contributors |
---|
15 | may be used to endorse or promote products derived from this software |
---|
16 | without specific prior written permission. |
---|
17 | |
---|
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
---|
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
---|
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
---|
21 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
---|
22 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
---|
23 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
---|
24 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
---|
25 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
---|
26 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
---|
27 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
---|
28 | POSSIBILITY OF SUCH DAMAGE. |
---|
29 | */ |
---|
30 | |
---|
31 | /* This implementation is optimized for performance. For code size a generic |
---|
32 | implementation of this function from newlib/libc/string/strcmp.c will be |
---|
33 | used. */ |
---|
34 | #if !defined (__OPTIMIZE_SIZE__) && !defined (PREFER_SIZE_OVER_SPEED) |
---|
35 | |
---|
36 | #include "asm.h" |
---|
37 | |
---|
38 | /* This is optimized primarily for the ARC700. |
---|
39 | It would be possible to speed up the loops by one cycle per word |
---|
40 | (respectively one cycle per byte) by forcing double source 1 alignment, unrolling |
---|
41 | by a factor of two, and speculatively loading the second word / byte of |
---|
42 | source 1; however, that would increase the overhead for loop setup / finish, |
---|
43 | and strcmp might often terminate early. */ |
---|
44 | #ifndef __ARCHS__ |
---|
45 | |
---|
46 | ENTRY (strcmp) /* strcmp: compares the NUL-terminated strings addressed by r0 and r1. The result is left in r0: zero when equal, otherwise its sign gives the ordering. Registers written besides r0 and r1: r2-r5 and r12. */ |
---|
47 | or r2,r0,r1 /* r2 = bitwise OR of both string pointers */ |
---|
48 | bmsk_s r2,r2,1 /* keep only bits 1:0 of that OR */ |
---|
49 | brne_l r2,0,.Lcharloop /* either pointer not 4-byte aligned: compare byte by byte */ |
---|
50 | mov_s r12,0x01010101 /* r12 = 0x01 in every byte lane */ |
---|
51 | ror r5,r12 /* r5 = r12 rotated right by one bit = 0x80808080 */ |
---|
52 | .Lwordloop: /* Word loop: compares four bytes per iteration. */ |
---|
53 | ld.ab r2,[r0,4] /* r2 = next word of string 1, then r0 advances by 4 */ |
---|
54 | ld.ab r3,[r1,4] /* r3 = next word of string 2, then r1 advances by 4 */ |
---|
55 | nop_s /* NOTE(review): presumably pads the load-to-use latency - confirm against the pipeline documentation */ |
---|
56 | sub r4,r2,r12 /* zero-byte test, step 1: r4 = r2 - 0x01010101 */ |
---|
57 | bic r4,r4,r2 /* step 2: r4 = r4 AND NOT r2 */ |
---|
58 | and r4,r4,r5 /* step 3: keep bit 7 of each byte, nonzero when the string 1 word appears to hold a NUL */ |
---|
59 | brne_l r4,0,.Lfound0 /* NUL flagged in the string 1 word: finish in .Lfound0 */ |
---|
60 | breq r2,r3,.Lwordloop /* no NUL flagged and both words equal: next word */ |
---|
61 | #ifdef __LITTLE_ENDIAN__ |
---|
62 | xor r0,r2,r3 ; mask for difference |
---|
63 | sub_s r1,r0,1 |
---|
64 | bic_s r0,r0,r1 ; mask for least significant difference bit |
---|
65 | sub r1,r5,r0 |
---|
66 | xor r0,r5,r1 ; mask for least significant difference byte |
---|
67 | and_s r2,r2,r0 /* reduce r2 to the masked bits */ |
---|
68 | and_s r3,r3,r0 /* reduce r3 to the masked bits */ |
---|
69 | #endif /* LITTLE ENDIAN */ |
---|
70 | cmp_s r2,r3 /* the words differ on this path, the unsigned compare decides the sign */ |
---|
71 | mov_s r0,1 /* provisional result 1 (string 1 greater) */ |
---|
72 | j_s.d [blink] /* return, the following instruction executes in the delay slot */ |
---|
73 | bset.lo r0,r0,31 /* r2 below r3 (unsigned): set bit 31 so the result is negative */ |
---|
74 | |
---|
75 | .balign 4 |
---|
76 | #ifdef __LITTLE_ENDIAN__ |
---|
77 | .Lfound0: /* Little endian: a NUL was flagged in the string 1 word (indicator in r4). */ |
---|
78 | xor r0,r2,r3 ; mask for difference |
---|
79 | or r0,r0,r4 ; or in zero indicator |
---|
80 | sub_s r1,r0,1 |
---|
81 | bic_s r0,r0,r1 ; mask for least significant difference bit |
---|
82 | sub r1,r5,r0 |
---|
83 | xor r0,r5,r1 ; mask for least significant difference byte |
---|
84 | and_s r2,r2,r0 /* reduce r2 to the masked bits */ |
---|
85 | and_s r3,r3,r0 /* reduce r3 to the masked bits */ |
---|
86 | sub.f r0,r2,r3 /* r0 = difference of the masked values, flags updated, 0 when they are equal */ |
---|
87 | mov.hi r0,1 /* r2 above r3 (unsigned): force a positive result */ |
---|
88 | j_s.d [blink] /* return, the following instruction executes in the delay slot */ |
---|
89 | bset.lo r0,r0,31 /* r2 below r3 (unsigned): set bit 31 so the result is negative */ |
---|
90 | #else /* BIG ENDIAN */ |
---|
91 | /* The zero-detection above can mis-detect 0x01 bytes as zeroes |
---|
92 | because of carry-propagation from a lower significant zero byte. |
---|
93 | We can compensate for this by checking that bit0 is zero. |
---|
94 | This compensation is not necessary in the step where we |
---|
95 | get a low estimate for r2, because in any affected bytes |
---|
96 | we already have 0x00 or 0x01, which will remain unchanged |
---|
97 | when bit 7 is cleared. */ |
---|
98 | .balign 4 |
---|
99 | .Lfound0: /* Big endian: a NUL was flagged in the string 1 word (indicator in r4). */ |
---|
100 | #ifdef __ARC_BARREL_SHIFTER__ |
---|
101 | lsr r0,r4,8 /* r0 = zero indicator shifted right by 8 bits, onto bit 7 of the next lower byte */ |
---|
102 | lsr_s r1,r2 /* r1 = r2 shifted right by one bit, bit 0 of each byte lines up with the shifted indicator */ |
---|
103 | bic_s r2,r2,r0 ; get low estimate for r2 and get ... |
---|
104 | bic_s r0,r0,r1 ; <this is the adjusted mask for zeros> |
---|
105 | or_s r3,r3,r0 ; ... high estimate r3 so that r2 > r3 will ... |
---|
106 | cmp_s r3,r2 ; ... be independent of trailing garbage |
---|
107 | or_s r2,r2,r0 ; likewise for r3 > r2 |
---|
108 | bic_s r3,r3,r0 |
---|
109 | rlc r0,0 ; r0 := r2 > r3 ? 1 : 0 |
---|
110 | cmp_s r2,r3 /* second compare, using the high estimate for r2 and the low estimate for r3 */ |
---|
111 | j_s.d [blink] /* return, the following instruction executes in the delay slot */ |
---|
112 | bset.lo r0,r0,31 /* r2 below r3 (unsigned): set bit 31 so the result is negative */ |
---|
113 | #else /* __ARC_BARREL_SHIFTER__ */ |
---|
114 | /* Fall through to .Lcharloop. */ |
---|
115 | sub_s r0,r0,4 /* step the string 1 pointer back over the word just loaded */ |
---|
116 | sub_s r1,r1,4 /* same for the string 2 pointer, the byte loop below re-examines that word */ |
---|
117 | #endif /* __ARC_BARREL_SHIFTER__ */ |
---|
118 | #endif /* ENDIAN */ |
---|
119 | |
---|
120 | .balign 4 |
---|
121 | .Lcharloop: /* Byte loop: entered for unaligned pointers and by the fall-through above. */ |
---|
122 | ldb.ab r2,[r0,1] /* r2 = next byte of string 1, then r0 advances by 1 */ |
---|
123 | ldb.ab r3,[r1,1] /* r3 = next byte of string 2, then r1 advances by 1 */ |
---|
124 | nop_s /* NOTE(review): presumably pads the load-to-use latency - confirm */ |
---|
125 | breq_l r2,0,.Lcmpend /* NUL in string 1: stop and return the difference */ |
---|
126 | breq r2,r3,.Lcharloop /* bytes equal: continue with the next pair */ |
---|
127 | .Lcmpend: /* Reached on a NUL in string 1 or on the first differing byte pair. */ |
---|
128 | j_s.d [blink] /* return, the following instruction executes in the delay slot */ |
---|
129 | sub r0,r2,r3 /* result = byte of string 1 minus byte of string 2 */ |
---|
130 | ENDFUNC (strcmp) |
---|
131 | #endif /* !__ARCHS__ */ |
---|
132 | |
---|
133 | #endif /* !__OPTIMIZE_SIZE__ && !PREFER_SIZE_OVER_SPEED */ |
---|