[444] | 1 | /* |
---|
| 2 | * Copyright (c) 2014 |
---|
| 3 | * Imagination Technologies Limited. |
---|
| 4 | * |
---|
| 5 | * Redistribution and use in source and binary forms, with or without |
---|
| 6 | * modification, are permitted provided that the following conditions |
---|
| 7 | * are met: |
---|
| 8 | * 1. Redistributions of source code must retain the above copyright |
---|
| 9 | * notice, this list of conditions and the following disclaimer. |
---|
| 10 | * 2. Redistributions in binary form must reproduce the above copyright |
---|
| 11 | * notice, this list of conditions and the following disclaimer in the |
---|
| 12 | * documentation and/or other materials provided with the distribution. |
---|
| 13 | * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its |
---|
| 14 | * contributors may be used to endorse or promote products derived from |
---|
| 15 | * this software without specific prior written permission. |
---|
| 16 | * |
---|
| 17 | * THIS SOFTWARE IS PROVIDED BY IMAGINATION TECHNOLOGIES LIMITED ``AS IS'' AND |
---|
| 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
---|
| 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
---|
| 20 | * ARE DISCLAIMED. IN NO EVENT SHALL IMAGINATION TECHNOLOGIES LIMITED BE LIABLE |
---|
| 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
---|
| 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
---|
| 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
---|
| 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
---|
| 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
---|
| 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
---|
| 27 | * SUCH DAMAGE. |
---|
| 28 | */ |
---|
| 29 | |
---|
| 30 | #ifdef ANDROID_CHANGES |
---|
| 31 | # include "machine/asm.h" |
---|
| 32 | # include "machine/regdef.h" |
---|
| 33 | #elif _LIBC |
---|
| 34 | # include <sysdep.h> |
---|
| 35 | # include <regdef.h> |
---|
| 36 | # include <sys/asm.h> |
---|
| 37 | #elif _COMPILING_NEWLIB |
---|
| 38 | # include "machine/asm.h" |
---|
| 39 | # include "machine/regdef.h" |
---|
| 40 | #else |
---|
| 41 | # include <regdef.h> |
---|
| 42 | # include <sys/asm.h> |
---|
| 43 | #endif |
---|
| 44 | |
---|
/* Strictly speaking, strcmp must not read past the end of the strings it
   compares.  The word loop loads a full word, which may contain bytes that
   lie beyond the NUL terminator, but unless ENABLE_READAHEAD is defined it
   never loads the word that follows the one holding the terminator.
   Defining ENABLE_READAHEAD improves performance but is technically illegal
   under the definition of strcmp.

   DELAY_READ is written after each STRCMP32 expansion, i.e. directly behind
   that expansion's final branch: by default it is a nop, and with
   ENABLE_READAHEAD it is empty.  */
#ifndef ENABLE_READAHEAD
# define DELAY_READ nop
#else
# define DELAY_READ
#endif
---|
| 56 | |
---|
/* Testing on a little-endian machine showed that using CLZ was a
   performance loss, so it is not turned on by default.  USE_CLZ is defined
   only when ENABLE_CLZ is defined and __mips_isa_rev is greater than 1.  */
#ifdef ENABLE_CLZ
# if __mips_isa_rev > 1
#  define USE_CLZ
# endif
#endif
---|
| 62 | |
---|
/* Some asm.h files do not provide the L macro used for local labels, so
   supply one when it is missing: labels get a "$L" prefix when _MIPS_SIM
   equals _ABIO32 and a ".L" prefix otherwise.  */
#if !defined(L)
# if _MIPS_SIM != _ABIO32
#  define L(label) .L ## label
# else
#  define L(label) $L ## label
# endif
#endif
---|
| 71 | |
---|
/* Some asm.h files do not have the PTR_ADDIU macro definition.

   PTR_ADDIU advances the string pointers, so it must be the 64-bit add
   whenever pointers are 64 bits wide.  USE_DOUBLE is not defined anywhere
   in this file, so relying on it alone would select the 32-bit addiu even
   for 64-bit pointers; the compiler-provided _MIPS_SZPTR is therefore
   consulted as well.  An explicit USE_DOUBLE is still honoured so existing
   build configurations keep their behaviour.  */
#ifndef PTR_ADDIU
# if defined(USE_DOUBLE) || (defined(_MIPS_SZPTR) && _MIPS_SZPTR == 64)
#  define PTR_ADDIU daddiu
# else
#  define PTR_ADDIU addiu
# endif
#endif
---|
| 80 | |
---|
/* The routine is emitted under the name STRCMP_NAME.  A build may define
   that macro beforehand to pick a different symbol; the default is
   strcmp.  */
#if !defined(STRCMP_NAME)
# define STRCMP_NAME strcmp
#endif
---|
| 85 | |
---|
/* Entry point.
   In:   a0, a1 = addresses of the two strings to compare.
   Out:  v0     = result of the comparison.
   Uses: v0, v1, t0, t1, t8, t9.

   The code below is assembled with .set noreorder, so the instruction
   written after every branch or jump occupies its delay slot and is
   executed whether or not the branch is taken.  */
#ifndef ANDROID_CHANGES
LEAF(STRCMP_NAME)
#else
LEAF(STRCMP_NAME, 0)
#endif
	.set	nomips16
	.set	noreorder

	/* If either pointer has one of its two low bits set, use the
	   byte-at-a-time loop.  */
	or	t0, a0, a1
	andi	t0, t0, 0x3
	bne	t0, zero, L(byteloop)

/* On the fall-through path both strings are 4-byte aligned.  (The first
   lui below sits in the delay slot of the branch above, so it also runs
   when the byte loop is taken; that loop does not read t8.)  */

	/* t8 = 0x01010101 and t9 = 0x7f7f7f7f, the constants for the
	   zero-byte test performed by STRCMP32.  */
	lui	t8, 0x0101
	ori	t8, t8, 0x0101
	lui	t9, 0x7f7f
	ori	t9, t9, 0x7f7f

/* Compare the words at OFF(a0) and OFF(a1).
   - If they differ, branch to worddiff with the two words in v0 / v1.
   - Otherwise (v0 - 0x01010101) & ~(v0 | 0x7f7f7f7f) is non-zero when v0
     contains a zero byte; in that case the strings ended while still equal,
     so branch to returnzero.
   The final branch has no delay-slot instruction inside the macro; the
   instruction written after each expansion fills it.  */
#define STRCMP32(OFF) \
	lw	v0, OFF(a0); \
	lw	v1, OFF(a1); \
	subu	t0, v0, t8; \
	bne	v0, v1, L(worddiff); \
	nor	t1, v0, t9; \
	and	t0, t0, t1; \
	bne	t0, zero, L(returnzero)

/* Main loop, unrolled eight times: 32 bytes of each string per pass.  */
L(wordloop):
	STRCMP32(0)
	DELAY_READ
	STRCMP32(4)
	DELAY_READ
	STRCMP32(8)
	DELAY_READ
	STRCMP32(12)
	DELAY_READ
	STRCMP32(16)
	DELAY_READ
	STRCMP32(20)
	DELAY_READ
	STRCMP32(24)
	DELAY_READ
	STRCMP32(28)
	/* Delay slot of the last STRCMP32 branch: advance the first pointer.  */
	PTR_ADDIU a0, a0, 32
	b	L(wordloop)
	/* Delay slot of the loop branch: advance the second pointer.  */
	PTR_ADDIU a1, a1, 32

/* The strings matched up to and including the terminator: return 0.  */
L(returnzero):
	j	ra
	move	v0, zero
---|
| 137 | |
---|
/* Reached from the word loop with v0 != v1, where v0 / v1 hold the current
   word of the first / second string and t9 still holds 0x7f7f7f7f.
   Locate the first byte, in memory order, at which the first string has a
   NUL or the two strings differ, and return (first byte - second byte) with
   both bytes zero-extended.  */
L(worddiff):
#ifdef USE_CLZ
	/* t1 = 0x80 in every byte position where v0 holds a zero byte and 0
	   elsewhere.  This form is exact: adding 0x7f to the low seven bits of
	   a byte cannot carry into the neighbouring byte.  The cheaper
	   (v0 - 0x01010101) & ~(v0 | 0x7f7f7f7f) test used by the word loop only
	   guarantees "some byte is zero"; its borrow can also flag a 0x01 byte
	   that sits just above a zero byte.  On big-endian that byte comes
	   earlier in memory, so clz would select it and two strings that differ
	   only in the following byte would compare as equal.  */
	and	t1, v0, t9
	addu	t1, t1, t9
	or	t1, t1, v0
	nor	t1, t1, t9
	/* t0 = bits that differ between the words, plus the NUL markers.  */
	xor	t0, v0, v1
	or	t0, t0, t1
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	/* Reverse the byte order so that the byte at the lowest address ends
	   up in the most significant position for clz.  */
	wsbh	t0, t0
	rotr	t0, t0, 16
# endif
	/* Leading-zero count rounded down to a multiple of 8:
	   t1 = 8 * (index of the first interesting byte in memory order).  */
	clz	t1, t0
	andi	t1, t1, 0xf8
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	/* Big-endian: byte i lives at bit offset 24 - 8*i.  */
	neg	t1
	addiu	t1, t1, 24
# endif
	/* Rotate the selected byte of each word into bits 7..0 and isolate it.  */
	rotrv	v0, v0, t1
	rotrv	v1, v1, t1
	andi	v0, v0, 0xff
	andi	v1, v1, 0xff
	j	ra
	subu	v0, v0, v1
#else /* USE_CLZ */
	/* Examine the four bytes one at a time in memory order.  Byte pairs
	   are extracted alternately into t0/t1 and t8/t9 (the word-loop
	   constants in t8/t9 are no longer needed once this path is entered).
	   For each pair: leave if the first string's byte is zero, or if the
	   two bytes differ.  The instruction after each branch is its delay
	   slot and executes either way.  */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	/* Byte 0: bits 7..0.  */
	andi	t0, v0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, v1, 0xff
	bne	t0, t1, L(wexit01)

	/* Byte 1: bits 15..8.  */
	srl	t8, v0, 8
	srl	t9, v1, 8
	andi	t8, t8, 0xff
	beq	t8, zero, L(wexit89)
	andi	t9, t9, 0xff
	bne	t8, t9, L(wexit89)

	/* Byte 2: bits 23..16.  */
	srl	t0, v0, 16
	srl	t1, v1, 16
	andi	t0, t0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, t1, 0xff
	bne	t0, t1, L(wexit01)

	/* Byte 3: bits 31..24.  No test is needed; fall into wexit89.  */
	srl	t8, v0, 24
	srl	t9, v1, 24
# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
	/* Byte 0: bits 31..24.  */
	srl	t0, v0, 24
	beq	t0, zero, L(wexit01)
	srl	t1, v1, 24
	bne	t0, t1, L(wexit01)

	/* Byte 1: bits 23..16.  */
	srl	t8, v0, 16
	srl	t9, v1, 16
	andi	t8, t8, 0xff
	beq	t8, zero, L(wexit89)
	andi	t9, t9, 0xff
	bne	t8, t9, L(wexit89)

	/* Byte 2: bits 15..8.  */
	srl	t0, v0, 8
	srl	t1, v1, 8
	andi	t0, t0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, t1, 0xff
	bne	t0, t1, L(wexit01)

	/* Byte 3: bits 7..0.  No test is needed; fall into wexit89.  */
	andi	t8, v0, 0xff
	andi	t9, v1, 0xff
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */

/* Return the difference of the byte pair held in t8 / t9.  */
L(wexit89):
	j	ra
	subu	v0, t8, t9
/* Return the difference of the byte pair held in t0 / t1.  */
L(wexit01):
	j	ra
	subu	v0, t0, t1
#endif /* USE_CLZ */
---|
| 215 | |
---|
/* Byte-at-a-time comparison, entered when the pointers are not both 4-byte
   aligned.

   Each BYTECMPxx(OFF) loads the bytes at OFF(a0) and OFF(a1) and leaves
   through the matching exit label when the first string's byte is zero or
   the two bytes differ.  Placing the 'beq' between the two 'lbu'
   instructions would make the nop unnecessary, but testing (glibc strcmp
   test) showed the sequence below to be faster.

   The closing 'bne' has no delay-slot instruction inside the macro; the
   instruction written after each expansion fills it.  The two variants are
   identical except for the registers used (v0/v1 versus t8/t9) and the
   exit label.  */
#define BYTECMP01(OFF) \
	lbu	v0, OFF(a0); \
	lbu	v1, OFF(a1); \
	beq	v0, zero, L(bexit01); \
	nop; \
	bne	v0, v1, L(bexit01)

#define BYTECMP89(OFF) \
	lbu	t8, OFF(a0); \
	lbu	t9, OFF(a1); \
	beq	t8, zero, L(bexit89); \
	nop; \
	bne	t8, t9, L(bexit89)

/* Unrolled eight times: 8 bytes of each string per pass.  */
L(byteloop):
	BYTECMP01(0)
	BYTECMP89(1)
	BYTECMP01(2)
	BYTECMP89(3)
	BYTECMP01(4)
	BYTECMP89(5)
	BYTECMP01(6)
	BYTECMP89(7)
	/* Delay slot of the last BYTECMP89 branch: advance the first pointer.  */
	PTR_ADDIU a0, a0, 8
	b	L(byteloop)
	/* Delay slot of the loop branch: advance the second pointer.  */
	PTR_ADDIU a1, a1, 8

/* Return the difference of the byte pair held in v0 / v1.  */
L(bexit01):
	j	ra
	subu	v0, v0, v1
/* Return the difference of the byte pair held in t8 / t9.  */
L(bexit89):
	j	ra
	subu	v0, t8, t9

	.set	at
	.set	reorder

END(STRCMP_NAME)
#if !defined(ANDROID_CHANGES) && defined(_LIBC)
libc_hidden_builtin_def (STRCMP_NAME)
#endif
---|