/* Provenance (repository browser header):
 *
 * source: trunk/libs/newlib/src/newlib/libc/machine/aarch64/memcmp.S @ 444
 *
 * Last change on this file since 444 was 444, checked in by satin@…, 6 years ago
 *
 * add newlib,libalmos-mkh, restructure shared_syscalls.h and mini-libc
 *
 * File size: 4.0 KB
 */
/*
 * Copyright (c) 2017 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
/* See memcmp-stub.c  */
#else

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 *
 * Syntax: GNU as, run through the C preprocessor (the register names
 * below are preprocessor macros, not assembler symbols).
 */

/* int memcmp (const void *s1, const void *s2, size_t n)
 *
 * In:       x0 = s1, x1 = s2, x2 = n (number of bytes to compare).
 * Out:      w0 = 0 when the first n bytes match; otherwise negative when
 *           the first differing byte, taken as unsigned, is smaller in
 *           s1, and positive when it is larger.  The 8-byte and 4-byte
 *           word paths return exactly -1 or 1, the byte loop returns the
 *           difference of the two bytes.
 * Clobbers: x0-x5 and the NZCV flags.
 * Stack:    none; the routine makes no calls and never touches sp.
 */

/* Parameters and result.  */
#define src1            x0
#define src2            x1
#define limit           x2
#define result          w0

/* Internal variables.  data1w/data2w are the 32-bit views of data1/data2;
   a write to the 32-bit view zeroes the upper half of the 64-bit
   register.  */
#define data1           x3
#define data1w          w3
#define data2           x4
#define data2w          w4
#define tmp1            x5

/* def_fn f [p2align=N]
   Start the global function symbol \f in .text, aligned to 2^N bytes
   (N defaults to 0, i.e. no extra alignment).  */
        .macro def_fn f p2align=0
        .text
        .p2align \p2align
        .global \f
        .type \f, %function
\f:
        .endm

/* Small inputs of less than 8 bytes are handled separately.  This allows the
   main code to be sped up using unaligned loads since there are now at least
   8 bytes to be compared.  If the first 8 bytes are equal, align src1.
   This ensures each iteration does at most one unaligned access even if both
   src1 and src2 are unaligned, and mutually aligned inputs behave as if
   aligned.  After the main loop, process the last 8 bytes using unaligned
   accesses.  */

def_fn memcmp p2align=6
        subs    limit, limit, 8         /* limit = n - 8; borrow iff n < 8.  */
        b.lo    .Lless8

        /* Limit >= 8, so check first 8 bytes using unaligned loads.  */
        ldr     data1, [src1], 8
        ldr     data2, [src2], 8
        and     tmp1, src1, 7           /* tmp1 = misalignment of src1 (0..7).  */
        add     limit, limit, tmp1      /* limit = n - 8 + tmp1.  */
        cmp     data1, data2
        b.ne    .Lreturn

        /* Align src1 and adjust src2 with bytes not yet done.  Both pointers
           step back by tmp1 bytes that have already compared equal, so
           limit is now the byte count from src1 to the end of the buffer.  */
        sub     src1, src1, tmp1
        sub     src2, src2, tmp1

        /* From here on limit = (bytes left from src1) - 8.  */
        subs    limit, limit, 8
        b.ls    .Llast_bytes            /* <= 8 bytes left: finish with one load.  */

        /* Loop performing 8 bytes per iteration using aligned src1.
           Limit is pre-decremented by 8 and must be larger than zero.
           Exit if <= 8 bytes left to do or if the data is not equal.  */
        .p2align 4
.Lloop8:
        ldr     data1, [src1], 8
        ldr     data2, [src2], 8
        subs    limit, limit, 8
        /* More than 8 bytes were left (hi): flags = compare of the data.
           Otherwise force NZCV = 0b0000, which reads as "ne" and ends the
           loop.  */
        ccmp    data1, data2, 0, hi  /* NZCV = 0b0000.  */
        b.eq    .Lloop8

        /* The loop ends either on a mismatch or because the count ran out;
           compare again to tell the two apart.  */
        cmp     data1, data2
        b.ne    .Lreturn

        /* Compare the final 8 bytes of the buffer using unaligned access.
           limit is in [-8..0] here, so src + limit is (end - 8); bytes that
           were already compared equal may be read again.  */
.Llast_bytes:
        ldr     data1, [src1, limit]
        ldr     data2, [src2, limit]

        /* Compare data bytes and set return value to 0, -1 or 1.  */
.Lreturn:
#ifndef __AARCH64EB__
        /* Little-endian: byte-reverse so the lowest-addressed byte becomes
           the most significant and an unsigned compare orders the words
           like a byte-by-byte compare.  Values that arrive here from the
           4-byte path are zero-extended, which keeps their order intact.  */
        rev     data1, data1
        rev     data2, data2
#endif
        cmp     data1, data2
        /* .Lless4 enters here with Z = 1 and C = 1 (its adds produced zero
           with a carry out), which yields result = 0.  */
.Lret_eq:
        cset    result, ne              /* 0 if equal, else 1.  */
        cneg    result, result, lo      /* Negate when data1 < data2 (unsigned).  */
        ret

        .p2align 4
        /* Compare fewer than 8 bytes.  Limit is [-8..-1], i.e. n - 8.  */
.Lless8:
        adds    limit, limit, 4         /* limit = n - 4; carry set iff n >= 4.  */
        b.lo    .Lless4
        ldr     data1w, [src1], 4       /* n >= 4: compare one 32-bit word.  */
        ldr     data2w, [src2], 4
        cmp     data1w, data2w
        b.ne    .Lreturn
        sub     limit, limit, 4         /* limit = (bytes left) - 4.  */
.Lless4:
        adds    limit, limit, 4         /* limit = bytes left (0..3).  */
        b.eq    .Lret_eq                /* Nothing left: the buffers are equal.  */
.Lbyte_loop:
        ldrb    data1w, [src1], 1
        ldrb    data2w, [src2], 1
        subs    limit, limit, 1
        /* Bytes remain (ne): flags = compare of the two bytes.  Otherwise
           force NZCV = 0b0000 ("ne") so the loop stops after the last
           byte.  */
        ccmp    data1w, data2w, 0, ne   /* NZCV = 0b0000.  */
        b.eq    .Lbyte_loop
        /* Difference of the last byte pair loaded; 0 when the count ran out
           with every byte equal.  */
        sub     result, data1w, data2w
        ret

        .size   memcmp, . - memcmp
#endif
/* Note: See TracBrowser for help on using the repository browser.  */