source: trunk/libs/newlib/src/newlib/libc/machine/arc/memcpy-archs.S @ 444

Last change on this file since 444 was 444, checked in by satin@…, 6 years ago

add newlib,libalmos-mkh, restructure shared_syscalls.h and mini-libc

File size: 7.6 KB
Line 
1/*
2   Copyright (c) 2015, Synopsys, Inc. All rights reserved.
3
4   Redistribution and use in source and binary forms, with or without
5   modification, are permitted provided that the following conditions are met:
6
7   1) Redistributions of source code must retain the above copyright notice,
8   this list of conditions and the following disclaimer.
9
10   2) Redistributions in binary form must reproduce the above copyright notice,
11   this list of conditions and the following disclaimer in the documentation
12   and/or other materials provided with the distribution.
13
14   3) Neither the name of the Synopsys, Inc., nor the names of its contributors
15   may be used to endorse or promote products derived from this software
16   without specific prior written permission.
17
18   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28   POSSIBILITY OF SUCH DAMAGE.
29*/
30
31/* This implementation is optimized for performance.  For code size a generic
32   implementation of this function from newlib/libc/string/memcpy.c will be
33   used.  */
34#if !defined (__OPTIMIZE_SIZE__) && !defined (PREFER_SIZE_OVER_SPEED)
35
36#include "asm.h"
37
38#if defined (__ARCHS__)
39
/* Byte re-packing helpers for the memcpy paths whose source is not word
   aligned while the destination is.  Each takes (RX = result register,
   RY = input register, IMM = bit count):
     SHIFT_1   moves a freshly loaded word so it can be ORed with the bytes
               carried over from the previous word;
     SHIFT_2   produces the bytes of that word that are carried forward;
     MERGE_1/2 position the bytes read before the loop to form the first carry;
     EXTRACT_1/2 recover the carried bytes that are stored after the loop.
   The shift directions swap with endianness.  Note that the little-endian
   MERGE_2 expands to nothing, the little-endian EXTRACT_1 ignores IMM and
   masks the low 16 bits, and the big-endian EXTRACT_2 ignores IMM and always
   shifts right by 8.  The trailing "; <<" / "; >>" text is part of the macro
   body and ends up as an assembler comment at each use.  */
40#ifdef __LITTLE_ENDIAN__
41# define SHIFT_1(RX,RY,IMM)     asl     RX, RY, IMM     ; <<
42# define SHIFT_2(RX,RY,IMM)     lsr     RX, RY, IMM     ; >>
43# define MERGE_1(RX,RY,IMM)     asl     RX, RY, IMM
44# define MERGE_2(RX,RY,IMM)
45# define EXTRACT_1(RX,RY,IMM)   and     RX, RY, 0xFFFF
46# define EXTRACT_2(RX,RY,IMM)   lsr     RX, RY, IMM
47#else
48# define SHIFT_1(RX,RY,IMM)     lsr     RX, RY, IMM     ; >>
49# define SHIFT_2(RX,RY,IMM)     asl     RX, RY, IMM     ; <<
50# define MERGE_1(RX,RY,IMM)     asl     RX, RY, IMM     ; <<
51# define MERGE_2(RX,RY,IMM)     asl     RX, RY, IMM     ; <<
52# define EXTRACT_1(RX,RY,IMM)   lsr     RX, RY, IMM
53# define EXTRACT_2(RX,RY,IMM)   lsr     RX, RY, 0x08
54#endif
55
/* Block-copy parameters.  LOADX/STOREX move one unit and post-increment the
   pointer register (.ab) by the unit size: 8 bytes (ldd/std) when
   __ARC_LL64__ is defined, 4 bytes (ld/st) otherwise.  The block loops issue
   four LOADX/STOREX per iteration, i.e. 32 or 16 bytes, so:
     ZOLSHFT  is log2 of the bytes copied per loop iteration (5 or 4);
     ZOLAND   is the mask giving the bytes left after the block loop.
   PREFETCH_READ/PREFETCH_WRITE prefetch at a fixed offset ahead of the
   current read/write pointer.  */
56#ifdef __ARC_LL64__
57# define PREFETCH_READ(RX)      prefetch        [RX, 56]
58# define PREFETCH_WRITE(RX)     prefetchw       [RX, 64]
59# define LOADX(DST,RX)          ldd.ab  DST, [RX, 8]
60# define STOREX(SRC,RX)         std.ab  SRC, [RX, 8]
61# define ZOLSHFT                5
62# define ZOLAND                 0x1F
63#else
64# define PREFETCH_READ(RX)      prefetch        [RX, 28]
65# define PREFETCH_WRITE(RX)     prefetchw       [RX, 32]
66# define LOADX(DST,RX)          ld.ab   DST, [RX, 4]
67# define STOREX(SRC,RX)         st.ab   SRC, [RX, 4]
68# define ZOLSHFT                4
69# define ZOLAND                 0xF
70#endif
71
72#ifdef __ARC_ALIGNED_ACCESS__
/* memcpy, variant built when __ARC_ALIGNED_ACCESS__ is defined.

   In:   r0 = destination, r1 = source, r2 = number of bytes.
   Out:  r0 is left untouched (r3 is used as the running destination).
   Uses: r1..r10, lp_count and the flags as scratch.  When LOADX/STOREX are
         ldd/std the named registers are presumably the low half of a
         register pair, so r5/r7/r9/r11 are touched as well -- TODO confirm
         against the ISA manual.

   Flow: sizes of 0 return at once; sizes up to 8 are copied bytewise.
   Otherwise the destination is first brought to a 4-byte boundary with a
   byte loop, then one of four paths is taken depending on (source & 3):
     CASE 0  source aligned: block loop of four LOADX/STOREX, then bytes;
     CASE 1/2/3  source off by 1/2/3: read the 3/2/1 bytes that align the
             source, then loop over aligned word loads, shifting and merging
             with a carry in r5 so that every store is an aligned word; the
             carried bytes are stored after the loop, then the tail bytes.

   Instructions with a .d suffix have a delay slot: the instruction that
   follows them executes whether or not the branch is taken.

   NOTE(review): the prefetchw instructions target an address 32 or 64 bytes
   ahead of the write pointer, which near the end of a copy can lie beyond
   destination + size.  Confirm this is acceptable on the target (the Linux
   kernel copy of this routine removed the prefetches for that reason).  */
73ENTRY (memcpy)
74        prefetch  [r1]          ; Prefetch the read location
75        prefetchw [r0]          ; Prefetch the write location
76        mov.f   0, r2           ; only sets the flags from the byte count
77; if size is zero, return immediately
78        jz.d    [blink]
79        mov     r3, r0          ; delay slot: r3 = running dst, r0 stays the return value
80
81; if size <= 8, copy everything with the byte loop at .Lsmallchunk
82        cmp     r2, 8
83        bls.d   @.Lsmallchunk
84        mov.f   lp_count, r2    ; delay slot: byte count and flags for that loop (size is non-zero here)
85
86        and.f   r4, r0, 0x03    ; r4 = dst & 3, Z set when dst is already word aligned
87        rsub    lp_count, r4, 4 ; lp_count = 4 - r4 = bytes needed to align dst
88        lpnz    @.Laligndestination ; loop skipped when Z is set
89        ; LOOP BEGIN: copy one byte, r2 tracks the bytes still to copy
90        ldb.ab  r5, [r1,1]
91        sub     r2, r2, 1
92        stb.ab  r5, [r3,1]
93.Laligndestination:
94
95; Check the alignment of the source (r4 = src & 3)
96        and.f   r4, r1, 0x03
97        bnz.d   @.Lsourceunaligned
98
99; CASE 0: Both source and destination are 32bit aligned
100; Convert len to loop iterations (four LOADX/STOREX per iteration)
101        lsr.f   lp_count, r2, ZOLSHFT ; delay slot of the bnz.d above; the unaligned paths recompute lp_count and flags
102        lpnz    @.Lcopy32_64bytes
103        ; LOOP START: four loads, then four stores
104        LOADX (r6, r1)
105        PREFETCH_READ (r1)
106        PREFETCH_WRITE (r3)
107        LOADX (r8, r1)
108        LOADX (r10, r1)
109        LOADX (r4, r1)
110        STOREX (r6, r3)
111        STOREX (r8, r3)
112        STOREX (r10, r3)
113        STOREX (r4, r3)
114.Lcopy32_64bytes:
115
116        and.f   lp_count, r2, ZOLAND ; bytes left over after the block loop
117.Lsmallchunk:
118        lpnz    @.Lcopyremainingbytes ; also entered directly for sizes 1..8
119        ; LOOP START: bytewise copy
120        ldb.ab  r5, [r1,1]
121        stb.ab  r5, [r3,1]
122.Lcopyremainingbytes:
123
124        j       [blink]
125; END CASE 0
126
127.Lsourceunaligned:
128        cmp     r4, 2           ; r4 = src & 3, one of 1, 2 or 3 here
129        beq.d   @.LunalignedOffby2
130        sub     r2, r2, 1       ; delay slot, runs for all three cases; no .f so the cmp flags survive
131
132        bhi.d   @.LunalignedOffby3
133        ldb.ab  r5, [r1, 1]     ; delay slot, runs for off-by-1 and off-by-3: the byte counted by the sub above
134
135; CASE 1: The source is unaligned, off by 1
136        ; One byte was read above to reach 16bit alignment,
137        ; two more bytes are read here to reach 32bit alignment
138        ldh.ab  r6, [r1, 2]
139        sub     r2, r2, 2
140        ; Convert to loop iterations of two words (8 bytes) each
141        lsr.f   lp_count, r2, 3
142        MERGE_1 (r6, r6, 8)
143        MERGE_2 (r5, r5, 24)
144        or      r5, r5, r6      ; r5 = the three bytes read so far, used as the carry
145
146        ; From here on all word loads and word stores are aligned
147        lpnz    @.Lcopy8bytes_1
148        ; LOOP START
149        ld.ab   r6, [r1, 4]
150        prefetch [r1, 28]       ;Prefetch the next read location
151        ld.ab   r8, [r1,4]
152        prefetchw [r3, 32]      ;Prefetch the next write location
153
154        SHIFT_1 (r7, r6, 24)
155        or      r7, r7, r5      ; first word out = carry + one byte of r6
156        SHIFT_2 (r5, r6, 8)
157
158        SHIFT_1 (r9, r8, 24)
159        or      r9, r9, r5      ; second word out = rest of r6 + one byte of r8
160        SHIFT_2 (r5, r8, 8)
161
162        st.ab   r7, [r3, 4]
163        st.ab   r9, [r3, 4]
164.Lcopy8bytes_1:
165
166        ; Write back 16 of the 24 carried bits
167        EXTRACT_1 (r6, r5, 16)
168        sth.ab  r6, [r3, 2]
169        ; Write back the remaining 8 carried bits
170        EXTRACT_2 (r5, r5, 16)
171        stb.ab  r5, [r3, 1]
172
173        and.f   lp_count, r2, 0x07 ; remaining 0..7 bytes
174        lpnz    @.Lcopybytewise_1
175        ; LOOP START: bytewise copy
176        ldb.ab  r6, [r1,1]
177        stb.ab  r6, [r3,1]
178.Lcopybytewise_1:
179        j       [blink]
180
181.LunalignedOffby2:
182; CASE 2: The source is unaligned, off by 2
183        ldh.ab  r5, [r1, 2]     ; two bytes bring the source to 32bit alignment
184        sub     r2, r2, 1       ; second of the two bytes; the first was deducted in the beq.d delay slot
185
186        ; From here on all word loads and word stores are aligned
187        ; Convert to loop iterations of two words (8 bytes) each
188        lsr.f   lp_count, r2, 3
189#ifdef __BIG_ENDIAN__
190        asl.nz  r5, r5, 16      ; only when the loop will run: move the carry to the upper half
191#endif
192        lpnz    @.Lcopy8bytes_2
193        ; LOOP START
194        ld.ab   r6, [r1, 4]
195        prefetch [r1, 28]       ;Prefetch the next read location
196        ld.ab   r8, [r1,4]
197        prefetchw [r3, 32]      ;Prefetch the next write location
198
199        SHIFT_1 (r7, r6, 16)
200        or      r7, r7, r5
201        SHIFT_2 (r5, r6, 16)
202
203        SHIFT_1 (r9, r8, 16)
204        or      r9, r9, r5
205        SHIFT_2 (r5, r8, 16)
206
207        st.ab   r7, [r3, 4]
208        st.ab   r9, [r3, 4]
209.Lcopy8bytes_2:
210
211#ifdef __BIG_ENDIAN__
212        lsr.nz  r5, r5, 16      ; flags still come from the lsr.f above (nothing in between uses .f)
213#endif
214        sth.ab  r5, [r3, 2]     ; store the two carried bytes
215
216        and.f   lp_count, r2, 0x07 ; remaining 0..7 bytes
217        lpnz    @.Lcopybytewise_2
218        ; LOOP START: bytewise copy
219        ldb.ab  r6, [r1,1]
220        stb.ab  r6, [r3,1]
221.Lcopybytewise_2:
222        j       [blink]
223
224.LunalignedOffby3:
225; CASE 3: The source is unaligned, off by 3
226; The single byte needed to reach 32bit alignment is already in r5
227
228        ; From here on all word loads and word stores are aligned
229        ; Convert to loop iterations of two words (8 bytes) each
230        lsr.f   lp_count, r2, 3
231#ifdef __BIG_ENDIAN__
232        asl.ne  r5, r5, 24      ; only when the loop will run: move the carry to the top byte
233#endif
234        lpnz    @.Lcopy8bytes_3
235        ; LOOP START
236        ld.ab   r6, [r1, 4]
237        prefetch [r1, 28]       ;Prefetch the next read location
238        ld.ab   r8, [r1,4]
239        prefetchw [r3, 32]      ;Prefetch the next write location
240
241        SHIFT_1 (r7, r6, 8)
242        or      r7, r7, r5
243        SHIFT_2 (r5, r6, 24)
244
245        SHIFT_1 (r9, r8, 8)
246        or      r9, r9, r5
247        SHIFT_2 (r5, r8, 24)
248
249        st.ab   r7, [r3, 4]
250        st.ab   r9, [r3, 4]
251.Lcopy8bytes_3:
252
253#ifdef __BIG_ENDIAN__
254        lsr.nz  r5, r5, 24      ; flags still come from the lsr.f above (nothing in between uses .f)
255#endif
256        stb.ab  r5, [r3, 1]     ; store the one carried byte
257
258        and.f   lp_count, r2, 0x07 ; remaining 0..7 bytes
259        lpnz    @.Lcopybytewise_3
260        ; LOOP START: bytewise copy
261        ldb.ab  r6, [r1,1]
262        stb.ab  r6, [r3,1]
263.Lcopybytewise_3:
264        j       [blink]
265
266ENDFUNC (memcpy)
267
268#else
269
/* memcpy, variant built when __ARC_ALIGNED_ACCESS__ is not defined.

   In:   r0 = destination, r1 = source, r2 = number of bytes.
   Out:  r0 is left untouched (r3 is used as the running destination).
   Uses: r1..r10, lp_count and the flags as scratch.  When LOADX/STOREX are
         ldd/std the named registers are presumably the low half of a
         register pair, so r5/r7/r9/r11 are touched as well -- TODO confirm
         against the ISA manual.

   Flow: no alignment handling is done here; word or double-word accesses
   are issued on whatever addresses the caller passed.  Sizes of 0 return at
   once, sizes up to 8 are copied bytewise, anything larger goes through the
   block loop (four LOADX/STOREX per iteration), then with __ARC_LL64__ a
   loop of single 8-byte copies, then a byte loop for the rest.

   Instructions with a .d suffix have a delay slot: the instruction that
   follows them executes whether or not the branch is taken.

   NOTE(review): PREFETCH_WRITE targets an address 32 or 64 bytes ahead of
   the write pointer, which near the end of a copy can lie beyond
   destination + size.  Confirm this is acceptable on the target.  */
270ENTRY(memcpy)
271        prefetch  [r1]          ; Prefetch the read location
272        prefetchw [r0]          ; Prefetch the write location
273        mov.f   0, r2           ; only sets the flags from the byte count
274;;; if size is zero, return immediately
275        jz.d    [blink]
276        mov     r3, r0          ; delay slot: r3 = running dst, r0 stays the return value
277
278;;; if size <= 8, copy everything with the byte loop at .Lsmallchunk
279        cmp     r2, 8
280        bls.d   @.Lsmallchunk
281        mov.f   lp_count, r2    ; delay slot: byte count and flags for that loop (size is non-zero here)
282
283;;; Convert len to loop iterations (four LOADX/STOREX per iteration)
284        lsr.f   lp_count, r2, ZOLSHFT
285        lpnz    @.Lcopyfast
286        ;; LOOP START: four loads, then four stores
287        LOADX (r6, r1)
288        PREFETCH_READ (r1)
289        PREFETCH_WRITE (r3)
290        LOADX (r8, r1)
291        LOADX (r10, r1)
292        LOADX (r4, r1)
293        STOREX (r6, r3)
294        STOREX (r8, r3)
295        STOREX (r10, r3)
296        STOREX (r4, r3)
297.Lcopyfast:
298
299#ifdef __ARC_LL64__
300        and     r2, r2, ZOLAND  ;Remaining 0..31 bytes, kept in r2
301        lsr.f   lp_count, r2, 3 ;Convert to 64-bit words.
302        lpnz    @.Lcopy64b
303        ;; LOOP START: one 8-byte copy per iteration
304        ldd.ab  r6,[r1,8]
305        std.ab  r6,[r3,8]
306.Lcopy64b:
307
308        and.f   lp_count, r2, 0x07 ; Last 0..7 bytes
309#else
310        and.f   lp_count, r2, ZOLAND ; bytes left over after the block loop
311#endif
312
313.Lsmallchunk:
314        lpnz    @.Lcopyremainingbytes ; also entered directly for sizes 1..8
315        ;; LOOP START: bytewise copy
316        ldb.ab  r5, [r1,1]
317        stb.ab  r5, [r3,1]
318.Lcopyremainingbytes:
319
320        j       [blink]
321
322ENDFUNC(memcpy)
323#endif
324
325#endif /* __ARCHS__ */
326
327#endif /* !__OPTIMIZE_SIZE__ && !PREFER_SIZE_OVER_SPEED */
Note: See TracBrowser for help on using the repository browser.