source: trunk/libs/newlib/src/newlib/libc/machine/h8300/memcpy.S @ 444

Last change on this file since 444 was 444, checked in by satin@…, 6 years ago

add newlib,libalmos-mkh, restructure shared_syscalls.h and mini-libc

File size: 3.2 KB
Line 
1#include "setarch.h"
2
3#include "defines.h"
4
5#ifdef __H8300SX__
6
; memcpy for H8SX, built around the movmd block-move instruction.
;
; In:    er0 = destination, er1 = source, (e)r2 = byte count.
;        NOTE(review): register roles are inferred from the moves into
;        er6/er5 below and from the comments in this file -- confirm
;        against the H8 calling convention.
; Out:   er0 is never written here, so it still holds the destination.
; Saved: er4-er6 are pushed on entry and restored by every rts/l exit.
;
; LEN() is not defined in this file; it presumably selects the 16-bit or
; 32-bit view of a register to match the size_t width -- confirm in
; defines.h.
7        .global _memcpy
8_memcpy:
          ; Preserve er4-er6: er5/er6 become the movmd pointers and (e)r4
          ; the movmd count below.
9        stm.l   er4-er6,@-er7
10
11        ; Set up source and destination pointers for movmd.
          ; er6 = destination (copied from er0), er5 = source (from er1).
12        mov.l   er0,er6
13        mov.l   er1,er5
14
15        ; See whether the copy is long enough to use the movmd.l code.
16        ; Although the code can handle anything longer than 6 bytes,
17        ; it can be more expensive than movmd.b for small moves.
18        ; It's better to use a higher threshold to account for this.
19        ;
20        ; Note that the exact overhead of the movmd.l checks depends on
21        ; the alignments of the length and pointers.  They are faster when
22        ; er0 & 3 == er1 & 3 == er2 & 3, faster still when these values
23        ; are 0.  This threshold is a compromise between the various cases.
24        cmp     #16,LEN(r2)
25        blo     simple
26
27        ; movmd.l only works for even addresses.  If one of the addresses
28        ; is odd and the other is not, fall back on a simple move.
          ; bld copies bit 0 of the source pointer into carry and bxor folds
          ; in bit 0 of the destination pointer, so carry ends up set exactly
          ; when the two pointers differ in parity.
29        bld     #0,r5l
30        bxor    #0,r6l
31        bcs     simple
32
33        ; Make the addresses even.
          ; The pointers share the same parity at this point, so testing
          ; bit 0 of the source alone is enough: one byte move evens both.
34        bld     #0,r5l
35        bcc     word_aligned
36        mov.b   @er5+,@er6+
37        sub     #1,LEN(r2)
38
39word_aligned:
40        ; See if copying one word would make the first operand longword
41        ; aligned.  Although this is only really worthwhile if it aligns
42        ; the second operand as well, it's no worse if it doesn't, so it
43        ; hardly seems worth the overhead of a "band" check.
          ; Bit 1 of the destination pointer (er6) is the one tested.
44        bld     #1,r6l
45        bcc     fast_copy
46        mov.w   @er5+,@er6+
47        sub     #2,LEN(r2)
48
49fast_copy:
50        ; Set (e)r4 to the number of longwords to copy.
          ; (e)r2 keeps the full remaining byte count; its low two bits are
          ; used afterwards for the left-over bytes.
51        mov     LEN(r2),LEN(r4)
52        shlr    #2,LEN(r4)
53
54#ifdef __NORMAL_MODE__
55        ; 16-bit pointers and size_ts: one movmd.l is enough.  This code
56        ; is never reached with r4 == 0.
          ; (The length was at least 16 at the threshold check and at most
          ; 3 bytes were consumed by the alignment moves above.)
57        movmd.l
          ; r2 & 3 = bytes left over after the longword copy.
58        and.w   #3,r2
59simple:
          ; Byte copy of r2 bytes, shared by the short/misaligned path and
          ; the tail of the fast path.  A zero count branches around movmd.b.
60        mov.w   r2,r4
61        beq     quit
62        movmd.b
63quit:
          ; Restore er4-er6 and return.
64        rts/l   er4-er6
65#else
66        ; Skip the first iteration if the number of longwords is divisible
67        ; by 0x10000.
          ; mov.w r4,r4 changes nothing; it only sets the flags from the low
          ; word of the longword count.
68        mov.w   r4,r4
69        beq     fast_loop_next
70
71        ; This loop copies r4 (!= 0) longwords the first time round and 65536
72        ; longwords on each iteration after that.
73fast_loop:
74        movmd.l
75fast_loop_next:
          ; e4 (the high word of the longword count) counts the remaining
          ; 65536-longword blocks; the borrow out of sub.w ends the loop.
76        sub.w   #1,e4
77        bhs     fast_loop
78
79        ; Mop up any left-over bytes.  We could just fall through to the
80        ; simple code after the "and" but the version below is quicker
81        ; and only takes 10 more bytes.
82        and.w   #3,r2
83        beq     quit
84        mov.w   r2,r4
85        movmd.b
86quit:
          ; Restore er4-er6 and return.
87        rts/l   er4-er6
88
89simple:
90        ; Simple bytewise copy.  We need to handle all lengths, including zero.
          ; Same structure as the longword loop above: the low word of the
          ; byte count (r2) drives the first movmd.b, and the high word (e2)
          ; counts the 65536-byte blocks that follow.
91        mov.w   r2,r4
92        beq     simple_loop_next
93simple_loop:
94        movmd.b
95simple_loop_next:
96        sub.w   #1,e2
97        bhs     simple_loop
98        rts/l   er4-er6
99#endif
100
101#else
102
; memcpy for the non-SX H8/300 variants.  Copies backwards, from the end
; of both buffers towards the start, a word at a time when every address
; and the length are even, otherwise a byte at a time.
;
; In:   A0P = dst, A1P = src, A2P = len (per the commented-out loads
;       below).  The A*/MOVP/ADDP/CMPP macros are defined outside this
;       file -- presumably in defines.h; confirm there.
; Out:  A0P = dst: the pre-decrement stores walk it from the end of dst
;       back to the start.
; Uses: A2 is overwritten (flag byte, then data scratch) and A3P holds
;       the saved dst start.
103        .global _memcpy
104_memcpy:
105;       MOVP    @(2/4,r7),A0P   ; dst
106;       MOVP    @(4/8,r7),A1P   ; src
107;       MOVP    @(6/12,r7),A2P  ; len
108
109        MOVP    A0P,A3P ; keep copy of final dst
110        ADDP    A2P,A0P ; point to end of dst
111        CMPP    A0P,A3P ; end == start means len is zero: nothing to do
112        beq     quit
113
114        ADDP    A2P,A1P ; point to end of src
115
116        ; lets see if we can do this in words
          ; A2L starts out as the low byte of the length.  OR-ing in the low
          ; bytes of the three pointers leaves bit 0 clear only when all of
          ; them, and the length, are even.  The length itself is no longer
          ; needed: both loops terminate on a pointer comparison.
117        or      A0L,A2L ; or in the low byte of the dst end address
118        or      A3L,A2L ; or in the low byte of the dst start address
119        or      A1L,A2L ; or in the low byte of the src end address
120        btst    #0,A2L  ; see if the lsb is zero
121        bne     byteloop ; something is odd: copy bytewise
122
123wordloop:
124#ifdef __NORMAL_MODE__
125        sub     #2,A1P
126#else
127        subs    #2,A1P          ; point to word
128#endif
129        mov.w   @A1P,A2         ; get word
130        mov.w   A2,@-A0P        ; save word
131        CMPP    A0P,A3P         ; at the front again ?
132        bne     wordloop
          ; A0P is back at the start of dst here.
133        rts
134
135byteloop:
136#ifdef __NORMAL_MODE__
137        sub     #1,A1P
138#else
139        subs    #1,A1P          ; point to byte
140#endif
141        mov.b   @A1P,A2L        ; get byte
142        mov.b   A2L,@-A0P       ; save byte
143        CMPP    A0P,A3P         ; at the front again ?
144        bne     byteloop
145
146        ; return with A0 pointing to dst
147quit:   rts
148
149#endif
Note: See TracBrowser for help on using the repository browser.