1 | #include "setarch.h" |
---|
2 | |
---|
3 | #include "defines.h" |
---|
4 | |
---|
5 | #ifdef __H8300SX__ |
---|
6 | |
---|
; memcpy, H8SX variant (assembled when __H8300SX__ is defined).
; Register use visible in this block:
;   er0 - copied to er6 and never written afterwards.
;   er1 - copied to er5.
;   r2/er2 (selected by LEN()) - byte count; reduced as alignment bytes are copied.
;   er5 / er6 - source / destination pointers (mov.b @er5+,@er6+ reads er5, writes er6).
;   r4/er4 - element count set up before each movmd.
; er4-er6 are pushed on entry by stm.l and handed to rts/l on every exit path.
; NOTE(review): LEN() is not defined in this file; presumably it comes from
; defines.h and picks the 16- or 32-bit register name - confirm there.
7 | .global _memcpy |
---|
8 | _memcpy: |
---|
9 | stm.l er4-er6,@-er7 |
---|
10 | |
---|
11 | ; Set up the pointers for movmd: er6 = destination (from er0), er5 = source (from er1). |
---|
12 | mov.l er0,er6 |
---|
13 | mov.l er1,er5 |
---|
14 | |
---|
15 | ; See whether the copy is long enough to use the movmd.l code. |
---|
16 | ; Although the code can handle anything longer than 6 bytes, |
---|
17 | ; it can be more expensive than movmd.b for small moves. |
---|
18 | ; It's better to use a higher threshold to account for this. |
---|
19 | ; |
---|
20 | ; Note that the exact overhead of the movmd.l checks depends on |
---|
21 | ; the alignments of the length and pointers. They are faster when |
---|
22 | ; er0 & 3 == er1 & 3 == er2 & 3, faster still when these values |
---|
23 | ; are 0. This threshold is a compromise between the various cases. |
---|
24 | cmp #16,LEN(r2) |
---|
25 | blo simple |
---|
26 | |
---|
27 | ; movmd.l only works for even addresses. If one of the addresses |
---|
28 | ; is odd and the other is not, fall back on a simple move. |
---|
29 | bld #0,r5l |
---|
30 | bxor #0,r6l |
---|
31 | bcs simple |
---|
32 | |
---|
33 | ; Make the addresses even. Bit 0 of both pointers is equal here, so testing the source is enough. |
---|
34 | bld #0,r5l |
---|
35 | bcc word_aligned |
---|
36 | mov.b @er5+,@er6+ |
---|
37 | sub #1,LEN(r2) |
---|
38 | |
---|
39 | word_aligned: |
---|
40 | ; See if copying one word would make the first operand longword |
---|
41 | ; aligned. Although this is only really worthwhile if it aligns |
---|
42 | ; the second operand as well, it's no worse if it doesn't, so it |
---|
43 | ; hardly seems worth the overhead of a "band" check. |
---|
44 | bld #1,r6l |
---|
45 | bcc fast_copy |
---|
46 | mov.w @er5+,@er6+ |
---|
47 | sub #2,LEN(r2) |
---|
48 | |
---|
49 | fast_copy: |
---|
50 | ; Set (e)r4 to the number of longwords to copy (remaining length shifted right by 2). |
---|
51 | mov LEN(r2),LEN(r4) |
---|
52 | shlr #2,LEN(r4) |
---|
53 | |
---|
54 | #ifdef __NORMAL_MODE__ |
---|
55 | ; 16-bit pointers and size_ts: one movmd.l is enough. This code |
---|
56 | ; is never reached with r4 == 0. |
---|
57 | movmd.l |
---|
58 | and.w #3,r2 |
---|
59 | simple: |
---|
60 | mov.w r2,r4 |
---|
61 | beq quit |
---|
62 | movmd.b |
---|
63 | quit: |
---|
64 | rts/l er4-er6 |
---|
65 | #else |
---|
66 | ; Skip the first iteration if the number of longwords is divisible |
---|
67 | ; by 0x10000. |
---|
68 | mov.w r4,r4 |
---|
69 | beq fast_loop_next |
---|
70 | |
---|
71 | ; This loop copies r4 (!= 0) longwords the first time round and 65536 |
---|
72 | ; longwords on each iteration after that. e4 counts the remaining 65536-longword passes. |
---|
73 | fast_loop: |
---|
74 | movmd.l |
---|
75 | fast_loop_next: |
---|
76 | sub.w #1,e4 |
---|
77 | bhs fast_loop |
---|
78 | |
---|
79 | ; Mop up any left-over bytes. We could just fall through to the |
---|
80 | ; simple code after the "and" but the version below is quicker |
---|
81 | ; and only takes 10 more bytes. |
---|
82 | and.w #3,r2 |
---|
83 | beq quit |
---|
84 | mov.w r2,r4 |
---|
85 | movmd.b |
---|
86 | quit: |
---|
87 | rts/l er4-er6 |
---|
88 | |
---|
89 | simple: |
---|
90 | ; Simple bytewise copy. We need to handle all lengths, including zero. Same loop shape as fast_loop, with e2 as the outer count. |
---|
91 | mov.w r2,r4 |
---|
92 | beq simple_loop_next |
---|
93 | simple_loop: |
---|
94 | movmd.b |
---|
95 | simple_loop_next: |
---|
96 | sub.w #1,e2 |
---|
97 | bhs simple_loop |
---|
98 | rts/l er4-er6 |
---|
99 | #endif |
---|
100 | |
---|
101 | #else |
---|
102 | |
---|
; memcpy, non-H8SX variant (the #else branch of __H8300SX__).
; Both pointers are advanced to the end of their buffers and the copy then runs
; backwards until the destination pointer is back at its starting value (A3P).
; Words are used when bit 0 of the length, both dst addresses and the src end
; address are all clear; otherwise the copy is done a byte at a time.
; The commented-out MOVP lines below label A0P as dst, A1P as src and A2P as len.
; NOTE(review): MOVP/ADDP/CMPP and the A0P..A3P, A0L..A3L, A2 names are not
; defined in this file; presumably they are macros from defines.h - confirm there.
103 | .global _memcpy |
---|
104 | _memcpy: |
---|
105 | ; MOVP @(2/4,r7),A0P ; dst |
---|
106 | ; MOVP @(4/8,r7),A1P ; src |
---|
107 | ; MOVP @(6/12,r7),A2P ; len |
---|
108 | |
---|
109 | MOVP A0P,A3P ; keep copy of final dst |
---|
110 | ADDP A2P,A0P ; point to end of dst |
---|
111 | CMPP A0P,A3P ; see if anything to do (end == start means nothing to copy) |
---|
112 | beq quit |
---|
113 | |
---|
114 | ADDP A2P,A1P ; point to end of src |
---|
115 | |
---|
116 | ; let's see if we can do this in words: OR everything into A2L and test bit 0 |
---|
117 | or A0L,A2L ; or in the end-of-dst address (A2L already holds the length) |
---|
118 | or A3L,A2L ; or in the start-of-dst address |
---|
119 | or A1L,A2L ; or in the end-of-src address |
---|
120 | btst #0,A2L ; see if the lsb is zero |
---|
121 | bne byteloop |
---|
122 | |
---|
123 | wordloop: |
---|
124 | #ifdef __NORMAL_MODE__ |
---|
125 | sub #2,A1P |
---|
126 | #else |
---|
127 | subs #2,A1P ; point to word |
---|
128 | #endif |
---|
129 | mov.w @A1P,A2 ; get word (A2 is reused as the data register from here on) |
---|
130 | mov.w A2,@-A0P ; save word |
---|
131 | CMPP A0P,A3P ; at the front again ? |
---|
132 | bne wordloop |
---|
133 | rts |
---|
134 | |
---|
135 | byteloop: |
---|
136 | #ifdef __NORMAL_MODE__ |
---|
137 | sub #1,A1P |
---|
138 | #else |
---|
139 | subs #1,A1P ; point to byte |
---|
140 | #endif |
---|
141 | mov.b @A1P,A2L ; get byte |
---|
142 | mov.b A2L,@-A0P ; save byte |
---|
143 | CMPP A0P,A3P ; at the front again ? |
---|
144 | bne byteloop |
---|
145 | |
---|
146 | ; return with A0 pointing to dst (A0P has been decremented back to the value saved in A3P) |
---|
147 | quit: rts |
---|
148 | |
---|
149 | #endif |
---|