1 | /* a-memcpy.s -- memcpy, optimised for m68k asm |
---|
2 | * |
---|
3 | * Copyright (c) 2007 mocom software GmbH & Co KG) |
---|
4 | * |
---|
5 | * The authors hereby grant permission to use, copy, modify, distribute, |
---|
6 | * and license this software and its documentation for any purpose, provided |
---|
7 | * that existing copyright notices are retained in all copies and that this |
---|
8 | * notice is included verbatim in any distributions. No written agreement, |
---|
9 | * license, or royalty fee is required for any of the authorized uses. |
---|
10 | * Modifications to this software may be copyrighted by their authors |
---|
11 | * and need not follow the licensing terms described here, provided that |
---|
12 | * the new terms are clearly indicated on the first page of each file where |
---|
13 | * they apply. |
---|
14 | */ |
---|
15 | |
---|
16 | #include "m68kasm.h" |
---|
17 | |
---|
/* MISALIGNED_OK is 1 when the target CPU can perform word and long word
   data accesses at any address, and 0 when such accesses must be aligned.

   ColdFire and the 68020 and later handle misaligned data accesses in
   hardware.  The 68000 and the 68010 raise an address error on a word or
   long word access to an odd address, so they must not be listed here:
   the MISALIGNED_OK copy path only aligns the destination pointer and
   reads the source with long word moves at whatever address it has.  */
#if defined (__mcoldfire__) || defined (__mc68020__) || defined (__mc68030__) || defined (__mc68040__) || defined (__mc68060__)
# define MISALIGNED_OK 1
#else
# define MISALIGNED_OK 0
#endif
---|
23 | |
---|
24 | .text |
---|
25 | .align 4 |
---|
26 | |
---|
27 | .globl SYM(memcpy) |
---|
28 | .type SYM(memcpy), @function |
---|
29 | |
---|
30 | /* memcpy, optimised |
---|
31 | * |
---|
32 | * strategy: |
---|
33 | * - no argument testing (the original memcpy from the GNU lib does |
---|
34 | * no checking either) |
---|
35 | * - make sure the destination pointer (the write pointer) is long word |
---|
36 | * aligned. This is the best you can do, because writing to unaligned |
---|
37 | * addresses can be the most costfull thing you could do. |
---|
38 | * - Once you have figured that out, we do a little loop unrolling |
---|
39 | * to further improve speed. |
---|
40 | */ |
---|
41 | |
---|
42 | SYM(memcpy): |
---|
43 | move.l 4(sp),a0 | dest ptr |
---|
44 | move.l 8(sp),a1 | src ptr |
---|
45 | move.l 12(sp),d1 | len |
---|
46 | cmp.l #8,d1 | if fewer than 8 bytes to transfer, |
---|
47 | blo .Lresidue | do not optimise |
---|
48 | |
---|
49 | #if !MISALIGNED_OK |
---|
50 | /* Goto .Lresidue if either dest or src is not 4-byte aligned */ |
---|
51 | move.l a0,d0 |
---|
52 | and.l #3,d0 |
---|
53 | bne .Lresidue |
---|
54 | move.l a1,d0 |
---|
55 | and.l #3,d0 |
---|
56 | bne .Lresidue |
---|
57 | #else /* MISALIGNED_OK */ |
---|
58 | /* align dest */ |
---|
59 | move.l a0,d0 | copy of dest |
---|
60 | neg.l d0 |
---|
61 | and.l #3,d0 | look for the lower two only |
---|
62 | beq 2f | is aligned? |
---|
63 | sub.l d0,d1 |
---|
64 | lsr.l #1,d0 | word align needed? |
---|
65 | bcc 1f |
---|
66 | move.b (a1)+,(a0)+ |
---|
67 | 1: |
---|
68 | lsr.l #1,d0 | long align needed? |
---|
69 | bcc 2f |
---|
70 | move.w (a1)+,(a0)+ |
---|
71 | 2: |
---|
72 | #endif /* !MISALIGNED_OK */ |
---|
73 | |
---|
74 | /* long word transfers */ |
---|
75 | move.l d1,d0 |
---|
76 | and.l #3,d1 | byte residue |
---|
77 | lsr.l #3,d0 |
---|
78 | bcc 1f | carry set for 4-byte residue |
---|
79 | move.l (a1)+,(a0)+ |
---|
80 | 1: |
---|
81 | lsr.l #1,d0 | number of 16-byte transfers |
---|
82 | bcc .Lcopy | carry set for 8-byte residue |
---|
83 | bra .Lcopy8 |
---|
84 | |
---|
85 | 1: |
---|
86 | move.l (a1)+,(a0)+ |
---|
87 | move.l (a1)+,(a0)+ |
---|
88 | .Lcopy8: |
---|
89 | move.l (a1)+,(a0)+ |
---|
90 | move.l (a1)+,(a0)+ |
---|
91 | .Lcopy: |
---|
92 | #if !defined (__mcoldfire__) |
---|
93 | dbra d0,1b |
---|
94 | sub.l #0x10000,d0 |
---|
95 | #else |
---|
96 | subq.l #1,d0 |
---|
97 | #endif |
---|
98 | bpl 1b |
---|
99 | bra .Lresidue |
---|
100 | |
---|
101 | 1: |
---|
102 | move.b (a1)+,(a0)+ | move residue bytes |
---|
103 | |
---|
104 | .Lresidue: |
---|
105 | #if !defined (__mcoldfire__) |
---|
106 | dbra d1,1b | loop until done |
---|
107 | #else |
---|
108 | subq.l #1,d1 |
---|
109 | bpl 1b |
---|
110 | #endif |
---|
111 | move.l 4(sp),d0 | return value |
---|
112 | rts |
---|