source: trunk/libs/newlib/src/newlib/libc/machine/m68k/memcpy.S @ 444

Last change on this file since 444 was 444, checked in by satin@…, 6 years ago

add newlib,libalmos-mkh, restructure shared_syscalls.h and mini-libc

File size: 2.8 KB
Line 
1/* a-memcpy.s -- memcpy, optimised for m68k asm
2 *
3 * Copyright (c) 2007 mocom software GmbH & Co KG)
4 *
5 * The authors hereby grant permission to use, copy, modify, distribute,
6 * and license this software and its documentation for any purpose, provided
7 * that existing copyright notices are retained in all copies and that this
8 * notice is included verbatim in any distributions. No written agreement,
9 * license, or royalty fee is required for any of the authorized uses.
10 * Modifications to this software may be copyrighted by their authors
11 * and need not follow the licensing terms described here, provided that
12 * the new terms are clearly indicated on the first page of each file where
13 * they apply.
14 */
15
#include "m68kasm.h"

/* MISALIGNED_OK is 1 when the target may perform word and long word
 * accesses at addresses that are not a multiple of the access size.
 * memcpy uses it to choose between aligning only the destination
 * (source accesses may then be misaligned) and falling back to a plain
 * byte copy whenever either pointer is not long word aligned.
 *
 * The 68010 is deliberately not listed: like the 68000 it raises an
 * address error on a word or long word access to an odd address.
 */
#if defined (__mcoldfire__) || defined (__mc68020__) || defined (__mc68030__) || defined (__mc68040__) || defined (__mc68060__)
# define MISALIGNED_OK 1
#else
# define MISALIGNED_OK 0
#endif
23       
24        .text
25        .align  4
26
27        .globl  SYM(memcpy)
28        .type   SYM(memcpy), @function
29
30/*   memcpy, optimised
31 *
32 *   strategy:
33 *       - no argument testing (the original memcpy from the GNU lib does
34 *         no checking either)
35 *       - make sure the destination pointer (the write pointer) is long word
36 *         aligned. This is the best you can do, because writing to unaligned
37 *         addresses can be the most costfull thing you could do.
38 *       - Once you have figured that out, we do a little loop unrolling
39 *         to further improve speed.
40 */
41
42SYM(memcpy):
43        move.l  4(sp),a0        | dest ptr
44        move.l  8(sp),a1        | src ptr
45        move.l  12(sp),d1       | len
46        cmp.l   #8,d1           | if fewer than 8 bytes to transfer,
47        blo     .Lresidue       | do not optimise
48
49#if !MISALIGNED_OK
50        /* Goto .Lresidue if either dest or src is not 4-byte aligned */
51        move.l  a0,d0
52        and.l   #3,d0
53        bne     .Lresidue
54        move.l  a1,d0
55        and.l   #3,d0
56        bne     .Lresidue
57#else /* MISALIGNED_OK */
58        /* align dest */
59        move.l  a0,d0           | copy of dest
60        neg.l   d0
61        and.l   #3,d0           | look for the lower two only
62        beq     2f              | is aligned?
63        sub.l   d0,d1
64        lsr.l   #1,d0           | word align needed?
65        bcc     1f
66        move.b  (a1)+,(a0)+
671:
68        lsr.l   #1,d0           | long align needed?
69        bcc     2f
70        move.w  (a1)+,(a0)+
712:
72#endif /* !MISALIGNED_OK */
73
74        /* long word transfers */
75        move.l  d1,d0
76        and.l   #3,d1           | byte residue
77        lsr.l   #3,d0
78        bcc     1f              | carry set for 4-byte residue
79        move.l  (a1)+,(a0)+
801:
81        lsr.l   #1,d0           | number of 16-byte transfers
82        bcc     .Lcopy          | carry set for 8-byte residue
83        bra     .Lcopy8
84
851:
86        move.l  (a1)+,(a0)+
87        move.l  (a1)+,(a0)+
88.Lcopy8:
89        move.l  (a1)+,(a0)+
90        move.l  (a1)+,(a0)+
91.Lcopy:
92#if !defined (__mcoldfire__)
93        dbra    d0,1b
94        sub.l   #0x10000,d0
95#else
96        subq.l  #1,d0
97#endif
98        bpl     1b
99        bra     .Lresidue
100
1011:
102        move.b  (a1)+,(a0)+     | move residue bytes
103
104.Lresidue:
105#if !defined (__mcoldfire__)
106        dbra    d1,1b           | loop until done
107#else
108        subq.l  #1,d1
109        bpl     1b
110#endif
111        move.l  4(sp),d0        | return value
112        rts
Note: See TracBrowser for help on using the repository browser.