Last change
on this file since 444 was
444,
checked in by satin@…, 6 years ago
|
add newlib,libalmos-mkh, restructure shared_syscalls.h and mini-libc
|
File size:
2.2 KB
|
Line | |
---|
1 | /* |
---|
2 | * ==================================================== |
---|
3 | * Copyright (C) 2007 by Ellips BV. All rights reserved. |
---|
4 | * |
---|
5 | * Permission to use, copy, modify, and distribute this |
---|
6 | * software is freely granted, provided that this notice |
---|
7 | * is preserved. |
---|
8 | * ==================================================== |
---|
9 | */ |
---|
10 | |
---|
11 | #include "x86_64mach.h" |
---|
12 | |
---|
13 | .global SYM (memcpy) |
---|
14 | SOTYPE_FUNCTION(memcpy) |
---|
15 | /*
 * memcpy: copy rdx bytes from the buffer at rsi to the buffer at rdi,
 * in ascending address order, and return the original rdi in rax.
 *
 * Operands are written source first, destination second. The register
 * names, SYM and SOTYPE_FUNCTION are expected to come from x86_64mach.h,
 * which is not shown here.
 *
 * Strategy, chosen by byte count:
 *   - below 16: a single rep movsb;
 *   - otherwise the destination is first advanced to an 8-byte boundary,
 *     then, with the remaining count in rdx:
 *       - below 256: rep movsq followed by a byte tail;
 *       - 256 or more: 128-byte blocks using prefetchnta loads and
 *         movnti stores, an sfence, then a byte tail.
 *
 * Registers: rcx, rdx, rsi, rdi, r8-r11 and the flags may be overwritten;
 * r12-r14 are used by the block loop but saved and restored on the stack.
 * No cld is issued, so the string instructions rely on the direction flag
 * already being clear on entry.
 */ |
---|
16 | SYM (memcpy): |
---|
17 | movq rdi, rax /* Store destination in return value */ |
---|
18 | cmpq $16, rdx |
---|
19 | jb byte_copy /* Unsigned compare: a count below 16 takes the plain byte copy */ |
---|
20 | |
---|
21 | movq rdi, r8 /* Align destination on quad word boundary */ |
---|
22 | andq $7, r8 /* r8 = destination address modulo 8 */ |
---|
23 | jz quadword_aligned /* Already a multiple of 8: nothing to align */ |
---|
24 | movq $8, rcx |
---|
25 | subq r8, rcx /* rcx = 8 - r8 = bytes up to the next 8-byte boundary */ |
---|
26 | subq rcx, rdx /* Take those bytes off the remaining count */ |
---|
27 | rep movsb /* Copy rcx bytes; rsi and rdi advance past them */ |
---|
28 | |
---|
29 | quadword_aligned: |
---|
30 | cmpq $256, rdx |
---|
31 | jb quadword_copy /* Fewer than 256 bytes left: skip the block loop */ |
---|
32 | /* The loop below reuses rax as a data register, so the return value is parked on the stack together with r12-r14 */ |
---|
33 | pushq rax |
---|
34 | pushq r12 |
---|
35 | pushq r13 |
---|
36 | pushq r14 |
---|
37 | |
---|
38 | movq rdx, rcx /* Copy 128 bytes at a time with minimum cache pollution */ |
---|
39 | shrq $7, rcx /* rcx = number of whole 128-byte blocks (at least 2, since rdx >= 256 here) */ |
---|
40 | |
---|
41 | .p2align 4 /* Start the loop body on a 16-byte boundary */ |
---|
42 | loop: |
---|
43 | prefetchnta 768 (rsi) /* Non-temporal prefetch hints 768 and 832 bytes ahead of the source pointer */ |
---|
44 | prefetchnta 832 (rsi) |
---|
45 | /* First 64 bytes of the block: eight quadword loads, then eight movnti (non-temporal) stores */ |
---|
46 | movq (rsi), rax |
---|
47 | movq 8 (rsi), r8 |
---|
48 | movq 16 (rsi), r9 |
---|
49 | movq 24 (rsi), r10 |
---|
50 | movq 32 (rsi), r11 |
---|
51 | movq 40 (rsi), r12 |
---|
52 | movq 48 (rsi), r13 |
---|
53 | movq 56 (rsi), r14 |
---|
54 | |
---|
55 | movntiq rax, (rdi) |
---|
56 | movntiq r8 , 8 (rdi) |
---|
57 | movntiq r9 , 16 (rdi) |
---|
58 | movntiq r10, 24 (rdi) |
---|
59 | movntiq r11, 32 (rdi) |
---|
60 | movntiq r12, 40 (rdi) |
---|
61 | movntiq r13, 48 (rdi) |
---|
62 | movntiq r14, 56 (rdi) |
---|
63 | /* Second 64 bytes of the block: same load/store pattern at offsets 64..120 */ |
---|
64 | movq 64 (rsi), rax |
---|
65 | movq 72 (rsi), r8 |
---|
66 | movq 80 (rsi), r9 |
---|
67 | movq 88 (rsi), r10 |
---|
68 | movq 96 (rsi), r11 |
---|
69 | movq 104 (rsi), r12 |
---|
70 | movq 112 (rsi), r13 |
---|
71 | movq 120 (rsi), r14 |
---|
72 | |
---|
73 | movntiq rax, 64 (rdi) |
---|
74 | movntiq r8 , 72 (rdi) |
---|
75 | movntiq r9 , 80 (rdi) |
---|
76 | movntiq r10, 88 (rdi) |
---|
77 | movntiq r11, 96 (rdi) |
---|
78 | movntiq r12, 104 (rdi) |
---|
79 | movntiq r13, 112 (rdi) |
---|
80 | movntiq r14, 120 (rdi) |
---|
81 | |
---|
82 | leaq 128 (rsi), rsi /* Advance both pointers by one 128-byte block */ |
---|
83 | leaq 128 (rdi), rdi |
---|
84 | |
---|
85 | dec rcx |
---|
86 | jnz loop /* Repeat until every whole block has been copied */ |
---|
87 | |
---|
88 | sfence /* Order the non-temporal stores before any stores that follow */ |
---|
89 | movq rdx, rcx |
---|
90 | andq $127, rcx /* rcx = rdx mod 128 = bytes left over after the blocks */ |
---|
91 | rep movsb |
---|
92 | popq r14 |
---|
93 | popq r13 |
---|
94 | popq r12 |
---|
95 | popq rax /* rax holds the destination pointer again for the return */ |
---|
96 | ret |
---|
97 | |
---|
98 | |
---|
99 | byte_copy: |
---|
100 | movq rdx, rcx /* rcx = full byte count (below 16 on this path) */ |
---|
101 | rep movsb |
---|
102 | ret /* rax still holds the destination stored on entry */ |
---|
103 | |
---|
104 | |
---|
105 | quadword_copy: |
---|
106 | movq rdx, rcx |
---|
107 | shrq $3, rcx /* rcx = number of whole quadwords in the remaining count */ |
---|
108 | .p2align 4 /* Aligns the next instruction to 16 bytes; this sits on the fall-through path */ |
---|
109 | rep movsq |
---|
110 | movq rdx, rcx |
---|
111 | andq $7, rcx |
---|
112 | rep movsb /* Copy the remaining bytes */ |
---|
113 | ret |
---|
Note: See
TracBrowser
for help on using the repository browser.