Last change
on this file since 444 was
444,
checked in by satin@…, 6 years ago
|
add newlib,libalmos-mkh, restructure shared_syscalls.h and mini-libc
|
File size:
2.2 KB
|
Rev | Line | |
---|
[444] | 1 | /* |
---|
| 2 | * ==================================================== |
---|
| 3 | * Copyright (C) 2007 by Ellips BV. All rights reserved. |
---|
| 4 | * |
---|
| 5 | * Permission to use, copy, modify, and distribute this |
---|
| 6 | * software is freely granted, provided that this notice |
---|
| 7 | * is preserved. |
---|
| 8 | * ==================================================== |
---|
| 9 | */ |
---|
| 10 | |
---|
| 11 | #include "x86_64mach.h" |
---|
| 12 | |
---|
/*
 * void *memcpy (void *dst, const void *src, size_t n)
 *
 * Register names are written bare (rdi, rax, ...) and operands are in
 * "source, destination" order.  The register spellings and the SYM /
 * SOTYPE_FUNCTION macros presumably come from x86_64mach.h -- confirm
 * there.
 *
 * In:    rdi = dst, rsi = src, rdx = n (byte count)
 * Out:   rax = dst exactly as passed in
 * Uses:  rcx, rdx, rsi, rdi, r8 and the flags.  The large-copy path also
 *        uses r9-r11 and r12-r14; r12-r14 are saved and restored.
 * Note:  no cld is issued, so the rep movs instructions only copy
 *        forward if the direction flag is clear on entry.
 *
 * Strategy:
 *   n < 16       plain byte copy
 *   otherwise    byte-copy until dst is 8-byte aligned, then
 *     n < 256    rep movsq followed by a byte tail
 *     n >= 256   128-byte blocks (prefetchnta + movnti), then a byte tail
 *
 * All internal labels use the .L prefix so they stay out of the symbol
 * table and cannot clash with the "loop" mnemonic or with labels of other
 * routines.
 */

        .global SYM (memcpy)
        SOTYPE_FUNCTION(memcpy)

SYM (memcpy):
        movq    rdi, rax                /* Return value = destination */
        cmpq    $16, rdx
        jb      .Lbyte_copy             /* Tiny copy: not worth aligning */

        movq    rdi, r8                 /* r8 = dst & 7 (misalignment) */
        andq    $7, r8
        jz      .Lquadword_aligned
        movq    $8, rcx                 /* rcx = 8 - misalignment, 1..7 */
        subq    r8, rcx
        subq    rcx, rdx                /* n >= 16 here, cannot underflow */
        rep     movsb                   /* dst is now 8-byte aligned */

.Lquadword_aligned:
        cmpq    $256, rdx
        jb      .Lquadword_copy

        /*
         * Large copy.  rax doubles as a data register inside the loop, so
         * the return value is parked on the stack next to r12-r14.
         */
        pushq   rax
        pushq   r12
        pushq   r13
        pushq   r14

        movq    rdx, rcx                /* Copy 128 bytes at a time with minimum cache pollution */
        shrq    $7, rcx                 /* rcx = block count, >= 2 since n >= 256 */

        .p2align 4
.Lblock_loop:
        prefetchnta     768 (rsi)       /* Prefetch source well ahead of the loads */
        prefetchnta     832 (rsi)

        movq       (rsi), rax           /* Load bytes 0..63 of the block */
        movq     8 (rsi), r8
        movq    16 (rsi), r9
        movq    24 (rsi), r10
        movq    32 (rsi), r11
        movq    40 (rsi), r12
        movq    48 (rsi), r13
        movq    56 (rsi), r14

        movntiq rax,    (rdi)           /* Non-temporal stores of bytes 0..63 */
        movntiq r8 ,  8 (rdi)
        movntiq r9 , 16 (rdi)
        movntiq r10, 24 (rdi)
        movntiq r11, 32 (rdi)
        movntiq r12, 40 (rdi)
        movntiq r13, 48 (rdi)
        movntiq r14, 56 (rdi)

        movq     64 (rsi), rax          /* Load bytes 64..127 of the block */
        movq     72 (rsi), r8
        movq     80 (rsi), r9
        movq     88 (rsi), r10
        movq     96 (rsi), r11
        movq    104 (rsi), r12
        movq    112 (rsi), r13
        movq    120 (rsi), r14

        movntiq rax,  64 (rdi)          /* Non-temporal stores of bytes 64..127 */
        movntiq r8 ,  72 (rdi)
        movntiq r9 ,  80 (rdi)
        movntiq r10,  88 (rdi)
        movntiq r11,  96 (rdi)
        movntiq r12, 104 (rdi)
        movntiq r13, 112 (rdi)
        movntiq r14, 120 (rdi)

        leaq    128 (rsi), rsi          /* Advance both pointers by one block */
        leaq    128 (rdi), rdi

        dec     rcx
        jnz     .Lblock_loop

        sfence                          /* Order the movnti stores before what follows */
        movq    rdx, rcx
        andq    $127, rcx               /* Bytes left over after the 128-byte blocks */
        rep     movsb
        popq    r14
        popq    r13
        popq    r12
        popq    rax                     /* Restore the return value */
        ret


.Lbyte_copy:
        movq    rdx, rcx                /* rcx = n, copy byte by byte */
        rep     movsb
        ret


        /*
         * Alignment goes in front of the branch target, after the ret
         * above, so the padding is never executed.
         */
        .p2align 4
.Lquadword_copy:
        movq    rdx, rcx
        shrq    $3, rcx                 /* rcx = number of whole quadwords */
        rep     movsq
        movq    rdx, rcx
        andq    $7, rcx
        rep     movsb                   /* Copy the remaining bytes */
        ret
---|
Note: See
TracBrowser
for help on using the repository browser.