source: trunk/libs/newlib/src/newlib/libc/machine/spu/memmove.c @ 444

Last change on this file since 444 was 444, checked in by satin@…, 6 years ago

add newlib,libalmos-mkh, restructure shared_syscalls.h and mini-libc

File size: 7.8 KB
Line 
1/*
2  (C) Copyright 2001,2006,
3  International Business Machines Corporation,
4  Sony Computer Entertainment, Incorporated,
5  Toshiba Corporation,
6
7  All rights reserved.
8
9  Redistribution and use in source and binary forms, with or without
10  modification, are permitted provided that the following conditions are met:
11
12    * Redistributions of source code must retain the above copyright notice,
13  this list of conditions and the following disclaimer.
14    * Redistributions in binary form must reproduce the above copyright
15  notice, this list of conditions and the following disclaimer in the
16  documentation and/or other materials provided with the distribution.
17    * Neither the names of the copyright holders nor the names of their
18  contributors may be used to endorse or promote products derived from this
19  software without specific prior written permission.
20
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  POSSIBILITY OF SUCH DAMAGE.
32*/
33#include <spu_intrinsics.h>
34#include <stddef.h>
35#include "vec_literal.h"
36
37/* Copy n bytes from memory area src to memory area dest.
38 * Copying is performed as if the n characters pointed to
39 * by src are first copied into a temporary array that does
40 * not overlap the src and dest arrays. Then the n characters
41 * of the temporary array are copied into the destination
42 * array. The memmove subroutine returns a pointer to dest.
43 */
44
45void * memmove(void * __restrict__ dest, const void * __restrict__ src, size_t n)
46{
47  int adjust, delta;
48  unsigned int soffset1, soffset2, doffset1, doffset2;
49  vec_uchar16 *vSrc, *vDst;
50  vec_uchar16 sdata1, sdata2, sdata, ddata, shuffle;
51  vec_uchar16 mask, mask1, mask2, mask3, one = spu_splats((unsigned char)-1);
52
53  soffset1  = (unsigned int)(src) & 15;
54  doffset1 = (unsigned int)(dest) & 15;
55  doffset2 = ((unsigned int)(dest) + n) & 15;
56
57  /* Construct a series of masks used to data insert. The masks
58   * contains 0 bit when the destination word is unchanged, 1 when it
59   * must be replaced by source bits.
60   *
61   * mask1 = mask for leading unchanged bytes
62   * mask2 = mask for trailing unchange bytes
63   * mask3 = mask indicating the more than one qword is being changed.
64   */
65  mask  = one;
66  mask1 = spu_rlmaskqwbyte(mask, -doffset1);
67  mask2 = spu_slqwbyte(mask, 16-doffset2);
68  mask3 = (vec_uchar16)spu_cmpgt(spu_splats((unsigned int)(doffset1 + n)), 15);
69
70  vDst = (vec_uchar16 *)(dest);
71
72  delta  = (int)soffset1 - (int)doffset1;
73
74  /* The follow check only works if the SPU addresses are not
75   * wrapped. No provisions have been made to correct for this
76   * limitation.
77   */
78  if (((unsigned int)dest - (unsigned int)src) >= (unsigned int)n) {
79    /* Forward copy. Perform a memcpy.
80     *
81     * Handle any leading destination partial quadwords as
82     * well a very short copy (ie, such that the n characters
83     * all reside in a single (destination) quadword.
84     */
85    vSrc = (vec_uchar16 *)(src);
86    vDst = (vec_uchar16 *)(dest);
87
88    /* Handle any leading destination partial quadwords as
89     * well a very short copy (ie, such that the n characters
90     * all reside in a single (destination) quadword.
91     */
92    soffset1 = (unsigned int)(src) & 15;
93    doffset1 = (unsigned int)(dest) & 15;
94    doffset2 = ((unsigned int)(dest) + n) & 15;
95
96    /* Compute a shuffle pattern used to align the source string
97     * with the alignment of the destination string.
98     */
99
100    adjust = (int)spu_extract(spu_cmpgt(spu_promote(doffset1, 0), spu_promote(soffset1, 0)), 0);
101    delta  = (int)soffset1 - (int)doffset1;
102    delta += adjust & 16;
103
104    shuffle = (vec_uchar16)spu_add((vec_uint4)spu_splats((unsigned char)delta),
105                                   VEC_LITERAL(vec_uint4, 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F));
106
107    vSrc += adjust;
108
109    sdata1 = *vSrc++;
110    sdata2 = *vSrc++;
111
112    ddata = *vDst;
113    sdata = spu_shuffle(sdata1, sdata2, shuffle);
114
115    /* Construct a series of masks used to data insert. The masks
116     * contain 0 when the destination word is unchanged, 1 when it
117     * must be replaced by source bytes.
118     *
119     * mask1 = mask for leading unchanged bytes
120     * mask2 = mask for trailing unchange bytes
121     * mask3 = mask indicating the more than one qword is being changed.
122     */
123    mask  = one;
124    mask1 = spu_rlmaskqwbyte(mask, -doffset1);
125    mask2 = spu_slqwbyte(mask, 16-doffset2);
126    mask3 = (vec_uchar16)spu_cmpgt(spu_splats((unsigned int)(doffset1 + n)), 15);
127
128    *vDst++ = spu_sel(ddata, sdata, spu_and(mask1, spu_or(mask2, mask3)));
129
130    n += doffset1;
131
132    /* Handle complete destination quadwords
133     */
134    while (n > 31) {
135      sdata1 = sdata2;
136      sdata2 = *vSrc++;
137      *vDst++ = spu_shuffle(sdata1, sdata2, shuffle);
138      n -= 16;
139    }
140
141    /* Handle any trailing partial (destination) quadwords
142     */
143    mask = spu_and((vec_uchar16)spu_cmpgt(spu_splats((unsigned int)n), 16), mask2);
144    *vDst = spu_sel(*vDst, spu_shuffle(sdata2, *vSrc, shuffle), mask);
145
146  } else {
147    /* Backward copy.
148     *
149     * Handle any leading destination partial quadwords as
150     * well a very short copy (ie, such that the n characters
151     * all reside in a single (destination) quadword.
152     */
153    vSrc = (vec_uchar16 *)((unsigned int)src  + n-1);
154    vDst = (vec_uchar16 *)((unsigned int)dest + n-1);
155
156    /* Handle any leading destination partial quadwords as
157     * well a very short copy (ie, such that the n characters
158     * all reside in a single (destination) quadword.
159     */
160    soffset1 = (unsigned int)(src)  & 15;
161    soffset2 = (unsigned int)(vSrc) & 15;
162    doffset1 = (unsigned int)(dest) & 15;
163    doffset2 = (unsigned int)(vDst) & 15;
164
165    /* Compute a shuffle pattern used to align the source string
166     * with the alignment of the destination string.
167     */
168    adjust = (int)spu_extract(spu_cmpgt(spu_promote(soffset2, 0), spu_promote(doffset2, 0)), 0);
169    delta  = (int)doffset2 - (int)soffset2;
170    delta += adjust & 16;
171
172    shuffle = (vec_uchar16)spu_sub(VEC_LITERAL(vec_uint4, 0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F),
173                                   (vec_uint4)spu_splats((unsigned char)delta));
174
175    vSrc -= adjust;
176
177    sdata2 = *vSrc--;
178    sdata1 = *vSrc--;
179
180    ddata = *vDst;
181    sdata = spu_shuffle(sdata1, sdata2, shuffle);
182
183    /* Construct a series of masks used to data insert. The masks
184     * contain 0 when the destination word is unchanged, 1 when it
185     * must be replaced by source bytes.
186     *
187     * mask1 = mask for leading unchanged bytes
188     * mask2 = mask for trailing unchange bytes
189     * mask3 = mask indicating the more than one qword is being changed.
190     */
191    mask  = one;
192    mask1 = spu_rlmaskqwbyte(mask, -doffset1);
193    mask2 = spu_slqwbyte(mask, 15-doffset2);
194    mask3 = (vec_uchar16)spu_cmpgt(spu_splats((int)(doffset2 - n)), -2);
195
196    *vDst-- = spu_sel(ddata, sdata, spu_and(mask2, spu_orc(mask1, mask3)));
197
198    n -= doffset2 + 1;
199
200    /* Handle complete destination quadwords
201     */
202    while ((int)n > 15) {
203      sdata2 = sdata1;
204      sdata1 = *vSrc--;
205      *vDst-- = spu_shuffle(sdata1, sdata2, shuffle);
206      n -= 16;
207    }
208
209    /* Handle any trailing partial (destination) quadwords
210     */
211    mask = spu_and((vec_uchar16)spu_cmpgt(spu_splats((int)n), 0), mask1);
212    *vDst = spu_sel(*vDst, spu_shuffle(*vSrc, sdata1, shuffle), mask);
213  }
214  return (dest);
215}
216
Note: See TracBrowser for help on using the repository browser.