Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

memchr.S @ 444

Last change on this file since 444 was 444, checked in by satin@…, 6 years ago
add newlib,libalmos-mkh, restructure shared_syscalls.h and mini-libc
File size: 5.3 KB

Line
1	/*
2	* memchr - find a character in a memory zone
3	*
4	* Copyright (c) 2014, ARM Limited
5	* All rights Reserved.
6	*
7	* Redistribution and use in source and binary forms, with or without
8	* modification, are permitted provided that the following conditions are met:
9	* * Redistributions of source code must retain the above copyright
10	* notice, this list of conditions and the following disclaimer.
11	* * Redistributions in binary form must reproduce the above copyright
12	* notice, this list of conditions and the following disclaimer in the
13	* documentation and/or other materials provided with the distribution.
14	* * Neither the name of the company nor the names of its contributors
15	* may be used to endorse or promote products derived from this
16	* software without specific prior written permission.
17	*
18	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19	* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20	* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21	* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22	* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24	* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29	*/
30
31	#if (defined (__OPTIMIZE_SIZE__) \|\| defined (PREFER_SIZE_OVER_SPEED))
32	/* See memchr-stub.c */
33	#else
34	/* Assumptions:
35	*
36	* ARMv8-a, AArch64
37	* Neon Available.
38	*/
39
40	/* Arguments and results. */
41	#define srcin x0
42	#define chrin w1
43	#define cntin x2
44
45	#define result x0
46
47	#define src x3
48	#define tmp x4
49	#define wtmp2 w5
50	#define synd x6
51	#define soff x9
52	#define cntrem x10
53
54	#define vrepchr v0
55	#define vdata1 v1
56	#define vdata2 v2
57	#define vhas_chr1 v3
58	#define vhas_chr2 v4
59	#define vrepmask v5
60	#define vend v6
61
62	/*
63	* Core algorithm:
64	*
65	* For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits
66	* per byte. For each tuple, bit 0 is set if the relevant byte matched the
67	* requested character and bit 1 is not used (faster than using a 32bit
68	* syndrome). Since the bits in the syndrome reflect exactly the order in which
69	* things occur in the original string, counting trailing zeros allows to
70	* identify exactly which byte has matched.
71	*/
72
73	.macro def_fn f p2align=0
74	.text
75	.p2align \p2align
76	.global \f
77	.type \f, %function
78	\f:
79	.endm
80
81	def_fn memchr
82	/* Do not dereference srcin if no bytes to compare. */
83	cbz cntin, .Lzero_length
84	/*
85	* Magic constant 0x40100401 allows us to identify which lane matches
86	* the requested byte.
87	*/
88	mov wtmp2, #0x0401
89	movk wtmp2, #0x4010, lsl #16
90	dup vrepchr.16b, chrin
91	/* Work with aligned 32-byte chunks */
92	bic src, srcin, #31
93	dup vrepmask.4s, wtmp2
94	ands soff, srcin, #31
95	and cntrem, cntin, #31
96	b.eq .Lloop
97
98	/*
99	* Input string is not 32-byte aligned. We calculate the syndrome
100	* value for the aligned 32 bytes block containing the first bytes
101	* and mask the irrelevant part.
102	*/
103
104	ld1 {vdata1.16b, vdata2.16b}, [src], #32
105	sub tmp, soff, #32
106	adds cntin, cntin, tmp
107	cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
108	cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
109	and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
110	and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
111	addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
112	addp vend.16b, vend.16b, vend.16b /* 128->64 */
113	mov synd, vend.2d[0]
114	/* Clear the soff2 lower bits /
115	lsl tmp, soff, #1
116	lsr synd, synd, tmp
117	lsl synd, synd, tmp
118	/* The first block can also be the last */
119	b.ls .Lmasklast
120	/* Have we found something already? */
121	cbnz synd, .Ltail
122
123	.Lloop:
124	ld1 {vdata1.16b, vdata2.16b}, [src], #32
125	subs cntin, cntin, #32
126	cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
127	cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
128	/* If we're out of data we finish regardless of the result */
129	b.ls .Lend
130	/* Use a fast check for the termination condition */
131	orr vend.16b, vhas_chr1.16b, vhas_chr2.16b
132	addp vend.2d, vend.2d, vend.2d
133	mov synd, vend.2d[0]
134	/* We're not out of data, loop if we haven't found the character */
135	cbz synd, .Lloop
136
137	.Lend:
138	/* Termination condition found, let's calculate the syndrome value */
139	and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
140	and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
141	addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
142	addp vend.16b, vend.16b, vend.16b /* 128->64 */
143	mov synd, vend.2d[0]
144	/* Only do the clear for the last possible block */
145	b.hi .Ltail
146
147	.Lmasklast:
148	/* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits */
149	add tmp, cntrem, soff
150	and tmp, tmp, #31
151	sub tmp, tmp, #32
152	neg tmp, tmp, lsl #1
153	lsl synd, synd, tmp
154	lsr synd, synd, tmp
155
156	.Ltail:
157	/* Count the trailing zeros using bit reversing */
158	rbit synd, synd
159	/* Compensate the last post-increment */
160	sub src, src, #32
161	/* Check that we have found a character */
162	cmp synd, #0
163	/* And count the leading zeros */
164	clz synd, synd
165	/* Compute the potential result */
166	add result, src, synd, lsr #1
167	/* Select result or NULL */
168	csel result, xzr, result, eq
169	ret
170
171	.Lzero_length:
172	mov result, #0
173	ret
174
175	.size memchr, . - memchr
176	#endif

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format