Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

strcmp.S @ 444

Last change on this file since 444 was 444, checked in by satin@…, 6 years ago
add newlib,libalmos-mkh, restructure shared_syscalls.h and mini-libc
File size: 5.5 KB

Line
1	/* Copyright (c) 2012-2013, Linaro Limited
2	All rights reserved.
3
4	Redistribution and use in source and binary forms, with or without
5	modification, are permitted provided that the following conditions are met:
6	* Redistributions of source code must retain the above copyright
7	notice, this list of conditions and the following disclaimer.
8	* Redistributions in binary form must reproduce the above copyright
9	notice, this list of conditions and the following disclaimer in the
10	documentation and/or other materials provided with the distribution.
11	* Neither the name of the Linaro nor the
12	names of its contributors may be used to endorse or promote products
13	derived from this software without specific prior written permission.
14
15	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19	HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
26
27	/* Assumptions:
28	*
29	* ARMv8-a, AArch64
30	*/
31
32	#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
33	/* Size-preferring builds assemble nothing from this file. See strcmp-stub.c (not shown here; presumably it supplies the strcmp used instead -- confirm). */
34	#else /* Speed-preferring build: the hand-written AArch64 strcmp below is assembled. */
35
36	.macro def_fn f p2align=0 /* Open a global function named by f in .text. p2align is the log2 of the entry alignment; the default 0 requests none. */
37	.text /* Emit into the code section. */
38	.p2align \p2align /* Align the entry point to a power-of-two boundary. */
39	.global \f /* Make the symbol visible to the linker. */
40	.type \f, %function /* Give the symbol the function type. */
41	\f: /* The entry label; the function body follows the macro invocation. */
42	.endm
43
44	#define REP8_01 0x0101010101010101 /* 0x01 replicated in each of the 8 bytes. */
45	#define REP8_7f 0x7f7f7f7f7f7f7f7f /* 0x7f replicated in each of the 8 bytes. */
46	#define REP8_80 0x8080808080808080 /* 0x80 replicated in each of the 8 bytes; not referenced by the code shown in this file. */
47
48	/* Parameters and result. Note that result and src1 are the same register (x0). */
49	#define src1 x0 /* Address of the first string; advanced as it is read. */
50	#define src2 x1 /* Address of the second string; advanced as it is read. */
51	#define result x0 /* Return value; written only just before ret, overwriting src1. */
52
53	/* Internal variables. */
54	#define data1 x2 /* Data loaded from src1: a dword in the word loop, one byte in the byte loop. */
55	#define data1w w2 /* 32-bit view of data1, used as the target of the byte loads. */
56	#define data2 x3 /* Data loaded from src2, same usage as data1. */
57	#define data2w w3 /* 32-bit view of data2. */
58	#define has_nul x4 /* Non-zero when data1 contains a NUL byte. */
59	#define diff x5 /* data1 XOR data2; non-zero when the two dwords differ. */
60	#define syndrome x6 /* diff OR has_nul; non-zero terminates the word loop. */
61	#define tmp1 x7 /* Scratch. */
62	#define tmp2 x8 /* Scratch. */
63	#define tmp3 x9 /* Scratch; only written on the big-endian (__AARCH64EB__) path. */
64	#define zeroones x10 /* Holds REP8_01 for the NUL test. */
65	#define pos x11 /* Leading-zero count of the syndrome; used as the left-shift amount. */
66
67	/* Start of performance-critical section -- one 64B cache line. */
68	def_fn strcmp p2align=6 /* Compare the NUL-terminated strings at src1 (x0) and src2 (x1) as unsigned bytes. Returns in x0: zero if equal, negative if the first string orders lower, positive if higher. Only the sign is consistent across the code paths. Touches x0-x11 and the flags; makes no stack accesses. */
69	eor tmp1, src1, src2 /* Low three bits are non-zero when the pointers sit at different offsets within a dword. */
70	mov zeroones, #REP8_01
71	tst tmp1, #7
72	b.ne .Lmisaligned8 /* Differently aligned pointers: use the byte-at-a-time loop. */
73	ands tmp1, src1, #7 /* tmp1 = byte offset of src1 within its dword (shared by src2 here). */
74	b.ne .Lmutual_align /* Same non-zero offset in both: realign before the word loop. */
75	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
76	(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
77	can be done in parallel across the entire word. */
78	.Lloop_aligned:
79	ldr data1, [src1], #8 /* Post-indexed: each pointer advances by one dword. */
80	ldr data2, [src2], #8
81	.Lstart_realigned:
82	sub tmp1, data1, zeroones /* X - 1 in every byte lane. */
83	orr tmp2, data1, #REP8_7f /* X | 0x7f in every byte lane. */
84	eor diff, data1, data2 /* Non-zero if differences found. */
85	bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
86	orr syndrome, diff, has_nul
87	cbz syndrome, .Lloop_aligned /* Dwords equal and NUL-free: continue with the next pair. */
88	/* End of performance-critical section -- one 64B cache line. */
89
90	#ifndef __AARCH64EB__
91	rev syndrome, syndrome /* Little-endian: byte-reverse so that string order runs from the MSB down. */
92	rev data1, data1
93	/* The MS-non-zero bit of the syndrome marks either the first bit
94	that is different, or the top bit of the first zero byte.
95	Shifting left now will bring the critical information into the
96	top bits. */
97	clz pos, syndrome
98	rev data2, data2
99	lsl data1, data1, pos
100	lsl data2, data2, pos
101	/* But we need to zero-extend (char is unsigned) the value and then
102	perform a signed subtraction. The subtraction is done on X registers; both operands are 8-bit fields, so the low 32 bits carry the correctly signed difference. */
103	lsr data1, data1, #56 /* Keep only the top 8 bits of data1, zero-extended. */
104	sub result, data1, data2, lsr #56 /* Same extraction for data2, folded into the subtract. */
105	ret
106	#else
107	/* For big-endian we cannot use the trick with the syndrome value
108	as carry-propagation can corrupt the upper bits if the trailing
109	bytes in the string contain 0x01. */
110	/* However, if there is no NUL byte in the dword, we can generate
111	the result directly. We can't just subtract the bytes as the
112	MSB might be significant. */
113	cbnz has_nul, 1f
114	cmp data1, data2 /* Unsigned dword compare; in big-endian the earliest byte is the most significant. */
115	cset result, ne /* result = 1 when the dwords differ. */
116	cneg result, result, lo /* Negate when data1 is lower (unsigned). */
117	ret
118	1:
119	/* Re-compute the NUL-byte detection, using a byte-reversed value. */
120	rev tmp3, data1
121	sub tmp1, tmp3, zeroones
122	orr tmp2, tmp3, #REP8_7f
123	bic has_nul, tmp1, tmp2
124	rev has_nul, has_nul /* Reverse back so the NUL markers line up with data1 again. */
125	orr syndrome, diff, has_nul
126	clz pos, syndrome
127	/* The MS-non-zero bit of the syndrome marks either the first bit
128	that is different, or the top bit of the first zero byte.
129	Shifting left now will bring the critical information into the
130	top bits. */
131	lsl data1, data1, pos
132	lsl data2, data2, pos
133	/* But we need to zero-extend (char is unsigned) the value and then
134	perform a signed subtraction. The subtraction is done on X registers; both operands are 8-bit fields, so the low 32 bits carry the correctly signed difference. */
135	lsr data1, data1, #56
136	sub result, data1, data2, lsr #56
137	ret
138	#endif
139
140	.Lmutual_align:
141	/* Sources are mutually aligned, but are not currently at an
142	alignment boundary. Round down the addresses and then mask off
143	the bytes that precede the start point. */
144	bic src1, src1, #7
145	bic src2, src2, #7
146	lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
147	ldr data1, [src1], #8
148	neg tmp1, tmp1 /* Bits to alignment -64. */
149	ldr data2, [src2], #8
150	mov tmp2, #~0 /* All-ones; shifted below so that only the pre-start bytes stay set. */
151	#ifdef __AARCH64EB__
152	/* Big-endian. Early bytes are at MSB. */
153	lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
154	#else
155	/* Little-endian. Early bytes are at LSB. */
156	lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
157	#endif
158	orr data1, data1, tmp2 /* Force the pre-start bytes to 0xff in both dwords: they can then */
159	orr data2, data2, tmp2 /* neither look like a NUL nor show up as a difference. */
160	b .Lstart_realigned /* Join the word loop after its loads. */
161
162	.Lmisaligned8:
163	/* We can do better than this. */
164	ldrb data1w, [src1], #1 /* One byte from each string, zero-extended; pointers advance by one. */
165	ldrb data2w, [src2], #1
166	cmp data1w, #1 /* Carry set iff data1w >= 1, i.e. the byte is not NUL. */
167	ccmp data1w, data2w, #0, cs /* If not NUL, compare the two bytes; otherwise force NZCV = 0b0000, which reads as NE. */
168	b.eq .Lmisaligned8 /* Loop while the bytes are equal and non-NUL. */
169	sub result, data1, data2 /* Both bytes are zero-extended, so this is the plain byte difference (zero if both are NUL). */
170	ret
171	.size strcmp, .-strcmp
172
173	#endif /* Closes the __OPTIMIZE_SIZE__ / PREFER_SIZE_OVER_SPEED conditional opened before def_fn. */

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format