summaryrefslogtreecommitdiff
path: root/usr/src/lib/libc/sparcv9/gen/strchr.s
blob: 5dc0ae9324dd6cac3201a62f99c66ebd792dd558 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

	.file	"strchr.s"

/*
 * The strchr() function returns a pointer to the first occurrence of c 
 * (converted to a char) in string s, or a null pointer if c does not occur
 * in the string.
 */

#include <sys/asm_linkage.h>

	! Here, we start by checking to see if we're searching the dest
	! string for a null byte.  We have fast code for this, so it's
	! an important special case.  Otherwise, if the string is not
	! word aligned, we check a for the search char a byte at a time
	! until we've reached a word boundary.  Once this has happened
	! some zero-byte finding values are initialized and the string
	! is checked a word at a time

	ENTRY(strchr)

	.align 32

	andcc	%o1, 0xff, %o1		! search only for this one byte
	bz,pn	%ncc, .searchnullbyte	! faster code for searching null
	andcc	%o0, 3, %o4		! str word aligned ?
	bz,a,pn	%ncc, .prepword2	! yup, prepare for word-wise search
	sll	%o1, 8, %g1		! start spreading findchar across word

	ldub	[%o0], %o2		! str[0]
	cmp	%o2, %o1		! str[0] == findchar ?
	be,pn	%ncc, .done		! yup, done
	tst	%o2			! str[0] == 0 ?
	bz,pn	%ncc, .notfound		! yup, return null pointer
	cmp	%o4, 3			! only one byte needed to align?
	bz,pn	%ncc, .prepword		! yup, prepare for word-wise search
	inc	%o0			! str++
	ldub	[%o0], %o2		! str[1]
	cmp	%o2, %o1		! str[1] == findchar ?
	be,pn	%ncc, .done		! yup, done
	tst	%o2			! str[1] == 0 ?
	bz,pn	%ncc, .notfound		! yup, return null pointer
	cmp	%o4, 2			! only two bytes needed to align?
	bz,pn	%ncc, .prepword		! yup, prepare for word-wise search
	inc	%o0			! str++
	ldub	[%o0], %o2		! str[2]
	cmp	%o2, %o1		! str[2] == findchar ?
	be,pn	%ncc, .done		! yup, done
	tst	%o2			! str[2] == 0 ?
	bz,pn	%ncc, .notfound		! yup, return null pointer
	inc	%o0			! str++

.prepword:
	sll	%o1, 8, %g1		! spread findchar ------+
.prepword2:							!
	sethi	%hi(0x01010101), %o4	! Alan Mycroft's magic1 !
	or	%o1, %g1, %o1		!  across all <---------+
	sethi	%hi(0x80808080), %o5	! Alan Mycroft's magic2	!
	sll	%o1, 16, %g1		!   four bytes <--------+
	or	%o4, %lo(0x01010101), %o4			!
	or	%o1, %g1, %o1		!    of a word <--------+
	or	%o5, %lo(0x80808080), %o5

.searchchar:
	lduw	[%o0], %o2		! src word
	andn	%o5, %o2, %o3		! ~word & 0x80808080
	sub	%o2, %o4, %g1		! word = (word - 0x01010101)
	andcc	%o3, %g1, %g0		! ((word - 0x01010101) & ~word & 0x80808080)
	bnz,pn	%ncc, .haszerobyte	! zero byte if magic expression != 0
	xor	%o2, %o1, %g1		! tword = word ^ findchar
	andn	%o5, %g1, %o3		! ~tword & 0x80808080
	sub	%g1, %o4, %o2		! (tword - 0x01010101)
	andcc	%o3, %o2, %g0		! ((tword - 0x01010101) & ~tword & 0x80808080)
	bz,a,pt	%ncc, .searchchar	! no findchar if magic expression == 0
	add	%o0, 4, %o0		! str += 4

	! here we know "word" contains the searched character, but no null
	! byte. if there was a null byte, we would have gone to .haszerobyte
	! "tword" has null bytes where "word" had findchar. Examine "tword"

.foundchar:
	set	0xff000000, %o4		! mask for 1st byte
	andcc	%g1, %o4, %g0		! first byte zero (= found search char) ?
	bz,pn	%ncc, .done		! yup, done
	set	0x00ff0000, %o5		! mask for 2nd byte
	inc	%o0			! str++
	andcc	%g1, %o5, %g0		! second byte zero (= found search char) ?	
	bz,pn	%ncc, .done		! yup, done
	srl	%o4, 16, %o4		! 0x0000ff00 = mask for 3rd byte
	inc	%o0			! str++
	andcc	%g1, %o4, %g0		! third byte zero (= found search char) ?
	bnz,a	%ncc, .done		! nope, increment in delay slot
	inc	%o0			! str++

.done:
	retl				! done with leaf function
	nop				! padding

	! Here we know that "word" contains a null byte indicating the
	! end of the string. However, "word" might also contain findchar
	! "tword" (in %g1) has null bytes where "word" had findchar. So
	! check both "tword" and "word"
    
.haszerobyte:
	set	0xff000000, %o4		! mask for 1st byte
	andcc	%g1, %o4, %g0		! first byte == findchar ?
	bz,pn	%ncc, .done		! yup, done
	andcc	%o2, %o4, %g0		! first byte == 0 ?
	bz,pn	%ncc, .notfound		! yup, return null pointer
	set	0x00ff0000, %o4		! mask for 2nd byte
	inc	%o0			! str++
	andcc	%g1, %o4, %g0		! second byte == findchar ?
	bz,pn	%ncc, .done		! yup, done
	andcc	%o2, %o4, %g0		! second byte == 0 ?
	bz,pn	%ncc, .notfound		! yup, return null pointer
	srl	%o4, 8, %o4		! mask for 3rd byte = 0x0000ff00
	inc	%o0			! str++
	andcc	%g1, %o4, %g0		! third byte == findchar ?
	bz,pn	%ncc, .done		! yup, done
	andcc	%o2, %o4, %g0		! third byte == 0 ?
	bz,pn	%ncc, .notfound		! yup, return null pointer
	andcc	%g1, 0xff, %g0		! fourth byte == findchar ?
	bz,pn	%ncc, .done		! yup, done
	inc	%o0			! str++
    
.notfound:
	retl				! done with leaf function
	xor	%o0, %o0, %o0		! return null pointer

	! since findchar == 0, we only have to do one test per item
	! instead of two. This makes the search much faster.

.searchnullbyte:
	bz,pn	%ncc, .straligned	! str is word aligned
	nop				! padding

	cmp	%o4, 2			! str halfword aligned ?
	be,pn	%ncc, .s2aligned	! yup
	ldub	[%o0], %o1		! str[0]
	tst	%o1			! byte zero?
	bz,pn	%ncc, .done		! yup, done
	cmp	%o4, 3			! only one byte needed to align?
	bz,pn	%ncc, .straligned	! yup
	inc	%o0			! str++

	! check to see if we're half word aligned, which it better than
	! not being aligned at all.  Search the first half of the word
	! if we are, and then search by whole word.	

.s2aligned:
	lduh	[%o0], %o1		! str[]     
	srl	%o1, 8, %o4		! %o4<7:0> = first byte
	tst	%o4			! first byte zero ?
	bz,pn	%ncc, .done2		! yup, done
	andcc	%o1, 0xff, %g0		! second byte zero ?
	bz,a,pn	%ncc, .done2		! yup, done
	inc	%o0			! str++
	add	%o0, 2, %o0		! str+=2

.straligned:
	sethi	%hi(0x01010101), %o4	! Alan Mycroft's magic1
	sethi	%hi(0x80808080), %o5	! Alan Mycroft's magic2
	or	%o4, %lo(0x01010101), %o4
	or	%o5, %lo(0x80808080), %o5

.searchword:
	lduw	[%o0], %o1		! src word
	andn	%o5, %o1, %o3		! ~word & 0x80808080
	sub	%o1, %o4, %g1		! word = (word - 0x01010101)
	andcc	%o3, %g1, %g0		! ((word - 0x01010101) & ~word & 0x80808080)
	bz,a,pt	%ncc, .searchword	! no zero byte if magic expression == 0
	add	%o0, 4, %o0		! str += 4

.zerobyte:
	set	0xff000000, %o4		! mask for 1st byte
	andcc	%o1, %o4, %g0		! first byte zero?
	bz,pn	%ncc, .done2		! yup, done
	set	0x00ff0000, %o5		! mask for 2nd byte
	inc	%o0			! str++
	andcc	%o1, %o5, %g0		! second byte zero?
	bz,pn	%ncc, .done2		! yup, done
	srl	%o4, 16, %o4		! 0x0000ff00 = mask for 3rd byte
	inc	%o0			! str++
	andcc	%o1, %o4, %g0		! third byte zero?
	bnz,a	%ncc, .done2		! nope, increment in delay slot
	inc	%o0			! str++
.done2:
    	retl				! return from leaf function
	nop				! padding

	SET_SIZE(strchr)