1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
|
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
TEXT ·IndexByte(SB),7,$0
MOVQ s+0(FP), SI
MOVQ s+8(FP), BX
MOVB c+24(FP), AL
MOVQ SI, DI
CMPQ BX, $16
JLT small
// round up to first 16-byte boundary
TESTQ $15, SI
JZ aligned
MOVQ SI, CX
ANDQ $~15, CX
ADDQ $16, CX
// search the beginning
SUBQ SI, CX
REPN; SCASB
JZ success
// DI is 16-byte aligned; get ready to search using SSE instructions
aligned:
// round down to last 16-byte boundary
MOVQ BX, R11
ADDQ SI, R11
ANDQ $~15, R11
// shuffle X0 around so that each byte contains c
MOVD AX, X0
PUNPCKLBW X0, X0
PUNPCKLBW X0, X0
PSHUFL $0, X0, X0
JMP condition
sse:
// move the next 16-byte chunk of the buffer into X1
MOVO (DI), X1
// compare bytes in X0 to X1
PCMPEQB X0, X1
// take the top bit of each byte in X1 and put the result in DX
PMOVMSKB X1, DX
TESTL DX, DX
JNZ ssesuccess
ADDQ $16, DI
condition:
CMPQ DI, R11
JLT sse
// search the end
MOVQ SI, CX
ADDQ BX, CX
SUBQ R11, CX
// if CX == 0, the zero flag will be set and we'll end up
// returning a false success
JZ failure
REPN; SCASB
JZ success
failure:
MOVQ $-1, r+32(FP)
RET
// handle for lengths < 16
small:
MOVQ BX, CX
REPN; SCASB
JZ success
MOVQ $-1, r+32(FP)
RET
// we've found the chunk containing the byte
// now just figure out which specific byte it is
ssesuccess:
// get the index of the least significant set bit
BSFW DX, DX
SUBQ SI, DI
ADDQ DI, DX
MOVQ DX, r+32(FP)
RET
success:
SUBQ SI, DI
SUBL $1, DI
MOVQ DI, r+32(FP)
RET
TEXT ·Equal(SB),7,$0
MOVQ a+8(FP), BX
MOVQ b+32(FP), CX
MOVL $0, AX
CMPQ BX, CX
JNE eqret
MOVQ a+0(FP), SI
MOVQ b+24(FP), DI
CLD
REP; CMPSB
MOVL $1, DX
CMOVLEQ DX, AX
eqret:
MOVB AX, r+48(FP)
RET
|