/*
 * path: root/usr/src/lib/libc/capabilities/sun4u-us3/common/memcmp.s
 * blob: fbbb3d6fb84ffac666388e4f6891ccbd0fed6221
 */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
 */

	.file	"memcmp.s"

/*
 * memcmp(s1, s2, n)
 *
 * Compare n bytes:  s1>s2: >0  s1==s2: 0  s1<s2: <0
 *
 * Fast assembly language version of the following C program for memcmp,
 * which represents the `standard' for the C library.  Bytes are compared
 * as unsigned values, matching the ldub loads used by the assembly code.
 *
 *	int
 *	memcmp(const void *s1, const void *s2, size_t n)
 *	{
 *		if (s1 != s2 && n != 0) {
 *			const unsigned char *ps1 = s1;
 *			const unsigned char *ps2 = s2;
 *			do {
 *				if (*ps1++ != *ps2++)
 *					return (ps1[-1] - ps2[-1]);
 *			} while (--n != 0);
 *		}
 *		return (0);
 *	}
 */

#include <sys/asm_linkage.h>
#include <sys/machasi.h>

/*
 * Unit, in bytes, for the prefetch look-ahead offsets used by memcmp
 * (it prefetches 1, 2 and 3 blocks ahead of the current pointers).
 * NOTE(review): presumably the prefetch/cache block size of the target
 * CPU -- confirm.
 */
#define	BLOCK_SIZE	64

	/*
	 * NOTE(review): presumably declares memcmp as a weak function symbol
	 * through a macro from <sys/asm_linkage.h> -- confirm against that
	 * header.
	 */
	ANSI_PRAGMA_WEAK(memcmp,function)

	ENTRY(memcmp)
	!
	! Compare the memory at s1 (o0) and s2 (o1) for o2 bytes and return
	! 0 if equal, else the difference of the first mismatching bytes
	! (loaded unsigned with ldub).
	!
	! Register usage:
	!   o0  s1 cursor while comparing; return value on exit
	!   o1  s2 cursor; turned into (s2 - s1) before the residual loop
	!   o2  number of bytes still to compare
	!   o3  trip count of the first byte loop, later the saved fprs
	!   o4  current s1 byte, or the double word loop counter
	!   o5  current s2 byte, or the fcmpne32 result
	!   g1  8 byte aligned read pointer into s2
	!   d0, d2  consecutive aligned double words of s2
	!   d6  double word of s1;  d8  realigned double word of s2
	!
	! Phases: (1) byte loop over the whole buffer if count <= 48, else
	! over the first 9 to 16 bytes so that s1 becomes 8 byte aligned;
	! (2) double word loop using VIS instructions; (3) byte loop over
	! whatever the double word loop left over or flagged as different.
	!
	cmp	%o0, %o1		! s1 == s2?
	be	%ncc, .cmpeq		! identical pointers compare equal
	prefetch [%o0], #one_read	! (delay slot) runs on both paths
	prefetch [%o1], #one_read
	
	! for small counts byte compare immediately
	cmp	%o2, 48
	bleu,a 	%ncc, .bytcmp
	mov	%o2, %o3		! (annulled slot) o3 = count, <= 48
	
	! Count > 48. We will byte compare (8 + num of bytes to dbl align) 
	! bytes. We assume that most miscompares will occur in the 1st 8 bytes 

	prefetch [%o0 + (1 * BLOCK_SIZE)], #one_read
	prefetch [%o1 + (1 * BLOCK_SIZE)], #one_read

	! Note: .chkdbl is not referenced by any branch in this routine.
.chkdbl:
	and     %o0, 7, %o4             ! o4 = s1 & 7 (offset in its dbl word)
	mov	8, %o3			! o2 > 48;  o3 = 8
        sub     %o4, 8, %o4		! o4 = -(num of bytes to dbl align)
					! (-8 when s1 is already aligned)
	ba	%ncc, .bytcmp
        sub     %o3, %o4, %o3           ! (delay slot) o3 = 8 + (num of bytes
					! to dbl align), i.e. 9 to 16

	! First byte loop.  It is entered at .bytcmp with o3 = number of
	! bytes to compare.  The s1 byte is loaded into o4 by the annulled
	! delay slot of the loop branch, the s2 byte into o5 at label 1.
	! o0, o1 and o2 are advanced once per byte compared.
1:	ldub	[%o1], %o5        	! byte compare loop
        inc     %o1
        inc     %o0
	dec	%o2
        cmp     %o4, %o5
	bne	%ncc, .noteq
.bytcmp:
	deccc   %o3			! also the delay slot of the bne above
	bgeu,a	%ncc, 1b		! loop while o3 was nonzero
        ldub    [%o0], %o4		! (annulled slot) o4 = next s1 byte

	! Check to see if there are more bytes to compare
	! (the count <= 48 path always ends the loop with o2 == 0)
	cmp	%o2, 0			! is o2 > 0
	bgu	%ncc, .dwcmp		! we should already be dbl aligned
	nop
.cmpeq:
        retl                             ! strings compare equal
	sub	%g0, %g0, %o0		! (delay slot) return 0

.noteq:
	retl				! strings aren't equal
	sub	%o4, %o5, %o0		! return(*s1 - *s2)


        ! double word compare - using ldd and faligndata. Compares up to
        ! 8 byte multiple count and does byte compare for the residual.
	!
	! s1 (o0) is 8 byte aligned here; s2 (o1) need not be, so s2 is read
	! as aligned double words through g1 and realigned with faligndata.

.dwcmp: 
	prefetch [%o0 + (2 * BLOCK_SIZE)], #one_read
	prefetch [%o1 + (2 * BLOCK_SIZE)], #one_read

        ! if fprs.fef == 0, set it. Checking it, requires 2 instructions.
        ! So set it anyway, without checking.
        rd      %fprs, %o3              ! o3 = fprs
        wr      %g0, 0x4, %fprs         ! fprs.fef = 1

        andn    %o2, 7, %o4             ! o4 has 8 byte aligned cnt
	sub     %o4, 8, %o4		! leave the last full dbl word to the
					! byte loop; NOTE(review): presumably
					! so the look-ahead ldd of s2 below
					! stays inside the buffer -- confirm
        alignaddr %o1, %g0, %g1		! g1 = s2 rounded down to 8 bytes and
					! set the offset used by faligndata
        ldd     [%g1], %d0		! d0 = first aligned dbl word of s2

	! Double word loop.  Each pass loads the next aligned dbl word of s2
	! into d2, builds the 8 bytes of s2 at o1 in d8 from d0:d2, compares
	! them with the 8 bytes of s1 in d6, then carries d2 over into d0.
	! On a mismatch the loop exits to label 6 before o0, o1 and o2 are
	! advanced, so the byte loop rescans this dbl word.
4:
        add     %g1, 8, %g1
        ldd     [%g1], %d2		! d2 = next aligned dbl word of s2
	ldd	[%o0], %d6		! d6 = 8 bytes of s1
	prefetch [%g1 + (3 * BLOCK_SIZE)], #one_read
	prefetch [%o0 + (3 * BLOCK_SIZE)], #one_read
        faligndata %d0, %d2, %d8	! d8 = 8 bytes of s2 starting at o1
	fcmpne32 %d6, %d8, %o5		! o5 != 0 if d6 and d8 differ
	fsrc1	%d6, %d6		! 2 fsrc1's added since o5 cannot
	fsrc1	%d8, %d8		! be used for 3 cycles else we 
	fmovd	%d2, %d0		! create 9 bubbles in the pipeline
	brnz,a,pn %o5, 6f		! mismatch: finish with the byte loop
	sub     %o1, %o0, %o1           ! o1 gets the difference
					! (annulled slot, only when taken)
        subcc   %o4, 8, %o4		! one fewer dbl word to go
        add     %o0, 8, %o0
        add     %o1, 8, %o1
        bgu,pt	%ncc, 4b		! loop while o4 is still > 0
        sub     %o2, 8, %o2		! (delay slot) runs on loop exit too

	! Residual byte loop.  o1 becomes (s2 - s1) so that [o0 + o1] reads
	! the s2 byte at the same index as the s1 byte read through o0.
	! o2 is the loop counter here.
	! Note: .residcmp is not referenced by any branch in this routine.
.residcmp:
        ba      6f
	sub     %o1, %o0, %o1           ! o1 gets the difference

5:      ldub    [%o0 + %o1], %o5        ! byte compare loop
        inc     %o0
        cmp     %o4, %o5
        bne     %ncc, .dnoteq
6:
        deccc   %o2			! also the delay slot of the bne above
        bgeu,a	%ncc, 5b		! loop while o2 was nonzero
        ldub    [%o0], %o4		! (annulled slot) o4 = next s1 byte
	
	! Only the fef bit of the saved fprs value is written back.
	and     %o3, 0x4, %o3           ! fprs.du = fprs.dl = 0
	wr      %o3, %g0, %fprs         ! fprs = o3 - restore fprs
	retl
	sub	%g0, %g0, %o0		! strings compare equal 
        
.dnoteq:
	! Only the fef bit of the saved fprs value is written back.
	and     %o3, 0x4, %o3           ! fprs.du = fprs.dl = 0
	wr      %o3, %g0, %fprs         ! fprs = o3 - restore fprs
	retl
	sub	%o4, %o5, %o0		! return(*s1 - *s2)
        
	SET_SIZE(memcmp)