summaryrefslogtreecommitdiff
path: root/usr/src/common/crypto/md5/md5_byteswap.h
blob: 02d6b545768f7ffaa6bc096d56d483a7f6584b9f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_MD5_BYTESWAP_H
#define	_MD5_BYTESWAP_H

/*
 * definitions for inline functions for little-endian loads.
 *
 * This file has special definitions for UltraSPARC architectures,
 * which have a special address space identifier for loading 32 and 16 bit
 * integers in little-endian byte order.
 */

#include <sys/types.h>
#if defined(__sparc)
#include <v9/sys/asi.h>
#elif defined(_LITTLE_ENDIAN)
#include <sys/byteorder.h>
#endif

#ifdef	__cplusplus
extern "C" {
#endif

#if defined(_LITTLE_ENDIAN)

/*
 * Little-endian optimization:  I don't need to do any weirdness.   On
 * some little-endian boxen, I'll have to do alignment checks, but I can do
 * that below.
 */

#if !defined(__i386) && !defined(__amd64)
/*
 * i386 and amd64 don't require aligned 4-byte loads, so they are excluded
 * here.  On every other little-endian platform the symbol
 * _MD5_CHECK_ALIGNMENT is defined to indicate that the MD5Transform
 * function requires alignment checking.
 */
#define	_MD5_CHECK_ALIGNMENT
#endif /* !__i386 && !__amd64 */

/*
 * Fetch the 32-bit word stored at addr with a single native load; on a
 * little-endian CPU the native byte order is already the order wanted.
 * The pointer is converted to uint32_t * by way of (void *).
 */
#define	LOAD_LITTLE_32(addr)	(((uint32_t *)(void *)(addr))[0])

#else	/* !_LITTLE_ENDIAN */

/*
 * sparc v9/v8plus optimization:
 *
 * on the sparc v9/v8plus, we can load data little endian.  however, since
 * the compiler doesn't have direct support for little endian, we
 * link to an assembly-language routine `load_little_32' to do
 * the magic.  note that special care must be taken to ensure the
 * address is 32-bit aligned -- in the interest of speed, we don't
 * check to make sure, since careful programming can guarantee this
 * for us.
 */
#if defined(sun4u)

/*
 * Define alignment check because we can 4-byte load as little endian.
 * LOAD_LITTLE_32 converts its argument to a uint32_t pointer (by way of
 * void *) and hands it to load_little_32().
 */
#define	_MD5_CHECK_ALIGNMENT
#define	LOAD_LITTLE_32(addr)    load_little_32((uint32_t *)(void *)(addr))

#if !defined(__lint) && defined(__GNUC__)

/*
 * Load the 32-bit word at addr in little-endian byte order, using the
 * ASI_PL address space identifier with the lduwa instruction.
 *
 * addr must be 32-bit aligned; no check is made here.
 *
 * The extra "m" input operand is not referenced by the template.  It tells
 * the compiler that the instruction reads *addr, so the load is neither
 * merged with an earlier identical load nor moved across a store to the
 * same location.
 */
static __inline__ uint32_t
load_little_32(uint32_t *addr)
{
	uint32_t value;

	__asm__(
	    "lduwa	[%1] %2, %0\n\t"
	    : "=r" (value)
	    : "r" (addr), "i" (ASI_PL), "m" (*addr));

	return (value);
}
#endif	/* !__lint && __GNUC__ */

#if !defined(__GNUC__)
/*
 * Compilers other than GCC see only this prototype for load_little_32()
 * (lint additionally gets the stub definition that follows).
 */
extern	uint32_t load_little_32(uint32_t *);
#endif	/* !__GNUC__ */

/* Placate lint */
#if defined(__lint)
/*
 * Stand-in compiled only for lint: a plain native-order read of the word
 * at addr, with no byte swapping.
 */
uint32_t
load_little_32(uint32_t *addr)
{
	uint32_t word = addr[0];

	return (word);
}
#endif	/* __lint */

#elif defined(_LITTLE_ENDIAN)
/*
 * NOTE(review): this branch sits inside the "#else" arm of the outer
 * "#if defined(_LITTLE_ENDIAN)" test, so it looks unreachable -- confirm
 * and consider removing it.  As written it would also hand the address
 * itself, not the word stored there, to htonl().
 */
#define	LOAD_LITTLE_32(addr)	htonl(addr)

#else
/*
 * big endian -- will work on little endian, but slowly
 *
 * Assemble the little-endian 32-bit value from the four bytes at addr,
 * lowest-addressed byte first.  Since we do byte operations, we don't have
 * to check for alignment.
 *
 * Each byte is read as uint8_t and widened to uint32_t before shifting.
 * Without that, the bytes promote to (signed) int, so a top byte >= 0x80
 * shifted left by 24 overflows int and the result may sign-extend when
 * stored in a wider type.  Note that addr is evaluated four times.
 */
#define	LOAD_LITTLE_32(addr)	\
	((uint32_t)((const uint8_t *)(const void *)(addr))[0] | \
	((uint32_t)((const uint8_t *)(const void *)(addr))[1] << 8) | \
	((uint32_t)((const uint8_t *)(const void *)(addr))[2] << 16) | \
	((uint32_t)((const uint8_t *)(const void *)(addr))[3] << 24))
#endif	/* sun4u */

#if defined(sun4v)

/*
 * For N1 want to minimize number of arithmetic operations. This is best
 * achieved by using the %asi register to specify ASI for the lduwa operations.
 * Also, have a separate inline template for each word, so can utilize the
 * immediate offset in lduwa, without relying on the compiler to do the right
 * thing.
 *
 * Moving to 64-bit loads might also be beneficial.
 */
/*
 * One macro per word offset (0 through f).  The argument is converted to
 * uint32_t * by way of (void *), the same form the sun4u LOAD_LITTLE_32
 * uses, so the deliberate pointer conversion is spelled the same way
 * throughout this file.
 */
#define	LOAD_LITTLE_32_0(addr)	load_little_32_0((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_1(addr)	load_little_32_1((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_2(addr)	load_little_32_2((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_3(addr)	load_little_32_3((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_4(addr)	load_little_32_4((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_5(addr)	load_little_32_5((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_6(addr)	load_little_32_6((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_7(addr)	load_little_32_7((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_8(addr)	load_little_32_8((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_9(addr)	load_little_32_9((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_a(addr)	load_little_32_a((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_b(addr)	load_little_32_b((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_c(addr)	load_little_32_c((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_d(addr)	load_little_32_d((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_e(addr)	load_little_32_e((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_f(addr)	load_little_32_f((uint32_t *)(void *)(addr))

#if !defined(__lint) && defined(__GNUC__)

/*
 * Write asi into the %asi register.  Despite the name, this sets whatever
 * value the caller passes, not necessarily ASI_PL.
 *
 * The "memory" clobber stops the compiler from caching memory values in
 * registers across this point or moving memory accesses over it, since
 * accesses made through %asi depend on the value written here.
 */
static __inline__ void
set_little(uint8_t asi)
{
	__asm__ __volatile__(
	    "wr	%%g0, %0, %%asi\n\t"
	    : /* Nothing */
	    : "r" (asi)
	    : "memory");
}

/*
 * Read the %asi register and return its current value.
 */
static __inline__ uint8_t
get_little(void)
{
	uint8_t cur_asi;

	__asm__ __volatile__(
	    "rd	%%asi, %0\n\t"
	    : "=r" (cur_asi));

	return (cur_asi);
}

/*
 * We have 16 functions which differ only in the offset from which they
 * load.  Use this preprocessor template to simplify maintenance.  Its
 * argument is the word offset in hex, without the 0x; the byte offset
 * encoded in the instruction is that value times four.
 *
 * Each function loads through whatever ASI is currently in the %asi
 * register, a dependency the compiler cannot see.  Two things keep the
 * load where it was written:
 *   - __volatile__, so it is not merged with another identical load or
 *     hoisted out of a loop, and keeps its order relative to the other
 *     volatile asm statements (such as those that read and write %asi);
 *   - the unreferenced "m" input, which tells the compiler the instruction
 *     reads addr[offset], so stores to that word are not moved across it.
 */
#define	LL_TEMPLATE(__off)			\
static __inline__ uint32_t			\
load_little_32_##__off(uint32_t *addr)		\
{						\
	uint32_t value;				\
	__asm__ __volatile__(			\
		"lduwa	[%1 + %2]%%asi, %0\n\t"	\
	: "=r" (value)				\
	: "r" (addr), "i" ((0x##__off) << 2),	\
	    "m" (addr[0x##__off]));		\
	return (value);				\
}

/* BEGIN CSTYLED */
LL_TEMPLATE(0)
LL_TEMPLATE(1)
LL_TEMPLATE(2)
LL_TEMPLATE(3)
LL_TEMPLATE(4)
LL_TEMPLATE(5)
LL_TEMPLATE(6)
LL_TEMPLATE(7)
LL_TEMPLATE(8)
LL_TEMPLATE(9)
LL_TEMPLATE(a)
LL_TEMPLATE(b)
LL_TEMPLATE(c)
LL_TEMPLATE(d)
LL_TEMPLATE(e)
LL_TEMPLATE(f)
/* END CSTYLED */
#undef	LL_TEMPLATE

#endif	/* !__lint && __GNUC__ */

#if !defined(__GNUC__)
/*
 * Using the %asi register to achieve little endian loads - register
 * is set using an inline template.
 *
 * Saves a few arithmetic ops as can now use an immediate offset with the
 * lduwa instructions.
 *
 * Only prototypes appear here for compilers other than GCC.
 * NOTE(review): set_little()/get_little() are declared with uint32_t here
 * but use uint8_t in the GCC inline versions earlier in this file --
 * confirm the difference is intentional.
 */
extern void set_little(uint32_t);
extern uint32_t get_little(void);

/* One loader per word offset, 0 through f. */
extern	uint32_t load_little_32_0(uint32_t *);
extern	uint32_t load_little_32_1(uint32_t *);
extern	uint32_t load_little_32_2(uint32_t *);
extern	uint32_t load_little_32_3(uint32_t *);
extern	uint32_t load_little_32_4(uint32_t *);
extern	uint32_t load_little_32_5(uint32_t *);
extern	uint32_t load_little_32_6(uint32_t *);
extern	uint32_t load_little_32_7(uint32_t *);
extern	uint32_t load_little_32_8(uint32_t *);
extern	uint32_t load_little_32_9(uint32_t *);
extern	uint32_t load_little_32_a(uint32_t *);
extern	uint32_t load_little_32_b(uint32_t *);
extern	uint32_t load_little_32_c(uint32_t *);
extern	uint32_t load_little_32_d(uint32_t *);
extern	uint32_t load_little_32_e(uint32_t *);
extern	uint32_t load_little_32_f(uint32_t *);
#endif	/* !__GNUC__ */
#endif	/* sun4v */

#endif	/* _LITTLE_ENDIAN */

#ifdef	__cplusplus
}
#endif

#endif	/* !_MD5_BYTESWAP_H */