summaryrefslogtreecommitdiff
path: root/usr/src/lib/libresolv2/common/cylink/lbnppc.c
blob: 15eef0e28e74fd23adf8e09cc725bd041e678211 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
/*
 * Copyright (c) 1999 by Sun Microsystems, Inc.
 * All rights reserved.
 */

/*
 * Cylink Corporation © 1998
 * 
 * This software is licensed by Cylink to the Internet Software Consortium to
 * promote implementation of royalty free public key cryptography within IETF
 * standards.  Cylink wishes to expressly thank the contributions of Dr.
 * Martin Hellman, Whitfield Diffie, Ralph Merkle and Stanford University for
 * their contributions to Internet Security.  In accordance with the terms of
 * this license, ISC is authorized to distribute and sublicense this software
 * for the practice of IETF standards.  
 *
 * The software includes BigNum, written by Colin Plumb and licensed by Philip
 * R. Zimmermann for royalty free use and distribution with Cylink's
 * software.  Use of BigNum as a stand alone product or component is
 * specifically prohibited.
 *
 * Disclaimer of All Warranties. THIS SOFTWARE IS BEING PROVIDED "AS IS",
 * WITHOUT ANY EXPRESSED OR IMPLIED WARRANTY OF ANY KIND WHATSOEVER. IN
 * PARTICULAR, WITHOUT LIMITATION ON THE GENERALITY OF THE FOREGOING, CYLINK
 * MAKES NO REPRESENTATION OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
 * PURPOSE.
 *
 * Cylink or its representatives shall not be liable for tort, indirect,
 * special or consequential damages such as loss of profits or loss of
 * goodwill from the use or inability to use the software for any purpose or
 * for any reason whatsoever.
 *
 * EXPORT LAW: Export of the Foundations Suite may be subject to compliance
 * with the rules and regulations promulgated from time to time by the Bureau
 * of Export Administration, United States Department of Commerce, which
 * restrict the export and re-export of certain products and technical data.
 * If the export of the Foundations Suite is controlled under such rules and
 * regulations, then the Foundations Suite shall not be exported or
 * re-exported, directly or indirectly, (a) without all export or re-export
 * licenses and governmental approvals required by any applicable laws, or (b)
 * in violation of any applicable prohibition against the export or re-export
 * of any part of the Foundations Suite. All export licenses for software
 * containing the Foundations Suite are the sole responsibility of the licensee.
 */
 
#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include "lbnppc.h"

/*
 * lbnppc.c - Assembly primitives for the bignum library, PowerPC version.
 *
 * Copyright (c) 1995  Colin Plumb.  All rights reserved.
 * For licensing and other legal details, see the file legal.c
 *
 * Register usage during function calls is:
 * r0 - volatile
 * r1 - stack pointer, preserved
 * r2 - TOC pointer, preserved
 * r3 - First argument and return value register
 * r4-r10 - More argument registers, volatile
 * r11-r12 - Volatile
 * r13-r31 - Preserved
 * LR, CTR, XER and MQ are all volatile.
 * LR holds return address on entry.
 *
 * On the PPC 601, unrolling the loops more doesn't seem to speed things
 * up at all.  I'd be curious if other chips differed.
 */
#if __MWERKS__ < 0x800

#include "ppcasm.h"	/* PowerPC assembler */
 
/*
 * mulN1 - multiply the len-word number at in by the single word k and
 * store the (len+1)-word product at out.
 *
 * Expects (*out, *in, len, k), len >= 1
 *           r3    r4   r5   r6
 *
 * Once len has been moved into CTR, r5 is reused as the running carry
 * word (the high half of the previous product); r7 and r8 are scratch.
 * r3 and r4 are advanced by the load/store-with-update instructions,
 * so no result value is left in r3.
 *
 * The last PPC_BC argument is a displacement counted in instructions
 * from the branch itself (7 forward to Label, 5 back to Loop).  Both
 * must be adjusted if an instruction is ever added or removed.
 */
static const unsigned mulN1[] = {
	PPC_LWZ(7,4,0), 	/* Load first word of in in r7 */
	PPC_MULLW(8,7,6),	/* Low half of multiply in r8 */
	PPC_MTCTR(5),		/* Move len into CTR; r5 is free from here on */
	PPC_ADDIC(0,0,0),	/* Clear carry bit for loop (adding 0 never carries) */
	PPC_MULHWU(5,7,6),	/* High half of multiply in r5 (carry word) */
	PPC_STW(8,3,0),		/* out[0] = r8 */
	PPC_BC(18,31,7),	/* Branch to Label if --ctr == 0 (len was 1) */
/* Loop: */
	PPC_LWZU(7,4,4),	/* r7 = *++in */
	PPC_MULLW(8,7,6),	/* r8 = low word of product */
	PPC_ADDE(8,8,5),	/* Add carry word r5 and bit CF to r8 */
	PPC_STWU(8,3,4),	/* *++out = r8 */
	PPC_MULHWU(5,7,6),	/* r5 is high word of product, for carry word */
	PPC_BC(16,31,-5),	/* Branch to Loop if --ctr != 0 */
/* Label: */
	PPC_ADDZE(5,5),		/* Add carry flag to r5 */
	PPC_STW(5,3,4),		/* out[1] = r5: top word, one past the last product word */
	PPC_BLR()		/* Return to caller */
};

/*
 * mulAdd1 - add the product in[0..len-1] * k into out[0..len-1] in
 * place and return the carry word out of the top position in r3.
 *
 * Expects (*out, *in, len, k), len >= 1
 *           r3    r4   r5   r6
 *
 * Once len has been moved into CTR, r5 is reused as the running carry
 * word; r0 holds the current word of out, r7 the current word of in and
 * r8 the low half of the product.  Each word needs two carry-producing
 * adds (product + carry word, then + out[i]); the carry of the first is
 * folded into r5 by ADDZE, the carry of the second is picked up by the
 * next iteration's ADDE (or by the final ADDZE).
 *
 * The last PPC_BC argument is a displacement counted in instructions
 * from the branch itself (10 forward to Label, 8 back to Loop).  Both
 * must be adjusted if an instruction is ever added or removed.
 */
static unsigned const mulAdd1[] = {
	PPC_LWZ(7,4,0), 	/* Load first word of in in r7 */
	PPC_LWZ(0,3,0),		/* Load first word of out into r0 */
	PPC_MULLW(8,7,6),	/* Low half of multiply in r8 */
	PPC_MTCTR(5),		/* Move len into CTR; r5 is free from here on */
	PPC_MULHWU(5,7,6),	/* High half of multiply in r5 (carry word) */
	PPC_ADDC(8,8,0),	/* r8 = r8 + r0, setting CF */
	PPC_STW(8,3,0),		/* Store result to memory */
	PPC_BC(18,31,10),	/* Branch to Label if --ctr == 0 (len was 1) */
/* Loop: */
	PPC_LWZU(7,4,4),	/* r7 = *++in */
	PPC_LWZU(0,3,4),	/* r0 = *++out */
	PPC_MULLW(8,7,6),	/* r8 = low word of product */
	PPC_ADDE(8,8,5), 	/* Add carry word r5 and carry bit CF to r8 */
	PPC_MULHWU(5,7,6),	/* r5 is high word of product, for carry word */
	PPC_ADDZE(5,5),		/* Add carry bit from low add to r5 */
	PPC_ADDC(8,8,0),	/* r8 = r8 + r0, setting CF */
	PPC_STW(8,3,0), 	/* *out = r8 (out was already advanced by the LWZU) */
	PPC_BC(16,31,-8),	/* Branch to Loop if --ctr != 0 */
/* Label: */
	PPC_ADDZE(3,5),		/* Add carry flag to r5 and move to r3 (return value) */
	PPC_BLR()		/* Return to caller */
};

/*
 * mulSub1 - subtract the product in[0..len-1] * k from out[0..len-1]
 * in place and return the final borrow word in r3.
 *
 * Expects (*out, *in, len, k), len >= 1
 *           r3    r4   r5   r6
 *
 * Multiply and subtract is rather a pain.  If the subtract of the
 * low word of the product from out[i] generates a borrow, we want to
 * increment the carry word (initially in the range 0..0xfffffffe).
 * However, the PPC's carry bit CF is *clear* after a subtract that
 * borrows, so we want to add (1-CF) to the carry word.  This is done
 * using two instructions:
 *
 * SUBFME, subtract from minus one extended.  This computes
 *   rD = ~rS + 0xffffffff + CF.  Since rS is from 0 to 0xfffffffe,
 *   ~rS is from 1 through 0xffffffff, and the sum with 0xffffffff+CF is
 *   from 0x100000000 through 0x1ffffffff, i.e. a 32-bit result from
 *   0 through 0xffffffff with the carry flag set unconditionally, and
 * NOR, which is used as a bitwise invert NOT instruction.
 *
 * The SUBFME performs the computation rD = ~rS + 0xffffffff + CF,
 * = (-rS - 1) + (CF - 1) = -(rS - CF + 1) - 1 = ~(rS + 1-CF),
 * which is the bitwise complement of the value we want.
 * We want to add the complement of that result to the low word of the
 * product, which is just what a subtract would do, if only we could get
 * the carry flag clear.  But it's always set, except for SUBFE, and the
 * operation we just performed unconditionally *sets* the carry flag.  Ugh.
 * So find the complement in a separate instruction.
 *
 * The last PPC_BC argument is a displacement counted in instructions
 * from the branch itself (12 forward to Label, 10 back to Loop).  Both
 * must be adjusted if an instruction is ever added or removed.
 */
static unsigned const mulSub1[] = {
	PPC_LWZ(7,4,0), 	/* Load first word of in in r7 */
	PPC_LWZ(0,3,0),		/* Load first word of out into r0 */
	PPC_MTCTR(5),		/* Move len into CTR; r5 is free from here on */
	PPC_MULLW(8,7,6),	/* Low half of multiply in r8 */
	PPC_MULHWU(5,7,6),	/* High half of multiply in r5 (carry word) */
	PPC_SUBFC(8,8,0),	/* r8 = r0 - r8, setting CF */
	PPC_STW(8,3,0),		/* Store result to memory */
	PPC_SUBFME(5,5),	/* First of two insns to add (1-CF) to r5 */
	PPC_BC(18,31,12),	/* Branch to Label if --ctr == 0 (len was 1) */
/* Loop: */
	PPC_LWZU(7,4,4),	/* r7 = *++in */
	PPC_LWZU(0,3,4),	/* r0 = *++out */
	PPC_NOR(5,5,5),		/* Second of two insns to add (1-CF) to r5 */
	PPC_MULLW(8,7,6),	/* r8 = low word of product */
	PPC_ADDC(8,8,5), 	/* Add carry word r5 to r8 */
	PPC_MULHWU(5,7,6),	/* r5 is high word of product, for carry word */
	PPC_ADDZE(5,5),		/* Add carry bit from low add to r5 */
	PPC_SUBFC(8,8,0),	/* r8 = r0 - r8, setting CF */
	PPC_STW(8,3,0), 	/* *out = r8 (out was already advanced by the LWZU) */
	PPC_SUBFME(5,5),	/* First of two insns to add (1-CF) to r5 */
	PPC_BC(16,31,-10),	/* Branch to Loop if --ctr != 0 */
/* Label: */
	PPC_NOR(3,5,5),		/* Finish adding (1-CF) to r5, store in r3 (return value) */
	PPC_BLR()		/* Return to caller */
};

#if 0
/*
 * montReduce - unfinished draft, compiled out by the surrounding #if 0.
 *
 * It cannot be enabled as written: several table entries are incomplete
 * ("PPC_", "PPC_LWZU(") and the operands count, x, xx and yy are
 * placeholders that are never defined.
 *
 * Args: BNWORD32 *n, BNWORD32 const *mod, unsigned mlen, BNWORD32 inv)
 *                r3                  r4            r5             r6
 * r7, r8 and r9 are the triple-width accumulator.
 * r0 and r10 are temporary registers.
 * r11 and r12 are temporary pointers into n and mod, respectively. 
 * r2 (!) is another temporary register.
 *
 * NOTE(review): the file header lists r2 as the preserved TOC pointer,
 * and nothing here saves or restores it - that would have to be
 * addressed before this could be used.
 * NOTE(review): the loop loads through r23, not the r12 documented
 * above; r23 is in the preserved range r13-r31.  Possibly a typo for
 * r12 - confirm before finishing this routine.
 */
static unsigned const montReduce[] = {
	PPC_MTCTR(5),	/* ??? */
	PPC_LWZ(7,3,0),		/* Load low word of n into r7 */
	PPC_LWZ(10,4,0),	/* Fetch low word of mod */
	PPC_MULLW(0,7,6),	/* Invert r7 into r0 */
	PPC_STW(0,3,0),		/* Store back for future use */
	PPC_MULHWU(8,10,7),	/* Get high word of whatnot */
	PPC_MULLW(10,10,7),	/* Get low word of it */
	PPC_ADDC(7,7,10),	/* Add low word of product to r7 */
	PPC_ADDZE(8,8),		/* Add carry to high word */
	PPC_
	

	PPC_MULHW(8,7,6),
	PPC_ADDC(7,7,0),	/* Add inverse back to r7 */
	PPC_ADDZE(8,8),
	PPC_
	
	PPC_LWZU(
/* Loop: */
	PPC_LWZU(0,11,4),
	PPC_LWZU(10,23,-4),
	PPC_MULLW(2,0,10),
	PPC_ADDC(7,7,2),
	PPC_MULHWU(0,0,10),
	PPC_ADDE(8,8,0),
	PPC_ADDZE(9,9),
	PPC_BC(16,31,-7),	/* Branch to Loop if --ctr != 0 */

	PPC_ADDIC_(count,-1),
	PPC_LWZU(0,x,4),
	PPC_ADDC(0,7,0),
	PPC_STW(0,x,0),
	PPC_ADDZE(7,8),
	PPC_ADDZE(8,9),
	PPC_LI(9,0),
	PPC_BC(xx,2,yy),
	
};
#endif

/*
 * Three overlapped transition vectors, one per routine above.
 *
 * A PowerPC transition vector for a (potentially) inter-module jump
 * or call is a pair of words: the address of the code, followed by
 * the Table Of Contents (TOC) pointer that gets loaded into r2 (the
 * TOC register; r1 is the stack pointer).  None of the routines here
 * touch global variables, so they never use the TOC pointer and its
 * value is unimportant.
 *
 * That lets the vectors overlap: each entry serves as the code address
 * of one vector and, at the same time, as the uninteresting 32-bit
 * "TOC" word of the vector before it.  The trailing 0 supplies that
 * second word for the last vector, so it must stay, and the entries
 * must stay in this order.
 */
const unsigned *const lbnPPC_tv[] = {
	mulN1,		/* vector 0: { mulN1,   mulAdd1 } */
	mulAdd1,	/* vector 1: { mulAdd1, mulSub1 } */
	mulSub1,	/* vector 2: { mulSub1, 0       } */
	0		/* filler "TOC" word for vector 2 */
};

#else /* __MWERKS__ >= 0x800 */

/*
 * lbnMulN1_32 - multiply the len-word number at in by the single word
 * k and store the (len+1)-word product at out.
 *
 * Expects (*out, *in, len, k), len >= 1
 *           r3    r4   r5   r6
 *
 * Once len has been moved into CTR, the len register is reused as the
 * running carry word (the high half of the previous product); r7 and
 * r8 are scratch.  in and out are advanced by the load/store-with-update
 * instructions.  Nothing is returned.
 *
 * mullw, mulhwu and stw leave the carry bit alone, so the carry
 * cleared by the addic is still clear when the loop (or label) is
 * reached.
 */
asm void
lbnMulN1_32(register unsigned *out, register unsigned const *in,
	register unsigned len, register unsigned k)
{
	lwz 	r7,0(in) 	/* Load first word of in in r7 */
	mtctr	len			/* Move len into CTR; len is free from here on */
	mullw	r8,r7,k		/* Low half of multiply in r8 */
	addic	r0,r0,0		/* Clear carry bit for loop (adding 0 never carries) */
	mulhwu	len,r7,k	/* len is high word of product, for carry */
	stw 	r8,0(out)	/* *out = r8 */
	bdz-	label		/* Branch to Label if --ctr == 0 (len was 1) */
loop:
	lwzu	r7,4(in)	/* r7 = *++in */
	mullw	r8,r7,k		/* Low half of multiply in r8 */
	adde	r8,r8,len	/* Add carry word len and bit CF to r8 */
	stwu	r8,4(out)	/* *++out = r8 */
	mulhwu	len,r7,k	/* len is high word of product, for carry */
	bdnz+	loop		/* Branch to Loop if --ctr != 0 */
label:
	addze	len,len		/* Add carry flag to carry word */
	stw 	len,4(out)	/* Top word, one past the last product word */
	blr
}

/*
 * lbnMulAdd1_32 - add the product in[0..len-1] * k into out[0..len-1]
 * in place and return the carry word out of the top position.
 *
 * Expects (*out, *in, len, k), len >= 1
 *           r3    r4   r5   r6
 *
 * Once len has been moved into CTR, the len register (r5) is reused as
 * the running carry word; r0 holds the current word of out, r7 the
 * current word of in and r8 the low half of the product.  Each word
 * needs two carry-producing adds (product + carry word, then + out[i]);
 * the carry of the first is folded into len by addze, the carry of the
 * second is picked up by the next iteration's adde (or the final addze).
 */
asm unsigned
lbnMulAdd1_32(register unsigned *out, register unsigned const *in,
	register unsigned len, register unsigned k)
{
	lwz 	r7,0(in) 	/* Load first word of in in r7 */
	lwz 	r0,0(out)	/* Load first word of out into r0 */
	mullw	r8,r7,k 	/* Low half of multiply in r8 */
	mtctr	len 		/* Move len into CTR; len is free from here on */
	mulhwu	len,r7,k	/* High half of multiply in len (carry word) */
	addc	r8,r8,r0	/* r8 = r8 + r0, setting CF */
	stw 	r8,0(out)	/* Store result to memory */
	bdz-	label		/* Branch to Label if --ctr == 0 (len was 1) */
loop:
	lwzu	r7,4(in)	/* r7 = *++in */
	lwzu	r0,4(out)	/* r0 = *++out */
	mullw	r8,r7,k		/* r8 = low word of product */
	adde	r8,r8,len	/* Add carry word len and carry bit CF to r8 */
	mulhwu	len,r7,k	/* len is high word of product, for carry */
	addze	len,len		/* Add carry bit from low add to len (r5) */
	addc	r8,r8,r0	/* r8 = r8 + r0, setting CF */
	stw 	r8,0(out)	/* *out = r8 (out was already advanced by the lwzu) */
	bdnz+	loop		/* Branch to Loop if --ctr != 0 */
label:
	addze	r3,r5		/* Add carry flag to r5 (len) and move to r3 (return value) */
	blr
}

/*
 * lbnMulSub1_32 - subtract the product in[0..len-1] * k from
 * out[0..len-1] in place and return the final borrow word.
 *
 * Expects (*out, *in, len, k), len >= 1
 *           r3    r4   r5   r6
 *
 * Multiply and subtract is rather a pain.  If the subtract of the
 * low word of the product from out[i] generates a borrow, we want to
 * increment the carry word (initially in the range 0..0xfffffffe).
 * However, the PPC's carry bit CF is *clear* after a subtract that
 * borrows, so we want to add (1-CF) to the carry word.  This is done
 * using two instructions:
 *
 * SUBFME, subtract from minus one extended.  This computes
 *   rD = ~rS + 0xffffffff + CF.  Since rS is from 0 to 0xfffffffe,
 *   ~rS is from 1 through 0xffffffff, and the sum with 0xffffffff+CF is
 *   from 0x100000000 through 0x1ffffffff, i.e. a 32-bit result from
 *   0 through 0xffffffff with the carry flag set unconditionally, and
 * NOR, which is used as a bitwise invert NOT instruction.
 *
 * The SUBFME performs the computation rD = ~rS + 0xffffffff + CF,
 * = (-rS - 1) + (CF - 1) = -(rS - CF + 1) - 1 = ~(rS + 1-CF),
 * which is the bitwise complement of the value we want.
 * We want to add the complement of that result to the low word of the
 * product, which is just what a subtract would do, if only we could get
 * the carry flag clear.  But it's always set, except for SUBFE, and the
 * operation we just performed unconditionally *sets* the carry flag.  Ugh.
 * So find the complement in a separate instruction.
 */
asm unsigned
lbnMulSub1_32(register unsigned *out, register unsigned const *in,
	register unsigned len, register unsigned k)
{
	lwz 	r7,0(in) 	/* Load first word of in in r7 */
	lwz 	r0,0(out)	/* Load first word of out into r0 */
	mtctr	len 		/* Move len into CTR; len is free from here on */
	mullw	r8,r7,k 	/* Low half of multiply in r8 */
	mulhwu	len,r7,k	/* High half of multiply in len (carry word) */
	subfc	r8,r8,r0	/* r8 = r0 - r8, setting CF */
	stw 	r8,0(out)	/* Store result to memory */
	subfme	len,len		/* First of two insns to add (1-CF) to len */
	bdz-	label		/* Branch to Label if --ctr == 0 (len was 1) */
loop:
	lwzu	r7,4(in)	/* r7 = *++in */
	lwzu	r0,4(out)	/* r0 = *++out */
	nor 	len,len,len	/* Second of two insns to add (1-CF) to len */
	mullw	r8,r7,k		/* r8 = low word of product */
	addc	r8,r8,len	/* Add carry word len to r8 */
	mulhwu	len,r7,k	/* len is high word of product, for carry */
	addze	len,len		/* Add carry bit from low add to len */
	subfc	r8,r8,r0	/* r8 = r0 - r8, setting CF */
	stw 	r8,0(out)	/* *out = r8 (out was already advanced by the lwzu) */
	subfme	len,len		/* First of two insns to add (1-CF) to len */
	bdnz+	loop		/* Branch to Loop if --ctr != 0 */
label:
	nor 	r3,r5,r5	/* Finish adding (1-CF) to len (r5), store in r3 (return value) */
	blr
}

#endif /* __MWERKS__ >= 0x800 */
/* 45678901234567890123456789012345678901234567890123456789012345678901234567 */