Description: Fix many cases of unaligned memory accesses
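 The block-processing loops in sha3.c, sha512.c, skein256.c and skein512.c
 cast the caller-supplied byte pointer directly to uint64_t * and
 dereference it, and sha512.c stores its digest the same way.  On
 architectures where unaligned word accesses fault (SPARC, for example)
 this crashes, and it is undefined behaviour in any case.  The patch adds
 cbits/align.h with portable load/store helpers and an alignment check,
 converts the SHA-512 digest store to the new store_be64() helper, and
 makes the update loops copy each block into a stack-local, 8-byte-aligned
 trampoline buffer whenever the input pointer is not suitably aligned.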
Author: James Clarke <jrtc27@jrtc27.com>
Forwarded: https://github.com/vincenthz/hs-cryptohash/pull/44/files
---
This patch header follows DEP-3: http://dep.debian.net/deps/dep3/
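
The same trampoline (bounce-buffer) pattern is applied in cbits/sha3.c,
cbits/sha512.c, cbits/skein256.c and cbits/skein512.c.  A rough sketch of
the pattern, using the SHA-512 block size and names taken from the hunks
below (explanatory only, not part of the diff):

    if (need_alignment(data, 8)) {
            /* input is not 8-byte aligned: bounce each 128-byte block
             * through an aligned stack buffer before compressing it */
            uint64_t tramp[16];
            for (; len >= 128; len -= 128, data += 128) {
                    memcpy(tramp, data, 128);
                    sha512_do_chunk(ctx, tramp);
            }
    } else {
            /* input already aligned: process the blocks in place */
            for (; len >= 128; len -= 128, data += 128)
                    sha512_do_chunk(ctx, (uint64_t *) data);
    }

On i386 and x86-64, UNALIGNED_ACCESS_OK is defined and need_alignment()
expands to 0, so the copy path is dead code and the in-place loop is used
unconditionally.
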
--- /dev/null
+++ b/cbits/align.h
@@ -0,0 +1,157 @@
+#ifndef ALIGN_H
+#define ALIGN_H
+
+#include "bitfn.h"
+
+#if (defined(__i386__))
+# define UNALIGNED_ACCESS_OK
+#elif defined(__x86_64__)
+# define UNALIGNED_ACCESS_OK
+#else
+# define UNALIGNED_ACCESS_FAULT
+#endif
+
+/* alignment must be a power of 2; IS_ALIGNED(p,alignment) evaluates to the
+ * pointer's low bits, i.e. non-zero when p is NOT aligned, e.g. IS_ALIGNED(p,8) */
+#define IS_ALIGNED(p,alignment) (((uintptr_t) (p)) & ((alignment)-1))
+
+#ifdef WITH_ASSERT_ALIGNMENT
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+# define ASSERT_ALIGNMENT(up, alignment) \
+ do { if (IS_ALIGNED(up, alignment)) \
+ { printf("ALIGNMENT-ASSERT-FAILURE: %s:%d: ptr=%p alignment=%d\n", __FILE__, __LINE__, (void *) up, (alignment)); \
+ exit(99); \
+ }; } while (0)
+#else
+# define ASSERT_ALIGNMENT(p, n) do {} while (0)
+#endif
+
+#ifdef UNALIGNED_ACCESS_OK
+#define need_alignment(p,n) (0)
+#else
+#define need_alignment(p,n) IS_ALIGNED(p,n)
+#endif
+
+static inline uint32_t load_be32_aligned(const uint8_t *p)
+{
+ return be32_to_cpu(*((uint32_t *) p));
+}
+
+static inline uint64_t load_be64_aligned(const uint8_t *p)
+{
+ return be64_to_cpu(*((uint64_t *) p));
+}
+
+static inline void store_be32_aligned(uint8_t *p, uint32_t val)
+{
+ *((uint32_t *) p) = cpu_to_be32(val);
+}
+
+static inline void store_be64_aligned(uint8_t *p, uint64_t val)
+{
+ *((uint64_t *) p) = cpu_to_be64(val);
+}
+
+static inline uint32_t load_le32_aligned(const uint8_t *p)
+{
+ return le32_to_cpu(*((uint32_t *) p));
+}
+
+static inline uint64_t load_le64_aligned(const uint8_t *p)
+{
+ return le64_to_cpu(*((uint64_t *) p));
+}
+
+static inline void store_le32_aligned(uint8_t *p, uint32_t val)
+{
+ *((uint32_t *) p) = cpu_to_le32(val);
+}
+
+static inline void store_le64_aligned(uint8_t *p, uint64_t val)
+{
+ *((uint64_t *) p) = cpu_to_le64(val);
+}
+
+#ifdef UNALIGNED_ACCESS_OK
+
+#define load_be32(p) load_be32_aligned(p)
+#define load_be64(p) load_be64_aligned(p)
+
+#define store_be32(p, v) store_be32_aligned((p), (v))
+#define store_be64(p, v) store_be64_aligned((p), (v))
+
+#define load_le32(p) load_le32_aligned(p)
+#define load_le64(p) load_le64_aligned(p)
+
+#define store_le32(p, v) store_le32_aligned((p), (v))
+#define store_le64(p, v) store_le64_aligned((p), (v))
+
+#else
+
+static inline uint32_t load_be32(const uint8_t *p)
+{
+ return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) | ((uint32_t)p[2] << 8) | ((uint32_t)p[3]);
+}
+
+static inline uint64_t load_be64(const uint8_t *p)
+{
+ return ((uint64_t)p[0] << 56) | ((uint64_t)p[1] << 48) | ((uint64_t)p[2] << 40) | ((uint64_t)p[3] << 32) |
+ ((uint64_t)p[4] << 24) | ((uint64_t)p[5] << 16) | ((uint64_t)p[6] << 8) | ((uint64_t)p[7]);
+}
+
+static inline void store_be32(uint8_t *p, uint32_t val)
+{
+ p[0] = (val >> 24);
+ p[1] = (val >> 16) & 0xFF;
+ p[2] = (val >> 8) & 0xFF;
+ p[3] = (val ) & 0xFF;
+}
+
+static inline void store_be64(uint8_t *p, uint64_t val)
+{
+ p[0] = (val >> 56);
+ p[1] = (val >> 48) & 0xFF;
+ p[2] = (val >> 40) & 0xFF;
+ p[3] = (val >> 32) & 0xFF;
+ p[4] = (val >> 24) & 0xFF;
+ p[5] = (val >> 16) & 0xFF;
+ p[6] = (val >> 8) & 0xFF;
+ p[7] = (val ) & 0xFF;
+}
+
+static inline uint32_t load_le32(const uint8_t *p)
+{
+ return ((uint32_t)p[0]) | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
+}
+
+static inline uint64_t load_le64(const uint8_t *p)
+{
+ return ((uint64_t)p[0]) | ((uint64_t)p[1] << 8) | ((uint64_t)p[2] << 16) | ((uint64_t)p[3] << 24) |
+ ((uint64_t)p[4] << 32) | ((uint64_t)p[5] << 40) | ((uint64_t)p[6] << 48) | ((uint64_t)p[7] << 56);
+}
+
+static inline void store_le32(uint8_t *p, uint32_t val)
+{
+ p[0] = (val ) & 0xFF;
+ p[1] = (val >> 8) & 0xFF;
+ p[2] = (val >> 16) & 0xFF;
+ p[3] = (val >> 24);
+}
+
+static inline void store_le64(uint8_t *p, uint64_t val)
+{
+ p[0] = (val ) & 0xFF;
+ p[1] = (val >> 8) & 0xFF;
+ p[2] = (val >> 16) & 0xFF;
+ p[3] = (val >> 24) & 0xFF;
+ p[4] = (val >> 32) & 0xFF;
+ p[5] = (val >> 40) & 0xFF;
+ p[6] = (val >> 48) & 0xFF;
+ p[7] = (val >> 56);
+}
+
+#endif
+
+#endif
--- a/cbits/sha3.c
+++ b/cbits/sha3.c
@@ -25,6 +25,7 @@
#include <stdint.h>
#include <string.h>
#include "bitfn.h"
+#include "align.h"
#include "sha3.h"
#define KECCAK_NB_ROUNDS 24
@@ -101,7 +102,7 @@ void cryptohash_sha3_init(struct sha3_ct
{
memset(ctx, 0, sizeof(*ctx));
ctx->hashlen = hashlen / 8;
- ctx->bufsz = 200 - 2 * ctx->hashlen;
+ ctx->bufsz = SHA3_BUF_SIZE(hashlen);
}
void cryptohash_sha3_update(struct sha3_ctx *ctx, uint8_t *data, uint32_t len)
@@ -124,9 +125,18 @@ void cryptohash_sha3_update(struct sha3_
ctx->bufindex = 0;
}
- /* process as much ctx->bufsz-block */
- for (; len >= ctx->bufsz; len -= ctx->bufsz, data += ctx->bufsz)
- sha3_do_chunk(ctx->state, (uint64_t *) data, ctx->bufsz / 8);
+ if (need_alignment(data, 8)) {
+ uint64_t tramp[SHA3_BUF_SIZE_MAX/8];
+ ASSERT_ALIGNMENT(tramp, 8);
+ for (; len >= ctx->bufsz; len -= ctx->bufsz, data += ctx->bufsz) {
+ memcpy(tramp, data, ctx->bufsz);
+ sha3_do_chunk(ctx->state, tramp, ctx->bufsz / 8);
+ }
+ } else {
+ /* process as many ctx->bufsz-byte blocks as possible */
+ for (; len >= ctx->bufsz; len -= ctx->bufsz, data += ctx->bufsz)
+ sha3_do_chunk(ctx->state, (uint64_t *) data, ctx->bufsz / 8);
+ }
/* append data into buf */
if (len) {
--- a/cbits/sha3.h
+++ b/cbits/sha3.h
@@ -37,6 +37,22 @@ struct sha3_ctx
};
#define SHA3_CTX_SIZE sizeof(struct sha3_ctx)
+#define SHA3_CTX_BUF_MAX_SIZE (SHA3_CTX_SIZE + SHA3_BUF_SIZE_MAX)
+#define SHA3_BITSIZE_MIN 128
+#define SHA3_BITSIZE_MAX 512
+
+#define SHA3_BUF_SIZE(bitsize) (200 - 2 * ((bitsize) / 8))
+
+#define SHA3_BUF_SIZE_MIN SHA3_BUF_SIZE(SHA3_BITSIZE_MAX)
+#define SHA3_BUF_SIZE_MAX SHA3_BUF_SIZE(SHA3_BITSIZE_MIN)
+
+/*
+ * buffer size:
+ *
+ * 128 bits (shake 128 bits) => 200 - 2 * (128 / 8) = 200 - 2*16 = 200 - 32 = 168 bytes
+ * 224 bits (SHA3 224 bits) => 200 - 2 * (224 / 8) = 200 - 2*28 = 200 - 56 = 144 bytes
+ * 512 bits (SHA3 512 bits) => 200 - 2 * (512 / 8) = 200 - 2*64 = 200 - 128 = 72 bytes
+ */
void cryptohash_sha3_init(struct sha3_ctx *ctx, uint32_t hashlen);
void cryptohash_sha3_update(struct sha3_ctx *ctx, uint8_t *data, uint32_t len);
--- a/cbits/sha512.c
+++ b/cbits/sha512.c
@@ -24,6 +24,7 @@
#include <string.h>
#include "bitfn.h"
+#include "align.h"
#include "sha512.h"
void cryptohash_sha384_init(struct sha512_ctx *ctx)
@@ -153,9 +154,18 @@ void cryptohash_sha512_update(struct sha
index = 0;
}
- /* process as much 128-block as possible */
- for (; len >= 128; len -= 128, data += 128)
- sha512_do_chunk(ctx, (uint64_t *) data);
+ if (need_alignment(data, 8)) {
+ uint64_t tramp[16];
+ ASSERT_ALIGNMENT(tramp, 8);
+ for (; len >= 128; len -= 128, data += 128) {
+ memcpy(tramp, data, 128);
+ sha512_do_chunk(ctx, tramp);
+ }
+ } else {
+ /* process as many 128-byte blocks as possible */
+ for (; len >= 128; len -= 128, data += 128)
+ sha512_do_chunk(ctx, (uint64_t *) data);
+ }
/* append data into buf */
if (len)
@@ -175,7 +185,6 @@ void cryptohash_sha512_finalize(struct s
static uint8_t padding[128] = { 0x80, };
uint32_t i, index, padlen;
uint64_t bits[2];
- uint64_t *p = (uint64_t *) out;
/* cpu -> big endian */
bits[0] = cpu_to_be64((ctx->sz[1] << 3 | ctx->sz[0] >> 61));
@@ -191,7 +200,7 @@ void cryptohash_sha512_finalize(struct s
/* store to digest */
for (i = 0; i < 8; i++)
- p[i] = cpu_to_be64(ctx->h[i]);
+ store_be64(out+8*i, ctx->h[i]);
}
#include <stdio.h>
--- a/cbits/skein256.c
+++ b/cbits/skein256.c
@@ -26,6 +26,7 @@
#include "skein.h"
#include "skein256.h"
#include "bitfn.h"
+#include "align.h"
static const uint8_t K256_0[2] = { 14, 16, };
static const uint8_t K256_1[2] = { 52, 57, };
@@ -144,9 +145,18 @@ void cryptohash_skein256_update(struct s
ctx->bufindex = 0;
}
- /* process as much 32-block as possible except the last one in case we finalize */
- for (; len > 32; len -= 32, data += 32)
- skein256_do_chunk(ctx, (uint64_t *) data, 32);
+ if (need_alignment(data, 8)) {
+ uint64_t tramp[4];
+ ASSERT_ALIGNMENT(tramp, 8);
+ for (; len > 32; len -= 32, data += 32) {
+ memcpy(tramp, data, 32);
+ skein256_do_chunk(ctx, tramp, 32);
+ }
+ } else {
+ /* process as many 32-byte blocks as possible, except the last one in case we finalize */
+ for (; len > 32; len -= 32, data += 32)
+ skein256_do_chunk(ctx, (uint64_t *) data, 32);
+ }
/* append data into buf */
if (len) {
--- a/cbits/skein512.c
+++ b/cbits/skein512.c
@@ -26,6 +26,7 @@
#include "skein.h"
#include "skein512.h"
#include "bitfn.h"
+#include "align.h"
static const uint8_t K512_0[4] = { 46, 36, 19, 37, };
static const uint8_t K512_1[4] = { 33, 27, 14, 42, };
@@ -162,9 +163,18 @@ void cryptohash_skein512_update(struct s
ctx->bufindex = 0;
}
- /* process as much 64-block as possible except the last one in case we finalize */
- for (; len > 64; len -= 64, data += 64)
- skein512_do_chunk(ctx, (uint64_t *) data, 64);
+ if (need_alignment(data, 8)) {
+ uint64_t tramp[8];
+ ASSERT_ALIGNMENT(tramp, 8);
+ for (; len > 64; len -= 64, data += 64) {
+ memcpy(tramp, data, 64);
+ skein512_do_chunk(ctx, tramp, 64);
+ }
+ } else {
+ /* process as many 64-byte blocks as possible, except the last one in case we finalize */
+ for (; len > 64; len -= 64, data += 64)
+ skein512_do_chunk(ctx, (uint64_t *) data, 64);
+ }
/* append data into buf */
if (len) {