aboutsummaryrefslogtreecommitdiff
path: root/lib/libmd/aarch64/sha1block.S
blob: e16fb36342fdee81c77d5a54584c353ad6d07baa (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
/*-
 * Copyright (c) 2024 Robert Clausecker <fuz@FreeBSD.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * sha1block_sha1 implementation based on sha1-arm.c,
 * written and placed in public domain by Jeffrey Walton
 * based on code from ARM, and by Johannes Schneiders, Skip
 * Hovsmith and Barry O'Rourke for the mbedTLS project.
 */

#include <machine/asm.h>

/*
 * Scalar SHA1 implementation.
 *
 * Due to the ample register file available on AArch64, the w array is
 * kept entirely in registers.  The saved a-e variables are instead kept
 * in memory as we don't have that many registers.
 */

	// sha1block(SHA1_CTX, buf, len)
	//
	// Arguments arrive in x0 (ctx), x1 (buf) and x2 (len).  The five
	// 32 bit state words are read from and written back to the first
	// 20 bytes of ctx.  len is rounded down to a multiple of 64 and
	// buf is consumed in 64 byte blocks; nothing happens if that
	// leaves no complete block.
	//
	// w_7 .. w_15 live in x19 .. x27, which the function body spills
	// to the stack on entry and reloads before returning.
ENTRY(_libmd_sha1block_scalar)
ctx	.req	x0		// SHA1 context, state words at offsets 0, 4, 8, 12, 16
buf	.req	x1		// current read position in the input
len	.req	x2		// bytes still to be processed
w	.req	sp		// alias for sp; not referenced by the code in this function
a	.req	w3		// working variables a .. e
b	.req	w4
c	.req	w5
d	.req	w6
e	.req	w7
k	.req	w8		// round constant of the current group of rounds
f	.req	w9		// output of the round function (func1 .. func4)
tmp	.req	w10		// scratch, clobbered by shuffle, func1, func3 and mix
w_0	.req	w11		// w_0 .. w_15: sliding window of 16 message schedule words
w_1	.req	w12
w_2	.req	w13
w_3	.req	w14
w_4	.req	w15
w_5	.req	w16
w_6	.req	w17
// w18 is the platform register
w_7	.req	w19
w_8	.req	w20
w_9	.req	w21
w_10	.req	w22
w_11	.req	w23
w_12	.req	w24
w_13	.req	w25
w_14	.req	w26
w_15	.req	w27

/*
 * Message schedule step.  On entry w_i holds w[i-16]; on exit it holds
 *
 *	w[i] = (w[i-16] ^ w[i-14] ^ w[i-8] ^ w[i-3]) rol 1
 *
 * w_i3, w_i8 and w_i14 are the registers holding w[i-3], w[i-8] and
 * w[i-14]; they are only read.  Clobbers tmp.
 */
.macro	shuffle	w_i, w_i3, w_i8, w_i14
	eor	\w_i, \w_i, \w_i3
	eor	tmp, \w_i8, \w_i14
	eor	\w_i, \w_i, tmp		// w[i-16] ^ w[i-14] ^ w[i-8] ^ w[i-3]
	ror	\w_i, \w_i, #31		// w[i] = ... ror #31
.endm

/*
 * Round function for rounds 0--19: f = (b & c) | (~b & d), i.e. each
 * bit of b selects between the corresponding bits of c and d.
 * The a and e arguments are accepted for a uniform call shape but are
 * not used.  Clobbers tmp.
 */
.macro	func1	a, b, c, d, e
	and	f, \c, \b
	bic	tmp, \d, \b		// d & ~b
	orr	f, f, tmp
.endm

/*
 * Round function for rounds 20--39: f = b ^ c ^ d (parity).
 * The a and e arguments are not used.  Does not touch tmp.
 */
.macro	func2	a, b, c, d, e
	eor	f, \b, \c
	eor	f, f, \d
.endm

/*
 * Round function for rounds 40--59: f = (b & c) | ((b ^ c) & d), which
 * is the bitwise majority of b, c and d.
 * The a and e arguments are not used.  Clobbers tmp.
 */
.macro	func3	a, b, c, d, e
	eor	tmp, \b, \c
	and	f, \b, \c
	and	tmp, tmp, \d		// bits where b and c differ take the value of d
	orr	f, f, tmp
.endm

/*
 * Round function for rounds 60--79: f = b ^ c ^ d, the same parity
 * function that func2 computes.  The a and e arguments are not used.
 */
.macro	func4	a, b, c, d, e
	eor	f, \b, \c
	eor	f, f, \d
.endm

/*
 * Common tail of every round.  Expects the round function result in f
 * and the round constant in k, then performs
 *
 *	b = b rol 30
 *	e = e + (a rol 5) + f + k + w_i
 *
 * Registers are not moved afterwards; instead the caller passes the
 * a--e arguments rotated by one position on the next round, so that
 * the updated e takes the role of a.  The c and d arguments are not
 * used.  Clobbers tmp.  The round function must have been evaluated
 * before this macro runs because b is rotated in place here.
 */
.macro	mix	a, b, c, d, e, w_i
	ror	\b, \b, #2		// b rol 30
	ror	tmp, \a, #27		// a rol 5
	add	\e, \e, \w_i
	add	tmp, tmp, k
	add	\e, \e, f
	add	\e, \e, tmp		// (a ror 27) + e + f + k + w[i]
.endm

/*
 * One of rounds 0--15.  w_i holds a message word exactly as loaded
 * from the input buffer; it is byte swapped in place with rev before
 * use (SHA1 interprets the message as big endian words) and the
 * swapped value stays in w_i for the later schedule computation.
 * Uses func1 as the round function.  Clobbers f and tmp.
 */
.macro	round1	a, b, c, d, e, w_i
	func1 	\a, \b, \c, \d, \e
	rev	\w_i, \w_i
	mix	\a, \b, \c, \d, \e, \w_i
.endm

/*
 * Generic round for i >= 16.  First advances the message schedule so
 * that w_i holds w[i] (see shuffle), then evaluates the round function
 * named by the func argument into f and finishes with mix.
 * Clobbers f and tmp.
 */
.macro	round	func, a, b, c, d, e, w_i, w_i3, w_i8, w_i14
	shuffle	\w_i, \w_i3, \w_i8, \w_i14
	\func	\a, \b, \c, \d, \e
	mix	\a, \b, \c, \d, \e, \w_i
.endm

/*
 * Rounds 16--19: round function func1, but w[i] is produced by the
 * message schedule instead of being loaded from the input.
 * Clobbers f and tmp.
 */
.macro	round1x	a, b, c, d, e, w_i, w_i3, w_i8, w_i14
	shuffle	\w_i, \w_i3, \w_i8, \w_i14
	func1	\a, \b, \c, \d, \e
	mix	\a, \b, \c, \d, \e, \w_i
.endm

/*
 * Rounds 20--39: schedule step, round function func2, then mix.
 * Clobbers f and tmp.
 */
.macro	round2	a, b, c, d, e, w_i, w_i3, w_i8, w_i14
	shuffle	\w_i, \w_i3, \w_i8, \w_i14
	func2	\a, \b, \c, \d, \e
	mix	\a, \b, \c, \d, \e, \w_i
.endm

/*
 * Rounds 40--59: schedule step, round function func3, then mix.
 * Clobbers f and tmp.
 */
.macro	round3	a, b, c, d, e, w_i, w_i3, w_i8, w_i14
	shuffle	\w_i, \w_i3, \w_i8, \w_i14
	func3	\a, \b, \c, \d, \e
	mix	\a, \b, \c, \d, \e, \w_i
.endm

/*
 * Rounds 60--79: schedule step, round function func4, then mix.
 * Clobbers f and tmp.
 */
.macro	round4	a, b, c, d, e, w_i, w_i3, w_i8, w_i14
	shuffle	\w_i, \w_i3, \w_i8, \w_i14
	func4	\a, \b, \c, \d, \e
	mix	\a, \b, \c, \d, \e, \w_i
.endm

	ands	len, len, #~63		// take length in multiples of block length
	beq	1f			// bail out if input empty

	/*
	 * Stack frame, 96 bytes in total (a multiple of 16):
	 *   sp+0  .. sp+19	SHA1 state as it was at the start of the block
	 *   sp+20 .. sp+23	unused
	 *   sp+24 .. sp+95	saved x19 .. x27, which back w_7 .. w_15
	 */
	sub	sp, sp, #24+9*8		// allocate stack space
	str	x19, [sp, #24+0*8]
	stp	x20, x21, [sp, #24+1*8]
	stp	x22, x23, [sp, #24+3*8]
	stp	x24, x25, [sp, #24+5*8]
	stp	x26, x27, [sp, #24+7*8]

	ldp	a, b, [ctx, #0]		// load SHA1 state from context
	ldp	c, d, [ctx, #8]
	ldr	e, [ctx, #16]

	/* main loop: one iteration per 64 byte block */
0:	stp	a, b, [sp, #0]		// save old SHA1 state
	stp	c, d, [sp, #8]
	str	e, [sp, #16]

	movz	k, #0x7999		// round constant 1
	movk	k, #0x5a82, lsl #16

	/*
	 * Rounds 0--15.  The message words are loaded in pairs right
	 * before they are needed; round1 byte swaps them in place.  The
	 * a--e arguments rotate by one position per round, see mix.
	 */
	ldp	w_0, w_1, [buf, #0*4]
	round1	a, b, c, d, e, w_0
	round1	e, a, b, c, d, w_1

	ldp	w_2, w_3, [buf, #2*4]
	round1	d, e, a, b, c, w_2
	round1	c, d, e, a, b, w_3

	ldp	w_4, w_5, [buf, #4*4]
	round1	b, c, d, e, a, w_4
	round1	a, b, c, d, e, w_5

	ldp	w_6, w_7, [buf, #6*4]
	round1	e, a, b, c, d, w_6
	round1	d, e, a, b, c, w_7

	ldp	w_8, w_9, [buf, #8*4]
	round1	c, d, e, a, b, w_8
	round1	b, c, d, e, a, w_9

	ldp	w_10, w_11, [buf, #10*4]
	round1	a, b, c, d, e, w_10
	round1	e, a, b, c, d, w_11

	ldp	w_12, w_13, [buf, #12*4]
	round1	d, e, a, b, c, w_12
	round1	c, d, e, a, b, w_13

	ldp	w_14, w_15, [buf, #14*4]
	round1	b, c, d, e, a, w_14
	round1	a, b, c, d, e, w_15

	/*
	 * Rounds 16--19: same round function and constant, but from here
	 * on w[i] is computed from w[i-3], w[i-8], w[i-14] and w[i-16],
	 * overwriting the register that held w[i-16].
	 */
	round1x	e, a, b, c, d, w_0,  w_13,  w_8,  w_2
	round1x	d, e, a, b, c, w_1,  w_14,  w_9,  w_3
	round1x	c, d, e, a, b, w_2,  w_15, w_10,  w_4
	round1x	b, c, d, e, a, w_3,  w_0,  w_11,  w_5

	movz	k, #0xeba1		// round constant 2
	movk	k, #0x6ed9, lsl #16

	/* rounds 20--39 */
	round2	a, b, c, d, e, w_4,  w_1,  w_12,  w_6
	round2	e, a, b, c, d, w_5,  w_2,  w_13,  w_7
	round2	d, e, a, b, c, w_6,  w_3,  w_14,  w_8
	round2	c, d, e, a, b, w_7,  w_4,  w_15,  w_9
	round2	b, c, d, e, a, w_8,  w_5,  w_0,   w_10

	round2	a, b, c, d, e, w_9,  w_6,  w_1,   w_11
	round2	e, a, b, c, d, w_10, w_7,  w_2,   w_12
	round2	d, e, a, b, c, w_11, w_8,  w_3,   w_13
	round2	c, d, e, a, b, w_12, w_9,  w_4,   w_14
	round2	b, c, d, e, a, w_13, w_10, w_5,   w_15

	round2	a, b, c, d, e, w_14, w_11, w_6,   w_0
	round2	e, a, b, c, d, w_15, w_12, w_7,   w_1
	round2	d, e, a, b, c, w_0,  w_13, w_8,   w_2
	round2	c, d, e, a, b, w_1,  w_14, w_9,   w_3
	round2	b, c, d, e, a, w_2,  w_15, w_10,  w_4

	round2	a, b, c, d, e, w_3,  w_0,  w_11,  w_5
	round2	e, a, b, c, d, w_4,  w_1,  w_12,  w_6
	round2	d, e, a, b, c, w_5,  w_2,  w_13,  w_7
	round2	c, d, e, a, b, w_6,  w_3,  w_14,  w_8
	round2	b, c, d, e, a, w_7,  w_4,  w_15,  w_9

	movz	k, #0xbcdc		// round constant 3
	movk	k, #0x8f1b, lsl #16

	/* rounds 40--59 */
	round3	a, b, c, d, e, w_8,  w_5,  w_0,  w_10
	round3	e, a, b, c, d, w_9,  w_6,  w_1,  w_11
	round3	d, e, a, b, c, w_10, w_7,  w_2,  w_12
	round3	c, d, e, a, b, w_11, w_8,  w_3,  w_13
	round3	b, c, d, e, a, w_12, w_9,  w_4,  w_14

	round3	a, b, c, d, e, w_13, w_10, w_5,  w_15
	round3	e, a, b, c, d, w_14, w_11, w_6,  w_0
	round3	d, e, a, b, c, w_15, w_12, w_7,  w_1
	round3	c, d, e, a, b, w_0,  w_13, w_8,  w_2
	round3	b, c, d, e, a, w_1,  w_14, w_9,  w_3

	round3	a, b, c, d, e, w_2,  w_15, w_10, w_4
	round3	e, a, b, c, d, w_3,  w_0,  w_11, w_5
	round3	d, e, a, b, c, w_4,  w_1,  w_12, w_6
	round3	c, d, e, a, b, w_5,  w_2,  w_13, w_7
	round3	b, c, d, e, a, w_6,  w_3,  w_14, w_8

	round3	a, b, c, d, e, w_7,  w_4,  w_15, w_9
	round3	e, a, b, c, d, w_8,  w_5,  w_0,  w_10
	round3	d, e, a, b, c, w_9,  w_6,  w_1,  w_11
	round3	c, d, e, a, b, w_10, w_7,  w_2,  w_12
	round3	b, c, d, e, a, w_11, w_8,  w_3,  w_13

	movz	k, #0xc1d6		// round constant 4
	movk	k, #0xca62, lsl #16

	/* rounds 60--79 */
	round4	a, b, c, d, e, w_12, w_9,  w_4,  w_14
	round4	e, a, b, c, d, w_13, w_10, w_5,  w_15
	round4	d, e, a, b, c, w_14, w_11, w_6,  w_0
	round4	c, d, e, a, b, w_15, w_12, w_7,  w_1
	round4	b, c, d, e, a, w_0,  w_13, w_8,  w_2

	round4	a, b, c, d, e, w_1,  w_14, w_9,  w_3
	round4	e, a, b, c, d, w_2,  w_15, w_10, w_4
	round4	d, e, a, b, c, w_3,  w_0,  w_11, w_5
	round4	c, d, e, a, b, w_4,  w_1,  w_12, w_6
	round4	b, c, d, e, a, w_5,  w_2,  w_13, w_7

	round4	a, b, c, d, e, w_6,  w_3,  w_14, w_8
	round4	e, a, b, c, d, w_7,  w_4,  w_15, w_9
	round4	d, e, a, b, c, w_8,  w_5,  w_0,  w_10
	round4	c, d, e, a, b, w_9,  w_6,  w_1,  w_11
	round4	b, c, d, e, a, w_10, w_7,  w_2,  w_12

	round4	a, b, c, d, e, w_11, w_8,  w_3,  w_13
	round4	e, a, b, c, d, w_12, w_9,  w_4,  w_14
	round4	d, e, a, b, c, w_13, w_10, w_5,  w_15
	round4	c, d, e, a, b, w_14, w_11, w_6,  w_0
	round4	b, c, d, e, a, w_15, w_12, w_7,  w_1

	/*
	 * 80 rounds is a multiple of five, so the argument rotation has
	 * come full circle and a--e are back in their own registers.
	 * The schedule registers are no longer needed for this block, so
	 * w_0 .. w_4 serve as scratch for the saved state.
	 */
	ldp	w_0, w_1, [sp, #0]	// reload saved SHA1 state
	ldp	w_2, w_3, [sp, #8]
	ldr	w_4, [sp, #16]

	add	a, a, w_0		// new state = old state + working variables
	add	b, b, w_1
	add	c, c, w_2
	add	d, d, w_3
	add	e, e, w_4

	add	buf, buf, #64		// advance to the next block
	subs	len, len, #64
	bhi	0b			// loop while input remains

	stp	a, b, [ctx, #0]		// write updated SHA1 state
	stp	c, d, [ctx, #8]
	str	e, [ctx, #16]

	ldr	x19, [sp, #24+0*8]	// restore the saved registers and release the frame
	ldp	x20, x21, [sp, #24+1*8]
	ldp	x22, x23, [sp, #24+3*8]
	ldp	x24, x25, [sp, #24+5*8]
	ldp	x26, x27, [sp, #24+7*8]
	add	sp, sp, #24+9*8

1:	ret
END(_libmd_sha1block_scalar)

/*
 * SHA1 implementation using the SHA1 instruction set extension.
 */

	.arch_extension sha2

	// sha1block(SHA1_CTX, buf, len)
	//
	// Arguments arrive in x0 (ctx), x1 (buf) and x2 (len).  The five
	// 32 bit state words are read from and written back to the first
	// 20 bytes of ctx.  len is rounded down to a multiple of 64 and
	// buf is consumed in 64 byte blocks; nothing happens if that
	// leaves no complete block.
	//
	// Only x3, v0--v6 and v16--v23 are written in addition to the
	// argument registers, so no stack frame is needed.
ENTRY(_libmd_sha1block_sha1)
	/* ctx, buf, len: same as for sha1block_scalar */
kaddr	.req	x3			// address of the round constant table
abcd	.req	v0			// state words a, b, c, d (a in lane 0)
abcd_q	.req	q0			// alias for use with scalar instructions
abcd_s	.req	s0			// lane 0 of abcd, i.e. a; input to sha1h
e0	.req	s1			// e0 and e1 alternate between holding the e
e0_v	.req	v1			// input of the current and of the next group
e1	.req	s2
abcd_saved .req	v3			// state at the start of the current block
e0_saved .req	v4
tmp0	.req	v5			// tmp0 and tmp1 alternate: w[i..i+3] + k for the
tmp1	.req	v6			// current and for the next group of four rounds
msg0	.req	v16			// sliding window of 16 message schedule words
msg1	.req	v17
msg2	.req	v18
msg3	.req	v19
k0	.req	v20			// round constants 1--4, each replicated
k1	.req	v21			// into all four lanes
k2	.req	v22
k3	.req	v23

	ands	len, len, #~63		// take length in multiples of block length
	beq	1f			// bail out if input empty

	ldr	abcd_q, [ctx, #0]	// load SHA1 state from context
	ldr	e0, [ctx, #16]

	adrp	kaddr, k1234
	add	kaddr, kaddr, #:lo12:k1234
	ld4r	{k0.4s, k1.4s, k2.4s, k3.4s}, [kaddr]

	/* main loop: one iteration per 64 byte block */
0:	mov	abcd_saved.16b, abcd.16b
	mov	e0_saved.16b, e0_v.16b

	ld1	{msg0.4s, msg1.4s, msg2.4s, msg3.4s}, [buf], #64
	rev32	msg0.16b, msg0.16b	// message words are big endian
	rev32	msg1.16b, msg1.16b
	rev32	msg2.16b, msg2.16b
	rev32	msg3.16b, msg3.16b

	add	tmp0.4s, msg0.4s, k0.4s	// w[0..3] + k
	add	tmp1.4s, msg1.4s, k0.4s	// w[4..7] + k

	/*
	 * Each group below performs four rounds.  sha1h derives the e
	 * input of the following group from the current a, sha1c/p/m
	 * runs the rounds, the add prepares w + k for the group after
	 * the next one (so the constant switches two groups ahead of
	 * the round function), and sha1su1/sha1su0 advance the message
	 * schedule.
	 */

	/* rounds 0--3 */
	sha1h	e1, abcd_s
	sha1c	abcd_q, e0, tmp0.4s
	add	tmp0.4s, msg2.4s, k0.4s
	sha1su0	msg0.4s, msg1.4s, msg2.4s

	/* rounds 4--7 */
	sha1h	e0, abcd_s
	sha1c	abcd_q, e1, tmp1.4s
	add	tmp1.4s, msg3.4s, k0.4s
	sha1su1	msg0.4s, msg3.4s
	sha1su0	msg1.4s, msg2.4s, msg3.4s

	/* rounds 8--11 */
	sha1h	e1, abcd_s
	sha1c	abcd_q, e0, tmp0.4s
	add	tmp0.4s, msg0.4s, k0.4s
	sha1su1	msg1.4s, msg0.4s
	sha1su0	msg2.4s, msg3.4s, msg0.4s

	/* rounds 12--15 */
	sha1h	e0, abcd_s
	sha1c	abcd_q, e1, tmp1.4s
	add	tmp1.4s, msg1.4s, k1.4s
	sha1su1	msg2.4s, msg1.4s
	sha1su0	msg3.4s, msg0.4s, msg1.4s

	/* rounds 16--19 */
	sha1h	e1, abcd_s
	sha1c	abcd_q, e0, tmp0.4s
	add	tmp0.4s, msg2.4s, k1.4s
	sha1su1	msg3.4s, msg2.4s
	sha1su0	msg0.4s, msg1.4s, msg2.4s

	/* rounds 20--23 */
	sha1h	e0, abcd_s
	sha1p	abcd_q, e1, tmp1.4s
	add	tmp1.4s, msg3.4s, k1.4s
	sha1su1	msg0.4s, msg3.4s
	sha1su0	msg1.4s, msg2.4s, msg3.4s

	/* rounds 24--27 */
	sha1h	e1, abcd_s
	sha1p	abcd_q, e0, tmp0.4s
	add	tmp0.4s, msg0.4s, k1.4s
	sha1su1	msg1.4s, msg0.4s
	sha1su0	msg2.4s, msg3.4s, msg0.4s

	/* rounds 28--31 */
	sha1h	e0, abcd_s
	sha1p	abcd_q, e1, tmp1.4s
	add	tmp1.4s, msg1.4s, k1.4s
	sha1su1	msg2.4s, msg1.4s
	sha1su0	msg3.4s, msg0.4s, msg1.4s

	/* rounds 32--35 */
	sha1h	e1, abcd_s
	sha1p	abcd_q, e0, tmp0.4s
	add	tmp0.4s, msg2.4s, k2.4s
	sha1su1	msg3.4s, msg2.4s
	sha1su0	msg0.4s, msg1.4s, msg2.4s

	/* rounds 36--39 */
	sha1h	e0, abcd_s
	sha1p	abcd_q, e1, tmp1.4s
	add	tmp1.4s, msg3.4s, k2.4s
	sha1su1	msg0.4s, msg3.4s
	sha1su0	msg1.4s, msg2.4s, msg3.4s

	/* rounds 40--43 */
	sha1h	e1, abcd_s
	sha1m	abcd_q, e0, tmp0.4s
	add	tmp0.4s, msg0.4s, k2.4s
	sha1su1	msg1.4s, msg0.4s
	sha1su0	msg2.4s, msg3.4s, msg0.4s

	/* rounds 44--47 */
	sha1h	e0, abcd_s
	sha1m	abcd_q, e1, tmp1.4s
	add	tmp1.4s, msg1.4s, k2.4s
	sha1su1	msg2.4s, msg1.4s
	sha1su0	msg3.4s, msg0.4s, msg1.4s

	/* rounds 48--51 */
	sha1h	e1, abcd_s
	sha1m	abcd_q, e0, tmp0.4s
	add	tmp0.4s, msg2.4s, k2.4s
	sha1su1	msg3.4s, msg2.4s
	sha1su0	msg0.4s, msg1.4s, msg2.4s

	/* rounds 52--55 */
	sha1h	e0, abcd_s
	sha1m	abcd_q, e1, tmp1.4s
	add	tmp1.4s, msg3.4s, k3.4s
	sha1su1	msg0.4s, msg3.4s
	sha1su0	msg1.4s, msg2.4s, msg3.4s

	/* rounds 56--59 */
	sha1h	e1, abcd_s
	sha1m	abcd_q, e0, tmp0.4s
	add	tmp0.4s, msg0.4s, k3.4s
	sha1su1	msg1.4s, msg0.4s
	sha1su0	msg2.4s, msg3.4s, msg0.4s

	/* rounds 60--63 */
	sha1h	e0, abcd_s
	sha1p	abcd_q, e1, tmp1.4s
	add	tmp1.4s, msg1.4s, k3.4s
	sha1su1	msg2.4s, msg1.4s
	sha1su0	msg3.4s, msg0.4s, msg1.4s

	/*
	 * rounds 64--67
	 *
	 * msg3 receives w[76..79] here, the last schedule words any
	 * round consumes.  The schedule is not advanced further:
	 * w[80..83] would never be read, and msg0 is reloaded from the
	 * input at the top of the next iteration.
	 */
	sha1h	e1, abcd_s
	sha1p	abcd_q, e0, tmp0.4s
	add	tmp0.4s, msg2.4s, k3.4s
	sha1su1	msg3.4s, msg2.4s

	/* rounds 68--71 */
	sha1h	e0, abcd_s
	sha1p	abcd_q, e1, tmp1.4s
	add	tmp1.4s, msg3.4s, k3.4s

	/* rounds 72--75 */
	sha1h	e1, abcd_s
	sha1p	abcd_q, e0, tmp0.4s

	/* rounds 76--79 */
	sha1h	e0, abcd_s
	sha1p	abcd_q, e1, tmp1.4s

	add	e0_v.4s, e0_v.4s, e0_saved.4s	// new state = old state + working variables
	add	abcd.4s, abcd.4s, abcd_saved.4s

	subs	len, len, #64
	bhi	0b			// loop while input remains

	str	abcd_q, [ctx, #0]	// write updated SHA1 state
	str	e0, [ctx, #16]

1:	ret
END(_libmd_sha1block_sha1)

	/*
	 * The four SHA1 round constants, the same values the scalar
	 * routine builds with movz/movk.  _libmd_sha1block_sha1 loads
	 * the table with a single ld4r, which replicates each word
	 * across all lanes of its own vector register.
	 */
	.section .rodata
	.balign	16
k1234:	.4byte	0x5a827999		// rounds 0--19
	.4byte	0x6ed9eba1		// rounds 20--39
	.4byte	0x8f1bbcdc		// rounds 40--59
	.4byte	0xca62c1d6		// rounds 60--79
	.size	k1234, .-k1234

	.section .note.GNU-stack,"",%progbits