summary | refs | log | tree | commit | diff | stats
path: root/src/crypto/chacha/chacha_vec_arm.S
diff options
context:
space:
mode:
author: Adam Langley <agl@google.com> 2015-05-11 17:20:37 -0700
committer: Kenny Root <kroot@google.com> 2015-05-12 23:06:14 +0000
commit: e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5 (patch)
tree: 6e43e34595ecf887c26c32b86d8ab097fe8cac64 /src/crypto/chacha/chacha_vec_arm.S
parent: b3106a0cc1493bbe0505c0ec0ce3da4ca90a29ae (diff)
download: external_boringssl-e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5.zip
external_boringssl-e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5.tar.gz
external_boringssl-e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5.tar.bz2
external/boringssl: bump revision.
This change bumps the BoringSSL revision to the current tip-of-tree. Change-Id: I91d5bf467e16e8d86cb19a4de873985f524e5faa
Diffstat (limited to 'src/crypto/chacha/chacha_vec_arm.S')
-rw-r--r-- src/crypto/chacha/chacha_vec_arm.S 728
1 files changed, 364 insertions, 364 deletions
diff --git a/src/crypto/chacha/chacha_vec_arm.S b/src/crypto/chacha/chacha_vec_arm.S
index 15d4556..ddc374e 100644
--- a/src/crypto/chacha/chacha_vec_arm.S
+++ b/src/crypto/chacha/chacha_vec_arm.S
@@ -59,131 +59,147 @@
.thumb_func
.type CRYPTO_chacha_20_neon, %function
CRYPTO_chacha_20_neon:
- @ args = 8, pretend = 0, frame = 128
+ @ args = 8, pretend = 0, frame = 152
@ frame_needed = 1, uses_anonymous_args = 0
push {r4, r5, r6, r7, r8, r9, r10, fp, lr}
- mov r4, r2
+ mov r8, r3
vpush.64 {d8, d9, d10, d11, d12, d13, d14, d15}
- movw r8, #43691
- movt r8, 43690
- mov ip, r3
- umull r8, r9, r4, r8
- sub sp, sp, #132
- add r7, sp, #0
- sub sp, sp, #112
+ mov r9, r2
+ ldr r4, .L91+16
mov fp, r0
mov r10, r1
+ mov lr, r8
+.LPIC16:
+ add r4, pc
+ sub sp, sp, #156
+ add r7, sp, #0
+ sub sp, sp, #112
+ add r6, r7, #144
+ str r0, [r7, #88]
+ str r1, [r7, #12]
str r2, [r7, #8]
+ ldmia r4, {r0, r1, r2, r3}
add r4, sp, #15
- ldr r2, .L92+16
bic r4, r4, #15
- ldr r5, [r7, #232]
- add lr, r4, #64
-.LPIC16:
- add r2, pc
- str r0, [r7, #60]
- str r1, [r7, #12]
- str r3, [r7, #44]
- ldmia r2, {r0, r1, r2, r3}
- ldr r6, [r5]
- str r4, [r7, #72]
- ldr r5, [r5, #4]
- ldr r4, [r7, #236]
- str r6, [r7, #120]
- str r5, [r7, #124]
- str r4, [r7, #112]
- stmia lr, {r0, r1, r2, r3}
- movs r3, #0
- ldr r0, [r7, #72]
- str r3, [r7, #116]
- lsrs r3, r9, #7
- vldr d22, [r7, #112]
- vldr d23, [r7, #120]
- vldr d24, [r0, #64]
- vldr d25, [r0, #72]
- vld1.64 {d26-d27}, [ip:64]
- vldr d28, [ip, #16]
- vldr d29, [ip, #24]
+ ldr ip, [r7, #256]
+ str r4, [r7, #84]
+ mov r5, r4
+ adds r4, r4, #64
+ adds r5, r5, #80
+ str r8, [r7, #68]
+ stmia r4, {r0, r1, r2, r3}
+ movw r4, #43691
+ ldr r0, [ip] @ unaligned
+ movt r4, 43690
+ ldr r1, [ip, #4] @ unaligned
+ ldr r3, [r7, #84]
+ ldr r2, [r8, #8] @ unaligned
+ mov r8, #0
+ stmia r6!, {r0, r1}
+ mov r6, r5
+ ldr r1, [lr, #4] @ unaligned
+ ldr r0, [lr] @ unaligned
+ vldr d24, [r3, #64]
+ vldr d25, [r3, #72]
+ ldr r3, [lr, #12] @ unaligned
+ str r5, [r7, #80]
+ stmia r5!, {r0, r1, r2, r3}
+ ldr r0, [lr, #16]! @ unaligned
+ ldr r2, [r7, #84]
+ umull r4, r5, r9, r4
+ vldr d26, [r2, #80]
+ vldr d27, [r2, #88]
+ ldr r1, [lr, #4] @ unaligned
+ ldr r2, [lr, #8] @ unaligned
+ ldr r3, [lr, #12] @ unaligned
+ ldr r4, [r7, #260]
+ stmia r6!, {r0, r1, r2, r3}
+ ldr r3, [ip]
+ ldr r1, [r7, #84]
+ ldr r2, [ip, #4]
+ str r3, [r7, #64]
+ vldr d28, [r1, #80]
+ vldr d29, [r1, #88]
+ str r3, [r7, #136]
+ lsrs r3, r5, #7
+ str r4, [r7, #128]
+ str r2, [r7, #140]
+ str r8, [r7, #132]
+ str r2, [r7, #60]
+ vldr d22, [r7, #128]
+ vldr d23, [r7, #136]
beq .L26
- ldr r1, [r0, #64]
lsls r2, r3, #8
+ ldr r5, [r1, #64]
sub r3, r2, r3, lsl #6
+ ldr r2, [r1, #68]
+ vldr d0, .L91
+ vldr d1, .L91+8
+ adds r4, r4, #2
+ str r5, [r7, #56]
+ str r2, [r7, #52]
+ ldr r5, [r1, #72]
+ ldr r2, [r1, #76]
str r3, [r7, #4]
- ldr r2, [r0, #72]
- str r1, [r7, #40]
- mov r1, r3
- ldr r3, [r0, #68]
- vldr d0, .L92
- vldr d1, .L92+8
- str r2, [r7, #32]
- adds r2, r4, #2
- str r3, [r7, #36]
- ldr r3, [r0, #76]
- str r2, [r7, #48]
- mov r2, r0
- mov r0, fp
- str r10, [r7, #64]
- str r3, [r7, #28]
- adds r3, r0, r1
- mov r1, r6
+ str r5, [r7, #48]
+ str r2, [r7, #44]
+ mov r2, fp
+ str r4, [r7, #72]
+ adds r3, r2, r3
+ str r10, [r7, #76]
str r3, [r7, #16]
- add r3, r2, #80
- mov r2, r5
- str r3, [r7, #68]
.L4:
- ldr r0, [r7, #44]
- add r8, r7, #28
- str r2, [r7, #108]
+ ldr r5, [r7, #68]
+ add r8, r7, #44
+ ldr r4, [r7, #72]
vadd.i32 q3, q11, q0
ldmia r8, {r8, r9, r10, fp}
vmov q8, q14 @ v4si
- ldr r3, [r0]
+ ldr r2, [r5, #4]
vmov q1, q13 @ v4si
+ ldr r3, [r5]
vmov q9, q12 @ v4si
+ ldr lr, [r5, #20]
vmov q2, q11 @ v4si
- str r3, [r7, #52]
- mov r3, r0
- ldr r5, [r3, #8]
+ mov r0, r2
+ ldr r2, [r5, #8]
+ str r3, [r7, #108]
+ mov r3, r5
+ ldr ip, [r5, #16]
vmov q15, q14 @ v4si
- ldr lr, [r3, #20]
+ mov r1, r2
+ ldr r2, [r5, #12]
+ ldr r5, [r5, #24]
vmov q5, q13 @ v4si
- ldr r6, [r3, #12]
+ ldr r6, [r3, #28]
vmov q10, q12 @ v4si
- str r5, [r7, #92]
- mov r5, r3
- ldr r4, [r5, #28]
+ ldr r3, [r7, #64]
+ str r5, [r7, #116]
movs r5, #10
- ldr ip, [r3, #16]
- ldr r3, [r3, #24]
- str r4, [r7, #104]
- ldr r4, [r7, #48]
- str r3, [r7, #100]
- mov r3, r1
- str r6, [r7, #56]
- str r4, [r7, #96]
- str r8, [r7, #80]
+ str r6, [r7, #120]
+ str r4, [r7, #112]
+ ldr r6, [r7, #60]
+ str r8, [r7, #96]
mov r8, r10
- ldr r0, [r0, #4]
+ ldr r4, [r7, #108]
mov r10, r9
- ldr r1, [r7, #92]
- ldr r2, [r7, #56]
- ldr r9, [r7, #100]
- ldr r4, [r7, #52]
- str lr, [r7, #88]
+ ldr r9, [r7, #116]
+ str lr, [r7, #104]
mov lr, r3
- str r5, [r7, #76]
+ str r5, [r7, #92]
movs r5, #0
- str r5, [r7, #84]
- b .L93
-.L94:
+ str r6, [r7, #124]
+ str r5, [r7, #100]
+ b .L92
+.L93:
.align 3
-.L92:
+.L91:
.word 1
.word 0
.word 0
.word 0
.word .LANCHOR0-(.LPIC16+4)
-.L93:
+.L92:
.L3:
vadd.i32 q9, q9, q1
add r3, r8, r0
@@ -192,8 +208,8 @@ CRYPTO_chacha_20_neon:
veor q3, q3, q9
mov r6, r3
veor q2, q2, q10
- ldr r3, [r7, #80]
- str r5, [r7, #100]
+ ldr r3, [r7, #96]
+ str r5, [r7, #116]
add r10, r10, r1
vrev32.16 q3, q3
eor lr, lr, r10
@@ -201,13 +217,13 @@ CRYPTO_chacha_20_neon:
vrev32.16 q2, q2
vadd.i32 q15, q15, q2
mov fp, r3
- ldr r3, [r7, #96]
+ ldr r3, [r7, #112]
veor q4, q8, q1
- str r6, [r7, #96]
+ str r6, [r7, #112]
veor q6, q15, q5
eors r3, r3, r5
mov r5, r6
- ldr r6, [r7, #84]
+ ldr r6, [r7, #100]
vshl.i32 q1, q4, #12
vshl.i32 q5, q6, #12
add fp, fp, r2
@@ -216,33 +232,33 @@ CRYPTO_chacha_20_neon:
vsri.32 q1, q4, #20
ror lr, lr, #16
mov r5, r6
- ldr r6, [r7, #108]
+ ldr r6, [r7, #124]
vsri.32 q5, q6, #20
- str r3, [r7, #108]
+ str r3, [r7, #124]
eor r6, r6, fp
ror r5, r5, #16
vadd.i32 q9, q9, q1
add r9, r9, lr
ror r3, r6, #16
- ldr r6, [r7, #108]
+ ldr r6, [r7, #124]
vadd.i32 q10, q10, q5
- str r3, [r7, #92]
+ str r3, [r7, #108]
veor q4, q9, q3
add ip, ip, r6
- ldr r6, [r7, #88]
+ ldr r6, [r7, #104]
veor q6, q10, q2
eor r4, ip, r4
eor r1, r9, r1
vshl.i32 q3, q4, #8
mov r8, r6
- ldr r6, [r7, #104]
+ ldr r6, [r7, #120]
vshl.i32 q2, q6, #8
ror r4, r4, #20
add r6, r6, r3
vsri.32 q3, q4, #24
- str r6, [r7, #88]
+ str r6, [r7, #104]
eors r2, r2, r6
- ldr r6, [r7, #100]
+ ldr r6, [r7, #116]
vsri.32 q2, q6, #24
add r8, r8, r5
ror r2, r2, #20
@@ -251,42 +267,42 @@ CRYPTO_chacha_20_neon:
eor r0, r8, r0
vadd.i32 q15, q15, q2
mov r3, r6
- ldr r6, [r7, #96]
+ ldr r6, [r7, #112]
veor q6, q4, q1
ror r0, r0, #20
- str r3, [r7, #96]
+ str r3, [r7, #112]
veor q5, q15, q5
adds r6, r0, r6
- str r6, [r7, #104]
+ str r6, [r7, #120]
mov r6, r3
- ldr r3, [r7, #108]
+ ldr r3, [r7, #124]
vshl.i32 q8, q6, #7
add fp, fp, r2
eors r3, r3, r6
- ldr r6, [r7, #104]
+ ldr r6, [r7, #120]
vshl.i32 q1, q5, #7
ror r1, r1, #20
eors r5, r5, r6
vsri.32 q8, q6, #25
- ldr r6, [r7, #92]
+ ldr r6, [r7, #108]
ror r3, r3, #24
ror r5, r5, #24
vsri.32 q1, q5, #25
- str r5, [r7, #100]
+ str r5, [r7, #116]
eor r6, fp, r6
- ldr r5, [r7, #100]
+ ldr r5, [r7, #116]
add r10, r10, r1
add ip, r3, ip
vext.32 q8, q8, q8, #1
- str ip, [r7, #108]
+ str ip, [r7, #124]
add ip, r5, r8
- ldr r5, [r7, #88]
+ ldr r5, [r7, #104]
eor lr, r10, lr
ror r6, r6, #24
vext.32 q1, q1, q1, #1
add r8, r6, r5
vadd.i32 q9, q9, q8
- ldr r5, [r7, #108]
+ ldr r5, [r7, #124]
vext.32 q3, q3, q3, #3
vadd.i32 q10, q10, q1
ror lr, lr, #24
@@ -295,14 +311,14 @@ CRYPTO_chacha_20_neon:
add r9, r9, lr
eors r4, r4, r5
veor q3, q9, q3
- ldr r5, [r7, #96]
+ ldr r5, [r7, #112]
eor r1, r9, r1
ror r0, r0, #25
veor q2, q10, q2
adds r5, r0, r5
vext.32 q4, q4, q4, #2
- str r5, [r7, #96]
- ldr r5, [r7, #104]
+ str r5, [r7, #112]
+ ldr r5, [r7, #120]
ror r1, r1, #25
vrev32.16 q3, q3
eor r2, r8, r2
@@ -311,10 +327,10 @@ CRYPTO_chacha_20_neon:
vadd.i32 q4, q4, q3
ror r4, r4, #25
vrev32.16 q2, q2
- str r5, [r7, #84]
+ str r5, [r7, #100]
vadd.i32 q15, q15, q2
eors r3, r3, r5
- ldr r5, [r7, #96]
+ ldr r5, [r7, #112]
add fp, fp, r4
veor q8, q4, q8
ror r2, r2, #25
@@ -322,174 +338,182 @@ CRYPTO_chacha_20_neon:
eor lr, fp, lr
eors r6, r6, r5
ror r3, r3, #16
- ldr r5, [r7, #100]
+ ldr r5, [r7, #116]
add r10, r10, r2
- str r3, [r7, #104]
+ str r3, [r7, #120]
ror lr, lr, #16
- ldr r3, [r7, #104]
+ ldr r3, [r7, #120]
eor r5, r10, r5
vshl.i32 q5, q8, #12
add ip, lr, ip
vshl.i32 q6, q1, #12
- str ip, [r7, #88]
+ str ip, [r7, #104]
add ip, r3, r8
- str ip, [r7, #100]
- ldr r3, [r7, #108]
+ str ip, [r7, #116]
+ ldr r3, [r7, #124]
ror r5, r5, #16
vsri.32 q5, q8, #20
ror r6, r6, #16
add ip, r5, r3
- ldr r3, [r7, #88]
+ ldr r3, [r7, #104]
vsri.32 q6, q1, #20
add r9, r9, r6
eor r2, ip, r2
eors r4, r4, r3
- ldr r3, [r7, #100]
+ ldr r3, [r7, #116]
eor r0, r9, r0
vadd.i32 q9, q9, q5
ror r4, r4, #20
eors r1, r1, r3
vadd.i32 q10, q10, q6
ror r3, r2, #20
- str r3, [r7, #92]
- ldr r3, [r7, #96]
+ str r3, [r7, #108]
+ ldr r3, [r7, #112]
veor q3, q9, q3
ror r0, r0, #20
add r8, r4, fp
veor q2, q10, q2
add fp, r0, r3
- ldr r3, [r7, #84]
+ ldr r3, [r7, #100]
ror r1, r1, #20
mov r2, r8
vshl.i32 q8, q3, #8
- str r8, [r7, #80]
+ str r8, [r7, #96]
add r8, r1, r3
- ldr r3, [r7, #92]
+ ldr r3, [r7, #108]
vmov q1, q6 @ v4si
vshl.i32 q6, q2, #8
eor r6, fp, r6
add r10, r10, r3
- ldr r3, [r7, #104]
+ ldr r3, [r7, #120]
vsri.32 q8, q3, #24
eor lr, r2, lr
eor r3, r8, r3
ror r2, r6, #24
vsri.32 q6, q2, #24
eor r5, r10, r5
- str r2, [r7, #108]
+ str r2, [r7, #124]
ror r2, r3, #24
- ldr r3, [r7, #88]
+ ldr r3, [r7, #104]
vmov q3, q8 @ v4si
vadd.i32 q15, q15, q6
ror lr, lr, #24
vadd.i32 q8, q4, q8
ror r6, r5, #24
add r5, lr, r3
- ldr r3, [r7, #108]
+ ldr r3, [r7, #124]
veor q4, q8, q5
add ip, ip, r6
vmov q2, q6 @ v4si
add r9, r9, r3
veor q6, q15, q1
- ldr r3, [r7, #100]
+ ldr r3, [r7, #116]
vshl.i32 q1, q4, #7
- str r2, [r7, #96]
+ str r2, [r7, #112]
add r3, r3, r2
- str r3, [r7, #104]
+ str r3, [r7, #120]
vshl.i32 q5, q6, #7
eors r1, r1, r3
- ldr r3, [r7, #92]
+ ldr r3, [r7, #108]
vsri.32 q1, q4, #25
eors r4, r4, r5
eor r0, r9, r0
eor r2, ip, r3
vsri.32 q5, q6, #25
- ldr r3, [r7, #76]
+ ldr r3, [r7, #92]
ror r4, r4, #25
- str r6, [r7, #84]
+ str r6, [r7, #100]
ror r0, r0, #25
subs r3, r3, #1
- str r5, [r7, #88]
+ str r5, [r7, #104]
ror r1, r1, #25
ror r2, r2, #25
vext.32 q15, q15, q15, #2
- str r3, [r7, #76]
+ str r3, [r7, #92]
vext.32 q2, q2, q2, #1
vext.32 q8, q8, q8, #2
vext.32 q3, q3, q3, #1
vext.32 q5, q5, q5, #3
vext.32 q1, q1, q1, #3
bne .L3
- ldr r3, [r7, #68]
+ ldr r3, [r7, #80]
vadd.i32 q4, q12, q10
- str r9, [r7, #100]
+ str r9, [r7, #116]
mov r9, r10
mov r10, r8
- ldr r8, [r7, #80]
- str lr, [r7, #80]
+ ldr r8, [r7, #96]
+ str lr, [r7, #96]
mov lr, r5
- ldr r5, [r7, #40]
+ ldr r5, [r7, #56]
vadd.i32 q5, q13, q5
- ldr r6, [r7, #64]
+ ldr r6, [r7, #76]
vadd.i32 q15, q14, q15
add fp, fp, r5
- ldr r5, [r7, #36]
- str r4, [r7, #52]
+ ldr r5, [r7, #52]
+ str r4, [r7, #108]
vadd.i32 q7, q14, q8
- ldr r4, [r7, #96]
+ ldr r4, [r7, #112]
add r5, r10, r5
- str r3, [r7, #96]
+ str r3, [r7, #112]
vadd.i32 q2, q11, q2
ldr r3, [r6, #12] @ unaligned
vadd.i32 q6, q12, q9
- str r0, [r7, #76]
+ str r0, [r7, #92]
vadd.i32 q1, q13, q1
ldr r0, [r6] @ unaligned
vadd.i32 q11, q11, q0
- str r1, [r7, #92]
- str r2, [r7, #56]
+ str r1, [r7, #40]
+ str r2, [r7, #36]
vadd.i32 q3, q11, q3
ldr r1, [r6, #4] @ unaligned
vadd.i32 q11, q11, q0
ldr r2, [r6, #8] @ unaligned
- str r5, [r7, #88]
+ str r5, [r7, #104]
vadd.i32 q11, q11, q0
- ldr r5, [r7, #96]
- ldr r10, [r7, #68]
+ ldr r5, [r7, #112]
+ ldr r10, [r7, #80]
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
- ldr r2, [r7, #72]
- ldr r1, [r7, #32]
- ldr r3, [r7, #48]
- vldr d20, [r2, #80]
- vldr d21, [r2, #88]
- add r9, r9, r1
+ ldr r0, [r7, #84]
+ ldr r2, [r7, #48]
+ ldr r3, [r7, #72]
+ vldr d20, [r0, #80]
+ vldr d21, [r0, #88]
+ add r9, r9, r2
veor q10, q10, q4
- ldr r1, [r7, #28]
- add r0, r8, r1
- str r0, [r7, #24]
- vstr d20, [r2, #80]
- vstr d21, [r2, #88]
- adds r0, r4, r3
- str r0, [r7, #20]
+ ldr r2, [r7, #44]
+ adds r1, r4, r3
+ str r1, [r7, #28]
+ add r2, r8, r2
+ str r2, [r7, #32]
+ vstr d20, [r0, #80]
+ vstr d21, [r0, #88]
ldmia r5!, {r0, r1, r2, r3}
- mov r5, r10
+ ldr r4, [r7, #96]
+ ldr r5, [r7, #64]
+ add r4, r4, r5
+ ldr r5, [r7, #124]
+ str r4, [r7, #96]
ldr r4, [r7, #60]
+ add r5, r5, r4
+ ldr r4, [r7, #88]
+ str r5, [r7, #24]
+ mov r5, r10
str r0, [r4] @ unaligned
- mov r4, r10
- ldr r0, [r7, #60]
- str r1, [r0, #4] @ unaligned
+ mov r0, r4
+ str r1, [r4, #4] @ unaligned
mov r8, r0
str r2, [r0, #8] @ unaligned
+ mov r4, r10
str r3, [r0, #12] @ unaligned
ldr r0, [r6, #16]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
- ldr r6, [r7, #64]
+ ldr r6, [r7, #76]
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
- ldr r3, [r7, #72]
+ ldr r3, [r7, #84]
vldr d20, [r3, #80]
vldr d21, [r3, #88]
veor q10, q10, q5
@@ -501,21 +525,22 @@ CRYPTO_chacha_20_neon:
str r1, [r8, #20] @ unaligned
str r2, [r8, #24] @ unaligned
str r3, [r8, #28] @ unaligned
+ mov r8, r4
ldr r0, [r6, #32]! @ unaligned
+ str r10, [r7, #124]
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
- ldr r6, [r7, #64]
+ ldr r6, [r7, #76]
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
- ldr r0, [r7, #72]
- vldr d16, [r0, #80]
- vldr d17, [r0, #88]
+ ldr r2, [r7, #84]
+ vldr d16, [r2, #80]
+ vldr d17, [r2, #88]
veor q15, q8, q15
- vstr d30, [r0, #80]
- vstr d31, [r0, #88]
+ vstr d30, [r2, #80]
+ vstr d31, [r2, #88]
ldmia r10!, {r0, r1, r2, r3}
- mov r10, r5
str r0, [r4, #32] @ unaligned
str r1, [r4, #36] @ unaligned
str r2, [r4, #40] @ unaligned
@@ -524,17 +549,18 @@ CRYPTO_chacha_20_neon:
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
- ldr r6, [r7, #64]
+ ldr r6, [r7, #76]
stmia r5!, {r0, r1, r2, r3}
- mov r5, r10
- ldr r2, [r7, #72]
- vldr d18, [r2, #80]
- vldr d19, [r2, #88]
+ ldr r1, [r7, #84]
+ vldr d18, [r1, #80]
+ vldr d19, [r1, #88]
veor q9, q9, q2
- vstr d18, [r2, #80]
- vstr d19, [r2, #88]
+ vstr d18, [r1, #80]
+ vstr d19, [r1, #88]
+ ldr r3, [r7, #112]
+ ldr r5, [r7, #80]
+ mov r10, r3
ldmia r10!, {r0, r1, r2, r3}
- mov r10, r5
str r0, [r4, #48] @ unaligned
str r1, [r4, #52] @ unaligned
str r2, [r4, #56] @ unaligned
@@ -543,34 +569,38 @@ CRYPTO_chacha_20_neon:
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
- ldr r6, [r7, #64]
+ ldr r6, [r7, #76]
stmia r5!, {r0, r1, r2, r3}
- mov r5, r10
- ldr r2, [r7, #72]
- vldr d18, [r2, #80]
- vldr d19, [r2, #88]
+ ldr r1, [r7, #84]
+ ldr r3, [r7, #112]
+ ldr r5, [r7, #80]
+ vldr d18, [r1, #80]
+ vldr d19, [r1, #88]
veor q9, q9, q6
- vstr d18, [r2, #80]
- vstr d19, [r2, #88]
+ mov r10, r3
+ str r5, [r7, #20]
+ vstr d18, [r1, #80]
+ vstr d19, [r1, #88]
ldmia r10!, {r0, r1, r2, r3}
- mov r10, r5
- str r0, [r4, #64] @ unaligned
str r1, [r4, #68] @ unaligned
str r2, [r4, #72] @ unaligned
str r3, [r4, #76] @ unaligned
+ str r0, [r4, #64] @ unaligned
ldr r0, [r6, #80]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
- ldr r6, [r7, #64]
+ ldr r6, [r7, #76]
stmia r5!, {r0, r1, r2, r3}
- mov r5, r10
- ldr r2, [r7, #72]
- vldr d18, [r2, #80]
- vldr d19, [r2, #88]
+ ldr r1, [r7, #84]
+ ldr r3, [r7, #20]
+ ldr r5, [r7, #80]
+ vldr d18, [r1, #80]
+ vldr d19, [r1, #88]
veor q1, q9, q1
- vstr d2, [r2, #80]
- vstr d3, [r2, #88]
+ mov r10, r3
+ vstr d2, [r1, #80]
+ vstr d3, [r1, #88]
ldmia r10!, {r0, r1, r2, r3}
mov r10, r5
str r0, [r4, #80] @ unaligned
@@ -581,17 +611,16 @@ CRYPTO_chacha_20_neon:
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
- ldr r6, [r7, #64]
+ ldr r6, [r7, #76]
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
- ldr r3, [r7, #72]
+ ldr r3, [r7, #84]
vldr d16, [r3, #80]
vldr d17, [r3, #88]
veor q8, q8, q7
vstr d16, [r3, #80]
vstr d17, [r3, #88]
ldmia r10!, {r0, r1, r2, r3}
- mov r10, r5
str r0, [r4, #96] @ unaligned
str r1, [r4, #100] @ unaligned
str r2, [r4, #104] @ unaligned
@@ -600,140 +629,116 @@ CRYPTO_chacha_20_neon:
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
- stmia r5!, {r0, r1, r2, r3}
- mov r5, r10
- ldr r0, [r7, #72]
- ldr r6, [r7, #44]
- vldr d16, [r0, #80]
- vldr d17, [r0, #88]
+ mov r6, r5
+ stmia r6!, {r0, r1, r2, r3}
+ ldr r3, [r7, #84]
+ vldr d16, [r3, #80]
+ vldr d17, [r3, #88]
veor q8, q8, q3
- vstr d16, [r0, #80]
- vstr d17, [r0, #88]
+ vstr d16, [r3, #80]
+ vstr d17, [r3, #88]
ldmia r5!, {r0, r1, r2, r3}
- mov r5, r4
- mov r8, r5
str r1, [r4, #116] @ unaligned
- ldr r1, [r7, #64]
+ ldr r1, [r7, #76]
str r0, [r4, #112] @ unaligned
- mov r0, r5
str r2, [r4, #120] @ unaligned
str r3, [r4, #124] @ unaligned
ldr r3, [r1, #128]
- ldr r2, [r7, #88]
+ ldr r2, [r7, #104]
eor r3, fp, r3
str r3, [r4, #128]
ldr r3, [r1, #132]
- mov r4, r1
- mov r1, r5
eors r2, r2, r3
str r2, [r8, #132]
- ldr r3, [r4, #136]
- ldr r2, [r7, #24]
+ ldr r3, [r1, #136]
+ ldr r5, [r7, #68]
+ ldr r6, [r7, #32]
eor r3, r9, r3
- str r3, [r5, #136]
- ldr r3, [r4, #140]
- eors r3, r3, r2
- str r3, [r5, #140]
- mov r5, r4
- ldr r3, [r6]
- ldr r2, [r4, #144]
- ldr r4, [r7, #52]
- add r4, r4, r3
- eors r2, r2, r4
- mov r4, r1
- str r2, [r1, #144]
- ldr r1, [r7, #76]
- ldr r2, [r6, #4]
- ldr r3, [r5, #148]
- mov r8, r1
- add r8, r8, r2
- mov r2, r8
- eors r3, r3, r2
- str r3, [r0, #148]
- mov r0, r4
- ldr r2, [r6, #8]
- ldr r1, [r7, #92]
- ldr r3, [r5, #152]
- mov r8, r1
- add r8, r8, r2
- ldr r1, [r7, #56]
- mov r2, r8
- eors r3, r3, r2
+ str r3, [r4, #136]
+ ldr r3, [r1, #140]
+ ldr r0, [r7, #92]
+ eors r3, r3, r6
+ ldr r6, [r7, #108]
+ str r3, [r4, #140]
+ ldr r3, [r5]
+ ldr r2, [r1, #144]
+ add r6, r6, r3
+ eors r2, r2, r6
+ str r2, [r4, #144]
+ ldr r2, [r5, #4]
+ ldr r3, [r1, #148]
+ add r0, r0, r2
+ ldr r6, [r7, #36]
+ eors r3, r3, r0
+ ldr r0, [r7, #40]
+ str r3, [r4, #148]
+ ldr r2, [r5, #8]
+ ldr r3, [r1, #152]
+ add r0, r0, r2
+ eors r3, r3, r0
str r3, [r4, #152]
- mov r8, r6
- ldr r2, [r6, #12]
- mov r4, r5
- ldr r3, [r5, #156]
- add r1, r1, r2
- eors r3, r3, r1
- str r3, [r0, #156]
- ldr r2, [r6, #16]
+ ldr r2, [r5, #12]
+ mov r0, r4
+ ldr r3, [r1, #156]
+ mov r4, r1
+ add r6, r6, r2
mov r1, r0
- ldr r3, [r5, #160]
+ eors r3, r3, r6
+ str r3, [r0, #156]
+ ldr r2, [r5, #16]
+ ldr r3, [r4, #160]
add ip, ip, r2
eor r3, ip, r3
- str r3, [r0, #160]
- ldr r2, [r6, #20]
- mov ip, r0
- ldr r3, [r5, #164]
+ str r3, [r1, #160]
+ ldr r2, [r5, #20]
+ ldr r3, [r4, #164]
add lr, lr, r2
- ldr r2, [r7, #100]
+ ldr r2, [r7, #116]
eor r3, lr, r3
str r3, [r1, #164]
- ldr r6, [r6, #24]
+ ldr r6, [r5, #24]
+ mov lr, r4
ldr r3, [r4, #168]
add r2, r2, r6
+ mov r6, r4
eors r3, r3, r2
- ldr r2, [r7, #104]
- str r3, [r0, #168]
- ldr r5, [r8, #28]
+ str r3, [r1, #168]
+ ldr r5, [r5, #28]
+ mov r2, r1
ldr r3, [r4, #172]
- add r2, r2, r5
- mov r5, r4
- eors r3, r3, r2
- mov r2, r0
- str r3, [r0, #172]
- ldr r3, [r7, #48]
+ ldr r0, [r7, #120]
+ add r0, r0, r5
+ ldr r5, [r7, #24]
+ eors r3, r3, r0
+ str r3, [r1, #172]
+ ldr r3, [r7, #72]
ldr r4, [r4, #176]
- ldr r0, [r7, #20]
+ ldr r1, [r7, #28]
+ eors r4, r4, r1
adds r1, r3, #3
- ldr r3, [r7, #84]
- eors r4, r4, r0
str r4, [r2, #176]
- ldr r0, [r5, #180]
- mov r4, r2
- str r1, [r7, #48]
+ ldr r3, [r7, #100]
+ ldr r0, [lr, #180]
+ str r1, [r7, #72]
eors r3, r3, r0
mov r0, r3
- ldr r3, [r7, #232]
+ mov r3, r2
str r0, [r2, #180]
- ldr r1, [r3]
- ldr r3, [r5, #184]
- ldr r2, [r7, #80]
- add r2, r2, r1
- mov r1, r5
- eors r3, r3, r2
- str r3, [ip, #184]
- ldr r3, [r7, #232]
+ adds r3, r3, #192
+ ldr r1, [lr, #184]
+ ldr r2, [r7, #96]
+ eors r1, r1, r2
+ str r1, [r3, #-8]
+ ldr r2, [lr, #188]
+ mov r1, r6
adds r1, r1, #192
- str r1, [r7, #64]
- ldr r1, [r7, #108]
- ldr r2, [r3, #4]
- ldr r3, [r5, #188]
- add r1, r1, r2
- mov r2, r1
- eors r2, r2, r3
- str r2, [ip, #188]
- mov r3, r4
+ str r1, [r7, #76]
+ eors r2, r2, r5
+ str r2, [r3, #-4]
ldr r2, [r7, #16]
- adds r3, r3, #192
- str r3, [r7, #60]
+ str r3, [r7, #88]
cmp r2, r3
- beq .L85
- ldr r3, [r7, #232]
- ldmia r3, {r1, r2}
- b .L4
-.L85:
+ bne .L4
ldr r3, [r7, #12]
ldr r2, [r7, #4]
add r3, r3, r2
@@ -749,16 +754,14 @@ CRYPTO_chacha_20_neon:
rsb fp, fp, r1
lsrs fp, fp, #6
beq .L6
- ldr r6, [r7, #72]
ldr r5, [r7, #12]
ldr r4, [r7, #16]
- mov r3, r6
- adds r3, r3, #80
- vldr d30, .L95
- vldr d31, .L95+8
- mov lr, r3
- str fp, [r7, #104]
- str fp, [r7, #108]
+ ldr r6, [r7, #84]
+ ldr lr, [r7, #80]
+ vldr d30, .L94
+ vldr d31, .L94+8
+ str fp, [r7, #120]
+ str fp, [r7, #124]
.L8:
vmov q2, q11 @ v4si
movs r3, #10
@@ -883,22 +886,22 @@ CRYPTO_chacha_20_neon:
str r0, [r4, #-16] @ unaligned
str r1, [r4, #-12] @ unaligned
str r3, [r10, #12] @ unaligned
- ldr r3, [r7, #108]
+ ldr r3, [r7, #124]
str r2, [r10, #8] @ unaligned
cmp r3, #1
- beq .L88
+ beq .L87
movs r3, #1
- str r3, [r7, #108]
+ str r3, [r7, #124]
b .L8
-.L96:
- .align 3
.L95:
+ .align 3
+.L94:
.word 1
.word 0
.word 0
.word 0
-.L88:
- ldr fp, [r7, #104]
+.L87:
+ ldr fp, [r7, #120]
ldr r3, [r7, #12]
lsl fp, fp, #6
add r3, r3, fp
@@ -958,9 +961,9 @@ CRYPTO_chacha_20_neon:
bne .L10
cmp r5, #15
mov r9, r5
- bhi .L89
+ bhi .L88
vadd.i32 q12, q12, q10
- ldr r3, [r7, #72]
+ ldr r3, [r7, #84]
vst1.64 {d24-d25}, [r3:128]
.L14:
ldr r3, [r7, #8]
@@ -997,7 +1000,7 @@ CRYPTO_chacha_20_neon:
movcs r1, ip
cmp r1, #0
beq .L17
- ldr r5, [r7, #72]
+ ldr r5, [r7, #84]
cmp r1, #1
ldrb r0, [r0] @ zero_extendqisi2
add r3, r2, #1
@@ -1132,7 +1135,7 @@ CRYPTO_chacha_20_neon:
ldr r5, [r7, #16]
cmp r6, #1
add r0, r1, r2
- ldr r1, [r7, #72]
+ ldr r1, [r7, #84]
add r1, r1, r2
vld1.64 {d18-d19}, [r0:64]
add r2, r2, r5
@@ -1170,7 +1173,7 @@ CRYPTO_chacha_20_neon:
add r3, r3, lr
beq .L1
.L19:
- ldr r4, [r7, #72]
+ ldr r4, [r7, #84]
adds r2, r3, #1
ldr r1, [r7, #12]
cmp r2, r9
@@ -1285,7 +1288,7 @@ CRYPTO_chacha_20_neon:
eor r1, r1, r0
strb r1, [r5, r2]
bls .L1
- ldr r2, [r7, #72]
+ ldr r2, [r7, #84]
ldrb r1, [r2, r3] @ zero_extendqisi2
ldr r2, [r7, #12]
ldrb r2, [r2, r3] @ zero_extendqisi2
@@ -1293,26 +1296,23 @@ CRYPTO_chacha_20_neon:
ldr r1, [r7, #16]
strb r2, [r1, r3]
.L1:
- adds r7, r7, #132
+ adds r7, r7, #156
mov sp, r7
@ sp needed
vldm sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, fp, pc}
-.L89:
- ldr r4, [r7, #12]
+.L88:
+ ldr r5, [r7, #12]
vadd.i32 q12, q12, q10
- ldr r5, [r7, #72]
+ ldr r4, [r7, #80]
cmp r9, #31
- ldr r0, [r4] @ unaligned
- add r6, r5, #80
- ldr r1, [r4, #4] @ unaligned
- ldr r2, [r4, #8] @ unaligned
- mov r5, r6
- ldr r3, [r4, #12] @ unaligned
- mov r4, r6
- str r6, [r7, #68]
+ ldr r0, [r5] @ unaligned
+ ldr r1, [r5, #4] @ unaligned
+ mov r6, r4
+ ldr r2, [r5, #8] @ unaligned
+ ldr r3, [r5, #12] @ unaligned
stmia r6!, {r0, r1, r2, r3}
- ldr r2, [r7, #72]
+ ldr r2, [r7, #84]
ldr r6, [r7, #16]
vldr d18, [r2, #80]
vldr d19, [r2, #88]
@@ -1325,9 +1325,9 @@ CRYPTO_chacha_20_neon:
str r0, [r6] @ unaligned
str r2, [r6, #8] @ unaligned
str r3, [r6, #12] @ unaligned
- bhi .L90
+ bhi .L89
vadd.i32 q13, q13, q15
- ldr r3, [r7, #72]
+ ldr r3, [r7, #84]
vstr d26, [r3, #16]
vstr d27, [r3, #24]
b .L14
@@ -1336,7 +1336,7 @@ CRYPTO_chacha_20_neon:
ldr r2, [r7, #12]
add r2, r2, r9
mov r5, r2
- ldr r2, [r7, #72]
+ ldr r2, [r7, #84]
add r2, r2, r3
mov r3, r2
.L24:
@@ -1346,7 +1346,7 @@ CRYPTO_chacha_20_neon:
eor r2, r2, r1
strb r2, [r4], #1
bne .L24
- adds r7, r7, #132
+ adds r7, r7, #156
mov sp, r7
@ sp needed
vldm sp!, {d8-d15}
@@ -1354,20 +1354,20 @@ CRYPTO_chacha_20_neon:
.L26:
str fp, [r7, #16]
b .L2
-.L90:
- ldr r3, [r7, #12]
+.L89:
+ mov r3, r5
+ ldr r4, [r7, #80]
+ ldr r0, [r3, #16]! @ unaligned
add lr, r1, #16
- mov r4, r5
- mov r6, r5
mov r5, r1
vadd.i32 q13, q13, q15
- ldr r0, [r3, #16]! @ unaligned
+ mov r6, r4
cmp r9, #47
ldr r1, [r3, #4] @ unaligned
ldr r2, [r3, #8] @ unaligned
ldr r3, [r3, #12] @ unaligned
stmia r6!, {r0, r1, r2, r3}
- ldr r2, [r7, #72]
+ ldr r2, [r7, #84]
vldr d18, [r2, #80]
vldr d19, [r2, #88]
veor q13, q9, q13
@@ -1378,18 +1378,18 @@ CRYPTO_chacha_20_neon:
str r1, [lr, #4] @ unaligned
str r2, [lr, #8] @ unaligned
str r3, [lr, #12] @ unaligned
- bhi .L91
+ bhi .L90
vadd.i32 q8, q14, q8
- ldr r3, [r7, #72]
+ ldr r3, [r7, #84]
vstr d16, [r3, #32]
vstr d17, [r3, #40]
b .L14
-.L91:
+.L90:
ldr r3, [r7, #12]
add lr, r5, #32
- ldr r4, [r7, #68]
+ ldr r4, [r7, #80]
vadd.i32 q8, q14, q8
- ldr r5, [r7, #72]
+ ldr r5, [r7, #84]
vadd.i32 q11, q11, q3
ldr r0, [r3, #32]! @ unaligned
mov r6, r4