summary refs log tree commit diff stats
path: root/linux-aarch64
diff options
context:
space:
mode:
Diffstat (limited to 'linux-aarch64')
-rw-r--r-- linux-aarch64/crypto/aes/aesv8-armx.S 520
-rw-r--r-- linux-aarch64/crypto/modes/ghashv8-armx.S 251
-rw-r--r-- linux-aarch64/crypto/sha/sha1-armv8.S 664
-rw-r--r-- linux-aarch64/crypto/sha/sha256-armv8.S 292
-rw-r--r-- linux-aarch64/crypto/sha/sha512-armv8.S 96
5 files changed, 982 insertions, 841 deletions
diff --git a/linux-aarch64/crypto/aes/aesv8-armx.S b/linux-aarch64/crypto/aes/aesv8-armx.S
index e7ae46f..9c63291 100644
--- a/linux-aarch64/crypto/aes/aesv8-armx.S
+++ b/linux-aarch64/crypto/aes/aesv8-armx.S
@@ -6,7 +6,7 @@
.arch armv8-a+crypto
#endif
.align 5
-rcon:
+.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
@@ -31,7 +31,7 @@ aes_v8_set_encrypt_key:
tst w1,#0x3f
b.ne .Lenc_key_abort
- adr x3,rcon
+ adr x3,.Lrcon
cmp w1,#192
eor v0.16b,v0.16b,v0.16b
@@ -55,7 +55,7 @@ aes_v8_set_encrypt_key:
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
- eor v6.16b,v6.16b,v1.16b
+ eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
@@ -72,7 +72,7 @@ aes_v8_set_encrypt_key:
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
- eor v6.16b,v6.16b,v1.16b
+ eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
@@ -86,7 +86,7 @@ aes_v8_set_encrypt_key:
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
- eor v6.16b,v6.16b,v1.16b
+ eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2]
@@ -117,7 +117,7 @@ aes_v8_set_encrypt_key:
dup v5.4s,v3.s[3]
eor v5.16b,v5.16b,v4.16b
- eor v6.16b,v6.16b,v1.16b
+ eor v6.16b,v6.16b,v1.16b
ext v4.16b,v0.16b,v4.16b,#12
shl v1.16b,v1.16b,#1
eor v4.16b,v4.16b,v5.16b
@@ -148,7 +148,7 @@ aes_v8_set_encrypt_key:
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
- eor v6.16b,v6.16b,v1.16b
+ eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
@@ -229,17 +229,17 @@ aes_v8_encrypt:
.Loop_enc:
aese v2.16b,v0.16b
- ld1 {v0.4s},[x2],#16
aesmc v2.16b,v2.16b
+ ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aese v2.16b,v1.16b
- ld1 {v1.4s},[x2],#16
aesmc v2.16b,v2.16b
+ ld1 {v1.4s},[x2],#16
b.gt .Loop_enc
aese v2.16b,v0.16b
- ld1 {v0.4s},[x2]
aesmc v2.16b,v2.16b
+ ld1 {v0.4s},[x2]
aese v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
@@ -258,17 +258,17 @@ aes_v8_decrypt:
.Loop_dec:
aesd v2.16b,v0.16b
- ld1 {v0.4s},[x2],#16
aesimc v2.16b,v2.16b
+ ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aesd v2.16b,v1.16b
- ld1 {v1.4s},[x2],#16
aesimc v2.16b,v2.16b
+ ld1 {v1.4s},[x2],#16
b.gt .Loop_dec
aesd v2.16b,v0.16b
- ld1 {v0.4s},[x2]
aesimc v2.16b,v2.16b
+ ld1 {v0.4s},[x2]
aesd v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
@@ -292,13 +292,13 @@ aes_v8_cbc_encrypt:
ld1 {v6.16b},[x4]
ld1 {v0.16b},[x0],x8
- ld1 {v16.4s-v17.4s},[x3] // load key schedule...
+ ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#6
add x7,x3,x5,lsl#4 // pointer to last 7 round keys
sub w5,w5,#2
- ld1 {v18.4s-v19.4s},[x7],#32
- ld1 {v20.4s-v21.4s},[x7],#32
- ld1 {v22.4s-v23.4s},[x7],#32
+ ld1 {v18.4s,v19.4s},[x7],#32
+ ld1 {v20.4s,v21.4s},[x7],#32
+ ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]
add x7,x3,#32
@@ -310,76 +310,99 @@ aes_v8_cbc_encrypt:
eor v5.16b,v16.16b,v7.16b
b.eq .Lcbc_enc128
+ ld1 {v2.4s,v3.4s},[x7]
+ add x7,x3,#16
+ add x6,x3,#16*4
+ add x12,x3,#16*5
+ aese v0.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ add x14,x3,#16*6
+ add x3,x3,#16*7
+ b .Lenter_cbc_enc
+
+.align 4
.Loop_cbc_enc:
aese v0.16b,v16.16b
- ld1 {v16.4s},[x7],#16
aesmc v0.16b,v0.16b
- subs w6,w6,#2
+ st1 {v6.16b},[x1],#16
+.Lenter_cbc_enc:
aese v0.16b,v17.16b
- ld1 {v17.4s},[x7],#16
aesmc v0.16b,v0.16b
- b.gt .Loop_cbc_enc
+ aese v0.16b,v2.16b
+ aesmc v0.16b,v0.16b
+ ld1 {v16.4s},[x6]
+ cmp w5,#4
+ aese v0.16b,v3.16b
+ aesmc v0.16b,v0.16b
+ ld1 {v17.4s},[x12]
+ b.eq .Lcbc_enc192
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
- subs x2,x2,#16
+ ld1 {v16.4s},[x14]
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
- csel x8,xzr,x8,eq
+ ld1 {v17.4s},[x3]
+ nop
+
+.Lcbc_enc192:
+ aese v0.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ subs x2,x2,#16
+ aese v0.16b,v17.16b
+ aesmc v0.16b,v0.16b
+ csel x8,xzr,x8,eq
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
- add x7,x3,#16
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
- ld1 {v16.16b},[x0],x8
+ ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
- eor v16.16b,v16.16b,v5.16b
+ eor v16.16b,v16.16b,v5.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
- ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
+ ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v0.16b,v23.16b
-
- mov w6,w5
eor v6.16b,v0.16b,v7.16b
- st1 {v6.16b},[x1],#16
b.hs .Loop_cbc_enc
+ st1 {v6.16b},[x1],#16
b .Lcbc_done
.align 5
.Lcbc_enc128:
- ld1 {v2.4s-v3.4s},[x7]
+ ld1 {v2.4s,v3.4s},[x7]
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
b .Lenter_cbc_enc128
.Loop_cbc_enc128:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
- st1 {v6.16b},[x1],#16
+ st1 {v6.16b},[x1],#16
.Lenter_cbc_enc128:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
- subs x2,x2,#16
+ subs x2,x2,#16
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
- csel x8,xzr,x8,eq
+ csel x8,xzr,x8,eq
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
- ld1 {v16.16b},[x0],x8
+ ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
- eor v16.16b,v16.16b,v5.16b
+ eor v16.16b,v16.16b,v5.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs .Loop_cbc_enc128
@@ -404,81 +427,80 @@ aes_v8_cbc_encrypt:
.Loop3x_cbc_dec:
aesd v0.16b,v16.16b
- aesd v1.16b,v16.16b
- aesd v18.16b,v16.16b
- ld1 {v16.4s},[x7],#16
aesimc v0.16b,v0.16b
+ aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
+ aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
+ ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v0.16b,v17.16b
- aesd v1.16b,v17.16b
- aesd v18.16b,v17.16b
- ld1 {v17.4s},[x7],#16
aesimc v0.16b,v0.16b
+ aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
+ aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
+ ld1 {v17.4s},[x7],#16
b.gt .Loop3x_cbc_dec
aesd v0.16b,v16.16b
- aesd v1.16b,v16.16b
- aesd v18.16b,v16.16b
- eor v4.16b,v6.16b,v7.16b
aesimc v0.16b,v0.16b
+ aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
+ aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
- eor v5.16b,v2.16b,v7.16b
+ eor v4.16b,v6.16b,v7.16b
+ subs x2,x2,#0x30
+ eor v5.16b,v2.16b,v7.16b
+ csel x6,x2,x6,lo // x6, w6, is zero at this point
aesd v0.16b,v17.16b
- aesd v1.16b,v17.16b
- aesd v18.16b,v17.16b
- eor v17.16b,v3.16b,v7.16b
- subs x2,x2,#0x30
aesimc v0.16b,v0.16b
+ aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
+ aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
- orr v6.16b,v19.16b,v19.16b
- csel x6,x2,x6,lo // x6, w6, is zero at this point
- aesd v0.16b,v20.16b
- aesd v1.16b,v20.16b
- aesd v18.16b,v20.16b
- add x0,x0,x6 // x0 is adjusted in such way that
+ eor v17.16b,v3.16b,v7.16b
+ add x0,x0,x6 // x0 is adjusted in such way that
// at exit from the loop v1.16b-v18.16b
// are loaded with last "words"
+ orr v6.16b,v19.16b,v19.16b
+ mov x7,x3
+ aesd v0.16b,v20.16b
aesimc v0.16b,v0.16b
+ aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
+ aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
- mov x7,x3
+ ld1 {v2.16b},[x0],#16
aesd v0.16b,v21.16b
- aesd v1.16b,v21.16b
- aesd v18.16b,v21.16b
- ld1 {v2.16b},[x0],#16
aesimc v0.16b,v0.16b
+ aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
+ aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
- ld1 {v3.16b},[x0],#16
+ ld1 {v3.16b},[x0],#16
aesd v0.16b,v22.16b
- aesd v1.16b,v22.16b
- aesd v18.16b,v22.16b
- ld1 {v19.16b},[x0],#16
aesimc v0.16b,v0.16b
+ aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
+ aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
- ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
+ ld1 {v19.16b},[x0],#16
aesd v0.16b,v23.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
-
- add w6,w5,#2
+ ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
+ add w6,w5,#2
eor v4.16b,v4.16b,v0.16b
eor v5.16b,v5.16b,v1.16b
eor v18.16b,v18.16b,v17.16b
- ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
- orr v0.16b,v2.16b,v2.16b
+ ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v4.16b},[x1],#16
- orr v1.16b,v3.16b,v3.16b
+ orr v0.16b,v2.16b,v2.16b
st1 {v5.16b},[x1],#16
+ orr v1.16b,v3.16b,v3.16b
st1 {v18.16b},[x1],#16
- orr v18.16b,v19.16b,v19.16b
+ orr v18.16b,v19.16b,v19.16b
b.hs .Loop3x_cbc_dec
cmn x2,#0x30
@@ -487,54 +509,54 @@ aes_v8_cbc_encrypt:
.Lcbc_dec_tail:
aesd v1.16b,v16.16b
- aesd v18.16b,v16.16b
- ld1 {v16.4s},[x7],#16
aesimc v1.16b,v1.16b
+ aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
+ ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v1.16b,v17.16b
- aesd v18.16b,v17.16b
- ld1 {v17.4s},[x7],#16
aesimc v1.16b,v1.16b
+ aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
+ ld1 {v17.4s},[x7],#16
b.gt .Lcbc_dec_tail
aesd v1.16b,v16.16b
- aesd v18.16b,v16.16b
aesimc v1.16b,v1.16b
+ aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v17.16b
- aesd v18.16b,v17.16b
aesimc v1.16b,v1.16b
+ aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v20.16b
- aesd v18.16b,v20.16b
aesimc v1.16b,v1.16b
+ aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
- cmn x2,#0x20
+ cmn x2,#0x20
aesd v1.16b,v21.16b
- aesd v18.16b,v21.16b
aesimc v1.16b,v1.16b
+ aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
- eor v5.16b,v6.16b,v7.16b
+ eor v5.16b,v6.16b,v7.16b
aesd v1.16b,v22.16b
- aesd v18.16b,v22.16b
aesimc v1.16b,v1.16b
+ aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
- eor v17.16b,v3.16b,v7.16b
+ eor v17.16b,v3.16b,v7.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
b.eq .Lcbc_dec_one
eor v5.16b,v5.16b,v1.16b
eor v17.16b,v17.16b,v18.16b
- orr v6.16b,v19.16b,v19.16b
+ orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
st1 {v17.16b},[x1],#16
b .Lcbc_done
.Lcbc_dec_one:
eor v5.16b,v5.16b,v18.16b
- orr v6.16b,v19.16b,v19.16b
+ orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
.Lcbc_done:
@@ -547,181 +569,181 @@ aes_v8_cbc_encrypt:
.type aes_v8_ctr32_encrypt_blocks,%function
.align 5
aes_v8_ctr32_encrypt_blocks:
- stp x29,x30,[sp,#-16]!
- add x29,sp,#0
- ldr w5,[x3,#240]
-
- ldr w8, [x4, #12]
- ld1 {v0.4s},[x4]
-
- ld1 {v16.4s-v17.4s},[x3] // load key schedule...
- sub w5,w5,#4
- mov x12,#16
- cmp x2,#2
- add x7,x3,x5,lsl#4 // pointer to last 5 round keys
- sub w5,w5,#2
- ld1 {v20.4s-v21.4s},[x7],#32
- ld1 {v22.4s-v23.4s},[x7],#32
- ld1 {v7.4s},[x7]
- add x7,x3,#32
- mov w6,w5
+ stp x29,x30,[sp,#-16]!
+ add x29,sp,#0
+ ldr w5,[x3,#240]
+
+ ldr w8, [x4, #12]
+ ld1 {v0.4s},[x4]
+
+ ld1 {v16.4s,v17.4s},[x3] // load key schedule...
+ sub w5,w5,#4
+ mov x12,#16
+ cmp x2,#2
+ add x7,x3,x5,lsl#4 // pointer to last 5 round keys
+ sub w5,w5,#2
+ ld1 {v20.4s,v21.4s},[x7],#32
+ ld1 {v22.4s,v23.4s},[x7],#32
+ ld1 {v7.4s},[x7]
+ add x7,x3,#32
+ mov w6,w5
csel x12,xzr,x12,lo
#ifndef __ARMEB__
- rev w8, w8
+ rev w8, w8
#endif
- orr v1.16b,v0.16b,v0.16b
- add w10, w8, #1
- orr v18.16b,v0.16b,v0.16b
- add w8, w8, #2
- orr v6.16b,v0.16b,v0.16b
- rev w10, w10
- mov v1.s[3],w10
- b.ls .Lctr32_tail
- rev w12, w8
- sub x2,x2,#3 // bias
- mov v18.s[3],w12
- b .Loop3x_ctr32
+ orr v1.16b,v0.16b,v0.16b
+ add w10, w8, #1
+ orr v18.16b,v0.16b,v0.16b
+ add w8, w8, #2
+ orr v6.16b,v0.16b,v0.16b
+ rev w10, w10
+ mov v1.s[3],w10
+ b.ls .Lctr32_tail
+ rev w12, w8
+ sub x2,x2,#3 // bias
+ mov v18.s[3],w12
+ b .Loop3x_ctr32
.align 4
.Loop3x_ctr32:
- aese v0.16b,v16.16b
- aese v1.16b,v16.16b
- aese v18.16b,v16.16b
- ld1 {v16.4s},[x7],#16
- aesmc v0.16b,v0.16b
- aesmc v1.16b,v1.16b
- aesmc v18.16b,v18.16b
- subs w6,w6,#2
- aese v0.16b,v17.16b
- aese v1.16b,v17.16b
- aese v18.16b,v17.16b
- ld1 {v17.4s},[x7],#16
- aesmc v0.16b,v0.16b
- aesmc v1.16b,v1.16b
- aesmc v18.16b,v18.16b
- b.gt .Loop3x_ctr32
-
- aese v0.16b,v16.16b
- aese v1.16b,v16.16b
- aese v18.16b,v16.16b
- mov x7,x3
- aesmc v4.16b,v0.16b
- ld1 {v2.16b},[x0],#16
- aesmc v5.16b,v1.16b
- aesmc v18.16b,v18.16b
- orr v0.16b,v6.16b,v6.16b
- aese v4.16b,v17.16b
- ld1 {v3.16b},[x0],#16
- aese v5.16b,v17.16b
- aese v18.16b,v17.16b
- orr v1.16b,v6.16b,v6.16b
- aesmc v4.16b,v4.16b
- ld1 {v19.16b},[x0],#16
- aesmc v5.16b,v5.16b
- aesmc v17.16b,v18.16b
- orr v18.16b,v6.16b,v6.16b
- add w9,w8,#1
- aese v4.16b,v20.16b
- aese v5.16b,v20.16b
- aese v17.16b,v20.16b
- eor v2.16b,v2.16b,v7.16b
- add w10,w8,#2
- aesmc v4.16b,v4.16b
- aesmc v5.16b,v5.16b
- aesmc v17.16b,v17.16b
- eor v3.16b,v3.16b,v7.16b
- add w8,w8,#3
- aese v4.16b,v21.16b
- aese v5.16b,v21.16b
- aese v17.16b,v21.16b
- eor v19.16b,v19.16b,v7.16b
- rev w9,w9
- aesmc v4.16b,v4.16b
- ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
- aesmc v5.16b,v5.16b
- aesmc v17.16b,v17.16b
- mov v0.s[3], w9
- rev w10,w10
- aese v4.16b,v22.16b
- aese v5.16b,v22.16b
- aese v17.16b,v22.16b
- mov v1.s[3], w10
- rev w12,w8
- aesmc v4.16b,v4.16b
- aesmc v5.16b,v5.16b
- aesmc v17.16b,v17.16b
- mov v18.s[3], w12
- subs x2,x2,#3
- aese v4.16b,v23.16b
- aese v5.16b,v23.16b
- aese v17.16b,v23.16b
-
- mov w6,w5
- eor v2.16b,v2.16b,v4.16b
- eor v3.16b,v3.16b,v5.16b
- eor v19.16b,v19.16b,v17.16b
- ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
- st1 {v2.16b},[x1],#16
- st1 {v3.16b},[x1],#16
- st1 {v19.16b},[x1],#16
- b.hs .Loop3x_ctr32
-
- adds x2,x2,#3
- b.eq .Lctr32_done
- cmp x2,#1
- mov x12,#16
+ aese v0.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v16.16b
+ aesmc v1.16b,v1.16b
+ aese v18.16b,v16.16b
+ aesmc v18.16b,v18.16b
+ ld1 {v16.4s},[x7],#16
+ subs w6,w6,#2
+ aese v0.16b,v17.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v17.16b
+ aesmc v1.16b,v1.16b
+ aese v18.16b,v17.16b
+ aesmc v18.16b,v18.16b
+ ld1 {v17.4s},[x7],#16
+ b.gt .Loop3x_ctr32
+
+ aese v0.16b,v16.16b
+ aesmc v4.16b,v0.16b
+ aese v1.16b,v16.16b
+ aesmc v5.16b,v1.16b
+ ld1 {v2.16b},[x0],#16
+ orr v0.16b,v6.16b,v6.16b
+ aese v18.16b,v16.16b
+ aesmc v18.16b,v18.16b
+ ld1 {v3.16b},[x0],#16
+ orr v1.16b,v6.16b,v6.16b
+ aese v4.16b,v17.16b
+ aesmc v4.16b,v4.16b
+ aese v5.16b,v17.16b
+ aesmc v5.16b,v5.16b
+ ld1 {v19.16b},[x0],#16
+ mov x7,x3
+ aese v18.16b,v17.16b
+ aesmc v17.16b,v18.16b
+ orr v18.16b,v6.16b,v6.16b
+ add w9,w8,#1
+ aese v4.16b,v20.16b
+ aesmc v4.16b,v4.16b
+ aese v5.16b,v20.16b
+ aesmc v5.16b,v5.16b
+ eor v2.16b,v2.16b,v7.16b
+ add w10,w8,#2
+ aese v17.16b,v20.16b
+ aesmc v17.16b,v17.16b
+ eor v3.16b,v3.16b,v7.16b
+ add w8,w8,#3
+ aese v4.16b,v21.16b
+ aesmc v4.16b,v4.16b
+ aese v5.16b,v21.16b
+ aesmc v5.16b,v5.16b
+ eor v19.16b,v19.16b,v7.16b
+ rev w9,w9
+ aese v17.16b,v21.16b
+ aesmc v17.16b,v17.16b
+ mov v0.s[3], w9
+ rev w10,w10
+ aese v4.16b,v22.16b
+ aesmc v4.16b,v4.16b
+ aese v5.16b,v22.16b
+ aesmc v5.16b,v5.16b
+ mov v1.s[3], w10
+ rev w12,w8
+ aese v17.16b,v22.16b
+ aesmc v17.16b,v17.16b
+ mov v18.s[3], w12
+ subs x2,x2,#3
+ aese v4.16b,v23.16b
+ aese v5.16b,v23.16b
+ aese v17.16b,v23.16b
+
+ eor v2.16b,v2.16b,v4.16b
+ ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
+ st1 {v2.16b},[x1],#16
+ eor v3.16b,v3.16b,v5.16b
+ mov w6,w5
+ st1 {v3.16b},[x1],#16
+ eor v19.16b,v19.16b,v17.16b
+ ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
+ st1 {v19.16b},[x1],#16
+ b.hs .Loop3x_ctr32
+
+ adds x2,x2,#3
+ b.eq .Lctr32_done
+ cmp x2,#1
+ mov x12,#16
csel x12,xzr,x12,eq
.Lctr32_tail:
- aese v0.16b,v16.16b
- aese v1.16b,v16.16b
- ld1 {v16.4s},[x7],#16
- aesmc v0.16b,v0.16b
- aesmc v1.16b,v1.16b
- subs w6,w6,#2
- aese v0.16b,v17.16b
- aese v1.16b,v17.16b
- ld1 {v17.4s},[x7],#16
- aesmc v0.16b,v0.16b
- aesmc v1.16b,v1.16b
- b.gt .Lctr32_tail
-
- aese v0.16b,v16.16b
- aese v1.16b,v16.16b
- aesmc v0.16b,v0.16b
- aesmc v1.16b,v1.16b
- aese v0.16b,v17.16b
- aese v1.16b,v17.16b
- aesmc v0.16b,v0.16b
- aesmc v1.16b,v1.16b
- ld1 {v2.16b},[x0],x12
- aese v0.16b,v20.16b
- aese v1.16b,v20.16b
- ld1 {v3.16b},[x0]
- aesmc v0.16b,v0.16b
- aesmc v1.16b,v1.16b
- aese v0.16b,v21.16b
- aese v1.16b,v21.16b
- aesmc v0.16b,v0.16b
- aesmc v1.16b,v1.16b
- aese v0.16b,v22.16b
- aese v1.16b,v22.16b
- eor v2.16b,v2.16b,v7.16b
- aesmc v0.16b,v0.16b
- aesmc v1.16b,v1.16b
- eor v3.16b,v3.16b,v7.16b
- aese v0.16b,v23.16b
- aese v1.16b,v23.16b
-
- cmp x2,#1
- eor v2.16b,v2.16b,v0.16b
- eor v3.16b,v3.16b,v1.16b
- st1 {v2.16b},[x1],#16
- b.eq .Lctr32_done
- st1 {v3.16b},[x1]
+ aese v0.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v16.16b
+ aesmc v1.16b,v1.16b
+ ld1 {v16.4s},[x7],#16
+ subs w6,w6,#2
+ aese v0.16b,v17.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v17.16b
+ aesmc v1.16b,v1.16b
+ ld1 {v17.4s},[x7],#16
+ b.gt .Lctr32_tail
+
+ aese v0.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v16.16b
+ aesmc v1.16b,v1.16b
+ aese v0.16b,v17.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v17.16b
+ aesmc v1.16b,v1.16b
+ ld1 {v2.16b},[x0],x12
+ aese v0.16b,v20.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v20.16b
+ aesmc v1.16b,v1.16b
+ ld1 {v3.16b},[x0]
+ aese v0.16b,v21.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v21.16b
+ aesmc v1.16b,v1.16b
+ eor v2.16b,v2.16b,v7.16b
+ aese v0.16b,v22.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v22.16b
+ aesmc v1.16b,v1.16b
+ eor v3.16b,v3.16b,v7.16b
+ aese v0.16b,v23.16b
+ aese v1.16b,v23.16b
+
+ cmp x2,#1
+ eor v2.16b,v2.16b,v0.16b
+ eor v3.16b,v3.16b,v1.16b
+ st1 {v2.16b},[x1],#16
+ b.eq .Lctr32_done
+ st1 {v3.16b},[x1]
.Lctr32_done:
- ldr x29,[sp],#16
+ ldr x29,[sp],#16
ret
.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
#endif
diff --git a/linux-aarch64/crypto/modes/ghashv8-armx.S b/linux-aarch64/crypto/modes/ghashv8-armx.S
index 565146e..ad19074 100644
--- a/linux-aarch64/crypto/modes/ghashv8-armx.S
+++ b/linux-aarch64/crypto/modes/ghashv8-armx.S
@@ -4,114 +4,227 @@
#if !defined(__clang__)
.arch armv8-a+crypto
#endif
-.global gcm_init_v8
+.globl gcm_init_v8
.type gcm_init_v8,%function
.align 4
gcm_init_v8:
- ld1 {v17.2d},[x1] //load H
- movi v16.16b,#0xe1
- ext v3.16b,v17.16b,v17.16b,#8
- shl v16.2d,v16.2d,#57
- ushr v18.2d,v16.2d,#63
- ext v16.16b,v18.16b,v16.16b,#8 //t0=0xc2....01
- dup v17.4s,v17.s[1]
- ushr v19.2d,v3.2d,#63
+ ld1 {v17.2d},[x1] //load input H
+ movi v19.16b,#0xe1
+ shl v19.2d,v19.2d,#57 //0xc2.0
+ ext v3.16b,v17.16b,v17.16b,#8
+ ushr v18.2d,v19.2d,#63
+ dup v17.4s,v17.s[1]
+ ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
+ ushr v18.2d,v3.2d,#63
sshr v17.4s,v17.4s,#31 //broadcast carry bit
- and v19.16b,v19.16b,v16.16b
+ and v18.16b,v18.16b,v16.16b
shl v3.2d,v3.2d,#1
- ext v19.16b,v19.16b,v19.16b,#8
- and v16.16b,v16.16b,v17.16b
- orr v3.16b,v3.16b,v19.16b //H<<<=1
- eor v3.16b,v3.16b,v16.16b //twisted H
- st1 {v3.2d},[x0]
+ ext v18.16b,v18.16b,v18.16b,#8
+ and v16.16b,v16.16b,v17.16b
+ orr v3.16b,v3.16b,v18.16b //H<<<=1
+ eor v20.16b,v3.16b,v16.16b //twisted H
+ st1 {v20.2d},[x0],#16 //store Htable[0]
+
+ //calculate H^2
+ ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
+ pmull v0.1q,v20.1d,v20.1d
+ eor v16.16b,v16.16b,v20.16b
+ pmull2 v2.1q,v20.2d,v20.2d
+ pmull v1.1q,v16.1d,v16.1d
+
+ ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
+ eor v18.16b,v0.16b,v2.16b
+ eor v1.16b,v1.16b,v17.16b
+ eor v1.16b,v1.16b,v18.16b
+ pmull v18.1q,v0.1d,v19.1d //1st phase
+
+ ins v2.d[0],v1.d[1]
+ ins v1.d[1],v0.d[0]
+ eor v0.16b,v1.16b,v18.16b
+
+ ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
+ pmull v0.1q,v0.1d,v19.1d
+ eor v18.16b,v18.16b,v2.16b
+ eor v22.16b,v0.16b,v18.16b
+
+ ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
+ eor v17.16b,v17.16b,v22.16b
+ ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
+ st1 {v21.2d,v22.2d},[x0] //store Htable[1..2]
ret
.size gcm_init_v8,.-gcm_init_v8
-
-.global gcm_gmult_v8
+.globl gcm_gmult_v8
.type gcm_gmult_v8,%function
.align 4
gcm_gmult_v8:
- ld1 {v17.2d},[x0] //load Xi
- movi v19.16b,#0xe1
- ld1 {v20.2d},[x1] //load twisted H
+ ld1 {v17.2d},[x0] //load Xi
+ movi v19.16b,#0xe1
+ ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
shl v19.2d,v19.2d,#57
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
- ext v21.16b,v20.16b,v20.16b,#8
- mov x3,#0
- ext v3.16b,v17.16b,v17.16b,#8
- mov x12,#0
- eor v21.16b,v21.16b,v20.16b //Karatsuba pre-processing
- mov x2,x0
- b .Lgmult_v8
-.size gcm_gmult_v8,.-gcm_gmult_v8
+ ext v3.16b,v17.16b,v17.16b,#8
+
+ pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
+ eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
+ pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
+ pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
+
+ ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
+ eor v18.16b,v0.16b,v2.16b
+ eor v1.16b,v1.16b,v17.16b
+ eor v1.16b,v1.16b,v18.16b
+ pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
+
+ ins v2.d[0],v1.d[1]
+ ins v1.d[1],v0.d[0]
+ eor v0.16b,v1.16b,v18.16b
+
+ ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
+ pmull v0.1q,v0.1d,v19.1d
+ eor v18.16b,v18.16b,v2.16b
+ eor v0.16b,v0.16b,v18.16b
-.global gcm_ghash_v8
+#ifndef __ARMEB__
+ rev64 v0.16b,v0.16b
+#endif
+ ext v0.16b,v0.16b,v0.16b,#8
+ st1 {v0.2d},[x0] //write out Xi
+
+ ret
+.size gcm_gmult_v8,.-gcm_gmult_v8
+.globl gcm_ghash_v8
.type gcm_ghash_v8,%function
.align 4
gcm_ghash_v8:
- ld1 {v0.2d},[x0] //load [rotated] Xi
- subs x3,x3,#16
- movi v19.16b,#0xe1
- mov x12,#16
- ld1 {v20.2d},[x1] //load twisted H
- csel x12,xzr,x12,eq
- ext v0.16b,v0.16b,v0.16b,#8
- shl v19.2d,v19.2d,#57
- ld1 {v17.2d},[x2],x12 //load [rotated] inp
- ext v21.16b,v20.16b,v20.16b,#8
+ ld1 {v0.2d},[x0] //load [rotated] Xi
+ //"[rotated]" means that
+ //loaded value would have
+ //to be rotated in order to
+ //make it appear as in
+ //algorithm specification
+ subs x3,x3,#32 //see if x3 is 32 or larger
+ mov x12,#16 //x12 is used as post-
+ //increment for input pointer;
+ //as loop is modulo-scheduled
+ //x12 is zeroed just in time
+ //to preclude overstepping
+ //inp[len], which means that
+ //last block[s] are actually
+ //loaded twice, but last
+ //copy is not processed
+ ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
+ movi v19.16b,#0xe1
+ ld1 {v22.2d},[x1]
+ csel x12,xzr,x12,eq //is it time to zero x12?
+ ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
+ ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
+ shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
#ifndef __ARMEB__
+ rev64 v16.16b,v16.16b
rev64 v0.16b,v0.16b
+#endif
+ ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
+ b.lo .Lodd_tail_v8 //x3 was less than 32
+ ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
+#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
- eor v21.16b,v21.16b,v20.16b //Karatsuba pre-processing
- ext v3.16b,v17.16b,v17.16b,#8
- b .Loop_v8
+ ext v7.16b,v17.16b,v17.16b,#8
+ eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
+ pmull v4.1q,v20.1d,v7.1d //H·Ii+1
+ eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
+ pmull2 v6.1q,v20.2d,v7.2d
+ b .Loop_mod2x_v8
.align 4
-.Loop_v8:
- ext v18.16b,v0.16b,v0.16b,#8
- eor v3.16b,v3.16b,v0.16b //inp^=Xi
- eor v17.16b,v17.16b,v18.16b //v17.16b is rotated inp^Xi
+.Loop_mod2x_v8:
+ ext v18.16b,v3.16b,v3.16b,#8
+ subs x3,x3,#32 //is there more data?
+ pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
+ csel x12,xzr,x12,lo //is it time to zero x12?
+
+ pmull v5.1q,v21.1d,v17.1d
+ eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
+ pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
+ eor v0.16b,v0.16b,v4.16b //accumulate
+ pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
+ ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
+
+ eor v2.16b,v2.16b,v6.16b
+ csel x12,xzr,x12,eq //is it time to zero x12?
+ eor v1.16b,v1.16b,v5.16b
+
+ ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
+ eor v18.16b,v0.16b,v2.16b
+ eor v1.16b,v1.16b,v17.16b
+ ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
+#ifndef __ARMEB__
+ rev64 v16.16b,v16.16b
+#endif
+ eor v1.16b,v1.16b,v18.16b
+ pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
+
+#ifndef __ARMEB__
+ rev64 v17.16b,v17.16b
+#endif
+ ins v2.d[0],v1.d[1]
+ ins v1.d[1],v0.d[0]
+ ext v7.16b,v17.16b,v17.16b,#8
+ ext v3.16b,v16.16b,v16.16b,#8
+ eor v0.16b,v1.16b,v18.16b
+ pmull v4.1q,v20.1d,v7.1d //H·Ii+1
+ eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
+
+ ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
+ pmull v0.1q,v0.1d,v19.1d
+ eor v3.16b,v3.16b,v18.16b
+ eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
+ eor v3.16b,v3.16b,v0.16b
+ pmull2 v6.1q,v20.2d,v7.2d
+ b.hs .Loop_mod2x_v8 //there was at least 32 more bytes
+
+ eor v2.16b,v2.16b,v18.16b
+ ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
+ adds x3,x3,#32 //re-construct x3
+ eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
+ b.eq .Ldone_v8 //is x3 zero?
+.Lodd_tail_v8:
+ ext v18.16b,v0.16b,v0.16b,#8
+ eor v3.16b,v3.16b,v0.16b //inp^=Xi
+ eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
-.Lgmult_v8:
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
- eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
+ eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
- subs x3,x3,#16
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
- csel x12,xzr,x12,eq
- ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
- eor v18.16b,v0.16b,v2.16b
- eor v1.16b,v1.16b,v17.16b
- ld1 {v17.2d},[x2],x12 //load [rotated] inp
- eor v1.16b,v1.16b,v18.16b
- pmull v18.1q,v0.1d,v19.1d //1st phase
+ ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
+ eor v18.16b,v0.16b,v2.16b
+ eor v1.16b,v1.16b,v17.16b
+ eor v1.16b,v1.16b,v18.16b
+ pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
-#ifndef __ARMEB__
- rev64 v17.16b,v17.16b
-#endif
- eor v0.16b,v1.16b,v18.16b
- ext v3.16b,v17.16b,v17.16b,#8
+ eor v0.16b,v1.16b,v18.16b
- ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
+ ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
- eor v18.16b,v18.16b,v2.16b
- eor v0.16b,v0.16b,v18.16b
- b.hs .Loop_v8
+ eor v18.16b,v18.16b,v2.16b
+ eor v0.16b,v0.16b,v18.16b
+.Ldone_v8:
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
- ext v0.16b,v0.16b,v0.16b,#8
- st1 {v0.2d},[x0] //write out Xi
+ ext v0.16b,v0.16b,v0.16b,#8
+ st1 {v0.2d},[x0] //write out Xi
ret
.size gcm_ghash_v8,.-gcm_ghash_v8
-.asciz "GHASH for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
-.align 2
+.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 2
+.align 2
diff --git a/linux-aarch64/crypto/sha/sha1-armv8.S b/linux-aarch64/crypto/sha/sha1-armv8.S
index f9d1262..ab6aa98 100644
--- a/linux-aarch64/crypto/sha/sha1-armv8.S
+++ b/linux-aarch64/crypto/sha/sha1-armv8.S
@@ -2,6 +2,7 @@
.text
+
.globl sha1_block_data_order
.type sha1_block_data_order,%function
.align 6
@@ -213,826 +214,826 @@ sha1_block_data_order:
add w20,w20,w17 // future e+=X[i]
add w21,w21,w25 // e+=F(b,c,d)
lsr x19,x17,#32
- eor w3,w3,w5
+ eor w3,w3,w5
bic w25,w24,w22
and w26,w23,w22
ror w27,w21,#27
- eor w3,w3,w11
+ eor w3,w3,w11
add w24,w24,w28 // future e+=K
orr w25,w25,w26
add w20,w20,w27 // e+=rot(a,5)
- eor w3,w3,w16
+ eor w3,w3,w16
ror w22,w22,#2
add w24,w24,w19 // future e+=X[i]
add w20,w20,w25 // e+=F(b,c,d)
- ror w3,w3,#31
- eor w4,w4,w6
+ ror w3,w3,#31
+ eor w4,w4,w6
bic w25,w23,w21
and w26,w22,w21
ror w27,w20,#27
- eor w4,w4,w12
+ eor w4,w4,w12
add w23,w23,w28 // future e+=K
orr w25,w25,w26
add w24,w24,w27 // e+=rot(a,5)
- eor w4,w4,w17
+ eor w4,w4,w17
ror w21,w21,#2
add w23,w23,w3 // future e+=X[i]
add w24,w24,w25 // e+=F(b,c,d)
- ror w4,w4,#31
- eor w5,w5,w7
+ ror w4,w4,#31
+ eor w5,w5,w7
bic w25,w22,w20
and w26,w21,w20
ror w27,w24,#27
- eor w5,w5,w13
+ eor w5,w5,w13
add w22,w22,w28 // future e+=K
orr w25,w25,w26
add w23,w23,w27 // e+=rot(a,5)
- eor w5,w5,w19
+ eor w5,w5,w19
ror w20,w20,#2
add w22,w22,w4 // future e+=X[i]
add w23,w23,w25 // e+=F(b,c,d)
- ror w5,w5,#31
- eor w6,w6,w8
+ ror w5,w5,#31
+ eor w6,w6,w8
bic w25,w21,w24
and w26,w20,w24
ror w27,w23,#27
- eor w6,w6,w14
+ eor w6,w6,w14
add w21,w21,w28 // future e+=K
orr w25,w25,w26
add w22,w22,w27 // e+=rot(a,5)
- eor w6,w6,w3
+ eor w6,w6,w3
ror w24,w24,#2
add w21,w21,w5 // future e+=X[i]
add w22,w22,w25 // e+=F(b,c,d)
- ror w6,w6,#31
- eor w7,w7,w9
+ ror w6,w6,#31
+ eor w7,w7,w9
bic w25,w20,w23
and w26,w24,w23
ror w27,w22,#27
- eor w7,w7,w15
+ eor w7,w7,w15
add w20,w20,w28 // future e+=K
orr w25,w25,w26
add w21,w21,w27 // e+=rot(a,5)
- eor w7,w7,w4
+ eor w7,w7,w4
ror w23,w23,#2
add w20,w20,w6 // future e+=X[i]
add w21,w21,w25 // e+=F(b,c,d)
- ror w7,w7,#31
+ ror w7,w7,#31
movz w28,#0xeba1
movk w28,#0x6ed9,lsl#16
- eor w8,w8,w10
+ eor w8,w8,w10
bic w25,w24,w22
and w26,w23,w22
ror w27,w21,#27
- eor w8,w8,w16
+ eor w8,w8,w16
add w24,w24,w28 // future e+=K
orr w25,w25,w26
add w20,w20,w27 // e+=rot(a,5)
- eor w8,w8,w5
+ eor w8,w8,w5
ror w22,w22,#2
add w24,w24,w7 // future e+=X[i]
add w20,w20,w25 // e+=F(b,c,d)
- ror w8,w8,#31
- eor w9,w9,w11
+ ror w8,w8,#31
+ eor w9,w9,w11
eor w25,w23,w21
ror w27,w20,#27
add w23,w23,w28 // future e+=K
- eor w9,w9,w17
+ eor w9,w9,w17
eor w25,w25,w22
add w24,w24,w27 // e+=rot(a,5)
ror w21,w21,#2
- eor w9,w9,w6
+ eor w9,w9,w6
add w23,w23,w8 // future e+=X[i]
add w24,w24,w25 // e+=F(b,c,d)
- ror w9,w9,#31
- eor w10,w10,w12
+ ror w9,w9,#31
+ eor w10,w10,w12
eor w25,w22,w20
ror w27,w24,#27
add w22,w22,w28 // future e+=K
- eor w10,w10,w19
+ eor w10,w10,w19
eor w25,w25,w21
add w23,w23,w27 // e+=rot(a,5)
ror w20,w20,#2
- eor w10,w10,w7
+ eor w10,w10,w7
add w22,w22,w9 // future e+=X[i]
add w23,w23,w25 // e+=F(b,c,d)
- ror w10,w10,#31
- eor w11,w11,w13
+ ror w10,w10,#31
+ eor w11,w11,w13
eor w25,w21,w24
ror w27,w23,#27
add w21,w21,w28 // future e+=K
- eor w11,w11,w3
+ eor w11,w11,w3
eor w25,w25,w20
add w22,w22,w27 // e+=rot(a,5)
ror w24,w24,#2
- eor w11,w11,w8
+ eor w11,w11,w8
add w21,w21,w10 // future e+=X[i]
add w22,w22,w25 // e+=F(b,c,d)
- ror w11,w11,#31
- eor w12,w12,w14
+ ror w11,w11,#31
+ eor w12,w12,w14
eor w25,w20,w23
ror w27,w22,#27
add w20,w20,w28 // future e+=K
- eor w12,w12,w4
+ eor w12,w12,w4
eor w25,w25,w24
add w21,w21,w27 // e+=rot(a,5)
ror w23,w23,#2
- eor w12,w12,w9
+ eor w12,w12,w9
add w20,w20,w11 // future e+=X[i]
add w21,w21,w25 // e+=F(b,c,d)
- ror w12,w12,#31
- eor w13,w13,w15
+ ror w12,w12,#31
+ eor w13,w13,w15
eor w25,w24,w22
ror w27,w21,#27
add w24,w24,w28 // future e+=K
- eor w13,w13,w5
+ eor w13,w13,w5
eor w25,w25,w23
add w20,w20,w27 // e+=rot(a,5)
ror w22,w22,#2
- eor w13,w13,w10
+ eor w13,w13,w10
add w24,w24,w12 // future e+=X[i]
add w20,w20,w25 // e+=F(b,c,d)
- ror w13,w13,#31
- eor w14,w14,w16
+ ror w13,w13,#31
+ eor w14,w14,w16
eor w25,w23,w21
ror w27,w20,#27
add w23,w23,w28 // future e+=K
- eor w14,w14,w6
+ eor w14,w14,w6
eor w25,w25,w22
add w24,w24,w27 // e+=rot(a,5)
ror w21,w21,#2
- eor w14,w14,w11
+ eor w14,w14,w11
add w23,w23,w13 // future e+=X[i]
add w24,w24,w25 // e+=F(b,c,d)
- ror w14,w14,#31
- eor w15,w15,w17
+ ror w14,w14,#31
+ eor w15,w15,w17
eor w25,w22,w20
ror w27,w24,#27
add w22,w22,w28 // future e+=K
- eor w15,w15,w7
+ eor w15,w15,w7
eor w25,w25,w21
add w23,w23,w27 // e+=rot(a,5)
ror w20,w20,#2
- eor w15,w15,w12
+ eor w15,w15,w12
add w22,w22,w14 // future e+=X[i]
add w23,w23,w25 // e+=F(b,c,d)
- ror w15,w15,#31
- eor w16,w16,w19
+ ror w15,w15,#31
+ eor w16,w16,w19
eor w25,w21,w24
ror w27,w23,#27
add w21,w21,w28 // future e+=K
- eor w16,w16,w8
+ eor w16,w16,w8
eor w25,w25,w20
add w22,w22,w27 // e+=rot(a,5)
ror w24,w24,#2
- eor w16,w16,w13
+ eor w16,w16,w13
add w21,w21,w15 // future e+=X[i]
add w22,w22,w25 // e+=F(b,c,d)
- ror w16,w16,#31
- eor w17,w17,w3
+ ror w16,w16,#31
+ eor w17,w17,w3
eor w25,w20,w23
ror w27,w22,#27
add w20,w20,w28 // future e+=K
- eor w17,w17,w9
+ eor w17,w17,w9
eor w25,w25,w24
add w21,w21,w27 // e+=rot(a,5)
ror w23,w23,#2
- eor w17,w17,w14
+ eor w17,w17,w14
add w20,w20,w16 // future e+=X[i]
add w21,w21,w25 // e+=F(b,c,d)
- ror w17,w17,#31
- eor w19,w19,w4
+ ror w17,w17,#31
+ eor w19,w19,w4
eor w25,w24,w22
ror w27,w21,#27
add w24,w24,w28 // future e+=K
- eor w19,w19,w10
+ eor w19,w19,w10
eor w25,w25,w23
add w20,w20,w27 // e+=rot(a,5)
ror w22,w22,#2
- eor w19,w19,w15
+ eor w19,w19,w15
add w24,w24,w17 // future e+=X[i]
add w20,w20,w25 // e+=F(b,c,d)
- ror w19,w19,#31
- eor w3,w3,w5
+ ror w19,w19,#31
+ eor w3,w3,w5
eor w25,w23,w21
ror w27,w20,#27
add w23,w23,w28 // future e+=K
- eor w3,w3,w11
+ eor w3,w3,w11
eor w25,w25,w22
add w24,w24,w27 // e+=rot(a,5)
ror w21,w21,#2
- eor w3,w3,w16
+ eor w3,w3,w16
add w23,w23,w19 // future e+=X[i]
add w24,w24,w25 // e+=F(b,c,d)
- ror w3,w3,#31
- eor w4,w4,w6
+ ror w3,w3,#31
+ eor w4,w4,w6
eor w25,w22,w20
ror w27,w24,#27
add w22,w22,w28 // future e+=K
- eor w4,w4,w12
+ eor w4,w4,w12
eor w25,w25,w21
add w23,w23,w27 // e+=rot(a,5)
ror w20,w20,#2
- eor w4,w4,w17
+ eor w4,w4,w17
add w22,w22,w3 // future e+=X[i]
add w23,w23,w25 // e+=F(b,c,d)
- ror w4,w4,#31
- eor w5,w5,w7
+ ror w4,w4,#31
+ eor w5,w5,w7
eor w25,w21,w24
ror w27,w23,#27
add w21,w21,w28 // future e+=K
- eor w5,w5,w13
+ eor w5,w5,w13
eor w25,w25,w20
add w22,w22,w27 // e+=rot(a,5)
ror w24,w24,#2
- eor w5,w5,w19
+ eor w5,w5,w19
add w21,w21,w4 // future e+=X[i]
add w22,w22,w25 // e+=F(b,c,d)
- ror w5,w5,#31
- eor w6,w6,w8
+ ror w5,w5,#31
+ eor w6,w6,w8
eor w25,w20,w23
ror w27,w22,#27
add w20,w20,w28 // future e+=K
- eor w6,w6,w14
+ eor w6,w6,w14
eor w25,w25,w24
add w21,w21,w27 // e+=rot(a,5)
ror w23,w23,#2
- eor w6,w6,w3
+ eor w6,w6,w3
add w20,w20,w5 // future e+=X[i]
add w21,w21,w25 // e+=F(b,c,d)
- ror w6,w6,#31
- eor w7,w7,w9
+ ror w6,w6,#31
+ eor w7,w7,w9
eor w25,w24,w22
ror w27,w21,#27
add w24,w24,w28 // future e+=K
- eor w7,w7,w15
+ eor w7,w7,w15
eor w25,w25,w23
add w20,w20,w27 // e+=rot(a,5)
ror w22,w22,#2
- eor w7,w7,w4
+ eor w7,w7,w4
add w24,w24,w6 // future e+=X[i]
add w20,w20,w25 // e+=F(b,c,d)
- ror w7,w7,#31
- eor w8,w8,w10
+ ror w7,w7,#31
+ eor w8,w8,w10
eor w25,w23,w21
ror w27,w20,#27
add w23,w23,w28 // future e+=K
- eor w8,w8,w16
+ eor w8,w8,w16
eor w25,w25,w22
add w24,w24,w27 // e+=rot(a,5)
ror w21,w21,#2
- eor w8,w8,w5
+ eor w8,w8,w5
add w23,w23,w7 // future e+=X[i]
add w24,w24,w25 // e+=F(b,c,d)
- ror w8,w8,#31
- eor w9,w9,w11
+ ror w8,w8,#31
+ eor w9,w9,w11
eor w25,w22,w20
ror w27,w24,#27
add w22,w22,w28 // future e+=K
- eor w9,w9,w17
+ eor w9,w9,w17
eor w25,w25,w21
add w23,w23,w27 // e+=rot(a,5)
ror w20,w20,#2
- eor w9,w9,w6
+ eor w9,w9,w6
add w22,w22,w8 // future e+=X[i]
add w23,w23,w25 // e+=F(b,c,d)
- ror w9,w9,#31
- eor w10,w10,w12
+ ror w9,w9,#31
+ eor w10,w10,w12
eor w25,w21,w24
ror w27,w23,#27
add w21,w21,w28 // future e+=K
- eor w10,w10,w19
+ eor w10,w10,w19
eor w25,w25,w20
add w22,w22,w27 // e+=rot(a,5)
ror w24,w24,#2
- eor w10,w10,w7
+ eor w10,w10,w7
add w21,w21,w9 // future e+=X[i]
add w22,w22,w25 // e+=F(b,c,d)
- ror w10,w10,#31
- eor w11,w11,w13
+ ror w10,w10,#31
+ eor w11,w11,w13
eor w25,w20,w23
ror w27,w22,#27
add w20,w20,w28 // future e+=K
- eor w11,w11,w3
+ eor w11,w11,w3
eor w25,w25,w24
add w21,w21,w27 // e+=rot(a,5)
ror w23,w23,#2
- eor w11,w11,w8
+ eor w11,w11,w8
add w20,w20,w10 // future e+=X[i]
add w21,w21,w25 // e+=F(b,c,d)
- ror w11,w11,#31
+ ror w11,w11,#31
movz w28,#0xbcdc
movk w28,#0x8f1b,lsl#16
- eor w12,w12,w14
+ eor w12,w12,w14
eor w25,w24,w22
ror w27,w21,#27
add w24,w24,w28 // future e+=K
- eor w12,w12,w4
+ eor w12,w12,w4
eor w25,w25,w23
add w20,w20,w27 // e+=rot(a,5)
ror w22,w22,#2
- eor w12,w12,w9
+ eor w12,w12,w9
add w24,w24,w11 // future e+=X[i]
add w20,w20,w25 // e+=F(b,c,d)
- ror w12,w12,#31
+ ror w12,w12,#31
orr w25,w21,w22
and w26,w21,w22
- eor w13,w13,w15
+ eor w13,w13,w15
ror w27,w20,#27
and w25,w25,w23
add w23,w23,w28 // future e+=K
- eor w13,w13,w5
+ eor w13,w13,w5
add w24,w24,w27 // e+=rot(a,5)
orr w25,w25,w26
ror w21,w21,#2
- eor w13,w13,w10
+ eor w13,w13,w10
add w23,w23,w12 // future e+=X[i]
add w24,w24,w25 // e+=F(b,c,d)
- ror w13,w13,#31
+ ror w13,w13,#31
orr w25,w20,w21
and w26,w20,w21
- eor w14,w14,w16
+ eor w14,w14,w16
ror w27,w24,#27
and w25,w25,w22
add w22,w22,w28 // future e+=K
- eor w14,w14,w6
+ eor w14,w14,w6
add w23,w23,w27 // e+=rot(a,5)
orr w25,w25,w26
ror w20,w20,#2
- eor w14,w14,w11
+ eor w14,w14,w11
add w22,w22,w13 // future e+=X[i]
add w23,w23,w25 // e+=F(b,c,d)
- ror w14,w14,#31
+ ror w14,w14,#31
orr w25,w24,w20
and w26,w24,w20
- eor w15,w15,w17
+ eor w15,w15,w17
ror w27,w23,#27
and w25,w25,w21
add w21,w21,w28 // future e+=K
- eor w15,w15,w7
+ eor w15,w15,w7
add w22,w22,w27 // e+=rot(a,5)
orr w25,w25,w26
ror w24,w24,#2
- eor w15,w15,w12
+ eor w15,w15,w12
add w21,w21,w14 // future e+=X[i]
add w22,w22,w25 // e+=F(b,c,d)
- ror w15,w15,#31
+ ror w15,w15,#31
orr w25,w23,w24
and w26,w23,w24
- eor w16,w16,w19
+ eor w16,w16,w19
ror w27,w22,#27
and w25,w25,w20
add w20,w20,w28 // future e+=K
- eor w16,w16,w8
+ eor w16,w16,w8
add w21,w21,w27 // e+=rot(a,5)
orr w25,w25,w26
ror w23,w23,#2
- eor w16,w16,w13
+ eor w16,w16,w13
add w20,w20,w15 // future e+=X[i]
add w21,w21,w25 // e+=F(b,c,d)
- ror w16,w16,#31
+ ror w16,w16,#31
orr w25,w22,w23
and w26,w22,w23
- eor w17,w17,w3
+ eor w17,w17,w3
ror w27,w21,#27
and w25,w25,w24
add w24,w24,w28 // future e+=K
- eor w17,w17,w9
+ eor w17,w17,w9
add w20,w20,w27 // e+=rot(a,5)
orr w25,w25,w26
ror w22,w22,#2
- eor w17,w17,w14
+ eor w17,w17,w14
add w24,w24,w16 // future e+=X[i]
add w20,w20,w25 // e+=F(b,c,d)
- ror w17,w17,#31
+ ror w17,w17,#31
orr w25,w21,w22
and w26,w21,w22
- eor w19,w19,w4
+ eor w19,w19,w4
ror w27,w20,#27
and w25,w25,w23
add w23,w23,w28 // future e+=K
- eor w19,w19,w10
+ eor w19,w19,w10
add w24,w24,w27 // e+=rot(a,5)
orr w25,w25,w26
ror w21,w21,#2
- eor w19,w19,w15
+ eor w19,w19,w15
add w23,w23,w17 // future e+=X[i]
add w24,w24,w25 // e+=F(b,c,d)
- ror w19,w19,#31
+ ror w19,w19,#31
orr w25,w20,w21
and w26,w20,w21
- eor w3,w3,w5
+ eor w3,w3,w5
ror w27,w24,#27
and w25,w25,w22
add w22,w22,w28 // future e+=K
- eor w3,w3,w11
+ eor w3,w3,w11
add w23,w23,w27 // e+=rot(a,5)
orr w25,w25,w26
ror w20,w20,#2
- eor w3,w3,w16
+ eor w3,w3,w16
add w22,w22,w19 // future e+=X[i]
add w23,w23,w25 // e+=F(b,c,d)
- ror w3,w3,#31
+ ror w3,w3,#31
orr w25,w24,w20
and w26,w24,w20
- eor w4,w4,w6
+ eor w4,w4,w6
ror w27,w23,#27
and w25,w25,w21
add w21,w21,w28 // future e+=K
- eor w4,w4,w12
+ eor w4,w4,w12
add w22,w22,w27 // e+=rot(a,5)
orr w25,w25,w26
ror w24,w24,#2
- eor w4,w4,w17
+ eor w4,w4,w17
add w21,w21,w3 // future e+=X[i]
add w22,w22,w25 // e+=F(b,c,d)
- ror w4,w4,#31
+ ror w4,w4,#31
orr w25,w23,w24
and w26,w23,w24
- eor w5,w5,w7
+ eor w5,w5,w7
ror w27,w22,#27
and w25,w25,w20
add w20,w20,w28 // future e+=K
- eor w5,w5,w13
+ eor w5,w5,w13
add w21,w21,w27 // e+=rot(a,5)
orr w25,w25,w26
ror w23,w23,#2
- eor w5,w5,w19
+ eor w5,w5,w19
add w20,w20,w4 // future e+=X[i]
add w21,w21,w25 // e+=F(b,c,d)
- ror w5,w5,#31
+ ror w5,w5,#31
orr w25,w22,w23
and w26,w22,w23
- eor w6,w6,w8
+ eor w6,w6,w8
ror w27,w21,#27
and w25,w25,w24
add w24,w24,w28 // future e+=K
- eor w6,w6,w14
+ eor w6,w6,w14
add w20,w20,w27 // e+=rot(a,5)
orr w25,w25,w26
ror w22,w22,#2
- eor w6,w6,w3
+ eor w6,w6,w3
add w24,w24,w5 // future e+=X[i]
add w20,w20,w25 // e+=F(b,c,d)
- ror w6,w6,#31
+ ror w6,w6,#31
orr w25,w21,w22
and w26,w21,w22
- eor w7,w7,w9
+ eor w7,w7,w9
ror w27,w20,#27
and w25,w25,w23
add w23,w23,w28 // future e+=K
- eor w7,w7,w15
+ eor w7,w7,w15
add w24,w24,w27 // e+=rot(a,5)
orr w25,w25,w26
ror w21,w21,#2
- eor w7,w7,w4
+ eor w7,w7,w4
add w23,w23,w6 // future e+=X[i]
add w24,w24,w25 // e+=F(b,c,d)
- ror w7,w7,#31
+ ror w7,w7,#31
orr w25,w20,w21
and w26,w20,w21
- eor w8,w8,w10
+ eor w8,w8,w10
ror w27,w24,#27
and w25,w25,w22
add w22,w22,w28 // future e+=K
- eor w8,w8,w16
+ eor w8,w8,w16
add w23,w23,w27 // e+=rot(a,5)
orr w25,w25,w26
ror w20,w20,#2
- eor w8,w8,w5
+ eor w8,w8,w5
add w22,w22,w7 // future e+=X[i]
add w23,w23,w25 // e+=F(b,c,d)
- ror w8,w8,#31
+ ror w8,w8,#31
orr w25,w24,w20
and w26,w24,w20
- eor w9,w9,w11
+ eor w9,w9,w11
ror w27,w23,#27
and w25,w25,w21
add w21,w21,w28 // future e+=K
- eor w9,w9,w17
+ eor w9,w9,w17
add w22,w22,w27 // e+=rot(a,5)
orr w25,w25,w26
ror w24,w24,#2
- eor w9,w9,w6
+ eor w9,w9,w6
add w21,w21,w8 // future e+=X[i]
add w22,w22,w25 // e+=F(b,c,d)
- ror w9,w9,#31
+ ror w9,w9,#31
orr w25,w23,w24
and w26,w23,w24
- eor w10,w10,w12
+ eor w10,w10,w12
ror w27,w22,#27
and w25,w25,w20
add w20,w20,w28 // future e+=K
- eor w10,w10,w19
+ eor w10,w10,w19
add w21,w21,w27 // e+=rot(a,5)
orr w25,w25,w26
ror w23,w23,#2
- eor w10,w10,w7
+ eor w10,w10,w7
add w20,w20,w9 // future e+=X[i]
add w21,w21,w25 // e+=F(b,c,d)
- ror w10,w10,#31
+ ror w10,w10,#31
orr w25,w22,w23
and w26,w22,w23
- eor w11,w11,w13
+ eor w11,w11,w13
ror w27,w21,#27
and w25,w25,w24
add w24,w24,w28 // future e+=K
- eor w11,w11,w3
+ eor w11,w11,w3
add w20,w20,w27 // e+=rot(a,5)
orr w25,w25,w26
ror w22,w22,#2
- eor w11,w11,w8
+ eor w11,w11,w8
add w24,w24,w10 // future e+=X[i]
add w20,w20,w25 // e+=F(b,c,d)
- ror w11,w11,#31
+ ror w11,w11,#31
orr w25,w21,w22
and w26,w21,w22
- eor w12,w12,w14
+ eor w12,w12,w14
ror w27,w20,#27
and w25,w25,w23
add w23,w23,w28 // future e+=K
- eor w12,w12,w4
+ eor w12,w12,w4
add w24,w24,w27 // e+=rot(a,5)
orr w25,w25,w26
ror w21,w21,#2
- eor w12,w12,w9
+ eor w12,w12,w9
add w23,w23,w11 // future e+=X[i]
add w24,w24,w25 // e+=F(b,c,d)
- ror w12,w12,#31
+ ror w12,w12,#31
orr w25,w20,w21
and w26,w20,w21
- eor w13,w13,w15
+ eor w13,w13,w15
ror w27,w24,#27
and w25,w25,w22
add w22,w22,w28 // future e+=K
- eor w13,w13,w5
+ eor w13,w13,w5
add w23,w23,w27 // e+=rot(a,5)
orr w25,w25,w26
ror w20,w20,#2
- eor w13,w13,w10
+ eor w13,w13,w10
add w22,w22,w12 // future e+=X[i]
add w23,w23,w25 // e+=F(b,c,d)
- ror w13,w13,#31
+ ror w13,w13,#31
orr w25,w24,w20
and w26,w24,w20
- eor w14,w14,w16
+ eor w14,w14,w16
ror w27,w23,#27
and w25,w25,w21
add w21,w21,w28 // future e+=K
- eor w14,w14,w6
+ eor w14,w14,w6
add w22,w22,w27 // e+=rot(a,5)
orr w25,w25,w26
ror w24,w24,#2
- eor w14,w14,w11
+ eor w14,w14,w11
add w21,w21,w13 // future e+=X[i]
add w22,w22,w25 // e+=F(b,c,d)
- ror w14,w14,#31
+ ror w14,w14,#31
orr w25,w23,w24
and w26,w23,w24
- eor w15,w15,w17
+ eor w15,w15,w17
ror w27,w22,#27
and w25,w25,w20
add w20,w20,w28 // future e+=K
- eor w15,w15,w7
+ eor w15,w15,w7
add w21,w21,w27 // e+=rot(a,5)
orr w25,w25,w26
ror w23,w23,#2
- eor w15,w15,w12
+ eor w15,w15,w12
add w20,w20,w14 // future e+=X[i]
add w21,w21,w25 // e+=F(b,c,d)
- ror w15,w15,#31
+ ror w15,w15,#31
movz w28,#0xc1d6
movk w28,#0xca62,lsl#16
orr w25,w22,w23
and w26,w22,w23
- eor w16,w16,w19
+ eor w16,w16,w19
ror w27,w21,#27
and w25,w25,w24
add w24,w24,w28 // future e+=K
- eor w16,w16,w8
+ eor w16,w16,w8
add w20,w20,w27 // e+=rot(a,5)
orr w25,w25,w26
ror w22,w22,#2
- eor w16,w16,w13
+ eor w16,w16,w13
add w24,w24,w15 // future e+=X[i]
add w20,w20,w25 // e+=F(b,c,d)
- ror w16,w16,#31
- eor w17,w17,w3
+ ror w16,w16,#31
+ eor w17,w17,w3
eor w25,w23,w21
ror w27,w20,#27
add w23,w23,w28 // future e+=K
- eor w17,w17,w9
+ eor w17,w17,w9
eor w25,w25,w22
add w24,w24,w27 // e+=rot(a,5)
ror w21,w21,#2
- eor w17,w17,w14
+ eor w17,w17,w14
add w23,w23,w16 // future e+=X[i]
add w24,w24,w25 // e+=F(b,c,d)
- ror w17,w17,#31
- eor w19,w19,w4
+ ror w17,w17,#31
+ eor w19,w19,w4
eor w25,w22,w20
ror w27,w24,#27
add w22,w22,w28 // future e+=K
- eor w19,w19,w10
+ eor w19,w19,w10
eor w25,w25,w21
add w23,w23,w27 // e+=rot(a,5)
ror w20,w20,#2
- eor w19,w19,w15
+ eor w19,w19,w15
add w22,w22,w17 // future e+=X[i]
add w23,w23,w25 // e+=F(b,c,d)
- ror w19,w19,#31
- eor w3,w3,w5
+ ror w19,w19,#31
+ eor w3,w3,w5
eor w25,w21,w24
ror w27,w23,#27
add w21,w21,w28 // future e+=K
- eor w3,w3,w11
+ eor w3,w3,w11
eor w25,w25,w20
add w22,w22,w27 // e+=rot(a,5)
ror w24,w24,#2
- eor w3,w3,w16
+ eor w3,w3,w16
add w21,w21,w19 // future e+=X[i]
add w22,w22,w25 // e+=F(b,c,d)
- ror w3,w3,#31
- eor w4,w4,w6
+ ror w3,w3,#31
+ eor w4,w4,w6
eor w25,w20,w23
ror w27,w22,#27
add w20,w20,w28 // future e+=K
- eor w4,w4,w12
+ eor w4,w4,w12
eor w25,w25,w24
add w21,w21,w27 // e+=rot(a,5)
ror w23,w23,#2
- eor w4,w4,w17
+ eor w4,w4,w17
add w20,w20,w3 // future e+=X[i]
add w21,w21,w25 // e+=F(b,c,d)
- ror w4,w4,#31
- eor w5,w5,w7
+ ror w4,w4,#31
+ eor w5,w5,w7
eor w25,w24,w22
ror w27,w21,#27
add w24,w24,w28 // future e+=K
- eor w5,w5,w13
+ eor w5,w5,w13
eor w25,w25,w23
add w20,w20,w27 // e+=rot(a,5)
ror w22,w22,#2
- eor w5,w5,w19
+ eor w5,w5,w19
add w24,w24,w4 // future e+=X[i]
add w20,w20,w25 // e+=F(b,c,d)
- ror w5,w5,#31
- eor w6,w6,w8
+ ror w5,w5,#31
+ eor w6,w6,w8
eor w25,w23,w21
ror w27,w20,#27
add w23,w23,w28 // future e+=K
- eor w6,w6,w14
+ eor w6,w6,w14
eor w25,w25,w22
add w24,w24,w27 // e+=rot(a,5)
ror w21,w21,#2
- eor w6,w6,w3
+ eor w6,w6,w3
add w23,w23,w5 // future e+=X[i]
add w24,w24,w25 // e+=F(b,c,d)
- ror w6,w6,#31
- eor w7,w7,w9
+ ror w6,w6,#31
+ eor w7,w7,w9
eor w25,w22,w20
ror w27,w24,#27
add w22,w22,w28 // future e+=K
- eor w7,w7,w15
+ eor w7,w7,w15
eor w25,w25,w21
add w23,w23,w27 // e+=rot(a,5)
ror w20,w20,#2
- eor w7,w7,w4
+ eor w7,w7,w4
add w22,w22,w6 // future e+=X[i]
add w23,w23,w25 // e+=F(b,c,d)
- ror w7,w7,#31
- eor w8,w8,w10
+ ror w7,w7,#31
+ eor w8,w8,w10
eor w25,w21,w24
ror w27,w23,#27
add w21,w21,w28 // future e+=K
- eor w8,w8,w16
+ eor w8,w8,w16
eor w25,w25,w20
add w22,w22,w27 // e+=rot(a,5)
ror w24,w24,#2
- eor w8,w8,w5
+ eor w8,w8,w5
add w21,w21,w7 // future e+=X[i]
add w22,w22,w25 // e+=F(b,c,d)
- ror w8,w8,#31
- eor w9,w9,w11
+ ror w8,w8,#31
+ eor w9,w9,w11
eor w25,w20,w23
ror w27,w22,#27
add w20,w20,w28 // future e+=K
- eor w9,w9,w17
+ eor w9,w9,w17
eor w25,w25,w24
add w21,w21,w27 // e+=rot(a,5)
ror w23,w23,#2
- eor w9,w9,w6
+ eor w9,w9,w6
add w20,w20,w8 // future e+=X[i]
add w21,w21,w25 // e+=F(b,c,d)
- ror w9,w9,#31
- eor w10,w10,w12
+ ror w9,w9,#31
+ eor w10,w10,w12
eor w25,w24,w22
ror w27,w21,#27
add w24,w24,w28 // future e+=K
- eor w10,w10,w19
+ eor w10,w10,w19
eor w25,w25,w23
add w20,w20,w27 // e+=rot(a,5)
ror w22,w22,#2
- eor w10,w10,w7
+ eor w10,w10,w7
add w24,w24,w9 // future e+=X[i]
add w20,w20,w25 // e+=F(b,c,d)
- ror w10,w10,#31
- eor w11,w11,w13
+ ror w10,w10,#31
+ eor w11,w11,w13
eor w25,w23,w21
ror w27,w20,#27
add w23,w23,w28 // future e+=K
- eor w11,w11,w3
+ eor w11,w11,w3
eor w25,w25,w22
add w24,w24,w27 // e+=rot(a,5)
ror w21,w21,#2
- eor w11,w11,w8
+ eor w11,w11,w8
add w23,w23,w10 // future e+=X[i]
add w24,w24,w25 // e+=F(b,c,d)
- ror w11,w11,#31
- eor w12,w12,w14
+ ror w11,w11,#31
+ eor w12,w12,w14
eor w25,w22,w20
ror w27,w24,#27
add w22,w22,w28 // future e+=K
- eor w12,w12,w4
+ eor w12,w12,w4
eor w25,w25,w21
add w23,w23,w27 // e+=rot(a,5)
ror w20,w20,#2
- eor w12,w12,w9
+ eor w12,w12,w9
add w22,w22,w11 // future e+=X[i]
add w23,w23,w25 // e+=F(b,c,d)
- ror w12,w12,#31
- eor w13,w13,w15
+ ror w12,w12,#31
+ eor w13,w13,w15
eor w25,w21,w24
ror w27,w23,#27
add w21,w21,w28 // future e+=K
- eor w13,w13,w5
+ eor w13,w13,w5
eor w25,w25,w20
add w22,w22,w27 // e+=rot(a,5)
ror w24,w24,#2
- eor w13,w13,w10
+ eor w13,w13,w10
add w21,w21,w12 // future e+=X[i]
add w22,w22,w25 // e+=F(b,c,d)
- ror w13,w13,#31
- eor w14,w14,w16
+ ror w13,w13,#31
+ eor w14,w14,w16
eor w25,w20,w23
ror w27,w22,#27
add w20,w20,w28 // future e+=K
- eor w14,w14,w6
+ eor w14,w14,w6
eor w25,w25,w24
add w21,w21,w27 // e+=rot(a,5)
ror w23,w23,#2
- eor w14,w14,w11
+ eor w14,w14,w11
add w20,w20,w13 // future e+=X[i]
add w21,w21,w25 // e+=F(b,c,d)
- ror w14,w14,#31
- eor w15,w15,w17
+ ror w14,w14,#31
+ eor w15,w15,w17
eor w25,w24,w22
ror w27,w21,#27
add w24,w24,w28 // future e+=K
- eor w15,w15,w7
+ eor w15,w15,w7
eor w25,w25,w23
add w20,w20,w27 // e+=rot(a,5)
ror w22,w22,#2
- eor w15,w15,w12
+ eor w15,w15,w12
add w24,w24,w14 // future e+=X[i]
add w20,w20,w25 // e+=F(b,c,d)
- ror w15,w15,#31
- eor w16,w16,w19
+ ror w15,w15,#31
+ eor w16,w16,w19
eor w25,w23,w21
ror w27,w20,#27
add w23,w23,w28 // future e+=K
- eor w16,w16,w8
+ eor w16,w16,w8
eor w25,w25,w22
add w24,w24,w27 // e+=rot(a,5)
ror w21,w21,#2
- eor w16,w16,w13
+ eor w16,w16,w13
add w23,w23,w15 // future e+=X[i]
add w24,w24,w25 // e+=F(b,c,d)
- ror w16,w16,#31
- eor w17,w17,w3
+ ror w16,w16,#31
+ eor w17,w17,w3
eor w25,w22,w20
ror w27,w24,#27
add w22,w22,w28 // future e+=K
- eor w17,w17,w9
+ eor w17,w17,w9
eor w25,w25,w21
add w23,w23,w27 // e+=rot(a,5)
ror w20,w20,#2
- eor w17,w17,w14
+ eor w17,w17,w14
add w22,w22,w16 // future e+=X[i]
add w23,w23,w25 // e+=F(b,c,d)
- ror w17,w17,#31
- eor w19,w19,w4
+ ror w17,w17,#31
+ eor w19,w19,w4
eor w25,w21,w24
ror w27,w23,#27
add w21,w21,w28 // future e+=K
- eor w19,w19,w10
+ eor w19,w19,w10
eor w25,w25,w20
add w22,w22,w27 // e+=rot(a,5)
ror w24,w24,#2
- eor w19,w19,w15
+ eor w19,w19,w15
add w21,w21,w17 // future e+=X[i]
add w22,w22,w25 // e+=F(b,c,d)
- ror w19,w19,#31
+ ror w19,w19,#31
ldp w4,w5,[x0]
eor w25,w20,w23
ror w27,w22,#27
@@ -1080,10 +1081,10 @@ sha1_block_armv8:
ld1 {v0.4s},[x0],#16
ld1 {v1.s}[0],[x0]
sub x0,x0,#16
- ld1 {v16.4s-v19.4s},[x4]
+ ld1 {v16.4s,v17.4s,v18.4s,v19.4s},[x4]
.Loop_hw:
- ld1 {v4.16b-v7.16b},[x1],#64
+ ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64
sub x2,x2,#1
rev32 v4.16b,v4.16b
rev32 v5.16b,v5.16b
@@ -1094,98 +1095,98 @@ sha1_block_armv8:
add v21.4s,v16.4s,v5.4s
rev32 v7.16b,v7.16b
- .inst 0x5e280803 //sha1h v3.16b,v0.16b
- .inst 0x5e140020 //sha1c v0.16b,v1.16b,v20.4s // 0
+.inst 0x5e280803 //sha1h v3.16b,v0.16b
+.inst 0x5e140020 //sha1c v0.16b,v1.16b,v20.4s // 0
add v20.4s,v16.4s,v6.4s
- .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b
- .inst 0x5e280802 //sha1h v2.16b,v0.16b // 1
- .inst 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s
+.inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b
+.inst 0x5e280802 //sha1h v2.16b,v0.16b // 1
+.inst 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s
add v21.4s,v16.4s,v7.4s
- .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b
- .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b
- .inst 0x5e280803 //sha1h v3.16b,v0.16b // 2
- .inst 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s
+.inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b
+.inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b
+.inst 0x5e280803 //sha1h v3.16b,v0.16b // 2
+.inst 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s
add v20.4s,v16.4s,v4.4s
- .inst 0x5e281885 //sha1su1 v5.16b,v4.16b
- .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b
- .inst 0x5e280802 //sha1h v2.16b,v0.16b // 3
- .inst 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s
+.inst 0x5e281885 //sha1su1 v5.16b,v4.16b
+.inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b
+.inst 0x5e280802 //sha1h v2.16b,v0.16b // 3
+.inst 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s
add v21.4s,v17.4s,v5.4s
- .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b
- .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b
- .inst 0x5e280803 //sha1h v3.16b,v0.16b // 4
- .inst 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s
+.inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b
+.inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b
+.inst 0x5e280803 //sha1h v3.16b,v0.16b // 4
+.inst 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s
add v20.4s,v17.4s,v6.4s
- .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b
- .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b
- .inst 0x5e280802 //sha1h v2.16b,v0.16b // 5
- .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
+.inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b
+.inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b
+.inst 0x5e280802 //sha1h v2.16b,v0.16b // 5
+.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
add v21.4s,v17.4s,v7.4s
- .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b
- .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b
- .inst 0x5e280803 //sha1h v3.16b,v0.16b // 6
- .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s
+.inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b
+.inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b
+.inst 0x5e280803 //sha1h v3.16b,v0.16b // 6
+.inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s
add v20.4s,v17.4s,v4.4s
- .inst 0x5e281885 //sha1su1 v5.16b,v4.16b
- .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b
- .inst 0x5e280802 //sha1h v2.16b,v0.16b // 7
- .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
+.inst 0x5e281885 //sha1su1 v5.16b,v4.16b
+.inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b
+.inst 0x5e280802 //sha1h v2.16b,v0.16b // 7
+.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
add v21.4s,v17.4s,v5.4s
- .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b
- .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b
- .inst 0x5e280803 //sha1h v3.16b,v0.16b // 8
- .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s
+.inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b
+.inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b
+.inst 0x5e280803 //sha1h v3.16b,v0.16b // 8
+.inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s
add v20.4s,v18.4s,v6.4s
- .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b
- .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b
- .inst 0x5e280802 //sha1h v2.16b,v0.16b // 9
- .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
+.inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b
+.inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b
+.inst 0x5e280802 //sha1h v2.16b,v0.16b // 9
+.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
add v21.4s,v18.4s,v7.4s
- .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b
- .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b
- .inst 0x5e280803 //sha1h v3.16b,v0.16b // 10
- .inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s
+.inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b
+.inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b
+.inst 0x5e280803 //sha1h v3.16b,v0.16b // 10
+.inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s
add v20.4s,v18.4s,v4.4s
- .inst 0x5e281885 //sha1su1 v5.16b,v4.16b
- .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b
- .inst 0x5e280802 //sha1h v2.16b,v0.16b // 11
- .inst 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s
+.inst 0x5e281885 //sha1su1 v5.16b,v4.16b
+.inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b
+.inst 0x5e280802 //sha1h v2.16b,v0.16b // 11
+.inst 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s
add v21.4s,v18.4s,v5.4s
- .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b
- .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b
- .inst 0x5e280803 //sha1h v3.16b,v0.16b // 12
- .inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s
+.inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b
+.inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b
+.inst 0x5e280803 //sha1h v3.16b,v0.16b // 12
+.inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s
add v20.4s,v18.4s,v6.4s
- .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b
- .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b
- .inst 0x5e280802 //sha1h v2.16b,v0.16b // 13
- .inst 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s
+.inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b
+.inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b
+.inst 0x5e280802 //sha1h v2.16b,v0.16b // 13
+.inst 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s
add v21.4s,v19.4s,v7.4s
- .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b
- .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b
- .inst 0x5e280803 //sha1h v3.16b,v0.16b // 14
- .inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s
+.inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b
+.inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b
+.inst 0x5e280803 //sha1h v3.16b,v0.16b // 14
+.inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s
add v20.4s,v19.4s,v4.4s
- .inst 0x5e281885 //sha1su1 v5.16b,v4.16b
- .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b
- .inst 0x5e280802 //sha1h v2.16b,v0.16b // 15
- .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
+.inst 0x5e281885 //sha1su1 v5.16b,v4.16b
+.inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b
+.inst 0x5e280802 //sha1h v2.16b,v0.16b // 15
+.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
add v21.4s,v19.4s,v5.4s
- .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b
- .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b
- .inst 0x5e280803 //sha1h v3.16b,v0.16b // 16
- .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s
+.inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b
+.inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b
+.inst 0x5e280803 //sha1h v3.16b,v0.16b // 16
+.inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s
add v20.4s,v19.4s,v6.4s
- .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b
- .inst 0x5e280802 //sha1h v2.16b,v0.16b // 17
- .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
+.inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b
+.inst 0x5e280802 //sha1h v2.16b,v0.16b // 17
+.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
add v21.4s,v19.4s,v7.4s
- .inst 0x5e280803 //sha1h v3.16b,v0.16b // 18
- .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s
+.inst 0x5e280803 //sha1h v3.16b,v0.16b // 18
+.inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s
- .inst 0x5e280802 //sha1h v2.16b,v0.16b // 19
- .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
+.inst 0x5e280802 //sha1h v2.16b,v0.16b // 19
+.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
add v1.4s,v1.4s,v2.4s
add v0.4s,v0.4s,v22.4s
@@ -1206,6 +1207,7 @@ sha1_block_armv8:
.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79
.LOPENSSL_armcap_P:
.quad OPENSSL_armcap_P-.
-.asciz "SHA1 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
+.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 2
.align 2
.comm OPENSSL_armcap_P,4,4
diff --git a/linux-aarch64/crypto/sha/sha256-armv8.S b/linux-aarch64/crypto/sha/sha256-armv8.S
index bd43b1f..ec572e9 100644
--- a/linux-aarch64/crypto/sha/sha256-armv8.S
+++ b/linux-aarch64/crypto/sha/sha256-armv8.S
@@ -2,6 +2,7 @@
.text
+
.globl sha256_block_data_order
.type sha256_block_data_order,%function
.align 6
@@ -27,7 +28,7 @@ sha256_block_data_order:
ldp w24,w25,[x0,#4*4]
add x2,x1,x2,lsl#6 // end of input
ldp w26,w27,[x0,#6*4]
- adr x30,K256
+ adr x30,.LK256
stp x0,x2,[x29,#96]
.Loop:
@@ -975,167 +976,168 @@ sha256_block_data_order:
.size sha256_block_data_order,.-sha256_block_data_order
.align 6
-.type K256,%object
-K256:
- .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
- .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
- .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
- .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
- .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
- .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
- .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
- .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
- .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
- .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
- .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
- .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
- .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
- .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
- .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
- .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
- .long 0 //terminator
-.size K256,.-K256
+.type .LK256,%object
+.LK256:
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.long 0 //terminator
+.size .LK256,.-.LK256
.align 3
.LOPENSSL_armcap_P:
- .quad OPENSSL_armcap_P-.
-.asciz "SHA256 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
+.quad OPENSSL_armcap_P-.
+.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 2
.align 2
.type sha256_block_armv8,%function
.align 6
sha256_block_armv8:
.Lv8_entry:
- stp x29,x30,[sp,#-16]!
- add x29,sp,#0
+ stp x29,x30,[sp,#-16]!
+ add x29,sp,#0
- ld1 {v0.4s,v1.4s},[x0]
- adr x3,K256
+ ld1 {v0.4s,v1.4s},[x0]
+ adr x3,.LK256
.Loop_hw:
- ld1 {v4.16b-v7.16b},[x1],#64
- sub x2,x2,#1
- ld1 {v16.4s},[x3],#16
- rev32 v4.16b,v4.16b
- rev32 v5.16b,v5.16b
- rev32 v6.16b,v6.16b
- rev32 v7.16b,v7.16b
- orr v18.16b,v0.16b,v0.16b // offload
- orr v19.16b,v1.16b,v1.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v4.4s
- .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v5.4s
- .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v6.4s
- .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v7.4s
- .inst 0x5e282887 //sha256su0 v7.16b,v4.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v4.4s
- .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v5.4s
- .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v6.4s
- .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v7.4s
- .inst 0x5e282887 //sha256su0 v7.16b,v4.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v4.4s
- .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v5.4s
- .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v6.4s
- .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v7.4s
- .inst 0x5e282887 //sha256su0 v7.16b,v4.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v4.4s
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+ ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64
+ sub x2,x2,#1
+ ld1 {v16.4s},[x3],#16
+ rev32 v4.16b,v4.16b
+ rev32 v5.16b,v5.16b
+ rev32 v6.16b,v6.16b
+ rev32 v7.16b,v7.16b
+ orr v18.16b,v0.16b,v0.16b // offload
+ orr v19.16b,v1.16b,v1.16b
+ ld1 {v17.4s},[x3],#16
+ add v16.4s,v16.4s,v4.4s
+.inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
+ orr v2.16b,v0.16b,v0.16b
+.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+.inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
+ ld1 {v16.4s},[x3],#16
+ add v17.4s,v17.4s,v5.4s
+.inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
+ orr v2.16b,v0.16b,v0.16b
+.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+.inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
+ ld1 {v17.4s},[x3],#16
+ add v16.4s,v16.4s,v6.4s
+.inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
+ orr v2.16b,v0.16b,v0.16b
+.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+.inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
+ ld1 {v16.4s},[x3],#16
+ add v17.4s,v17.4s,v7.4s
+.inst 0x5e282887 //sha256su0 v7.16b,v4.16b
+ orr v2.16b,v0.16b,v0.16b
+.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+.inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
+ ld1 {v17.4s},[x3],#16
+ add v16.4s,v16.4s,v4.4s
+.inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
+ orr v2.16b,v0.16b,v0.16b
+.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+.inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
+ ld1 {v16.4s},[x3],#16
+ add v17.4s,v17.4s,v5.4s
+.inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
+ orr v2.16b,v0.16b,v0.16b
+.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+.inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
+ ld1 {v17.4s},[x3],#16
+ add v16.4s,v16.4s,v6.4s
+.inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
+ orr v2.16b,v0.16b,v0.16b
+.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+.inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
+ ld1 {v16.4s},[x3],#16
+ add v17.4s,v17.4s,v7.4s
+.inst 0x5e282887 //sha256su0 v7.16b,v4.16b
+ orr v2.16b,v0.16b,v0.16b
+.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+.inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
+ ld1 {v17.4s},[x3],#16
+ add v16.4s,v16.4s,v4.4s
+.inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
+ orr v2.16b,v0.16b,v0.16b
+.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+.inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
+ ld1 {v16.4s},[x3],#16
+ add v17.4s,v17.4s,v5.4s
+.inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
+ orr v2.16b,v0.16b,v0.16b
+.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+.inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
+ ld1 {v17.4s},[x3],#16
+ add v16.4s,v16.4s,v6.4s
+.inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
+ orr v2.16b,v0.16b,v0.16b
+.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+.inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
+ ld1 {v16.4s},[x3],#16
+ add v17.4s,v17.4s,v7.4s
+.inst 0x5e282887 //sha256su0 v7.16b,v4.16b
+ orr v2.16b,v0.16b,v0.16b
+.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+.inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
+ ld1 {v17.4s},[x3],#16
+ add v16.4s,v16.4s,v4.4s
+ orr v2.16b,v0.16b,v0.16b
+.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v5.4s
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+ ld1 {v16.4s},[x3],#16
+ add v17.4s,v17.4s,v5.4s
+ orr v2.16b,v0.16b,v0.16b
+.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- ld1 {v17.4s},[x3]
- add v16.4s,v16.4s,v6.4s
- sub x3,x3,#64*4-16 // rewind
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+ ld1 {v17.4s},[x3]
+ add v16.4s,v16.4s,v6.4s
+ sub x3,x3,#64*4-16 // rewind
+ orr v2.16b,v0.16b,v0.16b
+.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- add v17.4s,v17.4s,v7.4s
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+ add v17.4s,v17.4s,v7.4s
+ orr v2.16b,v0.16b,v0.16b
+.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- add v0.4s,v0.4s,v18.4s
- add v1.4s,v1.4s,v19.4s
+ add v0.4s,v0.4s,v18.4s
+ add v1.4s,v1.4s,v19.4s
- cbnz x2,.Loop_hw
+ cbnz x2,.Loop_hw
- st1 {v0.4s,v1.4s},[x0]
+ st1 {v0.4s,v1.4s},[x0]
- ldr x29,[sp],#16
+ ldr x29,[sp],#16
ret
.size sha256_block_armv8,.-sha256_block_armv8
.comm OPENSSL_armcap_P,4,4
diff --git a/linux-aarch64/crypto/sha/sha512-armv8.S b/linux-aarch64/crypto/sha/sha512-armv8.S
index 6b0d194..8fc342a 100644
--- a/linux-aarch64/crypto/sha/sha512-armv8.S
+++ b/linux-aarch64/crypto/sha/sha512-armv8.S
@@ -2,6 +2,7 @@
.text
+
.globl sha512_block_data_order
.type sha512_block_data_order,%function
.align 6
@@ -21,7 +22,7 @@ sha512_block_data_order:
ldp x24,x25,[x0,#4*8]
add x2,x1,x2,lsl#7 // end of input
ldp x26,x27,[x0,#6*8]
- adr x30,K512
+ adr x30,.LK512
stp x0,x2,[x29,#96]
.Loop:
@@ -969,53 +970,54 @@ sha512_block_data_order:
.size sha512_block_data_order,.-sha512_block_data_order
.align 6
-.type K512,%object
-K512:
- .quad 0x428a2f98d728ae22,0x7137449123ef65cd
- .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
- .quad 0x3956c25bf348b538,0x59f111f1b605d019
- .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
- .quad 0xd807aa98a3030242,0x12835b0145706fbe
- .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
- .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
- .quad 0x9bdc06a725c71235,0xc19bf174cf692694
- .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
- .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
- .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
- .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
- .quad 0x983e5152ee66dfab,0xa831c66d2db43210
- .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
- .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
- .quad 0x06ca6351e003826f,0x142929670a0e6e70
- .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
- .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
- .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
- .quad 0x81c2c92e47edaee6,0x92722c851482353b
- .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
- .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
- .quad 0xd192e819d6ef5218,0xd69906245565a910
- .quad 0xf40e35855771202a,0x106aa07032bbd1b8
- .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
- .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
- .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
- .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
- .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
- .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
- .quad 0x90befffa23631e28,0xa4506cebde82bde9
- .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
- .quad 0xca273eceea26619c,0xd186b8c721c0c207
- .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
- .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
- .quad 0x113f9804bef90dae,0x1b710b35131c471b
- .quad 0x28db77f523047d84,0x32caab7b40c72493
- .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
- .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
- .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
- .quad 0 // terminator
-.size K512,.-K512
+.type .LK512,%object // mark .LK512 as a data object: the SHA-512 round-constant table
+.LK512: // assembler-local label (.L prefix); sha512_block_data_order takes its address with "adr x30,.LK512"
+.quad 0x428a2f98d728ae22,0x7137449123ef65cd // entries 0 and 1 of 80 64-bit constants, two per line
+.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad 0x3956c25bf348b538,0x59f111f1b605d019
+.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad 0xd807aa98a3030242,0x12835b0145706fbe
+.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad 0x9bdc06a725c71235,0xc19bf174cf692694
+.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad 0x983e5152ee66dfab,0xa831c66d2db43210
+.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad 0x06ca6351e003826f,0x142929670a0e6e70
+.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad 0x81c2c92e47edaee6,0x92722c851482353b
+.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad 0xd192e819d6ef5218,0xd69906245565a910
+.quad 0xf40e35855771202a,0x106aa07032bbd1b8
+.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad 0x90befffa23631e28,0xa4506cebde82bde9
+.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad 0xca273eceea26619c,0xd186b8c721c0c207
+.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
+.quad 0x113f9804bef90dae,0x1b710b35131c471b
+.quad 0x28db77f523047d84,0x32caab7b40c72493
+.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 // entries 78 and 79 (last two constants)
+.quad 0 // terminator: zero quad after the 80 constants; presumably the round loop's end-of-table marker -- confirm in sha512_block_data_order
+.size .LK512,.-.LK512 // object size spans label to here: 81 quads = 648 bytes
.align 3
.LOPENSSL_armcap_P:
- .quad OPENSSL_armcap_P-.
-.asciz "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
+.quad OPENSSL_armcap_P-. // 64-bit offset from this location to OPENSSL_armcap_P (symbol minus current address)
+.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 // NUL-terminated ASCII: "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
+.align 2 // pad to a 4-byte boundary (2^2) after the odd-length ident string; NOTE(review): the next line is also ".align 2", so one of the two is redundant
.align 2
.comm OPENSSL_armcap_P,4,4