| author | Adam Langley <agl@google.com> | 2015-05-11 17:20:37 -0700 |
|---|---|---|
| committer | Kenny Root <kroot@google.com> | 2015-05-12 23:06:14 +0000 |
| commit | e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5 (patch) | |
| tree | 6e43e34595ecf887c26c32b86d8ab097fe8cac64 /linux-aarch64/crypto/aes | |
| parent | b3106a0cc1493bbe0505c0ec0ce3da4ca90a29ae (diff) | |
external/boringssl: bump revision.
This change bumps the BoringSSL revision to the current tip-of-tree.
Change-Id: I91d5bf467e16e8d86cb19a4de873985f524e5faa
Diffstat (limited to 'linux-aarch64/crypto/aes')
-rw-r--r-- | linux-aarch64/crypto/aes/aesv8-armx.S | 520 |
1 file changed, 271 insertions, 249 deletions
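The hunks below rework the hardware-accelerated AES entry points exported by aesv8-armx.S (aes_v8_set_encrypt_key, aes_v8_encrypt, aes_v8_decrypt, aes_v8_cbc_encrypt, aes_v8_ctr32_encrypt_blocks), mostly by interleaving the ld1 round-key loads with the aese/aesmc and aesd/aesimc pairs and by switching the rcon table to a local .Lrcon label. For orientation only, here is a minimal C sketch of driving the CBC entry point; the prototypes mirror the OpenSSL-style declarations used for this backend, but treat the exact signatures and the helper below as assumptions rather than the library's public API.

```c
/* Sketch only: assumes OpenSSL-style prototypes for the aesv8-armx
 * assembly backend; this is not a public BoringSSL API. */
#include <stddef.h>
#include <stdint.h>
#include <openssl/aes.h> /* AES_KEY */

/* Assumed declarations for the assembly entry points in aesv8-armx.S. */
int aes_v8_set_encrypt_key(const uint8_t *user_key, int bits, AES_KEY *key);
void aes_v8_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
                        const AES_KEY *key, uint8_t *ivec, int enc);

/* Hypothetical helper: CBC-encrypt |len| bytes (a multiple of 16). */
int encrypt_cbc_example(const uint8_t key_bytes[16],
                        uint8_t iv[16] /* updated in place */,
                        const uint8_t *plaintext, uint8_t *ciphertext,
                        size_t len) {
  AES_KEY ks;
  /* Key setup returns non-zero on bad input (see .Lenc_key_abort). */
  if (aes_v8_set_encrypt_key(key_bytes, 128, &ks) != 0) {
    return 0;
  }
  /* enc = 1 selects the encryption path (.Loop_cbc_enc in the diff). */
  aes_v8_cbc_encrypt(plaintext, ciphertext, len, &ks, iv, 1);
  return 1;
}
```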
diff --git a/linux-aarch64/crypto/aes/aesv8-armx.S b/linux-aarch64/crypto/aes/aesv8-armx.S index e7ae46f..9c63291 100644 --- a/linux-aarch64/crypto/aes/aesv8-armx.S +++ b/linux-aarch64/crypto/aes/aesv8-armx.S @@ -6,7 +6,7 @@ .arch armv8-a+crypto #endif .align 5 -rcon: +.Lrcon: .long 0x01,0x01,0x01,0x01 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat .long 0x1b,0x1b,0x1b,0x1b @@ -31,7 +31,7 @@ aes_v8_set_encrypt_key: tst w1,#0x3f b.ne .Lenc_key_abort - adr x3,rcon + adr x3,.Lrcon cmp w1,#192 eor v0.16b,v0.16b,v0.16b @@ -55,7 +55,7 @@ aes_v8_set_encrypt_key: ext v5.16b,v0.16b,v5.16b,#12 eor v3.16b,v3.16b,v5.16b ext v5.16b,v0.16b,v5.16b,#12 - eor v6.16b,v6.16b,v1.16b + eor v6.16b,v6.16b,v1.16b eor v3.16b,v3.16b,v5.16b shl v1.16b,v1.16b,#1 eor v3.16b,v3.16b,v6.16b @@ -72,7 +72,7 @@ aes_v8_set_encrypt_key: ext v5.16b,v0.16b,v5.16b,#12 eor v3.16b,v3.16b,v5.16b ext v5.16b,v0.16b,v5.16b,#12 - eor v6.16b,v6.16b,v1.16b + eor v6.16b,v6.16b,v1.16b eor v3.16b,v3.16b,v5.16b shl v1.16b,v1.16b,#1 eor v3.16b,v3.16b,v6.16b @@ -86,7 +86,7 @@ aes_v8_set_encrypt_key: ext v5.16b,v0.16b,v5.16b,#12 eor v3.16b,v3.16b,v5.16b ext v5.16b,v0.16b,v5.16b,#12 - eor v6.16b,v6.16b,v1.16b + eor v6.16b,v6.16b,v1.16b eor v3.16b,v3.16b,v5.16b eor v3.16b,v3.16b,v6.16b st1 {v3.4s},[x2] @@ -117,7 +117,7 @@ aes_v8_set_encrypt_key: dup v5.4s,v3.s[3] eor v5.16b,v5.16b,v4.16b - eor v6.16b,v6.16b,v1.16b + eor v6.16b,v6.16b,v1.16b ext v4.16b,v0.16b,v4.16b,#12 shl v1.16b,v1.16b,#1 eor v4.16b,v4.16b,v5.16b @@ -148,7 +148,7 @@ aes_v8_set_encrypt_key: ext v5.16b,v0.16b,v5.16b,#12 eor v3.16b,v3.16b,v5.16b ext v5.16b,v0.16b,v5.16b,#12 - eor v6.16b,v6.16b,v1.16b + eor v6.16b,v6.16b,v1.16b eor v3.16b,v3.16b,v5.16b shl v1.16b,v1.16b,#1 eor v3.16b,v3.16b,v6.16b @@ -229,17 +229,17 @@ aes_v8_encrypt: .Loop_enc: aese v2.16b,v0.16b - ld1 {v0.4s},[x2],#16 aesmc v2.16b,v2.16b + ld1 {v0.4s},[x2],#16 subs w3,w3,#2 aese v2.16b,v1.16b - ld1 {v1.4s},[x2],#16 aesmc v2.16b,v2.16b + ld1 {v1.4s},[x2],#16 b.gt .Loop_enc aese v2.16b,v0.16b - ld1 {v0.4s},[x2] aesmc v2.16b,v2.16b + ld1 {v0.4s},[x2] aese v2.16b,v1.16b eor v2.16b,v2.16b,v0.16b @@ -258,17 +258,17 @@ aes_v8_decrypt: .Loop_dec: aesd v2.16b,v0.16b - ld1 {v0.4s},[x2],#16 aesimc v2.16b,v2.16b + ld1 {v0.4s},[x2],#16 subs w3,w3,#2 aesd v2.16b,v1.16b - ld1 {v1.4s},[x2],#16 aesimc v2.16b,v2.16b + ld1 {v1.4s},[x2],#16 b.gt .Loop_dec aesd v2.16b,v0.16b - ld1 {v0.4s},[x2] aesimc v2.16b,v2.16b + ld1 {v0.4s},[x2] aesd v2.16b,v1.16b eor v2.16b,v2.16b,v0.16b @@ -292,13 +292,13 @@ aes_v8_cbc_encrypt: ld1 {v6.16b},[x4] ld1 {v0.16b},[x0],x8 - ld1 {v16.4s-v17.4s},[x3] // load key schedule... + ld1 {v16.4s,v17.4s},[x3] // load key schedule... 
sub w5,w5,#6 add x7,x3,x5,lsl#4 // pointer to last 7 round keys sub w5,w5,#2 - ld1 {v18.4s-v19.4s},[x7],#32 - ld1 {v20.4s-v21.4s},[x7],#32 - ld1 {v22.4s-v23.4s},[x7],#32 + ld1 {v18.4s,v19.4s},[x7],#32 + ld1 {v20.4s,v21.4s},[x7],#32 + ld1 {v22.4s,v23.4s},[x7],#32 ld1 {v7.4s},[x7] add x7,x3,#32 @@ -310,76 +310,99 @@ aes_v8_cbc_encrypt: eor v5.16b,v16.16b,v7.16b b.eq .Lcbc_enc128 + ld1 {v2.4s,v3.4s},[x7] + add x7,x3,#16 + add x6,x3,#16*4 + add x12,x3,#16*5 + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + add x14,x3,#16*6 + add x3,x3,#16*7 + b .Lenter_cbc_enc + +.align 4 .Loop_cbc_enc: aese v0.16b,v16.16b - ld1 {v16.4s},[x7],#16 aesmc v0.16b,v0.16b - subs w6,w6,#2 + st1 {v6.16b},[x1],#16 +.Lenter_cbc_enc: aese v0.16b,v17.16b - ld1 {v17.4s},[x7],#16 aesmc v0.16b,v0.16b - b.gt .Loop_cbc_enc + aese v0.16b,v2.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s},[x6] + cmp w5,#4 + aese v0.16b,v3.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x12] + b.eq .Lcbc_enc192 aese v0.16b,v16.16b aesmc v0.16b,v0.16b - subs x2,x2,#16 + ld1 {v16.4s},[x14] aese v0.16b,v17.16b aesmc v0.16b,v0.16b - csel x8,xzr,x8,eq + ld1 {v17.4s},[x3] + nop + +.Lcbc_enc192: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + subs x2,x2,#16 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + csel x8,xzr,x8,eq aese v0.16b,v18.16b aesmc v0.16b,v0.16b - add x7,x3,#16 aese v0.16b,v19.16b aesmc v0.16b,v0.16b - ld1 {v16.16b},[x0],x8 + ld1 {v16.16b},[x0],x8 aese v0.16b,v20.16b aesmc v0.16b,v0.16b - eor v16.16b,v16.16b,v5.16b + eor v16.16b,v16.16b,v5.16b aese v0.16b,v21.16b aesmc v0.16b,v0.16b - ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + ld1 {v17.4s},[x7] // re-pre-load rndkey[1] aese v0.16b,v22.16b aesmc v0.16b,v0.16b aese v0.16b,v23.16b - - mov w6,w5 eor v6.16b,v0.16b,v7.16b - st1 {v6.16b},[x1],#16 b.hs .Loop_cbc_enc + st1 {v6.16b},[x1],#16 b .Lcbc_done .align 5 .Lcbc_enc128: - ld1 {v2.4s-v3.4s},[x7] + ld1 {v2.4s,v3.4s},[x7] aese v0.16b,v16.16b aesmc v0.16b,v0.16b b .Lenter_cbc_enc128 .Loop_cbc_enc128: aese v0.16b,v16.16b aesmc v0.16b,v0.16b - st1 {v6.16b},[x1],#16 + st1 {v6.16b},[x1],#16 .Lenter_cbc_enc128: aese v0.16b,v17.16b aesmc v0.16b,v0.16b - subs x2,x2,#16 + subs x2,x2,#16 aese v0.16b,v2.16b aesmc v0.16b,v0.16b - csel x8,xzr,x8,eq + csel x8,xzr,x8,eq aese v0.16b,v3.16b aesmc v0.16b,v0.16b aese v0.16b,v18.16b aesmc v0.16b,v0.16b aese v0.16b,v19.16b aesmc v0.16b,v0.16b - ld1 {v16.16b},[x0],x8 + ld1 {v16.16b},[x0],x8 aese v0.16b,v20.16b aesmc v0.16b,v0.16b aese v0.16b,v21.16b aesmc v0.16b,v0.16b aese v0.16b,v22.16b aesmc v0.16b,v0.16b - eor v16.16b,v16.16b,v5.16b + eor v16.16b,v16.16b,v5.16b aese v0.16b,v23.16b eor v6.16b,v0.16b,v7.16b b.hs .Loop_cbc_enc128 @@ -404,81 +427,80 @@ aes_v8_cbc_encrypt: .Loop3x_cbc_dec: aesd v0.16b,v16.16b - aesd v1.16b,v16.16b - aesd v18.16b,v16.16b - ld1 {v16.4s},[x7],#16 aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b aesimc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 subs w6,w6,#2 aesd v0.16b,v17.16b - aesd v1.16b,v17.16b - aesd v18.16b,v17.16b - ld1 {v17.4s},[x7],#16 aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b aesimc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 b.gt .Loop3x_cbc_dec aesd v0.16b,v16.16b - aesd v1.16b,v16.16b - aesd v18.16b,v16.16b - eor v4.16b,v6.16b,v7.16b aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b aesimc v18.16b,v18.16b - eor v5.16b,v2.16b,v7.16b + eor v4.16b,v6.16b,v7.16b + subs x2,x2,#0x30 + eor v5.16b,v2.16b,v7.16b + csel x6,x2,x6,lo // x6, w6, is zero at this point aesd v0.16b,v17.16b - 
aesd v1.16b,v17.16b - aesd v18.16b,v17.16b - eor v17.16b,v3.16b,v7.16b - subs x2,x2,#0x30 aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b aesimc v18.16b,v18.16b - orr v6.16b,v19.16b,v19.16b - csel x6,x2,x6,lo // x6, w6, is zero at this point - aesd v0.16b,v20.16b - aesd v1.16b,v20.16b - aesd v18.16b,v20.16b - add x0,x0,x6 // x0 is adjusted in such way that + eor v17.16b,v3.16b,v7.16b + add x0,x0,x6 // x0 is adjusted in such way that // at exit from the loop v1.16b-v18.16b // are loaded with last "words" + orr v6.16b,v19.16b,v19.16b + mov x7,x3 + aesd v0.16b,v20.16b aesimc v0.16b,v0.16b + aesd v1.16b,v20.16b aesimc v1.16b,v1.16b + aesd v18.16b,v20.16b aesimc v18.16b,v18.16b - mov x7,x3 + ld1 {v2.16b},[x0],#16 aesd v0.16b,v21.16b - aesd v1.16b,v21.16b - aesd v18.16b,v21.16b - ld1 {v2.16b},[x0],#16 aesimc v0.16b,v0.16b + aesd v1.16b,v21.16b aesimc v1.16b,v1.16b + aesd v18.16b,v21.16b aesimc v18.16b,v18.16b - ld1 {v3.16b},[x0],#16 + ld1 {v3.16b},[x0],#16 aesd v0.16b,v22.16b - aesd v1.16b,v22.16b - aesd v18.16b,v22.16b - ld1 {v19.16b},[x0],#16 aesimc v0.16b,v0.16b + aesd v1.16b,v22.16b aesimc v1.16b,v1.16b + aesd v18.16b,v22.16b aesimc v18.16b,v18.16b - ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + ld1 {v19.16b},[x0],#16 aesd v0.16b,v23.16b aesd v1.16b,v23.16b aesd v18.16b,v23.16b - - add w6,w5,#2 + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + add w6,w5,#2 eor v4.16b,v4.16b,v0.16b eor v5.16b,v5.16b,v1.16b eor v18.16b,v18.16b,v17.16b - ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] - orr v0.16b,v2.16b,v2.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] st1 {v4.16b},[x1],#16 - orr v1.16b,v3.16b,v3.16b + orr v0.16b,v2.16b,v2.16b st1 {v5.16b},[x1],#16 + orr v1.16b,v3.16b,v3.16b st1 {v18.16b},[x1],#16 - orr v18.16b,v19.16b,v19.16b + orr v18.16b,v19.16b,v19.16b b.hs .Loop3x_cbc_dec cmn x2,#0x30 @@ -487,54 +509,54 @@ aes_v8_cbc_encrypt: .Lcbc_dec_tail: aesd v1.16b,v16.16b - aesd v18.16b,v16.16b - ld1 {v16.4s},[x7],#16 aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b aesimc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 subs w6,w6,#2 aesd v1.16b,v17.16b - aesd v18.16b,v17.16b - ld1 {v17.4s},[x7],#16 aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b aesimc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 b.gt .Lcbc_dec_tail aesd v1.16b,v16.16b - aesd v18.16b,v16.16b aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b aesimc v18.16b,v18.16b aesd v1.16b,v17.16b - aesd v18.16b,v17.16b aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b aesimc v18.16b,v18.16b aesd v1.16b,v20.16b - aesd v18.16b,v20.16b aesimc v1.16b,v1.16b + aesd v18.16b,v20.16b aesimc v18.16b,v18.16b - cmn x2,#0x20 + cmn x2,#0x20 aesd v1.16b,v21.16b - aesd v18.16b,v21.16b aesimc v1.16b,v1.16b + aesd v18.16b,v21.16b aesimc v18.16b,v18.16b - eor v5.16b,v6.16b,v7.16b + eor v5.16b,v6.16b,v7.16b aesd v1.16b,v22.16b - aesd v18.16b,v22.16b aesimc v1.16b,v1.16b + aesd v18.16b,v22.16b aesimc v18.16b,v18.16b - eor v17.16b,v3.16b,v7.16b + eor v17.16b,v3.16b,v7.16b aesd v1.16b,v23.16b aesd v18.16b,v23.16b b.eq .Lcbc_dec_one eor v5.16b,v5.16b,v1.16b eor v17.16b,v17.16b,v18.16b - orr v6.16b,v19.16b,v19.16b + orr v6.16b,v19.16b,v19.16b st1 {v5.16b},[x1],#16 st1 {v17.16b},[x1],#16 b .Lcbc_done .Lcbc_dec_one: eor v5.16b,v5.16b,v18.16b - orr v6.16b,v19.16b,v19.16b + orr v6.16b,v19.16b,v19.16b st1 {v5.16b},[x1],#16 .Lcbc_done: @@ -547,181 +569,181 @@ aes_v8_cbc_encrypt: .type aes_v8_ctr32_encrypt_blocks,%function .align 5 aes_v8_ctr32_encrypt_blocks: - stp x29,x30,[sp,#-16]! 
- add x29,sp,#0 - ldr w5,[x3,#240] - - ldr w8, [x4, #12] - ld1 {v0.4s},[x4] - - ld1 {v16.4s-v17.4s},[x3] // load key schedule... - sub w5,w5,#4 - mov x12,#16 - cmp x2,#2 - add x7,x3,x5,lsl#4 // pointer to last 5 round keys - sub w5,w5,#2 - ld1 {v20.4s-v21.4s},[x7],#32 - ld1 {v22.4s-v23.4s},[x7],#32 - ld1 {v7.4s},[x7] - add x7,x3,#32 - mov w6,w5 + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + ldr w5,[x3,#240] + + ldr w8, [x4, #12] + ld1 {v0.4s},[x4] + + ld1 {v16.4s,v17.4s},[x3] // load key schedule... + sub w5,w5,#4 + mov x12,#16 + cmp x2,#2 + add x7,x3,x5,lsl#4 // pointer to last 5 round keys + sub w5,w5,#2 + ld1 {v20.4s,v21.4s},[x7],#32 + ld1 {v22.4s,v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + add x7,x3,#32 + mov w6,w5 csel x12,xzr,x12,lo #ifndef __ARMEB__ - rev w8, w8 + rev w8, w8 #endif - orr v1.16b,v0.16b,v0.16b - add w10, w8, #1 - orr v18.16b,v0.16b,v0.16b - add w8, w8, #2 - orr v6.16b,v0.16b,v0.16b - rev w10, w10 - mov v1.s[3],w10 - b.ls .Lctr32_tail - rev w12, w8 - sub x2,x2,#3 // bias - mov v18.s[3],w12 - b .Loop3x_ctr32 + orr v1.16b,v0.16b,v0.16b + add w10, w8, #1 + orr v18.16b,v0.16b,v0.16b + add w8, w8, #2 + orr v6.16b,v0.16b,v0.16b + rev w10, w10 + mov v1.s[3],w10 + b.ls .Lctr32_tail + rev w12, w8 + sub x2,x2,#3 // bias + mov v18.s[3],w12 + b .Loop3x_ctr32 .align 4 .Loop3x_ctr32: - aese v0.16b,v16.16b - aese v1.16b,v16.16b - aese v18.16b,v16.16b - ld1 {v16.4s},[x7],#16 - aesmc v0.16b,v0.16b - aesmc v1.16b,v1.16b - aesmc v18.16b,v18.16b - subs w6,w6,#2 - aese v0.16b,v17.16b - aese v1.16b,v17.16b - aese v18.16b,v17.16b - ld1 {v17.4s},[x7],#16 - aesmc v0.16b,v0.16b - aesmc v1.16b,v1.16b - aesmc v18.16b,v18.16b - b.gt .Loop3x_ctr32 - - aese v0.16b,v16.16b - aese v1.16b,v16.16b - aese v18.16b,v16.16b - mov x7,x3 - aesmc v4.16b,v0.16b - ld1 {v2.16b},[x0],#16 - aesmc v5.16b,v1.16b - aesmc v18.16b,v18.16b - orr v0.16b,v6.16b,v6.16b - aese v4.16b,v17.16b - ld1 {v3.16b},[x0],#16 - aese v5.16b,v17.16b - aese v18.16b,v17.16b - orr v1.16b,v6.16b,v6.16b - aesmc v4.16b,v4.16b - ld1 {v19.16b},[x0],#16 - aesmc v5.16b,v5.16b - aesmc v17.16b,v18.16b - orr v18.16b,v6.16b,v6.16b - add w9,w8,#1 - aese v4.16b,v20.16b - aese v5.16b,v20.16b - aese v17.16b,v20.16b - eor v2.16b,v2.16b,v7.16b - add w10,w8,#2 - aesmc v4.16b,v4.16b - aesmc v5.16b,v5.16b - aesmc v17.16b,v17.16b - eor v3.16b,v3.16b,v7.16b - add w8,w8,#3 - aese v4.16b,v21.16b - aese v5.16b,v21.16b - aese v17.16b,v21.16b - eor v19.16b,v19.16b,v7.16b - rev w9,w9 - aesmc v4.16b,v4.16b - ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] - aesmc v5.16b,v5.16b - aesmc v17.16b,v17.16b - mov v0.s[3], w9 - rev w10,w10 - aese v4.16b,v22.16b - aese v5.16b,v22.16b - aese v17.16b,v22.16b - mov v1.s[3], w10 - rev w12,w8 - aesmc v4.16b,v4.16b - aesmc v5.16b,v5.16b - aesmc v17.16b,v17.16b - mov v18.s[3], w12 - subs x2,x2,#3 - aese v4.16b,v23.16b - aese v5.16b,v23.16b - aese v17.16b,v23.16b - - mov w6,w5 - eor v2.16b,v2.16b,v4.16b - eor v3.16b,v3.16b,v5.16b - eor v19.16b,v19.16b,v17.16b - ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] - st1 {v2.16b},[x1],#16 - st1 {v3.16b},[x1],#16 - st1 {v19.16b},[x1],#16 - b.hs .Loop3x_ctr32 - - adds x2,x2,#3 - b.eq .Lctr32_done - cmp x2,#1 - mov x12,#16 + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v18.16b,v16.16b + aesmc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v18.16b,v17.16b + aesmc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 + b.gt .Loop3x_ctr32 + + aese v0.16b,v16.16b + aesmc 
v4.16b,v0.16b + aese v1.16b,v16.16b + aesmc v5.16b,v1.16b + ld1 {v2.16b},[x0],#16 + orr v0.16b,v6.16b,v6.16b + aese v18.16b,v16.16b + aesmc v18.16b,v18.16b + ld1 {v3.16b},[x0],#16 + orr v1.16b,v6.16b,v6.16b + aese v4.16b,v17.16b + aesmc v4.16b,v4.16b + aese v5.16b,v17.16b + aesmc v5.16b,v5.16b + ld1 {v19.16b},[x0],#16 + mov x7,x3 + aese v18.16b,v17.16b + aesmc v17.16b,v18.16b + orr v18.16b,v6.16b,v6.16b + add w9,w8,#1 + aese v4.16b,v20.16b + aesmc v4.16b,v4.16b + aese v5.16b,v20.16b + aesmc v5.16b,v5.16b + eor v2.16b,v2.16b,v7.16b + add w10,w8,#2 + aese v17.16b,v20.16b + aesmc v17.16b,v17.16b + eor v3.16b,v3.16b,v7.16b + add w8,w8,#3 + aese v4.16b,v21.16b + aesmc v4.16b,v4.16b + aese v5.16b,v21.16b + aesmc v5.16b,v5.16b + eor v19.16b,v19.16b,v7.16b + rev w9,w9 + aese v17.16b,v21.16b + aesmc v17.16b,v17.16b + mov v0.s[3], w9 + rev w10,w10 + aese v4.16b,v22.16b + aesmc v4.16b,v4.16b + aese v5.16b,v22.16b + aesmc v5.16b,v5.16b + mov v1.s[3], w10 + rev w12,w8 + aese v17.16b,v22.16b + aesmc v17.16b,v17.16b + mov v18.s[3], w12 + subs x2,x2,#3 + aese v4.16b,v23.16b + aese v5.16b,v23.16b + aese v17.16b,v23.16b + + eor v2.16b,v2.16b,v4.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + st1 {v2.16b},[x1],#16 + eor v3.16b,v3.16b,v5.16b + mov w6,w5 + st1 {v3.16b},[x1],#16 + eor v19.16b,v19.16b,v17.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v19.16b},[x1],#16 + b.hs .Loop3x_ctr32 + + adds x2,x2,#3 + b.eq .Lctr32_done + cmp x2,#1 + mov x12,#16 csel x12,xzr,x12,eq .Lctr32_tail: - aese v0.16b,v16.16b - aese v1.16b,v16.16b - ld1 {v16.4s},[x7],#16 - aesmc v0.16b,v0.16b - aesmc v1.16b,v1.16b - subs w6,w6,#2 - aese v0.16b,v17.16b - aese v1.16b,v17.16b - ld1 {v17.4s},[x7],#16 - aesmc v0.16b,v0.16b - aesmc v1.16b,v1.16b - b.gt .Lctr32_tail - - aese v0.16b,v16.16b - aese v1.16b,v16.16b - aesmc v0.16b,v0.16b - aesmc v1.16b,v1.16b - aese v0.16b,v17.16b - aese v1.16b,v17.16b - aesmc v0.16b,v0.16b - aesmc v1.16b,v1.16b - ld1 {v2.16b},[x0],x12 - aese v0.16b,v20.16b - aese v1.16b,v20.16b - ld1 {v3.16b},[x0] - aesmc v0.16b,v0.16b - aesmc v1.16b,v1.16b - aese v0.16b,v21.16b - aese v1.16b,v21.16b - aesmc v0.16b,v0.16b - aesmc v1.16b,v1.16b - aese v0.16b,v22.16b - aese v1.16b,v22.16b - eor v2.16b,v2.16b,v7.16b - aesmc v0.16b,v0.16b - aesmc v1.16b,v1.16b - eor v3.16b,v3.16b,v7.16b - aese v0.16b,v23.16b - aese v1.16b,v23.16b - - cmp x2,#1 - eor v2.16b,v2.16b,v0.16b - eor v3.16b,v3.16b,v1.16b - st1 {v2.16b},[x1],#16 - b.eq .Lctr32_done - st1 {v3.16b},[x1] + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + ld1 {v17.4s},[x7],#16 + b.gt .Lctr32_tail + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + ld1 {v2.16b},[x0],x12 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v1.16b,v20.16b + aesmc v1.16b,v1.16b + ld1 {v3.16b},[x0] + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v1.16b,v21.16b + aesmc v1.16b,v1.16b + eor v2.16b,v2.16b,v7.16b + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v1.16b,v22.16b + aesmc v1.16b,v1.16b + eor v3.16b,v3.16b,v7.16b + aese v0.16b,v23.16b + aese v1.16b,v23.16b + + cmp x2,#1 + eor v2.16b,v2.16b,v0.16b + eor v3.16b,v3.16b,v1.16b + st1 {v2.16b},[x1],#16 + b.eq .Lctr32_done + st1 {v3.16b},[x1] .Lctr32_done: - ldr x29,[sp],#16 + ldr x29,[sp],#16 ret .size 
aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
#endif