summaryrefslogtreecommitdiffstats
path: root/linux-aarch64/crypto/modes/ghashv8-armx.S
diff options
context:
space:
mode:
Diffstat (limited to 'linux-aarch64/crypto/modes/ghashv8-armx.S')
-rw-r--r--linux-aarch64/crypto/modes/ghashv8-armx.S115
1 files changed, 115 insertions, 0 deletions
diff --git a/linux-aarch64/crypto/modes/ghashv8-armx.S b/linux-aarch64/crypto/modes/ghashv8-armx.S
new file mode 100644
index 0000000..1bfb263
--- /dev/null
+++ b/linux-aarch64/crypto/modes/ghashv8-armx.S
@@ -0,0 +1,115 @@
+#include "arm_arch.h"
+
+.text
+.arch armv8-a+crypto
+.global gcm_init_v8
+.type gcm_init_v8,%function
+.align 4
+gcm_init_v8:
+ ld1 {v17.2d},[x1] //load H
+ movi v16.16b,#0xe1
+ ext v3.16b,v17.16b,v17.16b,#8
+ shl v16.2d,v16.2d,#57
+ ushr v18.2d,v16.2d,#63
+ ext v16.16b,v18.16b,v16.16b,#8 //t0=0xc2....01
+ dup v17.4s,v17.s[1]
+ ushr v19.2d,v3.2d,#63
+ sshr v17.4s,v17.4s,#31 //broadcast carry bit
+ and v19.16b,v19.16b,v16.16b
+ shl v3.2d,v3.2d,#1
+ ext v19.16b,v19.16b,v19.16b,#8
+ and v16.16b,v16.16b,v17.16b
+ orr v3.16b,v3.16b,v19.16b //H<<<=1
+ eor v3.16b,v3.16b,v16.16b //twisted H
+ st1 {v3.2d},[x0]
+
+ ret
+.size gcm_init_v8,.-gcm_init_v8
+
+.global gcm_gmult_v8
+.type gcm_gmult_v8,%function
+.align 4
+gcm_gmult_v8:
+ ld1 {v17.2d},[x0] //load Xi
+ movi v19.16b,#0xe1
+ ld1 {v20.2d},[x1] //load twisted H
+ shl v19.2d,v19.2d,#57
+#ifndef __ARMEB__
+ rev64 v17.16b,v17.16b
+#endif
+ ext v21.16b,v20.16b,v20.16b,#8
+ mov x3,#0
+ ext v3.16b,v17.16b,v17.16b,#8
+ mov x12,#0
+ eor v21.16b,v21.16b,v20.16b //Karatsuba pre-processing
+ mov x2,x0
+ b .Lgmult_v8
+.size gcm_gmult_v8,.-gcm_gmult_v8
+
+.global gcm_ghash_v8
+.type gcm_ghash_v8,%function
+.align 4
+gcm_ghash_v8:
+ ld1 {v0.2d},[x0] //load [rotated] Xi
+ subs x3,x3,#16
+ movi v19.16b,#0xe1
+ mov x12,#16
+ ld1 {v20.2d},[x1] //load twisted H
+ csel x12,xzr,x12,eq
+ ext v0.16b,v0.16b,v0.16b,#8
+ shl v19.2d,v19.2d,#57
+ ld1 {v17.2d},[x2],x12 //load [rotated] inp
+ ext v21.16b,v20.16b,v20.16b,#8
+#ifndef __ARMEB__
+ rev64 v0.16b,v0.16b
+ rev64 v17.16b,v17.16b
+#endif
+ eor v21.16b,v21.16b,v20.16b //Karatsuba pre-processing
+ ext v3.16b,v17.16b,v17.16b,#8
+ b .Loop_v8
+
+.align 4
+.Loop_v8:
+ ext v18.16b,v0.16b,v0.16b,#8
+ eor v3.16b,v3.16b,v0.16b //inp^=Xi
+ eor v17.16b,v17.16b,v18.16b //v17.16b is rotated inp^Xi
+
+.Lgmult_v8:
+ pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
+ eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
+ pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
+ subs x3,x3,#16
+ pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
+ csel x12,xzr,x12,eq
+
+ ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
+ eor v18.16b,v0.16b,v2.16b
+ eor v1.16b,v1.16b,v17.16b
+ ld1 {v17.2d},[x2],x12 //load [rotated] inp
+ eor v1.16b,v1.16b,v18.16b
+ pmull v18.1q,v0.1d,v19.1d //1st phase
+
+ ins v2.d[0],v1.d[1]
+ ins v1.d[1],v0.d[0]
+#ifndef __ARMEB__
+ rev64 v17.16b,v17.16b
+#endif
+ eor v0.16b,v1.16b,v18.16b
+ ext v3.16b,v17.16b,v17.16b,#8
+
+ ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
+ pmull v0.1q,v0.1d,v19.1d
+ eor v18.16b,v18.16b,v2.16b
+ eor v0.16b,v0.16b,v18.16b
+ b.hs .Loop_v8
+
+#ifndef __ARMEB__
+ rev64 v0.16b,v0.16b
+#endif
+ ext v0.16b,v0.16b,v0.16b,#8
+ st1 {v0.2d},[x0] //write out Xi
+
+ ret
+.size gcm_ghash_v8,.-gcm_ghash_v8
+.asciz "GHASH for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
+.align 2