summaryrefslogtreecommitdiffstats
path: root/src/crypto/modes/asm
diff options
context:
space:
mode:
Diffstat (limited to 'src/crypto/modes/asm')
-rw-r--r--src/crypto/modes/asm/ghash-armv4.pl10
-rw-r--r--src/crypto/modes/asm/ghash-x86.pl2
-rw-r--r--src/crypto/modes/asm/ghash-x86_64.pl8
-rw-r--r--src/crypto/modes/asm/ghashv8-armx.pl24
4 files changed, 22 insertions, 22 deletions
diff --git a/src/crypto/modes/asm/ghash-armv4.pl b/src/crypto/modes/asm/ghash-armv4.pl
index dc5b99e..25a4e27 100644
--- a/src/crypto/modes/asm/ghash-armv4.pl
+++ b/src/crypto/modes/asm/ghash-armv4.pl
@@ -45,7 +45,7 @@
# processes one byte in 8.45 cycles, A9 - in 10.2, A15 - in 7.63,
# Snapdragon S4 - in 9.33.
#
-# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
+# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
# Polynomial Multiplication on ARM Processors using the NEON Engine.
#
# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
@@ -134,7 +134,7 @@ ___
$code=<<___;
#if defined(__arm__)
-#include <openssl/arm_arch.h>
+#include "arm_arch.h"
.syntax unified
@@ -457,12 +457,12 @@ gcm_ghash_neon:
veor $IN,$Xl @ inp^=Xi
.Lgmult_neon:
___
- &clmul64x64 ($Xl,$Hlo,"$IN#lo"); # H.lo·Xi.lo
+ &clmul64x64 ($Xl,$Hlo,"$IN#lo"); # H.lo·Xi.lo
$code.=<<___;
veor $IN#lo,$IN#lo,$IN#hi @ Karatsuba pre-processing
___
- &clmul64x64 ($Xm,$Hhl,"$IN#lo"); # (H.lo+H.hi)·(Xi.lo+Xi.hi)
- &clmul64x64 ($Xh,$Hhi,"$IN#hi"); # H.hi·Xi.hi
+ &clmul64x64 ($Xm,$Hhl,"$IN#lo"); # (H.lo+H.hi)·(Xi.lo+Xi.hi)
+ &clmul64x64 ($Xh,$Hhi,"$IN#hi"); # H.hi·Xi.hi
$code.=<<___;
veor $Xm,$Xm,$Xl @ Karatsuba post-processing
veor $Xm,$Xm,$Xh
diff --git a/src/crypto/modes/asm/ghash-x86.pl b/src/crypto/modes/asm/ghash-x86.pl
index 0269169..23a5527 100644
--- a/src/crypto/modes/asm/ghash-x86.pl
+++ b/src/crypto/modes/asm/ghash-x86.pl
@@ -358,7 +358,7 @@ $S=12; # shift factor for rem_4bit
# effective address calculation and finally merge of value to Z.hi.
# Reference to rem_4bit is scheduled so late that I had to >>4
# rem_4bit elements. This resulted in 20-45% procent improvement
-# on contemporary µ-archs.
+# on contemporary µ-archs.
{
my $cnt;
my $rem_4bit = "eax";
diff --git a/src/crypto/modes/asm/ghash-x86_64.pl b/src/crypto/modes/asm/ghash-x86_64.pl
index 5a7ce39..6e656ca 100644
--- a/src/crypto/modes/asm/ghash-x86_64.pl
+++ b/src/crypto/modes/asm/ghash-x86_64.pl
@@ -576,15 +576,15 @@ $code.=<<___ if (0 || (&reduction_alg9($Xhi,$Xi)&&0));
# experimental alternative. special thing about is that there
# no dependency between the two multiplications...
mov \$`0xE1<<1`,%eax
- mov \$0xA040608020C0E000,%r10 # ((7..0)·0xE0)&0xff
+ mov \$0xA040608020C0E000,%r10 # ((7..0)·0xE0)&0xff
mov \$0x07,%r11d
movq %rax,$T1
movq %r10,$T2
movq %r11,$T3 # borrow $T3
pand $Xi,$T3
- pshufb $T3,$T2 # ($Xi&7)·0xE0
+ pshufb $T3,$T2 # ($Xi&7)·0xE0
movq %rax,$T3
- pclmulqdq \$0x00,$Xi,$T1 # ·(0xE1<<1)
+ pclmulqdq \$0x00,$Xi,$T1 # ·(0xE1<<1)
pxor $Xi,$T2
pslldq \$15,$T2
paddd $T2,$T2 # <<(64+56+1)
@@ -657,7 +657,7 @@ $code.=<<___;
je .Lskip4x
sub \$0x30,$len
- mov \$0xA040608020C0E000,%rax # ((7..0)·0xE0)&0xff
+ mov \$0xA040608020C0E000,%rax # ((7..0)·0xE0)&0xff
movdqu 0x30($Htbl),$Hkey3
movdqu 0x40($Htbl),$Hkey4
diff --git a/src/crypto/modes/asm/ghashv8-armx.pl b/src/crypto/modes/asm/ghashv8-armx.pl
index 3a7b8d8..686951f 100644
--- a/src/crypto/modes/asm/ghashv8-armx.pl
+++ b/src/crypto/modes/asm/ghashv8-armx.pl
@@ -54,7 +54,7 @@ my ($Xl,$Xm,$Xh,$IN)=map("q$_",(0..3));
my ($t0,$t1,$t2,$xC2,$H,$Hhl,$H2)=map("q$_",(8..14));
$code=<<___;
-#include <openssl/arm_arch.h>
+#include "arm_arch.h"
.text
___
@@ -148,10 +148,10 @@ gcm_gmult_v8:
#endif
vext.8 $IN,$t1,$t1,#8
- vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo
+ vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo
veor $t1,$t1,$IN @ Karatsuba pre-processing
- vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi
- vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
+ vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi
+ vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
veor $t2,$Xl,$Xh
@@ -239,7 +239,7 @@ $code.=<<___;
#endif
vext.8 $In,$t1,$t1,#8
veor $IN,$IN,$Xl @ I[i]^=Xi
- vpmull.p64 $Xln,$H,$In @ H·Ii+1
+ vpmull.p64 $Xln,$H,$In @ H·Ii+1
veor $t1,$t1,$In @ Karatsuba pre-processing
vpmull2.p64 $Xhn,$H,$In
b .Loop_mod2x_v8
@@ -248,14 +248,14 @@ $code.=<<___;
.Loop_mod2x_v8:
vext.8 $t2,$IN,$IN,#8
subs $len,$len,#32 @ is there more data?
- vpmull.p64 $Xl,$H2,$IN @ H^2.lo·Xi.lo
+ vpmull.p64 $Xl,$H2,$IN @ H^2.lo·Xi.lo
cclr $inc,lo @ is it time to zero $inc?
vpmull.p64 $Xmn,$Hhl,$t1
veor $t2,$t2,$IN @ Karatsuba pre-processing
- vpmull2.p64 $Xh,$H2,$IN @ H^2.hi·Xi.hi
+ vpmull2.p64 $Xh,$H2,$IN @ H^2.hi·Xi.hi
veor $Xl,$Xl,$Xln @ accumulate
- vpmull2.p64 $Xm,$Hhl,$t2 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
+ vpmull2.p64 $Xm,$Hhl,$t2 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
vld1.64 {$t0},[$inp],$inc @ load [rotated] I[i+2]
veor $Xh,$Xh,$Xhn
@@ -280,7 +280,7 @@ $code.=<<___;
vext.8 $In,$t1,$t1,#8
vext.8 $IN,$t0,$t0,#8
veor $Xl,$Xm,$t2
- vpmull.p64 $Xln,$H,$In @ H·Ii+1
+ vpmull.p64 $Xln,$H,$In @ H·Ii+1
veor $IN,$IN,$Xh @ accumulate $IN early
vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction
@@ -304,10 +304,10 @@ $code.=<<___;
veor $IN,$IN,$Xl @ inp^=Xi
veor $t1,$t0,$t2 @ $t1 is rotated inp^Xi
- vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo
+ vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo
veor $t1,$t1,$IN @ Karatsuba pre-processing
- vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi
- vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
+ vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi
+ vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
veor $t2,$Xl,$Xh