summaryrefslogtreecommitdiffstats
path: root/src/crypto/sha/asm/sha1-armv4-large.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/crypto/sha/asm/sha1-armv4-large.pl')
-rw-r--r--src/crypto/sha/asm/sha1-armv4-large.pl23
1 files changed, 20 insertions, 3 deletions
diff --git a/src/crypto/sha/asm/sha1-armv4-large.pl b/src/crypto/sha/asm/sha1-armv4-large.pl
index 1ffa041..a20d336 100644
--- a/src/crypto/sha/asm/sha1-armv4-large.pl
+++ b/src/crypto/sha/asm/sha1-armv4-large.pl
@@ -60,14 +60,28 @@
# is ~2.5x larger and there are some redundant instructions executed
# when processing last block, improvement is not as big for smallest
# blocks, only ~30%. Snapdragon S4 is a tad faster, 6.4 cycles per
-# byte, which is also >80% faster than integer-only code.
+# byte, which is also >80% faster than integer-only code. Cortex-A15
+# is even faster spending 5.6 cycles per byte outperforming integer-
+# only code by factor of 2.
# May 2014.
#
# Add ARMv8 code path performing at 2.35 cpb on Apple A7.
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
+$flavour = shift;
+if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
+else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} }
+
+if ($flavour && $flavour ne "void") {
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+ die "can't locate arm-xlate.pl";
+
+ open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+ open STDOUT,">$output";
+}
$ctx="r0";
$inp="r1";
@@ -178,6 +192,9 @@ sha1_block_data_order:
sub r3,pc,#8 @ sha1_block_data_order
ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
+#ifdef __APPLE__
+ ldr r12,[r12]
+#endif
tst r12,#ARMV8_SHA1
bne .LARMv8
tst r12,#ARMV7_NEON