diff options
Diffstat (limited to 'src/crypto/sha/asm/sha1-armv4-large.pl')
-rw-r--r-- | src/crypto/sha/asm/sha1-armv4-large.pl | 23 |
1 files changed, 20 insertions, 3 deletions
diff --git a/src/crypto/sha/asm/sha1-armv4-large.pl b/src/crypto/sha/asm/sha1-armv4-large.pl index 1ffa041..a20d336 100644 --- a/src/crypto/sha/asm/sha1-armv4-large.pl +++ b/src/crypto/sha/asm/sha1-armv4-large.pl @@ -60,14 +60,28 @@ # is ~2.5x larger and there are some redundant instructions executed # when processing last block, improvement is not as big for smallest # blocks, only ~30%. Snapdragon S4 is a tad faster, 6.4 cycles per -# byte, which is also >80% faster than integer-only code. +# byte, which is also >80% faster than integer-only code. Cortex-A15 +# is even faster spending 5.6 cycles per byte outperforming integer- +# only code by factor of 2. # May 2014. # # Add ARMv8 code path performing at 2.35 cpb on Apple A7. -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; +$flavour = shift; +if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } +else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} } + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} $ctx="r0"; $inp="r1"; @@ -178,6 +192,9 @@ sha1_block_data_order: sub r3,pc,#8 @ sha1_block_data_order ldr r12,.LOPENSSL_armcap ldr r12,[r3,r12] @ OPENSSL_armcap_P +#ifdef __APPLE__ + ldr r12,[r12] +#endif tst r12,#ARMV8_SHA1 bne .LARMv8 tst r12,#ARMV7_NEON |