author    Adam Langley <agl@google.com>  2015-01-22 14:27:53 -0800
committer Adam Langley <agl@google.com>  2015-01-30 16:52:14 -0800
commit    d9e397b599b13d642138480a28c14db7a136bf05 (patch)
tree      34bab61dc4ce323b123ad4614dbc07e86ea2f9ef /linux-x86_64/crypto/rc4
Initial commit of BoringSSL for Android.
Diffstat (limited to 'linux-x86_64/crypto/rc4')
-rw-r--r--  linux-x86_64/crypto/rc4/rc4-md5-x86_64.S  1262
-rw-r--r--  linux-x86_64/crypto/rc4/rc4-x86_64.S       622
2 files changed, 1884 insertions, 0 deletions
diff --git a/linux-x86_64/crypto/rc4/rc4-md5-x86_64.S b/linux-x86_64/crypto/rc4/rc4-md5-x86_64.S
new file mode 100644
index 0000000..06c8d67
--- /dev/null
+++ b/linux-x86_64/crypto/rc4/rc4-md5-x86_64.S
@@ -0,0 +1,1262 @@
+#if defined(__x86_64__)
+.text
+.align 16
+
+.globl rc4_md5_enc
+.hidden rc4_md5_enc
+.type rc4_md5_enc,@function
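+/* Inferred calling convention (from the SysV AMD64 register moves below):
+   rdi = RC4_KEY, rsi = RC4 input, rdx = RC4 output, rcx = MD5_CTX,
+   r8 = MD5 input, r9 = number of 64-byte blocks. The routine returns at once
+   when r9 is zero; each .Loop iteration interleaves one 64-byte MD5 block
+   with 64 bytes of RC4 keystream, and the trailing .byte 0xf3,0xc3 encodes
+   a two-byte rep ret. */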
+rc4_md5_enc:
+ cmpq $0,%r9
+ je .Labort
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $40,%rsp
+.Lbody:
+ movq %rcx,%r11
+ movq %r9,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %r8,%r15
+ xorq %rbp,%rbp
+ xorq %rcx,%rcx
+
+ leaq 8(%rdi),%rdi
+ movb -8(%rdi),%bpl
+ movb -4(%rdi),%cl
+
+ incb %bpl
+ subq %r13,%r14
+ movl (%rdi,%rbp,4),%eax
+ addb %al,%cl
+ leaq (%rdi,%rbp,4),%rsi
+ shlq $6,%r12
+ addq %r15,%r12
+ movq %r12,16(%rsp)
+
+ movq %r11,24(%rsp)
+ movl 0(%r11),%r8d
+ movl 4(%r11),%r9d
+ movl 8(%r11),%r10d
+ movl 12(%r11),%r11d
+ jmp .Loop
+
+.align 16
+.Loop:
+ movl %r8d,0(%rsp)
+ movl %r9d,4(%rsp)
+ movl %r10d,8(%rsp)
+ movl %r11d,%r12d
+ movl %r11d,12(%rsp)
+ pxor %xmm0,%xmm0
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r9d,%r12d
+ addl 0(%r15),%r8d
+ addb %dl,%al
+ movl 4(%rsi),%ebx
+ addl $3614090360,%r8d
+ xorl %r11d,%r12d
+ movzbl %al,%eax
+ movl %edx,0(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $7,%r8d
+ movl %r10d,%r12d
+ movd (%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ pxor %xmm1,%xmm1
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r8d,%r12d
+ addl 4(%r15),%r11d
+ addb %dl,%bl
+ movl 8(%rsi),%eax
+ addl $3905402710,%r11d
+ xorl %r10d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,4(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $12,%r11d
+ movl %r9d,%r12d
+ movd (%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r11d,%r12d
+ addl 8(%r15),%r10d
+ addb %dl,%al
+ movl 12(%rsi),%ebx
+ addl $606105819,%r10d
+ xorl %r9d,%r12d
+ movzbl %al,%eax
+ movl %edx,8(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $17,%r10d
+ movl %r8d,%r12d
+ pinsrw $1,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r10d,%r12d
+ addl 12(%r15),%r9d
+ addb %dl,%bl
+ movl 16(%rsi),%eax
+ addl $3250441966,%r9d
+ xorl %r8d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,12(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $22,%r9d
+ movl %r11d,%r12d
+ pinsrw $1,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r9d,%r12d
+ addl 16(%r15),%r8d
+ addb %dl,%al
+ movl 20(%rsi),%ebx
+ addl $4118548399,%r8d
+ xorl %r11d,%r12d
+ movzbl %al,%eax
+ movl %edx,16(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $7,%r8d
+ movl %r10d,%r12d
+ pinsrw $2,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r8d,%r12d
+ addl 20(%r15),%r11d
+ addb %dl,%bl
+ movl 24(%rsi),%eax
+ addl $1200080426,%r11d
+ xorl %r10d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,20(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $12,%r11d
+ movl %r9d,%r12d
+ pinsrw $2,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r11d,%r12d
+ addl 24(%r15),%r10d
+ addb %dl,%al
+ movl 28(%rsi),%ebx
+ addl $2821735955,%r10d
+ xorl %r9d,%r12d
+ movzbl %al,%eax
+ movl %edx,24(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $17,%r10d
+ movl %r8d,%r12d
+ pinsrw $3,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r10d,%r12d
+ addl 28(%r15),%r9d
+ addb %dl,%bl
+ movl 32(%rsi),%eax
+ addl $4249261313,%r9d
+ xorl %r8d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,28(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $22,%r9d
+ movl %r11d,%r12d
+ pinsrw $3,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r9d,%r12d
+ addl 32(%r15),%r8d
+ addb %dl,%al
+ movl 36(%rsi),%ebx
+ addl $1770035416,%r8d
+ xorl %r11d,%r12d
+ movzbl %al,%eax
+ movl %edx,32(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $7,%r8d
+ movl %r10d,%r12d
+ pinsrw $4,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r8d,%r12d
+ addl 36(%r15),%r11d
+ addb %dl,%bl
+ movl 40(%rsi),%eax
+ addl $2336552879,%r11d
+ xorl %r10d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,36(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $12,%r11d
+ movl %r9d,%r12d
+ pinsrw $4,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r11d,%r12d
+ addl 40(%r15),%r10d
+ addb %dl,%al
+ movl 44(%rsi),%ebx
+ addl $4294925233,%r10d
+ xorl %r9d,%r12d
+ movzbl %al,%eax
+ movl %edx,40(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $17,%r10d
+ movl %r8d,%r12d
+ pinsrw $5,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r10d,%r12d
+ addl 44(%r15),%r9d
+ addb %dl,%bl
+ movl 48(%rsi),%eax
+ addl $2304563134,%r9d
+ xorl %r8d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,44(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $22,%r9d
+ movl %r11d,%r12d
+ pinsrw $5,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r9d,%r12d
+ addl 48(%r15),%r8d
+ addb %dl,%al
+ movl 52(%rsi),%ebx
+ addl $1804603682,%r8d
+ xorl %r11d,%r12d
+ movzbl %al,%eax
+ movl %edx,48(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $7,%r8d
+ movl %r10d,%r12d
+ pinsrw $6,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r8d,%r12d
+ addl 52(%r15),%r11d
+ addb %dl,%bl
+ movl 56(%rsi),%eax
+ addl $4254626195,%r11d
+ xorl %r10d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,52(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $12,%r11d
+ movl %r9d,%r12d
+ pinsrw $6,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r11d,%r12d
+ addl 56(%r15),%r10d
+ addb %dl,%al
+ movl 60(%rsi),%ebx
+ addl $2792965006,%r10d
+ xorl %r9d,%r12d
+ movzbl %al,%eax
+ movl %edx,56(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $17,%r10d
+ movl %r8d,%r12d
+ pinsrw $7,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movdqu (%r13),%xmm2
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r10d,%r12d
+ addl 60(%r15),%r9d
+ addb %dl,%bl
+ movl 64(%rsi),%eax
+ addl $1236535329,%r9d
+ xorl %r8d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,60(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $22,%r9d
+ movl %r10d,%r12d
+ pinsrw $7,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ psllq $8,%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm1,%xmm2
+ pxor %xmm0,%xmm0
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r11d,%r12d
+ addl 4(%r15),%r8d
+ addb %dl,%al
+ movl 68(%rsi),%ebx
+ addl $4129170786,%r8d
+ xorl %r10d,%r12d
+ movzbl %al,%eax
+ movl %edx,64(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $5,%r8d
+ movl %r9d,%r12d
+ movd (%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ pxor %xmm1,%xmm1
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r10d,%r12d
+ addl 24(%r15),%r11d
+ addb %dl,%bl
+ movl 72(%rsi),%eax
+ addl $3225465664,%r11d
+ xorl %r9d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,68(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $9,%r11d
+ movl %r8d,%r12d
+ movd (%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r9d,%r12d
+ addl 44(%r15),%r10d
+ addb %dl,%al
+ movl 76(%rsi),%ebx
+ addl $643717713,%r10d
+ xorl %r8d,%r12d
+ movzbl %al,%eax
+ movl %edx,72(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $14,%r10d
+ movl %r11d,%r12d
+ pinsrw $1,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r8d,%r12d
+ addl 0(%r15),%r9d
+ addb %dl,%bl
+ movl 80(%rsi),%eax
+ addl $3921069994,%r9d
+ xorl %r11d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,76(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $20,%r9d
+ movl %r10d,%r12d
+ pinsrw $1,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r11d,%r12d
+ addl 20(%r15),%r8d
+ addb %dl,%al
+ movl 84(%rsi),%ebx
+ addl $3593408605,%r8d
+ xorl %r10d,%r12d
+ movzbl %al,%eax
+ movl %edx,80(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $5,%r8d
+ movl %r9d,%r12d
+ pinsrw $2,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r10d,%r12d
+ addl 40(%r15),%r11d
+ addb %dl,%bl
+ movl 88(%rsi),%eax
+ addl $38016083,%r11d
+ xorl %r9d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,84(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $9,%r11d
+ movl %r8d,%r12d
+ pinsrw $2,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r9d,%r12d
+ addl 60(%r15),%r10d
+ addb %dl,%al
+ movl 92(%rsi),%ebx
+ addl $3634488961,%r10d
+ xorl %r8d,%r12d
+ movzbl %al,%eax
+ movl %edx,88(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $14,%r10d
+ movl %r11d,%r12d
+ pinsrw $3,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r8d,%r12d
+ addl 16(%r15),%r9d
+ addb %dl,%bl
+ movl 96(%rsi),%eax
+ addl $3889429448,%r9d
+ xorl %r11d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,92(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $20,%r9d
+ movl %r10d,%r12d
+ pinsrw $3,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r11d,%r12d
+ addl 36(%r15),%r8d
+ addb %dl,%al
+ movl 100(%rsi),%ebx
+ addl $568446438,%r8d
+ xorl %r10d,%r12d
+ movzbl %al,%eax
+ movl %edx,96(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $5,%r8d
+ movl %r9d,%r12d
+ pinsrw $4,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r10d,%r12d
+ addl 56(%r15),%r11d
+ addb %dl,%bl
+ movl 104(%rsi),%eax
+ addl $3275163606,%r11d
+ xorl %r9d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,100(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $9,%r11d
+ movl %r8d,%r12d
+ pinsrw $4,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r9d,%r12d
+ addl 12(%r15),%r10d
+ addb %dl,%al
+ movl 108(%rsi),%ebx
+ addl $4107603335,%r10d
+ xorl %r8d,%r12d
+ movzbl %al,%eax
+ movl %edx,104(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $14,%r10d
+ movl %r11d,%r12d
+ pinsrw $5,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r8d,%r12d
+ addl 32(%r15),%r9d
+ addb %dl,%bl
+ movl 112(%rsi),%eax
+ addl $1163531501,%r9d
+ xorl %r11d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,108(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $20,%r9d
+ movl %r10d,%r12d
+ pinsrw $5,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r11d,%r12d
+ addl 52(%r15),%r8d
+ addb %dl,%al
+ movl 116(%rsi),%ebx
+ addl $2850285829,%r8d
+ xorl %r10d,%r12d
+ movzbl %al,%eax
+ movl %edx,112(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $5,%r8d
+ movl %r9d,%r12d
+ pinsrw $6,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r10d,%r12d
+ addl 8(%r15),%r11d
+ addb %dl,%bl
+ movl 120(%rsi),%eax
+ addl $4243563512,%r11d
+ xorl %r9d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,116(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $9,%r11d
+ movl %r8d,%r12d
+ pinsrw $6,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r9d,%r12d
+ addl 28(%r15),%r10d
+ addb %dl,%al
+ movl 124(%rsi),%ebx
+ addl $1735328473,%r10d
+ xorl %r8d,%r12d
+ movzbl %al,%eax
+ movl %edx,120(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $14,%r10d
+ movl %r11d,%r12d
+ pinsrw $7,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movdqu 16(%r13),%xmm3
+ addb $32,%bpl
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r8d,%r12d
+ addl 48(%r15),%r9d
+ addb %dl,%bl
+ movl 0(%rdi,%rbp,4),%eax
+ addl $2368359562,%r9d
+ xorl %r11d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,124(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $20,%r9d
+ movl %r11d,%r12d
+ pinsrw $7,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movq %rcx,%rsi
+ xorq %rcx,%rcx
+ movb %sil,%cl
+ leaq (%rdi,%rbp,4),%rsi
+ psllq $8,%xmm1
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+ pxor %xmm0,%xmm0
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ xorl %r9d,%r12d
+ addl 20(%r15),%r8d
+ addb %dl,%al
+ movl 4(%rsi),%ebx
+ addl $4294588738,%r8d
+ movzbl %al,%eax
+ addl %r12d,%r8d
+ movl %edx,0(%rsi)
+ addb %bl,%cl
+ roll $4,%r8d
+ movl %r10d,%r12d
+ movd (%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ pxor %xmm1,%xmm1
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ xorl %r8d,%r12d
+ addl 32(%r15),%r11d
+ addb %dl,%bl
+ movl 8(%rsi),%eax
+ addl $2272392833,%r11d
+ movzbl %bl,%ebx
+ addl %r12d,%r11d
+ movl %edx,4(%rsi)
+ addb %al,%cl
+ roll $11,%r11d
+ movl %r9d,%r12d
+ movd (%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ xorl %r11d,%r12d
+ addl 44(%r15),%r10d
+ addb %dl,%al
+ movl 12(%rsi),%ebx
+ addl $1839030562,%r10d
+ movzbl %al,%eax
+ addl %r12d,%r10d
+ movl %edx,8(%rsi)
+ addb %bl,%cl
+ roll $16,%r10d
+ movl %r8d,%r12d
+ pinsrw $1,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ xorl %r10d,%r12d
+ addl 56(%r15),%r9d
+ addb %dl,%bl
+ movl 16(%rsi),%eax
+ addl $4259657740,%r9d
+ movzbl %bl,%ebx
+ addl %r12d,%r9d
+ movl %edx,12(%rsi)
+ addb %al,%cl
+ roll $23,%r9d
+ movl %r11d,%r12d
+ pinsrw $1,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ xorl %r9d,%r12d
+ addl 4(%r15),%r8d
+ addb %dl,%al
+ movl 20(%rsi),%ebx
+ addl $2763975236,%r8d
+ movzbl %al,%eax
+ addl %r12d,%r8d
+ movl %edx,16(%rsi)
+ addb %bl,%cl
+ roll $4,%r8d
+ movl %r10d,%r12d
+ pinsrw $2,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ xorl %r8d,%r12d
+ addl 16(%r15),%r11d
+ addb %dl,%bl
+ movl 24(%rsi),%eax
+ addl $1272893353,%r11d
+ movzbl %bl,%ebx
+ addl %r12d,%r11d
+ movl %edx,20(%rsi)
+ addb %al,%cl
+ roll $11,%r11d
+ movl %r9d,%r12d
+ pinsrw $2,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ xorl %r11d,%r12d
+ addl 28(%r15),%r10d
+ addb %dl,%al
+ movl 28(%rsi),%ebx
+ addl $4139469664,%r10d
+ movzbl %al,%eax
+ addl %r12d,%r10d
+ movl %edx,24(%rsi)
+ addb %bl,%cl
+ roll $16,%r10d
+ movl %r8d,%r12d
+ pinsrw $3,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ xorl %r10d,%r12d
+ addl 40(%r15),%r9d
+ addb %dl,%bl
+ movl 32(%rsi),%eax
+ addl $3200236656,%r9d
+ movzbl %bl,%ebx
+ addl %r12d,%r9d
+ movl %edx,28(%rsi)
+ addb %al,%cl
+ roll $23,%r9d
+ movl %r11d,%r12d
+ pinsrw $3,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ xorl %r9d,%r12d
+ addl 52(%r15),%r8d
+ addb %dl,%al
+ movl 36(%rsi),%ebx
+ addl $681279174,%r8d
+ movzbl %al,%eax
+ addl %r12d,%r8d
+ movl %edx,32(%rsi)
+ addb %bl,%cl
+ roll $4,%r8d
+ movl %r10d,%r12d
+ pinsrw $4,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ xorl %r8d,%r12d
+ addl 0(%r15),%r11d
+ addb %dl,%bl
+ movl 40(%rsi),%eax
+ addl $3936430074,%r11d
+ movzbl %bl,%ebx
+ addl %r12d,%r11d
+ movl %edx,36(%rsi)
+ addb %al,%cl
+ roll $11,%r11d
+ movl %r9d,%r12d
+ pinsrw $4,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ xorl %r11d,%r12d
+ addl 12(%r15),%r10d
+ addb %dl,%al
+ movl 44(%rsi),%ebx
+ addl $3572445317,%r10d
+ movzbl %al,%eax
+ addl %r12d,%r10d
+ movl %edx,40(%rsi)
+ addb %bl,%cl
+ roll $16,%r10d
+ movl %r8d,%r12d
+ pinsrw $5,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ xorl %r10d,%r12d
+ addl 24(%r15),%r9d
+ addb %dl,%bl
+ movl 48(%rsi),%eax
+ addl $76029189,%r9d
+ movzbl %bl,%ebx
+ addl %r12d,%r9d
+ movl %edx,44(%rsi)
+ addb %al,%cl
+ roll $23,%r9d
+ movl %r11d,%r12d
+ pinsrw $5,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ xorl %r9d,%r12d
+ addl 36(%r15),%r8d
+ addb %dl,%al
+ movl 52(%rsi),%ebx
+ addl $3654602809,%r8d
+ movzbl %al,%eax
+ addl %r12d,%r8d
+ movl %edx,48(%rsi)
+ addb %bl,%cl
+ roll $4,%r8d
+ movl %r10d,%r12d
+ pinsrw $6,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ xorl %r8d,%r12d
+ addl 48(%r15),%r11d
+ addb %dl,%bl
+ movl 56(%rsi),%eax
+ addl $3873151461,%r11d
+ movzbl %bl,%ebx
+ addl %r12d,%r11d
+ movl %edx,52(%rsi)
+ addb %al,%cl
+ roll $11,%r11d
+ movl %r9d,%r12d
+ pinsrw $6,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ xorl %r11d,%r12d
+ addl 60(%r15),%r10d
+ addb %dl,%al
+ movl 60(%rsi),%ebx
+ addl $530742520,%r10d
+ movzbl %al,%eax
+ addl %r12d,%r10d
+ movl %edx,56(%rsi)
+ addb %bl,%cl
+ roll $16,%r10d
+ movl %r8d,%r12d
+ pinsrw $7,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movdqu 32(%r13),%xmm4
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ xorl %r10d,%r12d
+ addl 8(%r15),%r9d
+ addb %dl,%bl
+ movl 64(%rsi),%eax
+ addl $3299628645,%r9d
+ movzbl %bl,%ebx
+ addl %r12d,%r9d
+ movl %edx,60(%rsi)
+ addb %al,%cl
+ roll $23,%r9d
+ movl $-1,%r12d
+ pinsrw $7,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ psllq $8,%xmm1
+ pxor %xmm0,%xmm4
+ pxor %xmm1,%xmm4
+ pxor %xmm0,%xmm0
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ orl %r9d,%r12d
+ addl 0(%r15),%r8d
+ addb %dl,%al
+ movl 68(%rsi),%ebx
+ addl $4096336452,%r8d
+ movzbl %al,%eax
+ xorl %r10d,%r12d
+ movl %edx,64(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $6,%r8d
+ movl $-1,%r12d
+ movd (%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ pxor %xmm1,%xmm1
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ orl %r8d,%r12d
+ addl 28(%r15),%r11d
+ addb %dl,%bl
+ movl 72(%rsi),%eax
+ addl $1126891415,%r11d
+ movzbl %bl,%ebx
+ xorl %r9d,%r12d
+ movl %edx,68(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $10,%r11d
+ movl $-1,%r12d
+ movd (%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ orl %r11d,%r12d
+ addl 56(%r15),%r10d
+ addb %dl,%al
+ movl 76(%rsi),%ebx
+ addl $2878612391,%r10d
+ movzbl %al,%eax
+ xorl %r8d,%r12d
+ movl %edx,72(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $15,%r10d
+ movl $-1,%r12d
+ pinsrw $1,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ orl %r10d,%r12d
+ addl 20(%r15),%r9d
+ addb %dl,%bl
+ movl 80(%rsi),%eax
+ addl $4237533241,%r9d
+ movzbl %bl,%ebx
+ xorl %r11d,%r12d
+ movl %edx,76(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $21,%r9d
+ movl $-1,%r12d
+ pinsrw $1,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ orl %r9d,%r12d
+ addl 48(%r15),%r8d
+ addb %dl,%al
+ movl 84(%rsi),%ebx
+ addl $1700485571,%r8d
+ movzbl %al,%eax
+ xorl %r10d,%r12d
+ movl %edx,80(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $6,%r8d
+ movl $-1,%r12d
+ pinsrw $2,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ orl %r8d,%r12d
+ addl 12(%r15),%r11d
+ addb %dl,%bl
+ movl 88(%rsi),%eax
+ addl $2399980690,%r11d
+ movzbl %bl,%ebx
+ xorl %r9d,%r12d
+ movl %edx,84(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $10,%r11d
+ movl $-1,%r12d
+ pinsrw $2,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ orl %r11d,%r12d
+ addl 40(%r15),%r10d
+ addb %dl,%al
+ movl 92(%rsi),%ebx
+ addl $4293915773,%r10d
+ movzbl %al,%eax
+ xorl %r8d,%r12d
+ movl %edx,88(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $15,%r10d
+ movl $-1,%r12d
+ pinsrw $3,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ orl %r10d,%r12d
+ addl 4(%r15),%r9d
+ addb %dl,%bl
+ movl 96(%rsi),%eax
+ addl $2240044497,%r9d
+ movzbl %bl,%ebx
+ xorl %r11d,%r12d
+ movl %edx,92(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $21,%r9d
+ movl $-1,%r12d
+ pinsrw $3,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ orl %r9d,%r12d
+ addl 32(%r15),%r8d
+ addb %dl,%al
+ movl 100(%rsi),%ebx
+ addl $1873313359,%r8d
+ movzbl %al,%eax
+ xorl %r10d,%r12d
+ movl %edx,96(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $6,%r8d
+ movl $-1,%r12d
+ pinsrw $4,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ orl %r8d,%r12d
+ addl 60(%r15),%r11d
+ addb %dl,%bl
+ movl 104(%rsi),%eax
+ addl $4264355552,%r11d
+ movzbl %bl,%ebx
+ xorl %r9d,%r12d
+ movl %edx,100(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $10,%r11d
+ movl $-1,%r12d
+ pinsrw $4,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ orl %r11d,%r12d
+ addl 24(%r15),%r10d
+ addb %dl,%al
+ movl 108(%rsi),%ebx
+ addl $2734768916,%r10d
+ movzbl %al,%eax
+ xorl %r8d,%r12d
+ movl %edx,104(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $15,%r10d
+ movl $-1,%r12d
+ pinsrw $5,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ orl %r10d,%r12d
+ addl 52(%r15),%r9d
+ addb %dl,%bl
+ movl 112(%rsi),%eax
+ addl $1309151649,%r9d
+ movzbl %bl,%ebx
+ xorl %r11d,%r12d
+ movl %edx,108(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $21,%r9d
+ movl $-1,%r12d
+ pinsrw $5,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ orl %r9d,%r12d
+ addl 16(%r15),%r8d
+ addb %dl,%al
+ movl 116(%rsi),%ebx
+ addl $4149444226,%r8d
+ movzbl %al,%eax
+ xorl %r10d,%r12d
+ movl %edx,112(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $6,%r8d
+ movl $-1,%r12d
+ pinsrw $6,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ orl %r8d,%r12d
+ addl 44(%r15),%r11d
+ addb %dl,%bl
+ movl 120(%rsi),%eax
+ addl $3174756917,%r11d
+ movzbl %bl,%ebx
+ xorl %r9d,%r12d
+ movl %edx,116(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $10,%r11d
+ movl $-1,%r12d
+ pinsrw $6,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ orl %r11d,%r12d
+ addl 8(%r15),%r10d
+ addb %dl,%al
+ movl 124(%rsi),%ebx
+ addl $718787259,%r10d
+ movzbl %al,%eax
+ xorl %r8d,%r12d
+ movl %edx,120(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $15,%r10d
+ movl $-1,%r12d
+ pinsrw $7,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movdqu 48(%r13),%xmm5
+ addb $32,%bpl
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ orl %r10d,%r12d
+ addl 36(%r15),%r9d
+ addb %dl,%bl
+ movl 0(%rdi,%rbp,4),%eax
+ addl $3951481745,%r9d
+ movzbl %bl,%ebx
+ xorl %r11d,%r12d
+ movl %edx,124(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $21,%r9d
+ movl $-1,%r12d
+ pinsrw $7,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movq %rbp,%rsi
+ xorq %rbp,%rbp
+ movb %sil,%bpl
+ movq %rcx,%rsi
+ xorq %rcx,%rcx
+ movb %sil,%cl
+ leaq (%rdi,%rbp,4),%rsi
+ psllq $8,%xmm1
+ pxor %xmm0,%xmm5
+ pxor %xmm1,%xmm5
+ addl 0(%rsp),%r8d
+ addl 4(%rsp),%r9d
+ addl 8(%rsp),%r10d
+ addl 12(%rsp),%r11d
+
+ movdqu %xmm2,(%r14,%r13,1)
+ movdqu %xmm3,16(%r14,%r13,1)
+ movdqu %xmm4,32(%r14,%r13,1)
+ movdqu %xmm5,48(%r14,%r13,1)
+ leaq 64(%r15),%r15
+ leaq 64(%r13),%r13
+ cmpq 16(%rsp),%r15
+ jb .Loop
+
+ movq 24(%rsp),%r12
+ subb %al,%cl
+ movl %r8d,0(%r12)
+ movl %r9d,4(%r12)
+ movl %r10d,8(%r12)
+ movl %r11d,12(%r12)
+ subb $1,%bpl
+ movl %ebp,-8(%rdi)
+ movl %ecx,-4(%rdi)
+
+ movq 40(%rsp),%r15
+ movq 48(%rsp),%r14
+ movq 56(%rsp),%r13
+ movq 64(%rsp),%r12
+ movq 72(%rsp),%rbp
+ movq 80(%rsp),%rbx
+ leaq 88(%rsp),%rsp
+.Lepilogue:
+.Labort:
+ .byte 0xf3,0xc3
+.size rc4_md5_enc,.-rc4_md5_enc
+#endif
diff --git a/linux-x86_64/crypto/rc4/rc4-x86_64.S b/linux-x86_64/crypto/rc4/rc4-x86_64.S
new file mode 100644
index 0000000..edd6565
--- /dev/null
+++ b/linux-x86_64/crypto/rc4/rc4-x86_64.S
@@ -0,0 +1,622 @@
+#if defined(__x86_64__)
+.text
+
+
+.globl asm_RC4
+.hidden asm_RC4
+.type asm_RC4,@function
+.align 16
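+/* Inferred calling convention (from the register usage below): rdi = RC4_KEY,
+   rsi = length in bytes, rdx = input, rcx = output; a zero length returns
+   immediately. The -1 marker at 256(%rdi), written by the byte-table path of
+   asm_RC4_set_key, selects the .LRC4_CHAR code path here. */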
+asm_RC4:
+ orq %rsi,%rsi
+ jne .Lentry
+ .byte 0xf3,0xc3
+.Lentry:
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+.Lprologue:
+ movq %rsi,%r11
+ movq %rdx,%r12
+ movq %rcx,%r13
+ xorq %r10,%r10
+ xorq %rcx,%rcx
+
+ leaq 8(%rdi),%rdi
+ movb -8(%rdi),%r10b
+ movb -4(%rdi),%cl
+ cmpl $-1,256(%rdi)
+ je .LRC4_CHAR
+ movl OPENSSL_ia32cap_P(%rip),%r8d
+ xorq %rbx,%rbx
+ incb %r10b
+ subq %r10,%rbx
+ subq %r12,%r13
+ movl (%rdi,%r10,4),%eax
+ testq $-16,%r11
+ jz .Lloop1
+ btl $30,%r8d
+ jc .Lintel
+ andq $7,%rbx
+ leaq 1(%r10),%rsi
+ jz .Loop8
+ subq %rbx,%r11
+.Loop8_warmup:
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ movl %edx,(%rdi,%r10,4)
+ addb %dl,%al
+ incb %r10b
+ movl (%rdi,%rax,4),%edx
+ movl (%rdi,%r10,4),%eax
+ xorb (%r12),%dl
+ movb %dl,(%r12,%r13,1)
+ leaq 1(%r12),%r12
+ decq %rbx
+ jnz .Loop8_warmup
+
+ leaq 1(%r10),%rsi
+ jmp .Loop8
+.align 16
+.Loop8:
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ movl 0(%rdi,%rsi,4),%ebx
+ rorq $8,%r8
+ movl %edx,0(%rdi,%r10,4)
+ addb %al,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb %bl,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ movl 4(%rdi,%rsi,4),%eax
+ rorq $8,%r8
+ movl %edx,4(%rdi,%r10,4)
+ addb %bl,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ movl 8(%rdi,%rsi,4),%ebx
+ rorq $8,%r8
+ movl %edx,8(%rdi,%r10,4)
+ addb %al,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb %bl,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ movl 12(%rdi,%rsi,4),%eax
+ rorq $8,%r8
+ movl %edx,12(%rdi,%r10,4)
+ addb %bl,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ movl 16(%rdi,%rsi,4),%ebx
+ rorq $8,%r8
+ movl %edx,16(%rdi,%r10,4)
+ addb %al,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb %bl,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ movl 20(%rdi,%rsi,4),%eax
+ rorq $8,%r8
+ movl %edx,20(%rdi,%r10,4)
+ addb %bl,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ movl 24(%rdi,%rsi,4),%ebx
+ rorq $8,%r8
+ movl %edx,24(%rdi,%r10,4)
+ addb %al,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb $8,%sil
+ addb %bl,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ movl -4(%rdi,%rsi,4),%eax
+ rorq $8,%r8
+ movl %edx,28(%rdi,%r10,4)
+ addb %bl,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb $8,%r10b
+ rorq $8,%r8
+ subq $8,%r11
+
+ xorq (%r12),%r8
+ movq %r8,(%r12,%r13,1)
+ leaq 8(%r12),%r12
+
+ testq $-8,%r11
+ jnz .Loop8
+ cmpq $0,%r11
+ jne .Lloop1
+ jmp .Lexit
+
+.align 16
+.Lintel:
+ testq $-32,%r11
+ jz .Lloop1
+ andq $15,%rbx
+ jz .Loop16_is_hot
+ subq %rbx,%r11
+.Loop16_warmup:
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ movl %edx,(%rdi,%r10,4)
+ addb %dl,%al
+ incb %r10b
+ movl (%rdi,%rax,4),%edx
+ movl (%rdi,%r10,4),%eax
+ xorb (%r12),%dl
+ movb %dl,(%r12,%r13,1)
+ leaq 1(%r12),%r12
+ decq %rbx
+ jnz .Loop16_warmup
+
+ movq %rcx,%rbx
+ xorq %rcx,%rcx
+ movb %bl,%cl
+
+.Loop16_is_hot:
+ leaq (%rdi,%r10,4),%rsi
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ pxor %xmm0,%xmm0
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 4(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,0(%rsi)
+ addb %bl,%cl
+ pinsrw $0,(%rdi,%rax,4),%xmm0
+ jmp .Loop16_enter
+.align 16
+.Loop16:
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ pxor %xmm0,%xmm2
+ psllq $8,%xmm1
+ pxor %xmm0,%xmm0
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 4(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,0(%rsi)
+ pxor %xmm1,%xmm2
+ addb %bl,%cl
+ pinsrw $0,(%rdi,%rax,4),%xmm0
+ movdqu %xmm2,(%r12,%r13,1)
+ leaq 16(%r12),%r12
+.Loop16_enter:
+ movl (%rdi,%rcx,4),%edx
+ pxor %xmm1,%xmm1
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movl 8(%rsi),%eax
+ movzbl %bl,%ebx
+ movl %edx,4(%rsi)
+ addb %al,%cl
+ pinsrw $0,(%rdi,%rbx,4),%xmm1
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 12(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,8(%rsi)
+ addb %bl,%cl
+ pinsrw $1,(%rdi,%rax,4),%xmm0
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movl 16(%rsi),%eax
+ movzbl %bl,%ebx
+ movl %edx,12(%rsi)
+ addb %al,%cl
+ pinsrw $1,(%rdi,%rbx,4),%xmm1
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 20(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,16(%rsi)
+ addb %bl,%cl
+ pinsrw $2,(%rdi,%rax,4),%xmm0
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movl 24(%rsi),%eax
+ movzbl %bl,%ebx
+ movl %edx,20(%rsi)
+ addb %al,%cl
+ pinsrw $2,(%rdi,%rbx,4),%xmm1
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 28(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,24(%rsi)
+ addb %bl,%cl
+ pinsrw $3,(%rdi,%rax,4),%xmm0
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movl 32(%rsi),%eax
+ movzbl %bl,%ebx
+ movl %edx,28(%rsi)
+ addb %al,%cl
+ pinsrw $3,(%rdi,%rbx,4),%xmm1
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 36(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,32(%rsi)
+ addb %bl,%cl
+ pinsrw $4,(%rdi,%rax,4),%xmm0
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movl 40(%rsi),%eax
+ movzbl %bl,%ebx
+ movl %edx,36(%rsi)
+ addb %al,%cl
+ pinsrw $4,(%rdi,%rbx,4),%xmm1
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 44(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,40(%rsi)
+ addb %bl,%cl
+ pinsrw $5,(%rdi,%rax,4),%xmm0
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movl 48(%rsi),%eax
+ movzbl %bl,%ebx
+ movl %edx,44(%rsi)
+ addb %al,%cl
+ pinsrw $5,(%rdi,%rbx,4),%xmm1
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 52(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,48(%rsi)
+ addb %bl,%cl
+ pinsrw $6,(%rdi,%rax,4),%xmm0
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movl 56(%rsi),%eax
+ movzbl %bl,%ebx
+ movl %edx,52(%rsi)
+ addb %al,%cl
+ pinsrw $6,(%rdi,%rbx,4),%xmm1
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 60(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,56(%rsi)
+ addb %bl,%cl
+ pinsrw $7,(%rdi,%rax,4),%xmm0
+ addb $16,%r10b
+ movdqu (%r12),%xmm2
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movzbl %bl,%ebx
+ movl %edx,60(%rsi)
+ leaq (%rdi,%r10,4),%rsi
+ pinsrw $7,(%rdi,%rbx,4),%xmm1
+ movl (%rsi),%eax
+ movq %rcx,%rbx
+ xorq %rcx,%rcx
+ subq $16,%r11
+ movb %bl,%cl
+ testq $-16,%r11
+ jnz .Loop16
+
+ psllq $8,%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm1,%xmm2
+ movdqu %xmm2,(%r12,%r13,1)
+ leaq 16(%r12),%r12
+
+ cmpq $0,%r11
+ jne .Lloop1
+ jmp .Lexit
+
+.align 16
+.Lloop1:
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ movl %edx,(%rdi,%r10,4)
+ addb %dl,%al
+ incb %r10b
+ movl (%rdi,%rax,4),%edx
+ movl (%rdi,%r10,4),%eax
+ xorb (%r12),%dl
+ movb %dl,(%r12,%r13,1)
+ leaq 1(%r12),%r12
+ decq %r11
+ jnz .Lloop1
+ jmp .Lexit
+
+.align 16
+.LRC4_CHAR:
+ addb $1,%r10b
+ movzbl (%rdi,%r10,1),%eax
+ testq $-8,%r11
+ jz .Lcloop1
+ jmp .Lcloop8
+.align 16
+.Lcloop8:
+ movl (%r12),%r8d
+ movl 4(%r12),%r9d
+ addb %al,%cl
+ leaq 1(%r10),%rsi
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %sil,%esi
+ movzbl (%rdi,%rsi,1),%ebx
+ movb %al,(%rdi,%rcx,1)
+ cmpq %rsi,%rcx
+ movb %dl,(%rdi,%r10,1)
+ jne .Lcmov0
+ movq %rax,%rbx
+.Lcmov0:
+ addb %al,%dl
+ xorb (%rdi,%rdx,1),%r8b
+ rorl $8,%r8d
+ addb %bl,%cl
+ leaq 1(%rsi),%r10
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %r10b,%r10d
+ movzbl (%rdi,%r10,1),%eax
+ movb %bl,(%rdi,%rcx,1)
+ cmpq %r10,%rcx
+ movb %dl,(%rdi,%rsi,1)
+ jne .Lcmov1
+ movq %rbx,%rax
+.Lcmov1:
+ addb %bl,%dl
+ xorb (%rdi,%rdx,1),%r8b
+ rorl $8,%r8d
+ addb %al,%cl
+ leaq 1(%r10),%rsi
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %sil,%esi
+ movzbl (%rdi,%rsi,1),%ebx
+ movb %al,(%rdi,%rcx,1)
+ cmpq %rsi,%rcx
+ movb %dl,(%rdi,%r10,1)
+ jne .Lcmov2
+ movq %rax,%rbx
+.Lcmov2:
+ addb %al,%dl
+ xorb (%rdi,%rdx,1),%r8b
+ rorl $8,%r8d
+ addb %bl,%cl
+ leaq 1(%rsi),%r10
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %r10b,%r10d
+ movzbl (%rdi,%r10,1),%eax
+ movb %bl,(%rdi,%rcx,1)
+ cmpq %r10,%rcx
+ movb %dl,(%rdi,%rsi,1)
+ jne .Lcmov3
+ movq %rbx,%rax
+.Lcmov3:
+ addb %bl,%dl
+ xorb (%rdi,%rdx,1),%r8b
+ rorl $8,%r8d
+ addb %al,%cl
+ leaq 1(%r10),%rsi
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %sil,%esi
+ movzbl (%rdi,%rsi,1),%ebx
+ movb %al,(%rdi,%rcx,1)
+ cmpq %rsi,%rcx
+ movb %dl,(%rdi,%r10,1)
+ jne .Lcmov4
+ movq %rax,%rbx
+.Lcmov4:
+ addb %al,%dl
+ xorb (%rdi,%rdx,1),%r9b
+ rorl $8,%r9d
+ addb %bl,%cl
+ leaq 1(%rsi),%r10
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %r10b,%r10d
+ movzbl (%rdi,%r10,1),%eax
+ movb %bl,(%rdi,%rcx,1)
+ cmpq %r10,%rcx
+ movb %dl,(%rdi,%rsi,1)
+ jne .Lcmov5
+ movq %rbx,%rax
+.Lcmov5:
+ addb %bl,%dl
+ xorb (%rdi,%rdx,1),%r9b
+ rorl $8,%r9d
+ addb %al,%cl
+ leaq 1(%r10),%rsi
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %sil,%esi
+ movzbl (%rdi,%rsi,1),%ebx
+ movb %al,(%rdi,%rcx,1)
+ cmpq %rsi,%rcx
+ movb %dl,(%rdi,%r10,1)
+ jne .Lcmov6
+ movq %rax,%rbx
+.Lcmov6:
+ addb %al,%dl
+ xorb (%rdi,%rdx,1),%r9b
+ rorl $8,%r9d
+ addb %bl,%cl
+ leaq 1(%rsi),%r10
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %r10b,%r10d
+ movzbl (%rdi,%r10,1),%eax
+ movb %bl,(%rdi,%rcx,1)
+ cmpq %r10,%rcx
+ movb %dl,(%rdi,%rsi,1)
+ jne .Lcmov7
+ movq %rbx,%rax
+.Lcmov7:
+ addb %bl,%dl
+ xorb (%rdi,%rdx,1),%r9b
+ rorl $8,%r9d
+ leaq -8(%r11),%r11
+ movl %r8d,(%r13)
+ leaq 8(%r12),%r12
+ movl %r9d,4(%r13)
+ leaq 8(%r13),%r13
+
+ testq $-8,%r11
+ jnz .Lcloop8
+ cmpq $0,%r11
+ jne .Lcloop1
+ jmp .Lexit
+.align 16
+.Lcloop1:
+ addb %al,%cl
+ movzbl %cl,%ecx
+ movzbl (%rdi,%rcx,1),%edx
+ movb %al,(%rdi,%rcx,1)
+ movb %dl,(%rdi,%r10,1)
+ addb %al,%dl
+ addb $1,%r10b
+ movzbl %dl,%edx
+ movzbl %r10b,%r10d
+ movzbl (%rdi,%rdx,1),%edx
+ movzbl (%rdi,%r10,1),%eax
+ xorb (%r12),%dl
+ leaq 1(%r12),%r12
+ movb %dl,(%r13)
+ leaq 1(%r13),%r13
+ subq $1,%r11
+ jnz .Lcloop1
+ jmp .Lexit
+
+.align 16
+.Lexit:
+ subb $1,%r10b
+ movl %r10d,-8(%rdi)
+ movl %ecx,-4(%rdi)
+
+ movq (%rsp),%r13
+ movq 8(%rsp),%r12
+ movq 16(%rsp),%rbx
+ addq $24,%rsp
+.Lepilogue:
+ .byte 0xf3,0xc3
+.size asm_RC4,.-asm_RC4
+.globl asm_RC4_set_key
+.hidden asm_RC4_set_key
+.type asm_RC4_set_key,@function
+.align 16
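+/* Inferred calling convention (from the register usage below): rdi = RC4_KEY,
+   rsi = key length, rdx = key bytes. This is the standard RC4 key schedule;
+   when bit 20 of OPENSSL_ia32cap_P is set it builds a byte-per-entry table
+   and stores -1 at offset 256 so asm_RC4 can select the matching path. */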
+asm_RC4_set_key:
+ leaq 8(%rdi),%rdi
+ leaq (%rdx,%rsi,1),%rdx
+ negq %rsi
+ movq %rsi,%rcx
+ xorl %eax,%eax
+ xorq %r9,%r9
+ xorq %r10,%r10
+ xorq %r11,%r11
+
+ movl OPENSSL_ia32cap_P(%rip),%r8d
+ btl $20,%r8d
+ jc .Lc1stloop
+ jmp .Lw1stloop
+
+.align 16
+.Lw1stloop:
+ movl %eax,(%rdi,%rax,4)
+ addb $1,%al
+ jnc .Lw1stloop
+
+ xorq %r9,%r9
+ xorq %r8,%r8
+.align 16
+.Lw2ndloop:
+ movl (%rdi,%r9,4),%r10d
+ addb (%rdx,%rsi,1),%r8b
+ addb %r10b,%r8b
+ addq $1,%rsi
+ movl (%rdi,%r8,4),%r11d
+ cmovzq %rcx,%rsi
+ movl %r10d,(%rdi,%r8,4)
+ movl %r11d,(%rdi,%r9,4)
+ addb $1,%r9b
+ jnc .Lw2ndloop
+ jmp .Lexit_key
+
+.align 16
+.Lc1stloop:
+ movb %al,(%rdi,%rax,1)
+ addb $1,%al
+ jnc .Lc1stloop
+
+ xorq %r9,%r9
+ xorq %r8,%r8
+.align 16
+.Lc2ndloop:
+ movb (%rdi,%r9,1),%r10b
+ addb (%rdx,%rsi,1),%r8b
+ addb %r10b,%r8b
+ addq $1,%rsi
+ movb (%rdi,%r8,1),%r11b
+ jnz .Lcnowrap
+ movq %rcx,%rsi
+.Lcnowrap:
+ movb %r10b,(%rdi,%r8,1)
+ movb %r11b,(%rdi,%r9,1)
+ addb $1,%r9b
+ jnc .Lc2ndloop
+ movl $-1,256(%rdi)
+
+.align 16
+.Lexit_key:
+ xorl %eax,%eax
+ movl %eax,-8(%rdi)
+ movl %eax,-4(%rdi)
+ .byte 0xf3,0xc3
+.size asm_RC4_set_key,.-asm_RC4_set_key
+
+.globl RC4_options
+.hidden RC4_options
+.type RC4_options,@function
+.align 16
+RC4_options:
+ leaq .Lopts(%rip),%rax
+ movq OPENSSL_ia32cap_P(%rip),%rdx
+ movl (%rdx),%edx
+ btl $20,%edx
+ jc .L8xchar
+ btl $30,%edx
+ jnc .Ldone
+ addq $25,%rax
+ .byte 0xf3,0xc3
+.L8xchar:
+ addq $12,%rax
+.Ldone:
+ .byte 0xf3,0xc3
+.align 64
+.Lopts:
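+/* The .byte runs below are the NUL-terminated strings "rc4(8x,int)",
+   "rc4(8x,char)", "rc4(16x,int)" and "RC4 for x86_64, CRYPTOGAMS by
+   <appro@openssl.org>"; RC4_options adds 12 or 25 to the base address to
+   pick among the first three. */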
+.byte 114,99,52,40,56,120,44,105,110,116,41,0
+.byte 114,99,52,40,56,120,44,99,104,97,114,41,0
+.byte 114,99,52,40,49,54,120,44,105,110,116,41,0
+.byte 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
+.size RC4_options,.-RC4_options
+#endif