diff options
author | Adam Langley <agl@google.com> | 2015-01-22 14:27:53 -0800 |
---|---|---|
committer | Adam Langley <agl@google.com> | 2015-01-30 16:52:14 -0800 |
commit | d9e397b599b13d642138480a28c14db7a136bf05 (patch) | |
tree | 34bab61dc4ce323b123ad4614dbc07e86ea2f9ef /linux-x86_64/crypto/rc4 | |
download | external_boringssl-d9e397b599b13d642138480a28c14db7a136bf05.zip external_boringssl-d9e397b599b13d642138480a28c14db7a136bf05.tar.gz external_boringssl-d9e397b599b13d642138480a28c14db7a136bf05.tar.bz2 |
Initial commit of BoringSSL for Android.
Diffstat (limited to 'linux-x86_64/crypto/rc4')
-rw-r--r-- | linux-x86_64/crypto/rc4/rc4-md5-x86_64.S | 1262 | ||||
-rw-r--r-- | linux-x86_64/crypto/rc4/rc4-x86_64.S | 622 |
2 files changed, 1884 insertions, 0 deletions
diff --git a/linux-x86_64/crypto/rc4/rc4-md5-x86_64.S b/linux-x86_64/crypto/rc4/rc4-md5-x86_64.S new file mode 100644 index 0000000..06c8d67 --- /dev/null +++ b/linux-x86_64/crypto/rc4/rc4-md5-x86_64.S @@ -0,0 +1,1262 @@ +#if defined(__x86_64__) +.text +.align 16 + +.globl rc4_md5_enc +.hidden rc4_md5_enc +.type rc4_md5_enc,@function +rc4_md5_enc: + cmpq $0,%r9 + je .Labort + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $40,%rsp +.Lbody: + movq %rcx,%r11 + movq %r9,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %r8,%r15 + xorq %rbp,%rbp + xorq %rcx,%rcx + + leaq 8(%rdi),%rdi + movb -8(%rdi),%bpl + movb -4(%rdi),%cl + + incb %bpl + subq %r13,%r14 + movl (%rdi,%rbp,4),%eax + addb %al,%cl + leaq (%rdi,%rbp,4),%rsi + shlq $6,%r12 + addq %r15,%r12 + movq %r12,16(%rsp) + + movq %r11,24(%rsp) + movl 0(%r11),%r8d + movl 4(%r11),%r9d + movl 8(%r11),%r10d + movl 12(%r11),%r11d + jmp .Loop + +.align 16 +.Loop: + movl %r8d,0(%rsp) + movl %r9d,4(%rsp) + movl %r10d,8(%rsp) + movl %r11d,%r12d + movl %r11d,12(%rsp) + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 0(%r15),%r8d + addb %dl,%al + movl 4(%rsi),%ebx + addl $3614090360,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,0(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 4(%r15),%r11d + addb %dl,%bl + movl 8(%rsi),%eax + addl $3905402710,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,4(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 8(%r15),%r10d + addb %dl,%al + movl 12(%rsi),%ebx + addl $606105819,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,8(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 12(%r15),%r9d + addb %dl,%bl + movl 16(%rsi),%eax + addl $3250441966,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,12(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r11d,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 16(%r15),%r8d + addb %dl,%al + movl 20(%rsi),%ebx + addl $4118548399,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,16(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 20(%r15),%r11d + addb %dl,%bl + movl 24(%rsi),%eax + addl $1200080426,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,20(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 24(%r15),%r10d + addb %dl,%al + movl 28(%rsi),%ebx + addl $2821735955,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,24(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 28(%r15),%r9d + addb %dl,%bl + movl 32(%rsi),%eax + addl $4249261313,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,28(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r11d,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 32(%r15),%r8d + addb %dl,%al + movl 36(%rsi),%ebx + addl $1770035416,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,32(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 36(%r15),%r11d + addb %dl,%bl + movl 40(%rsi),%eax + addl $2336552879,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,36(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 40(%r15),%r10d + addb %dl,%al + movl 44(%rsi),%ebx + addl $4294925233,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,40(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 44(%r15),%r9d + addb %dl,%bl + movl 48(%rsi),%eax + addl $2304563134,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,44(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r11d,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 48(%r15),%r8d + addb %dl,%al + movl 52(%rsi),%ebx + addl $1804603682,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,48(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 52(%r15),%r11d + addb %dl,%bl + movl 56(%rsi),%eax + addl $4254626195,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,52(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 56(%r15),%r10d + addb %dl,%al + movl 60(%rsi),%ebx + addl $2792965006,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,56(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu (%r13),%xmm2 + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 60(%r15),%r9d + addb %dl,%bl + movl 64(%rsi),%eax + addl $1236535329,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,60(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r10d,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + psllq $8,%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm1,%xmm2 + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 4(%r15),%r8d + addb %dl,%al + movl 68(%rsi),%ebx + addl $4129170786,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,64(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 24(%r15),%r11d + addb %dl,%bl + movl 72(%rsi),%eax + addl $3225465664,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,68(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 44(%r15),%r10d + addb %dl,%al + movl 76(%rsi),%ebx + addl $643717713,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,72(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 0(%r15),%r9d + addb %dl,%bl + movl 80(%rsi),%eax + addl $3921069994,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,76(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r10d,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 20(%r15),%r8d + addb %dl,%al + movl 84(%rsi),%ebx + addl $3593408605,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,80(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 40(%r15),%r11d + addb %dl,%bl + movl 88(%rsi),%eax + addl $38016083,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,84(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 60(%r15),%r10d + addb %dl,%al + movl 92(%rsi),%ebx + addl $3634488961,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,88(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 16(%r15),%r9d + addb %dl,%bl + movl 96(%rsi),%eax + addl $3889429448,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,92(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r10d,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 36(%r15),%r8d + addb %dl,%al + movl 100(%rsi),%ebx + addl $568446438,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,96(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 56(%r15),%r11d + addb %dl,%bl + movl 104(%rsi),%eax + addl $3275163606,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,100(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 12(%r15),%r10d + addb %dl,%al + movl 108(%rsi),%ebx + addl $4107603335,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,104(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 32(%r15),%r9d + addb %dl,%bl + movl 112(%rsi),%eax + addl $1163531501,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,108(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r10d,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 52(%r15),%r8d + addb %dl,%al + movl 116(%rsi),%ebx + addl $2850285829,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,112(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 8(%r15),%r11d + addb %dl,%bl + movl 120(%rsi),%eax + addl $4243563512,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,116(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 28(%r15),%r10d + addb %dl,%al + movl 124(%rsi),%ebx + addl $1735328473,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,120(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu 16(%r13),%xmm3 + addb $32,%bpl + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 48(%r15),%r9d + addb %dl,%bl + movl 0(%rdi,%rbp,4),%eax + addl $2368359562,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,124(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r11d,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movq %rcx,%rsi + xorq %rcx,%rcx + movb %sil,%cl + leaq (%rdi,%rbp,4),%rsi + psllq $8,%xmm1 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 20(%r15),%r8d + addb %dl,%al + movl 4(%rsi),%ebx + addl $4294588738,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,0(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 32(%r15),%r11d + addb %dl,%bl + movl 8(%rsi),%eax + addl $2272392833,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,4(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 44(%r15),%r10d + addb %dl,%al + movl 12(%rsi),%ebx + addl $1839030562,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,8(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 56(%r15),%r9d + addb %dl,%bl + movl 16(%rsi),%eax + addl $4259657740,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,12(%rsi) + addb %al,%cl + roll $23,%r9d + movl %r11d,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 4(%r15),%r8d + addb %dl,%al + movl 20(%rsi),%ebx + addl $2763975236,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,16(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 16(%r15),%r11d + addb %dl,%bl + movl 24(%rsi),%eax + addl $1272893353,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,20(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 28(%r15),%r10d + addb %dl,%al + movl 28(%rsi),%ebx + addl $4139469664,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,24(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 40(%r15),%r9d + addb %dl,%bl + movl 32(%rsi),%eax + addl $3200236656,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,28(%rsi) + addb %al,%cl + roll $23,%r9d + movl %r11d,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 52(%r15),%r8d + addb %dl,%al + movl 36(%rsi),%ebx + addl $681279174,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,32(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 0(%r15),%r11d + addb %dl,%bl + movl 40(%rsi),%eax + addl $3936430074,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,36(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 12(%r15),%r10d + addb %dl,%al + movl 44(%rsi),%ebx + addl $3572445317,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,40(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 24(%r15),%r9d + addb %dl,%bl + movl 48(%rsi),%eax + addl $76029189,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,44(%rsi) + addb %al,%cl + roll $23,%r9d + movl %r11d,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 36(%r15),%r8d + addb %dl,%al + movl 52(%rsi),%ebx + addl $3654602809,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,48(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 48(%r15),%r11d + addb %dl,%bl + movl 56(%rsi),%eax + addl $3873151461,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,52(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 60(%r15),%r10d + addb %dl,%al + movl 60(%rsi),%ebx + addl $530742520,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,56(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu 32(%r13),%xmm4 + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 8(%r15),%r9d + addb %dl,%bl + movl 64(%rsi),%eax + addl $3299628645,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,60(%rsi) + addb %al,%cl + roll $23,%r9d + movl $-1,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + psllq $8,%xmm1 + pxor %xmm0,%xmm4 + pxor %xmm1,%xmm4 + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 0(%r15),%r8d + addb %dl,%al + movl 68(%rsi),%ebx + addl $4096336452,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,64(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 28(%r15),%r11d + addb %dl,%bl + movl 72(%rsi),%eax + addl $1126891415,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,68(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 56(%r15),%r10d + addb %dl,%al + movl 76(%rsi),%ebx + addl $2878612391,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,72(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 20(%r15),%r9d + addb %dl,%bl + movl 80(%rsi),%eax + addl $4237533241,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,76(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 48(%r15),%r8d + addb %dl,%al + movl 84(%rsi),%ebx + addl $1700485571,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,80(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 12(%r15),%r11d + addb %dl,%bl + movl 88(%rsi),%eax + addl $2399980690,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,84(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 40(%r15),%r10d + addb %dl,%al + movl 92(%rsi),%ebx + addl $4293915773,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,88(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 4(%r15),%r9d + addb %dl,%bl + movl 96(%rsi),%eax + addl $2240044497,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,92(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 32(%r15),%r8d + addb %dl,%al + movl 100(%rsi),%ebx + addl $1873313359,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,96(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 60(%r15),%r11d + addb %dl,%bl + movl 104(%rsi),%eax + addl $4264355552,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,100(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 24(%r15),%r10d + addb %dl,%al + movl 108(%rsi),%ebx + addl $2734768916,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,104(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 52(%r15),%r9d + addb %dl,%bl + movl 112(%rsi),%eax + addl $1309151649,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,108(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 16(%r15),%r8d + addb %dl,%al + movl 116(%rsi),%ebx + addl $4149444226,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,112(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 44(%r15),%r11d + addb %dl,%bl + movl 120(%rsi),%eax + addl $3174756917,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,116(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 8(%r15),%r10d + addb %dl,%al + movl 124(%rsi),%ebx + addl $718787259,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,120(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu 48(%r13),%xmm5 + addb $32,%bpl + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 36(%r15),%r9d + addb %dl,%bl + movl 0(%rdi,%rbp,4),%eax + addl $3951481745,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,124(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movq %rbp,%rsi + xorq %rbp,%rbp + movb %sil,%bpl + movq %rcx,%rsi + xorq %rcx,%rcx + movb %sil,%cl + leaq (%rdi,%rbp,4),%rsi + psllq $8,%xmm1 + pxor %xmm0,%xmm5 + pxor %xmm1,%xmm5 + addl 0(%rsp),%r8d + addl 4(%rsp),%r9d + addl 8(%rsp),%r10d + addl 12(%rsp),%r11d + + movdqu %xmm2,(%r14,%r13,1) + movdqu %xmm3,16(%r14,%r13,1) + movdqu %xmm4,32(%r14,%r13,1) + movdqu %xmm5,48(%r14,%r13,1) + leaq 64(%r15),%r15 + leaq 64(%r13),%r13 + cmpq 16(%rsp),%r15 + jb .Loop + + movq 24(%rsp),%r12 + subb %al,%cl + movl %r8d,0(%r12) + movl %r9d,4(%r12) + movl %r10d,8(%r12) + movl %r11d,12(%r12) + subb $1,%bpl + movl %ebp,-8(%rdi) + movl %ecx,-4(%rdi) + + movq 40(%rsp),%r15 + movq 48(%rsp),%r14 + movq 56(%rsp),%r13 + movq 64(%rsp),%r12 + movq 72(%rsp),%rbp + movq 80(%rsp),%rbx + leaq 88(%rsp),%rsp +.Lepilogue: +.Labort: + .byte 0xf3,0xc3 +.size rc4_md5_enc,.-rc4_md5_enc +#endif diff --git a/linux-x86_64/crypto/rc4/rc4-x86_64.S b/linux-x86_64/crypto/rc4/rc4-x86_64.S new file mode 100644 index 0000000..edd6565 --- /dev/null +++ b/linux-x86_64/crypto/rc4/rc4-x86_64.S @@ -0,0 +1,622 @@ +#if defined(__x86_64__) +.text + + +.globl asm_RC4 +.hidden asm_RC4 +.type asm_RC4,@function +.align 16 +asm_RC4: + orq %rsi,%rsi + jne .Lentry + .byte 0xf3,0xc3 +.Lentry: + pushq %rbx + pushq %r12 + pushq %r13 +.Lprologue: + movq %rsi,%r11 + movq %rdx,%r12 + movq %rcx,%r13 + xorq %r10,%r10 + xorq %rcx,%rcx + + leaq 8(%rdi),%rdi + movb -8(%rdi),%r10b + movb -4(%rdi),%cl + cmpl $-1,256(%rdi) + je .LRC4_CHAR + movl OPENSSL_ia32cap_P(%rip),%r8d + xorq %rbx,%rbx + incb %r10b + subq %r10,%rbx + subq %r12,%r13 + movl (%rdi,%r10,4),%eax + testq $-16,%r11 + jz .Lloop1 + btl $30,%r8d + jc .Lintel + andq $7,%rbx + leaq 1(%r10),%rsi + jz .Loop8 + subq %rbx,%r11 +.Loop8_warmup: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl %edx,(%rdi,%r10,4) + addb %dl,%al + incb %r10b + movl (%rdi,%rax,4),%edx + movl (%rdi,%r10,4),%eax + xorb (%r12),%dl + movb %dl,(%r12,%r13,1) + leaq 1(%r12),%r12 + decq %rbx + jnz .Loop8_warmup + + leaq 1(%r10),%rsi + jmp .Loop8 +.align 16 +.Loop8: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 0(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,0(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl 4(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,4(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 8(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,8(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl 12(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,12(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 16(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,16(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl 20(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,20(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 24(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,24(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb $8,%sil + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl -4(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,28(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb $8,%r10b + rorq $8,%r8 + subq $8,%r11 + + xorq (%r12),%r8 + movq %r8,(%r12,%r13,1) + leaq 8(%r12),%r12 + + testq $-8,%r11 + jnz .Loop8 + cmpq $0,%r11 + jne .Lloop1 + jmp .Lexit + +.align 16 +.Lintel: + testq $-32,%r11 + jz .Lloop1 + andq $15,%rbx + jz .Loop16_is_hot + subq %rbx,%r11 +.Loop16_warmup: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl %edx,(%rdi,%r10,4) + addb %dl,%al + incb %r10b + movl (%rdi,%rax,4),%edx + movl (%rdi,%r10,4),%eax + xorb (%r12),%dl + movb %dl,(%r12,%r13,1) + leaq 1(%r12),%r12 + decq %rbx + jnz .Loop16_warmup + + movq %rcx,%rbx + xorq %rcx,%rcx + movb %bl,%cl + +.Loop16_is_hot: + leaq (%rdi,%r10,4),%rsi + addb %al,%cl + movl (%rdi,%rcx,4),%edx + pxor %xmm0,%xmm0 + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 4(%rsi),%ebx + movzbl %al,%eax + movl %edx,0(%rsi) + addb %bl,%cl + pinsrw $0,(%rdi,%rax,4),%xmm0 + jmp .Loop16_enter +.align 16 +.Loop16: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + pxor %xmm0,%xmm2 + psllq $8,%xmm1 + pxor %xmm0,%xmm0 + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 4(%rsi),%ebx + movzbl %al,%eax + movl %edx,0(%rsi) + pxor %xmm1,%xmm2 + addb %bl,%cl + pinsrw $0,(%rdi,%rax,4),%xmm0 + movdqu %xmm2,(%r12,%r13,1) + leaq 16(%r12),%r12 +.Loop16_enter: + movl (%rdi,%rcx,4),%edx + pxor %xmm1,%xmm1 + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 8(%rsi),%eax + movzbl %bl,%ebx + movl %edx,4(%rsi) + addb %al,%cl + pinsrw $0,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 12(%rsi),%ebx + movzbl %al,%eax + movl %edx,8(%rsi) + addb %bl,%cl + pinsrw $1,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 16(%rsi),%eax + movzbl %bl,%ebx + movl %edx,12(%rsi) + addb %al,%cl + pinsrw $1,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 20(%rsi),%ebx + movzbl %al,%eax + movl %edx,16(%rsi) + addb %bl,%cl + pinsrw $2,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 24(%rsi),%eax + movzbl %bl,%ebx + movl %edx,20(%rsi) + addb %al,%cl + pinsrw $2,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 28(%rsi),%ebx + movzbl %al,%eax + movl %edx,24(%rsi) + addb %bl,%cl + pinsrw $3,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 32(%rsi),%eax + movzbl %bl,%ebx + movl %edx,28(%rsi) + addb %al,%cl + pinsrw $3,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 36(%rsi),%ebx + movzbl %al,%eax + movl %edx,32(%rsi) + addb %bl,%cl + pinsrw $4,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 40(%rsi),%eax + movzbl %bl,%ebx + movl %edx,36(%rsi) + addb %al,%cl + pinsrw $4,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 44(%rsi),%ebx + movzbl %al,%eax + movl %edx,40(%rsi) + addb %bl,%cl + pinsrw $5,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 48(%rsi),%eax + movzbl %bl,%ebx + movl %edx,44(%rsi) + addb %al,%cl + pinsrw $5,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 52(%rsi),%ebx + movzbl %al,%eax + movl %edx,48(%rsi) + addb %bl,%cl + pinsrw $6,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 56(%rsi),%eax + movzbl %bl,%ebx + movl %edx,52(%rsi) + addb %al,%cl + pinsrw $6,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 60(%rsi),%ebx + movzbl %al,%eax + movl %edx,56(%rsi) + addb %bl,%cl + pinsrw $7,(%rdi,%rax,4),%xmm0 + addb $16,%r10b + movdqu (%r12),%xmm2 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movzbl %bl,%ebx + movl %edx,60(%rsi) + leaq (%rdi,%r10,4),%rsi + pinsrw $7,(%rdi,%rbx,4),%xmm1 + movl (%rsi),%eax + movq %rcx,%rbx + xorq %rcx,%rcx + subq $16,%r11 + movb %bl,%cl + testq $-16,%r11 + jnz .Loop16 + + psllq $8,%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm1,%xmm2 + movdqu %xmm2,(%r12,%r13,1) + leaq 16(%r12),%r12 + + cmpq $0,%r11 + jne .Lloop1 + jmp .Lexit + +.align 16 +.Lloop1: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl %edx,(%rdi,%r10,4) + addb %dl,%al + incb %r10b + movl (%rdi,%rax,4),%edx + movl (%rdi,%r10,4),%eax + xorb (%r12),%dl + movb %dl,(%r12,%r13,1) + leaq 1(%r12),%r12 + decq %r11 + jnz .Lloop1 + jmp .Lexit + +.align 16 +.LRC4_CHAR: + addb $1,%r10b + movzbl (%rdi,%r10,1),%eax + testq $-8,%r11 + jz .Lcloop1 + jmp .Lcloop8 +.align 16 +.Lcloop8: + movl (%r12),%r8d + movl 4(%r12),%r9d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) + jne .Lcmov0 + movq %rax,%rbx +.Lcmov0: + addb %al,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) + jne .Lcmov1 + movq %rbx,%rax +.Lcmov1: + addb %bl,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) + jne .Lcmov2 + movq %rax,%rbx +.Lcmov2: + addb %al,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) + jne .Lcmov3 + movq %rbx,%rax +.Lcmov3: + addb %bl,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) + jne .Lcmov4 + movq %rax,%rbx +.Lcmov4: + addb %al,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) + jne .Lcmov5 + movq %rbx,%rax +.Lcmov5: + addb %bl,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) + jne .Lcmov6 + movq %rax,%rbx +.Lcmov6: + addb %al,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) + jne .Lcmov7 + movq %rbx,%rax +.Lcmov7: + addb %bl,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + leaq -8(%r11),%r11 + movl %r8d,(%r13) + leaq 8(%r12),%r12 + movl %r9d,4(%r13) + leaq 8(%r13),%r13 + + testq $-8,%r11 + jnz .Lcloop8 + cmpq $0,%r11 + jne .Lcloop1 + jmp .Lexit +.align 16 +.Lcloop1: + addb %al,%cl + movzbl %cl,%ecx + movzbl (%rdi,%rcx,1),%edx + movb %al,(%rdi,%rcx,1) + movb %dl,(%rdi,%r10,1) + addb %al,%dl + addb $1,%r10b + movzbl %dl,%edx + movzbl %r10b,%r10d + movzbl (%rdi,%rdx,1),%edx + movzbl (%rdi,%r10,1),%eax + xorb (%r12),%dl + leaq 1(%r12),%r12 + movb %dl,(%r13) + leaq 1(%r13),%r13 + subq $1,%r11 + jnz .Lcloop1 + jmp .Lexit + +.align 16 +.Lexit: + subb $1,%r10b + movl %r10d,-8(%rdi) + movl %ecx,-4(%rdi) + + movq (%rsp),%r13 + movq 8(%rsp),%r12 + movq 16(%rsp),%rbx + addq $24,%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size asm_RC4,.-asm_RC4 +.globl asm_RC4_set_key +.hidden asm_RC4_set_key +.type asm_RC4_set_key,@function +.align 16 +asm_RC4_set_key: + leaq 8(%rdi),%rdi + leaq (%rdx,%rsi,1),%rdx + negq %rsi + movq %rsi,%rcx + xorl %eax,%eax + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + + movl OPENSSL_ia32cap_P(%rip),%r8d + btl $20,%r8d + jc .Lc1stloop + jmp .Lw1stloop + +.align 16 +.Lw1stloop: + movl %eax,(%rdi,%rax,4) + addb $1,%al + jnc .Lw1stloop + + xorq %r9,%r9 + xorq %r8,%r8 +.align 16 +.Lw2ndloop: + movl (%rdi,%r9,4),%r10d + addb (%rdx,%rsi,1),%r8b + addb %r10b,%r8b + addq $1,%rsi + movl (%rdi,%r8,4),%r11d + cmovzq %rcx,%rsi + movl %r10d,(%rdi,%r8,4) + movl %r11d,(%rdi,%r9,4) + addb $1,%r9b + jnc .Lw2ndloop + jmp .Lexit_key + +.align 16 +.Lc1stloop: + movb %al,(%rdi,%rax,1) + addb $1,%al + jnc .Lc1stloop + + xorq %r9,%r9 + xorq %r8,%r8 +.align 16 +.Lc2ndloop: + movb (%rdi,%r9,1),%r10b + addb (%rdx,%rsi,1),%r8b + addb %r10b,%r8b + addq $1,%rsi + movb (%rdi,%r8,1),%r11b + jnz .Lcnowrap + movq %rcx,%rsi +.Lcnowrap: + movb %r10b,(%rdi,%r8,1) + movb %r11b,(%rdi,%r9,1) + addb $1,%r9b + jnc .Lc2ndloop + movl $-1,256(%rdi) + +.align 16 +.Lexit_key: + xorl %eax,%eax + movl %eax,-8(%rdi) + movl %eax,-4(%rdi) + .byte 0xf3,0xc3 +.size asm_RC4_set_key,.-asm_RC4_set_key + +.globl RC4_options +.hidden RC4_options +.type RC4_options,@function +.align 16 +RC4_options: + leaq .Lopts(%rip),%rax + movq OPENSSL_ia32cap_P(%rip),%rdx + movl (%rdx),%edx + btl $20,%edx + jc .L8xchar + btl $30,%edx + jnc .Ldone + addq $25,%rax + .byte 0xf3,0xc3 +.L8xchar: + addq $12,%rax +.Ldone: + .byte 0xf3,0xc3 +.align 64 +.Lopts: +.byte 114,99,52,40,56,120,44,105,110,116,41,0 +.byte 114,99,52,40,56,120,44,99,104,97,114,41,0 +.byte 114,99,52,40,49,54,120,44,105,110,116,41,0 +.byte 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +.size RC4_options,.-RC4_options +#endif |