diff options
author | Nate Begeman <natebegeman@mac.com> | 2010-07-27 22:37:06 +0000 |
---|---|---|
committer | Nate Begeman <natebegeman@mac.com> | 2010-07-27 22:37:06 +0000 |
commit | bdcb5afb77547337ba148ce24d5e1046c0b25ced (patch) | |
tree | 9fcb26351ca757a6e412769796e5d7f5f7dd9b0c /lib/Target/X86/X86ISelLowering.h | |
parent | 622b7cf147f9231a1d6e3aac81a2dd1b6047b26c (diff) | |
download | external_llvm-bdcb5afb77547337ba148ce24d5e1046c0b25ced.zip external_llvm-bdcb5afb77547337ba148ce24d5e1046c0b25ced.tar.gz external_llvm-bdcb5afb77547337ba148ce24d5e1046c0b25ced.tar.bz2 |
~40% faster vector shl <4 x i32> on SSE 4.1. Larger improvements for smaller types coming in future patches.
For:
define <2 x i64> @shl(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
entry:
%shl = shl <4 x i32> %r, %a ; <<4 x i32>> [#uses=1]
%tmp2 = bitcast <4 x i32> %shl to <2 x i64> ; <<2 x i64>> [#uses=1]
ret <2 x i64> %tmp2
}
We get:
_shl: ## @shl
pslld $23, %xmm1
paddd LCPI0_0, %xmm1
cvttps2dq %xmm1, %xmm1
pmulld %xmm1, %xmm0
ret
Instead of:
_shl: ## @shl
pshufd $3, %xmm0, %xmm2
movd %xmm2, %eax
pshufd $3, %xmm1, %xmm2
movd %xmm2, %ecx
shll %cl, %eax
movd %eax, %xmm2
pshufd $1, %xmm0, %xmm3
movd %xmm3, %eax
pshufd $1, %xmm1, %xmm3
movd %xmm3, %ecx
shll %cl, %eax
movd %eax, %xmm3
punpckldq %xmm2, %xmm3
movd %xmm0, %eax
movd %xmm1, %ecx
shll %cl, %eax
movd %eax, %xmm2
movhlps %xmm0, %xmm0
movd %xmm0, %eax
movhlps %xmm1, %xmm1
movd %xmm1, %ecx
shll %cl, %eax
movd %eax, %xmm0
punpckldq %xmm0, %xmm2
movdqa %xmm2, %xmm0
punpckldq %xmm3, %xmm0
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@109549 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/X86ISelLowering.h')
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 1 |
1 files changed, 1 insertions, 0 deletions
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 96c97d9..3556579 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -723,6 +723,7 @@ namespace llvm {
 SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
 SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
 SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSHL(SDValue Op, SelectionDAG &DAG) const;
 SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
 SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const;