aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/X86/X86ISelLowering.h
diff options
context:
space:
mode:
author: Nate Begeman <natebegeman@mac.com> 2010-07-27 22:37:06 +0000
committer: Nate Begeman <natebegeman@mac.com> 2010-07-27 22:37:06 +0000
commit: bdcb5afb77547337ba148ce24d5e1046c0b25ced (patch)
tree: 9fcb26351ca757a6e412769796e5d7f5f7dd9b0c /lib/Target/X86/X86ISelLowering.h
parent: 622b7cf147f9231a1d6e3aac81a2dd1b6047b26c (diff)
downloadexternal_llvm-bdcb5afb77547337ba148ce24d5e1046c0b25ced.zip
external_llvm-bdcb5afb77547337ba148ce24d5e1046c0b25ced.tar.gz
external_llvm-bdcb5afb77547337ba148ce24d5e1046c0b25ced.tar.bz2
~40% faster vector shl <4 x i32> on SSE 4.1. Larger improvements for smaller types coming in future patches.
For:

define <2 x i64> @shl(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
entry:
  %shl = shl <4 x i32> %r, %a                     ; <<4 x i32>> [#uses=1]
  %tmp2 = bitcast <4 x i32> %shl to <2 x i64>     ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %tmp2
}

We get:

_shl:                                   ## @shl
    pslld      $23, %xmm1
    paddd      LCPI0_0, %xmm1
    cvttps2dq  %xmm1, %xmm1
    pmulld     %xmm1, %xmm0
    ret

Instead of:

_shl:                                   ## @shl
    pshufd     $3, %xmm0, %xmm2
    movd       %xmm2, %eax
    pshufd     $3, %xmm1, %xmm2
    movd       %xmm2, %ecx
    shll       %cl, %eax
    movd       %eax, %xmm2
    pshufd     $1, %xmm0, %xmm3
    movd       %xmm3, %eax
    pshufd     $1, %xmm1, %xmm3
    movd       %xmm3, %ecx
    shll       %cl, %eax
    movd       %eax, %xmm3
    punpckldq  %xmm2, %xmm3
    movd       %xmm0, %eax
    movd       %xmm1, %ecx
    shll       %cl, %eax
    movd       %eax, %xmm2
    movhlps    %xmm0, %xmm0
    movd       %xmm0, %eax
    movhlps    %xmm1, %xmm1
    movd       %xmm1, %ecx
    shll       %cl, %eax
    movd       %eax, %xmm0
    punpckldq  %xmm0, %xmm2
    movdqa     %xmm2, %xmm0
    punpckldq  %xmm3, %xmm0
    ret

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@109549 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/X86ISelLowering.h')
-rw-r--r-- lib/Target/X86/X86ISelLowering.h 1
1 files changed, 1 insertions, 0 deletions
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 96c97d9..3556579 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -723,6 +723,7 @@ namespace llvm {
SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSHL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const;