diff options
author | Evan Cheng <evan.cheng@apple.com> | 2008-05-13 00:54:02 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2008-05-13 00:54:02 +0000 |
commit | fa7fd33a2636a663c69cc9ba56b01821c74da637 (patch) | |
tree | 17853ab89857afed2f407824a083b439141ee5f4 | |
parent | 844731a7f1909f55935e3514c9e713a62d67662e (diff) | |
download | external_llvm-fa7fd33a2636a663c69cc9ba56b01821c74da637.zip external_llvm-fa7fd33a2636a663c69cc9ba56b01821c74da637.tar.gz external_llvm-fa7fd33a2636a663c69cc9ba56b01821c74da637.tar.bz2 |
On x86, it's safe to treat i32 load anyext as a normal i32 load. Ditto for i8 anyext load to i16.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@51019 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/README-SSE.txt | 25 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 30 | ||||
-rw-r--r-- | test/CodeGen/X86/vec_set-H.ll | 15 |
3 files changed, 43 insertions, 27 deletions
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 34b949a..c78e13b 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -757,31 +757,6 @@ or iseling it. //===---------------------------------------------------------------------===// -Take the following code: - -#include <xmmintrin.h> -__m128i doload64(short x) {return _mm_set_epi16(x,x,x,x,x,x,x,x);} - -LLVM currently generates the following on x86: -doload64: - movzwl 4(%esp), %eax - movd %eax, %xmm0 - punpcklwd %xmm0, %xmm0 - pshufd $0, %xmm0, %xmm0 - ret - -gcc's generated code: -doload64: - movd 4(%esp), %xmm0 - punpcklwd %xmm0, %xmm0 - pshufd $0, %xmm0, %xmm0 - ret - -LLVM should be able to generate the same thing as gcc. This looks like it is -just a matter of matching (scalar_to_vector (load x)) to movd. - -//===---------------------------------------------------------------------===// - LLVM currently generates stack realignment code, when it is not necessary needed. The problem is that we need to know about stack alignment too early, before RA runs. diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 6b0a19b..afe4206 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -229,9 +229,35 @@ def i32immSExt8 : PatLeaf<(i32 imm), [{ }]>; // Helper fragments for loads. +// It's always safe to treat an anyext i16 load as an i32 load. Ditto for +// i8 to i16. 
+def loadi16 : PatFrag<(ops node:$ptr), (i16 (ld node:$ptr)), [{ + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + if (LD->getAddressingMode() != ISD::UNINDEXED) + return false; + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (ExtType == ISD::NON_EXTLOAD) + return true; + if (ExtType == ISD::EXTLOAD) + return LD->getAlignment() >= 2; + } + return false; +}]>; + +def loadi32 : PatFrag<(ops node:$ptr), (i32 (ld node:$ptr)), [{ + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + if (LD->getAddressingMode() != ISD::UNINDEXED) + return false; + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (ExtType == ISD::NON_EXTLOAD) + return true; + if (ExtType == ISD::EXTLOAD) + return LD->getAlignment() >= 4; + } + return false; +}]>; + def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>; -def loadi16 : PatFrag<(ops node:$ptr), (i16 (load node:$ptr))>; -def loadi32 : PatFrag<(ops node:$ptr), (i32 (load node:$ptr))>; def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>; def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>; diff --git a/test/CodeGen/X86/vec_set-H.ll b/test/CodeGen/X86/vec_set-H.ll new file mode 100644 index 0000000..ea7b853 --- /dev/null +++ b/test/CodeGen/X86/vec_set-H.ll @@ -0,0 +1,15 @@ +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep movz + +define <2 x i64> @doload64(i16 signext %x) nounwind { +entry: + %tmp36 = insertelement <8 x i16> undef, i16 %x, i32 0 ; <<8 x i16>> [#uses=1] + %tmp37 = insertelement <8 x i16> %tmp36, i16 %x, i32 1 ; <<8 x i16>> [#uses=1] + %tmp38 = insertelement <8 x i16> %tmp37, i16 %x, i32 2 ; <<8 x i16>> [#uses=1] + %tmp39 = insertelement <8 x i16> %tmp38, i16 %x, i32 3 ; <<8 x i16>> [#uses=1] + %tmp40 = insertelement <8 x i16> %tmp39, i16 %x, i32 4 ; <<8 x i16>> [#uses=1] + %tmp41 = insertelement <8 x i16> %tmp40, i16 %x, i32 5 ; <<8 x i16>> [#uses=1] + %tmp42 = insertelement <8 x i16> %tmp41, i16 %x, i32 6 ; <<8 x i16>> [#uses=1] + %tmp43 = insertelement <8 x i16> 
%tmp42, i16 %x, i32 7 ; <<8 x i16>> [#uses=1] + %tmp46 = bitcast <8 x i16> %tmp43 to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp46 +} |