aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChad Rosier <mcrosier@apple.com>2012-03-09 02:00:48 +0000
committerChad Rosier <mcrosier@apple.com>2012-03-09 02:00:48 +0000
commitabd6674166d99a8da98269051fbd869d9d8c3ba1 (patch)
tree4b68f0ac12b9221bc2931e98600677d9a545ca4a
parentd04ec0c855176ebddd459c044bdd24f49938fae4 (diff)
downloadexternal_llvm-abd6674166d99a8da98269051fbd869d9d8c3ba1.zip
external_llvm-abd6674166d99a8da98269051fbd869d9d8c3ba1.tar.gz
external_llvm-abd6674166d99a8da98269051fbd869d9d8c3ba1.tar.bz2
Fix a regression from r147481.
Original commit message from r147481: DAGCombine for transforming 128->256 casts into a vmovaps, rather than a vxorps + vinsertf128 pair if the original vector came from a load. Fix: Unaligned loads need to generate a vmovups. rdar://10974078 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@152366 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td5
-rw-r--r--lib/Target/X86/X86InstrSSE.td4
-rw-r--r--test/CodeGen/X86/avx-shuffle.ll12
3 files changed, 20 insertions, 1 deletion
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 4f9f089..ae3ed1b 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -218,6 +218,11 @@ def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
+// Like 'X86vzload', but always requires 128-bit vector alignment.
+def alignedX86vzload : PatFrag<(ops node:$ptr), (X86vzload node:$ptr), [{
+ return cast<MemSDNode>(N)->getAlignment() >= 16;
+}]>;
+
// Like 'load', but always requires 256-bit vector alignment.
def alignedload256 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 32;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index c6d1d19..36526ad 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -4820,8 +4820,10 @@ let Predicates = [HasSSE2], AddedComplexity = 20 in {
}
let Predicates = [HasAVX] in {
-def : Pat<(v4i64 (X86vzload addr:$src)),
+def : Pat<(v4i64 (alignedX86vzload addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVAPSrm addr:$src), sub_xmm)>;
+def : Pat<(v4i64 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i32 0), (VMOVUPSrm addr:$src), sub_xmm)>;
}
//===---------------------------------------------------------------------===//
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index 947d79f..54f01e9 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -135,3 +135,15 @@ define <4 x i32> @test15(<2 x i32>%x) nounwind readnone {
ret <4 x i32>%x1
}
+; rdar://10974078
+define <8 x float> @test16(float* nocapture %f) nounwind uwtable readonly ssp {
+entry:
+ %0 = bitcast float* %f to <4 x float>*
+ %1 = load <4 x float>* %0, align 8
+; CHECK: test16
+; CHECK: vmovups
+; CHECK-NOT: vxorps
+; CHECK-NOT: vinsertf128
+ %shuffle.i = shufflevector <4 x float> %1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
+ ret <8 x float> %shuffle.i
+}