about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp 13
-rw-r--r-- test/CodeGen/X86/2008-10-27-StackRealignment.ll 22
2 files changed, 31 insertions, 4 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 3dbedba..a10f8a8 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -850,10 +850,15 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
MVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
bool isSrcConst, bool isSrcStr) const {
- if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
- return MVT::v4i32;
- if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
- return MVT::v4f32;
+ // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
+ // linux. This is because the stack realignment code can't handle certain
+ // cases like PR2962. This should be removed when PR2962 is fixed.
+ if (Subtarget->getStackAlignment() >= 16) {
+ if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
+ return MVT::v4i32;
+ if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
+ return MVT::v4f32;
+ }
if (Subtarget->is64Bit() && Size >= 8)
return MVT::i64;
return MVT::i32;
diff --git a/test/CodeGen/X86/2008-10-27-StackRealignment.ll b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
new file mode 100644
index 0000000..d8b0e70
--- /dev/null
+++ b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
@@ -0,0 +1,22 @@
+; Linux doesn't support stack realignment for functions with allocas (PR2888).
+; Until it does, we shouldn't use movaps to access the stack. On targets with
+; sufficiently aligned stack (e.g. darwin) we should.
+
+; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu -mcpu=yonah | not grep movaps
+; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin9 -mcpu=yonah | grep movaps | count 2
+
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+
+define void @foo(i32 %t) nounwind {
+ %tmp1210 = alloca i8, i32 32, align 4
+ call void @llvm.memset.i64(i8* %tmp1210, i8 0, i64 32, i32 4)
+
+ %x = alloca i8, i32 %t
+ call void @dummy(i8* %x)
+ ret void
+}
+
+declare void @dummy(i8* %x)
+declare void @llvm.memset.i64(i8*, i8, i64, i32) nounwind