diff options
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 13 | ||||
-rw-r--r-- | test/CodeGen/X86/2008-10-27-StackRealignment.ll | 22 |
2 files changed, 31 insertions, 4 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 3dbedba..a10f8a8 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -850,10 +850,15 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const { MVT X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align, bool isSrcConst, bool isSrcStr) const { - if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16) - return MVT::v4i32; - if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16) - return MVT::v4f32; + // FIXME: This turns off use of xmm stores for memset/memcpy on targets like + // Linux. This is because the stack realignment code can't handle certain + // cases like PR2962. This should be removed when PR2962 is fixed. + if (Subtarget->getStackAlignment() >= 16) { + if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16) + return MVT::v4i32; + if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16) + return MVT::v4f32; + } if (Subtarget->is64Bit() && Size >= 8) return MVT::i64; return MVT::i32; diff --git a/test/CodeGen/X86/2008-10-27-StackRealignment.ll b/test/CodeGen/X86/2008-10-27-StackRealignment.ll new file mode 100644 index 0000000..d8b0e70 --- /dev/null +++ b/test/CodeGen/X86/2008-10-27-StackRealignment.ll @@ -0,0 +1,22 @@ +; Linux doesn't support stack realignment for functions with allocas (PR2888). +; Until it does, we shouldn't use movaps to access the stack. On targets with +; a sufficiently aligned stack (e.g. Darwin) we should. 
+ +; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu -mcpu=yonah | not grep movaps +; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin9 -mcpu=yonah | grep movaps | count 2 + + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-pc-linux-gnu" + +define void @foo(i32 %t) nounwind { + %tmp1210 = alloca i8, i32 32, align 4 + call void @llvm.memset.i64(i8* %tmp1210, i8 0, i64 32, i32 4) + + %x = alloca i8, i32 %t + call void @dummy(i8* %x) + ret void +} + +declare void @dummy(i8* %x) +declare void @llvm.memset.i64(i8*, i8, i64, i32) nounwind |