diff options
-rw-r--r-- | lib/Target/ARM/ARMFrameLowering.cpp | 6 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 49 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.h | 2 | ||||
-rw-r--r-- | lib/Target/ARM/ARMMachineFunctionInfo.h | 17 | ||||
-rw-r--r-- | lib/Target/ARM/Thumb1FrameLowering.cpp | 6 | ||||
-rw-r--r-- | test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll | 16 | ||||
-rw-r--r-- | test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll | 31 | ||||
-rw-r--r-- | test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll | 25 |
8 files changed, 133 insertions, 19 deletions
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 483802b..c8637be 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -141,7 +141,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { assert(!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); + unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); @@ -357,7 +358,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, "This emitEpilogue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); + unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index c9ee5fb..a1443d1 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -2630,6 +2630,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, void ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, unsigned InRegsParamRecordIdx, + unsigned ArgSize, unsigned &ArgRegsSize, unsigned &ArgRegsSaveSize) const { @@ -2648,7 +2649,29 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); ArgRegsSize = NumGPRs * 4; - ArgRegsSaveSize = (ArgRegsSize + Align - 1) & ~(Align - 1); + + // If parameter is split between stack and GPRs... + if (NumGPRs && Align == 8 && + (ArgRegsSize < ArgSize || + InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) { + // Add padding for part of param recovered from GPRs, so + // its last byte must be at address K*8 - 1. + // We need to do it, since remained (stack) part of parameter has + // stack alignment, and we need to "attach" "GPRs head" without gaps + // to it: + // Stack: + // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes... + // [ [padding] [GPRs head] ] [ Tail passed via stack .... + // + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned Padding = + ((ArgRegsSize + AFI->getArgRegsSaveSize() + Align - 1) & ~(Align-1)) - + (ArgRegsSize + AFI->getArgRegsSaveSize()); + ArgRegsSaveSize = ArgRegsSize + Padding; + } else + // We don't need to extend regs save size for byval parameters if they + // are passed via GPRs only. + ArgRegsSaveSize = ArgRegsSize; } // The remaining GPRs hold either the beginning of variable-argument @@ -2666,6 +2689,7 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, unsigned InRegsParamRecordIdx, unsigned OffsetFromOrigArg, unsigned ArgOffset, + unsigned ArgSize, bool ForceMutable) const { // Currently, two use-cases possible: @@ -2695,7 +2719,8 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, } unsigned ArgRegsSize, ArgRegsSaveSize; - computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgRegsSize, ArgRegsSaveSize); + computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize, + ArgRegsSize, ArgRegsSaveSize); // Store any by-val regs to their spots on the stack so that they may be // loaded by deferencing the result of formal parameter pointer or va_next. @@ -2703,9 +2728,17 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, // was initialized, it can't be initialized again. if (ArgRegsSaveSize) { + unsigned Padding = ArgRegsSaveSize - ArgRegsSize; + + if (Padding) { + assert(AFI->getStoredByValParamsPadding() == 0 && + "The only parameter may be padded."); + AFI->setStoredByValParamsPadding(Padding); + } + int FrameIndex = MFI->CreateFixedObject( ArgRegsSaveSize, - ArgOffset + ArgRegsSaveSize - ArgRegsSize, + Padding + ArgOffset, false); SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy()); @@ -2737,7 +2770,8 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, return FrameIndex; } else // This will point to the next argument passed via stack. - return MFI->CreateFixedObject(4, ArgOffset, !ForceMutable); + return MFI->CreateFixedObject( + 4, AFI->getStoredByValParamsPadding() + ArgOffset, !ForceMutable); } // Setup stack frame, the va_list pointer will start from. @@ -2756,7 +2790,7 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, // argument passed via stack. int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(), - 0, ArgOffset, ForceMutable); + 0, ArgOffset, 0, ForceMutable); AFI->setVarArgsFrameIndex(FrameIndex); } @@ -2896,12 +2930,15 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, CurByValIndex, Ins[VA.getValNo()].PartOffset, VA.getLocMemOffset(), + Flags.getByValSize(), true /*force mutable frames*/); InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy())); CCInfo.nextInRegsParam(); } else { + unsigned FIOffset = VA.getLocMemOffset() + + AFI->getStoredByValParamsPadding(); int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, - VA.getLocMemOffset(), true); + FIOffset, true); // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 1d1b42a..8aa926f 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -480,6 +480,7 @@ namespace llvm { unsigned InRegsParamRecordIdx, unsigned OffsetFromOrigArg, unsigned ArgOffset, + unsigned ArgSize, bool ForceMutable) const; void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, @@ -489,6 +490,7 @@ namespace llvm { void computeRegArea(CCState &CCInfo, MachineFunction &MF, unsigned InRegsParamRecordIdx, + unsigned ArgSize, unsigned &ArgRegsSize, unsigned &ArgRegsSaveSize) const; diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index f4248fc..d9ec4fd 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -36,6 +36,13 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// 'isThumb'. bool hasThumb2; + /// StByValParamsPadding - For parameter that is split between + /// GPRs and memory; while recovering GPRs part, when + /// StackAlignment == 8, and GPRs-part-size mod 8 != 0, + /// we need to insert gap before parameter start address. It allows to + /// "attach" GPR-part to the part that was passed via stack. + unsigned StByValParamsPadding; + /// VarArgsRegSaveSize - Size of the register save area for vararg functions. /// unsigned ArgRegsSaveSize; @@ -129,6 +136,7 @@ public: explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()), + StByValParamsPadding(0), ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), @@ -141,7 +149,14 @@ public: bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } bool isThumb2Function() const { return isThumb && hasThumb2; } - unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; } + unsigned getStoredByValParamsPadding() const { return StByValParamsPadding; } + void setStoredByValParamsPadding(unsigned p) { StByValParamsPadding = p; } + + unsigned getArgRegsSaveSize(unsigned Align = 0) const { + if (!Align) + return ArgRegsSaveSize; + return (ArgRegsSaveSize + Align - 1) & ~(Align - 1); + } void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; } bool hasStackFrame() const { return HasStackFrame; } diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 1e2a8b0..db49db8 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -88,7 +88,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); + unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); @@ -249,7 +250,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); + unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); int NumBytes = (int)MFI->getStackSize(); const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); unsigned FramePtr = RegInfo->getFrameRegister(MF); diff --git a/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll b/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll index 4a5ca9d..80b9d28 100644 --- a/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll +++ b/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll @@ -4,24 +4,24 @@ ;CHECK: foo: ;CHECK: sub sp, sp, #8 ;CHECK: push {r11, lr} -;CHECK: str r0, [sp, #12] -;CHECK: add r0, sp, #12 +;CHECK: str r0, [sp, #8] +;CHECK: add r0, sp, #8 ;CHECK: bl fooUseParam ;CHECK: pop {r11, lr} ;CHECK: add sp, sp, #8 ;CHECK: mov pc, lr ;CHECK: foo2: -;CHECK: sub sp, sp, #16 +;CHECK: sub sp, sp, #8 ;CHECK: push {r11, lr} -;CHECK: str r0, [sp, #12] -;CHECK: add r0, sp, #12 -;CHECK: str r2, [sp, #16] +;CHECK: str r0, [sp, #8] +;CHECK: add r0, sp, #8 +;CHECK: str r2, [sp, #12] ;CHECK: bl fooUseParam -;CHECK: add r0, sp, #16 +;CHECK: add r0, sp, #12 ;CHECK: bl fooUseParam ;CHECK: pop {r11, lr} -;CHECK: add sp, sp, #16 +;CHECK: add sp, sp, #8 ;CHECK: mov pc, lr ;CHECK: doFoo: diff --git a/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll b/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll new file mode 100644 index 0000000..7bf03a1 --- /dev/null +++ b/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll @@ -0,0 +1,31 @@ +;PR15293: ARM codegen ice - expected larger existing stack allocation +;RUN: llc -mtriple=arm-linux-gnueabihf < %s | FileCheck %s + +%struct.S227 = type { [49 x i32], i32 } + +define void @check227( + i32 %b, + %struct.S227* byval nocapture %arg0, + %struct.S227* %arg1) { +; b --> R0 +; arg0 --> [R1, R2, R3, SP+0 .. SP+188) +; arg1 --> SP+188 + +entry: + +;CHECK: sub sp, sp, #16 +;CHECK: push {r11, lr} +;CHECK: add r0, sp, #12 +;CHECK: stm r0, {r1, r2, r3} +;CHECK: ldr r0, [sp, #212] +;CHECK: bl useInt +;CHECK: pop {r11, lr} +;CHECK: add sp, sp, #16 + + %0 = ptrtoint %struct.S227* %arg1 to i32 + tail call void @useInt(i32 %0) + ret void +} + +declare void @useInt(i32) + diff --git a/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll b/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll new file mode 100644 index 0000000..438b021a --- /dev/null +++ b/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll @@ -0,0 +1,25 @@ +;PR15293: ARM codegen ice - expected larger existing stack allocation +;RUN: llc -mtriple=arm-linux-gnueabihf < %s | FileCheck %s + +%struct4bytes = type { i32 } +%struct20bytes = type { i32, i32, i32, i32, i32 } + +define void @foo(%struct4bytes* byval %p0, ; --> R0 + %struct20bytes* byval %p1 ; --> R1,R2,R3, [SP+0 .. SP+8) +) { +;CHECK: sub sp, sp, #16 +;CHECK: push {r11, lr} +;CHECK: add r11, sp, #8 +;CHECK: stm r11, {r0, r1, r2, r3} +;CHECK: add r0, sp, #12 +;CHECK: bl useInt +;CHECK: pop {r11, lr} +;CHECK: add sp, sp, #16 + + %1 = ptrtoint %struct20bytes* %p1 to i32 + tail call void @useInt(i32 %1) + ret void +} + +declare void @useInt(i32) + |