diff options
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.cpp | 58
-rw-r--r-- | test/CodeGen/PowerPC/vec-abi-align.ll | 64
2 files changed, 113 insertions, 9 deletions
# Review notes (text ahead of the first "diff --git" header is not part of any hunk):
# - getByValTypeAlignment: outside Darwin the result now starts at 8 (PPC64) or
#   4 (PPC32) and, when Altivec or QPX is available, getMaxByValAlign may raise
#   it to 16 or 32 after walking vector, array and struct members of Ty.
# - LowerFormalArguments_64SVR4 / LowerCall_64SVR4: a byval argument whose
#   alignment exceeds 8 has ArgOffset rounded up to that alignment first.
# - NOTE(review): `TM` in getByValTypeAlignment has no remaining use in the
#   function body shown here once its isDarwin() call is replaced.
# - NOTE(review): only the LowerCall_64SVR4 hunk rejects a BVAlign that is not a
#   multiple of PtrByteSize; the LowerFormalArguments_64SVR4 hunk has no such check.
# - Leading indentation was lost in extraction and is reconstructed here in
#   two-space steps -- TODO confirm against the tree before applying.
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index b4ba527..34571e2 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -578,24 +578,48 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   }
 }
 
+/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
+/// the desired ByVal argument alignment.
+static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
+                             unsigned MaxMaxAlign) {
+  if (MaxAlign == MaxMaxAlign)
+    return;
+  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
+      MaxAlign = 32;
+    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
+      MaxAlign = 16;
+  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+    unsigned EltAlign = 0;
+    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
+    if (EltAlign > MaxAlign)
+      MaxAlign = EltAlign;
+  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+      unsigned EltAlign = 0;
+      getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
+      if (EltAlign > MaxAlign)
+        MaxAlign = EltAlign;
+      if (MaxAlign == MaxMaxAlign)
+        break;
+    }
+  }
+}
+
 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
 /// function arguments in the caller parameter area.
 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
   const TargetMachine &TM = getTargetMachine();
   // Darwin passes everything on 4 byte boundary.
-  if (TM.getSubtarget<PPCSubtarget>().isDarwin())
+  if (PPCSubTarget.isDarwin())
     return 4;
 
   // 16byte and wider vectors are passed on 16byte boundary.
-  if (VectorType *VTy = dyn_cast<VectorType>(Ty))
-    if (VTy->getBitWidth() >= 128)
-      return 16;
-
   // The rest is 8 on PPC64 and 4 on PPC32 boundary.
-  if (PPCSubTarget.isPPC64())
-    return 8;
-
-  return 4;
+  unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4;
+  if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX())
+    getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16);
+  return Align;
 }
 
 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -2281,6 +2305,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
         InVals.push_back(FIN);
         continue;
       }
+
+      unsigned BVAlign = Flags.getByValAlign();
+      if (BVAlign > 8) {
+        ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
+        CurArgOffset = ArgOffset;
+      }
+
       // All aggregates smaller than 8 bytes must be passed right-justified.
       if (ObjSize < PtrByteSize)
         CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
@@ -3870,6 +3901,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
       if (Size == 0)
         continue;
 
+      unsigned BVAlign = Flags.getByValAlign();
+      if (BVAlign > 8) {
+        if (BVAlign % PtrByteSize != 0)
+          llvm_unreachable(
+            "ByVal alignment is not a multiple of the pointer size");
+
+        ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
+      }
+
       // All aggregates smaller than 8 bytes must be passed right-justified.
       if (Size==1 || Size==2 || Size==4) {
         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
diff --git a/test/CodeGen/PowerPC/vec-abi-align.ll b/test/CodeGen/PowerPC/vec-abi-align.ll
new file mode 100644
index 0000000..3d6129b
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec-abi-align.ll
@@ -0,0 +1,64 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.s2 = type { i64, <4 x float> }
+
+@ve = external global <4 x float>
+@n = external global i64
+
+; Function Attrs: nounwind
+define void @test1(i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, <4 x float> inreg %vs.coerce) #0 {
+entry:
+  store <4 x float> %vs.coerce, <4 x float>* @ve, align 16, !tbaa !0
+  ret void
+
+; CHECK-LABEL: @test1
+; CHECK: stvx 2,
+; CHECK: blr
+}
+
+; Function Attrs: nounwind
+define void @test2(i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, %struct.s2* byval nocapture readonly %vs) #0 {
+entry:
+  %m = getelementptr inbounds %struct.s2* %vs, i64 0, i32 0
+  %0 = load i64* %m, align 8, !tbaa !2
+  store i64 %0, i64* @n, align 8, !tbaa !2
+  %v = getelementptr inbounds %struct.s2* %vs, i64 0, i32 1
+  %1 = load <4 x float>* %v, align 16, !tbaa !0
+  store <4 x float> %1, <4 x float>* @ve, align 16, !tbaa !0
+  ret void
+
+; CHECK-LABEL: @test2
+; CHECK: ld {{[0-9]+}}, 112(1)
+; CHECK: li [[REG16:[0-9]+]], 16
+; CHECK: addi [[REGB:[0-9]+]], 1, 112
+; CHECK: lvx 2, [[REGB]], [[REG16]]
+; CHECK: blr
+}
+
+; Function Attrs: nounwind
+define void @test3(i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, %struct.s2* byval nocapture readonly %vs) #0 {
+entry:
+  %m = getelementptr inbounds %struct.s2* %vs, i64 0, i32 0
+  %0 = load i64* %m, align 8, !tbaa !2
+  store i64 %0, i64* @n, align 8, !tbaa !2
+  %v = getelementptr inbounds %struct.s2* %vs, i64 0, i32 1
+  %1 = load <4 x float>* %v, align 16, !tbaa !0
+  store <4 x float> %1, <4 x float>* @ve, align 16, !tbaa !0
+  ret void
+
+; CHECK-LABEL: @test3
+; CHECK: ld {{[0-9]+}}, 128(1)
+; CHECK: li [[REG16:[0-9]+]], 16
+; CHECK: addi [[REGB:[0-9]+]], 1, 128
+; CHECK: lvx 2, [[REGB]], [[REG16]]
+; CHECK: blr
+}
+
+attributes #0 = { nounwind }
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
+!2 = metadata !{metadata !"long", metadata !0}
+