diff options
-rw-r--r-- | include/llvm/Target/TargetLowering.h | 6 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 13 | ||||
-rw-r--r-- | lib/CodeGen/TargetLoweringBase.cpp | 13 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.cpp | 4 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUSubtarget.cpp | 4 | ||||
-rw-r--r-- | lib/Target/R600/SIISelLowering.cpp | 2 | ||||
-rw-r--r-- | lib/Target/R600/SIInstructions.td | 9 | ||||
-rw-r--r-- | test/CodeGen/R600/32-bit-local-address-space.ll | 42 |
8 files changed, 79 insertions, 14 deletions
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 580a358..cd95a73 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -151,7 +151,9 @@ public: // Return the pointer type for the given address space, defaults to // the pointer type from the data layout. // FIXME: The default needs to be removed once all the code is updated. - virtual MVT getPointerTy(uint32_t /*AS*/ = 0) const { return PointerTy; } + virtual MVT getPointerTy(uint32_t /*AS*/ = 0) const; + unsigned getPointerSizeInBits(uint32_t AS = 0) const; + unsigned getPointerTypeSizeInBits(Type *Ty) const; virtual MVT getScalarShiftAmountTy(EVT LHSTy) const; EVT getShiftAmountTy(EVT LHSTy) const; @@ -568,7 +570,7 @@ public: /// otherwise it will assert. EVT getValueType(Type *Ty, bool AllowUnknown = false) const { // Lower scalar pointers to native pointer types. - if (Ty->isPointerTy()) return PointerTy; + if (Ty->isPointerTy()) return getPointerTy(Ty->getPointerAddressSpace()); if (Ty->isVectorTy()) { VectorType *VTy = cast<VectorType>(Ty); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 30fde54..ef3466b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3242,6 +3242,10 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Ty = StTy->getElementType(Field); } else { + uint32_t AS = 0; + if (PointerType *PtrType = dyn_cast<PointerType>(Ty)) { + AS = PtrType->getAddressSpace(); + } Ty = cast<SequentialType>(Ty)->getElementType(); // If this is a constant subscript, handle it quickly. @@ -3251,14 +3255,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { uint64_t Offs = TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); SDValue OffsVal; - EVT PTy = TLI->getPointerTy(); + EVT PTy = TLI->getPointerTy(AS); unsigned PtrBits = PTy.getSizeInBits(); if (PtrBits < 64) - OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), - TLI->getPointerTy(), + OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy, DAG.getConstant(Offs, MVT::i64)); else - OffsVal = DAG.getIntPtrConstant(Offs); + OffsVal = DAG.getConstant(Offs, PTy); N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, OffsVal); @@ -3266,7 +3269,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { } // N = N + Idx * ElementSize; - APInt ElementSize = APInt(TLI->getPointerTy().getSizeInBits(), + APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS), TD->getTypeAllocSize(Ty)); SDValue IdxN = getValue(Idx); diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index a2ea35e..7d28ed4 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -767,6 +767,19 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); } +MVT TargetLoweringBase::getPointerTy(uint32_t AS) const { + return MVT::getIntegerVT(getPointerSizeInBits(AS)); +} + +unsigned TargetLoweringBase::getPointerSizeInBits(uint32_t AS) const { + return TD->getPointerSizeInBits(AS); +} + +unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const { + assert(Ty->isPointerTy()); + return getPointerSizeInBits(Ty->getPointerAddressSpace()); +} + MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const { return MVT::getIntegerVT(8*TD->getPointerSize(0)); } diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 692b3b7..56a9a2b 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -233,6 +233,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, const DataLayout *TD = getTargetMachine().getDataLayout(); GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op); + + assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS); // XXX: What does the value of G->getOffset() mean? assert(G->getOffset() == 0 && "Do not know what to do with an non-zero offset"); @@ -244,7 +246,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, // XXX: Account for alignment? MFI->LDSSize += Size; - return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32); + return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace())); } void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG, diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp index 8b2e445..53cfe84 100644 --- a/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/lib/Target/R600/AMDGPUSubtarget.cpp @@ -97,6 +97,10 @@ AMDGPUSubtarget::getDataLayout() const { DataLayout.append("-p:32:32:32"); } + if (Gen >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { + DataLayout.append("-p3:32:32:32"); + } + return DataLayout; } diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index dc721e9..9cbba6c 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -87,7 +87,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setTargetDAGCombine(ISD::SELECT_CC); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 436a2cd..8c52a2e 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1746,14 +1746,13 @@ def : Pat < /********** ======================= **********/ def : Pat < - (local_load i64:$src0), - (i32 (DS_READ_B32 0, (EXTRACT_SUBREG $src0, sub0), - (EXTRACT_SUBREG $src0, sub0), (EXTRACT_SUBREG $src0, sub0), 0, 0)) + (local_load i32:$src0), + (i32 (DS_READ_B32 0, $src0, $src0, $src0, 0, 0)) >; def : Pat < - (local_store i32:$src1, i64:$src0), - (DS_WRITE_B32 0, (EXTRACT_SUBREG $src0, sub0), $src1, $src1, 0, 0) + (local_store i32:$src1, i32:$src0), + (DS_WRITE_B32 0, $src0, $src1, $src1, 0, 0) >; /********** ================== **********/ diff --git a/test/CodeGen/R600/32-bit-local-address-space.ll b/test/CodeGen/R600/32-bit-local-address-space.ll new file mode 100644 index 0000000..d19a82e --- /dev/null +++ b/test/CodeGen/R600/32-bit-local-address-space.ll @@ -0,0 +1,42 @@ +; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s + +; On Southern Islands GPUs the local address space(3) uses 32-bit pointers and +; the global address space(1) uses 64-bit pointers. These tests check to make sure +; the correct pointer size is used for the local address space. + +; The e{{32|64}} suffix on the instructions refers to the encoding size and not +; the size of the operands. The operand size is denoted in the instruction name. +; Instructions with B32, U32, and I32 in their name take 32-bit operands, while +; instructions with B64, U64, and I64 take 64-bit operands. + +; CHECK-LABEL: @local_address_load +; CHECK: V_MOV_B32_e{{32|64}} [[PTR:VGPR[0-9]]] +; CHECK: DS_READ_B32 [[PTR]] +define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { +entry: + %0 = load i32 addrspace(3)* %in + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; CHECK-LABEL: @local_address_gep +; CHECK: V_ADD_I32_e{{32|64}} [[PTR:VGPR[0-9]]] +; CHECK: DS_READ_B32 [[PTR]] +define void @local_address_gep(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %offset) { +entry: + %0 = getelementptr i32 addrspace(3)* %in, i32 %offset + %1 = load i32 addrspace(3)* %0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK-LABEL: @local_address_gep_const_offset +; CHECK: V_ADD_I32_e{{32|64}} [[PTR:VGPR[0-9]]] +; CHECK: DS_READ_B32 [[PTR]] +define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { +entry: + %0 = getelementptr i32 addrspace(3)* %in, i32 1 + %1 = load i32 addrspace(3)* %0 + store i32 %1, i32 addrspace(1)* %out + ret void +} |