diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2013-08-01 15:23:42 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2013-08-01 15:23:42 +0000 |
commit | 692ee102ebef535d311c35d53457028083e5c5be (patch) | |
tree | 5966632bb87e4120a27dadfce4187535429a4275 /lib/Target/R600/R600ISelLowering.cpp | |
parent | 98b357e1cd0d41108e6011725dad6a6dbf208a38 (diff) | |
download | external_llvm-692ee102ebef535d311c35d53457028083e5c5be.zip external_llvm-692ee102ebef535d311c35d53457028083e5c5be.tar.gz external_llvm-692ee102ebef535d311c35d53457028083e5c5be.tar.bz2 |
R600: Add 64-bit float load/store support
* Added R600_Reg64 class
* Added T#Index#.XY registers definition
* Added v2i32 register reads from parameter and global space
* Added f32 and i32 elements extraction from v2f32 and v2i32
* Added v2i32 -> v2f32 conversions
Tom Stellard:
- Mark vec2 operations as expand. The addition of a vec2 register
class made them all legal.
Patch by: Dmitry Cherkassov
Signed-off-by: Dmitry Cherkassov <dcherkassov@gmail.com>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187582 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/R600/R600ISelLowering.cpp')
-rw-r--r-- | lib/Target/R600/R600ISelLowering.cpp | 21 |
1 files changed, 18 insertions, 3 deletions
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 303c0e1..ce6ac89 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -33,17 +33,25 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass); addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass); addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass); + addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass); + addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass); + computeRegisterProperties(); setOperationAction(ISD::FADD, MVT::v4f32, Expand); + setOperationAction(ISD::FADD, MVT::v2f32, Expand); setOperationAction(ISD::FMUL, MVT::v4f32, Expand); + setOperationAction(ISD::FMUL, MVT::v2f32, Expand); setOperationAction(ISD::FDIV, MVT::v4f32, Expand); + setOperationAction(ISD::FDIV, MVT::v2f32, Expand); setOperationAction(ISD::FSUB, MVT::v4f32, Expand); + setOperationAction(ISD::FSUB, MVT::v2f32, Expand); setOperationAction(ISD::FCOS, MVT::f32, Custom); setOperationAction(ISD::FSIN, MVT::f32, Custom); setOperationAction(ISD::SETCC, MVT::v4i32, Expand); + setOperationAction(ISD::SETCC, MVT::v2i32, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Expand); setOperationAction(ISD::BR_CC, MVT::f32, Expand); @@ -66,7 +74,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : // Legalize loads and stores to the private address space. setOperationAction(ISD::LOAD, MVT::i32, Custom); - setOperationAction(ISD::LOAD, MVT::v2i32, Expand); + setOperationAction(ISD::LOAD, MVT::v2i32, Custom); setOperationAction(ISD::LOAD, MVT::v4i32, Custom); setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom); setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom); @@ -74,7 +82,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom); setOperationAction(ISD::STORE, MVT::i8, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); - setOperationAction(ISD::STORE, MVT::v2i32, Expand); + setOperationAction(ISD::STORE, MVT::v2i32, Custom); setOperationAction(ISD::STORE, MVT::v4i32, Custom); setOperationAction(ISD::LOAD, MVT::i32, Custom); @@ -170,6 +178,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( } case AMDGPU::RAT_WRITE_CACHELESS_32_eg: + case AMDGPU::RAT_WRITE_CACHELESS_64_eg: case AMDGPU::RAT_WRITE_CACHELESS_128_eg: { unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0; @@ -1129,7 +1138,13 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32)); Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr); } - Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4); + EVT NewVT = MVT::v4i32; + unsigned NumElements = 4; + if (VT.isVector()) { + NewVT = VT; + NumElements = VT.getVectorNumElements(); + } + Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements); } else { // non constant ptr cant be folded, keeps it as a v4f32 load Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32, |