aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/R600/SIInstrInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/R600/SIInstrInfo.cpp')
-rw-r--r--lib/Target/R600/SIInstrInfo.cpp499
1 files changed, 477 insertions, 22 deletions
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index ab55c1b..ab2fe09 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -16,6 +16,7 @@
#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -24,12 +25,7 @@ using namespace llvm;
SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
: AMDGPUInstrInfo(tm),
- RI(tm)
- { }
-
-const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const {
- return RI;
-}
+ RI(tm) { }
//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
@@ -185,6 +181,67 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
return Opcode;
}
+void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ unsigned KillFlag = isKill ? RegState::Kill : 0;
+
+ if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {
+ unsigned Lane = MFI->SpillTracker.getNextLane(MRI);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32),
+ MFI->SpillTracker.LaneVGPR)
+ .addReg(SrcReg, KillFlag)
+ .addImm(Lane);
+ MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR,
+ Lane);
+ } else {
+ for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) {
+ unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), get(AMDGPU::COPY), SubReg)
+ .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
+ storeRegToStackSlot(MBB, MI, SubReg, isKill, FrameIndex + i,
+ &AMDGPU::SReg_32RegClass, TRI);
+ }
+ }
+}
+
+void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ if (TRI->getCommonSubClass(RC, &AMDGPU::SReg_32RegClass)) {
+ SIMachineFunctionInfo::SpilledReg Spill =
+ MFI->SpillTracker.getSpilledReg(FrameIndex);
+ assert(Spill.VGPR);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), DestReg)
+ .addReg(Spill.VGPR)
+ .addImm(Spill.Lane);
+ } else {
+ for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) {
+ unsigned Flags = RegState::Define;
+ if (i == 0) {
+ Flags |= RegState::Undef;
+ }
+ unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ loadRegFromStackSlot(MBB, MI, SubReg, FrameIndex + i,
+ &AMDGPU::SReg_32RegClass, TRI);
+ BuildMI(MBB, MI, DL, get(AMDGPU::COPY))
+ .addReg(DestReg, Flags, RI.getSubRegFromChannel(i))
+ .addReg(SubReg);
+ }
+ }
+}
+
MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
bool NewMI) const {
@@ -213,8 +270,10 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
return 0;
unsigned Reg = MI->getOperand(1).getReg();
+ unsigned SubReg = MI->getOperand(1).getSubReg();
MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm());
MI->getOperand(2).ChangeToRegister(Reg, false);
+ MI->getOperand(2).setSubReg(SubReg);
} else {
MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
}
@@ -249,6 +308,30 @@ SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
return RC != &AMDGPU::EXECRegRegClass;
}
+bool
+SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const {
+ switch(MI->getOpcode()) {
+ default: return AMDGPUInstrInfo::isTriviallyReMaterializable(MI, AA);
+ case AMDGPU::S_MOV_B32:
+ case AMDGPU::S_MOV_B64:
+ case AMDGPU::V_MOV_B32_e32:
+ return MI->getOperand(1).isImm();
+ }
+}
+
+namespace llvm {
+namespace AMDGPU {
+// Helper function generated by tablegen. We are wrapping this with
+// an SIInstrInfo function that reutrns bool rather than int.
+int isDS(uint16_t Opcode);
+}
+}
+
+bool SIInstrInfo::isDS(uint16_t Opcode) const {
+ return ::AMDGPU::isDS(Opcode) != -1;
+}
+
int SIInstrInfo::isMIMG(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::MIMG;
}
@@ -277,21 +360,40 @@ bool SIInstrInfo::isSALUInstr(const MachineInstr &MI) const {
return get(MI.getOpcode()).TSFlags & SIInstrFlags::SALU;
}
+bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
+ int32_t Val = Imm.getSExtValue();
+ if (Val >= -16 && Val <= 64)
+ return true;
+
+ // The actual type of the operand does not seem to matter as long
+ // as the bits match one of the inline immediate values. For example:
+ //
+ // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
+ // so it is a legal inline immediate.
+ //
+ // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
+ // floating-point, so it is a legal inline immediate.
+
+ return (APInt::floatToBits(0.0f) == Imm) ||
+ (APInt::floatToBits(1.0f) == Imm) ||
+ (APInt::floatToBits(-1.0f) == Imm) ||
+ (APInt::floatToBits(0.5f) == Imm) ||
+ (APInt::floatToBits(-0.5f) == Imm) ||
+ (APInt::floatToBits(2.0f) == Imm) ||
+ (APInt::floatToBits(-2.0f) == Imm) ||
+ (APInt::floatToBits(4.0f) == Imm) ||
+ (APInt::floatToBits(-4.0f) == Imm);
+}
+
bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const {
- if(MO.isImm()) {
- return MO.getImm() >= -16 && MO.getImm() <= 64;
- }
+ if (MO.isImm())
+ return isInlineConstant(APInt(32, MO.getImm(), true));
+
if (MO.isFPImm()) {
- return MO.getFPImm()->isExactlyValue(0.0) ||
- MO.getFPImm()->isExactlyValue(0.5) ||
- MO.getFPImm()->isExactlyValue(-0.5) ||
- MO.getFPImm()->isExactlyValue(1.0) ||
- MO.getFPImm()->isExactlyValue(-1.0) ||
- MO.getFPImm()->isExactlyValue(2.0) ||
- MO.getFPImm()->isExactlyValue(-2.0) ||
- MO.getFPImm()->isExactlyValue(4.0) ||
- MO.getFPImm()->isExactlyValue(-4.0);
+ APFloat FpImm = MO.getFPImm()->getValueAPF();
+ return isInlineConstant(FpImm.bitcastToAPInt());
}
+
return false;
}
@@ -306,6 +408,47 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
+ // Make sure the number of operands is correct.
+ const MCInstrDesc &Desc = get(Opcode);
+ if (!Desc.isVariadic() &&
+ Desc.getNumOperands() != MI->getNumExplicitOperands()) {
+ ErrInfo = "Instruction has wrong number of operands.";
+ return false;
+ }
+
+ // Make sure the register classes are correct
+ for (unsigned i = 0, e = Desc.getNumOperands(); i != e; ++i) {
+ switch (Desc.OpInfo[i].OperandType) {
+ case MCOI::OPERAND_REGISTER:
+ break;
+ case MCOI::OPERAND_IMMEDIATE:
+ if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFPImm()) {
+ ErrInfo = "Expected immediate, but got non-immediate";
+ return false;
+ }
+ // Fall-through
+ default:
+ continue;
+ }
+
+ if (!MI->getOperand(i).isReg())
+ continue;
+
+ int RegClass = Desc.OpInfo[i].RegClass;
+ if (RegClass != -1) {
+ unsigned Reg = MI->getOperand(i).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ const TargetRegisterClass *RC = RI.getRegClass(RegClass);
+ if (!RC->contains(Reg)) {
+ ErrInfo = "Operand has incorrect register class.";
+ return false;
+ }
+ }
+ }
+
+
// Verify VOP*
if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
unsigned ConstantBusCount = 0;
@@ -373,10 +516,20 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
case AMDGPU::COPY: return AMDGPU::COPY;
case AMDGPU::PHI: return AMDGPU::PHI;
+ case AMDGPU::S_MOV_B32:
+ return MI.getOperand(1).isReg() ?
+ AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
case AMDGPU::S_ADD_I32: return AMDGPU::V_ADD_I32_e32;
case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32;
case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
+ case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
+ case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
+ case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
+ case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
+ case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
+ case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
+ case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
@@ -432,6 +585,84 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
MO.ChangeToRegister(Reg, false);
}
+unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
+ MachineRegisterInfo &MRI,
+ MachineOperand &SuperReg,
+ const TargetRegisterClass *SuperRC,
+ unsigned SubIdx,
+ const TargetRegisterClass *SubRC)
+ const {
+ assert(SuperReg.isReg());
+
+ unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
+ unsigned SubReg = MRI.createVirtualRegister(SubRC);
+
+ // Just in case the super register is itself a sub-register, copy it to a new
+ // value so we don't need to wory about merging its subreg index with the
+ // SubIdx passed to this function. The register coalescer should be able to
+ // eliminate this extra copy.
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
+ NewSuperReg)
+ .addOperand(SuperReg);
+
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
+ SubReg)
+ .addReg(NewSuperReg, 0, SubIdx);
+ return SubReg;
+}
+
+MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
+ MachineBasicBlock::iterator MII,
+ MachineRegisterInfo &MRI,
+ MachineOperand &Op,
+ const TargetRegisterClass *SuperRC,
+ unsigned SubIdx,
+ const TargetRegisterClass *SubRC) const {
+ if (Op.isImm()) {
+ // XXX - Is there a better way to do this?
+ if (SubIdx == AMDGPU::sub0)
+ return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF);
+ if (SubIdx == AMDGPU::sub1)
+ return MachineOperand::CreateImm(Op.getImm() >> 32);
+
+ llvm_unreachable("Unhandled register index for immediate");
+ }
+
+ unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
+ SubIdx, SubRC);
+ return MachineOperand::CreateReg(SubReg, false);
+}
+
+unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineBasicBlock::iterator MI,
+ MachineRegisterInfo &MRI,
+ const TargetRegisterClass *RC,
+ const MachineOperand &Op) const {
+ MachineBasicBlock *MBB = MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned Dst = MRI.createVirtualRegister(RC);
+
+ MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
+ LoDst)
+ .addImm(Op.getImm() & 0xFFFFFFFF);
+ MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
+ HiDst)
+ .addImm(Op.getImm() >> 32);
+
+ BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst)
+ .addReg(LoDst)
+ .addImm(AMDGPU::sub0)
+ .addReg(HiDst)
+ .addImm(AMDGPU::sub1);
+
+ Worklist.push_back(Lo);
+ Worklist.push_back(Hi);
+
+ return Dst;
+}
+
void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
@@ -549,6 +780,110 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
MI->getOperand(i).setReg(DstReg);
}
}
+
+ // Legalize MUBUF* instructions
+ // FIXME: If we start using the non-addr64 instructions for compute, we
+ // may need to legalize them here.
+
+ int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::srsrc);
+ int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::vaddr);
+ if (SRsrcIdx != -1 && VAddrIdx != -1) {
+ const TargetRegisterClass *VAddrRC =
+ RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass);
+
+ if(VAddrRC->getSize() == 8 &&
+ MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) {
+ // We have a MUBUF instruction that uses a 64-bit vaddr register and
+ // srsrc has the incorrect register class. In order to fix this, we
+ // need to extract the pointer from the resource descriptor (srsrc),
+ // add it to the value of vadd, then store the result in the vaddr
+ // operand. Then, we need to set the pointer field of the resource
+ // descriptor to zero.
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx);
+ MachineOperand &VAddrOp = MI->getOperand(VAddrIdx);
+ unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi;
+ unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+ unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+ unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+
+ // SRsrcPtrLo = srsrc:sub0
+ SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
+ &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
+
+ // SRsrcPtrHi = srsrc:sub1
+ SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp,
+ &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
+
+ // VAddrLo = vaddr:sub0
+ VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp,
+ &AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
+
+ // VAddrHi = vaddr:sub1
+ VAddrHi = buildExtractSubReg(MI, MRI, VAddrOp,
+ &AMDGPU::VReg_64RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
+
+ // NewVaddrLo = SRsrcPtrLo + VAddrLo
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
+ NewVAddrLo)
+ .addReg(SRsrcPtrLo)
+ .addReg(VAddrLo)
+ .addReg(AMDGPU::VCC, RegState::Define | RegState::Implicit);
+
+ // NewVaddrHi = SRsrcPtrHi + VAddrHi
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
+ NewVAddrHi)
+ .addReg(SRsrcPtrHi)
+ .addReg(VAddrHi)
+ .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
+ .addReg(AMDGPU::VCC, RegState::Implicit);
+
+ // NewVaddr = {NewVaddrHi, NewVaddrLo}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+ NewVAddr)
+ .addReg(NewVAddrLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(NewVAddrHi)
+ .addImm(AMDGPU::sub1);
+
+ // Zero64 = 0
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
+ Zero64)
+ .addImm(0);
+
+ // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+ SRsrcFormatLo)
+ .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
+
+ // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+ SRsrcFormatHi)
+ .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
+
+ // NewSRsrc = {Zero64, SRsrcFormat}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+ NewSRsrc)
+ .addReg(Zero64)
+ .addImm(AMDGPU::sub0_sub1)
+ .addReg(SRsrcFormatLo)
+ .addImm(AMDGPU::sub2)
+ .addReg(SRsrcFormatHi)
+ .addImm(AMDGPU::sub3);
+
+ // Update the instruction to use NewVaddr
+ MI->getOperand(VAddrIdx).setReg(NewVAddr);
+ // Update the instruction to use NewSRsrc
+ MI->getOperand(SRsrcIdx).setReg(NewSRsrc);
+ }
+ }
}
void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
@@ -557,11 +892,68 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
while (!Worklist.empty()) {
MachineInstr *Inst = Worklist.pop_back_val();
- unsigned NewOpcode = getVALUOp(*Inst);
- if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
+ MachineBasicBlock *MBB = Inst->getParent();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ // Handle some special cases
+ switch(Inst->getOpcode()) {
+ case AMDGPU::S_MOV_B64: {
+ DebugLoc DL = Inst->getDebugLoc();
+
+ // If the source operand is a register we can replace this with a
+ // copy.
+ if (Inst->getOperand(1).isReg()) {
+ MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY))
+ .addOperand(Inst->getOperand(0))
+ .addOperand(Inst->getOperand(1));
+ Worklist.push_back(Copy);
+ } else {
+ // Otherwise, we need to split this into two movs, because there is
+ // no 64-bit VALU move instruction.
+ unsigned Reg = Inst->getOperand(0).getReg();
+ unsigned Dst = split64BitImm(Worklist,
+ Inst,
+ MRI,
+ MRI.getRegClass(Reg),
+ Inst->getOperand(1));
+ MRI.replaceRegWith(Reg, Dst);
+ }
+ Inst->eraseFromParent();
+ continue;
+ }
+ case AMDGPU::S_AND_B64:
+ splitScalar64BitOp(Worklist, Inst, AMDGPU::S_AND_B32);
+ Inst->eraseFromParent();
continue;
- MachineRegisterInfo &MRI = Inst->getParent()->getParent()->getRegInfo();
+ case AMDGPU::S_OR_B64:
+ splitScalar64BitOp(Worklist, Inst, AMDGPU::S_OR_B32);
+ Inst->eraseFromParent();
+ continue;
+
+ case AMDGPU::S_XOR_B64:
+ splitScalar64BitOp(Worklist, Inst, AMDGPU::S_XOR_B32);
+ Inst->eraseFromParent();
+ continue;
+
+ case AMDGPU::S_NOT_B64:
+ splitScalar64BitOp(Worklist, Inst, AMDGPU::S_NOT_B32);
+ Inst->eraseFromParent();
+ continue;
+
+ case AMDGPU::S_BFE_U64:
+ case AMDGPU::S_BFE_I64:
+ case AMDGPU::S_BFM_B64:
+ llvm_unreachable("Moving this op to VALU not implemented");
+ }
+
+ unsigned NewOpcode = getVALUOp(*Inst);
+ if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
+ // We cannot move this instruction to the VALU, so we should try to
+ // legalize its operands instead.
+ legalizeOperands(Inst);
+ continue;
+ }
// Use the new VALU Opcode.
const MCInstrDesc &NewDesc = get(NewOpcode);
@@ -620,7 +1012,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
E = MRI.use_end(); I != E; ++I) {
- MachineInstr &UseMI = *I;
+ MachineInstr &UseMI = *I->getParent();
if (!canReadVGPR(UseMI, I.getOperandNo())) {
Worklist.push_back(&UseMI);
}
@@ -642,6 +1034,69 @@ const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
return &AMDGPU::VReg_32RegClass;
}
+void SIInstrInfo::splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst,
+ unsigned Opcode) const {
+ MachineBasicBlock &MBB = *Inst->getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ MachineOperand &Dest = Inst->getOperand(0);
+ MachineOperand &Src0 = Inst->getOperand(1);
+ MachineOperand &Src1 = Inst->getOperand(2);
+ DebugLoc DL = Inst->getDebugLoc();
+
+ MachineBasicBlock::iterator MII = Inst;
+
+ const MCInstrDesc &InstDesc = get(Opcode);
+ const TargetRegisterClass *Src0RC = Src0.isReg() ?
+ MRI.getRegClass(Src0.getReg()) :
+ &AMDGPU::SGPR_32RegClass;
+
+ const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
+ const TargetRegisterClass *Src1RC = Src1.isReg() ?
+ MRI.getRegClass(Src1.getReg()) :
+ &AMDGPU::SGPR_32RegClass;
+
+ const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
+
+ MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
+ AMDGPU::sub0, Src0SubRC);
+ MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
+ AMDGPU::sub0, Src1SubRC);
+
+ const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
+ const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
+
+ unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
+ MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
+ .addOperand(SrcReg0Sub0)
+ .addOperand(SrcReg1Sub0);
+
+ MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
+ AMDGPU::sub1, Src0SubRC);
+ MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
+ AMDGPU::sub1, Src1SubRC);
+
+ unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
+ MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
+ .addOperand(SrcReg0Sub1)
+ .addOperand(SrcReg1Sub1);
+
+ unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
+ BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
+ .addReg(DestSub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(DestSub1)
+ .addImm(AMDGPU::sub1);
+
+ MRI.replaceRegWith(Dest.getReg(), FullDestReg);
+
+ // Try to legalize the operands in case we need to swap the order to keep it
+ // valid.
+ Worklist.push_back(LoHalf);
+ Worklist.push_back(HiHalf);
+}
+
MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,