aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/R600/SIFoldOperands.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/R600/SIFoldOperands.cpp')
-rw-r--r--lib/Target/R600/SIFoldOperands.cpp287
1 files changed, 287 insertions, 0 deletions
diff --git a/lib/Target/R600/SIFoldOperands.cpp b/lib/Target/R600/SIFoldOperands.cpp
new file mode 100644
index 0000000..ae4b05d
--- /dev/null
+++ b/lib/Target/R600/SIFoldOperands.cpp
@@ -0,0 +1,287 @@
+//===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "si-fold-operands"
+using namespace llvm;
+
+namespace {
+
+class SIFoldOperands : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SIFoldOperands() : MachineFunctionPass(ID) {
+ initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "SI Fold Operands";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+struct FoldCandidate {
+ MachineInstr *UseMI;
+ unsigned UseOpNo;
+ MachineOperand *OpToFold;
+ uint64_t ImmToFold;
+
+ FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
+ UseMI(MI), UseOpNo(OpNo) {
+
+ if (FoldOp->isImm()) {
+ OpToFold = nullptr;
+ ImmToFold = FoldOp->getImm();
+ } else {
+ assert(FoldOp->isReg());
+ OpToFold = FoldOp;
+ }
+ }
+
+ bool isImm() const {
+ return !OpToFold;
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SIFoldOperands, DEBUG_TYPE,
+ "SI Fold Operands", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(SIFoldOperands, DEBUG_TYPE,
+ "SI Fold Operands", false, false)
+
+char SIFoldOperands::ID = 0;
+
+char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
+
+FunctionPass *llvm::createSIFoldOperandsPass() {
+ return new SIFoldOperands();
+}
+
+static bool isSafeToFold(unsigned Opcode) {
+ switch(Opcode) {
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::V_MOV_B32_e64:
+ case AMDGPU::V_MOV_B64_PSEUDO:
+ case AMDGPU::S_MOV_B32:
+ case AMDGPU::S_MOV_B64:
+ case AMDGPU::COPY:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool updateOperand(FoldCandidate &Fold,
+ const TargetRegisterInfo &TRI) {
+ MachineInstr *MI = Fold.UseMI;
+ MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
+ assert(Old.isReg());
+
+ if (Fold.isImm()) {
+ Old.ChangeToImmediate(Fold.ImmToFold);
+ return true;
+ }
+
+ MachineOperand *New = Fold.OpToFold;
+ if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
+ TargetRegisterInfo::isVirtualRegister(New->getReg())) {
+ Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
+ return true;
+ }
+
+ // FIXME: Handle physical registers.
+
+ return false;
+}
+
+static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
+ MachineInstr *MI, unsigned OpNo,
+ MachineOperand *OpToFold,
+ const SIInstrInfo *TII) {
+ if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {
+ // Operand is not legal, so try to commute the instruction to
+ // see if this makes it possible to fold.
+ unsigned CommuteIdx0;
+ unsigned CommuteIdx1;
+ bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);
+
+ if (CanCommute) {
+ if (CommuteIdx0 == OpNo)
+ OpNo = CommuteIdx1;
+ else if (CommuteIdx1 == OpNo)
+ OpNo = CommuteIdx0;
+ }
+
+ if (!CanCommute || !TII->commuteInstruction(MI))
+ return false;
+
+ if (!TII->isOperandLegal(MI, OpNo, OpToFold))
+ return false;
+ }
+
+ FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
+ return true;
+}
+
+bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ MachineBasicBlock::iterator I, Next;
+ for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ Next = std::next(I);
+ MachineInstr &MI = *I;
+
+ if (!isSafeToFold(MI.getOpcode()))
+ continue;
+
+ unsigned OpSize = TII->getOpSize(MI, 1);
+ MachineOperand &OpToFold = MI.getOperand(1);
+ bool FoldingImm = OpToFold.isImm();
+
+ // FIXME: We could also be folding things like FrameIndexes and
+ // TargetIndexes.
+ if (!FoldingImm && !OpToFold.isReg())
+ continue;
+
+ // Folding immediates with more than one use will increase program size.
+ // FIXME: This will also reduce register usage, which may be better
+ // in some cases. A better heuristic is needed.
+ if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
+ !MRI.hasOneUse(MI.getOperand(0).getReg()))
+ continue;
+
+ // FIXME: Fold operands with subregs.
+ if (OpToFold.isReg() &&
+ (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
+ OpToFold.getSubReg()))
+ continue;
+
+ std::vector<FoldCandidate> FoldList;
+ for (MachineRegisterInfo::use_iterator
+ Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
+ Use != E; ++Use) {
+
+ MachineInstr *UseMI = Use->getParent();
+ const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());
+
+ // FIXME: Fold operands with subregs.
+ if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
+ UseOp.isImplicit())) {
+ continue;
+ }
+
+ APInt Imm;
+
+ if (FoldingImm) {
+ unsigned UseReg = UseOp.getReg();
+ const TargetRegisterClass *UseRC
+ = TargetRegisterInfo::isVirtualRegister(UseReg) ?
+ MRI.getRegClass(UseReg) :
+ TRI.getRegClass(UseReg);
+
+ Imm = APInt(64, OpToFold.getImm());
+
+ // Split 64-bit constants into 32-bits for folding.
+ if (UseOp.getSubReg()) {
+ if (UseRC->getSize() != 8)
+ continue;
+
+ if (UseOp.getSubReg() == AMDGPU::sub0) {
+ Imm = Imm.getLoBits(32);
+ } else {
+ assert(UseOp.getSubReg() == AMDGPU::sub1);
+ Imm = Imm.getHiBits(32);
+ }
+ }
+
+ // In order to fold immediates into copies, we need to change the
+ // copy to a MOV.
+ if (UseMI->getOpcode() == AMDGPU::COPY) {
+ unsigned DestReg = UseMI->getOperand(0).getReg();
+ const TargetRegisterClass *DestRC
+ = TargetRegisterInfo::isVirtualRegister(DestReg) ?
+ MRI.getRegClass(DestReg) :
+ TRI.getRegClass(DestReg);
+
+ unsigned MovOp = TII->getMovOpcode(DestRC);
+ if (MovOp == AMDGPU::COPY)
+ continue;
+
+ UseMI->setDesc(TII->get(MovOp));
+ }
+ }
+
+ const MCInstrDesc &UseDesc = UseMI->getDesc();
+
+ // Don't fold into target independent nodes. Target independent opcodes
+ // don't have defined register classes.
+ if (UseDesc.isVariadic() ||
+ UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
+ continue;
+
+ if (FoldingImm) {
+ MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
+ tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII);
+ continue;
+ }
+
+ tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII);
+
+ // FIXME: We could try to change the instruction from 64-bit to 32-bit
+ // to enable more folding opportunites. The shrink operands pass
+ // already does this.
+ }
+
+ for (FoldCandidate &Fold : FoldList) {
+ if (updateOperand(Fold, TRI)) {
+ // Clear kill flags.
+ if (!Fold.isImm()) {
+ assert(Fold.OpToFold && Fold.OpToFold->isReg());
+ Fold.OpToFold->setIsKill(false);
+ }
+ DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
+ Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
+ }
+ }
+ }
+ }
+ return false;
+}