From beac75da3784929aee9f0357fc5cd76d49d6c3d7 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sun, 5 Sep 2010 02:18:34 +0000 Subject: implement rdar://6653118 - fastisel should fold loads where possible. Since mem2reg isn't run at -O0, we get a ton of reloads from the stack, for example, before, this code: int foo(int x, int y, int z) { return x+y+z; } used to compile into: _foo: ## @foo subq $12, %rsp movl %edi, 8(%rsp) movl %esi, 4(%rsp) movl %edx, (%rsp) movl 8(%rsp), %edx movl 4(%rsp), %esi addl %edx, %esi movl (%rsp), %edx addl %esi, %edx movl %edx, %eax addq $12, %rsp ret Now we produce: _foo: ## @foo subq $12, %rsp movl %edi, 8(%rsp) movl %esi, 4(%rsp) movl %edx, (%rsp) movl 8(%rsp), %edx addl 4(%rsp), %edx ## Folded load addl (%rsp), %edx ## Folded load movl %edx, %eax addq $12, %rsp ret Fewer instructions and less register use = faster compiles. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@113102 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 35 +++++++++++++++++++++++++++++++++++ lib/Target/X86/X86InstrBuilder.h | 32 +++++++++++++++++++++++++++++--- lib/Target/X86/X86InstrInfo.h | 12 ++++++------ 3 files changed, 70 insertions(+), 9 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 0c70eec..9390ba9 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -63,6 +63,13 @@ public: virtual bool TargetSelectInstruction(const Instruction *I); + /// TryToFoldLoad - The specified machine instr operand is a vreg, and that + /// vreg is being provided by the specified load instruction. If possible, + /// try to fold the load as an operand to the instruction, returning true if + /// possible. 
+ virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI); + #include "X86GenFastISel.inc" private: @@ -1941,6 +1948,34 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) { return ResultReg; } +/// TryToFoldLoad - The specified machine instr operand is a vreg, and that +/// vreg is being provided by the specified load instruction. If possible, +/// try to fold the load as an operand to the instruction, returning true if +/// possible. +bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI) { + X86AddressMode AM; + if (!X86SelectAddress(LI->getOperand(0), AM)) + return false; + + X86InstrInfo &XII = (X86InstrInfo&)TII; + + unsigned Size = TD.getTypeAllocSize(LI->getType()); + unsigned Alignment = LI->getAlignment(); + + SmallVector<MachineOperand, 8> AddrOps; + AM.getFullAddress(AddrOps); + + MachineInstr *Result = + XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment); + if (Result == 0) return false; + + MI->getParent()->insert(MI, Result); + MI->eraseFromParent(); + return true; +} + + namespace llvm { llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) { return new X86FastISel(funcInfo); diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index 2a6a71d..407b97e 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -56,6 +56,31 @@ struct X86AddressMode { : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) { Base.Reg = 0; } + + + void getFullAddress(SmallVectorImpl<MachineOperand> &MO) { + assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8); + + if (BaseType == X86AddressMode::RegBase) + MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false, + false, false, false, 0, false)); + else { + assert(BaseType == X86AddressMode::FrameIndexBase); + MO.push_back(MachineOperand::CreateFI(Base.FrameIndex)); + } + + MO.push_back(MachineOperand::CreateImm(Scale));
+ MO.push_back(MachineOperand::CreateReg(IndexReg, false, false, + false, false, false, 0, false)); + + if (GV) + MO.push_back(MachineOperand::CreateGA(GV, Disp, GVOpFlags)); + else + MO.push_back(MachineOperand::CreateImm(Disp)); + + MO.push_back(MachineOperand::CreateReg(0, false, false, + false, false, false, 0, false)); + } }; /// addDirectMem - This function is used to add a direct memory reference to the @@ -101,10 +126,11 @@ addFullAddress(const MachineInstrBuilder &MIB, if (AM.BaseType == X86AddressMode::RegBase) MIB.addReg(AM.Base.Reg); - else if (AM.BaseType == X86AddressMode::FrameIndexBase) + else { + assert(AM.BaseType == X86AddressMode::FrameIndexBase); MIB.addFrameIndex(AM.Base.FrameIndex); - else - assert (0); + } + MIB.addImm(AM.Scale).addReg(AM.IndexReg); if (AM.GV) MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags); diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index f336206..eb0e4b8 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -845,18 +845,18 @@ public: /// SetSSEDomain - Set the SSEDomain of MI. void SetSSEDomain(MachineInstr *MI, unsigned Domain) const; + MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + unsigned OpNum, + const SmallVectorImpl<MachineOperand> &MOs, + unsigned Size, unsigned Alignment) const; + private: MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc, MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI, LiveVariables *LV) const; - MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - unsigned OpNum, - const SmallVectorImpl<MachineOperand> &MOs, - unsigned Size, unsigned Alignment) const; - /// isFrameOperand - Return true and the FrameIndex if the specified /// operand and follow operands form a reference to the stack frame. bool isFrameOperand(const MachineInstr *MI, unsigned int Op, -- cgit v1.1