| author | Chris Lattner <sabre@nondot.org> | 2010-09-05 02:18:34 +0000 |
|---|---|---|
| committer | Chris Lattner <sabre@nondot.org> | 2010-09-05 02:18:34 +0000 |
| commit | 083b9e18e561b1b7f00a159a6936c7b81e3c8a2b | |
| tree | 54525601bcdb388c49b9d014c90e6cbe8dbc1467 /lib/Target/X86/X86FastISel.cpp | |
| parent | fa2d83d5a9a3fabe46c671ca2d54625b26b0e637 | |
implement rdar://6653118 - fastisel should fold loads where possible.
Since mem2reg isn't run at -O0, we get a ton of reloads from the stack. For example, this code:
int foo(int x, int y, int z) {
  return x+y+z;
}
used to compile into:
_foo:                                   ## @foo
        subq    $12, %rsp
        movl    %edi, 8(%rsp)
        movl    %esi, 4(%rsp)
        movl    %edx, (%rsp)
        movl    8(%rsp), %edx
        movl    4(%rsp), %esi
        addl    %edx, %esi
        movl    (%rsp), %edx
        addl    %esi, %edx
        movl    %edx, %eax
        addq    $12, %rsp
        ret
Now we produce:
_foo:                                   ## @foo
        subq    $12, %rsp
        movl    %edi, 8(%rsp)
        movl    %esi, 4(%rsp)
        movl    %edx, (%rsp)
        movl    8(%rsp), %edx
        addl    4(%rsp), %edx           ## Folded load
        addl    (%rsp), %edx            ## Folded load
        movl    %edx, %eax
        addq    $12, %rsp
        ret
Fewer instructions and less register use = faster compiles.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@113102 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/X86FastISel.cpp')
| -rw-r--r-- | lib/Target/X86/X86FastISel.cpp | 35 |
1 file changed, 35 insertions, 0 deletions
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 0c70eec..9390ba9 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -63,6 +63,13 @@ public:
 
   virtual bool TargetSelectInstruction(const Instruction *I);
 
+  /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+  /// vreg is being provided by the specified load instruction. If possible,
+  /// try to fold the load as an operand to the instruction, returning true if
+  /// possible.
+  virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+                             const LoadInst *LI);
+
 #include "X86GenFastISel.inc"
 
 private:
@@ -1941,6 +1948,34 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
   return ResultReg;
 }
 
+/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+/// vreg is being provided by the specified load instruction. If possible,
+/// try to fold the load as an operand to the instruction, returning true if
+/// possible.
+bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+                                const LoadInst *LI) {
+  X86AddressMode AM;
+  if (!X86SelectAddress(LI->getOperand(0), AM))
+    return false;
+
+  X86InstrInfo &XII = (X86InstrInfo&)TII;
+
+  unsigned Size = TD.getTypeAllocSize(LI->getType());
+  unsigned Alignment = LI->getAlignment();
+
+  SmallVector<MachineOperand, 8> AddrOps;
+  AM.getFullAddress(AddrOps);
+
+  MachineInstr *Result =
+    XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
+  if (Result == 0) return false;
+
+  MI->getParent()->insert(MI, Result);
+  MI->eraseFromParent();
+  return true;
+}
+
+
 namespace llvm {
   llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
     return new X86FastISel(funcInfo);
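The override above only does the target-specific half of the work: given an instruction operand and the load that feeds it, it asks X86InstrInfo::foldMemoryOperandImpl for the memory-operand form and swaps it in. Deciding when to attempt the fold is up to the fast-isel driver. The snippet below is a minimal sketch of that caller-side check, not the actual driver code in SelectionDAGISel; the helper name foldLoadIfPossible is made up for illustration, and it assumes the base FastISel class declares the TryToFoldLoad virtual introduced by this patch series.

#include "llvm/Instructions.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Hypothetical caller-side helper (illustration only): try to fold the load
// feeding operand OpNo of MI, and report whether the fold happened.
static bool foldLoadIfPossible(FastISel &FastIS, MachineInstr *MI,
                               unsigned OpNo, const LoadInst *LI) {
  // Only consider loads with exactly one use, in the same basic block as
  // that use; otherwise the loaded value is still needed elsewhere and the
  // load cannot simply disappear into a memory operand.
  if (!LI->hasOneUse())
    return false;
  const Instruction *OnlyUser = cast<Instruction>(*LI->use_begin());
  if (OnlyUser->getParent() != LI->getParent())
    return false;

  // Let the target decide; on success MI is replaced by an instruction that
  // reads straight from memory, e.g. "addl 4(%rsp), %edx" above.
  return FastIS.TryToFoldLoad(MI, OpNo, LI);
}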
