author     Chris Lattner <sabre@nondot.org>    2004-02-25 06:13:04 +0000
committer  Chris Lattner <sabre@nondot.org>    2004-02-25 06:13:04 +0000
commit     b6bac51351d2a1a9db76381b92c40ec24cc59e8e (patch)
tree       8b9a66497deae906c86b158147276240329b75a8 /lib/Target
parent     2e68037187175945b406a436b00d9d767ee1dfa0 (diff)
* Make the previous patch more efficient by not allocating a temporary MachineInstr
to do analysis.
*** FOLD getelementptr instructions into loads and stores when possible,
making use of some of the crazy X86 addressing modes.
For example, the following C++ program fragment:
struct complex {
  double re, im;
  complex(double r, double i) : re(r), im(i) {}
};

inline complex operator+(const complex& a, const complex& b) {
  return complex(a.re+b.re, a.im+b.im);
}

complex addone(const complex& arg) {
  return arg + complex(1,0);
}
Used to be compiled to:
_Z6addoneRK7complex:
        mov %EAX, DWORD PTR [%ESP + 4]
        mov %ECX, DWORD PTR [%ESP + 8]
***     mov %EDX, %ECX
        fld QWORD PTR [%EDX]
        fld1
        faddp %ST(1)
***     add %ECX, 8
        fld QWORD PTR [%ECX]
        fldz
        faddp %ST(1)
***     mov %ECX, %EAX
        fxch %ST(1)
        fstp QWORD PTR [%ECX]
***     add %EAX, 8
        fstp QWORD PTR [%EAX]
        ret
Now it is compiled to:
_Z6addoneRK7complex:
        mov %EAX, DWORD PTR [%ESP + 4]
        mov %ECX, DWORD PTR [%ESP + 8]
        fld QWORD PTR [%ECX]
        fld1
        faddp %ST(1)
        fld QWORD PTR [%ECX + 8]
        fldz
        faddp %ST(1)
        fxch %ST(1)
        fstp QWORD PTR [%EAX]
        fstp QWORD PTR [%EAX + 8]
        ret
Other programs should see similar improvements, across the board. Note that
in addition to reducing instruction count, this also reduces register pressure
a lot, always a good thing on X86. :)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@11819 91177308-0d34-0410-b5e6-96231b3b80d8
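As context for the diff below: an x86 memory operand can encode [BaseReg + Scale*IndexReg + Disp] in a single instruction, and "folding" a GEP means filling in those four components at instruction-selection time instead of emitting separate address arithmetic. A minimal standalone sketch of the accumulation that makes the example above work (AddressMode and foldConstantIndex are illustrative names for this note, not the patch's API):

#include <cstdint>

// The four components of an x86 addressing mode: [Base + Scale*Index + Disp].
struct AddressMode {
  unsigned BaseReg = 0;   // 0 = no base register chosen yet
  unsigned Scale = 1;     // 1, 2, 4, or 8
  unsigned IndexReg = 0;  // 0 = no index register
  int32_t Disp = 0;       // constant byte displacement
};

// Fold one constant GEP index into the displacement: a struct field index
// contributes its byte offset, and a constant array subscript contributes
// index * sizeof(element). This models the arithmetic getGEPIndex performs.
void foldConstantIndex(AddressMode &AM, int64_t Index, uint64_t ElementSize) {
  AM.Disp += static_cast<int32_t>(Index * static_cast<int64_t>(ElementSize));
}

In the addone example, the access to arg->im is the base pointer plus the byte offset of the second double (8), so the whole GEP collapses into the operand of one load: fld QWORD PTR [%ECX + 8].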
Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/X86/InstSelectSimple.cpp  240
-rw-r--r--  lib/Target/X86/X86ISelSimple.cpp     240
2 files changed, 368 insertions, 112 deletions
diff --git a/lib/Target/X86/InstSelectSimple.cpp b/lib/Target/X86/InstSelectSimple.cpp
index f0e0a5c..d6e3a75 100644
--- a/lib/Target/X86/InstSelectSimple.cpp
+++ b/lib/Target/X86/InstSelectSimple.cpp
@@ -222,6 +222,20 @@ namespace {
     ///
     void promote32(unsigned targetReg, const ValueRecord &VR);
 
+    // getGEPIndex - This is used to fold GEP instructions into X86 addressing
+    // expressions.
+    void getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
+                     std::vector<Value*> &GEPOps,
+                     std::vector<const Type*> &GEPTypes, unsigned &BaseReg,
+                     unsigned &Scale, unsigned &IndexReg, unsigned &Disp);
+
+    /// isGEPFoldable - Return true if the specified GEP can be completely
+    /// folded into the addressing mode of a load/store or lea instruction.
+    bool isGEPFoldable(MachineBasicBlock *MBB,
+                       Value *Src, User::op_iterator IdxBegin,
+                       User::op_iterator IdxEnd, unsigned &BaseReg,
+                       unsigned &Scale, unsigned &IndexReg, unsigned &Disp);
+
     /// emitGEPOperation - Common code shared between visitGetElementPtrInst and
     /// constant expression GEP support.
     ///
@@ -1884,14 +1898,32 @@ void ISel::emitShiftOperation(MachineBasicBlock *MBB,
 /// need to worry about the memory layout of the target machine.
 ///
 void ISel::visitLoadInst(LoadInst &I) {
-  unsigned SrcAddrReg = getReg(I.getOperand(0));
   unsigned DestReg = getReg(I);
+  unsigned BaseReg = 0, Scale = 1, IndexReg = 0, Disp = 0;
+  Value *Addr = I.getOperand(0);
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) {
+    if (isGEPFoldable(BB, GEP->getOperand(0), GEP->op_begin()+1, GEP->op_end(),
+                      BaseReg, Scale, IndexReg, Disp))
+      Addr = 0;   // Address is consumed!
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+    if (CE->getOpcode() == Instruction::GetElementPtr)
+      if (isGEPFoldable(BB, CE->getOperand(0), CE->op_begin()+1, CE->op_end(),
+                        BaseReg, Scale, IndexReg, Disp))
+        Addr = 0;
+  }
 
-  unsigned Class = getClassB(I.getType());
+  if (Addr) {
+    // If it's not foldable, reset addr mode.
+    BaseReg = getReg(Addr);
+    Scale = 1; IndexReg = 0; Disp = 0;
+  }
 
+  unsigned Class = getClassB(I.getType());
   if (Class == cLong) {
-    addDirectMem(BuildMI(BB, X86::MOVrm32, 4, DestReg), SrcAddrReg);
-    addRegOffset(BuildMI(BB, X86::MOVrm32, 4, DestReg+1), SrcAddrReg, 4);
+    addFullAddress(BuildMI(BB, X86::MOVrm32, 4, DestReg),
+                   BaseReg, Scale, IndexReg, Disp);
+    addFullAddress(BuildMI(BB, X86::MOVrm32, 4, DestReg+1),
+                   BaseReg, Scale, IndexReg, Disp+4);
     return;
   }
 
@@ -1900,37 +1932,61 @@ void ISel::visitLoadInst(LoadInst &I) {
   };
   unsigned Opcode = Opcodes[Class];
   if (I.getType() == Type::DoubleTy) Opcode = X86::FLDr64;
-  addDirectMem(BuildMI(BB, Opcode, 4, DestReg), SrcAddrReg);
+  addFullAddress(BuildMI(BB, Opcode, 4, DestReg),
+                 BaseReg, Scale, IndexReg, Disp);
 }
 
 /// visitStoreInst - Implement LLVM store instructions in terms of the x86 'mov'
 /// instruction.
 ///
 void ISel::visitStoreInst(StoreInst &I) {
-  unsigned AddressReg = getReg(I.getOperand(1));
+  unsigned BaseReg = 0, Scale = 1, IndexReg = 0, Disp = 0;
+  Value *Addr = I.getOperand(1);
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) {
+    if (isGEPFoldable(BB, GEP->getOperand(0), GEP->op_begin()+1, GEP->op_end(),
+                      BaseReg, Scale, IndexReg, Disp))
+      Addr = 0;   // Address is consumed!
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+    if (CE->getOpcode() == Instruction::GetElementPtr)
+      if (isGEPFoldable(BB, CE->getOperand(0), CE->op_begin()+1, CE->op_end(),
+                        BaseReg, Scale, IndexReg, Disp))
+        Addr = 0;
+  }
+
+  if (Addr) {
+    // If it's not foldable, reset addr mode.
+    BaseReg = getReg(Addr);
+    Scale = 1; IndexReg = 0; Disp = 0;
+  }
+
   const Type *ValTy = I.getOperand(0)->getType();
   unsigned Class = getClassB(ValTy);
 
   if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(0))) {
     uint64_t Val = CI->getRawValue();
     if (Class == cLong) {
-      addDirectMem(BuildMI(BB, X86::MOVmi32, 5), AddressReg).addZImm(Val & ~0U);
-      addRegOffset(BuildMI(BB, X86::MOVmi32, 5), AddressReg,4).addZImm(Val>>32);
+      addFullAddress(BuildMI(BB, X86::MOVmi32, 5),
+                     BaseReg, Scale, IndexReg, Disp).addZImm(Val & ~0U);
+      addFullAddress(BuildMI(BB, X86::MOVmi32, 5),
+                     BaseReg, Scale, IndexReg, Disp+4).addZImm(Val>>32);
     } else {
       static const unsigned Opcodes[] = {
         X86::MOVmi8, X86::MOVmi16, X86::MOVmi32
       };
       unsigned Opcode = Opcodes[Class];
-      addDirectMem(BuildMI(BB, Opcode, 5), AddressReg).addZImm(Val);
+      addFullAddress(BuildMI(BB, Opcode, 5),
+                     BaseReg, Scale, IndexReg, Disp).addZImm(Val);
     }
   } else if (ConstantBool *CB = dyn_cast<ConstantBool>(I.getOperand(0))) {
-    addDirectMem(BuildMI(BB, X86::MOVmi8, 5),
-                 AddressReg).addZImm(CB->getValue());
+    addFullAddress(BuildMI(BB, X86::MOVmi8, 5),
+                   BaseReg, Scale, IndexReg, Disp).addZImm(CB->getValue());
  } else {
     if (Class == cLong) {
       unsigned ValReg = getReg(I.getOperand(0));
-      addDirectMem(BuildMI(BB, X86::MOVmr32, 5), AddressReg).addReg(ValReg);
-      addRegOffset(BuildMI(BB, X86::MOVmr32, 5), AddressReg,4).addReg(ValReg+1);
+      addFullAddress(BuildMI(BB, X86::MOVmr32, 5),
+                     BaseReg, Scale, IndexReg, Disp).addReg(ValReg);
+      addFullAddress(BuildMI(BB, X86::MOVmr32, 5),
+                     BaseReg, Scale, IndexReg, Disp+4).addReg(ValReg+1);
     } else {
       unsigned ValReg = getReg(I.getOperand(0));
       static const unsigned Opcodes[] = {
@@ -1938,7 +1994,8 @@ void ISel::visitStoreInst(StoreInst &I) {
       };
       unsigned Opcode = Opcodes[Class];
       if (ValTy == Type::DoubleTy) Opcode = X86::FSTr64;
-      addDirectMem(BuildMI(BB, Opcode, 1+4), AddressReg).addReg(ValReg);
+      addFullAddress(BuildMI(BB, Opcode, 1+4),
+                     BaseReg, Scale, IndexReg, Disp).addReg(ValReg);
     }
   }
 }
@@ -2138,7 +2195,8 @@ void ISel::emitCastOperation(MachineBasicBlock *BB,
   }
 
   // Spill the integer to memory and reload it from there...
-  int FrameIdx = F->getFrameInfo()->CreateStackObject(SrcTy, TM.getTargetData());
+  int FrameIdx =
+    F->getFrameInfo()->CreateStackObject(SrcTy, TM.getTargetData());
 
   if (SrcClass == cLong) {
     addFrameReference(BMI(BB, IP, X86::MOVmr32, 5), FrameIdx).addReg(SrcReg);
@@ -2160,15 +2218,18 @@ void ISel::emitCastOperation(MachineBasicBlock *BB,
     // Emit a test instruction to see if the dynamic input value was signed.
     BMI(BB, IP, X86::TESTrr32, 2).addReg(SrcReg+1).addReg(SrcReg+1);
 
-    // If the sign bit is set, get a pointer to an offset, otherwise get a pointer to a zero.
+    // If the sign bit is set, get a pointer to an offset, otherwise get a
+    // pointer to a zero.
     MachineConstantPool *CP = F->getConstantPool();
     unsigned Zero = makeAnotherReg(Type::IntTy);
+    Constant *Null = Constant::getNullValue(Type::UIntTy);
     addConstantPoolReference(BMI(BB, IP, X86::LEAr32, 5, Zero),
-                             CP->getConstantPoolIndex(Constant::getNullValue(Type::UIntTy)));
+                             CP->getConstantPoolIndex(Null));
     unsigned Offset = makeAnotherReg(Type::IntTy);
+    Constant *OffsetCst = ConstantUInt::get(Type::UIntTy, 0x5f800000);
+
     addConstantPoolReference(BMI(BB, IP, X86::LEAr32, 5, Offset),
-                             CP->getConstantPoolIndex(ConstantUInt::get(Type::UIntTy,
-                                                                        0x5f800000)));
+                             CP->getConstantPoolIndex(OffsetCst));
     unsigned Addr = makeAnotherReg(Type::IntTy);
     BMI(BB, IP, X86::CMOVSrr32, 2, Addr).addReg(Zero).addReg(Offset);
@@ -2303,6 +2364,26 @@ void ISel::visitVAArgInst(VAArgInst &I) {
 
 
 void ISel::visitGetElementPtrInst(GetElementPtrInst &I) {
+  // If this GEP instruction will be folded into all of its users, we don't need
+  // to explicitly calculate it!
+  unsigned A, B, C, D;
+  if (isGEPFoldable(0, I.getOperand(0), I.op_begin()+1, I.op_end(), A,B,C,D)) {
+    // Check all of the users of the instruction to see if they are loads and
+    // stores.
+    bool AllWillFold = true;
+    for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E; ++UI)
+      if (cast<Instruction>(*UI)->getOpcode() != Instruction::Load)
+        if (cast<Instruction>(*UI)->getOpcode() != Instruction::Store ||
+            cast<Instruction>(*UI)->getOperand(0) == &I) {
+          AllWillFold = false;
+          break;
+        }
+
+    // If the instruction is foldable, and will be folded into all users, don't
+    // emit it!
+    if (AllWillFold) return;
+  }
+
   unsigned outputReg = getReg(I);
   emitGEPOperation(BB, BB->end(), I.getOperand(0),
                    I.op_begin()+1, I.op_end(), outputReg);
@@ -2319,15 +2400,18 @@ void ISel::visitGetElementPtrInst(GetElementPtrInst &I) {
 ///
 /// Note that there is one fewer entry in GEPTypes than there is in GEPOps.
 ///
-static void getGEPIndex(std::vector<Value*> &GEPOps,
-                        std::vector<const Type*> &GEPTypes,
-                        MachineInstr *Ops, const TargetData &TD){
+void ISel::getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
+                       std::vector<Value*> &GEPOps,
+                       std::vector<const Type*> &GEPTypes, unsigned &BaseReg,
+                       unsigned &Scale, unsigned &IndexReg, unsigned &Disp) {
+  const TargetData &TD = TM.getTargetData();
+
   // Clear out the state we are working with...
-  Ops->getOperand(0).setReg(0);            // No base register
-  Ops->getOperand(1).setImmedValue(1);     // Unit scale
-  Ops->getOperand(2).setReg(0);            // No index register
-  Ops->getOperand(3).setImmedValue(0);     // No displacement
-
+  BaseReg = 0;    // No base register
+  Scale = 1;      // Unit scale
+  IndexReg = 0;   // No index register
+  Disp = 0;       // No displacement
+
   // While there are GEP indexes that can be folded into the current address,
   // keep processing them.
   while (!GEPTypes.empty()) {
@@ -2340,14 +2424,7 @@ static void getGEPIndex(std::vector<Value*> &GEPOps,
       // structure is in memory.  Since the structure index must be constant, we
       // can get its value and use it to find the right byte offset from the
       // StructLayout class's list of structure member offsets.
-      unsigned idxValue = CUI->getValue();
-      unsigned FieldOff = TD.getStructLayout(StTy)->MemberOffsets[idxValue];
-      if (FieldOff) {
-        if (Ops->getOperand(2).getReg())
-          return;    // Already has an index, can't add offset.
-        Ops->getOperand(3).setImmedValue(FieldOff+
-                                         Ops->getOperand(3).getImmedValue());
-      }
+      Disp += TD.getStructLayout(StTy)->MemberOffsets[CUI->getValue()];
       GEPOps.pop_back();        // Consume a GEP operand
       GEPTypes.pop_back();
     } else {
@@ -2362,10 +2439,7 @@ static void getGEPIndex(std::vector<Value*> &GEPOps,
 
       // If idx is a constant, fold it into the offset.
       if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(idx)) {
-        unsigned elementSize = TD.getTypeSize(SqTy->getElementType());
-        unsigned Offset = elementSize*CSI->getValue();
-        Ops->getOperand(3).setImmedValue(Offset+
-                                         Ops->getOperand(3).getImmedValue());
+        Disp += TD.getTypeSize(SqTy->getElementType())*CSI->getValue();
       } else {
         // If we can't handle it, return.
         return;
@@ -2375,15 +2449,49 @@ static void getGEPIndex(std::vector<Value*> &GEPOps,
       GEPTypes.pop_back();
     }
   }
+
+  // GEPTypes is empty, which means we have a single operand left.  See if we
+  // can set it as the base register.
+  //
+  // FIXME: When addressing modes are more powerful/correct, we could load
+  // global addresses directly as 32-bit immediates.
+  assert(BaseReg == 0);
+  BaseReg = MBB ? getReg(GEPOps[0], MBB, IP) : 0;
+  GEPOps.pop_back();        // Consume the last GEP operand
 }
 
+/// isGEPFoldable - Return true if the specified GEP can be completely
+/// folded into the addressing mode of a load/store or lea instruction.
+bool ISel::isGEPFoldable(MachineBasicBlock *MBB,
+                         Value *Src, User::op_iterator IdxBegin,
+                         User::op_iterator IdxEnd, unsigned &BaseReg,
+                         unsigned &Scale, unsigned &IndexReg, unsigned &Disp) {
+  if (ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(Src))
+    Src = CPR->getValue();
+
+  std::vector<Value*> GEPOps;
+  GEPOps.resize(IdxEnd-IdxBegin+1);
+  GEPOps[0] = Src;
+  std::copy(IdxBegin, IdxEnd, GEPOps.begin()+1);
+
+  std::vector<const Type*> GEPTypes;
+  GEPTypes.assign(gep_type_begin(Src->getType(), IdxBegin, IdxEnd),
+                  gep_type_end(Src->getType(), IdxBegin, IdxEnd));
+
+  MachineBasicBlock::iterator IP;
+  if (MBB) IP = MBB->end();
+  getGEPIndex(MBB, IP, GEPOps, GEPTypes, BaseReg, Scale, IndexReg, Disp);
+
+  // We can fold it away iff the getGEPIndex call eliminated all operands.
+  return GEPOps.empty();
+}
+
 void ISel::emitGEPOperation(MachineBasicBlock *MBB,
                             MachineBasicBlock::iterator IP,
                             Value *Src, User::op_iterator IdxBegin,
                             User::op_iterator IdxEnd, unsigned TargetReg) {
   const TargetData &TD = TM.getTargetData();
-
   if (ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(Src))
     Src = CPR->getValue();
@@ -2396,27 +2504,28 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
   GEPTypes.assign(gep_type_begin(Src->getType(), IdxBegin, IdxEnd),
                   gep_type_end(Src->getType(), IdxBegin, IdxEnd));
 
-  // DummyMI - A dummy instruction to pass into getGEPIndex.  The opcode doesn't
-  // matter, we just need 4 MachineOperands.
-  MachineInstr *DummyMI =
-    BuildMI(X86::PHI, 4).addReg(0).addZImm(1).addReg(0).addSImm(0);
-
   // Keep emitting instructions until we consume the entire GEP instruction.
   while (!GEPOps.empty()) {
     unsigned OldSize = GEPOps.size();
-    getGEPIndex(GEPOps, GEPTypes, DummyMI, TD);
+    unsigned BaseReg, Scale, IndexReg, Disp;
+    getGEPIndex(MBB, IP, GEPOps, GEPTypes, BaseReg, Scale, IndexReg, Disp);
 
     if (GEPOps.size() != OldSize) {
      // getGEPIndex consumed some of the input.  Build an LEA instruction here.
-      assert(DummyMI->getOperand(0).getReg() == 0 &&
-             DummyMI->getOperand(1).getImmedValue() == 1 &&
-             DummyMI->getOperand(2).getReg() == 0 &&
-             "Unhandled GEP fold!");
-      if (unsigned Offset = DummyMI->getOperand(3).getImmedValue()) {
-        unsigned Reg = makeAnotherReg(Type::UIntTy);
-        addRegOffset(BMI(MBB, IP, X86::LEAr32, 5, TargetReg), Reg, Offset);
-        TargetReg = Reg;
+      unsigned NextTarget = 0;
+      if (!GEPOps.empty()) {
+        assert(BaseReg == 0 &&
+           "getGEPIndex should have left the base register open for chaining!");
+        NextTarget = BaseReg = makeAnotherReg(Type::UIntTy);
       }
+
+      if (IndexReg == 0 && Disp == 0)
+        BMI(MBB, IP, X86::MOVrr32, 1, TargetReg).addReg(BaseReg);
+      else
+        addFullAddress(BMI(MBB, IP, X86::LEAr32, 5, TargetReg),
+                       BaseReg, Scale, IndexReg, Disp);
+      --IP;
+      TargetReg = NextTarget;
     } else if (GEPTypes.empty()) {
       // The getGEPIndex operation didn't want to build an LEA.  Check to see if
       // all operands are consumed but the base pointer.  If so, just load it
@@ -2428,6 +2537,27 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
         BMI(MBB, IP, X86::MOVrr32, 1, TargetReg).addReg(BaseReg);
       }
       break;                // we are now done
+
+    } else if (const StructType *StTy = dyn_cast<StructType>(GEPTypes.back())) {
+      // It's a struct access.  CUI is the index into the structure,
+      // which names the field. This index must have unsigned type.
+      const ConstantUInt *CUI = cast<ConstantUInt>(GEPOps.back());
+      GEPOps.pop_back();        // Consume a GEP operand
+      GEPTypes.pop_back();
+
+      // Use the TargetData structure to pick out what the layout of the
+      // structure is in memory.  Since the structure index must be constant, we
+      // can get its value and use it to find the right byte offset from the
+      // StructLayout class's list of structure member offsets.
+      unsigned idxValue = CUI->getValue();
+      unsigned FieldOff = TD.getStructLayout(StTy)->MemberOffsets[idxValue];
+      if (FieldOff) {
+        unsigned Reg = makeAnotherReg(Type::UIntTy);
+        // Emit an ADD to add FieldOff to the basePtr.
+        BMI(MBB, IP, X86::ADDri32, 2, TargetReg).addReg(Reg).addZImm(FieldOff);
+        --IP;            // Insert the next instruction before this one.
+        TargetReg = Reg; // Codegen the rest of the GEP into this
+      }
     } else {
       // It's an array or pointer access: [ArraySize x ElementType].
       const SequentialType *SqTy = cast<SequentialType>(GEPTypes.back());
@@ -2496,8 +2626,6 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
       }
     }
   }
-
-  delete DummyMI;
 }
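For readers skimming the diff: the heart of the change is that getGEPIndex now computes the (BaseReg, Scale, IndexReg, Disp) tuple directly instead of mutating a scratch MachineInstr. A compressed standalone model of the folding decision, with hypothetical GEPIndex/foldGEP types invented for this note (the real code walks LLVM Value/Type lists and consumes indices from the end, so it can also stop partway and leave a prefix for emitGEPOperation to chain through an extra LEA):

#include <cstdint>
#include <vector>

// One GEP index, pre-resolved against TargetData: a struct index always has
// a constant field offset; an array subscript is foldable only when it is a
// compile-time constant.
struct GEPIndex {
  bool IsStructField;    // struct field number vs. array/pointer subscript
  bool IsConstant;       // struct indices are always constant
  int64_t Value;         // the constant subscript (array case)
  uint64_t FieldOffset;  // byte offset of the field (struct case)
  uint64_t ElementSize;  // element size in bytes (array case)
};

// The GEP folds completely into one addressing mode iff every index collapses
// into the constant displacement; the untouched base pointer becomes BaseReg.
bool foldGEP(const std::vector<GEPIndex> &Indices, int64_t &Disp) {
  Disp = 0;
  for (const GEPIndex &Idx : Indices) {
    if (Idx.IsStructField)
      Disp += (int64_t)Idx.FieldOffset;
    else if (Idx.IsConstant)
      Disp += (int64_t)Idx.ElementSize * Idx.Value;
    else
      return false;  // runtime subscript: not foldable into Disp
  }
  return true;
}

Note that in this patch Scale and IndexReg are always left at their defaults (1 and none); only constant offsets are folded, which already covers the struct accesses in the addone example.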