author     Chris Lattner <sabre@nondot.org>    2004-02-25 06:13:04 +0000
committer  Chris Lattner <sabre@nondot.org>    2004-02-25 06:13:04 +0000
commit     b6bac51351d2a1a9db76381b92c40ec24cc59e8e (patch)
tree       8b9a66497deae906c86b158147276240329b75a8 /lib/Target
parent     2e68037187175945b406a436b00d9d767ee1dfa0 (diff)
* Make the previous patch more efficient by not allocating a temporary MachineInstr
to do analysis.
*** FOLD getelementptr instructions into loads and stores when possible,
making use of some of the crazy X86 addressing modes.
For example, the following C++ program fragment:
struct complex {
  double re, im;
  complex(double r, double i) : re(r), im(i) {}
};

inline complex operator+(const complex& a, const complex& b) {
  return complex(a.re+b.re, a.im+b.im);
}

complex addone(const complex& arg) {
  return arg + complex(1,0);
}
Used to be compiled to:
_Z6addoneRK7complex:
        mov %EAX, DWORD PTR [%ESP + 4]
        mov %ECX, DWORD PTR [%ESP + 8]
***     mov %EDX, %ECX
        fld QWORD PTR [%EDX]
        fld1
        faddp %ST(1)
***     add %ECX, 8
        fld QWORD PTR [%ECX]
        fldz
        faddp %ST(1)
***     mov %ECX, %EAX
        fxch %ST(1)
        fstp QWORD PTR [%ECX]
***     add %EAX, 8
        fstp QWORD PTR [%EAX]
        ret
Now it is compiled to:
_Z6addoneRK7complex:
        mov %EAX, DWORD PTR [%ESP + 4]
        mov %ECX, DWORD PTR [%ESP + 8]
        fld QWORD PTR [%ECX]
        fld1
        faddp %ST(1)
        fld QWORD PTR [%ECX + 8]
        fldz
        faddp %ST(1)
        fxch %ST(1)
        fstp QWORD PTR [%EAX]
        fstp QWORD PTR [%EAX + 8]
        ret
Other programs should see similar improvements, across the board. Note that
in addition to reducing instruction count, this also reduces register pressure
a lot, always a good thing on X86. :)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@11819 91177308-0d34-0410-b5e6-96231b3b80d8
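As context for the diff below: an x86 memory operand can encode [BaseReg + Scale*IndexReg + Disp] in a single instruction, and "folding" a GEP means filling in those four components at instruction-selection time instead of emitting separate address arithmetic. A minimal standalone sketch of the accumulation that makes the example above work (AddressMode and foldConstantIndex are illustrative names for this note, not the patch's API):

#include <cstdint>

// The four components of an x86 addressing mode: [Base + Scale*Index + Disp].
struct AddressMode {
  unsigned BaseReg = 0;   // 0 = no base register chosen yet
  unsigned Scale = 1;     // 1, 2, 4, or 8
  unsigned IndexReg = 0;  // 0 = no index register
  int32_t Disp = 0;       // constant byte displacement
};

// Fold one constant GEP index into the displacement: a struct field index
// contributes its byte offset, and a constant array subscript contributes
// index * sizeof(element). This models the arithmetic getGEPIndex performs.
void foldConstantIndex(AddressMode &AM, int64_t Index, uint64_t ElementSize) {
  AM.Disp += static_cast<int32_t>(Index * static_cast<int64_t>(ElementSize));
}

In the addone example, the access to arg->im is the base pointer plus the byte offset of the second double (8), so the whole GEP collapses into the operand of one load: fld QWORD PTR [%ECX + 8].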
Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/X86/InstSelectSimple.cpp  240
-rw-r--r--  lib/Target/X86/X86ISelSimple.cpp     240
2 files changed, 368 insertions, 112 deletions
diff --git a/lib/Target/X86/InstSelectSimple.cpp b/lib/Target/X86/InstSelectSimple.cpp
index f0e0a5c..d6e3a75 100644
--- a/lib/Target/X86/InstSelectSimple.cpp
+++ b/lib/Target/X86/InstSelectSimple.cpp
@@ -222,6 +222,20 @@ namespace {
     ///
     void promote32(unsigned targetReg, const ValueRecord &VR);
 
+    // getGEPIndex - This is used to fold GEP instructions into X86 addressing
+    // expressions.
+    void getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
+                     std::vector<Value*> &GEPOps,
+                     std::vector<const Type*> &GEPTypes, unsigned &BaseReg,
+                     unsigned &Scale, unsigned &IndexReg, unsigned &Disp);
+
+    /// isGEPFoldable - Return true if the specified GEP can be completely
+    /// folded into the addressing mode of a load/store or lea instruction.
+    bool isGEPFoldable(MachineBasicBlock *MBB,
+                       Value *Src, User::op_iterator IdxBegin,
+                       User::op_iterator IdxEnd, unsigned &BaseReg,
+                       unsigned &Scale, unsigned &IndexReg, unsigned &Disp);
+
     /// emitGEPOperation - Common code shared between visitGetElementPtrInst and
     /// constant expression GEP support.
     ///
@@ -1884,14 +1898,32 @@ void ISel::emitShiftOperation(MachineBasicBlock *MBB,
 /// need to worry about the memory layout of the target machine.
 ///
 void ISel::visitLoadInst(LoadInst &I) {
-  unsigned SrcAddrReg = getReg(I.getOperand(0));
   unsigned DestReg = getReg(I);
+  unsigned BaseReg = 0, Scale = 1, IndexReg = 0, Disp = 0;
+  Value *Addr = I.getOperand(0);
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) {
+    if (isGEPFoldable(BB, GEP->getOperand(0), GEP->op_begin()+1, GEP->op_end(),
+                      BaseReg, Scale, IndexReg, Disp))
+      Addr = 0;   // Address is consumed!
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+    if (CE->getOpcode() == Instruction::GetElementPtr)
+      if (isGEPFoldable(BB, CE->getOperand(0), CE->op_begin()+1, CE->op_end(),
+                        BaseReg, Scale, IndexReg, Disp))
+        Addr = 0;
+  }
 
-  unsigned Class = getClassB(I.getType());
+  if (Addr) {
+    // If it's not foldable, reset addr mode.
+    BaseReg = getReg(Addr);
+    Scale = 1; IndexReg = 0; Disp = 0;
+  }
 
+  unsigned Class = getClassB(I.getType());
   if (Class == cLong) {
-    addDirectMem(BuildMI(BB, X86::MOVrm32, 4, DestReg), SrcAddrReg);
-    addRegOffset(BuildMI(BB, X86::MOVrm32, 4, DestReg+1), SrcAddrReg, 4);
+    addFullAddress(BuildMI(BB, X86::MOVrm32, 4, DestReg),
+                   BaseReg, Scale, IndexReg, Disp);
+    addFullAddress(BuildMI(BB, X86::MOVrm32, 4, DestReg+1),
+                   BaseReg, Scale, IndexReg, Disp+4);
     return;
   }
 
@@ -1900,37 +1932,61 @@ void ISel::visitLoadInst(LoadInst &I) {
   };
   unsigned Opcode = Opcodes[Class];
   if (I.getType() == Type::DoubleTy) Opcode = X86::FLDr64;
-  addDirectMem(BuildMI(BB, Opcode, 4, DestReg), SrcAddrReg);
+  addFullAddress(BuildMI(BB, Opcode, 4, DestReg),
+                 BaseReg, Scale, IndexReg, Disp);
 }
 
 /// visitStoreInst - Implement LLVM store instructions in terms of the x86 'mov'
 /// instruction.
 ///
 void ISel::visitStoreInst(StoreInst &I) {
-  unsigned AddressReg = getReg(I.getOperand(1));
+  unsigned BaseReg = 0, Scale = 1, IndexReg = 0, Disp = 0;
+  Value *Addr = I.getOperand(1);
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) {
+    if (isGEPFoldable(BB, GEP->getOperand(0), GEP->op_begin()+1, GEP->op_end(),
+                      BaseReg, Scale, IndexReg, Disp))
+      Addr = 0;   // Address is consumed!
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+    if (CE->getOpcode() == Instruction::GetElementPtr)
+      if (isGEPFoldable(BB, CE->getOperand(0), CE->op_begin()+1, CE->op_end(),
+                        BaseReg, Scale, IndexReg, Disp))
+        Addr = 0;
+  }
+
+  if (Addr) {
+    // If it's not foldable, reset addr mode.
+    BaseReg = getReg(Addr);
+    Scale = 1; IndexReg = 0; Disp = 0;
+  }
+
   const Type *ValTy = I.getOperand(0)->getType();
   unsigned Class = getClassB(ValTy);
 
   if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(0))) {
     uint64_t Val = CI->getRawValue();
     if (Class == cLong) {
-      addDirectMem(BuildMI(BB, X86::MOVmi32, 5), AddressReg).addZImm(Val & ~0U);
-      addRegOffset(BuildMI(BB, X86::MOVmi32, 5), AddressReg,4).addZImm(Val>>32);
+      addFullAddress(BuildMI(BB, X86::MOVmi32, 5),
+                     BaseReg, Scale, IndexReg, Disp).addZImm(Val & ~0U);
+      addFullAddress(BuildMI(BB, X86::MOVmi32, 5),
+                     BaseReg, Scale, IndexReg, Disp+4).addZImm(Val>>32);
     } else {
       static const unsigned Opcodes[] = {
         X86::MOVmi8, X86::MOVmi16, X86::MOVmi32
       };
       unsigned Opcode = Opcodes[Class];
-      addDirectMem(BuildMI(BB, Opcode, 5), AddressReg).addZImm(Val);
+      addFullAddress(BuildMI(BB, Opcode, 5),
+                     BaseReg, Scale, IndexReg, Disp).addZImm(Val);
     }
   } else if (ConstantBool *CB = dyn_cast<ConstantBool>(I.getOperand(0))) {
-    addDirectMem(BuildMI(BB, X86::MOVmi8, 5),
-                 AddressReg).addZImm(CB->getValue());
+    addFullAddress(BuildMI(BB, X86::MOVmi8, 5),
+                   BaseReg, Scale, IndexReg, Disp).addZImm(CB->getValue());
  } else {
     if (Class == cLong) {
       unsigned ValReg = getReg(I.getOperand(0));
-      addDirectMem(BuildMI(BB, X86::MOVmr32, 5), AddressReg).addReg(ValReg);
-      addRegOffset(BuildMI(BB, X86::MOVmr32, 5), AddressReg,4).addReg(ValReg+1);
+      addFullAddress(BuildMI(BB, X86::MOVmr32, 5),
+                     BaseReg, Scale, IndexReg, Disp).addReg(ValReg);
+      addFullAddress(BuildMI(BB, X86::MOVmr32, 5),
+                     BaseReg, Scale, IndexReg, Disp+4).addReg(ValReg+1);
     } else {
       unsigned ValReg = getReg(I.getOperand(0));
       static const unsigned Opcodes[] = {
@@ -1938,7 +1994,8 @@ void ISel::visitStoreInst(StoreInst &I) {
       };
       unsigned Opcode = Opcodes[Class];
       if (ValTy == Type::DoubleTy) Opcode = X86::FSTr64;
-      addDirectMem(BuildMI(BB, Opcode, 1+4), AddressReg).addReg(ValReg);
+      addFullAddress(BuildMI(BB, Opcode, 1+4),
+                     BaseReg, Scale, IndexReg, Disp).addReg(ValReg);
     }
   }
 }
@@ -2138,7 +2195,8 @@ void ISel::emitCastOperation(MachineBasicBlock *BB,
   }
 
   // Spill the integer to memory and reload it from there...
-  int FrameIdx = F->getFrameInfo()->CreateStackObject(SrcTy, TM.getTargetData());
+  int FrameIdx =
+    F->getFrameInfo()->CreateStackObject(SrcTy, TM.getTargetData());
 
   if (SrcClass == cLong) {
     addFrameReference(BMI(BB, IP, X86::MOVmr32, 5), FrameIdx).addReg(SrcReg);
@@ -2160,15 +2218,18 @@ void ISel::emitCastOperation(MachineBasicBlock *BB,
     // Emit a test instruction to see if the dynamic input value was signed.
     BMI(BB, IP, X86::TESTrr32, 2).addReg(SrcReg+1).addReg(SrcReg+1);
 
-    // If the sign bit is set, get a pointer to an offset, otherwise get a pointer to a zero.
+    // If the sign bit is set, get a pointer to an offset, otherwise get a
+    // pointer to a zero.
     MachineConstantPool *CP = F->getConstantPool();
     unsigned Zero = makeAnotherReg(Type::IntTy);
+    Constant *Null = Constant::getNullValue(Type::UIntTy);
     addConstantPoolReference(BMI(BB, IP, X86::LEAr32, 5, Zero),
-                             CP->getConstantPoolIndex(Constant::getNullValue(Type::UIntTy)));
+                             CP->getConstantPoolIndex(Null));
     unsigned Offset = makeAnotherReg(Type::IntTy);
+    Constant *OffsetCst = ConstantUInt::get(Type::UIntTy, 0x5f800000);
+
     addConstantPoolReference(BMI(BB, IP, X86::LEAr32, 5, Offset),
-                             CP->getConstantPoolIndex(ConstantUInt::get(Type::UIntTy,
-                                                                        0x5f800000)));
+                             CP->getConstantPoolIndex(OffsetCst));
     unsigned Addr = makeAnotherReg(Type::IntTy);
     BMI(BB, IP, X86::CMOVSrr32, 2, Addr).addReg(Zero).addReg(Offset);
@@ -2303,6 +2364,26 @@ void ISel::visitVAArgInst(VAArgInst &I) {
 
 
 void ISel::visitGetElementPtrInst(GetElementPtrInst &I) {
+  // If this GEP instruction will be folded into all of its users, we don't need
+  // to explicitly calculate it!
+  unsigned A, B, C, D;
+  if (isGEPFoldable(0, I.getOperand(0), I.op_begin()+1, I.op_end(), A,B,C,D)) {
+    // Check all of the users of the instruction to see if they are loads and
+    // stores.
+    bool AllWillFold = true;
+    for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E; ++UI)
+      if (cast<Instruction>(*UI)->getOpcode() != Instruction::Load)
+        if (cast<Instruction>(*UI)->getOpcode() != Instruction::Store ||
+            cast<Instruction>(*UI)->getOperand(0) == &I) {
+          AllWillFold = false;
+          break;
+        }
+
+    // If the instruction is foldable, and will be folded into all users, don't
+    // emit it!
+    if (AllWillFold) return;
+  }
+
   unsigned outputReg = getReg(I);
   emitGEPOperation(BB, BB->end(), I.getOperand(0),
                    I.op_begin()+1, I.op_end(), outputReg);
@@ -2319,15 +2400,18 @@ void ISel::visitGetElementPtrInst(GetElementPtrInst &I) {
 ///
 /// Note that there is one fewer entry in GEPTypes than there is in GEPOps.
 ///
-static void getGEPIndex(std::vector<Value*> &GEPOps,
-                        std::vector<const Type*> &GEPTypes,
-                        MachineInstr *Ops, const TargetData &TD){
+void ISel::getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
+                       std::vector<Value*> &GEPOps,
+                       std::vector<const Type*> &GEPTypes, unsigned &BaseReg,
+                       unsigned &Scale, unsigned &IndexReg, unsigned &Disp) {
+  const TargetData &TD = TM.getTargetData();
+
   // Clear out the state we are working with...
-  Ops->getOperand(0).setReg(0);            // No base register
-  Ops->getOperand(1).setImmedValue(1);     // Unit scale
-  Ops->getOperand(2).setReg(0);            // No index register
-  Ops->getOperand(3).setImmedValue(0);     // No displacement
-
+  BaseReg = 0;    // No base register
+  Scale = 1;      // Unit scale
+  IndexReg = 0;   // No index register
+  Disp = 0;       // No displacement
+
   // While there are GEP indexes that can be folded into the current address,
   // keep processing them.
   while (!GEPTypes.empty()) {
@@ -2340,14 +2424,7 @@ static void getGEPIndex(std::vector<Value*> &GEPOps,
       // structure is in memory.  Since the structure index must be constant, we
       // can get its value and use it to find the right byte offset from the
       // StructLayout class's list of structure member offsets.
-      unsigned idxValue = CUI->getValue();
-      unsigned FieldOff = TD.getStructLayout(StTy)->MemberOffsets[idxValue];
-      if (FieldOff) {
-        if (Ops->getOperand(2).getReg())
-          return;    // Already has an index, can't add offset.
-        Ops->getOperand(3).setImmedValue(FieldOff+
-                                         Ops->getOperand(3).getImmedValue());
-      }
+      Disp += TD.getStructLayout(StTy)->MemberOffsets[CUI->getValue()];
       GEPOps.pop_back();        // Consume a GEP operand
       GEPTypes.pop_back();
     } else {
@@ -2362,10 +2439,7 @@ static void getGEPIndex(std::vector<Value*> &GEPOps,
 
       // If idx is a constant, fold it into the offset.
       if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(idx)) {
-        unsigned elementSize = TD.getTypeSize(SqTy->getElementType());
-        unsigned Offset = elementSize*CSI->getValue();
-        Ops->getOperand(3).setImmedValue(Offset+
-                                         Ops->getOperand(3).getImmedValue());
+        Disp += TD.getTypeSize(SqTy->getElementType())*CSI->getValue();
       } else {
         // If we can't handle it, return.
         return;
@@ -2375,15 +2449,49 @@ static void getGEPIndex(std::vector<Value*> &GEPOps,
       GEPTypes.pop_back();
     }
   }
+
+  // GEPTypes is empty, which means we have a single operand left.  See if we
+  // can set it as the base register.
+  //
+  // FIXME: When addressing modes are more powerful/correct, we could load
+  // global addresses directly as 32-bit immediates.
+  assert(BaseReg == 0);
+  BaseReg = MBB ? getReg(GEPOps[0], MBB, IP) : 0;
+  GEPOps.pop_back();        // Consume the last GEP operand
 }
 
+/// isGEPFoldable - Return true if the specified GEP can be completely
+/// folded into the addressing mode of a load/store or lea instruction.
+bool ISel::isGEPFoldable(MachineBasicBlock *MBB,
+                         Value *Src, User::op_iterator IdxBegin,
+                         User::op_iterator IdxEnd, unsigned &BaseReg,
+                         unsigned &Scale, unsigned &IndexReg, unsigned &Disp) {
+  if (ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(Src))
+    Src = CPR->getValue();
+
+  std::vector<Value*> GEPOps;
+  GEPOps.resize(IdxEnd-IdxBegin+1);
+  GEPOps[0] = Src;
+  std::copy(IdxBegin, IdxEnd, GEPOps.begin()+1);
+
+  std::vector<const Type*> GEPTypes;
+  GEPTypes.assign(gep_type_begin(Src->getType(), IdxBegin, IdxEnd),
+                  gep_type_end(Src->getType(), IdxBegin, IdxEnd));
+
+  MachineBasicBlock::iterator IP;
+  if (MBB) IP = MBB->end();
+  getGEPIndex(MBB, IP, GEPOps, GEPTypes, BaseReg, Scale, IndexReg, Disp);
+
+  // We can fold it away iff the getGEPIndex call eliminated all operands.
+  return GEPOps.empty();
+}
+
 void ISel::emitGEPOperation(MachineBasicBlock *MBB,
                             MachineBasicBlock::iterator IP,
                             Value *Src, User::op_iterator IdxBegin,
                             User::op_iterator IdxEnd, unsigned TargetReg) {
   const TargetData &TD = TM.getTargetData();
-
   if (ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(Src))
     Src = CPR->getValue();
@@ -2396,27 +2504,28 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
   GEPTypes.assign(gep_type_begin(Src->getType(), IdxBegin, IdxEnd),
                   gep_type_end(Src->getType(), IdxBegin, IdxEnd));
 
-  // DummyMI - A dummy instruction to pass into getGEPIndex.  The opcode doesn't
-  // matter, we just need 4 MachineOperands.
-  MachineInstr *DummyMI =
-    BuildMI(X86::PHI, 4).addReg(0).addZImm(1).addReg(0).addSImm(0);
-
   // Keep emitting instructions until we consume the entire GEP instruction.
   while (!GEPOps.empty()) {
     unsigned OldSize = GEPOps.size();
-    getGEPIndex(GEPOps, GEPTypes, DummyMI, TD);
+    unsigned BaseReg, Scale, IndexReg, Disp;
+    getGEPIndex(MBB, IP, GEPOps, GEPTypes, BaseReg, Scale, IndexReg, Disp);
 
     if (GEPOps.size() != OldSize) {
      // getGEPIndex consumed some of the input.  Build an LEA instruction here.
-      assert(DummyMI->getOperand(0).getReg() == 0 &&
-             DummyMI->getOperand(1).getImmedValue() == 1 &&
-             DummyMI->getOperand(2).getReg() == 0 &&
-             "Unhandled GEP fold!");
-      if (unsigned Offset = DummyMI->getOperand(3).getImmedValue()) {
-        unsigned Reg = makeAnotherReg(Type::UIntTy);
-        addRegOffset(BMI(MBB, IP, X86::LEAr32, 5, TargetReg), Reg, Offset);
-        TargetReg = Reg;
+      unsigned NextTarget = 0;
+      if (!GEPOps.empty()) {
+        assert(BaseReg == 0 &&
+           "getGEPIndex should have left the base register open for chaining!");
+        NextTarget = BaseReg = makeAnotherReg(Type::UIntTy);
       }
+
+      if (IndexReg == 0 && Disp == 0)
+        BMI(MBB, IP, X86::MOVrr32, 1, TargetReg).addReg(BaseReg);
+      else
+        addFullAddress(BMI(MBB, IP, X86::LEAr32, 5, TargetReg),
+                       BaseReg, Scale, IndexReg, Disp);
+      --IP;
+      TargetReg = NextTarget;
     } else if (GEPTypes.empty()) {
       // The getGEPIndex operation didn't want to build an LEA.  Check to see if
       // all operands are consumed but the base pointer.  If so, just load it
@@ -2428,6 +2537,27 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
         BMI(MBB, IP, X86::MOVrr32, 1, TargetReg).addReg(BaseReg);
       }
       break;                // we are now done
+
+    } else if (const StructType *StTy = dyn_cast<StructType>(GEPTypes.back())) {
+      // It's a struct access.  CUI is the index into the structure,
+      // which names the field. This index must have unsigned type.
+      const ConstantUInt *CUI = cast<ConstantUInt>(GEPOps.back());
+      GEPOps.pop_back();        // Consume a GEP operand
+      GEPTypes.pop_back();
+
+      // Use the TargetData structure to pick out what the layout of the
+      // structure is in memory.  Since the structure index must be constant, we
+      // can get its value and use it to find the right byte offset from the
+      // StructLayout class's list of structure member offsets.
+      unsigned idxValue = CUI->getValue();
+      unsigned FieldOff = TD.getStructLayout(StTy)->MemberOffsets[idxValue];
+      if (FieldOff) {
+        unsigned Reg = makeAnotherReg(Type::UIntTy);
+        // Emit an ADD to add FieldOff to the basePtr.
+        BMI(MBB, IP, X86::ADDri32, 2, TargetReg).addReg(Reg).addZImm(FieldOff);
+        --IP;            // Insert the next instruction before this one.
+        TargetReg = Reg; // Codegen the rest of the GEP into this
+      }
     } else {
       // It's an array or pointer access: [ArraySize x ElementType].
       const SequentialType *SqTy = cast<SequentialType>(GEPTypes.back());
@@ -2496,8 +2626,6 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
       }
     }
   }
-
-  delete DummyMI;
 }
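For readers skimming the diff: the heart of the change is that getGEPIndex now computes the (BaseReg, Scale, IndexReg, Disp) tuple directly instead of mutating a scratch MachineInstr. A compressed standalone model of the folding decision, with hypothetical GEPIndex/foldGEP types invented for this note (the real code walks LLVM Value/Type lists and consumes indices from the end, so it can also stop partway and leave a prefix for emitGEPOperation to chain through an extra LEA):

#include <cstdint>
#include <vector>

// One GEP index, pre-resolved against TargetData: a struct index always has
// a constant field offset; an array subscript is foldable only when it is a
// compile-time constant.
struct GEPIndex {
  bool IsStructField;    // struct field number vs. array/pointer subscript
  bool IsConstant;       // struct indices are always constant
  int64_t Value;         // the constant subscript (array case)
  uint64_t FieldOffset;  // byte offset of the field (struct case)
  uint64_t ElementSize;  // element size in bytes (array case)
};

// The GEP folds completely into one addressing mode iff every index collapses
// into the constant displacement; the untouched base pointer becomes BaseReg.
bool foldGEP(const std::vector<GEPIndex> &Indices, int64_t &Disp) {
  Disp = 0;
  for (const GEPIndex &Idx : Indices) {
    if (Idx.IsStructField)
      Disp += (int64_t)Idx.FieldOffset;
    else if (Idx.IsConstant)
      Disp += (int64_t)Idx.ElementSize * Idx.Value;
    else
      return false;  // runtime subscript: not foldable into Disp
  }
  return true;
}

Note that in this patch Scale and IndexReg are always left at their defaults (1 and none); only constant offsets are folded, which already covers the struct accesses in the addone example.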