Diffstat (limited to 'lib/CodeGen')
 lib/CodeGen/AggressiveAntiDepBreaker.cpp | 18
 lib/CodeGen/AllocationOrder.cpp | 6
 lib/CodeGen/AllocationOrder.h | 15
 lib/CodeGen/Analysis.cpp | 15
 lib/CodeGen/AsmPrinter/ARMException.cpp | 3
 lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 132
 lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 12
 lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 10
 lib/CodeGen/AsmPrinter/DIE.cpp | 9
 lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 2
 lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 217
 lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 28
 lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 293
 lib/CodeGen/AsmPrinter/DwarfDebug.h | 37
 lib/CodeGen/AsmPrinter/DwarfException.cpp | 3
 lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 4
 lib/CodeGen/AsmPrinter/Win64Exception.cpp | 1
 lib/CodeGen/BasicTargetTransformInfo.cpp | 56
 lib/CodeGen/CMakeLists.txt | 15
 lib/CodeGen/CriticalAntiDepBreaker.cpp | 50
 lib/CodeGen/CriticalAntiDepBreaker.h | 3
 lib/CodeGen/DeadMachineInstructionElim.cpp | 9
 lib/CodeGen/DwarfEHPrepare.cpp | 2
 lib/CodeGen/EarlyIfConversion.cpp | 4
 lib/CodeGen/ExpandPostRAPseudos.cpp | 39
 lib/CodeGen/GCMetadata.cpp | 41
 lib/CodeGen/IfConversion.cpp | 4
 lib/CodeGen/IntrinsicLowering.cpp | 36
 lib/CodeGen/LLVMBuild.txt | 2
 lib/CodeGen/LLVMTargetMachine.cpp | 2
 lib/CodeGen/LexicalScopes.cpp | 14
 lib/CodeGen/LiveDebugVariables.cpp | 67
 lib/CodeGen/LiveInterval.cpp | 364
 lib/CodeGen/LiveIntervalAnalysis.cpp | 439
 lib/CodeGen/LiveRangeCalc.cpp | 94
 lib/CodeGen/LiveRangeCalc.h | 29
 lib/CodeGen/LiveVariables.cpp | 23
 lib/CodeGen/MachineBasicBlock.cpp | 123
 lib/CodeGen/MachineBlockPlacement.cpp | 2
 lib/CodeGen/MachineFunction.cpp | 7
 lib/CodeGen/MachineInstr.cpp | 49
 lib/CodeGen/MachineLICM.cpp | 2
 lib/CodeGen/MachineRegisterInfo.cpp | 7
 lib/CodeGen/MachineScheduler.cpp | 246
 lib/CodeGen/MachineTraceMetrics.cpp | 2
 lib/CodeGen/MachineTraceMetrics.h | 350
 lib/CodeGen/PHIElimination.cpp | 256
 lib/CodeGen/Passes.cpp | 12
 lib/CodeGen/PostRASchedulerList.cpp | 24
 lib/CodeGen/PrologEpilogInserter.cpp | 65
 lib/CodeGen/RegAllocFast.cpp | 109
 lib/CodeGen/RegAllocGreedy.cpp | 19
 lib/CodeGen/RegisterClassInfo.cpp | 24
 lib/CodeGen/RegisterCoalescer.cpp | 42
 lib/CodeGen/RegisterPressure.cpp | 11
 lib/CodeGen/RegisterScavenging.cpp | 18
 lib/CodeGen/ScheduleDAG.cpp | 29
 lib/CodeGen/ScheduleDAGInstrs.cpp | 305
 lib/CodeGen/ScheduleDAGPrinter.cpp | 4
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 256
 lib/CodeGen/SelectionDAG/FastISel.cpp | 32
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 200
 lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 478
 lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 52
 lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 40
 lib/CodeGen/SelectionDAG/LegalizeTypes.h | 22
 lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 180
 lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 106
 lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 2
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 222
 lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 33
 lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 1
 lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 68
 lib/CodeGen/SelectionDAG/TargetLowering.cpp | 1387
 lib/CodeGen/SjLjEHPrepare.cpp | 6
 lib/CodeGen/SlotIndexes.cpp | 70
 lib/CodeGen/StackColoring.cpp | 130
 lib/CodeGen/StackProtector.cpp | 126
 lib/CodeGen/TargetInstrInfo.cpp | 2
 lib/CodeGen/TargetLoweringBase.cpp | 1290
 lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 33
 lib/CodeGen/TargetRegisterInfo.cpp | 1
 lib/CodeGen/TwoAddressInstructionPass.cpp | 324
 83 files changed, 5048 insertions(+), 3817 deletions(-)
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 152d9fa..c50f8b5 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -151,23 +151,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
std::vector<unsigned> &KillIndices = State->GetKillIndices();
std::vector<unsigned> &DefIndices = State->GetDefIndices();
- // Determine the live-out physregs for this block.
- if (IsReturnBlock) {
- // In a return block, examine the function live-out regs.
- for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
- E = MRI.liveout_end(); I != E; ++I) {
- for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
- unsigned Reg = *AI;
- State->UnionGroups(Reg, 0);
- KillIndices[Reg] = BB->size();
- DefIndices[Reg] = ~0u;
- }
- }
- }
-
- // In a non-return block, examine the live-in regs of all successors.
- // Note a return block can have successors if the return instruction is
- // predicated.
+ // Examine the live-in regs of all successors.
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
index 94754a0..3fa1f8f 100644
--- a/lib/CodeGen/AllocationOrder.cpp
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -22,7 +22,6 @@
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -45,4 +44,9 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
dbgs() << '\n';
}
});
+#ifndef NDEBUG
+ for (unsigned I = 0, E = Hints.size(); I != E; ++I)
+ assert(std::find(Order.begin(), Order.end(), Hints[I]) != Order.end() &&
+ "Target hint is outside allocation order.");
+#endif
}
diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h
index a5293f6..aed461a 100644
--- a/lib/CodeGen/AllocationOrder.h
+++ b/lib/CodeGen/AllocationOrder.h
@@ -39,6 +39,9 @@ public:
const VirtRegMap &VRM,
const RegisterClassInfo &RegClassInfo);
+ /// Get the allocation order without reordered hints.
+ ArrayRef<MCPhysReg> getOrder() const { return Order; }
+
/// Return the next physical register in the allocation order, or 0.
/// It is safe to call next() again after it returned 0, it will keep
/// returning 0 until rewind() is called.
@@ -53,6 +56,18 @@ public:
return 0;
}
+ /// As next(), but allow duplicates to be returned, and stop before the
+ /// Limit'th register in the RegisterClassInfo allocation order.
+ ///
+ /// This can produce more than Limit registers if there are hints.
+ unsigned nextWithDups(unsigned Limit) {
+ if (Pos < 0)
+ return Hints.end()[Pos++];
+ if (Pos < int(Limit))
+ return Order[Pos++];
+ return 0;
+ }
+
/// Start over from the beginning.
void rewind() { Pos = -int(Hints.size()); }
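The cursor logic above is compact enough to misread: Pos starts at -Hints.size(), so Hints.end()[Pos] walks the hint list front to back while Pos is negative, and only then is Order scanned from index 0, which is how a hinted register can be handed out a second time. A minimal standalone sketch of nextWithDups(), with std::vector standing in for the class's ArrayRef members (illustrative only, not the LLVM class):

    #include <cstdio>
    #include <utility>
    #include <vector>

    struct OrderSketch {
      std::vector<unsigned> Hints; // preferred registers, tried first
      std::vector<unsigned> Order; // the full allocation order
      int Pos;                     // negative while hints remain

      OrderSketch(std::vector<unsigned> H, std::vector<unsigned> O)
          : Hints(std::move(H)), Order(std::move(O)), Pos(-int(Hints.size())) {}

      // Mirrors nextWithDups(): hints first, then Order[0..Limit), with no
      // duplicate filtering.
      unsigned nextWithDups(unsigned Limit) {
        if (Pos < 0)
          return Hints.end()[Pos++]; // indexes the hint list from the front
        if (Pos < int(Limit))
          return Order[Pos++];
        return 0;
      }
    };

    int main() {
      OrderSketch O({7}, {3, 5, 7, 9});
      unsigned Reg;
      while ((Reg = O.nextWithDups(3)) != 0)
        std::printf("%u ", Reg); // prints "7 3 5 7": the hint, then its dup
      std::printf("\n");
    }

With Hints = {7} and Limit = 3 the sequence is 7 3 5 7: the hint comes first, and the dup appears when the scan of Order reaches register 7 again.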
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index aaba144..c7abf7a 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -265,8 +265,7 @@ static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) {
/// between it and the return.
///
/// This function only tests target-independent requirements.
-bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attribute CalleeRetAttr,
- const TargetLowering &TLI) {
+bool llvm::isInTailCallPosition(ImmutableCallSite CS,const TargetLowering &TLI){
const Instruction *I = CS.getInstruction();
const BasicBlock *ExitBB = I->getParent();
const TerminatorInst *Term = ExitBB->getTerminator();
@@ -312,14 +311,16 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attribute CalleeRetAttr,
// Conservatively require the attributes of the call to match those of
// the return. Ignore noalias because it doesn't affect the call sequence.
const Function *F = ExitBB->getParent();
- Attribute CallerRetAttr = F->getAttributes().getRetAttributes();
- if (AttrBuilder(CalleeRetAttr).removeAttribute(Attribute::NoAlias) !=
- AttrBuilder(CallerRetAttr).removeAttribute(Attribute::NoAlias))
+ AttributeSet CallerAttrs = F->getAttributes();
+ if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex).
+ removeAttribute(Attribute::NoAlias) !=
+ AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex).
+ removeAttribute(Attribute::NoAlias))
return false;
// It's not safe to eliminate the sign / zero extension of the return value.
- if (CallerRetAttr.hasAttribute(Attribute::ZExt) ||
- CallerRetAttr.hasAttribute(Attribute::SExt))
+ if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
+ CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
return false;
// Otherwise, make sure the unmodified return value of I is the return value.
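For context, the invariant this hunk ports to the new AttributeSet API: the return attributes of the call and of the caller must match once noalias is dropped (noalias does not change the call sequence), and a zext/sext return attribute rules out a tail call because the caller performs the extension after the call returns, code a tail call would skip. A sketch of that check, with std::set standing in for AttrBuilder and a toy Attr enum (both assumptions, not the LLVM types):

    #include <cassert>
    #include <set>

    enum Attr { ZExt, SExt, NoAlias, NonNull };
    using AttrSet = std::set<Attr>;

    // Return attributes must agree once noalias is dropped from both sides.
    bool retAttrsCompatible(AttrSet Caller, AttrSet Callee) {
      Caller.erase(NoAlias);
      Callee.erase(NoAlias);
      return Caller == Callee;
    }

    // A zext/sext on the caller's return means an extension happens after
    // the call; eliminating the call to a tail call would skip it.
    bool extensionSafe(const AttrSet &CallerRet) {
      return !CallerRet.count(ZExt) && !CallerRet.count(SExt);
    }

    int main() {
      assert(retAttrsCompatible({NoAlias, NonNull}, {NonNull}));
      assert(!retAttrsCompatible({ZExt}, {}));
      assert(!extensionSafe({SExt})); // sign extension blocks tail-call position
    }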
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 1728331..188047d 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -32,12 +32,11 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
-cl::opt<bool>
+static cl::opt<bool>
EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden,
cl::desc("Generate ARM EHABI tables with unwinding descriptors"),
cl::init(false));
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index cbcc290..aa9d43b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -391,9 +391,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// - pointer to mangled symbol above with initializer
unsigned PtrSize = TD->getPointerSizeInBits()/8;
OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
- PtrSize, 0);
- OutStreamer.EmitIntValue(0, PtrSize, 0);
- OutStreamer.EmitSymbolValue(MangSym, PtrSize, 0);
+ PtrSize);
+ OutStreamer.EmitIntValue(0, PtrSize);
+ OutStreamer.EmitSymbolValue(MangSym, PtrSize);
OutStreamer.AddBlankLine();
return;
@@ -1040,7 +1040,7 @@ void AsmPrinter::EmitConstantPool() {
// Emit inter-object padding for alignment.
unsigned AlignMask = CPE.getAlignment() - 1;
unsigned NewOffset = (Offset + AlignMask) & ~AlignMask;
- OutStreamer.EmitFill(NewOffset - Offset, 0/*fillval*/, 0/*addrspace*/);
+ OutStreamer.EmitZeros(NewOffset - Offset);
Type *Ty = CPE.getType();
Offset = NewOffset + TM.getDataLayout()->getTypeAllocSize(Ty);
@@ -1203,7 +1203,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
assert(Value && "Unknown entry kind!");
unsigned EntrySize = MJTI->getEntrySize(*TM.getDataLayout());
- OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0);
+ OutStreamer.EmitValue(Value, EntrySize);
}
@@ -1326,19 +1326,19 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
/// EmitInt8 - Emit a byte directive and value.
///
void AsmPrinter::EmitInt8(int Value) const {
- OutStreamer.EmitIntValue(Value, 1, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(Value, 1);
}
/// EmitInt16 - Emit a short directive and value.
///
void AsmPrinter::EmitInt16(int Value) const {
- OutStreamer.EmitIntValue(Value, 2, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(Value, 2);
}
/// EmitInt32 - Emit a long directive and value.
///
void AsmPrinter::EmitInt32(int Value) const {
- OutStreamer.EmitIntValue(Value, 4, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(Value, 4);
}
/// EmitLabelDifference - Emit something like ".long Hi-Lo" where the size
@@ -1353,14 +1353,14 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
OutContext);
if (!MAI->hasSetDirective()) {
- OutStreamer.EmitValue(Diff, Size, 0/*AddrSpace*/);
+ OutStreamer.EmitValue(Diff, Size);
return;
}
// Otherwise, emit with .set (aka assignment).
MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
OutStreamer.EmitAssignment(SetLabel, Diff);
- OutStreamer.EmitSymbolValue(SetLabel, Size, 0/*AddrSpace*/);
+ OutStreamer.EmitSymbolValue(SetLabel, Size);
}
/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo"
@@ -1384,12 +1384,12 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
OutContext);
if (!MAI->hasSetDirective())
- OutStreamer.EmitValue(Diff, 4, 0/*AddrSpace*/);
+ OutStreamer.EmitValue(Diff, 4);
else {
// Otherwise, emit with .set (aka assignment).
MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
OutStreamer.EmitAssignment(SetLabel, Diff);
- OutStreamer.EmitSymbolValue(SetLabel, 4, 0/*AddrSpace*/);
+ OutStreamer.EmitSymbolValue(SetLabel, 4);
}
}
@@ -1407,7 +1407,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
MCConstantExpr::Create(Offset, OutContext),
OutContext);
- OutStreamer.EmitValue(Expr, Size, 0/*AddrSpace*/);
+ OutStreamer.EmitValue(Expr, Size);
}
@@ -1746,90 +1746,48 @@ static void emitGlobalConstantStruct(const ConstantStruct *CS,
static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
AsmPrinter &AP) {
- if (CFP->getType()->isHalfTy()) {
- if (AP.isVerbose()) {
- SmallString<10> Str;
- CFP->getValueAPF().toString(Str);
- AP.OutStreamer.GetCommentOS() << "half " << Str << '\n';
- }
- uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- AP.OutStreamer.EmitIntValue(Val, 2, AddrSpace);
- return;
- }
-
- if (CFP->getType()->isFloatTy()) {
- if (AP.isVerbose()) {
- float Val = CFP->getValueAPF().convertToFloat();
- uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- AP.OutStreamer.GetCommentOS() << "float " << Val << '\n'
- << " (" << format("0x%x", IntVal) << ")\n";
- }
- uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace);
- return;
- }
+ APInt API = CFP->getValueAPF().bitcastToAPInt();
- // FP Constants are printed as integer constants to avoid losing
- // precision.
- if (CFP->getType()->isDoubleTy()) {
- if (AP.isVerbose()) {
- double Val = CFP->getValueAPF().convertToDouble();
- uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- AP.OutStreamer.GetCommentOS() << "double " << Val << '\n'
- << " (" << format("0x%lx", IntVal) << ")\n";
- }
+ // First print a comment with what we think the original floating-point value
+ // should have been.
+ if (AP.isVerbose()) {
+ SmallString<8> StrVal;
+ CFP->getValueAPF().toString(StrVal);
- uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
- return;
+ CFP->getType()->print(AP.OutStreamer.GetCommentOS());
+ AP.OutStreamer.GetCommentOS() << ' ' << StrVal << '\n';
}
- if (CFP->getType()->isX86_FP80Ty() || CFP->getType()->isFP128Ty()) {
- // all long double variants are printed as hex
- // API needed to prevent premature destruction
- APInt API = CFP->getValueAPF().bitcastToAPInt();
- const uint64_t *p = API.getRawData();
- if (AP.isVerbose()) {
- // Convert to double so we can print the approximate val as a comment.
- SmallString<8> StrVal;
- CFP->getValueAPF().toString(StrVal);
+ // Now iterate through the APInt chunks, emitting them in endian-correct
+ // order, possibly with a smaller chunk at beginning/end (e.g. for x87 80-bit
+ // floats).
+ unsigned NumBytes = API.getBitWidth() / 8;
+ unsigned TrailingBytes = NumBytes % sizeof(uint64_t);
+ const uint64_t *p = API.getRawData();
- const char *TyNote = CFP->getType()->isFP128Ty() ? "fp128 " : "x86_fp80 ";
- AP.OutStreamer.GetCommentOS() << TyNote << StrVal << '\n';
- }
+ // PPC's long double has odd notions of endianness compared to how LLVM
+ // handles it: p[0] goes first for *big* endian on PPC.
+ if (AP.TM.getDataLayout()->isBigEndian() != CFP->getType()->isPPC_FP128Ty()) {
+ int Chunk = API.getNumWords() - 1;
- // The 80-bit type is made of a 64-bit and 16-bit value, the 128-bit has 2
- // 64-bit words.
- uint32_t TrailingSize = CFP->getType()->isFP128Ty() ? 8 : 2;
+ if (TrailingBytes)
+ AP.OutStreamer.EmitIntValue(p[Chunk--], TrailingBytes, AddrSpace);
- if (AP.TM.getDataLayout()->isBigEndian()) {
- AP.OutStreamer.EmitIntValue(p[1], TrailingSize, AddrSpace);
- AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
- } else {
- AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
- AP.OutStreamer.EmitIntValue(p[1], TrailingSize, AddrSpace);
- }
+ for (; Chunk >= 0; --Chunk)
+ AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace);
+ } else {
+ unsigned Chunk;
+ for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk)
+ AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace);
- // Emit the tail padding for the long double.
- const DataLayout &TD = *AP.TM.getDataLayout();
- AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) -
- TD.getTypeStoreSize(CFP->getType()), AddrSpace);
- return;
+ if (TrailingBytes)
+ AP.OutStreamer.EmitIntValue(p[Chunk], TrailingBytes, AddrSpace);
}
- assert(CFP->getType()->isPPC_FP128Ty() &&
- "Floating point constant type not handled");
- // All long double variants are printed as hex
- // API needed to prevent premature destruction.
- APInt API = CFP->getValueAPF().bitcastToAPInt();
- const uint64_t *p = API.getRawData();
- if (AP.TM.getDataLayout()->isBigEndian()) {
- AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
- AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace);
- } else {
- AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace);
- AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
- }
+ // Emit the tail padding for the long double.
+ const DataLayout &TD = *AP.TM.getDataLayout();
+ AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) -
+ TD.getTypeStoreSize(CFP->getType()), AddrSpace);
}
static void emitGlobalConstantLargeInt(const ConstantInt *CI,
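The rewritten emitGlobalConstantFP now covers every FP width with one loop: split the APInt payload into full 64-bit words plus one smaller trailing chunk (the 2 extra bytes of an x86_fp80, for instance) and emit most-significant-first on big-endian targets, least-significant-first otherwise, with ppc_fp128 inverting the test. A self-contained sketch of the chunking, leaving out the PPC quirk and mocking EmitIntValue with printf:

    #include <cstdint>
    #include <cstdio>

    // Mock of MCStreamer::EmitIntValue: just show what would be written.
    static void emitIntValue(uint64_t V, unsigned Bytes) {
      std::printf("emit %u byte(s): 0x%llx\n", Bytes, (unsigned long long)V);
    }

    // Words holds the payload least-significant word first, as
    // APInt::getRawData() returns it.
    static void emitChunks(const uint64_t *Words, unsigned NumWords,
                           unsigned BitWidth, bool BigEndian) {
      unsigned NumBytes = BitWidth / 8;
      unsigned TrailingBytes = NumBytes % sizeof(uint64_t);
      if (BigEndian) {
        int Chunk = NumWords - 1; // most significant (partial) word first
        if (TrailingBytes)
          emitIntValue(Words[Chunk--], TrailingBytes);
        for (; Chunk >= 0; --Chunk)
          emitIntValue(Words[Chunk], sizeof(uint64_t));
      } else {
        unsigned Chunk;
        for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk)
          emitIntValue(Words[Chunk], sizeof(uint64_t));
        if (TrailingBytes) // the partial word goes last on little-endian
          emitIntValue(Words[Chunk], TrailingBytes);
      }
    }

    int main() {
      // An x86_fp80 value of 1.0: 64 mantissa bits plus 16 sign/exponent bits.
      const uint64_t X87[2] = {0x8000000000000000ULL, 0x3fff};
      emitChunks(X87, 2, 80, /*BigEndian=*/false); // 8 bytes, then 2 bytes
    }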
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index ece92d8..156acac 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -46,7 +46,7 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
if (isVerbose() && Desc)
OutStreamer.AddComment(Desc);
- OutStreamer.EmitULEB128IntValue(Value, 0/*addrspace*/, PadTo);
+ OutStreamer.EmitULEB128IntValue(Value, PadTo);
}
/// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
@@ -58,7 +58,7 @@ void AsmPrinter::EmitCFAByte(unsigned Val) const {
else
OutStreamer.AddComment(dwarf::CallFrameString(Val));
}
- OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(Val, 1);
}
static const char *DecodeDWARFEncoding(unsigned Encoding) {
@@ -102,7 +102,7 @@ void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const {
DecodeDWARFEncoding(Val));
}
- OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(Val, 1);
}
/// GetSizeOfEncodedValue - Return the size of the encoding in bytes.
@@ -126,9 +126,9 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV,
const MCExpr *Exp =
TLOF.getTTypeGlobalReference(GV, Mang, MMI, Encoding, OutStreamer);
- OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding), /*addrspace*/0);
+ OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding));
} else
- OutStreamer.EmitIntValue(0, GetSizeOfEncodedValue(Encoding), 0);
+ OutStreamer.EmitIntValue(0, GetSizeOfEncodedValue(Encoding));
}
/// EmitSectionOffset - Emit the 4-byte offset of Label from the start of its
@@ -157,7 +157,7 @@ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
// If the section in question will end up with an address of 0 anyway, we can
// just emit an absolute reference to save a relocation.
if (Section.isBaseAddressKnownZero()) {
- OutStreamer.EmitSymbolValue(Label, 4, 0/*AddrSpace*/);
+ OutStreamer.EmitSymbolValue(Label, 4);
return;
}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 4890627..8b9a884 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -38,7 +38,7 @@ using namespace llvm;
namespace {
struct SrcMgrDiagInfo {
const MDNode *LocInfo;
- LLVMContext::DiagHandlerTy DiagHandler;
+ LLVMContext::InlineAsmDiagHandlerTy DiagHandler;
void *DiagContext;
};
}
@@ -89,15 +89,15 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
SourceMgr SrcMgr;
SrcMgrDiagInfo DiagInfo;
- // If the current LLVMContext has a diagnostic handler, set it in SourceMgr.
+ // If the current LLVMContext has an inline asm handler, set it in SourceMgr.
LLVMContext &LLVMCtx = MMI->getModule()->getContext();
bool HasDiagHandler = false;
- if (LLVMCtx.getDiagnosticHandler() != 0) {
+ if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) {
// If the source manager has an issue, we arrange for srcMgrDiagHandler
// to be invoked, getting DiagInfo passed into it.
DiagInfo.LocInfo = LocMDNode;
- DiagInfo.DiagHandler = LLVMCtx.getDiagnosticHandler();
- DiagInfo.DiagContext = LLVMCtx.getDiagnosticContext();
+ DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
+ DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
SrcMgr.setDiagHandler(srcMgrDiagHandler, &DiagInfo);
HasDiagHandler = true;
}
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index a913ca4..4ded281 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -193,18 +193,20 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
case dwarf::DW_FORM_data1: Size = 1; break;
case dwarf::DW_FORM_ref2: // Fall thru
case dwarf::DW_FORM_data2: Size = 2; break;
+ case dwarf::DW_FORM_sec_offset: // Fall thru
case dwarf::DW_FORM_ref4: // Fall thru
case dwarf::DW_FORM_data4: Size = 4; break;
case dwarf::DW_FORM_ref8: // Fall thru
case dwarf::DW_FORM_data8: Size = 8; break;
case dwarf::DW_FORM_GNU_str_index: Asm->EmitULEB128(Integer); return;
+ case dwarf::DW_FORM_GNU_addr_index: Asm->EmitULEB128(Integer); return;
case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
case dwarf::DW_FORM_addr:
Size = Asm->getDataLayout().getPointerSize(); break;
default: llvm_unreachable("DIE Value form not supported yet");
}
- Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/);
+ Asm->OutStreamer.EmitIntValue(Integer, Size);
}
/// SizeOf - Determine size of integer value in bytes.
@@ -217,11 +219,13 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
case dwarf::DW_FORM_data1: return sizeof(int8_t);
case dwarf::DW_FORM_ref2: // Fall thru
case dwarf::DW_FORM_data2: return sizeof(int16_t);
+ case dwarf::DW_FORM_sec_offset: // Fall thru
case dwarf::DW_FORM_ref4: // Fall thru
case dwarf::DW_FORM_data4: return sizeof(int32_t);
case dwarf::DW_FORM_ref8: // Fall thru
case dwarf::DW_FORM_data8: return sizeof(int64_t);
case dwarf::DW_FORM_GNU_str_index: return MCAsmInfo::getULEB128Size(Integer);
+ case dwarf::DW_FORM_GNU_addr_index: return MCAsmInfo::getULEB128Size(Integer);
case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer);
case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer);
case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize();
@@ -243,13 +247,14 @@ void DIEInteger::print(raw_ostream &O) {
/// EmitValue - Emit label value.
///
void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const {
- AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form), 0/*AddrSpace*/);
+ AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form));
}
/// SizeOf - Determine size of label value in bytes.
///
unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
+ if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
return AP->getDataLayout().getPointerSize();
}
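The two switches and the DIELabel change above all encode the same size table: fixed-size data forms, 4-byte section offsets in 32-bit DWARF, pointer-sized addresses, and variable-length ULEB128 for the new GNU index forms. A sketch of that mapping, assuming 32-bit DWARF and 8-byte pointers, with toy enum names rather than the llvm::dwarf constants:

    #include <cstdio>

    enum Form { data1, data2, data4, data8, sec_offset, strp, addr, addr_index };

    unsigned ulebSize(unsigned long long V) {
      unsigned N = 1;
      while (V >>= 7) ++N; // one byte per 7 payload bits
      return N;
    }

    unsigned sizeOf(Form F, unsigned long long Value) {
      switch (F) {
      case data1: return 1;
      case data2: return 2;
      case data4:
      case sec_offset: // section offsets are 4 bytes in 32-bit DWARF
      case strp: return 4;
      case data8: return 8;
      case addr: return 8; // pointer size on a 64-bit target
      case addr_index: return ulebSize(Value); // DW_FORM_GNU_addr_index
      }
      return 0;
    }

    int main() {
      std::printf("%u %u %u\n", sizeOf(sec_offset, 0), sizeOf(addr_index, 5),
                  sizeOf(addr_index, 300)); // prints "4 1 2"
    }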
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index c193999..f58ec9b 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -173,7 +173,7 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context),
MCSymbolRefExpr::Create(SecBegin, Context),
Context);
- Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t), 0);
+ Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t));
}
}
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 21cceaf..93b00fb 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -170,6 +170,42 @@ void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form,
Die->addValue(Attribute, Form, Value);
}
+/// addLabelAddress - Add a dwarf label attribute data and value using
+/// DW_FORM_addr or DW_FORM_GNU_addr_index.
+///
+void CompileUnit::addLabelAddress(DIE *Die, unsigned Attribute,
+ MCSymbol *Label) {
+ if (!DD->useSplitDwarf()) {
+ if (Label != NULL) {
+ DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
+ Die->addValue(Attribute, dwarf::DW_FORM_addr, Value);
+ } else {
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(0);
+ Die->addValue(Attribute, dwarf::DW_FORM_addr, Value);
+ }
+ } else {
+ unsigned idx = DU->getAddrPoolIndex(Label);
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
+ Die->addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value);
+ }
+}
+
+/// addOpAddress - Add a dwarf op address data and value using the
+/// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
+///
+void CompileUnit::addOpAddress(DIE *Die, MCSymbol *Sym) {
+
+ if (!DD->useSplitDwarf()) {
+ addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ addLabel(Die, 0, dwarf::DW_FORM_udata, Sym);
+ } else {
+ unsigned idx = DU->getAddrPoolIndex(Sym);
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
+ addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index);
+ Die->addValue(0, dwarf::DW_FORM_GNU_addr_index, Value);
+ }
+}
+
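Both new helpers implement the same split-DWARF rule: without Fission the DIE carries a relocated address (DW_FORM_addr / DW_OP_addr); with it, the DIE carries only a small ULEB128 index into the .debug_addr pool (DW_FORM_GNU_addr_index / DW_OP_GNU_addr_index), keeping relocations out of the .dwo file. A toy sketch of that selection (simplified types, not the LLVM API):

    #include <cstdio>
    #include <string>

    enum Form { DW_FORM_addr, DW_FORM_GNU_addr_index };

    struct AddrRef {
      Form F;
      std::string Payload; // symbol name, or a pool index rendered as text
    };

    // Without split DWARF the DIE holds a relocated address; with it, the
    // DIE holds a small index into the address pool.
    AddrRef makeAddrRef(bool UseSplitDwarf, const std::string &Sym,
                        unsigned PoolIndex) {
      if (!UseSplitDwarf)
        return {DW_FORM_addr, Sym};
      return {DW_FORM_GNU_addr_index, std::to_string(PoolIndex)};
    }

    int main() {
      AddrRef A = makeAddrRef(false, "func_begin0", 0);
      AddrRef B = makeAddrRef(true, "func_begin0", 3);
      std::printf("form %d: %s / form %d: pool[%s]\n", A.F, A.Payload.c_str(),
                  B.F, B.Payload.c_str());
    }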
/// addDelta - Add a label delta attribute data and value.
///
void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form,
@@ -592,10 +628,21 @@ bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
return true;
}
+/// addConstantFPValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) {
+ return addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), false);
+}
+
/// addConstantValue - Add constant value entry in variable DIE.
bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
bool Unsigned) {
- unsigned CIBitWidth = CI->getBitWidth();
+ return addConstantValue(Die, CI->getValue(), Unsigned);
+}
+
+// addConstantValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val,
+ bool Unsigned) {
+ unsigned CIBitWidth = Val.getBitWidth();
if (CIBitWidth <= 64) {
unsigned form = 0;
switch (CIBitWidth) {
@@ -607,16 +654,15 @@ bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
form = Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata;
}
if (Unsigned)
- addUInt(Die, dwarf::DW_AT_const_value, form, CI->getZExtValue());
+ addUInt(Die, dwarf::DW_AT_const_value, form, Val.getZExtValue());
else
- addSInt(Die, dwarf::DW_AT_const_value, form, CI->getSExtValue());
+ addSInt(Die, dwarf::DW_AT_const_value, form, Val.getSExtValue());
return true;
}
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
// Get the raw data form of the large APInt.
- const APInt Val = CI->getValue();
const uint64_t *Ptr64 = Val.getRawData();
int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
@@ -650,18 +696,21 @@ void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
}
}
+/// getOrCreateContextDIE - Get context owner's DIE.
+DIE *CompileUnit::getOrCreateContextDIE(DIDescriptor Context) {
+ if (Context.isType())
+ return getOrCreateTypeDIE(DIType(Context));
+ else if (Context.isNameSpace())
+ return getOrCreateNameSpace(DINameSpace(Context));
+ else if (Context.isSubprogram())
+ return getOrCreateSubprogramDIE(DISubprogram(Context));
+ else
+ return getDIE(Context);
+}
+
/// addToContextOwner - Add Die into the list of its context owner's children.
void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) {
- if (Context.isType()) {
- DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context));
- ContextDIE->addChild(Die);
- } else if (Context.isNameSpace()) {
- DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context));
- ContextDIE->addChild(Die);
- } else if (Context.isSubprogram()) {
- DIE *ContextDIE = getOrCreateSubprogramDIE(DISubprogram(Context));
- ContextDIE->addChild(Die);
- } else if (DIE *ContextDIE = getDIE(Context))
+ if (DIE *ContextDIE = getOrCreateContextDIE(Context))
ContextDIE->addChild(Die);
else
addDie(Die);
@@ -864,6 +913,8 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
} else {
DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
addType(Arg, DIType(Ty));
+ if (DIType(Ty).isArtificial())
+ addFlag(Arg, dwarf::DW_AT_artificial);
Buffer.addChild(Arg);
}
}
@@ -905,22 +956,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
dwarf::DW_ACCESS_public);
if (SP.isExplicit())
addFlag(ElemDie, dwarf::DW_AT_explicit);
- }
- else if (Element.isVariable()) {
- DIVariable DV(Element);
- ElemDie = new DIE(dwarf::DW_TAG_variable);
- addString(ElemDie, dwarf::DW_AT_name, DV.getName());
- addType(ElemDie, DV.getType());
- addFlag(ElemDie, dwarf::DW_AT_declaration);
- addFlag(ElemDie, dwarf::DW_AT_external);
- addSourceLine(ElemDie, DV);
} else if (Element.isDerivedType()) {
DIDerivedType DDTy(Element);
if (DDTy.getTag() == dwarf::DW_TAG_friend) {
ElemDie = new DIE(dwarf::DW_TAG_friend);
addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend);
- } else
- ElemDie = createMemberDIE(DIDerivedType(Element));
+ } else if (DDTy.isStaticMember())
+ ElemDie = createStaticMemberDIE(DDTy);
+ else
+ ElemDie = createMemberDIE(DDTy);
} else if (Element.isObjCProperty()) {
DIObjCProperty Property(Element);
ElemDie = new DIE(Property.getTag());
@@ -1236,39 +1280,56 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
if (!GV.Verify())
return;
- DIE *VariableDIE = new DIE(GV.getTag());
- // Add to map.
- insertDIE(N, VariableDIE);
-
- // Add name.
- addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
- StringRef LinkageName = GV.getLinkageName();
- bool isGlobalVariable = GV.getGlobal() != NULL;
- if (!LinkageName.empty() && isGlobalVariable)
- addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
- getRealLinkageName(LinkageName));
- // Add type.
+ DIDescriptor GVContext = GV.getContext();
DIType GTy = GV.getType();
- addType(VariableDIE, GTy);
- // Add scoping info.
- if (!GV.isLocalToUnit())
- addFlag(VariableDIE, dwarf::DW_AT_external);
+ // If this is a static data member definition, some attributes belong
+ // to the declaration DIE.
+ DIE *VariableDIE = NULL;
+ bool IsStaticMember = false;
+ DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration();
+ if (SDMDecl.Verify()) {
+ assert(SDMDecl.isStaticMember() && "Expected static member decl");
+ // We need the declaration DIE that is in the static member's class.
+ // But that class might not exist in the DWARF yet.
+ // Creating the class will create the static member decl DIE.
+ getOrCreateContextDIE(SDMDecl.getContext());
+ VariableDIE = getDIE(SDMDecl);
+ assert(VariableDIE && "Static member decl has no context?");
+ IsStaticMember = true;
+ }
+
+ // If this is not a static data member definition, create the variable
+ // DIE and add the initial set of attributes to it.
+ if (!VariableDIE) {
+ VariableDIE = new DIE(GV.getTag());
+ // Add to map.
+ insertDIE(N, VariableDIE);
+
+ // Add name and type.
+ addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
+ addType(VariableDIE, GTy);
+
+ // Add scoping info.
+ if (!GV.isLocalToUnit()) {
+ addFlag(VariableDIE, dwarf::DW_AT_external);
+ addGlobalName(GV.getName(), VariableDIE);
+ }
+
+ // Add line number info.
+ addSourceLine(VariableDIE, GV);
+ // Add to context owner.
+ addToContextOwner(VariableDIE, GVContext);
+ }
- // Add line number info.
- addSourceLine(VariableDIE, GV);
- // Add to context owner.
- DIDescriptor GVContext = GV.getContext();
- addToContextOwner(VariableDIE, GVContext);
// Add location.
bool addToAccelTable = false;
DIE *VariableSpecDIE = NULL;
+ bool isGlobalVariable = GV.getGlobal() != NULL;
if (isGlobalVariable) {
addToAccelTable = true;
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
- addLabel(Block, 0, dwarf::DW_FORM_udata,
- Asm->Mang->getSymbol(GV.getGlobal()));
+ addOpAddress(Block, Asm->Mang->getSymbol(GV.getGlobal()));
// Do not create specification DIE if context is either compile unit
// or a subprogram.
if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() &&
@@ -1278,22 +1339,31 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
dwarf::DW_FORM_ref4, VariableDIE);
addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
- addFlag(VariableDIE, dwarf::DW_AT_declaration);
+ // A static member's declaration is already flagged as such.
+ if (!SDMDecl.Verify())
+ addFlag(VariableDIE, dwarf::DW_AT_declaration);
addDie(VariableSpecDIE);
} else {
addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
}
+ // Add linkage name.
+ StringRef LinkageName = GV.getLinkageName();
+ if (!LinkageName.empty() && isGlobalVariable)
+ addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
+ getRealLinkageName(LinkageName));
} else if (const ConstantInt *CI =
- dyn_cast_or_null<ConstantInt>(GV.getConstant()))
- addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType());
- else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
+ dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
+ // AT_const_value was added when the static member was created. To avoid
+ // emitting AT_const_value multiple times, we only add AT_const_value when
+ // it is not a static member.
+ if (!IsStaticMember)
+ addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType());
+ } else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
addToAccelTable = true;
// GV is a merged global.
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
Value *Ptr = CE->getOperand(0);
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
- addLabel(Block, 0, dwarf::DW_FORM_udata,
- Asm->Mang->getSymbol(cast<GlobalValue>(Ptr)));
+ addOpAddress(Block, Asm->Mang->getSymbol(cast<GlobalValue>(Ptr)));
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
SmallVector<Value*, 3> Idx(CE->op_begin()+1, CE->op_end());
addUInt(Block, 0, dwarf::DW_FORM_udata,
@@ -1618,3 +1688,38 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
}
return MemberDie;
}
+
+/// createStaticMemberDIE - Create new DIE for C++ static member.
+DIE *CompileUnit::createStaticMemberDIE(const DIDerivedType DT) {
+ if (!DT.Verify())
+ return NULL;
+
+ DIE *StaticMemberDIE = new DIE(DT.getTag());
+ DIType Ty = DT.getTypeDerivedFrom();
+
+ addString(StaticMemberDIE, dwarf::DW_AT_name, DT.getName());
+ addType(StaticMemberDIE, Ty);
+ addSourceLine(StaticMemberDIE, DT);
+ addFlag(StaticMemberDIE, dwarf::DW_AT_external);
+ addFlag(StaticMemberDIE, dwarf::DW_AT_declaration);
+
+ // FIXME: We could omit private if the parent is a class_type, and
+ // public if the parent is something else.
+ if (DT.isProtected())
+ addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_protected);
+ else if (DT.isPrivate())
+ addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_private);
+ else
+ addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_public);
+
+ if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT.getConstant()))
+ addConstantValue(StaticMemberDIE, CI, Ty.isUnsignedDIType());
+ if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT.getConstant()))
+ addConstantFPValue(StaticMemberDIE, CFP);
+
+ insertDIE(DT, StaticMemberDIE);
+ return StaticMemberDIE;
+}
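An illustrative C++ input for the new static-member path; the DIE shapes described in the comments are conceptual, since the exact output depends on the frontend metadata:

    // In-class declarations go through createStaticMemberDIE():
    class Widget {
      static const int MaxSize = 64; // decl DIE: DW_TAG_member under Widget,
                                     // DW_AT_declaration + DW_AT_external,
                                     // DW_AT_accessibility(private),
                                     // DW_AT_const_value(64)
    public:
      static int LiveCount;          // decl DIE, public accessibility
    };

    // The out-of-class definition goes through createGlobalVariableDIE(),
    // which finds the decl via getStaticDataMemberDeclaration() and emits a
    // DW_TAG_variable pointing back with DW_AT_specification instead of
    // duplicating the name and type.
    int Widget::LiveCount = 0;

    int main() { return Widget::LiveCount; }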
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index f210dcc..77bf6a9 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -27,6 +27,7 @@ class DwarfUnits;
class MachineLocation;
class MachineOperand;
class ConstantInt;
+class ConstantFP;
class DbgVariable;
//===----------------------------------------------------------------------===//
@@ -63,6 +64,10 @@ class CompileUnit {
/// descriptors to debug information entries using a DIEEntry proxy.
DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
+ /// GlobalNames - A map of globally visible named entities for this unit.
+ ///
+ StringMap<DIE*> GlobalNames;
+
/// GlobalTypes - A map of globally visible types for this unit.
///
StringMap<DIE*> GlobalTypes;
@@ -86,6 +91,9 @@ class CompileUnit {
/// DWARF version doesn't handle the language, return -1.
int64_t getDefaultLowerBound() const;
+ /// getOrCreateContextDIE - Get context owner's DIE.
+ DIE *getOrCreateContextDIE(DIDescriptor Context);
+
public:
CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW,
DwarfUnits *);
@@ -95,6 +103,7 @@ public:
unsigned getUniqueID() const { return UniqueID; }
unsigned getLanguage() const { return Language; }
DIE* getCUDie() const { return CUDie.get(); }
+ const StringMap<DIE*> &getGlobalNames() const { return GlobalNames; }
const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
const StringMap<std::vector<DIE*> > &getAccelNames() const {
@@ -115,6 +124,10 @@ public:
///
bool hasContent() const { return !CUDie->getChildren().empty(); }
+ /// addGlobalName - Add a new global entity to the compile unit.
+ ///
+ void addGlobalName(StringRef Name, DIE *Die) { GlobalNames[Name] = Die; }
+
/// addGlobalType - Add a new global type to the compile unit.
///
void addGlobalType(DIType Ty);
@@ -207,6 +220,16 @@ public:
void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
const MCSymbol *Label);
+ /// addLabelAddress - Add a dwarf label attribute data and value using
+ /// either DW_FORM_addr or DW_FORM_GNU_addr_index.
+ ///
+ void addLabelAddress(DIE *Die, unsigned Attribute, MCSymbol *Label);
+
+ /// addOpAddress - Add a dwarf op address data and value using the
+ /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
+ ///
+ void addOpAddress(DIE *Die, MCSymbol *Label);
+
/// addDelta - Add a label delta attribute data and value.
///
void addDelta(DIE *Die, unsigned Attribute, unsigned Form,
@@ -237,9 +260,11 @@ public:
/// addConstantValue - Add constant value entry in variable DIE.
bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty);
bool addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned);
+ bool addConstantValue(DIE *Die, const APInt &Val, bool Unsigned);
/// addConstantFPValue - Add constant value entry in variable DIE.
bool addConstantFPValue(DIE *Die, const MachineOperand &MO);
+ bool addConstantFPValue(DIE *Die, const ConstantFP *CFP);
/// addTemplateParams - Add template parameters in buffer.
void addTemplateParams(DIE &Buffer, DIArray TParams);
@@ -339,6 +364,9 @@ public:
/// createMemberDIE - Create new member DIE.
DIE *createMemberDIE(DIDerivedType DT);
+ /// createStaticMemberDIE - Create new static data member DIE.
+ DIE *createStaticMemberDIE(DIDerivedType DT);
+
private:
// DIEValueAllocator - All DIEValues are allocated through this allocator.
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 66a5a6d..87659ef 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -54,6 +54,10 @@ static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
cl::desc("Make an absence of debug location information explicit."),
cl::init(false));
+static cl::opt<bool> GenerateDwarfPubNamesSection("generate-dwarf-pubnames",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Generate DWARF pubnames section"));
+
namespace {
enum DefaultOnOff {
Default, Enable, Disable
@@ -159,14 +163,13 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
PrevLabel(NULL), GlobalCUIndexCount(0),
InfoHolder(A, &AbbreviationsSet, &Abbreviations, "info_string",
DIEValueAllocator),
- SkeletonCU(0),
SkeletonAbbrevSet(InitAbbreviationsSetSize),
SkeletonHolder(A, &SkeletonAbbrevSet, &SkeletonAbbrevs, "skel_string",
DIEValueAllocator) {
DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
DwarfStrSectionSym = TextSectionSym = 0;
- DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
+ DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = DwarfLineSectionSym = 0;
DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0;
FunctionBeginSym = FunctionEndSym = 0;
@@ -237,6 +240,15 @@ unsigned DwarfUnits::getStringPoolIndex(StringRef Str) {
return Entry.second;
}
+unsigned DwarfUnits::getAddrPoolIndex(MCSymbol *Sym) {
+ std::pair<MCSymbol*, unsigned> &Entry = AddressPool[Sym];
+ if (Entry.first) return Entry.second;
+
+ Entry.second = NextAddrPoolNumber++;
+ Entry.first = Sym;
+ return Entry.second;
+}
+
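getAddrPoolIndex assigns each distinct symbol a dense index on first use; that is what lets DIEs refer to addresses with a small ULEB128 value and lets emitAddresses lay the pool out in index order later. A sketch of the bookkeeping, with std::map and std::string in place of LLVM's DenseMap and MCSymbol (illustrative only):

    #include <cassert>
    #include <map>
    #include <string>

    struct AddrPool {
      std::map<std::string, unsigned> Pool;
      unsigned NextIndex = 0;

      // First lookup allocates the next index; later lookups reuse it.
      unsigned getIndex(const std::string &Sym) {
        auto It = Pool.find(Sym);
        if (It != Pool.end())
          return It->second;
        return Pool[Sym] = NextIndex++;
      }
    };

    int main() {
      AddrPool P;
      assert(P.getIndex("func_begin0") == 0);
      assert(P.getIndex("func_end0") == 1);
      assert(P.getIndex("func_begin0") == 0); // stable on repeat lookups
    }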
// Define a unique number for the abbreviation.
//
void DwarfUnits::assignAbbrevNumber(DIEAbbrev &Abbrev) {
@@ -384,10 +396,12 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
}
}
- SPCU->addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()));
- SPCU->addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- Asm->GetTempSymbol("func_end", Asm->getFunctionNumber()));
+ SPCU->addLabelAddress(SPDie, dwarf::DW_AT_low_pc,
+ Asm->GetTempSymbol("func_begin",
+ Asm->getFunctionNumber()));
+ SPCU->addLabelAddress(SPDie, dwarf::DW_AT_high_pc,
+ Asm->GetTempSymbol("func_end",
+ Asm->getFunctionNumber()));
const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
MachineLocation Location(RI->getFrameRegister(*Asm->MF));
SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
@@ -429,16 +443,16 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
return ScopeDIE;
}
- const MCSymbol *Start = getLabelBeforeInsn(RI->first);
- const MCSymbol *End = getLabelAfterInsn(RI->second);
+ MCSymbol *Start = getLabelBeforeInsn(RI->first);
+ MCSymbol *End = getLabelAfterInsn(RI->second);
if (End == 0) return 0;
assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
assert(End->isDefined() && "Invalid end label for an inlined scope!");
- TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start);
- TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End);
+ TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, Start);
+ TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, End);
return ScopeDIE;
}
@@ -462,8 +476,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
}
SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin();
- const MCSymbol *StartLabel = getLabelBeforeInsn(RI->first);
- const MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
+ MCSymbol *StartLabel = getLabelBeforeInsn(RI->first);
+ MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
if (StartLabel == 0 || EndLabel == 0) {
llvm_unreachable("Unexpected Start and End labels for an inlined scope!");
@@ -492,10 +506,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
DebugRangeSymbols.push_back(NULL);
DebugRangeSymbols.push_back(NULL);
} else {
- TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- StartLabel);
- TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- EndLabel);
+ TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, StartLabel);
+ TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, EndLabel);
}
InlinedSubprogramDIEs.insert(OriginDIE);
@@ -531,6 +543,12 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
if (!Scope || !Scope->getScopeNode())
return NULL;
+ DIScope DS(Scope->getScopeNode());
+ // Early return to avoid creating dangling variable|scope DIEs.
+ if (!Scope->getInlinedAt() && DS.isSubprogram() && Scope->isAbstractScope() &&
+ !TheCU->getDIE(DS))
+ return NULL;
+
SmallVector<DIE *, 8> Children;
DIE *ObjectPointer = NULL;
@@ -556,7 +574,6 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
for (unsigned j = 0, M = Scopes.size(); j < M; ++j)
if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j]))
Children.push_back(Nested);
- DIScope DS(Scope->getScopeNode());
DIE *ScopeDIE = NULL;
if (Scope->getInlinedAt())
ScopeDIE = constructInlinedScopeDIE(TheCU, Scope);
@@ -646,15 +663,28 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
DIUnit.getLanguage());
NewCU->addString(Die, dwarf::DW_AT_name, FN);
// 2.17.1 requires that we use DW_AT_low_pc for a single entry point
- // into an entity.
- NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
+ // into an entity. We're using 0 (or a NULL label) for this.
+ NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL);
+
+ // Define start line table label for each Compile Unit.
+ MCSymbol *LineTableStartSym = Asm->GetTempSymbol("line_table_start",
+ NewCU->getUniqueID());
+ Asm->OutStreamer.getContext().setMCLineTableSymbol(LineTableStartSym,
+ NewCU->getUniqueID());
+
// DW_AT_stmt_list is an offset of line number information for this
// compile unit in debug_line section.
+ // The line table entries are not always emitted in assembly, so it
+ // is not okay to use line_table_start here.
if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
- Asm->GetTempSymbol("section_line"));
- else
+ NewCU->getUniqueID() == 0 ?
+ Asm->GetTempSymbol("section_line") : LineTableStartSym);
+ else if (NewCU->getUniqueID() == 0)
NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+ else
+ NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+ LineTableStartSym, DwarfLineSectionSym);
if (!CompilationDir.empty())
NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
@@ -671,8 +701,13 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
if (!FirstCU)
FirstCU = NewCU;
- if (useSplitDwarf() && !SkeletonCU)
- SkeletonCU = constructSkeletonCU(N);
+
+ if (useSplitDwarf()) {
+ // This should be a unique identifier when we want to build .dwp files.
+ NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
+ // Now construct the skeleton CU associated.
+ constructSkeletonCU(N);
+ }
InfoHolder.addUnit(NewCU);
@@ -702,7 +737,9 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
// Add to context owner.
TheCU->addToContextOwner(SubprogramDie, SP.getContext());
- return;
+ // Expose as global, if requested.
+ if (GenerateDwarfPubNamesSection)
+ TheCU->addGlobalName(SP.getName(), SubprogramDie);
}
// Collect debug info from named mdnodes such as llvm.dbg.enum and llvm.dbg.ty.
@@ -754,6 +791,9 @@ bool DwarfDebug::collectLegacyDebugInfo(const Module *M) {
}
if (!HasDebugInfo) return false;
+ // Emit initial sections so we can refer to them later.
+ emitSectionLabels();
+
// Create all the compile unit DIEs.
for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
E = DbgFinder.compile_unit_end(); I != E; ++I)
@@ -791,6 +831,9 @@ void DwarfDebug::beginModule() {
// module using debug info finder to collect debug info.
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
if (CU_Nodes) {
+ // Emit initial sections so we can reference labels later.
+ emitSectionLabels();
+
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
DICompileUnit CUNode(CU_Nodes->getOperand(i));
CompileUnit *CU = constructCompileUnit(CUNode);
@@ -923,9 +966,6 @@ void DwarfDebug::endModule() {
// Finalize the debug info for the module.
finalizeModuleInfo();
- // Emit initial sections.
- emitSectionLabels();
-
if (!useSplitDwarf()) {
// Emit all the DIEs into a debug info section.
emitDebugInfo();
@@ -975,6 +1015,9 @@ void DwarfDebug::endModule() {
// Emit info into a debug macinfo section.
emitDebugMacInfo();
+ // Emit DWO addresses.
+ InfoHolder.emitAddresses(Asm->getObjFileLowering().getDwarfAddrSection());
+
// Emit inline info.
// TODO: When we don't need the option anymore we
// can remove all of the code that this section
@@ -991,6 +1034,10 @@ void DwarfDebug::endModule() {
emitAccelTypes();
}
+ // Emit info into a debug pubnames section, if requested.
+ if (GenerateDwarfPubNamesSection)
+ emitDebugPubnames();
+
// Emit info into a debug pubtypes section.
// TODO: When we don't need the option anymore we can
// remove all of the code that adds to the table.
@@ -1008,11 +1055,12 @@ void DwarfDebug::endModule() {
E = CUMap.end(); I != E; ++I)
delete I->second;
- delete SkeletonCU;
+ for (SmallVector<CompileUnit *, 1>::iterator I = SkeletonCUs.begin(),
+ E = SkeletonCUs.end(); I != E; ++I)
+ delete *I;
// Reset these for the next Module if we have one.
FirstCU = NULL;
- SkeletonCU = NULL;
}
// Find abstract variable, if any, associated with Var.
@@ -1179,7 +1227,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
continue;
}
- // handle multiple DBG_VALUE instructions describing one variable.
+ // Handle multiple DBG_VALUE instructions describing one variable.
RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
for (SmallVectorImpl<const MachineInstr*>::const_iterator
@@ -1234,14 +1282,14 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
}
// Return Label preceding the instruction.
-const MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) {
+MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) {
MCSymbol *Label = LabelsBeforeInsn.lookup(MI);
assert(Label && "Didn't insert label before instruction");
return Label;
}
// Return Label immediately following the instruction.
-const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
+MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
return LabelsAfterInsn.lookup(MI);
}
@@ -1377,6 +1425,13 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (LScopes.empty()) return;
identifyScopeMarkers();
+ // Set DwarfCompileUnitID in MCContext to the Compile Unit this function
+ // belongs to.
+ LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+ CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
+ assert(TheCU && "Unable to find compile unit!");
+ Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
+
FunctionBeginSym = Asm->GetTempSymbol("func_begin",
Asm->getFunctionNumber());
// Assumes in correct section after the entry point.
@@ -1561,6 +1616,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
Asm->getFunctionNumber());
// Assumes in correct section after the entry point.
Asm->OutStreamer.EmitLabel(FunctionEndSym);
+ // Set DwarfCompileUnitID in MCContext to default value.
+ Asm->OutStreamer.getContext().setDwarfCompileUnitID(0);
SmallPtrSet<const MDNode *, 16> ProcessedVars;
collectVariableInfo(MF, ProcessedVars);
@@ -1734,8 +1791,11 @@ void DwarfDebug::emitSectionLabels() {
if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection())
emitSectionSym(Asm, MacroInfo);
- emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
+ DwarfLineSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
emitSectionSym(Asm, TLOF.getDwarfLocSection());
+ if (GenerateDwarfPubNamesSection)
+ emitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
emitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
DwarfStrSectionSym =
emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string");
@@ -1942,8 +2002,7 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
Asm->OutStreamer.AddComment("Section end label");
Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd),
- Asm->getDataLayout().getPointerSize(),
- 0/*AddrSpace*/);
+ Asm->getDataLayout().getPointerSize());
// Mark end of matrix.
Asm->OutStreamer.AddComment("DW_LNE_end_sequence");
@@ -2072,6 +2131,61 @@ void DwarfDebug::emitAccelTypes() {
AT.Emit(Asm, SectionBegin, &InfoHolder);
}
+/// emitDebugPubnames - Emit visible names into a debug pubnames section.
+///
+void DwarfDebug::emitDebugPubnames() {
+ const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection();
+
+ typedef DenseMap<const MDNode*, CompileUnit*> CUMapType;
+ for (CUMapType::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ unsigned ID = TheCU->getUniqueID();
+
+ if (TheCU->getGlobalNames().empty())
+ continue;
+
+ // Start the dwarf pubnames section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfPubNamesSection());
+
+ Asm->OutStreamer.AddComment("Length of Public Names Info");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("pubnames_end", ID),
+ Asm->GetTempSymbol("pubnames_begin", ID), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", ID));
+
+ Asm->OutStreamer.AddComment("DWARF Version");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+
+ Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+ Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), ID),
+ DwarfInfoSectionSym);
+
+ Asm->OutStreamer.AddComment("Compilation Unit Length");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(), ID),
+ Asm->GetTempSymbol(ISec->getLabelBeginName(), ID),
+ 4);
+
+ const StringMap<DIE*> &Globals = TheCU->getGlobalNames();
+ for (StringMap<DIE*>::const_iterator
+ GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ const DIE *Entity = GI->second;
+
+ Asm->OutStreamer.AddComment("DIE offset");
+ Asm->EmitInt32(Entity->getOffset());
+
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment("External Name");
+ Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0);
+ }
+
+ Asm->OutStreamer.AddComment("End Mark");
+ Asm->EmitInt32(0);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end", ID));
+ }
+}
+
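The emission sequence above follows the classic .debug_pubnames layout: a unit_length field, a 2-byte version, the offset and length of the owning CU in .debug_info, then (4-byte DIE offset, NUL-terminated name) pairs, closed by a 4-byte zero end mark. A byte-level sketch under 32-bit DWARF, patching unit_length afterwards the way the pubnames_begin/pubnames_end label pair resolves (illustrative, not the MCStreamer API):

    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <utility>
    #include <vector>

    static void put32(std::vector<uint8_t> &B, uint32_t V) {
      for (int i = 0; i < 4; ++i)
        B.push_back(uint8_t(V >> (8 * i))); // little-endian for simplicity
    }
    static void put16(std::vector<uint8_t> &B, uint16_t V) {
      B.push_back(uint8_t(V));
      B.push_back(uint8_t(V >> 8));
    }

    std::vector<uint8_t>
    buildPubnames(uint32_t CUOffset, uint32_t CULength,
                  const std::vector<std::pair<uint32_t, std::string>> &Names) {
      std::vector<uint8_t> B;
      put32(B, 0);        // unit_length placeholder ("Length of Public Names Info")
      put16(B, 2);        // version (dwarf::DWARF_VERSION in the patch)
      put32(B, CUOffset); // "Offset of Compilation Unit Info"
      put32(B, CULength); // "Compilation Unit Length"
      for (const auto &N : Names) {
        put32(B, N.first);                                // "DIE offset"
        B.insert(B.end(), N.second.begin(), N.second.end());
        B.push_back(0);                                   // NUL-terminated name
      }
      put32(B, 0);        // "End Mark"
      uint32_t Len = uint32_t(B.size()) - 4; // excludes the length field itself
      for (int i = 0; i < 4; ++i)
        B[i] = uint8_t(Len >> (8 * i));
      return B;
    }

    int main() {
      std::vector<uint8_t> B = buildPubnames(0, 0x80, {{0x26, "main"}});
      std::printf("%zu bytes\n", B.size()); // 14 header + 9 entry + 4 end mark
    }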
void DwarfDebug::emitDebugPubTypes() {
for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
E = CUMap.end(); I != E; ++I) {
@@ -2114,7 +2228,7 @@ void DwarfDebug::emitDebugPubTypes() {
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
// Emit the name with a terminating null byte.
- Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
+ Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1));
}
Asm->OutStreamer.AddComment("End Mark");
@@ -2152,22 +2266,53 @@ void DwarfUnits::emitStrings(const MCSection *StrSection,
// Emit the string itself with a terminating null byte.
Asm->OutStreamer.EmitBytes(StringRef(Entries[i].second->getKeyData(),
- Entries[i].second->getKeyLength()+1),
- 0/*addrspace*/);
+ Entries[i].second->getKeyLength()+1));
}
// If we've got an offset section go ahead and emit that now as well.
if (OffsetSection) {
Asm->OutStreamer.SwitchSection(OffsetSection);
unsigned offset = 0;
- unsigned size = 4;
+ unsigned size = 4; // FIXME: DWARF64 is 8.
for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
- Asm->OutStreamer.EmitIntValue(offset, size, 0);
+ Asm->OutStreamer.EmitIntValue(offset, size);
offset += Entries[i].second->getKeyLength() + 1;
}
}
}
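The offset section written above is just a running total: entry i's slot holds the combined size of all earlier NUL-terminated strings, in 4-byte slots for 32-bit DWARF (the new FIXME notes DWARF64 would need 8). A tiny sketch of that accumulation:

    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
      std::vector<std::string> Pool = {"main", "int", "argc"};
      unsigned Offset = 0;
      for (const std::string &S : Pool) {
        std::printf("offset slot: %u\n", Offset); // EmitIntValue(offset, 4)
        Offset += S.size() + 1;                   // +1 for the NUL terminator
      }
      // Prints 0, 5, 9: "main\0" occupies 5 bytes, "int\0" 4 more.
    }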
+// Emit addresses into the given address section.
+void DwarfUnits::emitAddresses(const MCSection *AddrSection) {
+
+ if (AddressPool.empty()) return;
+
+ // Start the dwarf addr section.
+ Asm->OutStreamer.SwitchSection(AddrSection);
+
+ // Get all of the address pool entries and put them in an array by their
+ // index so we can sort them.
+ SmallVector<std::pair<unsigned,
+ std::pair<MCSymbol*, unsigned>* >, 64> Entries;
+
+ for (DenseMap<MCSymbol*, std::pair<MCSymbol*, unsigned> >::iterator
+ I = AddressPool.begin(), E = AddressPool.end();
+ I != E; ++I)
+ Entries.push_back(std::make_pair(I->second.second, &(I->second)));
+
+ array_pod_sort(Entries.begin(), Entries.end());
+
+ for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
+ // Emit a label for reference from debug information entries.
+ MCSymbol *Sym = Entries[i].second->first;
+ if (Sym)
+ Asm->EmitLabelReference(Entries[i].second->first,
+ Asm->getDataLayout().getPointerSize());
+ else
+ Asm->OutStreamer.EmitIntValue(0, Asm->getDataLayout().getPointerSize());
+ }
+
+}
+
// Emit visible names into a debug str section.
void DwarfDebug::emitDebugStr() {
DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
@@ -2199,12 +2344,12 @@ void DwarfDebug::emitDebugLoc() {
DotDebugLocEntry &Entry = *I;
if (Entry.isMerged()) continue;
if (Entry.isEmpty()) {
- Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
- Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+ Asm->OutStreamer.EmitIntValue(0, Size);
+ Asm->OutStreamer.EmitIntValue(0, Size);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index));
} else {
- Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size, 0);
- Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0);
+ Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size);
+ Asm->OutStreamer.EmitSymbolValue(Entry.End, Size);
DIVariable DV(Entry.Variable);
Asm->OutStreamer.AddComment("Loc expr size");
MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol();
@@ -2290,9 +2435,9 @@ void DwarfDebug::emitDebugRanges() {
I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
I != E; ++I) {
if (*I)
- Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size, 0);
+ Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size);
else
- Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+ Asm->OutStreamer.EmitIntValue(0, Size);
}
}
@@ -2376,7 +2521,7 @@ void DwarfDebug::emitDebugInlineInfo() {
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc");
Asm->OutStreamer.EmitSymbolValue(LI->first,
- Asm->getDataLayout().getPointerSize(),0);
+ Asm->getDataLayout().getPointerSize());
}
}
@@ -2391,68 +2536,44 @@ void DwarfDebug::emitDebugInlineInfo() {
// DW_AT_low_pc and DW_AT_high_pc are not used, and vice versa.
CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) {
DICompileUnit DIUnit(N);
- StringRef FN = DIUnit.getFilename();
CompilationDir = DIUnit.getDirectory();
DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++,
DIUnit.getLanguage(), Die, Asm,
this, &SkeletonHolder);
- // FIXME: This should be the .dwo file.
- NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name, FN);
- // FIXME: We also need DW_AT_addr_base and DW_AT_dwo_id.
+ NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name,
+ DIUnit.getSplitDebugFilename());
+
+ // This should be a unique identifier when we want to build .dwp files.
+ NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
+
+ // FIXME: The addr base should be relative for each compile unit, however,
+ // this one is going to be 0 anyhow.
+ NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset, 0);
// 2.17.1 requires that we use DW_AT_low_pc for a single entry point
- // into an entity.
+ // into an entity. We're using 0 (a NULL label) for this.
NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
+
// DW_AT_stmt_list is an offset of line number information for this
// compile unit in the debug_line section.
if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
- NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
- Asm->GetTempSymbol("section_line"));
+ NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset,
+ DwarfLineSectionSym);
else
- NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+ NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, 0);
if (!CompilationDir.empty())
NewCU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
SkeletonHolder.addUnit(NewCU);
+ SkeletonCUs.push_back(NewCU);
return NewCU;
}
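
Put together, the skeleton CU carries just enough for a consumer to find and
use the split .dwo file. Roughly, the DIE built above ends up looking like
this; all values shown are illustrative placeholders:

    // DW_TAG_compile_unit
    //   DW_AT_GNU_dwo_name   "foo.dwo"      // split debug filename
    //   DW_AT_GNU_dwo_id     0              // placeholder until .dwp support
    //   DW_AT_GNU_addr_base  0              // offset into the address section
    //   DW_AT_low_pc         0              // NULL entry point label
    //   DW_AT_stmt_list      <.debug_line>  // line table offset
    //   DW_AT_comp_dir       "/build/dir"   // only when non-empty
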
-void DwarfDebug::emitSkeletonCU(const MCSection *Section) {
- Asm->OutStreamer.SwitchSection(Section);
- DIE *Die = SkeletonCU->getCUDie();
-
- // Emit the compile units header.
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(Section->getLabelBeginName(),
- SkeletonCU->getUniqueID()));
-
- // Emit size of content not including length itself
- unsigned ContentSize = Die->getSize() +
- sizeof(int16_t) + // DWARF version number
- sizeof(int32_t) + // Offset Into Abbrev. Section
- sizeof(int8_t); // Pointer Size (in bytes)
-
- Asm->OutStreamer.AddComment("Length of Compilation Unit Info");
- Asm->EmitInt32(ContentSize);
- Asm->OutStreamer.AddComment("DWARF version number");
- Asm->EmitInt16(dwarf::DWARF_VERSION);
- Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
-
- const MCSection *ASec = Asm->getObjFileLowering().getDwarfAbbrevSection();
- Asm->EmitSectionOffset(Asm->GetTempSymbol(ASec->getLabelBeginName()),
- DwarfAbbrevSectionSym);
- Asm->OutStreamer.AddComment("Address Size (in bytes)");
- Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
-
- emitDIE(Die, &SkeletonAbbrevs);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(Section->getLabelEndName(),
- SkeletonCU->getUniqueID()));
-}
-
void DwarfDebug::emitSkeletonAbbrevs(const MCSection *Section) {
assert(useSplitDwarf() && "No split dwarf debug info?");
emitAbbrevs(Section, &SkeletonAbbrevs);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 1e471f7..7b56815 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -195,6 +195,10 @@ public:
typedef StringMap<std::pair<MCSymbol*, unsigned>,
BumpPtrAllocator&> StrPool;
+// A Symbol->pair<Symbol, unsigned> mapping of addresses used by indirect
+// references.
+typedef DenseMap<MCSymbol *, std::pair<MCSymbol *, unsigned> > AddrPool;
+
/// \brief Collects and handles information specific to a particular
/// collection of units.
class DwarfUnits {
@@ -215,12 +219,17 @@ class DwarfUnits {
unsigned NextStringPoolNumber;
std::string StringPref;
+ // Collection of addresses for this unit and assorted labels.
+ AddrPool AddressPool;
+ unsigned NextAddrPoolNumber;
+
public:
DwarfUnits(AsmPrinter *AP, FoldingSet<DIEAbbrev> *AS,
std::vector<DIEAbbrev *> *A, const char *Pref,
BumpPtrAllocator &DA) :
Asm(AP), AbbreviationsSet(AS), Abbreviations(A),
- StringPool(DA), NextStringPoolNumber(0), StringPref(Pref) {}
+ StringPool(DA), NextStringPoolNumber(0), StringPref(Pref),
+ AddressPool(), NextAddrPoolNumber(0) {}
/// \brief Compute the size and offset of a DIE given an incoming Offset.
unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
@@ -242,6 +251,9 @@ public:
/// \brief Emit all of the strings to the section given.
void emitStrings(const MCSection *, const MCSection *, const MCSymbol *);
+ /// \brief Emit all of the addresses to the section given.
+ void emitAddresses(const MCSection *);
+
/// \brief Returns the entry into the start of the pool.
MCSymbol *getStringPoolSym();
@@ -255,6 +267,13 @@ public:
/// \brief Returns the string pool.
StrPool *getStringPool() { return &StringPool; }
+
+ /// \brief Returns the index into the address pool with the given
+ /// label/symbol.
+ unsigned getAddrPoolIndex(MCSymbol *);
+
+ /// \brief Returns the address pool.
+ AddrPool *getAddrPool() { return &AddressPool; }
};
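
getAddrPoolIndex is declared here, but its body lies outside this hunk. Given
AddressPool and NextAddrPoolNumber above, a plausible implementation looks
like the following sketch; it is not necessarily the committed code:

    // Sketch only; the committed body may differ.
    unsigned DwarfUnits::getAddrPoolIndex(MCSymbol *Sym) {
      std::pair<MCSymbol *, unsigned> &Entry = AddressPool[Sym];
      if (!Entry.first) {
        Entry.first = Sym;                   // first sighting: remember label
        Entry.second = NextAddrPoolNumber++; // hand out the next pool slot
      }
      return Entry.second;
    }
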
/// \brief Collects and handles dwarf debug information.
@@ -367,7 +386,7 @@ class DwarfDebug {
// section offsets and are created by EmitSectionLabels.
MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
- MCSymbol *DwarfDebugLocSectionSym;
+ MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym;
MCSymbol *FunctionBeginSym, *FunctionEndSym;
MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym;
@@ -396,8 +415,8 @@ class DwarfDebug {
// original object file, rather than things that are meant
// to be in the .dwo sections.
- // The CU left in the original object file for separated debug info.
- CompileUnit *SkeletonCU;
+ // The CUs left in the original object file for separated debug info.
+ SmallVector<CompileUnit *, 1> SkeletonCUs;
// Used to uniquely define abbreviations for the skeleton emission.
FoldingSet<DIEAbbrev> SkeletonAbbrevSet;
@@ -481,6 +500,9 @@ private:
/// \brief Emit type dies into a hashed accelerator table.
void emitAccelTypes();
+ /// \brief Emit visible names into a debug pubnames section.
+ void emitDebugPubnames();
+
/// \brief Emit visible types into a debug pubtypes section.
void emitDebugPubTypes();
@@ -508,9 +530,6 @@ private:
/// section.
CompileUnit *constructSkeletonCU(const MDNode *);
- /// \brief Emit the local split debug info section.
- void emitSkeletonCU(const MCSection *);
-
/// \brief Emit the local split abbreviations.
void emitSkeletonAbbrevs(const MCSection *);
@@ -560,7 +579,7 @@ private:
}
/// \brief Return Label preceding the instruction.
- const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
+ MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
/// \brief Ensure that a label will be emitted after MI.
void requestLabelAfterInsn(const MachineInstr *MI) {
@@ -568,7 +587,7 @@ private:
}
/// \brief Return Label immediately following the instruction.
- const MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
+ MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
public:
//===--------------------------------------------------------------------===//
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 975bb94..7133458 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -33,7 +33,6 @@
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
@@ -608,7 +607,7 @@ void DwarfException::EmitExceptionTable() {
if (!S.PadLabel) {
if (VerboseAsm)
Asm->OutStreamer.AddComment(" has no landing pad");
- Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ Asm->OutStreamer.EmitIntValue(0, 4/*size*/);
} else {
if (VerboseAsm)
Asm->OutStreamer.AddComment(Twine(" jumps to ") +
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index b802853..98177c0 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -100,7 +100,7 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
EmitCamlGlobal(getModule(), AP, "data_end");
// FIXME: Why does ocaml emit this??
- AP.OutStreamer.EmitIntValue(0, IntPtrSize, 0);
+ AP.OutStreamer.EmitIntValue(0, IntPtrSize);
AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
EmitCamlGlobal(getModule(), AP, "frametable");
@@ -145,7 +145,7 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
"Live root count "+Twine(LiveCount)+" >= 65536.");
}
- AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize, 0);
+ AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize);
AP.EmitInt16(FrameSize);
AP.EmitInt16(LiveCount);
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
index cb25674..1561012 100644
--- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp
+++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -33,7 +33,6 @@
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index c27e081..e8b5b4f 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
namespace {
class BasicTTI : public ImmutablePass, public TargetTransformInfo {
- const TargetLowering *TLI;
+ const TargetLoweringBase *TLI;
/// Estimate the overhead of scalarizing an instruction. Insert and Extract
/// are set if the result needs to be inserted and/or extracted from vectors.
@@ -37,7 +37,7 @@ public:
llvm_unreachable("This pass cannot be directly constructed");
}
- BasicTTI(const TargetLowering *TLI) : ImmutablePass(ID), TLI(TLI) {
+ BasicTTI(const TargetLoweringBase *TLI) : ImmutablePass(ID), TLI(TLI) {
initializeBasicTTIPass(*PassRegistry::getPassRegistry());
}
@@ -83,6 +83,8 @@ public:
/// @{
virtual unsigned getNumberOfRegisters(bool Vector) const;
+ virtual unsigned getMaximumUnrollFactor() const;
+ virtual unsigned getRegisterBitWidth(bool Vector) const;
virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
int Index, Type *SubTp) const;
@@ -99,6 +101,7 @@ public:
virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
ArrayRef<Type*> Tys) const;
virtual unsigned getNumberOfParts(Type *Tp) const;
+ virtual unsigned getAddressComputationCost(Type *Ty) const;
/// @}
};
@@ -110,7 +113,7 @@ INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti",
char BasicTTI::ID = 0;
ImmutablePass *
-llvm::createBasicTargetTransformInfoPass(const TargetLowering *TLI) {
+llvm::createBasicTargetTransformInfoPass(const TargetLoweringBase *TLI) {
return new BasicTTI(TLI);
}
@@ -126,7 +129,7 @@ bool BasicTTI::isLegalICmpImmediate(int64_t imm) const {
bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
int64_t Scale) const {
- TargetLowering::AddrMode AM;
+ TargetLoweringBase::AddrMode AM;
AM.BaseGV = BaseGV;
AM.BaseOffs = BaseOffset;
AM.HasBaseReg = HasBaseReg;
@@ -182,6 +185,14 @@ unsigned BasicTTI::getNumberOfRegisters(bool Vector) const {
return 1;
}
+unsigned BasicTTI::getRegisterBitWidth(bool Vector) const {
+ return 32;
+}
+
+unsigned BasicTTI::getMaximumUnrollFactor() const {
+ return 1;
+}
+
unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
// Check if any of the operands are vector operands.
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -231,6 +242,27 @@ unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(Src);
std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(Dst);
+ // Check for NOOP conversions.
+ if (SrcLT.first == DstLT.first &&
+ SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
+ // Bitcasts between types that are legalized to the same type are free.
+ if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
+ return 0;
+ }
+
+ if (Opcode == Instruction::Trunc &&
+ TLI->isTruncateFree(SrcLT.second, DstLT.second))
+ return 0;
+
+ if (Opcode == Instruction::ZExt &&
+ TLI->isZExtFree(SrcLT.second, DstLT.second))
+ return 0;
+
+ // If the cast is marked as legal (or promoted) then assume low cost.
+ if (TLI->isOperationLegalOrPromote(ISD, DstLT.second))
+ return 1;
+
// Handle scalar conversions.
if (!Src->isVectorTy() && !Dst->isVectorTy()) {
@@ -238,14 +270,6 @@ unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
if (Opcode == Instruction::BitCast)
return 0;
- if (Opcode == Instruction::Trunc &&
- TLI->isTruncateFree(SrcLT.second, DstLT.second))
- return 0;
-
- if (Opcode == Instruction::ZExt &&
- TLI->isZExtFree(SrcLT.second, DstLT.second))
- return 0;
-
// Just check the op cost. If the operation is legal then assume it costs 1.
if (!TLI->isOperationExpand(ISD, DstLT.second))
return 1;
@@ -261,10 +285,6 @@ unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
if (SrcLT.first == DstLT.first &&
SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
- // Bitcast between types that are legalized to the same type are free.
- if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
- return 0;
-
// Assume that Zext is done using AND.
if (Opcode == Instruction::ZExt)
return 1;
@@ -381,3 +401,7 @@ unsigned BasicTTI::getNumberOfParts(Type *Tp) const {
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
return LT.first;
}
+
+unsigned BasicTTI::getAddressComputationCost(Type *Ty) const {
+ return 0;
+}
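
The three new hooks return deliberately conservative defaults that a
target-specific TTI is expected to override. A toy mirror of them, showing
what a cost-model client such as a vectorizer reads; this is a standalone
sketch, not LLVM's interface:

    #include <cstdio>

    struct ToyTTI {
      unsigned getRegisterBitWidth(bool Vector) const { return 32; }
      unsigned getMaximumUnrollFactor() const { return 1; }
      unsigned getAddressComputationCost() const { return 0; }
    };

    int main() {
      ToyTTI TTI;
      // With 32-bit registers and unroll factor 1, a vectorizer would plan
      // e.g. 4 x i8 lanes and no interleaving; address math is free.
      std::printf("i8 lanes: %u, unroll: %u, addr cost: %u\n",
                  TTI.getRegisterBitWidth(true) / 8,
                  TTI.getMaximumUnrollFactor(),
                  TTI.getAddressComputationCost());
    }
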
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index d5f3932..ddc7ada 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -9,8 +9,8 @@ add_llvm_library(LLVMCodeGen
CodeGen.cpp
CodePlacementOpt.cpp
CriticalAntiDepBreaker.cpp
- DeadMachineInstructionElim.cpp
DFAPacketizer.cpp
+ DeadMachineInstructionElim.cpp
DwarfEHPrepare.cpp
EarlyIfConversion.cpp
EdgeBundles.cpp
@@ -32,21 +32,20 @@ add_llvm_library(LLVMCodeGen
LiveInterval.cpp
LiveIntervalAnalysis.cpp
LiveIntervalUnion.cpp
+ LiveRangeCalc.cpp
+ LiveRangeEdit.cpp
LiveRegMatrix.cpp
LiveStackAnalysis.cpp
LiveVariables.cpp
- LiveRangeCalc.cpp
- LiveRangeEdit.cpp
LocalStackSlotAllocation.cpp
MachineBasicBlock.cpp
MachineBlockFrequencyInfo.cpp
MachineBlockPlacement.cpp
MachineBranchProbabilityInfo.cpp
+ MachineCSE.cpp
MachineCodeEmitter.cpp
MachineCopyPropagation.cpp
- MachineCSE.cpp
MachineDominators.cpp
- MachinePostDominators.cpp
MachineFunction.cpp
MachineFunctionAnalysis.cpp
MachineFunctionPass.cpp
@@ -58,6 +57,7 @@ add_llvm_library(LLVMCodeGen
MachineModuleInfo.cpp
MachineModuleInfoImpls.cpp
MachinePassRegistry.cpp
+ MachinePostDominators.cpp
MachineRegisterInfo.cpp
MachineSSAUpdater.cpp
MachineScheduler.cpp
@@ -91,16 +91,17 @@ add_llvm_library(LLVMCodeGen
ShrinkWrapping.cpp
SjLjEHPrepare.cpp
SlotIndexes.cpp
- Spiller.cpp
SpillPlacement.cpp
+ Spiller.cpp
SplitKit.cpp
+ StackColoring.cpp
StackProtector.cpp
StackSlotColoring.cpp
- StackColoring.cpp
StrongPHIElimination.cpp
TailDuplication.cpp
TargetFrameLoweringImpl.cpp
TargetInstrInfo.cpp
+ TargetLoweringBase.cpp
TargetLoweringObjectFileImpl.cpp
TargetOptionsImpl.cpp
TargetRegisterInfo.cpp
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 48105d9..0eb74a4 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -57,23 +57,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn());
- // Determine the live-out physregs for this block.
- if (IsReturnBlock) {
- // In a return block, examine the function live-out regs.
- for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
- E = MRI.liveout_end(); I != E; ++I) {
- for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
- unsigned Reg = *AI;
- Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BBSize;
- DefIndices[Reg] = ~0u;
- }
- }
- }
-
- // In a non-return block, examine the live-in regs of all successors.
- // Note a return block can have successors if the return instruction is
- // predicated.
+ // Examine the live-in regs of all successors.
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
@@ -371,12 +355,13 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
return false;
}
-unsigned
-CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin,
- RegRefIter RegRefEnd,
- unsigned AntiDepReg,
- unsigned LastNewReg,
- const TargetRegisterClass *RC)
+unsigned CriticalAntiDepBreaker::
+findSuitableFreeRegister(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned AntiDepReg,
+ unsigned LastNewReg,
+ const TargetRegisterClass *RC,
+ SmallVector<unsigned, 2> &Forbid)
{
ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC);
for (unsigned i = 0; i != Order.size(); ++i) {
@@ -401,6 +386,15 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin,
Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
KillIndices[AntiDepReg] > DefIndices[NewReg])
continue;
+ // If NewReg overlaps any of the forbidden registers, we can't use it.
+ bool Forbidden = false;
+ for (SmallVector<unsigned, 2>::iterator it = Forbid.begin(),
+ ite = Forbid.end(); it != ite; ++it)
+ if (TRI->regsOverlap(NewReg, *it)) {
+ Forbidden = true;
+ break;
+ }
+ if (Forbidden) continue;
return NewReg;
}
@@ -564,6 +558,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
PrescanInstruction(MI);
+ SmallVector<unsigned, 2> ForbidRegs;
+
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
// defined in a call must not be changed (ABI).
@@ -574,7 +570,9 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
AntiDepReg = 0;
else if (AntiDepReg) {
// If this instruction has a use of AntiDepReg, breaking it
- // is invalid.
+ // is invalid. If the instruction defines other registers,
+ // save a list of them so that we don't pick a new register
+ // that overlaps any of them.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
@@ -584,6 +582,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
AntiDepReg = 0;
break;
}
+ if (MO.isDef() && Reg != AntiDepReg)
+ ForbidRegs.push_back(Reg);
}
}
@@ -606,7 +606,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second,
AntiDepReg,
LastNewReg[AntiDepReg],
- RC)) {
+ RC, ForbidRegs)) {
DEBUG(dbgs() << "Breaking anti-dependence edge on "
<< TRI->getName(AntiDepReg)
<< " with " << RegRefs.count(AntiDepReg) << " references"
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 8fb2b0e..df13dd3 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -102,7 +102,8 @@ class TargetRegisterInfo;
RegRefIter RegRefEnd,
unsigned AntiDepReg,
unsigned LastNewReg,
- const TargetRegisterClass *RC);
+ const TargetRegisterClass *RC,
+ SmallVector<unsigned, 2> &Forbid);
};
}
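
The new Forbid parameter threads the instruction's other defs into the
candidate scan in findSuitableFreeRegister. A self-contained rendition of the
filter with toy integer registers; the equality test is a stand-in for
TRI->regsOverlap, and the other suitability checks are omitted:

    #include <vector>

    static bool regsOverlap(unsigned A, unsigned B) { return A == B; } // toy alias model
    static unsigned pickFreeReg(const std::vector<unsigned> &Order,
                                const std::vector<unsigned> &Forbid) {
      for (unsigned i = 0; i != Order.size(); ++i) {
        bool Forbidden = false;
        for (unsigned j = 0; j != Forbid.size(); ++j)
          if (regsOverlap(Order[i], Forbid[j])) {
            Forbidden = true;
            break;
          }
        if (!Forbidden)
          return Order[i]; // first candidate that clobbers no other def
      }
      return 0; // no suitable register
    }
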
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index a526d24..a54217f 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -99,15 +99,6 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
// Start out assuming that reserved registers are live out of this block.
LivePhysRegs = MRI->getReservedRegs();
- // Also add any explicit live-out physregs for this block.
- if (!MBB->empty() && MBB->back().isReturn())
- for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(),
- LOE = MRI->liveout_end(); LOI != LOE; ++LOI) {
- unsigned Reg = *LOI;
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
- LivePhysRegs.set(Reg);
- }
-
// Add live-ins from successors to LivePhysRegs. Normally, physregs are not
// live across blocks, but some targets (x86) can have flags live out of a
// block.
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 4cafa96..f27ec77 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -33,7 +33,7 @@ STATISTIC(NumResumesLowered, "Number of resume calls lowered");
namespace {
class DwarfEHPrepare : public FunctionPass {
const TargetMachine *TM;
- const TargetLowering *TLI;
+ const TargetLoweringBase *TLI;
// RewindFunction - _Unwind_Resume or the target equivalent.
Constant *RewindFunction;
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index f332925..fac207e 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -17,8 +17,6 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "early-ifcvt"
-#include "llvm/CodeGen/Passes.h"
-#include "MachineTraceMetrics.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
@@ -31,6 +29,8 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index 0b9e83d..1611db8 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -49,8 +49,6 @@ private:
bool LowerSubregToReg(MachineInstr *MI);
bool LowerCopy(MachineInstr *MI);
- void TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
- const TargetRegisterInfo *TRI);
void TransferImplicitDefs(MachineInstr *MI);
};
} // end anonymous namespace
@@ -61,21 +59,6 @@ char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID;
INITIALIZE_PASS(ExpandPostRA, "postrapseudos",
"Post-RA pseudo instruction expansion pass", false, false)
-/// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead,
-/// and the lowered replacement instructions immediately precede it.
-/// Mark the replacement instructions with the dead flag.
-void
-ExpandPostRA::TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
- const TargetRegisterInfo *TRI) {
- for (MachineBasicBlock::iterator MII =
- prior(MachineBasicBlock::iterator(MI)); ; --MII) {
- if (MII->addRegisterDead(DstReg, TRI))
- break;
- assert(MII != MI->getParent()->begin() &&
- "copyPhysReg output doesn't reference destination register!");
- }
-}
-
/// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered
/// replacement instructions immediately precede it. Copy any implicit-def
/// operands from MI to the replacement instruction.
@@ -114,6 +97,12 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
+ if (MI->allDefsAreDead()) {
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "subreg: replaced by: " << *MI);
+ return true;
+ }
+
if (DstSubReg == InsReg) {
// No need to insert an identity copy instruction.
// Watch out for cases like this:
@@ -135,10 +124,6 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
MachineBasicBlock::iterator CopyMI = MI;
--CopyMI;
CopyMI->addRegisterDefined(DstReg);
-
- // Transfer the kill/dead flags, if needed.
- if (MI->getOperand(0).isDead())
- TransferDeadFlag(MI, DstSubReg, TRI);
DEBUG(dbgs() << "subreg: " << *CopyMI);
}
@@ -148,6 +133,14 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
}
bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
+
+ if (MI->allDefsAreDead()) {
+ DEBUG(dbgs() << "dead copy: " << *MI);
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "replaced by: " << *MI);
+ return true;
+ }
+
MachineOperand &DstMO = MI->getOperand(0);
MachineOperand &SrcMO = MI->getOperand(1);
@@ -155,7 +148,7 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
DEBUG(dbgs() << "identity copy: " << *MI);
// No need to insert an identity copy instruction, but replace with a KILL
// if liveness is changed.
- if (DstMO.isDead() || SrcMO.isUndef() || MI->getNumOperands() > 2) {
+ if (SrcMO.isUndef() || MI->getNumOperands() > 2) {
// We must make sure the super-register gets killed. Replace the
// instruction with KILL.
MI->setDesc(TII->get(TargetOpcode::KILL));
@@ -171,8 +164,6 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(),
DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill());
- if (DstMO.isDead())
- TransferDeadFlag(MI, DstMO.getReg(), TRI);
if (MI->getNumOperands() > 2)
TransferImplicitDefs(MI);
DEBUG({
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index a6a06e4..ef5247c 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -37,21 +37,9 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const;
bool runOnFunction(Function &F);
- };
-
- class Deleter : public FunctionPass {
- static char ID;
-
- public:
- Deleter();
-
- const char *getPassName() const;
- void getAnalysisUsage(AnalysisUsage &AU) const;
-
- bool runOnFunction(Function &F);
bool doFinalization(Module &M);
};
-
+
}
INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
@@ -182,32 +170,9 @@ bool Printer::runOnFunction(Function &F) {
return false;
}
-// -----------------------------------------------------------------------------
-
-char Deleter::ID = 0;
-
-FunctionPass *llvm::createGCInfoDeleter() {
- return new Deleter();
-}
-
-Deleter::Deleter() : FunctionPass(ID) {}
-
-const char *Deleter::getPassName() const {
- return "Delete Garbage Collector Information";
-}
-
-void Deleter::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<GCModuleInfo>();
-}
-
-bool Deleter::runOnFunction(Function &MF) {
- return false;
-}
-
-bool Deleter::doFinalization(Module &M) {
+bool Printer::doFinalization(Module &M) {
GCModuleInfo *GMI = getAnalysisIfAvailable<GCModuleInfo>();
- assert(GMI && "Deleter didn't require GCModuleInfo?!");
+ assert(GMI && "Printer didn't require GCModuleInfo?!");
GMI->clear();
return false;
}
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 8906991..9958d7d 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -151,7 +151,7 @@ namespace {
/// basic block number.
std::vector<BBInfo> BBAnalysis;
- const TargetLowering *TLI;
+ const TargetLoweringBase *TLI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const InstrItineraryData *InstrItins;
@@ -1557,7 +1557,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
if (Succ == FallThrough)
continue;
FromBBI.BB->removeSuccessor(Succ);
- if (AddEdges)
+ if (AddEdges && !ToBBI.BB->isSuccessor(Succ))
ToBBI.BB->addSuccessor(Succ);
}
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 16e7968..07f0ccf 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -413,30 +413,22 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
}
case Intrinsic::stacksave:
+ case Intrinsic::stackrestore: {
if (!Warned)
- Context.emitWarning("this target does not support the "
- "llvm.stacksave intrinsic");
- Warned = true;
- CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
- break;
-
- case Intrinsic::stackrestore:
- if (!Warned)
- Context.emitWarning("this target does not support the "
- "llvm.stackrestore intrinsic");
+ errs() << "WARNING: this target does not support the llvm.stack"
+ << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
+ "save" : "restore") << " intrinsic.\n";
Warned = true;
+ if (Callee->getIntrinsicID() == Intrinsic::stacksave)
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
break;
+ }
case Intrinsic::returnaddress:
- Context.emitWarning("this target does not support the "
- "llvm.returnaddress intrinsic");
- CI->replaceAllUsesWith(ConstantPointerNull::get(
- cast<PointerType>(CI->getType())));
- break;
-
case Intrinsic::frameaddress:
- Context.emitWarning("this target does not support the "
- "llvm.frameaddress intrinsic");
+ errs() << "WARNING: this target does not support the llvm."
+ << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
+ "return" : "frame") << "address intrinsic.\n";
CI->replaceAllUsesWith(ConstantPointerNull::get(
cast<PointerType>(CI->getType())));
break;
@@ -446,12 +438,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
case Intrinsic::pcmarker:
break; // Simply strip out pcmarker on unsupported architectures
- case Intrinsic::readcyclecounter:
- Context.emitWarning("this target does not support the "
- "llvm.readcyclecounter intrinsic; "
- "it is being lowered to a constant 0");
+ case Intrinsic::readcyclecounter: {
+ errs() << "WARNING: this target does not support the llvm.readcyclecoun"
+ << "ter intrinsic. It is being lowered to a constant 0\n";
CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
break;
+ }
case Intrinsic::dbg_declare:
break; // Simply strip out debugging intrinsics
diff --git a/lib/CodeGen/LLVMBuild.txt b/lib/CodeGen/LLVMBuild.txt
index fee0347..81ef1aa 100644
--- a/lib/CodeGen/LLVMBuild.txt
+++ b/lib/CodeGen/LLVMBuild.txt
@@ -22,4 +22,4 @@ subdirectories = AsmPrinter SelectionDAG
type = Library
name = CodeGen
parent = Libraries
-required_libraries = Analysis Core MC Scalar Support Target TransformUtils
+required_libraries = Analysis Core MC ObjCARC Scalar Support Target TransformUtils
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 12cd2d1..1a09837 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -226,7 +226,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
PM.add(Printer);
- PM.add(createGCInfoDeleter());
return false;
}
@@ -245,7 +244,6 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
return true;
addCodeEmitter(PM, JCE);
- PM.add(createGCInfoDeleter());
return false; // success!
}
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index 3c01d91..8172154 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -314,24 +314,22 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) {
void LexicalScope::anchor() { }
/// dump - Print data structures.
-void LexicalScope::dump() const {
+void LexicalScope::dump(unsigned Indent) const {
#ifndef NDEBUG
raw_ostream &err = dbgs();
- err.indent(IndentLevel);
+ err.indent(Indent);
err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n";
const MDNode *N = Desc;
+ err.indent(Indent);
N->dump();
if (AbstractScope)
- err << "Abstract Scope\n";
+ err << std::string(Indent, ' ') << "Abstract Scope\n";
- IndentLevel += 2;
if (!Children.empty())
- err << "Children ...\n";
+ err << std::string(Indent + 2, ' ') << "Children ...\n";
for (unsigned i = 0, e = Children.size(); i != e; ++i)
if (Children[i] != this)
- Children[i]->dump();
-
- IndentLevel -= 2;
+ Children[i]->dump(Indent + 2);
#endif
}
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 786f353..0b117ac 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -247,10 +247,6 @@ public:
LiveIntervals &LIS, MachineDominatorTree &MDT,
UserValueScopes &UVS);
- /// renameRegister - Update locations to rewrite OldReg as NewReg:SubIdx.
- void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx,
- const TargetRegisterInfo *TRI);
-
/// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is
/// live. Returns true if any changes were made.
bool splitRegister(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs);
@@ -259,7 +255,7 @@ public:
/// provided virtual register map.
void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI);
- /// emitDebugVariables - Recreate DBG_VALUE instruction from data structures.
+ /// emitDebugValues - Recreate DBG_VALUE instruction from data structures.
void emitDebugValues(VirtRegMap *VRM,
LiveIntervals &LIS, const TargetInstrInfo &TRI);
@@ -286,6 +282,11 @@ class LDVImpl {
MachineDominatorTree *MDT;
const TargetRegisterInfo *TRI;
+ /// Whether emitDebugValues has been called.
+ bool EmitDone;
+ /// Whether the machine function was modified during this pass.
+ bool ModifiedMF;
+
/// userValues - All allocated UserValue instances.
SmallVector<UserValue*, 8> userValues;
@@ -320,27 +321,30 @@ class LDVImpl {
void computeIntervals();
public:
- LDVImpl(LiveDebugVariables *ps) : pass(*ps) {}
+ LDVImpl(LiveDebugVariables *ps) : pass(*ps), EmitDone(false),
+ ModifiedMF(false) {}
bool runOnMachineFunction(MachineFunction &mf);
- /// clear - Relase all memory.
+ /// clear - Release all memory.
void clear() {
DeleteContainerPointers(userValues);
userValues.clear();
virtRegToEqClass.clear();
userVarMap.clear();
+ // Make sure we call emitDebugValues if the machine function was modified.
+ assert((!ModifiedMF || EmitDone) &&
+ "Dbg values are not emitted in LDV");
+ EmitDone = false;
+ ModifiedMF = false;
}
/// mapVirtReg - Map virtual register to an equivalence class.
void mapVirtReg(unsigned VirtReg, UserValue *EC);
- /// renameRegister - Replace all references to OldReg with NewReg:SubIdx.
- void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
-
/// splitRegister - Replace all references to OldReg with NewRegs.
void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs);
- /// emitDebugVariables - Recreate DBG_VALUE instruction from data structures.
+ /// emitDebugValues - Recreate DBG_VALUE instruction from data structures.
void emitDebugValues(VirtRegMap *VRM);
void print(raw_ostream&);
@@ -693,6 +697,7 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
computeIntervals();
DEBUG(print(dbgs()));
LS.releaseMemory();
+ ModifiedMF = Changed;
return Changed;
}
@@ -714,45 +719,6 @@ LiveDebugVariables::~LiveDebugVariables() {
delete static_cast<LDVImpl*>(pImpl);
}
-void UserValue::
-renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx,
- const TargetRegisterInfo *TRI) {
- for (unsigned i = locations.size(); i; --i) {
- unsigned LocNo = i - 1;
- MachineOperand &Loc = locations[LocNo];
- if (!Loc.isReg() || Loc.getReg() != OldReg)
- continue;
- if (TargetRegisterInfo::isPhysicalRegister(NewReg))
- Loc.substPhysReg(NewReg, *TRI);
- else
- Loc.substVirtReg(NewReg, SubIdx, *TRI);
- coalesceLocation(LocNo);
- }
-}
-
-void LDVImpl::
-renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) {
- UserValue *UV = lookupVirtReg(OldReg);
- if (!UV)
- return;
-
- if (TargetRegisterInfo::isVirtualRegister(NewReg))
- mapVirtReg(NewReg, UV);
- if (OldReg != NewReg)
- virtRegToEqClass.erase(OldReg);
-
- do {
- UV->renameRegister(OldReg, NewReg, SubIdx, TRI);
- UV = UV->getNext();
- } while (UV);
-}
-
-void LiveDebugVariables::
-renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) {
- if (pImpl)
- static_cast<LDVImpl*>(pImpl)->renameRegister(OldReg, NewReg, SubIdx);
-}
-
//===----------------------------------------------------------------------===//
// Live Range Splitting
//===----------------------------------------------------------------------===//
@@ -1011,6 +977,7 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
userValues[i]->rewriteLocations(*VRM, *TRI);
userValues[i]->emitDebugValues(VRM, *LIS, *TII);
}
+ EmitDone = true;
}
void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) {
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 68f4b16..dccd847 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -440,7 +440,7 @@ void LiveInterval::join(LiveInterval &Other,
iterator OutIt = begin();
OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]];
- for (iterator I = next(OutIt), E = end(); I != E; ++I) {
+ for (iterator I = llvm::next(OutIt), E = end(); I != E; ++I) {
VNInfo* nextValNo = NewVNInfo[LHSValNoAssignments[I->valno->id]];
assert(nextValNo != 0 && "Huh?");
@@ -464,10 +464,12 @@ void LiveInterval::join(LiveInterval &Other,
ranges.erase(OutIt, end());
}
- // Remember assignements because val# ids are changing.
- SmallVector<unsigned, 16> OtherAssignments;
+ // Rewrite Other values before changing the VNInfo ids.
+ // This can leave Other in an invalid state because we're not coalescing
+ // touching segments that now have identical values. That's OK since Other is
+ // not supposed to be valid after calling join().
for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
- OtherAssignments.push_back(RHSValNoAssignments[I->valno->id]);
+ I->valno = NewVNInfo[RHSValNoAssignments[I->valno->id]];
// Update val# info. Renumber them and make sure they all belong to this
// LiveInterval now. Also remove dead val#'s.
@@ -486,148 +488,9 @@ void LiveInterval::join(LiveInterval &Other,
valnos.resize(NumNewVals); // shrinkify
// Okay, now insert the RHS live ranges into the LHS.
- unsigned RangeNo = 0;
- for (iterator I = Other.begin(), E = Other.end(); I != E; ++I, ++RangeNo) {
- // Map the valno in the other live range to the current live range.
- I->valno = NewVNInfo[OtherAssignments[RangeNo]];
- assert(I->valno && "Adding a dead range?");
- }
- mergeIntervalRanges(Other);
-
- verify();
-}
-
-/// \brief Helper function for merging in another LiveInterval's ranges.
-///
-/// This is a helper routine implementing an efficient merge of another
-/// LiveIntervals ranges into the current interval.
-///
-/// \param LHSValNo If non-NULL, set as the new value number for every range
-/// from RHS which is merged into the LHS.
-/// \param RHSValNo If non-NULL, then only ranges in RHS whose original value
-/// number maches this value number will be merged into LHS.
-void LiveInterval::mergeIntervalRanges(const LiveInterval &RHS,
- VNInfo *LHSValNo,
- const VNInfo *RHSValNo) {
- if (RHS.empty())
- return;
-
- // Ensure we're starting with a valid range. Note that we don't verify RHS
- // because it may have had its value numbers adjusted in preparation for
- // merging.
- verify();
-
- // The strategy for merging these efficiently is as follows:
- //
- // 1) Find the beginning of the impacted ranges in the LHS.
- // 2) Create a new, merged sub-squence of ranges merging from the position in
- // #1 until either LHS or RHS is exhausted. Any part of LHS between RHS
- // entries being merged will be copied into this new range.
- // 3) Replace the relevant section in LHS with these newly merged ranges.
- // 4) Append any remaning ranges from RHS if LHS is exhausted in #2.
- //
- // We don't follow the typical in-place merge strategy for sorted ranges of
- // appending the new ranges to the back and then using std::inplace_merge
- // because one step of the merge can both mutate the original elements and
- // remove elements from the original. Essentially, because the merge includes
- // collapsing overlapping ranges, a more complex approach is required.
-
- // We do an initial binary search to optimize for a common pattern: a large
- // LHS, and a very small RHS.
- const_iterator RI = RHS.begin(), RE = RHS.end();
- iterator LE = end(), LI = std::upper_bound(begin(), LE, *RI);
-
- // Merge into NewRanges until one of the ranges is exhausted.
- SmallVector<LiveRange, 4> NewRanges;
-
- // Keep track of where to begin the replacement.
- iterator ReplaceI = LI;
-
- // If there are preceding ranges in the LHS, put the last one into NewRanges
- // so we can optionally extend it. Adjust the replacement point accordingly.
- if (LI != begin()) {
- ReplaceI = llvm::prior(LI);
- NewRanges.push_back(*ReplaceI);
- }
-
- // Now loop over the mergable portions of both LHS and RHS, merging into
- // NewRanges.
- while (LI != LE && RI != RE) {
- // Skip incoming ranges with the wrong value.
- if (RHSValNo && RI->valno != RHSValNo) {
- ++RI;
- continue;
- }
-
- // Select the first range. We pick the earliest start point, and then the
- // largest range.
- LiveRange R = *LI;
- if (*RI < R) {
- R = *RI;
- ++RI;
- if (LHSValNo)
- R.valno = LHSValNo;
- } else {
- ++LI;
- }
-
- if (NewRanges.empty()) {
- NewRanges.push_back(R);
- continue;
- }
-
- LiveRange &LastR = NewRanges.back();
- if (R.valno == LastR.valno) {
- // Try to merge this range into the last one.
- if (R.start <= LastR.end) {
- LastR.end = std::max(LastR.end, R.end);
- continue;
- }
- } else {
- // We can't merge ranges across a value number.
- assert(R.start >= LastR.end &&
- "Cannot overlap two LiveRanges with differing ValID's");
- }
-
- // If all else fails, just append the range.
- NewRanges.push_back(R);
- }
- assert(RI == RE || LI == LE);
-
- // Check for being able to merge into the trailing sequence of ranges on the LHS.
- if (!NewRanges.empty())
- for (; LI != LE && (LI->valno == NewRanges.back().valno &&
- LI->start <= NewRanges.back().end);
- ++LI)
- NewRanges.back().end = std::max(NewRanges.back().end, LI->end);
-
- // Replace the ranges in the LHS with the newly merged ones. It would be
- // really nice if there were a move-supporting 'replace' directly in
- // SmallVector, but as there is not, we pay the price of copies to avoid
- // wasted memory allocations.
- SmallVectorImpl<LiveRange>::iterator NRI = NewRanges.begin(),
- NRE = NewRanges.end();
- for (; ReplaceI != LI && NRI != NRE; ++ReplaceI, ++NRI)
- *ReplaceI = *NRI;
- if (NRI == NRE)
- ranges.erase(ReplaceI, LI);
- else
- ranges.insert(LI, NRI, NRE);
-
- // And finally insert any trailing end of RHS (if we have one).
- for (; RI != RE; ++RI) {
- LiveRange R = *RI;
- if (LHSValNo)
- R.valno = LHSValNo;
- if (!ranges.empty() &&
- ranges.back().valno == R.valno && R.start <= ranges.back().end)
- ranges.back().end = std::max(ranges.back().end, R.end);
- else
- ranges.push_back(R);
- }
-
- // Ensure we finished with a valid new sequence of ranges.
- verify();
+ LiveRangeUpdater Updater(this);
+ for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
+ Updater.add(*I);
}
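
Typical use of the updater, grounded in the add(start, end, valno) calls made
by the two merge functions below: feed segments in roughly increasing start
order, then flush() to restore the interval's invariants. DstLI, SrcLI and
DstValNo are placeholder names; join() above instead lets the updater flush
when it goes out of scope, assuming its destructor calls flush():

    LiveRangeUpdater Updater(&DstLI);          // DstLI: interval being grown
    for (LiveInterval::const_iterator I = SrcLI.begin(), E = SrcLI.end();
         I != E; ++I)
      Updater.add(I->start, I->end, DstValNo); // every segment takes DstValNo
    Updater.flush();                           // coalesce, reorder, verify
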
/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
@@ -636,7 +499,9 @@ void LiveInterval::mergeIntervalRanges(const LiveInterval &RHS,
/// the overlapping LiveRanges have the specified value number.
void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
VNInfo *LHSValNo) {
- mergeIntervalRanges(RHS, LHSValNo);
+ LiveRangeUpdater Updater(this);
+ for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I)
+ Updater.add(I->start, I->end, LHSValNo);
}
/// MergeValueInAsValue - Merge all of the live ranges of a specific val#
@@ -647,7 +512,10 @@ void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS,
const VNInfo *RHSValNo,
VNInfo *LHSValNo) {
- mergeIntervalRanges(RHS, LHSValNo, RHSValNo);
+ LiveRangeUpdater Updater(this);
+ for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I)
+ if (I->valno == RHSValNo)
+ Updater.add(I->start, I->end, LHSValNo);
}
/// MergeValueNumberInto - This method is called when two value numbers
@@ -785,6 +653,206 @@ void LiveRange::print(raw_ostream &os) const {
os << *this;
}
+//===----------------------------------------------------------------------===//
+// LiveRangeUpdater class
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeUpdater class always maintains these invariants:
+//
+// - When LastStart is invalid, Spills is empty and the iterators are invalid.
+// This is the initial state, and the state created by flush().
+// In this state, isDirty() returns false.
+//
+// Otherwise, segments are kept in three separate areas:
+//
+// 1. [begin; WriteI) at the front of LI.
+// 2. [ReadI; end) at the back of LI.
+// 3. Spills.
+//
+// - LI.begin() <= WriteI <= ReadI <= LI.end().
+// - Segments in all three areas are fully ordered and coalesced.
+// - Segments in area 1 precede and can't coalesce with segments in area 2.
+// - Segments in Spills precede and can't coalesce with segments in area 2.
+// - No coalescing is possible between segments in Spills and segments in area
+// 1, and there are no overlapping segments.
+//
+// The segments in Spills are not ordered with respect to the segments in area
+// 1. They need to be merged.
+//
+// When they exist, Spills.back().start <= LastStart,
+// and WriteI[-1].start <= LastStart.
+
+void LiveRangeUpdater::print(raw_ostream &OS) const {
+ if (!isDirty()) {
+ if (LI)
+ OS << "Clean " << PrintReg(LI->reg) << " updater: " << *LI << '\n';
+ else
+ OS << "Null updater.\n";
+ return;
+ }
+ assert(LI && "Can't have null LI in dirty updater.");
+ OS << PrintReg(LI->reg) << " updater with gap = " << (ReadI - WriteI)
+ << ", last start = " << LastStart
+ << ":\n Area 1:";
+ for (LiveInterval::const_iterator I = LI->begin(); I != WriteI; ++I)
+ OS << ' ' << *I;
+ OS << "\n Spills:";
+ for (unsigned I = 0, E = Spills.size(); I != E; ++I)
+ OS << ' ' << Spills[I];
+ OS << "\n Area 2:";
+ for (LiveInterval::const_iterator I = ReadI, E = LI->end(); I != E; ++I)
+ OS << ' ' << *I;
+ OS << '\n';
+}
+
+void LiveRangeUpdater::dump() const {
+ print(errs());
+}
+
+// Determine if A and B should be coalesced.
+static inline bool coalescable(const LiveRange &A, const LiveRange &B) {
+ assert(A.start <= B.start && "Unordered live ranges.");
+ if (A.end == B.start)
+ return A.valno == B.valno;
+ if (A.end < B.start)
+ return false;
+ assert(A.valno == B.valno && "Cannot overlap different values");
+ return true;
+}
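
Concretely, with half-open [start,end) segments and v1, v2 distinct value
numbers, the predicate answers these cases:

    //   [a,b):v1 , [b,c):v1  -> true   (abutting, same value: coalesce)
    //   [a,b):v1 , [b,c):v2  -> false  (abutting, different values)
    //   [a,c):v1 , [b,d):v1  -> true   (overlapping; must share a value)
    //   [a,b):v1 , [c,d):v2  -> false  (disjoint, b < c)
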
+
+void LiveRangeUpdater::add(LiveRange Seg) {
+ assert(LI && "Cannot add to a null destination");
+
+ // Flush the state if Start moves backwards.
+ if (!LastStart.isValid() || LastStart > Seg.start) {
+ if (isDirty())
+ flush();
+ // This brings us to an uninitialized state. Reinitialize.
+ assert(Spills.empty() && "Leftover spilled segments");
+ WriteI = ReadI = LI->begin();
+ }
+
+ // Remember start for next time.
+ LastStart = Seg.start;
+
+ // Advance ReadI until it ends after Seg.start.
+ LiveInterval::iterator E = LI->end();
+ if (ReadI != E && ReadI->end <= Seg.start) {
+ // First try to close the gap between WriteI and ReadI with spills.
+ if (ReadI != WriteI)
+ mergeSpills();
+ // Then advance ReadI.
+ if (ReadI == WriteI)
+ ReadI = WriteI = LI->find(Seg.start);
+ else
+ while (ReadI != E && ReadI->end <= Seg.start)
+ *WriteI++ = *ReadI++;
+ }
+
+ assert(ReadI == E || ReadI->end > Seg.start);
+
+ // Check if the ReadI segment begins early.
+ if (ReadI != E && ReadI->start <= Seg.start) {
+ assert(ReadI->valno == Seg.valno && "Cannot overlap different values");
+ // Bail if Seg is completely contained in ReadI.
+ if (ReadI->end >= Seg.end)
+ return;
+ // Coalesce into Seg.
+ Seg.start = ReadI->start;
+ ++ReadI;
+ }
+
+ // Coalesce as much as possible from ReadI into Seg.
+ while (ReadI != E && coalescable(Seg, *ReadI)) {
+ Seg.end = std::max(Seg.end, ReadI->end);
+ ++ReadI;
+ }
+
+ // Try coalescing Spills.back() into Seg.
+ if (!Spills.empty() && coalescable(Spills.back(), Seg)) {
+ Seg.start = Spills.back().start;
+ Seg.end = std::max(Spills.back().end, Seg.end);
+ Spills.pop_back();
+ }
+
+ // Try coalescing Seg into WriteI[-1].
+ if (WriteI != LI->begin() && coalescable(WriteI[-1], Seg)) {
+ WriteI[-1].end = std::max(WriteI[-1].end, Seg.end);
+ return;
+ }
+
+ // Seg doesn't coalesce with anything, and needs to be inserted somewhere.
+ if (WriteI != ReadI) {
+ *WriteI++ = Seg;
+ return;
+ }
+
+ // Finally, append to LI or Spills.
+ if (WriteI == E) {
+ LI->ranges.push_back(Seg);
+ WriteI = ReadI = LI->ranges.end();
+ } else
+ Spills.push_back(Seg);
+}
+
+// Merge as many spilled segments as possible into the gap between WriteI
+// and ReadI. Advance WriteI to reflect the merged segments.
+void LiveRangeUpdater::mergeSpills() {
+ // Perform a backwards merge of Spills and the segments below WriteI.
+ size_t GapSize = ReadI - WriteI;
+ size_t NumMoved = std::min(Spills.size(), GapSize);
+ LiveInterval::iterator Src = WriteI;
+ LiveInterval::iterator Dst = Src + NumMoved;
+ LiveInterval::iterator SpillSrc = Spills.end();
+ LiveInterval::iterator B = LI->begin();
+
+ // This is the new WriteI position after merging spills.
+ WriteI = Dst;
+
+ // Now merge Src and Spills backwards.
+ while (Src != Dst) {
+ if (Src != B && Src[-1].start > SpillSrc[-1].start)
+ *--Dst = *--Src;
+ else
+ *--Dst = *--SpillSrc;
+ }
+ assert(NumMoved == size_t(Spills.end() - SpillSrc));
+ Spills.erase(SpillSrc, Spills.end());
+}
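
The loop above is the classic in-place backwards merge: writing from the high
end of the gap guarantees no element is overwritten before it has been read.
A standalone, runnable rendition with plain ints; for simplicity the gap here
already equals Spills.size(), which is what flush() arranges below:

    #include <cstdio>
    #include <vector>

    int main() {
      //                   area 1 | stale gap | area 2
      std::vector<int> V  = {1, 5,    0, 0,      8, 9};
      std::vector<int> Sp = {3, 7};     // sorted spills, belong before area 2
      int *B = &V[0];
      int *Src = &V[2];                 // WriteI: one past live area 1
      int *Dst = Src + Sp.size();       // new end of the merged prefix
      int *SpillSrc = Sp.data() + Sp.size();
      while (Src != Dst)                // fill the gap from the back
        *--Dst = (Src != B && Src[-1] > SpillSrc[-1]) ? *--Src : *--SpillSrc;
      for (unsigned i = 0; i != V.size(); ++i)
        std::printf("%d ", V[i]);       // prints: 1 3 5 7 8 9
    }
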
+
+void LiveRangeUpdater::flush() {
+ if (!isDirty())
+ return;
+ // Clear the dirty state.
+ LastStart = SlotIndex();
+
+ assert(LI && "Cannot add to a null destination");
+
+ // Nothing to merge?
+ if (Spills.empty()) {
+ LI->ranges.erase(WriteI, ReadI);
+ LI->verify();
+ return;
+ }
+
+ // Resize the WriteI - ReadI gap to match Spills.
+ size_t GapSize = ReadI - WriteI;
+ if (GapSize < Spills.size()) {
+ // The gap is too small. Make some room.
+ size_t WritePos = WriteI - LI->begin();
+ LI->ranges.insert(ReadI, Spills.size() - GapSize, LiveRange());
+ // This also invalidates ReadI, but it is recomputed below.
+ WriteI = LI->ranges.begin() + WritePos;
+ } else {
+ // Shrink the gap if necessary.
+ LI->ranges.erase(WriteI + Spills.size(), ReadI);
+ }
+ ReadI = WriteI + Spills.size();
+ mergeSpills();
+ LI->verify();
+}
+
unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
// Create initial equivalence classes.
EqClass.clear();
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 4198457..22b35d5 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -40,11 +40,6 @@
#include <limits>
using namespace llvm;
-// Switch to the new experimental algorithm for computing live intervals.
-static cl::opt<bool>
-NewLiveIntervals("new-live-intervals", cl::Hidden,
- cl::desc("Use new algorithm forcomputing live intervals"));
-
char LiveIntervals::ID = 0;
char &llvm::LiveIntervalsID = LiveIntervals::ID;
INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
@@ -60,6 +55,9 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
AU.addPreserved<AliasAnalysis>();
+ // LiveVariables isn't really required by this analysis, it is only required
+ // here to make sure it is live during TwoAddressInstructionPass and
+ // PHIElimination. This is temporary.
AU.addRequired<LiveVariables>();
AU.addPreserved<LiveVariables>();
AU.addPreservedID(MachineLoopInfoID);
@@ -105,7 +103,6 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
TRI = TM->getRegisterInfo();
TII = TM->getInstrInfo();
AA = &getAnalysis<AliasAnalysis>();
- LV = &getAnalysis<LiveVariables>();
Indexes = &getAnalysis<SlotIndexes>();
DomTree = &getAnalysis<MachineDominatorTree>();
if (!LRCalc)
@@ -114,16 +111,8 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
// Allocate space for all virtual registers.
VirtRegIntervals.resize(MRI->getNumVirtRegs());
- if (NewLiveIntervals) {
- // This is the new way of computing live intervals.
- // It is independent of LiveVariables, and it can run at any time.
- computeVirtRegs();
- computeRegMasks();
- } else {
- // This is the old way of computing live intervals.
- // It depends on LiveVariables.
- computeIntervals();
- }
+ computeVirtRegs();
+ computeRegMasks();
computeLiveInRegUnits();
DEBUG(dump());
@@ -165,298 +154,6 @@ void LiveIntervals::dumpInstrs() const {
}
#endif
-static
-bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) {
- unsigned Reg = MI.getOperand(MOIdx).getReg();
- for (unsigned i = MOIdx+1, e = MI.getNumOperands(); i < e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg())
- continue;
- if (MO.getReg() == Reg && MO.isDef()) {
- assert(MI.getOperand(MOIdx).getSubReg() != MO.getSubReg() &&
- MI.getOperand(MOIdx).getSubReg() &&
- (MO.getSubReg() || MO.isImplicit()));
- return true;
- }
- }
- return false;
-}
-
-/// isPartialRedef - Return true if the specified def at the specific index is
-/// partially re-defining the specified live interval. A common case of this is
-/// a definition of the sub-register.
-bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO,
- LiveInterval &interval) {
- if (!MO.getSubReg() || MO.isEarlyClobber())
- return false;
-
- SlotIndex RedefIndex = MIIdx.getRegSlot();
- const LiveRange *OldLR =
- interval.getLiveRangeContaining(RedefIndex.getRegSlot(true));
- MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def);
- if (DefMI != 0) {
- return DefMI->findRegisterDefOperandIdx(interval.reg) != -1;
- }
- return false;
-}
-
-void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
- MachineBasicBlock::iterator mi,
- SlotIndex MIIdx,
- MachineOperand& MO,
- unsigned MOIdx,
- LiveInterval &interval) {
- DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, TRI));
-
- // Virtual registers may be defined multiple times (due to phi
- // elimination and 2-addr elimination). Much of what we do only has to be
- // done once for the vreg. We use an empty interval to detect the first
- // time we see a vreg.
- LiveVariables::VarInfo& vi = LV->getVarInfo(interval.reg);
- if (interval.empty()) {
- // Get the Idx of the defining instructions.
- SlotIndex defIndex = MIIdx.getRegSlot(MO.isEarlyClobber());
-
- // Make sure the first definition is not a partial redefinition.
- assert(!MO.readsReg() && "First def cannot also read virtual register "
- "missing <undef> flag?");
-
- VNInfo *ValNo = interval.getNextValue(defIndex, VNInfoAllocator);
- assert(ValNo->id == 0 && "First value in interval is not 0?");
-
- // Loop over all of the blocks that the vreg is defined in. There are
- // two cases we have to handle here. The most common case is a vreg
- // whose lifetime is contained within a basic block. In this case there
- // will be a single kill, in MBB, which comes after the definition.
- if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) {
- // FIXME: what about dead vars?
- SlotIndex killIdx;
- if (vi.Kills[0] != mi)
- killIdx = getInstructionIndex(vi.Kills[0]).getRegSlot();
- else
- killIdx = defIndex.getDeadSlot();
-
- // If the kill happens after the definition, we have an intra-block
- // live range.
- if (killIdx > defIndex) {
- assert(vi.AliveBlocks.empty() &&
- "Shouldn't be alive across any blocks!");
- LiveRange LR(defIndex, killIdx, ValNo);
- interval.addRange(LR);
- DEBUG(dbgs() << " +" << LR << "\n");
- return;
- }
- }
-
- // The other case we handle is when a virtual register lives to the end
- // of the defining block, potentially live across some blocks, then is
- // live into some number of blocks, but gets killed. Start by adding a
- // range that goes from this definition to the end of the defining block.
- LiveRange NewLR(defIndex, getMBBEndIdx(mbb), ValNo);
- DEBUG(dbgs() << " +" << NewLR);
- interval.addRange(NewLR);
-
- bool PHIJoin = LV->isPHIJoin(interval.reg);
-
- if (PHIJoin) {
- // A phi join register is killed at the end of the MBB and revived as a
- // new valno in the killing blocks.
- assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks");
- DEBUG(dbgs() << " phi-join");
- } else {
- // Iterate over all of the blocks that the variable is completely
- // live in, adding [instrIndex(begin), instrIndex(end)+4) to the
- // live interval.
- for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(),
- E = vi.AliveBlocks.end(); I != E; ++I) {
- MachineBasicBlock *aliveBlock = MF->getBlockNumbered(*I);
- LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock),
- ValNo);
- interval.addRange(LR);
- DEBUG(dbgs() << " +" << LR);
- }
- }
-
- // Finally, this virtual register is live from the start of any killing
- // block to the 'use' slot of the killing instruction.
- for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
- MachineInstr *Kill = vi.Kills[i];
- SlotIndex Start = getMBBStartIdx(Kill->getParent());
- SlotIndex killIdx = getInstructionIndex(Kill).getRegSlot();
-
- // Create a live range with a NEW value number. Note that this value
- // number isn't actually defined by an instruction, weird huh? :)
- if (PHIJoin) {
- assert(getInstructionFromIndex(Start) == 0 &&
- "PHI def index points at actual instruction.");
- ValNo = interval.getNextValue(Start, VNInfoAllocator);
- }
- LiveRange LR(Start, killIdx, ValNo);
- interval.addRange(LR);
- DEBUG(dbgs() << " +" << LR);
- }
-
- } else {
- if (MultipleDefsBySameMI(*mi, MOIdx))
- // Multiple defs of the same virtual register by the same instruction.
- // e.g. %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
- // This is likely due to elimination of REG_SEQUENCE instructions. Return
- // here since there is nothing to do.
- return;
-
- // If this is the second time we see a virtual register definition, it
- // must be due to phi elimination or two addr elimination. If this is
- // the result of two address elimination, then the vreg is one of the
- // def-and-use register operands.
-
- // It may also be partial redef like this:
- // 80 %reg1041:6<def> = VSHRNv4i16 %reg1034<kill>, 12, pred:14, pred:%reg0
- // 120 %reg1041:5<def> = VSHRNv4i16 %reg1039<kill>, 12, pred:14, pred:%reg0
- bool PartReDef = isPartialRedef(MIIdx, MO, interval);
- if (PartReDef || mi->isRegTiedToUseOperand(MOIdx)) {
- // If this is a two-address definition, then we have already processed
- // the live range. The only problem is that we didn't realize there
- // are actually two values in the live interval. Because of this we
- // need to take the LiveRange that defines this register and split it
- // into two values.
- SlotIndex RedefIndex = MIIdx.getRegSlot(MO.isEarlyClobber());
-
- const LiveRange *OldLR =
- interval.getLiveRangeContaining(RedefIndex.getRegSlot(true));
- VNInfo *OldValNo = OldLR->valno;
- SlotIndex DefIndex = OldValNo->def.getRegSlot();
-
- // Delete the previous value, which should be short and continuous,
- // because the 2-addr copy must be in the same MBB as the redef.
- interval.removeRange(DefIndex, RedefIndex);
-
- // The new value number (#1) is defined by the instruction we claimed
- // defined value #0.
- VNInfo *ValNo = interval.createValueCopy(OldValNo, VNInfoAllocator);
-
- // Value#0 is now defined by the 2-addr instruction.
- OldValNo->def = RedefIndex;
-
- // Add the new live interval which replaces the range for the input copy.
- LiveRange LR(DefIndex, RedefIndex, ValNo);
- DEBUG(dbgs() << " replace range with " << LR);
- interval.addRange(LR);
-
- // If this redefinition is dead, we need to add a dummy unit live
- // range covering the def slot.
- if (MO.isDead())
- interval.addRange(LiveRange(RedefIndex, RedefIndex.getDeadSlot(),
- OldValNo));
-
- DEBUG(dbgs() << " RESULT: " << interval);
- } else if (LV->isPHIJoin(interval.reg)) {
- // In the case of PHI elimination, each variable definition is only
- // live until the end of the block. We've already taken care of the
- // rest of the live range.
-
- SlotIndex defIndex = MIIdx.getRegSlot();
- if (MO.isEarlyClobber())
- defIndex = MIIdx.getRegSlot(true);
-
- VNInfo *ValNo = interval.getNextValue(defIndex, VNInfoAllocator);
-
- SlotIndex killIndex = getMBBEndIdx(mbb);
- LiveRange LR(defIndex, killIndex, ValNo);
- interval.addRange(LR);
- DEBUG(dbgs() << " phi-join +" << LR);
- } else {
- llvm_unreachable("Multiply defined register");
- }
- }
-
- DEBUG(dbgs() << '\n');
-}
-
-void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator MI,
- SlotIndex MIIdx,
- MachineOperand& MO,
- unsigned MOIdx) {
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx,
- getOrCreateInterval(MO.getReg()));
-}
-
-/// computeIntervals - Computes the live intervals for virtual registers.
-/// For some ordering of the machine instructions [1,N], a live interval is
-/// an interval [i, j), where 1 <= i <= j < N, over which a variable is
-/// live.
-void LiveIntervals::computeIntervals() {
- DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n"
- << "********** Function: " << MF->getName() << '\n');
-
- RegMaskBlocks.resize(MF->getNumBlockIDs());
-
- SmallVector<unsigned, 8> UndefUses;
- for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
- MBBI != E; ++MBBI) {
- MachineBasicBlock *MBB = MBBI;
- RegMaskBlocks[MBB->getNumber()].first = RegMaskSlots.size();
-
- if (MBB->empty())
- continue;
-
- // Track the index of the current machine instr.
- SlotIndex MIIndex = getMBBStartIdx(MBB);
- DEBUG(dbgs() << "BB#" << MBB->getNumber()
- << ":\t\t# derived from " << MBB->getName() << "\n");
-
- // Skip over empty initial indices.
- if (getInstructionFromIndex(MIIndex) == 0)
- MIIndex = Indexes->getNextNonNullIndex(MIIndex);
-
- for (MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
- MI != miEnd; ++MI) {
- DEBUG(dbgs() << MIIndex << "\t" << *MI);
- if (MI->isDebugValue())
- continue;
- assert(Indexes->getInstructionFromIndex(MIIndex) == MI &&
- "Lost SlotIndex synchronization");
-
- // Handle defs.
- for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
- MachineOperand &MO = MI->getOperand(i);
-
- // Collect register masks.
- if (MO.isRegMask()) {
- RegMaskSlots.push_back(MIIndex.getRegSlot());
- RegMaskBits.push_back(MO.getRegMask());
- continue;
- }
-
- if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- continue;
-
- // handle register defs - build intervals
- if (MO.isDef())
- handleRegisterDef(MBB, MI, MIIndex, MO, i);
- else if (MO.isUndef())
- UndefUses.push_back(MO.getReg());
- }
-
- // Move to the next instr slot.
- MIIndex = Indexes->getNextNonNullIndex(MIIndex);
- }
-
- // Compute the number of register mask instructions in this block.
- std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()];
- RMB.second = RegMaskSlots.size() - RMB.first;
- }
-
- // Create empty intervals for registers defined by implicit_def's (except
- // for those implicit_defs that define values which are live out of their
- // blocks).
- for (unsigned i = 0, e = UndefUses.size(); i != e; ++i) {
- unsigned UndefReg = UndefUses[i];
- (void)getOrCreateInterval(UndefReg);
- }
-}
-
LiveInterval* LiveIntervals::createInterval(unsigned reg) {
float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? HUGE_VALF : 0.0F;
return new LiveInterval(reg, Weight);
@@ -1335,3 +1032,129 @@ void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI,
HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
HME.updateAllRanges(MI);
}
+
+void
+LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ ArrayRef<unsigned> OrigRegs) {
+ // Find anchor points, which are at the beginning/end of blocks or at
+ // instructions that already have indexes.
+ while (Begin != MBB->begin() && !Indexes->hasIndex(Begin))
+ --Begin;
+ while (End != MBB->end() && !Indexes->hasIndex(End))
+ ++End;
+
+ SlotIndex endIdx;
+ if (End == MBB->end())
+ endIdx = getMBBEndIdx(MBB).getPrevSlot();
+ else
+ endIdx = getInstructionIndex(End);
+
+ Indexes->repairIndexesInRange(MBB, Begin, End);
+
+ for (MachineBasicBlock::iterator I = End; I != Begin;) {
+ --I;
+ MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+ for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (MOI->isReg() &&
+ TargetRegisterInfo::isVirtualRegister(MOI->getReg()) &&
+ !hasInterval(MOI->getReg())) {
+ LiveInterval &LI = getOrCreateInterval(MOI->getReg());
+ computeVirtRegInterval(&LI);
+ }
+ }
+ }
+
+ for (unsigned i = 0, e = OrigRegs.size(); i != e; ++i) {
+ unsigned Reg = OrigRegs[i];
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ LiveInterval &LI = getInterval(Reg);
+ // FIXME: Should we support undefs that gain defs?
+ if (!LI.hasAtLeastOneValue())
+ continue;
+
+ LiveInterval::iterator LII = LI.find(endIdx);
+ SlotIndex lastUseIdx;
+ if (LII != LI.end() && LII->start < endIdx)
+ lastUseIdx = LII->end;
+ else
+ --LII;
+
+ for (MachineBasicBlock::iterator I = End; I != Begin;) {
+ --I;
+ MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+
+ SlotIndex instrIdx = getInstructionIndex(MI);
+ bool isStartValid = getInstructionFromIndex(LII->start);
+ bool isEndValid = getInstructionFromIndex(LII->end);
+
+ // FIXME: This doesn't currently handle early-clobber or multiple removed
+ // defs inside of the region to repair.
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ const MachineOperand &MO = *OI;
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+
+ if (MO.isDef()) {
+ if (!isStartValid) {
+ if (LII->end.isDead()) {
+ SlotIndex prevStart;
+ if (LII != LI.begin())
+ prevStart = llvm::prior(LII)->start;
+
+ // FIXME: This could be more efficient if there was a removeRange
+ // method that returned an iterator.
+ LI.removeRange(*LII, true);
+ if (prevStart.isValid())
+ LII = LI.find(prevStart);
+ else
+ LII = LI.begin();
+ } else {
+ LII->start = instrIdx.getRegSlot();
+ LII->valno->def = instrIdx.getRegSlot();
+ if (MO.getSubReg() && !MO.isUndef())
+ lastUseIdx = instrIdx.getRegSlot();
+ else
+ lastUseIdx = SlotIndex();
+ continue;
+ }
+ }
+
+ if (!lastUseIdx.isValid()) {
+ VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(),
+ VNInfoAllocator);
+ LiveRange LR(instrIdx.getRegSlot(), instrIdx.getDeadSlot(), VNI);
+ LII = LI.addRange(LR);
+ } else if (LII->start != instrIdx.getRegSlot()) {
+ VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(),
+ VNInfoAllocator);
+ LiveRange LR(instrIdx.getRegSlot(), lastUseIdx, VNI);
+ LII = LI.addRange(LR);
+ }
+
+ if (MO.getSubReg() && !MO.isUndef())
+ lastUseIdx = instrIdx.getRegSlot();
+ else
+ lastUseIdx = SlotIndex();
+ } else if (MO.isUse()) {
+ // FIXME: This should probably be handled outside of this branch,
+ // either as part of the def case (for defs inside of the region) or
+ // after the loop over the region.
+ if (!isEndValid && !LII->end.isBlock())
+ LII->end = instrIdx.getRegSlot();
+ if (!lastUseIdx.isValid())
+ lastUseIdx = instrIdx.getRegSlot();
+ }
+ }
+ }
+ }
+}
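
A hedged sketch of the intended calling convention for this new API: record the registers used in the region before mutating it, then let LiveIntervals reanchor and recompute. The helper name is illustrative; repairIntervalsInRange() itself may widen [Begin, End) to indexed anchor points, as the code above shows.

static void rewriteRegionAndRepair(LiveIntervals *LIS, MachineBasicBlock *MBB,
                                   MachineBasicBlock::iterator Begin,
                                   MachineBasicBlock::iterator End) {
  SmallVector<unsigned, 8> OrigRegs;
  for (MachineBasicBlock::iterator I = Begin; I != End; ++I)
    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
      if (I->getOperand(i).isReg() && I->getOperand(i).getReg())
        OrigRegs.push_back(I->getOperand(i).getReg());

  // ... insert, erase, or rewrite instructions in [Begin, End) here ...

  LIS->repairIntervalsInRange(MBB, Begin, End, OrigRegs);
}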
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index c3ff4f1..dede490 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -18,10 +18,11 @@
using namespace llvm;
-void LiveRangeCalc::reset(const MachineFunction *MF,
+void LiveRangeCalc::reset(const MachineFunction *mf,
SlotIndexes *SI,
MachineDominatorTree *MDT,
VNInfo::Allocator *VNIA) {
+ MF = mf;
MRI = &MF->getRegInfo();
Indexes = SI;
DomTree = MDT;
@@ -104,28 +105,28 @@ void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) {
// Transfer information from the LiveIn vector to the live ranges.
-void LiveRangeCalc::updateLiveIns(VNInfo *OverrideVNI) {
+void LiveRangeCalc::updateLiveIns() {
+ LiveRangeUpdater Updater;
for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(),
E = LiveIn.end(); I != E; ++I) {
if (!I->DomNode)
continue;
MachineBasicBlock *MBB = I->DomNode->getBlock();
-
- VNInfo *VNI = OverrideVNI ? OverrideVNI : I->Value;
- assert(VNI && "No live-in value found");
-
+ assert(I->Value && "No live-in value found");
SlotIndex Start, End;
tie(Start, End) = Indexes->getMBBRange(MBB);
if (I->Kill.isValid())
- I->LI->addRange(LiveRange(Start, I->Kill, VNI));
+ // Value is killed inside this block.
+ End = I->Kill;
else {
- I->LI->addRange(LiveRange(Start, End, VNI));
- // The value is live-through, update LiveOut as well. Defer the Domtree
- // lookup until it is needed.
+ // The value is live-through, update LiveOut as well.
+ // Defer the Domtree lookup until it is needed.
assert(Seen.test(MBB->getNumber()));
- LiveOut[MBB] = LiveOutPair(VNI, (MachineDomTreeNode *)0);
+ LiveOut[MBB] = LiveOutPair(I->Value, (MachineDomTreeNode *)0);
}
+ Updater.setDest(I->LI);
+ Updater.add(Start, End, I->Value);
}
LiveIn.clear();
}
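
The single Updater shared across the loop is safe because LiveRangeUpdater (introduced alongside this change) accepts segments in any order and defers coalescing until it is flushed. A minimal sketch of the pattern, under that assumption:

{
  LiveRangeUpdater Updater(LI);      // LI is the destination LiveInterval*.
  Updater.add(StartA, EndA, VNI);    // Segments may arrive out of order...
  Updater.add(StartB, EndB, VNI);    // ...and may overlap existing ones.
}                                    // Flushed when destroyed, or retargeted
                                     // via setDest() as in the loop above.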
@@ -150,13 +151,11 @@ void LiveRangeCalc::extend(LiveInterval *LI,
// multiple values, and we may need to create even more phi-defs to preserve
// VNInfo SSA form. Perform a search for all predecessor blocks where we
// know the dominating VNInfo.
- VNInfo *VNI = findReachingDefs(LI, KillMBB, Kill, PhysReg);
+ if (findReachingDefs(LI, KillMBB, Kill, PhysReg))
+ return;
// When there were multiple different values, we may need new PHIs.
- if (!VNI)
- updateSSA();
-
- updateLiveIns(VNI);
+ calculateValues();
}
@@ -167,16 +166,18 @@ void LiveRangeCalc::calculateValues() {
assert(Indexes && "Missing SlotIndexes");
assert(DomTree && "Missing dominator tree");
updateSSA();
- updateLiveIns(0);
+ updateLiveIns();
}
-VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI,
- MachineBasicBlock *KillMBB,
- SlotIndex Kill,
- unsigned PhysReg) {
- // Blocks where LI should be live-in.
- SmallVector<MachineBasicBlock*, 16> WorkList(1, KillMBB);
+bool LiveRangeCalc::findReachingDefs(LiveInterval *LI,
+ MachineBasicBlock *KillMBB,
+ SlotIndex Kill,
+ unsigned PhysReg) {
+ unsigned KillMBBNum = KillMBB->getNumber();
+
+ // Block numbers where LI should be live-in.
+ SmallVector<unsigned, 16> WorkList(1, KillMBBNum);
// Remember if we have seen more than one value.
bool UniqueVNI = true;
@@ -184,7 +185,7 @@ VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI,
// Using Seen as a visited set, perform a BFS for all reaching defs.
for (unsigned i = 0; i != WorkList.size(); ++i) {
- MachineBasicBlock *MBB = WorkList[i];
+ MachineBasicBlock *MBB = MF->getBlockNumbered(WorkList[i]);
#ifndef NDEBUG
if (MBB->pred_empty()) {
@@ -231,25 +232,50 @@ VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI,
// No, we need a live-in value for Pred as well
if (Pred != KillMBB)
- WorkList.push_back(Pred);
+ WorkList.push_back(Pred->getNumber());
else
// Loopback to KillMBB, so value is really live through.
Kill = SlotIndex();
}
}
- // Transfer WorkList to LiveInBlocks in reverse order.
- // This ordering works best with updateSSA().
LiveIn.clear();
- LiveIn.reserve(WorkList.size());
- while(!WorkList.empty())
- addLiveInBlock(LI, DomTree->getNode(WorkList.pop_back_val()));
- // The kill block may not be live-through.
- assert(LiveIn.back().DomNode->getBlock() == KillMBB);
- LiveIn.back().Kill = Kill;
+ // Both updateSSA() and LiveRangeUpdater benefit from ordered blocks, but
+ // neither require it. Skip the sorting overhead for small updates.
+ if (WorkList.size() > 4)
+ array_pod_sort(WorkList.begin(), WorkList.end());
+
+ // If a unique reaching def was found, blit in the live ranges immediately.
+ if (UniqueVNI) {
+ LiveRangeUpdater Updater(LI);
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = WorkList.begin(), E = WorkList.end(); I != E; ++I) {
+ SlotIndex Start, End;
+ tie(Start, End) = Indexes->getMBBRange(*I);
+ // Trim the live range in KillMBB.
+ if (*I == KillMBBNum && Kill.isValid())
+ End = Kill;
+ else
+ LiveOut[MF->getBlockNumbered(*I)] =
+ LiveOutPair(TheVNI, (MachineDomTreeNode *)0);
+ Updater.add(Start, End, TheVNI);
+ }
+ return true;
+ }
+
+ // Multiple values were found, so transfer the work list to the LiveIn array
+ // where updateSSA() will use it as a work list.
+ LiveIn.reserve(WorkList.size());
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = WorkList.begin(), E = WorkList.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = MF->getBlockNumbered(*I);
+ addLiveInBlock(LI, DomTree->getNode(MBB));
+ if (MBB == KillMBB)
+ LiveIn.back().Kill = Kill;
+ }
- return UniqueVNI ? TheVNI : 0;
+ return false;
}
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
index 909829b..57cab7b 100644
--- a/lib/CodeGen/LiveRangeCalc.h
+++ b/lib/CodeGen/LiveRangeCalc.h
@@ -34,6 +34,7 @@ template <class NodeT> class DomTreeNodeBase;
typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode;
class LiveRangeCalc {
+ const MachineFunction *MF;
const MachineRegisterInfo *MRI;
SlotIndexes *Indexes;
MachineDominatorTree *DomTree;
@@ -100,17 +101,20 @@ class LiveRangeCalc {
/// used to add entries directly.
SmallVector<LiveInBlock, 16> LiveIn;
- /// findReachingDefs - Assuming that LI is live-in to KillMBB and killed at
- /// Kill, search for values that can reach KillMBB. All blocks that need LI
- /// to be live-in are added to LiveIn. If a unique reaching def is found,
- /// its value is returned, if Kill is jointly dominated by multiple values,
- /// NULL is returned.
+ /// Assuming that LI is live-in to KillMBB and killed at Kill, find the set
+ /// of defs that can reach it.
+ ///
+ /// If only one def can reach Kill, all paths from the def to kill are added
+ /// to LI, and the function returns true.
+ ///
+ /// If multiple values can reach Kill, the blocks that need LI to be live in
+ /// are added to the LiveIn array, and the function returns false.
///
/// PhysReg, when set, is used to verify live-in lists on basic blocks.
- VNInfo *findReachingDefs(LiveInterval *LI,
- MachineBasicBlock *KillMBB,
- SlotIndex Kill,
- unsigned PhysReg);
+ bool findReachingDefs(LiveInterval *LI,
+ MachineBasicBlock *KillMBB,
+ SlotIndex Kill,
+ unsigned PhysReg);
/// updateSSA - Compute the values that will be live in to all requested
/// blocks in LiveIn. Create PHI-def values as required to preserve SSA form.
@@ -119,12 +123,11 @@ class LiveRangeCalc {
/// blocks. No values are read from the live ranges.
void updateSSA();
- /// updateLiveIns - Add liveness as specified in the LiveIn vector, using VNI
- /// as a wildcard value for LiveIn entries without a value.
- void updateLiveIns(VNInfo *VNI);
+ /// Add liveness as specified in the LiveIn vector.
+ void updateLiveIns();
public:
- LiveRangeCalc() : MRI(0), Indexes(0), DomTree(0), Alloc(0) {}
+ LiveRangeCalc() : MF(0), MRI(0), Indexes(0), DomTree(0), Alloc(0) {}
//===--------------------------------------------------------------------===//
// High-level interface.
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index f81ad1c..789eddc 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -619,29 +619,6 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
MBB);
}
- // Finally, if the last instruction in the block is a return, make sure to
- // mark it as using all of the live-out values in the function.
- // Things marked both call and return are tail calls; do not do this for
- // them. The tail callee need not take the same registers as input
- // that it produces as output, and there are dependencies for its input
- // registers elsewhere.
- if (!MBB->empty() && MBB->back().isReturn()
- && !MBB->back().isCall()) {
- MachineInstr *Ret = &MBB->back();
-
- for (MachineRegisterInfo::liveout_iterator
- I = MF->getRegInfo().liveout_begin(),
- E = MF->getRegInfo().liveout_end(); I != E; ++I) {
- assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
- "Cannot have a live-out virtual register!");
- HandlePhysRegUse(*I, Ret);
-
- // Add live-out registers as implicit uses.
- if (!Ret->readsRegister(*I))
- Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
- }
- }
-
// MachineCSE may CSE instructions which write to non-allocatable physical
// registers across MBBs. Remember if any reserved register is liveout.
SmallSet<unsigned, 4> LiveOuts;
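
With the liveout list gone, returns no longer receive blanket implicit uses here. A hedged sketch of what replaces it: the target attaches the specific registers a return actually reads as implicit-use operands when the return is emitted, mirroring the deleted CreateReg call (the register is purely illustrative):

// During return lowering in the target, roughly:
Ret->addOperand(MachineOperand::CreateReg(X86::EAX, /*isDef=*/false,
                                          /*isImp=*/true));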
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 7e9fb20..898e165 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -15,10 +15,12 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
@@ -663,6 +665,13 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
<< " -- BB#" << NMBB->getNumber()
<< " -- BB#" << Succ->getNumber() << '\n');
+ LiveIntervals *LIS = P->getAnalysisIfAvailable<LiveIntervals>();
+ SlotIndexes *Indexes = P->getAnalysisIfAvailable<SlotIndexes>();
+ if (LIS)
+ LIS->insertMBBInMaps(NMBB);
+ else if (Indexes)
+ Indexes->insertMBBInMaps(NMBB);
+
// On some targets like Mips, branches may kill virtual registers. Make sure
// that LiveVariables is properly updated after updateTerminator replaces the
// terminators.
@@ -689,14 +698,67 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
}
}
+ SmallVector<unsigned, 4> UsedRegs;
+ if (LIS) {
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I) {
+ MachineInstr *MI = I;
+
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ if (!OI->isReg() || OI->getReg() == 0)
+ continue;
+
+ unsigned Reg = OI->getReg();
+ if (std::find(UsedRegs.begin(), UsedRegs.end(), Reg) == UsedRegs.end())
+ UsedRegs.push_back(Reg);
+ }
+ }
+ }
+
ReplaceUsesOfBlockWith(Succ, NMBB);
+
+ // If updateTerminator() removes instructions, we need to remove them from
+ // SlotIndexes.
+ SmallVector<MachineInstr*, 4> Terminators;
+ if (Indexes) {
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I)
+ Terminators.push_back(I);
+ }
+
updateTerminator();
+ if (Indexes) {
+ SmallVector<MachineInstr*, 4> NewTerminators;
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I)
+ NewTerminators.push_back(I);
+
+ for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(),
+ E = Terminators.end(); I != E; ++I) {
+ if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) ==
+ NewTerminators.end())
+ Indexes->removeMachineInstrFromMaps(*I);
+ }
+ }
+
// Insert unconditional "jump Succ" instruction in NMBB if necessary.
NMBB->addSuccessor(Succ);
if (!NMBB->isLayoutSuccessor(Succ)) {
Cond.clear();
MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl);
+
+ if (Indexes) {
+ for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end();
+ I != E; ++I) {
+ // Some instructions may have been moved to NMBB by updateTerminator(),
+ // so we first remove any instruction that already has an index.
+ if (Indexes->hasIndex(I))
+ Indexes->removeMachineInstrFromMaps(I);
+ Indexes->insertMachineInstrInMaps(I);
+ }
+ }
}
// Fix PHI nodes in Succ so they refer to NMBB instead of this
@@ -731,6 +793,67 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
LV->addNewBlock(NMBB, this, Succ);
}
+ if (LIS) {
+ // After splitting the edge and updating SlotIndexes, live intervals may be
+ // in one of two situations, depending on whether this block was the last in
+ // the function. If the original block was the last in the function, all live
+ // intervals will end prior to the beginning of the new split block. If the
+ // original block was not at the end of the function, all live intervals will
+ // extend to the end of the new split block.
+
+ bool isLastMBB =
+ llvm::next(MachineFunction::iterator(NMBB)) == getParent()->end();
+
+ SlotIndex StartIndex = Indexes->getMBBEndIdx(this);
+ SlotIndex PrevIndex = StartIndex.getPrevSlot();
+ SlotIndex EndIndex = Indexes->getMBBEndIdx(NMBB);
+
+ // Find the registers used from NMBB in PHIs in Succ.
+ SmallSet<unsigned, 8> PHISrcRegs;
+ for (MachineBasicBlock::instr_iterator
+ I = Succ->instr_begin(), E = Succ->instr_end();
+ I != E && I->isPHI(); ++I) {
+ for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) {
+ if (I->getOperand(ni+1).getMBB() == NMBB) {
+ MachineOperand &MO = I->getOperand(ni);
+ unsigned Reg = MO.getReg();
+ PHISrcRegs.insert(Reg);
+ if (MO.isUndef())
+ continue;
+
+ LiveInterval &LI = LIS->getInterval(Reg);
+ VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
+ assert(VNI && "PHI sources should be live out of their predecessors.");
+ LI.addRange(LiveRange(StartIndex, EndIndex, VNI));
+ }
+ }
+ }
+
+ MachineRegisterInfo *MRI = &getParent()->getRegInfo();
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg))
+ continue;
+
+ LiveInterval &LI = LIS->getInterval(Reg);
+ if (!LI.liveAt(PrevIndex))
+ continue;
+
+ bool isLiveOut = LI.liveAt(LIS->getMBBStartIdx(Succ));
+ if (isLiveOut && isLastMBB) {
+ VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
+ assert(VNI && "LiveInterval should have VNInfo where it is live.");
+ LI.addRange(LiveRange(StartIndex, EndIndex, VNI));
+ } else if (!isLiveOut && !isLastMBB) {
+ LI.removeRange(StartIndex, EndIndex);
+ }
+ }
+
+ // Update all intervals for registers whose uses may have been modified by
+ // updateTerminator().
+ LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs);
+ }
+
if (MachineDominatorTree *MDT =
P->getAnalysisIfAvailable<MachineDominatorTree>()) {
// Update dominator information.
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 07a3e03..3b09c6b 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -171,7 +171,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
const TargetInstrInfo *TII;
/// \brief A handle to the target's lowering info.
- const TargetLowering *TLI;
+ const TargetLoweringBase *TLI;
/// \brief Allocator and owner of BlockChain structures.
///
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 9647e83..5e04f2d 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -346,13 +346,6 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
}
OS << '\n';
}
- if (RegInfo && !RegInfo->liveout_empty()) {
- OS << "Function Live Outs:";
- for (MachineRegisterInfo::liveout_iterator
- I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I)
- OS << ' ' << PrintReg(*I, TRI);
- OS << '\n';
- }
for (const_iterator BB = begin(), E = end(); BB != E; ++BB) {
OS << '\n';
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 29d8866..32d0668 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -752,20 +752,19 @@ void MachineInstr::addMemOperand(MachineFunction &MF,
}
bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
- const MachineBasicBlock *MBB = getParent();
- MachineBasicBlock::const_instr_iterator MII = *this; ++MII;
- while (MII != MBB->end() && MII->isInsideBundle()) {
+ assert(!isBundledWithPred() && "Must be called on bundle header");
+ for (MachineBasicBlock::const_instr_iterator MII = this;; ++MII) {
if (MII->getDesc().getFlags() & Mask) {
if (Type == AnyInBundle)
return true;
} else {
- if (Type == AllInBundle)
+ if (Type == AllInBundle && !MII->isBundle())
return false;
}
- ++MII;
+ // This was the last instruction in the bundle.
+ if (!MII->isBundledWithSucc())
+ return Type == AllInBundle;
}
-
- return Type == AllInBundle;
}
bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
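
A hedged usage note: this helper backs MachineInstr::hasProperty() and the mayLoad()/mayStore()-style wrappers, so after this change bundle-wide queries must be made on the bundle header, e.g.:

assert(!MI->isBundledWithPred() && "Query the bundle header, not a member");
bool AnyLoads = MI->mayLoad(MachineInstr::AnyInBundle);   // Some member loads.
bool AllStore = MI->mayStore(MachineInstr::AllInBundle);  // Every member may store.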
@@ -897,7 +896,7 @@ void MachineInstr::unbundleFromSucc() {
assert(isBundledWithSucc() && "MI isn't bundled with its successor");
clearFlag(BundledSucc);
MachineBasicBlock::instr_iterator Succ = this;
- --Succ;
+ ++Succ;
assert(Succ->isBundledWithPred() && "Inconsistent bundle flags");
Succ->clearFlag(BundledPred);
}
@@ -982,18 +981,13 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
return NULL;
}
-/// getBundleSize - Return the number of instructions inside the MI bundle.
+/// Return the number of instructions inside the MI bundle, not counting the
+/// header instruction.
unsigned MachineInstr::getBundleSize() const {
- assert(isBundle() && "Expecting a bundle");
-
- const MachineBasicBlock *MBB = getParent();
- MachineBasicBlock::const_instr_iterator I = *this, E = MBB->instr_end();
+ MachineBasicBlock::const_instr_iterator I = this;
unsigned Size = 0;
- while ((++I != E) && I->isInsideBundle()) {
- ++Size;
- }
- assert(Size > 1 && "Malformed bundle");
-
+ while (I->isBundledWithSucc())
+ ++Size, ++I;
return Size;
}
@@ -1434,7 +1428,8 @@ static void printDebugLoc(DebugLoc DL, const MachineFunction *MF,
}
}
-void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
+void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
+ bool SkipOpers) const {
// We can be a bit tidier if we know the TargetMachine and/or MachineFunction.
const MachineFunction *MF = 0;
const MachineRegisterInfo *MRI = 0;
@@ -1471,6 +1466,9 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
else
OS << "UNKNOWN";
+ if (SkipOpers)
+ return;
+
// Print the rest of the operands.
bool OmittedAnyCallClobbers = false;
bool FirstOp = true;
@@ -1482,10 +1480,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << " ";
getOperand(InlineAsm::MIOp_AsmString).print(OS, TM);
- // Print HasSideEffects, IsAlignStack
+ // Print HasSideEffects, MayLoad, MayStore, IsAlignStack
unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
OS << " [sideeffect]";
+ if (ExtraInfo & InlineAsm::Extra_MayLoad)
+ OS << " [mayload]";
+ if (ExtraInfo & InlineAsm::Extra_MayStore)
+ OS << " [maystore]";
if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
OS << " [alignstack]";
if (getInlineAsmDialect() == InlineAsm::AD_ATT)
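
The two new tags surface bits that inline asm lowering already records in the extra-info operand; a hedged sketch of the corresponding test, using only the InlineAsm constants seen above:

unsigned ExtraInfo = MI->getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
bool MayLoad  = (ExtraInfo & InlineAsm::Extra_MayLoad) != 0;   // "[mayload]"
bool MayStore = (ExtraInfo & InlineAsm::Extra_MayStore) != 0;  // "[maystore]"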
@@ -1513,12 +1515,12 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
const MachineRegisterInfo &MRI = MF->getRegInfo();
- if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) {
+ if (MRI.use_empty(Reg)) {
bool HasAliasLive = false;
for (MCRegAliasIterator AI(Reg, TM->getRegisterInfo(), true);
AI.isValid(); ++AI) {
unsigned AliasReg = *AI;
- if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) {
+ if (!MRI.use_empty(AliasReg)) {
HasAliasLive = true;
break;
}
@@ -1590,7 +1592,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
}
bool HaveSemi = false;
- if (Flags) {
+ const unsigned PrintableFlags = FrameSetup;
+ if (Flags & PrintableFlags) {
if (!HaveSemi) OS << ";"; HaveSemi = true;
OS << " flags: ";
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 760cf8a..ed3ed4d 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -62,7 +62,7 @@ namespace {
class MachineLICM : public MachineFunctionPass {
const TargetMachine *TM;
const TargetInstrInfo *TII;
- const TargetLowering *TLI;
+ const TargetLoweringBase *TLI;
const TargetRegisterInfo *TRI;
const MachineFrameInfo *MFI;
MachineRegisterInfo *MRI;
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 21877e5..a777f52 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -283,13 +283,6 @@ bool MachineRegisterInfo::isLiveIn(unsigned Reg) const {
return false;
}
-bool MachineRegisterInfo::isLiveOut(unsigned Reg) const {
- for (liveout_iterator I = liveout_begin(), E = liveout_end(); I != E; ++I)
- if (*I == Reg)
- return true;
- return false;
-}
-
/// getLiveInPhysReg - If VReg is a live-in virtual register, return the
/// corresponding live-in physical register.
unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const {
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 54a7851..a93d070 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -26,6 +26,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include <queue>
@@ -48,15 +49,6 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
static bool ViewMISchedDAGs = false;
#endif // NDEBUG
-// Threshold to very roughly model an out-of-order processor's instruction
-// buffers. If the actual value of this threshold matters much in practice, then
-// it can be specified by the machine model. For now, it's an experimental
-// tuning knob to determine when and if it matters.
-static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden,
- cl::desc("Allow expected latency to exceed the critical path by N cycles "
- "before attempting to balance ILP"),
- cl::init(10U));
-
// Experimental heuristics
static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
cl::desc("Enable load clustering."), cl::init(true));
@@ -65,6 +57,9 @@ static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
cl::desc("Enable scheduling for macro fusion."), cl::init(true));
+// DAG subtrees must have at least this many nodes.
+static const unsigned MinSubtreeSize = 8;
+
//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//
@@ -268,7 +263,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
}
DEBUG(dbgs() << "********** MI Scheduling **********\n");
DEBUG(dbgs() << MF->getName()
- << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: ";
+ << ":BB#" << MBB->getNumber() << " " << MBB->getName()
+ << "\n From: " << *I << " To: ";
if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
else dbgs() << "End";
dbgs() << " Remaining: " << RemainingInstrs << "\n");
@@ -310,6 +306,12 @@ void ReadyQueue::dump() {
// preservation.
//===----------------------------------------------------------------------===//
+ScheduleDAGMI::~ScheduleDAGMI() {
+ delete DFSResult;
+ DeleteContainerPointers(Mutations);
+ delete SchedImpl;
+}
+
bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
if (SuccSU != &ExitSU) {
// Do not use WillCreateCycle, it assumes SD scheduling.
@@ -465,7 +467,8 @@ void ScheduleDAGMI::initRegPressure() {
// Cache the list of excess pressure sets in this region. This will also track
// the max pressure in the scheduled code for these sets.
RegionCriticalPSets.clear();
- std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
+ const std::vector<unsigned> &RegionPressure =
+ RPTracker.getPressure().MaxSetPressure;
for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
unsigned Limit = TRI->getRegPressureSetLimit(i);
DEBUG(dbgs() << TRI->getRegPressureSetName(i)
@@ -484,7 +487,7 @@ void ScheduleDAGMI::initRegPressure() {
// FIXME: When the pressure tracker deals in pressure differences then we won't
// iterate over all RegionCriticalPSets[i].
void ScheduleDAGMI::
-updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
+updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) {
for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
unsigned ID = RegionCriticalPSets[i].PSetID;
int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
@@ -510,12 +513,19 @@ void ScheduleDAGMI::schedule() {
postprocessDAG();
+ SmallVector<SUnit*, 8> TopRoots, BotRoots;
+ findRootsAndBiasEdges(TopRoots, BotRoots);
+
+ // Initialize the strategy before modifying the DAG.
+ // This may initialize a DFSResult to be used for queue priority.
+ SchedImpl->initialize(this);
+
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
SUnits[su].dumpAll(this));
-
if (ViewMISchedDAGs) viewGraph();
- initQueues();
+ // Initialize ready queues now that the DAG and priority data are finalized.
+ initQueues(TopRoots, BotRoots);
bool IsTopNode = false;
while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
@@ -550,7 +560,6 @@ void ScheduleDAGMI::buildDAGWithRegPressure() {
// Build the DAG, and compute current register pressure.
buildSchedGraph(AA, &RPTracker);
- if (ViewMISchedDAGs) viewGraph();
// Initialize top/bottom trackers after computing region pressure.
initRegPressure();
@@ -563,39 +572,56 @@ void ScheduleDAGMI::postprocessDAG() {
}
}
-// Release all DAG roots for scheduling.
-//
-// Nodes with unreleased weak edges can still be roots.
-void ScheduleDAGMI::releaseRoots() {
- SmallVector<SUnit*, 16> BotRoots;
+void ScheduleDAGMI::computeDFSResult() {
+ if (!DFSResult)
+ DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
+ DFSResult->clear();
+ ScheduledTrees.clear();
+ DFSResult->resize(SUnits.size());
+ DFSResult->compute(SUnits);
+ ScheduledTrees.resize(DFSResult->getNumSubtrees());
+}
+void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
+ SmallVectorImpl<SUnit*> &BotRoots) {
for (std::vector<SUnit>::iterator
I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
SUnit *SU = &(*I);
+ assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
+
+ // Order predecessors so DFSResult follows the critical path.
+ SU->biasCriticalPath();
+
// A SUnit is ready to top schedule if it has no predecessors.
- if (!I->NumPredsLeft && SU != &EntrySU)
- SchedImpl->releaseTopNode(SU);
+ if (!I->NumPredsLeft)
+ TopRoots.push_back(SU);
// A SUnit is ready to bottom schedule if it has no successors.
- if (!I->NumSuccsLeft && SU != &ExitSU)
+ if (!I->NumSuccsLeft)
BotRoots.push_back(SU);
}
- // Release bottom roots in reverse order so the higher priority nodes appear
- // first. This is more natural and slightly more efficient.
- for (SmallVectorImpl<SUnit*>::const_reverse_iterator
- I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
- SchedImpl->releaseBottomNode(*I);
+ ExitSU.biasCriticalPath();
}
/// Identify DAG roots and setup scheduler queues.
-void ScheduleDAGMI::initQueues() {
+void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
+ ArrayRef<SUnit*> BotRoots) {
NextClusterSucc = NULL;
NextClusterPred = NULL;
- // Initialize the strategy before modifying the DAG.
- SchedImpl->initialize(this);
-
// Release all DAG roots for scheduling, not including EntrySU/ExitSU.
- releaseRoots();
+ //
+ // Nodes with unreleased weak edges can still be roots.
+ // Release top roots in forward order.
+ for (SmallVectorImpl<SUnit*>::const_iterator
+ I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
+ SchedImpl->releaseTopNode(*I);
+ }
+ // Release bottom roots in reverse order so the higher priority nodes appear
+ // first. This is more natural and slightly more efficient.
+ for (SmallVectorImpl<SUnit*>::const_reverse_iterator
+ I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
+ SchedImpl->releaseBottomNode(*I);
+ }
releaseSuccessors(&EntrySU);
releasePredecessors(&ExitSU);
@@ -660,6 +686,15 @@ void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
SU->isScheduled = true;
+ if (DFSResult) {
+ unsigned SubtreeID = DFSResult->getSubtreeID(SU);
+ if (!ScheduledTrees.test(SubtreeID)) {
+ ScheduledTrees.set(SubtreeID);
+ DFSResult->scheduleTree(SubtreeID);
+ SchedImpl->scheduleTree(SubtreeID);
+ }
+ }
+
// Notify the scheduling strategy after updating the DAG.
SchedImpl->schedNode(SU, IsTopNode);
}
@@ -1019,6 +1054,9 @@ public:
#endif
void reset() {
+ // A new HazardRec is created for each DAG and owned by SchedBoundary.
+ delete HazardRec;
+
Available.clear();
Pending.clear();
CheckPending = false;
@@ -1044,7 +1082,8 @@ public:
/// PendingFlag set.
SchedBoundary(unsigned ID, const Twine &Name):
DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
- Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") {
+ Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
+ HazardRec(0) {
reset();
}
@@ -1188,10 +1227,13 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
DAG = dag;
SchedModel = DAG->getSchedModel();
TRI = DAG->TRI;
+
Rem.init(DAG, SchedModel);
Top.init(DAG, SchedModel, &Rem);
Bot.init(DAG, SchedModel, &Rem);
+ DAG->computeDFSResult();
+
// Initialize resource counts.
// Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
@@ -1297,7 +1339,8 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
if (L > RemLatency)
RemLatency = L;
}
- if (RemLatency + ExpectedLatency >= Rem->CriticalPath + ILPWindow
+ unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow();
+ if (RemLatency + ExpectedLatency >= CriticalPathLimit
&& RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
Policy.ReduceLatency = true;
DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n');
@@ -1541,8 +1584,8 @@ void ConvergingScheduler::checkResourceLimits(
ConvergingScheduler::SchedCandidate &BotCand) {
// Set ReduceLatency to true if needed.
- Bot.setLatencyPolicy(TopCand.Policy);
- Top.setLatencyPolicy(BotCand.Policy);
+ Bot.setLatencyPolicy(BotCand.Policy);
+ Top.setLatencyPolicy(TopCand.Policy);
// Handle resource-limited regions.
if (Top.IsResourceLimited && Bot.IsResourceLimited
@@ -2060,12 +2103,11 @@ ConvergingSchedRegistry("converge", "Standard converging scheduler.",
namespace {
/// \brief Order nodes by the ILP metric.
struct ILPOrder {
- SchedDFSResult *DFSResult;
- BitVector *ScheduledTrees;
+ const SchedDFSResult *DFSResult;
+ const BitVector *ScheduledTrees;
bool MaximizeILP;
- ILPOrder(SchedDFSResult *dfs, BitVector *schedtrees, bool MaxILP)
- : DFSResult(dfs), ScheduledTrees(schedtrees), MaximizeILP(MaxILP) {}
+ ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {}
/// \brief Apply a less-than relation on node priority.
///
@@ -2103,26 +2145,22 @@ class ILPScheduler : public MachineSchedStrategy {
/// (a motivating test case must be found).
static const unsigned SubtreeLimit = 16;
- SchedDFSResult DFSResult;
- BitVector ScheduledTrees;
+ ScheduleDAGMI *DAG;
ILPOrder Cmp;
std::vector<SUnit*> ReadyQ;
public:
- ILPScheduler(bool MaximizeILP)
- : DFSResult(/*BottomUp=*/true, SubtreeLimit),
- Cmp(&DFSResult, &ScheduledTrees, MaximizeILP) {}
+ ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {}
- virtual void initialize(ScheduleDAGMI *DAG) {
+ virtual void initialize(ScheduleDAGMI *dag) {
+ DAG = dag;
+ DAG->computeDFSResult();
+ Cmp.DFSResult = DAG->getDFSResult();
+ Cmp.ScheduledTrees = &DAG->getScheduledTrees();
ReadyQ.clear();
- DFSResult.clear();
- DFSResult.resize(DAG->SUnits.size());
- ScheduledTrees.clear();
}
virtual void registerRoots() {
- DFSResult.compute(ReadyQ);
- ScheduledTrees.resize(DFSResult.getNumSubtrees());
// Restore the heap in ReadyQ with the updated DFS results.
std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
}
@@ -2139,21 +2177,22 @@ public:
IsTopNode = false;
DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
<< *SU->getInstr()
- << " ILP: " << DFSResult.getILP(SU)
- << " Tree: " << DFSResult.getSubtreeID(SU) << " @"
- << DFSResult.getSubtreeLevel(DFSResult.getSubtreeID(SU))<< '\n');
+ << " ILP: " << DAG->getDFSResult()->getILP(SU)
+ << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
+ << DAG->getDFSResult()->getSubtreeLevel(
+ DAG->getDFSResult()->getSubtreeID(SU)) << '\n');
return SU;
}
+ /// \brief Scheduler callback to notify that a new subtree is scheduled.
+ virtual void scheduleTree(unsigned SubtreeID) {
+ std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ }
+
/// Callback after a node is scheduled. Mark a newly scheduled tree, notify
/// DFSResults, and resort the priority Q.
virtual void schedNode(SUnit *SU, bool IsTopNode) {
assert(!IsTopNode && "SchedDFSResult needs bottom-up");
- if (!ScheduledTrees.test(DFSResult.getSubtreeID(SU))) {
- ScheduledTrees.set(DFSResult.getSubtreeID(SU));
- DFSResult.scheduleTree(DFSResult.getSubtreeID(SU));
- std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
- }
}
virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
@@ -2264,3 +2303,90 @@ static MachineSchedRegistry ShufflerRegistry(
"shuffle", "Shuffle machine instructions alternating directions",
createInstructionShuffler);
#endif // !NDEBUG
+
+//===----------------------------------------------------------------------===//
+// GraphWriter support for ScheduleDAGMI.
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+namespace llvm {
+
+template<> struct GraphTraits<
+ ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
+
+template<>
+struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const ScheduleDAG *G) {
+ return G->MF.getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool isNodeHidden(const SUnit *Node) {
+ return (Node->NumPreds > 10 || Node->NumSuccs > 10);
+ }
+
+ static bool hasNodeAddressLabel(const SUnit *Node,
+ const ScheduleDAG *Graph) {
+ return false;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ static std::string getEdgeAttributes(const SUnit *Node,
+ SUnitIterator EI,
+ const ScheduleDAG *Graph) {
+ if (EI.isArtificialDep())
+ return "color=cyan,style=dashed";
+ if (EI.isCtrlDep())
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+ static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
+ std::string Str;
+ raw_string_ostream SS(Str);
+ SS << "SU(" << SU->NodeNum << ')';
+ return SS.str();
+ }
+ static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
+ return G->getGraphNodeLabel(SU);
+ }
+
+ static std::string getNodeAttributes(const SUnit *N,
+ const ScheduleDAG *Graph) {
+ std::string Str("shape=Mrecord");
+ const SchedDFSResult *DFS =
+ static_cast<const ScheduleDAGMI*>(Graph)->getDFSResult();
+ if (DFS) {
+ Str += ",style=filled,fillcolor=\"#";
+ Str += DOT::getColorString(DFS->getSubtreeID(N));
+ Str += '"';
+ }
+ return Str;
+ }
+};
+} // namespace llvm
+#endif // NDEBUG
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
+#ifndef NDEBUG
+ ViewGraph(this, Name, false, Title);
+#else
+ errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+/// Out-of-line implementation with no arguments is handy for gdb.
+void ScheduleDAGMI::viewGraph() {
+ viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
+}
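
As the comment notes, the zero-argument overload is aimed at interactive debugging; assuming a live ScheduleDAGMI pointer named DAG in the current frame, one might do:

(gdb) call DAG->viewGraph()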
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index 685ccab..f77a7b1 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "machine-trace-metrics"
-#include "MachineTraceMetrics.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
diff --git a/lib/CodeGen/MachineTraceMetrics.h b/lib/CodeGen/MachineTraceMetrics.h
deleted file mode 100644
index 460730b..0000000
--- a/lib/CodeGen/MachineTraceMetrics.h
+++ /dev/null
@@ -1,350 +0,0 @@
-//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interface for the MachineTraceMetrics analysis pass
-// that estimates CPU resource usage and critical data dependency paths through
-// preferred traces. This is useful for super-scalar CPUs where execution speed
-// can be limited both by data dependencies and by limited execution resources.
-//
-// Out-of-order CPUs will often be executing instructions from multiple basic
-// blocks at the same time. This makes it difficult to estimate the resource
-// usage accurately in a single basic block. Resources can be estimated better
-// by looking at a trace through the current basic block.
-//
-// For every block, the MachineTraceMetrics pass will pick a preferred trace
-// that passes through the block. The trace is chosen based on loop structure,
-// branch probabilities, and resource usage. The intention is to pick likely
-// traces that would be the most affected by code transformations.
-//
-// It is expensive to compute a full arbitrary trace for every block, so to
-// save some computations, traces are chosen to be convergent. This means that
-// if the traces through basic blocks A and B ever cross when moving away from
-// A and B, they never diverge again. This applies in both directions - If the
-// traces meet above A and B, they won't diverge when going further back.
-//
-// Traces tend to align with loops. The trace through a block in an inner loop
-// will begin at the loop entry block and end at a back edge. If there are
-// nested loops, the trace may begin and end at those instead.
-//
-// For each trace, we compute the critical path length, which is the number of
-// cycles required to execute the trace when execution is limited by data
-// dependencies only. We also compute the resource height, which is the number
-// of cycles required to execute all instructions in the trace when ignoring
-// data dependencies.
-//
-// Every instruction in the current block has a slack - the number of cycles
-// execution of the instruction can be delayed without extending the critical
-// path.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
-#define LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/TargetSchedule.h"
-
-namespace llvm {
-
-class InstrItineraryData;
-class MachineBasicBlock;
-class MachineInstr;
-class MachineLoop;
-class MachineLoopInfo;
-class MachineRegisterInfo;
-class TargetInstrInfo;
-class TargetRegisterInfo;
-class raw_ostream;
-
-class MachineTraceMetrics : public MachineFunctionPass {
- const MachineFunction *MF;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- const MachineRegisterInfo *MRI;
- const MachineLoopInfo *Loops;
- TargetSchedModel SchedModel;
-
-public:
- class Ensemble;
- class Trace;
- static char ID;
- MachineTraceMetrics();
- void getAnalysisUsage(AnalysisUsage&) const;
- bool runOnMachineFunction(MachineFunction&);
- void releaseMemory();
- void verifyAnalysis() const;
-
- friend class Ensemble;
- friend class Trace;
-
- /// Per-basic block information that doesn't depend on the trace through the
- /// block.
- struct FixedBlockInfo {
- /// The number of non-trivial instructions in the block.
- /// Doesn't count PHI and COPY instructions that are likely to be removed.
- unsigned InstrCount;
-
- /// True when the block contains calls.
- bool HasCalls;
-
- FixedBlockInfo() : InstrCount(~0u), HasCalls(false) {}
-
- /// Returns true when resource information for this block has been computed.
- bool hasResources() const { return InstrCount != ~0u; }
-
- /// Invalidate resource information.
- void invalidate() { InstrCount = ~0u; }
- };
-
- /// Get the fixed resource information about MBB. Compute it on demand.
- const FixedBlockInfo *getResources(const MachineBasicBlock*);
-
- /// A virtual register or regunit required by a basic block or its trace
- /// successors.
- struct LiveInReg {
- /// The virtual register required, or a register unit.
- unsigned Reg;
-
- /// For virtual registers: Minimum height of the defining instruction.
- /// For regunits: Height of the highest user in the trace.
- unsigned Height;
-
- LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
- };
-
- /// Per-basic block information that relates to a specific trace through the
- /// block. Convergent traces mean that only one of these is required per
- /// block in a trace ensemble.
- struct TraceBlockInfo {
- /// Trace predecessor, or NULL for the first block in the trace.
- /// Valid when hasValidDepth().
- const MachineBasicBlock *Pred;
-
- /// Trace successor, or NULL for the last block in the trace.
- /// Valid when hasValidHeight().
- const MachineBasicBlock *Succ;
-
- /// The block number of the head of the trace. (When hasValidDepth()).
- unsigned Head;
-
- /// The block number of the tail of the trace. (When hasValidHeight()).
- unsigned Tail;
-
- /// Accumulated number of instructions in the trace above this block.
- /// Does not include instructions in this block.
- unsigned InstrDepth;
-
- /// Accumulated number of instructions in the trace below this block.
- /// Includes instructions in this block.
- unsigned InstrHeight;
-
- TraceBlockInfo() :
- Pred(0), Succ(0),
- InstrDepth(~0u), InstrHeight(~0u),
- HasValidInstrDepths(false), HasValidInstrHeights(false) {}
-
- /// Returns true if the depth resources have been computed from the trace
- /// above this block.
- bool hasValidDepth() const { return InstrDepth != ~0u; }
-
- /// Returns true if the height resources have been computed from the trace
- /// below this block.
- bool hasValidHeight() const { return InstrHeight != ~0u; }
-
- /// Invalidate depth resources when some block above this one has changed.
- void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
-
- /// Invalidate height resources when a block below this one has changed.
- void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
-
- /// Determine if this block belongs to the same trace as TBI and comes
- /// before it in the trace.
- /// Also returns true when TBI == this.
- bool isEarlierInSameTrace(const TraceBlockInfo &TBI) const {
- return hasValidDepth() && TBI.hasValidDepth() &&
- Head == TBI.Head && InstrDepth <= TBI.InstrDepth;
- }
-
- // Data-dependency-related information. Per-instruction depth and height
- // are computed from data dependencies in the current trace, using
- // itinerary data.
-
- /// Instruction depths have been computed. This implies hasValidDepth().
- bool HasValidInstrDepths;
-
- /// Instruction heights have been computed. This implies hasValidHeight().
- bool HasValidInstrHeights;
-
- /// Critical path length. This is the number of cycles in the longest data
- /// dependency chain through the trace. This is only valid when both
- /// HasValidInstrDepths and HasValidInstrHeights are set.
- unsigned CriticalPath;
-
- /// Live-in registers. These registers are defined above the current block
- /// and used by this block or a block below it.
- /// This does not include PHI uses in the current block, but it does
- /// include PHI uses in deeper blocks.
- SmallVector<LiveInReg, 4> LiveIns;
-
- void print(raw_ostream&) const;
- };
-
- /// InstrCycles represents the cycle height and depth of an instruction in a
- /// trace.
- struct InstrCycles {
- /// Earliest issue cycle as determined by data dependencies and instruction
- /// latencies from the beginning of the trace. Data dependencies from
- /// before the trace are not included.
- unsigned Depth;
-
- /// Minimum number of cycles from when this instruction is issued to the end
- /// of the trace, as determined by data dependencies and instruction latencies.
- unsigned Height;
- };
-
- /// A trace represents a plausible sequence of executed basic blocks that
- /// passes through the current basic block once. The Trace class serves as a
- /// handle to internal cached data structures.
- class Trace {
- Ensemble &TE;
- TraceBlockInfo &TBI;
-
- unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
-
- public:
- explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
- void print(raw_ostream&) const;
-
- /// Compute the total number of instructions in the trace.
- unsigned getInstrCount() const {
- return TBI.InstrDepth + TBI.InstrHeight;
- }
-
- /// Return the resource depth of the top/bottom of the trace center block.
- /// This is the number of cycles required to execute all instructions from
- /// the trace head to the trace center block. The resource depth only
- /// considers execution resources, it ignores data dependencies.
- /// When Bottom is set, instructions in the trace center block are included.
- unsigned getResourceDepth(bool Bottom) const;
-
- /// Return the resource length of the trace. This is the number of cycles
- /// required to execute the instructions in the trace if they were all
- /// independent, exposing the maximum instruction-level parallelism.
- ///
- /// Any blocks in Extrablocks are included as if they were part of the
- /// trace.
- unsigned getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks =
- ArrayRef<const MachineBasicBlock*>()) const;
-
- /// Return the length of the (data dependency) critical path through the
- /// trace.
- unsigned getCriticalPath() const { return TBI.CriticalPath; }
-
- /// Return the depth and height of MI. The depth is only valid for
- /// instructions in or above the trace center block. The height is only
- /// valid for instructions in or below the trace center block.
- InstrCycles getInstrCycles(const MachineInstr *MI) const {
- return TE.Cycles.lookup(MI);
- }
-
- /// Return the slack of MI. This is the number of cycles MI can be delayed
- /// before the critical path becomes longer.
- /// MI must be an instruction in the trace center block.
- unsigned getInstrSlack(const MachineInstr *MI) const;
-
- /// Return the Depth of a PHI instruction in a trace center block successor.
- /// The PHI does not have to be part of the trace.
- unsigned getPHIDepth(const MachineInstr *PHI) const;
- };
-
- /// A trace ensemble is a collection of traces selected using the same
- /// strategy, for example 'minimum resource height'. There is one trace for
- /// every block in the function.
- class Ensemble {
- SmallVector<TraceBlockInfo, 4> BlockInfo;
- DenseMap<const MachineInstr*, InstrCycles> Cycles;
- friend class Trace;
-
- void computeTrace(const MachineBasicBlock*);
- void computeDepthResources(const MachineBasicBlock*);
- void computeHeightResources(const MachineBasicBlock*);
- unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
- void computeInstrDepths(const MachineBasicBlock*);
- void computeInstrHeights(const MachineBasicBlock*);
- void addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
- ArrayRef<const MachineBasicBlock*> Trace);
-
- protected:
- MachineTraceMetrics &MTM;
- virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0;
- virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0;
- explicit Ensemble(MachineTraceMetrics*);
- const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
- const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const;
- const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const;
-
- public:
- virtual ~Ensemble();
- virtual const char *getName() const =0;
- void print(raw_ostream&) const;
- void invalidate(const MachineBasicBlock *MBB);
- void verify() const;
-
- /// Get the trace that passes through MBB.
- /// The trace is computed on demand.
- Trace getTrace(const MachineBasicBlock *MBB);
- };
-
- /// Strategies for selecting traces.
- enum Strategy {
- /// Select the trace through a block that has the fewest instructions.
- TS_MinInstrCount,
-
- TS_NumStrategies
- };
-
- /// Get the trace ensemble representing the given trace selection strategy.
- /// The returned Ensemble object is owned by the MachineTraceMetrics analysis,
- /// and valid for the lifetime of the analysis pass.
- Ensemble *getEnsemble(Strategy);
-
- /// Invalidate cached information about MBB. This must be called *before* MBB
- /// is erased, or the CFG is otherwise changed.
- ///
- /// This invalidates per-block information about resource usage for MBB only,
- /// and it invalidates per-trace information for any trace that passes
- /// through MBB.
- ///
- /// Call Ensemble::getTrace() again to update any trace handles.
- void invalidate(const MachineBasicBlock *MBB);
-
-private:
- // One entry per basic block, indexed by block number.
- SmallVector<FixedBlockInfo, 4> BlockInfo;
-
- // One ensemble per strategy.
- Ensemble* Ensembles[TS_NumStrategies];
-};
-
-inline raw_ostream &operator<<(raw_ostream &OS,
- const MachineTraceMetrics::Trace &Tr) {
- Tr.print(OS);
- return OS;
-}
-
-inline raw_ostream &operator<<(raw_ostream &OS,
- const MachineTraceMetrics::Ensemble &En) {
- En.print(OS);
- return OS;
-}
-} // end namespace llvm
-
-#endif
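The Trace API being moved above reduces to simple arithmetic over per-instruction depths and heights. As a minimal standalone sketch (toy types, not the LLVM classes), an instruction's slack is the critical path length minus the sum of its depth and height, i.e. how many cycles it can slip without lengthening the trace:

    #include <algorithm>
    #include <cstdio>

    // Toy stand-ins for MachineTraceMetrics::InstrCycles and
    // Trace::getInstrSlack().
    struct InstrCycles {
      unsigned Depth;  // cycles from the trace head to issue
      unsigned Height; // cycles from issue to the trace tail
    };

    // Slack: cycles this instruction can be delayed before the data-dependency
    // critical path through the trace gets longer (clamped here for safety).
    unsigned getInstrSlack(unsigned CriticalPath, InstrCycles Cyc) {
      return CriticalPath - std::min(CriticalPath, Cyc.Depth + Cyc.Height);
    }

    int main() {
      // A 12-cycle critical path; an instruction 3 cycles deep with 5 cycles
      // of dependent work below it has 12 - (3 + 5) = 4 cycles of slack.
      std::printf("slack = %u\n", getInstrSlack(12, {3, 5}));
    }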
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 4daa211..5584708 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -39,9 +40,16 @@ DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false),
cl::Hidden, cl::desc("Disable critical edge splitting "
"during PHI elimination"));
+static cl::opt<bool>
+SplitAllCriticalEdges("phi-elim-split-all-critical-edges", cl::init(false),
+ cl::Hidden, cl::desc("Split all critical edges during "
+ "PHI elimination"));
+
namespace {
class PHIElimination : public MachineFunctionPass {
MachineRegisterInfo *MRI; // Machine register information
+ LiveVariables *LV;
+ LiveIntervals *LIS;
public:
static char ID; // Pass identification, replacement for typeid
@@ -57,8 +65,8 @@ namespace {
/// in predecessor basic blocks.
///
bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
- void LowerAtomicPHINode(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator AfterPHIsIt);
+ void LowerPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt);
/// analyzePHINodes - Gather information about the PHI nodes in
/// here. In particular, we want to map the number of uses of a virtual
@@ -70,7 +78,12 @@ namespace {
/// Split critical edges where necessary for good coalescer performance.
bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
- LiveVariables &LV, MachineLoopInfo *MLI);
+ MachineLoopInfo *MLI);
+
+ // These functions are temporary abstractions around LiveVariables and
+ // LiveIntervals, so they can go away when LiveVariables does.
+ bool isLiveIn(unsigned Reg, MachineBasicBlock *MBB);
+ bool isLiveOutPastPHIs(unsigned Reg, MachineBasicBlock *MBB);
typedef std::pair<unsigned, unsigned> BBVRegPair;
typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
@@ -87,7 +100,7 @@ namespace {
};
}
-STATISTIC(NumAtomic, "Number of atomic phis lowered");
+STATISTIC(NumLowered, "Number of phis lowered");
STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split");
STATISTIC(NumReused, "Number of reused lowered phis");
@@ -103,6 +116,8 @@ INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination",
void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<LiveVariables>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
AU.addPreserved<MachineDominatorTree>();
AU.addPreserved<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -110,19 +125,20 @@ void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
+ LV = getAnalysisIfAvailable<LiveVariables>();
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
bool Changed = false;
// This pass takes the function out of SSA form.
MRI->leaveSSA();
- // Split critical edges to help the coalescer
- if (!DisableEdgeSplitting) {
- if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) {
- MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
- Changed |= SplitPHIEdges(MF, *I, *LV, MLI);
- }
+ // Split critical edges to help the coalescer. This does not yet support
+ // updating LiveIntervals, so we disable it.
+ if (!DisableEdgeSplitting && (LV || LIS)) {
+ MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= SplitPHIEdges(MF, *I, MLI);
}
// Populate VRegPHIUseCount
@@ -137,14 +153,20 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
E = ImpDefs.end(); I != E; ++I) {
MachineInstr *DefMI = *I;
unsigned DefReg = DefMI->getOperand(0).getReg();
- if (MRI->use_nodbg_empty(DefReg))
+ if (MRI->use_nodbg_empty(DefReg)) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(DefMI);
DefMI->eraseFromParent();
+ }
}
// Clean up the lowered PHI instructions.
for (LoweredPHIMap::iterator I = LoweredPHIs.begin(), E = LoweredPHIs.end();
- I != E; ++I)
+ I != E; ++I) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(I->first);
MF.DeleteMachineInstr(I->first);
+ }
LoweredPHIs.clear();
ImpDefs.clear();
@@ -166,7 +188,7 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
MachineBasicBlock::iterator AfterPHIsIt = MBB.SkipPHIsAndLabels(MBB.begin());
while (MBB.front().isPHI())
- LowerAtomicPHINode(MBB, AfterPHIsIt);
+ LowerPHINode(MBB, AfterPHIsIt);
return true;
}
@@ -193,15 +215,11 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
}
-/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
-/// under the assumption that it needs to be lowered in a way that supports
-/// atomic execution of PHIs. This lowering method is always correct all of the
-/// time.
+/// LowerPHINode - Lower the PHI node at the top of the specified block.
///
-void PHIElimination::LowerAtomicPHINode(
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator AfterPHIsIt) {
- ++NumAtomic;
+void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt) {
+ ++NumLowered;
// Unlink the PHI node from the basic block, but don't delete the PHI yet.
MachineInstr *MPhi = MBB.remove(MBB.begin());
@@ -244,7 +262,6 @@ void PHIElimination::LowerAtomicPHINode(
}
// Update live variable information if there is any.
- LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
if (LV) {
MachineInstr *PHICopy = prior(AfterPHIsIt);
@@ -283,6 +300,48 @@ void PHIElimination::LowerAtomicPHINode(
}
}
+ // Update LiveIntervals for the new copy or implicit def.
+ if (LIS) {
+ MachineInstr *NewInstr = prior(AfterPHIsIt);
+ SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(NewInstr);
+
+ SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB);
+ if (IncomingReg) {
+ // Add the region from the beginning of MBB to the copy instruction to
+ // IncomingReg's live interval.
+ LiveInterval &IncomingLI = LIS->getOrCreateInterval(IncomingReg);
+ VNInfo *IncomingVNI = IncomingLI.getVNInfoAt(MBBStartIndex);
+ if (!IncomingVNI)
+ IncomingVNI = IncomingLI.getNextValue(MBBStartIndex,
+ LIS->getVNInfoAllocator());
+ IncomingLI.addRange(LiveRange(MBBStartIndex,
+ DestCopyIndex.getRegSlot(),
+ IncomingVNI));
+ }
+
+ LiveInterval &DestLI = LIS->getInterval(DestReg);
+ assert(DestLI.begin() != DestLI.end() &&
+ "PHIs should have nonempty LiveIntervals.");
+ if (DestLI.endIndex().isDead()) {
+ // A dead PHI's live range begins and ends at the start of the MBB, but
+ // the lowered copy, which will still be dead, needs to begin and end at
+ // the copy instruction.
+ VNInfo *OrigDestVNI = DestLI.getVNInfoAt(MBBStartIndex);
+ assert(OrigDestVNI && "PHI destination should be live at block entry.");
+ DestLI.removeRange(MBBStartIndex, MBBStartIndex.getDeadSlot());
+ DestLI.createDeadDef(DestCopyIndex.getRegSlot(),
+ LIS->getVNInfoAllocator());
+ DestLI.removeValNo(OrigDestVNI);
+ } else {
+ // Otherwise, remove the region from the beginning of MBB to the copy
+ // instruction from DestReg's live interval.
+ DestLI.removeRange(MBBStartIndex, DestCopyIndex.getRegSlot());
+ VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot());
+ assert(DestVNI && "PHI destination should be live at its definition.");
+ DestVNI->def = DestCopyIndex.getRegSlot();
+ }
+ }
+
// Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
--VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(),
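The live-range surgery in the hunk above can be pictured with a toy interval model (hypothetical types, not LLVM's LiveInterval): the prefix of DestReg's range from the block start to the lowered copy is carved off and handed to IncomingReg, and DestReg's value is redefined at the copy's slot.

    #include <cstdio>

    // Toy half-open live range [Start, End) with the slot defining its value.
    struct ToyRange { unsigned Start, End, DefSlot; };

    // Mirrors the LiveIntervals update above: the copy at CopyIdx now defines
    // DestReg, so the [MBBStart, CopyIdx) prefix moves to IncomingReg.
    void lowerPHIRanges(ToyRange &Dest, ToyRange &Incoming,
                        unsigned MBBStart, unsigned CopyIdx) {
      Incoming = {MBBStart, CopyIdx, MBBStart}; // live-in value feeding the copy
      Dest.Start = CopyIdx;                     // DestReg now starts at the copy
      Dest.DefSlot = CopyIdx;
    }

    int main() {
      ToyRange Dest = {0, 40, 0};  // the PHI def used to live from block start
      ToyRange Incoming = {0, 0, 0};
      lowerPHIRanges(Dest, Incoming, /*MBBStart=*/0, /*CopyIdx=*/4);
      std::printf("Incoming [%u,%u) Dest [%u,%u)\n",
                  Incoming.Start, Incoming.End, Dest.Start, Dest.End);
    }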
@@ -315,45 +374,44 @@ void PHIElimination::LowerAtomicPHINode(
findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);
// Insert the copy.
+ MachineInstr *NewSrcInstr = 0;
if (!reusedIncoming && IncomingReg) {
if (SrcUndef) {
// The source register is undefined, so there is no need for a real
// COPY, but we still need to ensure joint dominance by defs.
// Insert an IMPLICIT_DEF instruction.
- BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
- TII->get(TargetOpcode::IMPLICIT_DEF), IncomingReg);
+ NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF),
+ IncomingReg);
// Clean up the old implicit-def, if there even was one.
if (MachineInstr *DefMI = MRI->getVRegDef(SrcReg))
if (DefMI->isImplicitDef())
ImpDefs.insert(DefMI);
} else {
- BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
- TII->get(TargetOpcode::COPY), IncomingReg)
- .addReg(SrcReg, 0, SrcSubReg);
+ NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), IncomingReg)
+ .addReg(SrcReg, 0, SrcSubReg);
}
}
- // Now update live variable information if we have it. Otherwise we're done
- if (SrcUndef || !LV) continue;
+ // We only need to update the LiveVariables kill of SrcReg if this was the
+ // last PHI use of SrcReg to be lowered on this CFG edge and it is not live
+ // out of the predecessor. We can also ignore undef sources.
+ if (LV && !SrcUndef &&
+ !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)] &&
+ !LV->isLiveOut(SrcReg, opBlock)) {
+ // We want to be able to insert a kill of the register if this PHI (aka,
+ // the copy we just inserted) is the last use of the source value. Live
+ // variable analysis conservatively handles this by saying that the value
+ // is live until the end of the block the PHI entry lives in. If the value
+ // really is dead at the PHI copy, there will be no successor blocks which
+ // have the value live-in.
+
+ // Okay, if we now know that the value is not live out of the block, we
+ // can add a kill marker in this block saying that it kills the incoming
+ // value!
- // We want to be able to insert a kill of the register if this PHI (aka, the
- // copy we just inserted) is the last use of the source value. Live
- // variable analysis conservatively handles this by saying that the value is
- // live until the end of the block the PHI entry lives in. If the value
- // really is dead at the PHI copy, there will be no successor blocks which
- // have the value live-in.
-
- // Also check to see if this register is in use by another PHI node which
- // has not yet been eliminated. If so, it will be killed at an appropriate
- // point later.
-
- // Is it used by any PHI instructions in this block?
- bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)];
-
- // Okay, if we now know that the value is not live out of the block, we can
- // add a kill marker in this block saying that it kills the incoming value!
- if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) {
// In our final twist, we have to decide which instruction kills the
// register. In most cases this is the copy, however, terminator
// instructions at the end of the block may also use the value. In this
@@ -394,11 +452,74 @@ void PHIElimination::LowerAtomicPHINode(
unsigned opBlockNum = opBlock.getNumber();
LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum);
}
+
+ if (LIS) {
+ if (NewSrcInstr) {
+ LIS->InsertMachineInstrInMaps(NewSrcInstr);
+ LIS->addLiveRangeToEndOfBlock(IncomingReg, NewSrcInstr);
+ }
+
+ if (!SrcUndef &&
+ !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)]) {
+ LiveInterval &SrcLI = LIS->getInterval(SrcReg);
+
+ bool isLiveOut = false;
+ for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(),
+ SE = opBlock.succ_end(); SI != SE; ++SI) {
+ SlotIndex startIdx = LIS->getMBBStartIdx(*SI);
+ VNInfo *VNI = SrcLI.getVNInfoAt(startIdx);
+
+ // Definitions by other PHIs are not truly live-in for our purposes.
+ if (VNI && VNI->def != startIdx) {
+ isLiveOut = true;
+ break;
+ }
+ }
+
+ if (!isLiveOut) {
+ MachineBasicBlock::iterator KillInst = opBlock.end();
+ MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator();
+ for (MachineBasicBlock::iterator Term = FirstTerm;
+ Term != opBlock.end(); ++Term) {
+ if (Term->readsRegister(SrcReg))
+ KillInst = Term;
+ }
+
+ if (KillInst == opBlock.end()) {
+ // No terminator uses the register.
+
+ if (reusedIncoming || !IncomingReg) {
+ // We may have to rewind a bit if we didn't just insert a copy.
+ KillInst = FirstTerm;
+ while (KillInst != opBlock.begin()) {
+ --KillInst;
+ if (KillInst->isDebugValue())
+ continue;
+ if (KillInst->readsRegister(SrcReg))
+ break;
+ }
+ } else {
+ // We just inserted this copy.
+ KillInst = prior(InsertPos);
+ }
+ }
+ assert(KillInst->readsRegister(SrcReg) &&
+ "Cannot find kill instruction");
+
+ SlotIndex LastUseIndex = LIS->getInstructionIndex(KillInst);
+ SrcLI.removeRange(LastUseIndex.getRegSlot(),
+ LIS->getMBBEndIdx(&opBlock));
+ }
+ }
+ }
}
// Really delete the PHI instruction now, if it is not in the LoweredPHIs map.
- if (reusedIncoming || !IncomingReg)
+ if (reusedIncoming || !IncomingReg) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(MPhi);
MF.DeleteMachineInstr(MPhi);
+ }
}
/// analyzePHINodes - Gather information about the PHI nodes in here. In
@@ -418,7 +539,6 @@ void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
MachineBasicBlock &MBB,
- LiveVariables &LV,
MachineLoopInfo *MLI) {
if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
return false; // Quick exit for basic blocks without PHIs.
@@ -438,10 +558,10 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
// Avoid splitting backedges of loops. It would introduce small
// out-of-line blocks into the loop which is very bad for code placement.
- if (PreMBB == &MBB)
+ if (PreMBB == &MBB && !SplitAllCriticalEdges)
continue;
const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : 0;
- if (IsLoopHeader && PreLoop == CurLoop)
+ if (IsLoopHeader && PreLoop == CurLoop && !SplitAllCriticalEdges)
continue;
// LV doesn't consider a phi use live-out, so isLiveOut only returns true
@@ -450,7 +570,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
// there is a risk it may not be coalesced away.
//
// If the copy would be a kill, there is no need to split the edge.
- if (!LV.isLiveOut(Reg, *PreMBB))
+ if (!isLiveOutPastPHIs(Reg, PreMBB) && !SplitAllCriticalEdges)
continue;
DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#"
@@ -465,7 +585,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
// is likely to be left after coalescing. If we are looking at a loop
// exiting edge, split it so we won't insert code in the loop, otherwise
// don't bother.
- bool ShouldSplit = !LV.isLiveIn(Reg, MBB);
+ bool ShouldSplit = !isLiveIn(Reg, &MBB) || SplitAllCriticalEdges;
// Check for a loop exiting edge.
if (!ShouldSplit && CurLoop != PreLoop) {
@@ -492,3 +612,33 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
}
return Changed;
}
+
+bool PHIElimination::isLiveIn(unsigned Reg, MachineBasicBlock *MBB) {
+ assert((LV || LIS) &&
+ "isLiveIn() requires either LiveVariables or LiveIntervals");
+ if (LIS)
+ return LIS->isLiveInToMBB(LIS->getInterval(Reg), MBB);
+ else
+ return LV->isLiveIn(Reg, *MBB);
+}
+
+bool PHIElimination::isLiveOutPastPHIs(unsigned Reg, MachineBasicBlock *MBB) {
+ assert((LV || LIS) &&
+ "isLiveOutPastPHIs() requires either LiveVariables or LiveIntervals");
+ // LiveVariables considers uses in PHIs to be in the predecessor basic block,
+ // so that a register used only in a PHI is not live out of the block. In
+ // contrast, LiveIntervals considers uses in PHIs to be on the edge rather than
+ // in the predecessor basic block, so that a register used only in a PHI is live
+ // out of the block.
+ if (LIS) {
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ if (LI.liveAt(LIS->getMBBStartIdx(*SI)))
+ return true;
+ }
+ return false;
+ } else {
+ return LV->isLiveOut(Reg, *MBB);
+ }
+}
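The comment in isLiveOutPastPHIs deserves a concrete illustration. The standalone toy below (not the LLVM classes) folds in both probes from this patch: under the LiveIntervals model a PHI use keeps the register live up to the successor's start slot, while a value whose definition sits exactly at that slot is the successor's own PHI def and, as the kill-placement code in LowerPHINode notes, is not truly live-in.

    #include <cstdio>
    #include <vector>

    // Toy segment [Start, End) carrying the slot where its value was defined.
    struct Seg { unsigned Start, End, ValDef; };

    const Seg *valueAt(const std::vector<Seg> &LI, unsigned Idx) {
      for (const Seg &S : LI)
        if (S.Start <= Idx && Idx < S.End)
          return &S;
      return nullptr;
    }

    // Live at a successor's start slot counts as live-out, unless that value
    // is defined *at* the start (i.e. by the successor's own PHI).
    bool liveOutPastPHIs(const std::vector<Seg> &LI,
                         const std::vector<unsigned> &SuccStarts) {
      for (unsigned Start : SuccStarts)
        if (const Seg *S = valueAt(LI, Start))
          if (S->ValDef != Start)
            return true;
      return false;
    }

    int main() {
      // Toy slots: predecessor is [0,100), successor starts at 100. A value
      // defined at slot 10 and consumed only by a PHI in the successor sits
      // on the edge, so (in this toy) its range reaches the start slot.
      std::vector<Seg> PhiUseOnly = {{10, 101, 10}};
      // A register defined by the successor's PHI starts a new value at 100,
      // which the probe deliberately ignores.
      std::vector<Seg> PhiDefOnly = {{100, 120, 100}};
      std::printf("%d %d\n", liveOutPastPHIs(PhiUseOnly, {100}),
                  liveOutPastPHIs(PhiDefOnly, {100})); // 1 0
    }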
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 2a135bc..b79f9f9 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -89,7 +89,7 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
cl::desc("Print machine instrs"),
cl::value_desc("pass-name"), cl::init("option-unspecified"));
-// Experimental option to run live inteerval analysis early.
+// Experimental option to run live interval analysis early.
static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
cl::desc("Run live interval analysis earlier in the pipeline"));
@@ -238,9 +238,6 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
substitutePass(&EarlyTailDuplicateID, &TailDuplicateID);
substitutePass(&PostRAMachineLICMID, &MachineLICMID);
- // Disable early if-conversion. Targets that are ready can enable it.
- disablePass(&EarlyIfConverterID);
-
// Temporarily disable experimental passes.
const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
if (!ST.enableMachineScheduler())
@@ -551,7 +548,12 @@ void TargetPassConfig::addMachineSSAOptimization() {
addPass(&DeadMachineInstructionElimID);
printAndVerify("After codegen DCE pass");
- addPass(&EarlyIfConverterID);
+ // Allow targets to insert passes that improve instruction level parallelism,
+ // like if-conversion. Such passes will typically need dominator trees and
+ // loop info, just like LICM and CSE below.
+ if (addILPOpts())
+ printAndVerify("After ILP optimizations");
+
addPass(&MachineLICMID);
addPass(&MachineCSEID);
addPass(&MachineSinkingID);
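With the hook in place, a target that wants early if-conversion back opts in by overriding addILPOpts() on its pass config. A sketch of the shape this takes, leaning only on the TargetPassConfig API visible in this patch (the target class name is illustrative, and the constructor and other overrides are omitted):

    // Sketch: a target opting back into ILP passes via the new hook.
    class MyTargetPassConfig : public TargetPassConfig {
    public:
      // Returning true makes addMachineSSAOptimization() print and verify
      // the function after the ILP passes run.
      virtual bool addILPOpts() {
        addPass(&EarlyIfConverterID);
        return true;
      }
    };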
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 488f1d4..53fe273 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -418,11 +418,11 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
// Start with no live registers.
LiveRegs.reset();
- // Determine the live-out physregs for this block.
- if (!BB->empty() && BB->back().isReturn()) {
- // In a return block, examine the function live-out regs.
- for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
- E = MRI.liveout_end(); I != E; ++I) {
+ // Examine the live-in regs of all successors.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI) {
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
unsigned Reg = *I;
LiveRegs.set(Reg);
// Repeat, for all subregs.
@@ -430,20 +430,6 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
LiveRegs.set(*SubRegs);
}
}
- else {
- // In a non-return block, examine the live-in regs of all successors.
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
- SE = BB->succ_end(); SI != SE; ++SI) {
- for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
- E = (*SI)->livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- LiveRegs.set(Reg);
- // Repeat, for all subregs.
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
- LiveRegs.set(*SubRegs);
- }
- }
- }
}
bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
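The simplification above is easy to state as set arithmetic: a block's live-out registers are just the union of its successors' live-ins, with no return-block special case (presumably because return values are now modeled as implicit uses on the return instruction itself). A standalone toy of the computation, with subregister expansion omitted:

    #include <bitset>
    #include <cstdio>
    #include <vector>

    const unsigned NumRegs = 64; // toy register file

    // Live-out(BB) = union of Live-in(S) over all successors S.
    std::bitset<NumRegs>
    computeLiveOut(const std::vector<std::bitset<NumRegs> > &SuccLiveIns) {
      std::bitset<NumRegs> LiveRegs;
      for (unsigned i = 0, e = SuccLiveIns.size(); i != e; ++i)
        LiveRegs |= SuccLiveIns[i];
      return LiveRegs;
    }

    int main() {
      std::vector<std::bitset<NumRegs> > Succs(2);
      Succs[0].set(3);        // successor 0 reads reg 3
      Succs[1].set(3).set(7); // successor 1 reads regs 3 and 7
      std::printf("%zu regs live-out\n", computeLiveOut(Succs).count()); // 2
    }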
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 1d0e71e..b18d52d 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -139,7 +139,6 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
/// variables for the function's frame information and eliminate call frame
/// pseudo instructions.
void PEI::calculateCallsInformation(MachineFunction &Fn) {
- const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
MachineFrameInfo *MFI = Fn.getFrameInfo();
@@ -186,7 +185,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
// here. The sub/add sp instruction pairs are still inserted, but we don't
// need to track the SP adjustment for frame index elimination.
if (TFI->canSimplifyCallFramePseudos(Fn))
- RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
+ TFI->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
}
}
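After this move, targets implement call-frame pseudo elimination on their frame lowering rather than their register info. A sketch of the override's rough shape, assuming the TargetFrameLowering hook as used above (target class name hypothetical; constructor and other required overrides omitted):

    // Sketch: the hook now lives on TargetFrameLowering.
    class MyFrameLowering : public TargetFrameLowering {
    public:
      virtual void eliminateCallFramePseudoInstr(
          MachineFunction &MF, MachineBasicBlock &MBB,
          MachineBasicBlock::iterator I) const {
        // Typically: turn ADJCALLSTACKDOWN/UP pseudos into real SP
        // adjustments when needed, then erase the pseudo.
        MBB.erase(I);
      }
    };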
@@ -693,6 +692,14 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
// space in small chunks instead of one large contiguous block.
if (Fn.getTarget().Options.EnableSegmentedStacks)
TFI.adjustForSegmentedStacks(Fn);
+
+ // Emit additional code that is required to explicitly handle the stack in
+ // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
+ // approach is rather similar to that of Segmented Stacks, but it uses a
+ // different conditional check and another BIF for allocating more stack
+ // space.
+ if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE)
+ TFI.adjustForHiPEPrologue(Fn);
}
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
@@ -739,7 +746,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
MachineBasicBlock::iterator PrevI = BB->end();
if (I != BB->begin()) PrevI = prior(I);
- TRI.eliminateCallFramePseudoInstr(Fn, *BB, I);
+ TFI->eliminateCallFramePseudoInstr(Fn, *BB, I);
// Visit the instructions created by eliminateCallFramePseudoInstr().
if (PrevI == BB->end())
@@ -751,34 +758,36 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
MachineInstr *MI = I;
bool DoIncr = true;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
- if (MI->getOperand(i).isFI()) {
- // Some instructions (e.g. inline asm instructions) can have
- // multiple frame indices and/or cause eliminateFrameIndex
- // to insert more than one instruction. We need the register
- // scavenger to go through all of these instructions so that
- // it can update its register information. We keep the
- // iterator at the point before insertion so that we can
- // revisit them in full.
- bool AtBeginning = (I == BB->begin());
- if (!AtBeginning) --I;
-
- // If this instruction has a FrameIndex operand, we need to
- // use that target machine register info object to eliminate
- // it.
- TRI.eliminateFrameIndex(MI, SPAdj,
- FrameIndexVirtualScavenging ? NULL : RS);
-
- // Reset the iterator if we were at the beginning of the BB.
- if (AtBeginning) {
- I = BB->begin();
- DoIncr = false;
- }
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (!MI->getOperand(i).isFI())
+ continue;
- MI = 0;
- break;
+ // Some instructions (e.g. inline asm instructions) can have
+ // multiple frame indices and/or cause eliminateFrameIndex
+ // to insert more than one instruction. We need the register
+ // scavenger to go through all of these instructions so that
+ // it can update its register information. We keep the
+ // iterator at the point before insertion so that we can
+ // revisit them in full.
+ bool AtBeginning = (I == BB->begin());
+ if (!AtBeginning) --I;
+
+ // If this instruction has a FrameIndex operand, we need to
+ // use that target machine register info object to eliminate
+ // it.
+ TRI.eliminateFrameIndex(MI, SPAdj, i,
+ FrameIndexVirtualScavenging ? NULL : RS);
+
+ // Reset the iterator if we were at the beginning of the BB.
+ if (AtBeginning) {
+ I = BB->begin();
+ DoIncr = false;
}
+ MI = 0;
+ break;
+ }
+
if (DoIncr && I != BB->end()) ++I;
// Update register states.
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 8d849dc..bb9c05c 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -113,12 +113,27 @@ namespace {
// PhysRegState - One of the RegState enums, or a virtreg.
std::vector<unsigned> PhysRegState;
+ // Set of register units.
typedef SparseSet<unsigned> UsedInInstrSet;
- // UsedInInstr - Set of physregs that are used in the current instruction,
- // and so cannot be allocated.
+ // Set of register units that are used in the current instruction, and so
+ // cannot be allocated.
UsedInInstrSet UsedInInstr;
+ // Mark a physreg as used in this instruction.
+ void markRegUsedInInstr(unsigned PhysReg) {
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ UsedInInstr.insert(*Units);
+ }
+
+ // Check if a physreg or any of its aliases are used in this instruction.
+ bool isRegUsedInInstr(unsigned PhysReg) const {
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ if (UsedInInstr.count(*Units))
+ return true;
+ return false;
+ }
+
// SkippedInstrs - Descriptors of instructions whose clobber list was
// ignored because all registers were spilled. It is still necessary to
// mark all the clobbered registers as used by the function.
@@ -177,7 +192,6 @@ namespace {
unsigned VirtReg, unsigned Hint);
void spillAll(MachineBasicBlock::iterator MI);
bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
- void addRetOperands(MachineBasicBlock *MBB);
};
char RAFast::ID = 0;
}
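Tracking register units instead of whole physregs means a single insert covers every alias automatically: two registers overlap exactly when they share a unit, so no alias iteration is needed on the query side. A standalone toy of the scheme (hypothetical unit assignments, unrelated to any real target):

    #include <cstdio>
    #include <set>
    #include <vector>

    // Toy register -> register-unit table: AL={0}, AH={1}, AX={0,1}.
    const std::vector<std::vector<unsigned> > RegUnits = {{0}, {1}, {0, 1}};
    enum { AL, AH, AX };

    std::set<unsigned> UsedInInstr; // a set of *units*, as in the patch

    void markRegUsedInInstr(unsigned Reg) {
      for (unsigned U : RegUnits[Reg])
        UsedInInstr.insert(U);
    }

    // A register is "used" if any of its units is; this is what makes alias
    // queries free compared with walking MCRegAliasIterator.
    bool isRegUsedInInstr(unsigned Reg) {
      for (unsigned U : RegUnits[Reg])
        if (UsedInInstr.count(U))
          return true;
      return false;
    }

    int main() {
      markRegUsedInInstr(AL);
      std::printf("AX used: %d, AH used: %d\n",
                  isRegUsedInInstr(AX), isRegUsedInInstr(AH)); // 1, 0
    }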
@@ -334,7 +348,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
unsigned PhysReg = MO.getReg();
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
"Bad usePhysReg operand");
-
+ markRegUsedInInstr(PhysReg);
switch (PhysRegState[PhysReg]) {
case regDisabled:
break;
@@ -342,7 +356,6 @@ void RAFast::usePhysReg(MachineOperand &MO) {
PhysRegState[PhysReg] = regFree;
// Fall through
case regFree:
- UsedInInstr.insert(PhysReg);
MO.setIsKill();
return;
default:
@@ -362,13 +375,11 @@ void RAFast::usePhysReg(MachineOperand &MO) {
"Instruction is not using a subregister of a reserved register");
// Leave the superregister in the working set.
PhysRegState[Alias] = regFree;
- UsedInInstr.insert(Alias);
MO.getParent()->addRegisterKilled(Alias, TRI, true);
return;
case regFree:
if (TRI->isSuperRegister(PhysReg, Alias)) {
// Leave the superregister in the working set.
- UsedInInstr.insert(Alias);
MO.getParent()->addRegisterKilled(Alias, TRI, true);
return;
}
@@ -382,7 +393,6 @@ void RAFast::usePhysReg(MachineOperand &MO) {
// All aliases are disabled, bring register into working set.
PhysRegState[PhysReg] = regFree;
- UsedInInstr.insert(PhysReg);
MO.setIsKill();
}
@@ -391,7 +401,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
/// reserved instead of allocated.
void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
RegState NewState) {
- UsedInInstr.insert(PhysReg);
+ markRegUsedInInstr(PhysReg);
switch (unsigned VirtReg = PhysRegState[PhysReg]) {
case regDisabled:
break;
@@ -431,7 +441,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
// can be allocated directly.
// Returns spillImpossible when PhysReg or an alias can't be spilled.
unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
- if (UsedInInstr.count(PhysReg)) {
+ if (isRegUsedInInstr(PhysReg)) {
DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n");
return spillImpossible;
}
@@ -456,8 +466,6 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
unsigned Cost = 0;
for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
unsigned Alias = *AI;
- if (UsedInInstr.count(Alias))
- return spillImpossible;
switch (unsigned VirtReg = PhysRegState[Alias]) {
case regDisabled:
break;
@@ -532,7 +540,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
// First try to find a completely free register.
for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){
unsigned PhysReg = *I;
- if (PhysRegState[PhysReg] == regFree && !UsedInInstr.count(PhysReg)) {
+ if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) {
assignVirtToPhysReg(*LRI, PhysReg);
return LRI;
}
@@ -598,7 +606,7 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
LRI->LastUse = MI;
LRI->LastOpNum = OpNum;
LRI->Dirty = true;
- UsedInInstr.insert(LRI->PhysReg);
+ markRegUsedInInstr(LRI->PhysReg);
return LRI;
}
@@ -648,7 +656,7 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
assert(LRI->PhysReg && "Register not assigned");
LRI->LastUse = MI;
LRI->LastOpNum = OpNum;
- UsedInInstr.insert(LRI->PhysReg);
+ markRegUsedInInstr(LRI->PhysReg);
return LRI;
}
@@ -709,8 +717,8 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
if (!MO.isReg() || !MO.isDef()) continue;
unsigned Reg = MO.getReg();
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ markRegUsedInInstr(Reg);
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- UsedInInstr.insert(*AI);
if (ThroughRegs.count(PhysRegState[*AI]))
definePhysReg(MI, *AI, regFree);
}
@@ -766,65 +774,12 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI)
<< " as used in instr\n");
- UsedInInstr.insert(Reg);
+ markRegUsedInInstr(Reg);
}
// Also mark PartialDefs as used to avoid reallocation.
for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i)
- UsedInInstr.insert(PartialDefs[i]);
-}
-
-/// addRetOperand - ensure that a return instruction has an operand for each
-/// value live out of the function.
-///
-/// Things marked both call and return are tail calls; do not do this for them.
-/// The tail callee need not take the same registers as input that it produces
-/// as output, and there are dependencies for its input registers elsewhere.
-///
-/// FIXME: This should be done as part of instruction selection, and this helper
-/// should be deleted. Until then, we use custom logic here to create the proper
-/// operand under all circumstances. We can't use addRegisterKilled because that
-/// doesn't make sense for undefined values. We can't simply avoid calling it
-/// for undefined values, because we must ensure that the operand always exists.
-void RAFast::addRetOperands(MachineBasicBlock *MBB) {
- if (MBB->empty() || !MBB->back().isReturn() || MBB->back().isCall())
- return;
-
- MachineInstr *MI = &MBB->back();
-
- for (MachineRegisterInfo::liveout_iterator
- I = MBB->getParent()->getRegInfo().liveout_begin(),
- E = MBB->getParent()->getRegInfo().liveout_end(); I != E; ++I) {
- unsigned Reg = *I;
- assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
- "Cannot have a live-out virtual register.");
-
- bool hasDef = PhysRegState[Reg] == regReserved;
-
- // Check if this register already has an operand.
- bool Found = false;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isUse())
- continue;
-
- unsigned OperReg = MO.getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(OperReg))
- continue;
-
- if (OperReg == Reg || TRI->isSuperRegister(OperReg, Reg)) {
- // If the ret already has an operand for this physreg or a superset,
- // don't duplicate it. Set the kill flag if the value is defined.
- if (hasDef && !MO.isKill())
- MO.setIsKill();
- Found = true;
- break;
- }
- }
- if (!Found)
- MachineInstrBuilder(*MF, MI)
- .addReg(Reg, llvm::RegState::Implicit | getKillRegState(hasDef));
- }
+ markRegUsedInInstr(PartialDefs[i]);
}
void RAFast::AllocateBasicBlock() {
@@ -1023,7 +978,7 @@ void RAFast::AllocateBasicBlock() {
for (UsedInInstrSet::iterator
I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
- MRI->setPhysRegUsed(*I);
+ MRI->setRegUnitUsed(*I);
// Track registers defined by instruction - early clobbers and tied uses at
// this point.
@@ -1036,8 +991,7 @@ void RAFast::AllocateBasicBlock() {
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
// Look for physreg defs and tied uses.
if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue;
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- UsedInInstr.insert(*AI);
+ markRegUsedInInstr(Reg);
}
}
@@ -1089,7 +1043,7 @@ void RAFast::AllocateBasicBlock() {
for (UsedInInstrSet::iterator
I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
- MRI->setPhysRegUsed(*I);
+ MRI->setRegUnitUsed(*I);
if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
DEBUG(dbgs() << "-- coalescing: " << *MI);
@@ -1109,9 +1063,6 @@ void RAFast::AllocateBasicBlock() {
MBB->erase(Coalesced[i]);
NumCopies += Coalesced.size();
- // addRetOperands must run after we've seen all defs in this block.
- addRetOperands(MBB);
-
DEBUG(MBB->dump());
}
@@ -1128,7 +1079,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
MRI->freezeReservedRegs(Fn);
RegClassInfo.runOnMachineFunction(Fn);
UsedInInstr.clear();
- UsedInInstr.setUniverse(TRI->getNumRegs());
+ UsedInInstr.setUniverse(TRI->getNumRegUnits());
assert(!MRI->isSSA() && "regalloc requires leaving SSA");
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 1884452..6344a73 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -632,16 +632,33 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
// Keep track of the cheapest interference seen so far.
EvictionCost BestCost(~0u);
unsigned BestPhys = 0;
+ unsigned OrderLimit = Order.getOrder().size();
// When we are just looking for a reduced cost per use, don't break any
// hints, and only evict smaller spill weights.
if (CostPerUseLimit < ~0u) {
BestCost.BrokenHints = 0;
BestCost.MaxWeight = VirtReg.weight;
+
+ // Check if any registers in RC are below CostPerUseLimit.
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg);
+ unsigned MinCost = RegClassInfo.getMinCost(RC);
+ if (MinCost >= CostPerUseLimit) {
+ DEBUG(dbgs() << RC->getName() << " minimum cost = " << MinCost
+ << ", no cheaper registers to be found.\n");
+ return 0;
+ }
+
+ // It is normal for register classes to have a long tail of registers with
+ // the same cost. We don't need to look at them if they're too expensive.
+ if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) {
+ OrderLimit = RegClassInfo.getLastCostChange(RC);
+ DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n");
+ }
}
Order.rewind();
- while (unsigned PhysReg = Order.next()) {
+ while (unsigned PhysReg = Order.nextWithDups(OrderLimit)) {
if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
continue;
// The first use of a callee-saved register in a function has cost 1.
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index 078a0df..87382d8 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -83,6 +83,9 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
unsigned N = 0;
SmallVector<MCPhysReg, 16> CSRAlias;
+ unsigned MinCost = 0xff;
+ unsigned LastCost = ~0u;
+ unsigned LastCostChange = 0;
// FIXME: Once targets reserve registers instead of removing them from the
// allocation order, we can simply use begin/end here.
@@ -92,17 +95,31 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
// Remove reserved registers from the allocation order.
if (Reserved.test(PhysReg))
continue;
+ unsigned Cost = TRI->getCostPerUse(PhysReg);
+ MinCost = std::min(MinCost, Cost);
+
if (CSRNum[PhysReg])
// PhysReg aliases a CSR, save it for later.
CSRAlias.push_back(PhysReg);
- else
+ else {
+ if (Cost != LastCost)
+ LastCostChange = N;
RCI.Order[N++] = PhysReg;
+ LastCost = Cost;
+ }
}
RCI.NumRegs = N + CSRAlias.size();
assert (RCI.NumRegs <= NumRegs && "Allocation order larger than regclass");
// CSR aliases go after the volatile registers, preserve the target's order.
- std::copy(CSRAlias.begin(), CSRAlias.end(), &RCI.Order[N]);
+ for (unsigned i = 0, e = CSRAlias.size(); i != e; ++i) {
+ unsigned PhysReg = CSRAlias[i];
+ unsigned Cost = TRI->getCostPerUse(PhysReg);
+ if (Cost != LastCost)
+ LastCostChange = N;
+ RCI.Order[N++] = PhysReg;
+ LastCost = Cost;
+ }
// Register allocator stress test. Clip register class to N registers.
if (StressRA && RCI.NumRegs > StressRA)
@@ -113,6 +130,9 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs)
RCI.ProperSubClass = true;
+ RCI.MinCost = uint8_t(MinCost);
+ RCI.LastCostChange = LastCostChange;
+
DEBUG({
dbgs() << "AllocationOrder(" << RC->getName() << ") = [";
for (unsigned I = 0; I != RCI.NumRegs; ++I)
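The two cached values feed the tryEvict() cutoffs shown earlier: MinCost lets the allocator bail out when nothing in the class can beat CostPerUseLimit, and LastCostChange bounds the scan once the order's equal-cost tail is too expensive. A standalone sketch of the bookkeeping over a cost-annotated allocation order:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct OrderCosts { unsigned MinCost, LastCostChange; };

    // One pass over the allocation order, mirroring compute() above: remember
    // the cheapest cost seen and the index where the cost last changed.
    OrderCosts scanOrder(const std::vector<unsigned> &CostPerReg) {
      OrderCosts R = {~0u, 0};
      unsigned LastCost = ~0u;
      for (unsigned i = 0, e = CostPerReg.size(); i != e; ++i) {
        R.MinCost = std::min(R.MinCost, CostPerReg[i]);
        if (CostPerReg[i] != LastCost)
          R.LastCostChange = i;
        LastCost = CostPerReg[i];
      }
      return R;
    }

    int main() {
      // A cheap head followed by a long equal-cost tail: only the first three
      // entries ever need scanning when the tail cost is over the limit.
      OrderCosts R = scanOrder({0, 0, 1, 5, 5, 5, 5, 5});
      std::printf("MinCost=%u LastCostChange=%u\n", R.MinCost, R.LastCostChange);
    }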
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 36d8101..e2488ad 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -15,7 +15,6 @@
#define DEBUG_TYPE "regalloc"
#include "RegisterCoalescer.h"
-#include "LiveDebugVariables.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
@@ -84,7 +83,6 @@ namespace {
const TargetRegisterInfo* TRI;
const TargetInstrInfo* TII;
LiveIntervals *LIS;
- LiveDebugVariables *LDV;
const MachineLoopInfo* Loops;
AliasAnalysis *AA;
RegisterClassInfo RegClassInfo;
@@ -169,8 +167,7 @@ namespace {
/// reMaterializeTrivialDef - If the source of a copy is defined by a
/// trivial computation, replace the copy by rematerialize the definition.
- bool reMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg,
- MachineInstr *CopyMI);
+ bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI);
/// canJoinPhys - Return true if a physreg copy should be joined.
bool canJoinPhys(const CoalescerPair &CP);
@@ -208,7 +205,6 @@ char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
"Simple Register Coalescing", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
@@ -394,8 +390,6 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
- AU.addRequired<LiveDebugVariables>();
- AU.addPreserved<LiveDebugVariables>();
AU.addPreserved<SlotIndexes>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
@@ -737,9 +731,14 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
/// reMaterializeTrivialDef - If the source of a copy is defined by a trivial
/// computation, replace the copy by rematerialize the definition.
-bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt,
- unsigned DstReg,
+bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
MachineInstr *CopyMI) {
+ unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
+ unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ return false;
+
+ LiveInterval &SrcInt = LIS->getInterval(SrcReg);
SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
assert(SrcLR != SrcInt.end() && "Live range not found!");
@@ -760,13 +759,17 @@ bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt,
const MCInstrDesc &MCID = DefMI->getDesc();
if (MCID.getNumDefs() != 1)
return false;
+ // Only support subregister destinations when the def is read-undef.
+ MachineOperand &DstOperand = CopyMI->getOperand(0);
+ if (DstOperand.getSubReg() && !DstOperand.isUndef())
+ return false;
if (!DefMI->isImplicitDef()) {
// Make sure the copy destination register class fits the instruction
// definition register class. The mismatch can happen as a result of earlier
// extract_subreg, insert_subreg, subreg_to_reg coalescing.
const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI, *MF);
if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
- if (MRI->getRegClass(DstReg) != RC)
+ if (!MRI->constrainRegClass(DstReg, RC))
return false;
} else if (!RC->contains(DstReg))
return false;
@@ -778,6 +781,12 @@ bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt,
TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI);
MachineInstr *NewMI = prior(MII);
+ // The original DefMI may have been a subregister def, but the full register
+ // class of its destination matches the destination of CopyMI, and CopyMI is
+ // either a full register def or is read-undef. Therefore we can clear the
+ // subregister index on the rematerialized instruction.
+ NewMI->getOperand(0).setSubReg(0);
+
// NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
// We need to remember these so we can add intervals once we insert
// NewMI into SlotIndexes.
@@ -883,9 +892,6 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
LiveInterval *DstInt = DstIsPhys ? 0 : &LIS->getInterval(DstReg);
- // Update LiveDebugVariables.
- LDV->renameRegister(SrcReg, DstReg, SubIdx);
-
SmallPtrSet<MachineInstr*, 8> Visited;
for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg);
MachineInstr *UseMI = I.skipInstruction();) {
@@ -1010,9 +1016,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
if (!canJoinPhys(CP)) {
// Before giving up coalescing, if definition of source is defined by
// trivial computation, try rematerializing it.
- if (!CP.isFlipped() &&
- reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()),
- CP.getDstReg(), CopyMI))
+ if (reMaterializeTrivialDef(CP, CopyMI))
return true;
return false;
}
@@ -1045,9 +1049,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
// If definition of source is defined by trivial computation, try
// rematerializing it.
- if (!CP.isFlipped() &&
- reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()),
- CP.getDstReg(), CopyMI))
+ if (reMaterializeTrivialDef(CP, CopyMI))
return true;
// If we can eliminate the copy without merging the live ranges, do so now.
@@ -2136,7 +2138,6 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
TRI = TM->getRegisterInfo();
TII = TM->getInstrInfo();
LIS = &getAnalysis<LiveIntervals>();
- LDV = &getAnalysis<LiveDebugVariables>();
AA = &getAnalysis<AliasAnalysis>();
Loops = &getAnalysis<MachineLoopInfo>();
@@ -2182,7 +2183,6 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
}
DEBUG(dump());
- DEBUG(LDV->dump());
if (VerifyCoalescing)
MF->verify(this, "After register coalescing");
return true;
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 62e95aa..97f22e1 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -357,15 +357,14 @@ protected:
/// Collect physical and virtual register operands.
static void collectOperands(const MachineInstr *MI,
RegisterOperands &RegOpers) {
- for(ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI)
+ for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI)
RegOpers.collect(*OperI);
// Remove redundant physreg dead defs.
- for (unsigned i = RegOpers.DeadDefs.size(); i > 0; --i) {
- unsigned Reg = RegOpers.DeadDefs[i-1];
- if (containsReg(RegOpers.Defs, Reg))
- RegOpers.DeadDefs.erase(&RegOpers.DeadDefs[i-1]);
- }
+ SmallVectorImpl<unsigned>::iterator I =
+ std::remove_if(RegOpers.DeadDefs.begin(), RegOpers.DeadDefs.end(),
+ std::bind1st(std::ptr_fun(containsReg), RegOpers.Defs));
+ RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end());
}
/// Force liveness of registers.
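std::bind1st and std::ptr_fun date the patch; the same dead-def pruning reads more directly with a lambda. A sketch of the equivalent, written in C++11 (which LLVM did not yet require at the time):

    #include <algorithm>
    #include <vector>

    // Equivalent of the remove_if above: drop dead defs that also appear in
    // Defs, keeping DeadDefs free of redundant entries.
    void pruneDeadDefs(std::vector<unsigned> &DeadDefs,
                       const std::vector<unsigned> &Defs) {
      DeadDefs.erase(std::remove_if(DeadDefs.begin(), DeadDefs.end(),
                                    [&](unsigned Reg) {
                                      return std::find(Defs.begin(), Defs.end(),
                                                       Reg) != Defs.end();
                                    }),
                     DeadDefs.end());
    }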
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 88f67da..6da901f 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -316,6 +316,16 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
return Survivor;
}
+static unsigned getFrameIndexOperandNum(MachineInstr *MI) {
+ unsigned i = 0;
+ while (!MI->getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI->getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+ return i;
+}
+
unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
MachineBasicBlock::iterator I,
int SPAdj) {
@@ -364,12 +374,16 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
"Cannot scavenge register without an emergency spill slot!");
TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,TRI);
MachineBasicBlock::iterator II = prior(I);
- TRI->eliminateFrameIndex(II, SPAdj, this);
+
+ unsigned FIOperandNum = getFrameIndexOperandNum(II);
+ TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
// Restore the scavenged register before its use (or first terminator).
TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI);
II = prior(UseMI);
- TRI->eliminateFrameIndex(II, SPAdj, this);
+
+ FIOperandNum = getFrameIndexOperandNum(II);
+ TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
}
ScavengeRestore = prior(UseMI);
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index e639c55..45b4f68 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -135,20 +135,14 @@ void SUnit::removePred(const SDep &D) {
for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
I != E; ++I)
if (*I == D) {
- bool FoundSucc = false;
// Find the corresponding successor in N.
SDep P = D;
P.setSUnit(this);
SUnit *N = D.getSUnit();
- for (SmallVector<SDep, 4>::iterator II = N->Succs.begin(),
- EE = N->Succs.end(); II != EE; ++II)
- if (*II == P) {
- FoundSucc = true;
- N->Succs.erase(II);
- break;
- }
- assert(FoundSucc && "Mismatching preds / succs lists!");
- (void)FoundSucc;
+ SmallVectorImpl<SDep>::iterator Succ = std::find(N->Succs.begin(),
+ N->Succs.end(), P);
+ assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!");
+ N->Succs.erase(Succ);
Preds.erase(I);
// Update the bookkeeping.
if (P.getKind() == SDep::Data) {
@@ -301,6 +295,21 @@ void SUnit::ComputeHeight() {
} while (!WorkList.empty());
}
+void SUnit::biasCriticalPath() {
+ if (NumPreds < 2)
+ return;
+
+ SUnit::pred_iterator BestI = Preds.begin();
+ unsigned MaxDepth = BestI->getSUnit()->getDepth();
+ for (SUnit::pred_iterator
+ I = llvm::next(BestI), E = Preds.end(); I != E; ++I) {
+ if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth)
+ BestI = I;
+ }
+ if (BestI != Preds.begin())
+ std::swap(*Preds.begin(), *BestI);
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or
/// a group of nodes flagged together.
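biasCriticalPath() hoists the predecessor on the longest data chain to the front so depth-first schedulers explore it first. Here is the intended reordering as a standalone toy; note that the toy tracks the running maximum depth, which is what the heuristic is after:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct ToyDep { bool IsData; unsigned Depth; };

    // Move the data predecessor with the greatest depth to the front of the
    // list, as biasCriticalPath() intends.
    void biasCriticalPath(std::vector<ToyDep> &Preds) {
      if (Preds.size() < 2)
        return;
      std::vector<ToyDep>::iterator Best = Preds.begin();
      for (std::vector<ToyDep>::iterator I = Preds.begin() + 1,
           E = Preds.end(); I != E; ++I)
        if (I->IsData && I->Depth > Best->Depth)
          Best = I;
      if (Best != Preds.begin())
        std::swap(Preds.front(), *Best);
    }

    int main() {
      std::vector<ToyDep> Preds = {{true, 2}, {false, 9}, {true, 7}};
      biasCriticalPath(Preds);
      std::printf("front depth = %u\n", Preds.front().Depth); // 7
    }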
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 662fc0e..71e7a21 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -168,20 +168,6 @@ void ScheduleDAGInstrs::finishBlock() {
BB = 0;
}
-/// Initialize the map with the number of registers.
-void Reg2SUnitsMap::setRegLimit(unsigned Limit) {
- PhysRegSet.setUniverse(Limit);
- SUnits.resize(Limit);
-}
-
-/// Clear the map without deallocating storage.
-void Reg2SUnitsMap::clear() {
- for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) {
- SUnits[*I].clear();
- }
- PhysRegSet.clear();
-}
-
/// Initialize the DAG and common scheduler state for the current scheduling
/// region. This does not actually create the DAG, only clears it. The
/// scheduling driver may call BuildSchedGraph multiple times per scheduling
@@ -228,7 +214,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
if (Reg == 0) continue;
if (TRI->isPhysicalRegister(Reg))
- Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
+ Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
else {
assert(!IsPostRA && "Virtual register encountered after regalloc.");
if (MO.readsReg()) // ignore undef operands
@@ -245,7 +231,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
E = (*SI)->livein_end(); I != E; ++I) {
unsigned Reg = *I;
if (!Uses.contains(Reg))
- Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
+ Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
}
}
}
@@ -263,15 +249,14 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
Alias.isValid(); ++Alias) {
if (!Uses.contains(*Alias))
continue;
- std::vector<PhysRegSUOper> &UseList = Uses[*Alias];
- for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
- SUnit *UseSU = UseList[i].SU;
+ for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
+ SUnit *UseSU = I->SU;
if (UseSU == SU)
continue;
// Adjust the dependence latency using operand def/use information,
// then allow the target to perform its own adjustments.
- int UseOp = UseList[i].OpIdx;
+ int UseOp = I->OpIdx;
MachineInstr *RegUse = 0;
SDep Dep;
if (UseOp < 0)
@@ -311,9 +296,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
Alias.isValid(); ++Alias) {
if (!Defs.contains(*Alias))
continue;
- std::vector<PhysRegSUOper> &DefList = Defs[*Alias];
- for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
- SUnit *DefSU = DefList[i].SU;
+ for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
+ SUnit *DefSU = I->SU;
if (DefSU == &ExitSU)
continue;
if (DefSU != SU &&
@@ -337,33 +321,37 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
// Either insert a new Reg2SUnits entry with an empty SUnits list, or
// retrieve the existing SUnits list for this register's uses.
// Push this SUnit on the use list.
- Uses[MO.getReg()].push_back(PhysRegSUOper(SU, OperIdx));
+ Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg()));
}
else {
addPhysRegDataDeps(SU, OperIdx);
-
- // Either insert a new Reg2SUnits entry with an empty SUnits list, or
- // retrieve the existing SUnits list for this register's defs.
- std::vector<PhysRegSUOper> &DefList = Defs[MO.getReg()];
+ unsigned Reg = MO.getReg();
// clear this register's use list
- if (Uses.contains(MO.getReg()))
- Uses[MO.getReg()].clear();
-
- if (!MO.isDead())
- DefList.clear();
-
- // Calls will not be reordered because of chain dependencies (see
- // below). Since call operands are dead, calls may continue to be added
- // to the DefList making dependence checking quadratic in the size of
- // the block. Instead, we leave only one call at the back of the
- // DefList.
- if (SU->isCall) {
- while (!DefList.empty() && DefList.back().SU->isCall)
- DefList.pop_back();
+ if (Uses.contains(Reg))
+ Uses.eraseAll(Reg);
+
+ if (!MO.isDead()) {
+ Defs.eraseAll(Reg);
+ } else if (SU->isCall) {
+ // Calls will not be reordered because of chain dependencies (see
+ // below). Since call operands are dead, calls may continue to be added
+ // to the DefList making dependence checking quadratic in the size of
+ // the block. Instead, we leave only one call at the back of the
+ // DefList.
+ Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
+ Reg2SUnitsMap::iterator B = P.first;
+ Reg2SUnitsMap::iterator I = P.second;
+ for (bool isBegin = I == B; !isBegin; /* empty */) {
+ isBegin = (--I) == B;
+ if (!I->SU->isCall)
+ break;
+ I = Defs.erase(I);
+ }
}
+
// Defs are pushed in the order they are visited and never reordered.
- DefList.push_back(PhysRegSUOper(SU, OperIdx));
+ Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
}
}
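The reworked Defs/Uses container behaves like a multimap keyed by register: insert() appends an (SU, OpIdx) record under its register, find()/equal_range() walk every record for one key, and eraseAll() drops a key's whole list. A standalone approximation with std::multimap (the real container is LLVM's SparseMultiSet, which keeps a cache-friendly layout that std::multimap lacks):

    #include <cstdio>
    #include <map>

    struct SUOper { unsigned SUNum; int OpIdx; };
    typedef std::multimap<unsigned, SUOper> ToyReg2SUnits; // key: physreg

    int main() {
      ToyReg2SUnits Defs;
      Defs.insert({5, {0, 1}}); // reg 5 defined by SU#0, operand 1
      Defs.insert({5, {3, 0}}); // reg 5 defined again by SU#3

      // equal_range walks every record for one register, as the dependence
      // code above does when adding anti/output dependencies.
      auto Range = Defs.equal_range(5);
      for (auto I = Range.first; I != Range.second; ++I)
        std::printf("reg 5 def by SU#%u op %d\n",
                    I->second.SUNum, I->second.OpIdx);

      Defs.erase(5); // eraseAll(5): a non-dead def kills the old list
      std::printf("records left: %zu\n", Defs.size()); // 0
    }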
@@ -726,8 +714,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
assert(Defs.empty() && Uses.empty() &&
"Only BuildGraph should update Defs/Uses");
- Defs.setRegLimit(TRI->getNumRegs());
- Uses.setRegLimit(TRI->getNumRegs());
+ Defs.setUniverse(TRI->getNumRegs());
+ Uses.setUniverse(TRI->getNumRegs());
assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
// FIXME: Allow SparseSet to reserve space for the creation of virtual
@@ -758,7 +746,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI");
}
- assert((!MI->isTerminator() || CanHandleTerminators) && !MI->isLabel() &&
+ assert((CanHandleTerminators || (!MI->isTerminator() && !MI->isLabel())) &&
"Cannot schedule terminators or labels!");
SUnit *SU = MISUnitMap[MI];
@@ -1006,7 +994,7 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
else if (SU == &ExitSU)
oss << "<exit>";
else
- SU->getInstr()->print(oss);
+ SU->getInstr()->print(oss, &TM, /*SkipOpers=*/true);
return oss.str();
}
@@ -1030,58 +1018,95 @@ class SchedDFSImpl {
/// List PredSU, SuccSU pairs that represent data edges between subtrees.
std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs;
+ struct RootData {
+ unsigned NodeID;
+ unsigned ParentNodeID; // Parent node (member of the parent subtree).
+ unsigned SubInstrCount; // Instr count in this tree only, not children.
+
+ RootData(unsigned id): NodeID(id),
+ ParentNodeID(SchedDFSResult::InvalidSubtreeID),
+ SubInstrCount(0) {}
+
+ unsigned getSparseSetIndex() const { return NodeID; }
+ };
+
+ SparseSet<RootData> RootSet;
+
public:
- SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSData.size()) {}
+ SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
+ RootSet.setUniverse(R.DFSNodeData.size());
+ }
- /// SubtreID is initialized to zero, set to itself to flag the root of a
- /// subtree, set to the parent to indicate an interior node,
- /// then set to a representative subtree ID during finalization.
+ /// Return true if this node has been visited by the DFS traversal.
+ ///
+ /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
+ /// ID. Later, SubtreeID is updated but remains valid.
bool isVisited(const SUnit *SU) const {
- return R.DFSData[SU->NodeNum].SubtreeID;
+ return R.DFSNodeData[SU->NodeNum].SubtreeID
+ != SchedDFSResult::InvalidSubtreeID;
}
/// Initialize this node's instruction count. We don't need to flag the node
/// visited until visitPostorder because the DAG cannot have cycles.
void visitPreorder(const SUnit *SU) {
- R.DFSData[SU->NodeNum].InstrCount = SU->getInstr()->isTransient() ? 0 : 1;
+ R.DFSNodeData[SU->NodeNum].InstrCount =
+ SU->getInstr()->isTransient() ? 0 : 1;
}
- /// Mark this node as either the root of a subtree or an interior
- /// node. Increment the parent node's instruction count.
- void visitPostorder(const SUnit *SU, const SDep *PredDep, const SUnit *Parent) {
- R.DFSData[SU->NodeNum].SubtreeID = SU->NodeNum;
-
- // Join the child to its parent if they are connected via data dependence
- // and do not exceed the limit.
- if (!Parent || PredDep->getKind() != SDep::Data)
- return;
-
- unsigned PredCnt = R.DFSData[SU->NodeNum].InstrCount;
- if (PredCnt > R.SubtreeLimit)
- return;
-
- R.DFSData[SU->NodeNum].SubtreeID = Parent->NodeNum;
+ /// Called once for each node after all predecessors are visited. Revisit this
+ /// node's predecessors and potentially join them now that we know the ILP of
+ /// the other predecessors.
+ void visitPostorderNode(const SUnit *SU) {
+ // Mark this node as the root of a subtree. It may be joined with its
+ // successors later.
+ R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
+ RootData RData(SU->NodeNum);
+ RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;
+
+ // If any predecessors are still in their own subtree, they either cannot be
+ // joined or are large enough to remain separate. If this parent node's
+ // total instruction count is not greater than a child subtree by at least
+ // the subtree limit, then try to join it now since splitting subtrees is
+ // only useful if multiple high-pressure paths are possible.
+ unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
+ for (SUnit::const_pred_iterator
+ PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
+ if (PI->getKind() != SDep::Data)
+ continue;
+ unsigned PredNum = PI->getSUnit()->NodeNum;
+ if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
+ joinPredSubtree(*PI, SU, /*CheckLimit=*/false);
+
+ // Either link or merge the TreeData entry from the child to the parent.
+ if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
+ // If the predecessor's parent is invalid, this is a tree edge and the
+ // current node is the parent.
+ if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
+ RootSet[PredNum].ParentNodeID = SU->NodeNum;
+ }
+ else if (RootSet.count(PredNum)) {
+ // The predecessor is not a root, but is still in the root set. This
+ // must be the new parent that it was just joined to. Note that
+ // RootSet[PredNum].ParentNodeID may either be invalid or may still be
+ // set to the original parent.
+ RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
+ RootSet.erase(PredNum);
+ }
+ }
+ RootSet[SU->NodeNum] = RData;
+ }
- // Add the recently finished predecessor's bottom-up descendent count.
- R.DFSData[Parent->NodeNum].InstrCount += PredCnt;
- SubtreeClasses.join(Parent->NodeNum, SU->NodeNum);
+ /// Called once for each tree edge after calling visitPostorderNode on the
+ /// predecessor. Increment the parent node's instruction count and
+ /// preemptively join this subtree to its parent's if it is small enough.
+ void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
+ R.DFSNodeData[Succ->NodeNum].InstrCount
+ += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
+ joinPredSubtree(PredDep, Succ);
}
- /// Determine whether the DFS cross edge should be considered a subtree edge
- /// or a connection between subtrees.
- void visitCross(const SDep &PredDep, const SUnit *Succ) {
- if (PredDep.getKind() == SDep::Data) {
- // If this is a cross edge to a root, join the subtrees. This happens when
- // the root was first reached by a non-data dependence.
- unsigned NodeNum = PredDep.getSUnit()->NodeNum;
- unsigned PredCnt = R.DFSData[NodeNum].InstrCount;
- if (R.DFSData[NodeNum].SubtreeID == NodeNum && PredCnt < R.SubtreeLimit) {
- R.DFSData[NodeNum].SubtreeID = Succ->NodeNum;
- R.DFSData[Succ->NodeNum].InstrCount += PredCnt;
- SubtreeClasses.join(Succ->NodeNum, NodeNum);
- return;
- }
- }
+ /// Add a connection for cross edges.
+ void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
}
@@ -1089,13 +1114,27 @@ public:
/// between trees.
void finalize() {
SubtreeClasses.compress();
+ R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
+ assert(SubtreeClasses.getNumClasses() == RootSet.size()
+ && "number of roots should match trees");
+ for (SparseSet<RootData>::const_iterator
+ RI = RootSet.begin(), RE = RootSet.end(); RI != RE; ++RI) {
+ unsigned TreeID = SubtreeClasses[RI->NodeID];
+ if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID)
+ R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID];
+ R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount;
+ // Note that SubInstrCount may be greater than InstrCount if we joined
+ // subtrees across a cross edge. InstrCount will be attributed to the
+ // original parent, while SubInstrCount will be attributed to the joined
+ // parent.
+ }
R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
- for (unsigned Idx = 0, End = R.DFSData.size(); Idx != End; ++Idx) {
- R.DFSData[Idx].SubtreeID = SubtreeClasses[Idx];
+ for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
+ R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
DEBUG(dbgs() << " SU(" << Idx << ") in tree "
- << R.DFSData[Idx].SubtreeID << '\n');
+ << R.DFSNodeData[Idx].SubtreeID << '\n');
}
for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator
I = ConnectionPairs.begin(), E = ConnectionPairs.end();
@@ -1111,21 +1150,53 @@ public:
}
protected:
+ /// Join the predecessor subtree with the successor that is its DFS
+ /// parent. Apply some heuristics before joining.
+ bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
+ bool CheckLimit = true) {
+ assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");
+
+ // Check if the predecessor is already joined.
+ const SUnit *PredSU = PredDep.getSUnit();
+ unsigned PredNum = PredSU->NodeNum;
+ if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
+ return false;
+
+ // Four is the magic number of successors before a node is considered a
+ // pinch point.
+ unsigned NumDataSucs = 0;
+ for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(),
+ SE = PredSU->Succs.end(); SI != SE; ++SI) {
+ if (SI->getKind() == SDep::Data) {
+ if (++NumDataSucs >= 4)
+ return false;
+ }
+ }
+ if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
+ return false;
+ R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
+ SubtreeClasses.join(Succ->NodeNum, PredNum);
+ return true;
+ }
+
/// Called by finalize() to record a connection between trees.
void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
if (!Depth)
return;
- SmallVectorImpl<SchedDFSResult::Connection> &Connections =
- R.SubtreeConnections[FromTree];
- for (SmallVectorImpl<SchedDFSResult::Connection>::iterator
- I = Connections.begin(), E = Connections.end(); I != E; ++I) {
- if (I->TreeID == ToTree) {
- I->Level = std::max(I->Level, Depth);
- return;
+ do {
+ SmallVectorImpl<SchedDFSResult::Connection> &Connections =
+ R.SubtreeConnections[FromTree];
+ for (SmallVectorImpl<SchedDFSResult::Connection>::iterator
+ I = Connections.begin(), E = Connections.end(); I != E; ++I) {
+ if (I->TreeID == ToTree) {
+ I->Level = std::max(I->Level, Depth);
+ return;
+ }
}
- }
- Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
+ Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
+ FromTree = R.DFSTreeData[FromTree].ParentTreeID;
+ } while (FromTree != SchedDFSResult::InvalidSubtreeID);
}
};
} // namespace llvm
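For readers unfamiliar with the SubtreeClasses.join calls used throughout
SchedDFSImpl: they maintain equivalence classes of node IDs that finalize()
later compresses to dense subtree IDs. A toy union-find with path halving
(EqClasses is an invented stand-in, not LLVM's IntEqClasses) shows the effect:

    #include <cstdio>
    #include <numeric>
    #include <vector>

    struct EqClasses {
      std::vector<unsigned> Parent;
      explicit EqClasses(unsigned N) : Parent(N) {
        std::iota(Parent.begin(), Parent.end(), 0u); // each node its own class
      }
      unsigned find(unsigned X) {
        while (Parent[X] != X)
          X = Parent[X] = Parent[Parent[X]]; // path halving
        return X;
      }
      void join(unsigned A, unsigned B) { Parent[find(B)] = find(A); }
    };

    int main() {
      EqClasses SubtreeClasses(5);
      // Join predecessors 1 and 2 into successor 0's subtree; 3 and 4 stay apart.
      SubtreeClasses.join(0, 1);
      SubtreeClasses.join(0, 2);
      for (unsigned i = 0; i != 5; ++i)
        std::printf("SU(%u) in class %u\n", i, SubtreeClasses.find(i));
    }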
@@ -1157,28 +1228,44 @@ public:
};
} // anonymous
+static bool hasDataSucc(const SUnit *SU) {
+ for (SUnit::const_succ_iterator
+ SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) {
+ if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode())
+ return true;
+ }
+ return false;
+}
+
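hasDataSucc drives root selection in compute() below: bottom-up subtrees are
grown only from nodes that have no (non-boundary) data successor. A toy version
of that selection over a plain successor list, with invented names:

    #include <cstdio>
    #include <vector>

    int main() {
      // succs[i] lists the data successors of node i; nodes 2 and 4 have none.
      std::vector<std::vector<int> > succs(5);
      succs[0].push_back(2);
      succs[1].push_back(2);
      succs[3].push_back(4);
      for (int i = 0; i != 5; ++i)
        if (succs[i].empty())
          std::printf("node %d is a DFS root\n", i); // nodes 2 and 4
    }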
/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
/// search from this root.
-void SchedDFSResult::compute(ArrayRef<SUnit *> Roots) {
+void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
if (!IsBottomUp)
llvm_unreachable("Top-down ILP metric is unimplemented");
SchedDFSImpl Impl(*this);
- for (ArrayRef<const SUnit*>::const_iterator
- RootI = Roots.begin(), RootE = Roots.end(); RootI != RootE; ++RootI) {
+ for (ArrayRef<SUnit>::const_iterator
+ SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) {
+ const SUnit *SU = &*SI;
+ if (Impl.isVisited(SU) || hasDataSucc(SU))
+ continue;
+
SchedDAGReverseDFS DFS;
- Impl.visitPreorder(*RootI);
- DFS.follow(*RootI);
+ Impl.visitPreorder(SU);
+ DFS.follow(SU);
for (;;) {
// Traverse the leftmost path as far as possible.
while (DFS.getPred() != DFS.getPredEnd()) {
const SDep &PredDep = *DFS.getPred();
DFS.advance();
- // If the pred is already valid, skip it. We may preorder visit a node
- // with InstrCount==0 more than once, but it won't affect heuristics
- // because we don't care about cross edges to leaf copies.
+ // Ignore non-data edges.
+ if (PredDep.getKind() != SDep::Data
+ || PredDep.getSUnit()->isBoundaryNode()) {
+ continue;
+ }
+ // An already visited edge is a cross edge, assuming an acyclic DAG.
if (Impl.isVisited(PredDep.getSUnit())) {
- Impl.visitCross(PredDep, DFS.getCurr());
+ Impl.visitCrossEdge(PredDep, DFS.getCurr());
continue;
}
Impl.visitPreorder(PredDep.getSUnit());
@@ -1187,7 +1274,9 @@ void SchedDFSResult::compute(ArrayRef<SUnit *> Roots) {
// Visit the top of the stack in postorder and backtrack.
const SUnit *Child = DFS.getCurr();
const SDep *PredDep = DFS.backtrack();
- Impl.visitPostorder(Child, PredDep, PredDep ? DFS.getCurr() : 0);
+ Impl.visitPostorderNode(Child);
+ if (PredDep)
+ Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
if (DFS.isComplete())
break;
}
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index 6c50913..8ddb3e8 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -41,6 +41,10 @@ namespace llvm {
return true;
}
+ static bool isNodeHidden(const SUnit *Node) {
+ return (Node->NumPreds > 10 || Node->NumSuccs > 10);
+ }
+
static bool hasNodeAddressLabel(const SUnit *Node,
const ScheduleDAG *Graph) {
return true;
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ff00d0d..ec52d7e 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2634,7 +2634,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
if (Result != ISD::SETCC_INVALID &&
(!LegalOperations ||
- TLI.isCondCodeLegal(Result, LL.getSimpleValueType())))
+ (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC,
+ TLI.getSetCCResultType(N0.getSimpleValueType())))))
return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
LL, LR, Result);
}
@@ -3144,7 +3146,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
if (Result != ISD::SETCC_INVALID &&
(!LegalOperations ||
- TLI.isCondCodeLegal(Result, LL.getSimpleValueType())))
+ (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC,
+ TLI.getSetCCResultType(N0.getValueType())))))
return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
LL, LR, Result);
}
@@ -5100,16 +5104,26 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
// If we haven't found a load, we can't narrow it. Don't transform one with
// multiple uses; this would require adding a new load.
- if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() ||
- // Don't change the width of a volatile load.
- cast<LoadSDNode>(N0)->isVolatile())
+ if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
+ return SDValue();
+
+ // Don't change the width of a volatile load.
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ if (LN0->isVolatile())
return SDValue();
// Verify that we are actually reducing a load width here.
- if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits)
+ if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
+ return SDValue();
+
+ // For the transform to be legal, the load must produce only two values
+ // (the value loaded and the chain). Don't transform a pre-increment
+ // load, for example, which produces an extra value. Otherwise the
+ // transformation is not equivalent, and the downstream logic to replace
+ // uses gets things wrong.
+ if (LN0->getNumValues() > 2)
return SDValue();
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT PtrType = N0.getOperand(1).getValueType();
if (PtrType == MVT::Untyped || PtrType.isExtended())
@@ -5153,8 +5167,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
EVT ShImmTy = getShiftAmountTy(Result.getValueType());
if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
ShImmTy = VT;
- Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT,
- Result, DAG.getConstant(ShLeftAmt, ShImmTy));
+ // If the shift amount is as large as the result size (but, presumably,
+ // no larger than the source) then the useful bits of the result are
+ // zero; we can't simply return the shortened shift, because the result
+ // of that operation is undefined.
+ if (ShLeftAmt >= VT.getSizeInBits())
+ Result = DAG.getConstant(0, VT);
+ else
+ Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT,
+ Result, DAG.getConstant(ShLeftAmt, ShImmTy));
}
// Return the new loaded value.
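Why the new guard matters: shifting left by an amount greater than or equal to
the bit width is undefined, both for the ISD::SHL node and in C++, so the
combine materializes the constant 0 instead. A scalar sketch (shl_or_zero is an
invented helper):

    #include <cstdint>
    #include <cstdio>

    uint32_t shl_or_zero(uint32_t V, unsigned Amt) {
      // Mirrors the fold: a result whose defined bits are all shifted out is
      // replaced with the constant 0 rather than an oversized SHL.
      return Amt >= 32 ? 0u : V << Amt;
    }

    int main() { std::printf("%u\n", shl_or_zero(0xffu, 32)); } // prints 0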
@@ -5340,6 +5361,38 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
+ // Fold a series of buildvector, bitcast, and truncate if possible.
+ // For example fold
+ // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
+ // (2xi32 (buildvector x, y)).
+ if (Level == AfterLegalizeVectorOps && VT.isVector() &&
+ N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+ N0.getOperand(0).hasOneUse()) {
+
+ SDValue BuildVect = N0.getOperand(0);
+ EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
+ EVT TruncVecEltTy = VT.getVectorElementType();
+
+ // Check that the element types match.
+ if (BuildVectEltTy == TruncVecEltTy) {
+ // Now we only need to compute the offset of the truncated elements.
+ unsigned BuildVecNumElts = BuildVect.getNumOperands();
+ unsigned TruncVecNumElts = VT.getVectorNumElements();
+ unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
+
+ assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
+ "Invalid number of elements");
+
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
+ Opnds.push_back(BuildVect.getOperand(i));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &Opnds[0],
+ Opnds.size());
+ }
+ }
+
// See if we can simplify the input to this truncate through knowledge that
// only the low bits are being used.
// For example, "trunc (or (shl x, 8), y)" -> "trunc y".
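The element-offset arithmetic behind the build_vector/bitcast/truncate fold
above, replayed on plain arrays: a 4 x i32 build_vector bitcast to 2 x i64 and
truncated back to 2 x i32 keeps every (BuildVecNumElts / TruncVecNumElts)-th
source element. This toy assumes little-endian element layout, matching the
code's stride starting at element 0:

    #include <cstdio>

    int main() {
      const unsigned BuildVecNumElts = 4, TruncVecNumElts = 2;
      const unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts; // 2
      int Build[BuildVecNumElts] = {10, 10, 20, 20};   // x, x, y, y
      int Trunc[TruncVecNumElts];
      for (unsigned i = 0, o = 0; i != BuildVecNumElts; i += TruncEltOffset)
        Trunc[o++] = Build[i];                         // keeps x, y
      std::printf("%d %d\n", Trunc[0], Trunc[1]);      // prints 10 20
    }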
@@ -5822,13 +5875,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N1, NewCFP);
}
- // (fadd (fadd x, x), x) -> (fmul 3.0, x)
- if (!CFP00 && !CFP01 && N0.getOperand(0) == N0.getOperand(1) &&
- N0.getOperand(0) == N1) {
- return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
- N1, DAG.getConstantFP(3.0, VT));
- }
-
// (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x)
if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD &&
N1.getOperand(0) == N1.getOperand(1) &&
@@ -5874,12 +5920,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N0, NewCFP);
}
- // (fadd x, (fadd x, x)) -> (fmul 3.0, x)
- if (!CFP10 && !CFP11 && N1.getOperand(0) == N1.getOperand(1) &&
- N1.getOperand(0) == N0) {
- return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
- N0, DAG.getConstantFP(3.0, VT));
- }
// (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x)
if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD &&
@@ -5904,6 +5944,26 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
}
+ if (N0.getOpcode() == ISD::FADD) {
+ ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
+ // (fadd (fadd x, x), x) -> (fmul 3.0, x)
+ if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
+ (N0.getOperand(0) == N1)) {
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N1, DAG.getConstantFP(3.0, VT));
+ }
+ }
+
+ if (N1.getOpcode() == ISD::FADD) {
+ ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
+ // (fadd x, (fadd x, x)) -> (fmul 3.0, x)
+ if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
+ N1.getOperand(0) == N0) {
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0, DAG.getConstantFP(3.0, VT));
+ }
+ }
+
// (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x)
if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
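A numeric sanity check for the relocated folds: (x + x) + x and 3.0 * x agree
exactly for this input. In the combiner these reassociations are presumably
gated on unsafe FP math by surrounding code not shown in this hunk; the toy
only demonstrates the arithmetic identity:

    #include <cstdio>

    int main() {
      double x = 1.25;
      double lhs = (x + x) + x;
      double rhs = 3.0 * x;
      std::printf("%g %g\n", lhs, rhs); // both 3.75
    }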
@@ -6735,18 +6795,24 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
if (Op0.getOpcode() == Op1.getOpcode()) {
// Avoid missing important xor optimizations.
SDValue Tmp = visitXOR(TheXor);
- if (Tmp.getNode() && Tmp.getNode() != TheXor) {
- DEBUG(dbgs() << "\nReplacing.8 ";
- TheXor->dump(&DAG);
- dbgs() << "\nWith: ";
- Tmp.getNode()->dump(&DAG);
- dbgs() << '\n');
- WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
- removeFromWorkList(TheXor);
- DAG.DeleteNode(TheXor);
- return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
- MVT::Other, Chain, Tmp, N2);
+ if (Tmp.getNode()) {
+ if (Tmp.getNode() != TheXor) {
+ DEBUG(dbgs() << "\nReplacing.8 ";
+ TheXor->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Tmp.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
+ removeFromWorkList(TheXor);
+ DAG.DeleteNode(TheXor);
+ return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, Tmp, N2);
+ }
+
+ // visitXOR has changed XOR's operands.
+ Op0 = TheXor->getOperand(0);
+ Op1 = TheXor->getOperand(1);
}
}
@@ -6894,6 +6960,16 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
ISD::MemIndexedMode AM = ISD::UNINDEXED;
if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
return false;
+
+ // Backends without true r+i pre-indexed forms may need to pass a
+ // constant base with a variable offset so that constant coercion
+ // will work with the patterns in canonical form.
+ bool Swapped = false;
+ if (isa<ConstantSDNode>(BasePtr)) {
+ std::swap(BasePtr, Offset);
+ Swapped = true;
+ }
+
// Don't create an indexed load / store with zero offset.
if (isa<ConstantSDNode>(Offset) &&
cast<ConstantSDNode>(Offset)->isNullValue())
@@ -6919,6 +6995,48 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
return false;
}
+ // If the offset is a constant, there may be other adds of constants that
+ // can be folded with this one. We should do this to avoid having to keep
+ // a copy of the original base pointer.
+ SmallVector<SDNode *, 16> OtherUses;
+ if (isa<ConstantSDNode>(Offset))
+ for (SDNode::use_iterator I = BasePtr.getNode()->use_begin(),
+ E = BasePtr.getNode()->use_end(); I != E; ++I) {
+ SDNode *Use = *I;
+ if (Use == Ptr.getNode())
+ continue;
+
+ if (Use->isPredecessorOf(N))
+ continue;
+
+ if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) {
+ OtherUses.clear();
+ break;
+ }
+
+ SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1);
+ if (Op1.getNode() == BasePtr.getNode())
+ std::swap(Op0, Op1);
+ assert(Op0.getNode() == BasePtr.getNode() &&
+ "Use of ADD/SUB but not an operand");
+
+ if (!isa<ConstantSDNode>(Op1)) {
+ OtherUses.clear();
+ break;
+ }
+
+ // FIXME: In some cases, we can be smarter about this.
+ if (Op1.getValueType() != Offset.getValueType()) {
+ OtherUses.clear();
+ break;
+ }
+
+ OtherUses.push_back(Use);
+ }
+
+ if (Swapped)
+ std::swap(BasePtr, Offset);
+
// Now check for #3 and #4.
bool RealUse = false;
@@ -6968,6 +7086,43 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// Finally, since the node is now dead, remove it from the graph.
DAG.DeleteNode(N);
+ if (Swapped)
+ std::swap(BasePtr, Offset);
+
+ // Replace other uses of BasePtr that can be updated to use Ptr
+ for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
+ unsigned OffsetIdx = 1;
+ if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
+ OffsetIdx = 0;
+ assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
+ BasePtr.getNode() && "Expected BasePtr operand");
+
+ APInt OV =
+ cast<ConstantSDNode>(Offset)->getAPIntValue();
+ if (AM == ISD::PRE_DEC)
+ OV = -OV;
+
+ ConstantSDNode *CN =
+ cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
+ APInt CNV = CN->getAPIntValue();
+ if (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1)
+ CNV += OV;
+ else
+ CNV -= OV;
+
+ SDValue NewOp1 = Result.getValue(isLoad ? 1 : 0);
+ SDValue NewOp2 = DAG.getConstant(CNV, CN->getValueType(0));
+ if (OffsetIdx == 0)
+ std::swap(NewOp1, NewOp2);
+
+ SDValue NewUse = DAG.getNode(OtherUses[i]->getOpcode(),
+ OtherUses[i]->getDebugLoc(),
+ OtherUses[i]->getValueType(0), NewOp1, NewOp2);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
+ removeFromWorkList(OtherUses[i]);
+ DAG.DeleteNode(OtherUses[i]);
+ }
+
// Replace the uses of Ptr with uses of the updated base value.
DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
removeFromWorkList(Ptr.getNode());
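The constant rewriting applied to OtherUses, reduced to scalar arithmetic for
the common ADD/PRE_INC case: once the access produces NewPtr = Base + OV, any
other use computing Base + CN equals NewPtr + (CN - OV), so the original Base
need not stay live:

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t Base = 1000, OV = 16, CN = 40;
      int64_t NewPtr = Base + OV;            // produced by the pre-indexed access
      int64_t Rewritten = NewPtr + (CN - OV);
      assert(Rewritten == Base + CN);        // same address, Base no longer needed
      return 0;
    }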
@@ -7176,12 +7331,15 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// Try to infer better alignment information than the load already has.
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
- if (Align > LD->getAlignment())
- return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
+ if (Align > LD->getMemOperand()->getBaseAlignment()) {
+ SDValue NewLoad =
+ DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
LD->getValueType(0),
Chain, Ptr, LD->getPointerInfo(),
LD->getMemoryVT(),
LD->isVolatile(), LD->isNonTemporal(), Align);
+ return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
+ }
}
}
@@ -7576,6 +7734,8 @@ struct ConsecutiveMemoryChainSorter {
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
EVT MemVT = St->getMemoryVT();
int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
+ bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
// Don't merge vectors into wider inputs.
if (MemVT.isVector() || !MemVT.isSimple())
@@ -7751,16 +7911,14 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// We only use vectors if the constant is known to be zero and the
// function is not marked with the noimplicitfloat attribute.
- if (NonZero || (DAG.getMachineFunction().getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoImplicitFloat)))
+ if (NonZero || NoVectors)
LastLegalVectorType = 0;
// Check if we found a legal integer type to store.
if (LastLegalType == 0 && LastLegalVectorType == 0)
return false;
- bool UseVector = LastLegalVectorType > LastLegalType;
+ bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
// Make sure we have something to merge.
@@ -7913,7 +8071,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// All loads must share the same chain.
if (LoadNodes[i].MemNode->getChain() != FirstChain)
break;
-
+
int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
if (CurrAddress - StartAddress != (ElementSizeBytes * i))
break;
@@ -7933,7 +8091,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// Only use vector types if the vector type is larger than the integer type.
// If they are the same, use integers.
- bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType;
+ bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
// We add +1 here because the LastXXX variables refer to location while
@@ -9104,11 +9262,6 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
- // After legalize, the target may be depending on adds and other
- // binary ops to provide legal ways to construct constants or other
- // things. Simplifying them may result in a loss of legality.
- if (LegalOperations) return SDValue();
-
assert(N->getValueType(0).isVector() &&
"SimplifyVBinOp only works on vectors!");
@@ -9178,11 +9331,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
/// SimplifyVUnaryOp - Visit a unary vector operation, like FABS/FNEG.
SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
- // After legalize, the target may be depending on adds and other
- // binary ops to provide legal ways to construct constants or other
- // things. Simplifying them may result in a loss of legality.
- if (LegalOperations) return SDValue();
-
assert(N->getValueType(0).isVector() &&
"SimplifyVUnaryOp only works on vectors!");
@@ -9285,7 +9433,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
// src value info, don't do the transformation if the memory
// locations are not in the default address space.
LLD->getPointerInfo().getAddrSpace() != 0 ||
- RLD->getPointerInfo().getAddrSpace() != 0)
+ RLD->getPointerInfo().getAddrSpace() != 0 ||
+ !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
+ LLD->getBasePtr().getValueType()))
return false;
// Check that the select condition doesn't reach either load. If so,
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 0d90a07..ff9b2ba 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -87,6 +87,27 @@ void FastISel::startNewBlock() {
LastLocalValue = EmitStartPt;
}
+bool FastISel::LowerArguments() {
+ if (!FuncInfo.CanLowerReturn)
+ // Fall back to SDISel argument lowering code to deal with sret pointer
+ // parameter.
+ return false;
+
+ if (!FastLowerArguments())
+ return false;
+
+ // Enter non-dead arguments into ValueMap for uses in non-entry BBs.
+ for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(),
+ E = FuncInfo.Fn->arg_end(); I != E; ++I) {
+ if (!I->use_empty()) {
+ DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I);
+ assert(VI != LocalValueMap.end() && "Missed an argument?");
+ FuncInfo.ValueMap[I] = VI->second;
+ }
+ }
+ return true;
+}
+
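The new LowerArguments/FastLowerArguments pair is a try-fast-path-then-fall-back
shape: the virtual hook defaults to failure and a target overrides it for the
ABIs it can handle. A standalone sketch of the shape (Selector and MyTarget are
invented stand-ins, not LLVM classes):

    #include <cstdio>

    struct Selector {
      virtual ~Selector() {}
      // Default hook: no fast path; callers fall back to the general lowering
      // (SDISel in the real code).
      virtual bool FastLowerArguments() { return false; }
      bool LowerArguments() {
        if (!FastLowerArguments())
          return false;
        // ...enter lowered arguments into the value map here...
        return true;
      }
    };

    struct MyTarget : Selector {
      virtual bool FastLowerArguments() { return true; } // handles simple ABIs
    };

    int main() {
      MyTarget T;
      std::puts(T.LowerArguments() ? "fast path" : "fallback");
      return 0;
    }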
void FastISel::flushLocalValueMap() {
LocalValueMap.clear();
LastLocalValue = EmitStartPt;
@@ -684,7 +705,7 @@ bool FastISel::SelectCall(const User *I) {
// all the values which have already been materialized,
// appear after the call. It also makes sense to skip intrinsics
// since they tend to be inlined.
- if (!isa<IntrinsicInst>(F))
+ if (!isa<IntrinsicInst>(Call))
flushLocalValueMap();
// An arbitrary call. Bail.
@@ -801,7 +822,7 @@ FastISel::SelectInstruction(const Instruction *I) {
}
// First, try doing target-independent selection.
- if (SelectOperator(I, I->getOpcode())) {
+ if (!SkipTargetIndependentFastISel() && SelectOperator(I, I->getOpcode())) {
++NumFastIselSuccessIndependent;
DL = DebugLoc();
return true;
@@ -836,7 +857,8 @@ FastISel::SelectInstruction(const Instruction *I) {
void
FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) {
- if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
+ if (FuncInfo.MBB->getBasicBlock()->size() > 1 &&
+ FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
// If this is the only instruction in the block, emit it for more accurate
// line information; otherwise this is the unconditional fall-through case,
// which needs no instructions.
@@ -1067,6 +1089,10 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo,
FastISel::~FastISel() {}
+bool FastISel::FastLowerArguments() {
+ return false;
+}
+
unsigned FastISel::FastEmit_(MVT, MVT,
unsigned) {
return 0;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 5eaf67e..f085e44 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -102,7 +102,8 @@ private:
SDNode *Node, bool isSigned);
SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
- RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128);
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128);
SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
RTLIB::Libcall Call_I8,
RTLIB::Libcall Call_I16,
@@ -110,6 +111,7 @@ private:
RTLIB::Libcall Call_I64,
RTLIB::Libcall Call_I128);
void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
SDValue ExpandBUILD_VECTOR(SDNode *Node);
@@ -1841,26 +1843,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
return ExpandVectorBuildThroughStack(Node);
}
-static bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
- SDValue &Chain, const TargetLowering &TLI) {
- const Function *F = DAG.getMachineFunction().getFunction();
-
- // Conservatively require the attributes of the call to match those of
- // the return. Ignore noalias because it doesn't affect the call sequence.
- Attribute CallerRetAttr = F->getAttributes().getRetAttributes();
- if (AttrBuilder(CallerRetAttr)
- .removeAttribute(Attribute::NoAlias).hasAttributes())
- return false;
-
- // It's not safe to eliminate the sign / zero extension of the return value.
- if (CallerRetAttr.hasAttribute(Attribute::ZExt) ||
- CallerRetAttr.hasAttribute(Attribute::SExt))
- return false;
-
- // Check if the only use is a function return node.
- return TLI.isUsedByReturnOnly(Node, Chain);
-}
-
// ExpandLibCall - Expand a node into a call to a libcall. If the result value
// does not fit into a register, return the lo part and set the hi part to the
// by-reg argument. If it does fit into a single register, return the result
@@ -1891,7 +1873,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
// isTailCall may be true since the callee does not reference caller stack
// frame. Check if it's in the right position.
SDValue TCChain = InChain;
- bool isTailCall = isInTailCallPosition(DAG, Node, TCChain, TLI);
+ bool isTailCall = TLI.isInTailCallPosition(DAG, Node, TCChain);
if (isTailCall)
InChain = TCChain;
@@ -2115,6 +2097,120 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
Results.push_back(Rem);
}
+/// isSinCosLibcallAvailable - Return true if sincos libcall is available.
+static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = RTLIB::SINCOS_F32; break;
+ case MVT::f64: LC = RTLIB::SINCOS_F64; break;
+ case MVT::f80: LC = RTLIB::SINCOS_F80; break;
+ case MVT::f128: LC = RTLIB::SINCOS_F128; break;
+ case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break;
+ }
+ return TLI.getLibcallName(LC) != 0;
+}
+
+/// canCombineSinCosLibcall - Return true if sincos libcall is available and
+/// can be used to combine sin and cos.
+static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI,
+ const TargetMachine &TM) {
+ if (!isSinCosLibcallAvailable(Node, TLI))
+ return false;
+ // GNU sin/cos functions set errno while sincos does not. Therefore
+ // combining sin and cos is only safe if unsafe-fpmath is enabled.
+ bool isGNU = Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU;
+ if (isGNU && !TM.Options.UnsafeFPMath)
+ return false;
+ return true;
+}
+
+/// useSinCos - Only issue sincos libcall if both sin and cos are
+/// needed.
+static bool useSinCos(SDNode *Node) {
+ unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN
+ ? ISD::FCOS : ISD::FSIN;
+
+ SDValue Op0 = Node->getOperand(0);
+ for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
+ UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == Node)
+ continue;
+ // The other user might have been turned into sincos already.
+ if (User->getOpcode() == OtherOpcode || User->getOpcode() == ISD::FSINCOS)
+ return true;
+ }
+ return false;
+}
+
+/// ExpandSinCosLibCall - Issue libcalls to sincos to compute sin / cos
+/// pairs.
+void
+SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = RTLIB::SINCOS_F32; break;
+ case MVT::f64: LC = RTLIB::SINCOS_F64; break;
+ case MVT::f80: LC = RTLIB::SINCOS_F80; break;
+ case MVT::f128: LC = RTLIB::SINCOS_F128; break;
+ case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break;
+ }
+
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependence chain.
+ SDValue InChain = DAG.getEntryNode();
+
+ EVT RetVT = Node->getValueType(0);
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ // Pass the argument.
+ Entry.Node = Node->getOperand(0);
+ Entry.Ty = RetTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ // Pass the return address of sin.
+ SDValue SinPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = SinPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ // Also pass the return address of the cos.
+ SDValue CosPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = CosPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ DebugLoc dl = Node->getDebugLoc();
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr,
+ MachinePointerInfo(), false, false, false, 0));
+ Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr,
+ MachinePointerInfo(), false, false, false, 0));
+}
+
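The call emitted above matches the usual sincos out-parameter convention: one
argument in, both results through pointers, mirroring the SinPtr/CosPtr stack
temporaries. A hedged illustration of that shape (my_sincosf is a local
stand-in; the real symbol comes from TLI.getLibcallName(LC)):

    #include <cmath>
    #include <cstdio>

    static void my_sincosf(float X, float *Sin, float *Cos) {
      *Sin = std::sin(X);   // stand-in for the libcall body
      *Cos = std::cos(X);
    }

    int main() {
      float S, C;
      my_sincosf(1.0f, &S, &C); // one call yields both values
      std::printf("%f %f\n", S, C);
    }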
/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a
/// INT_TO_FP operation of the specified operand when the target requests that
/// we expand it. At this point, we know that the result and operand types are
@@ -2443,18 +2539,6 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
}
}
-/// SplatByte - Distribute ByteVal over NumBits bits.
-// FIXME: Move this helper to a common place.
-static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
- APInt Val = APInt(NumBits, ByteVal);
- unsigned Shift = 8;
- for (unsigned i = NumBits; i > 8; i >>= 1) {
- Val = (Val << Shift) | Val;
- Shift <<= 1;
- }
- return Val;
-}
-
/// ExpandBitCount - Expand the specified bitcount instruction into operations.
///
SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
@@ -2472,10 +2556,10 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
// This is the "best" algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
- SDValue Mask55 = DAG.getConstant(SplatByte(Len, 0x55), VT);
- SDValue Mask33 = DAG.getConstant(SplatByte(Len, 0x33), VT);
- SDValue Mask0F = DAG.getConstant(SplatByte(Len, 0x0F), VT);
- SDValue Mask01 = DAG.getConstant(SplatByte(Len, 0x01), VT);
+ SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), VT);
+ SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), VT);
+ SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), VT);
+ SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), VT);
// v = v - ((v >> 1) & 0x55555555...)
Op = DAG.getNode(ISD::SUB, dl, VT, Op,
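APInt::getSplat replaces the local SplatByte helper with a general byte-splat.
The masks it produces, and the popcount ladder they feed, computed the
pedestrian way at a fixed 64 bits (splatByte here is an illustrative local
helper, not an LLVM API):

    #include <cstdint>
    #include <cstdio>

    static uint64_t splatByte(uint8_t B) {
      return 0x0101010101010101ull * B; // repeat one byte across the width
    }

    int main() {
      uint64_t V = 0xF0F0F0F0F0F0F0F0ull;               // 32 bits set
      V = V - ((V >> 1) & splatByte(0x55));             // 2-bit counts
      V = (V & splatByte(0x33)) + ((V >> 2) & splatByte(0x33)); // 4-bit counts
      V = (V + (V >> 4)) & splatByte(0x0F);             // byte counts
      V = (V * splatByte(0x01)) >> 56;                  // horizontal byte sum
      std::printf("popcount = %llu\n", (unsigned long long)V); // prints 32
    }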
@@ -2825,7 +2909,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue True, False;
EVT VT = Node->getOperand(0).getValueType();
EVT NVT = Node->getValueType(0);
- APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
+ APFloat apf(DAG.EVTToAPFloatSemantics(VT),
+ APInt::getNullValue(VT.getSizeInBits()));
APInt x = APInt::getSignBit(NVT.getSizeInBits());
(void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
Tmp1 = DAG.getConstantFP(apf, VT);
@@ -3060,14 +3145,33 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
RTLIB::SQRT_PPCF128));
break;
case ISD::FSIN:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
- RTLIB::SIN_F80, RTLIB::SIN_F128,
- RTLIB::SIN_PPCF128));
+ case ISD::FCOS: {
+ EVT VT = Node->getValueType(0);
+ bool isSIN = Node->getOpcode() == ISD::FSIN;
+ // Turn fsin / fcos into an ISD::FSINCOS node if there is a pair of fsin /
+ // fcos which share the same operand and both are used.
+ if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) ||
+ canCombineSinCosLibcall(Node, TLI, TM))
+ && useSinCos(Node)) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0));
+ if (!isSIN)
+ Tmp1 = Tmp1.getValue(1);
+ Results.push_back(Tmp1);
+ } else if (isSIN) {
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128));
+ } else {
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128));
+ }
break;
- case ISD::FCOS:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
- RTLIB::COS_F80, RTLIB::COS_F128,
- RTLIB::COS_PPCF128));
+ }
+ case ISD::FSINCOS:
+ // Expand into sincos libcall.
+ ExpandSinCosLibCall(Node, Results);
break;
case ISD::FLOG:
Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
@@ -3200,7 +3304,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::UREM:
case ISD::SREM: {
EVT VT = Node->getValueType(0);
- SDVTList VTs = DAG.getVTList(VT, VT);
bool isSigned = Node->getOpcode() == ISD::SREM;
unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
@@ -3211,6 +3314,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// If div is legal, it's better to do the normal expansion
!TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) &&
useDivRem(Node, isSigned, false))) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
} else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
// X % Y -> X-X/Y*Y
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 92dc5a9..1ee2192 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -152,23 +152,23 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::ADD_F32,
- RTLIB::ADD_F64,
- RTLIB::ADD_F80,
- RTLIB::ADD_PPCF128),
- NVT, Ops, 2, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32,
+ RTLIB::ADD_F64,
+ RTLIB::ADD_F80,
+ RTLIB::ADD_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::CEIL_F32,
- RTLIB::CEIL_F64,
- RTLIB::CEIL_F80,
- RTLIB::CEIL_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32,
+ RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80,
+ RTLIB::CEIL_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
@@ -216,90 +216,90 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::COS_F32,
- RTLIB::COS_F64,
- RTLIB::COS_F80,
- RTLIB::COS_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32,
+ RTLIB::COS_F64,
+ RTLIB::COS_F80,
+ RTLIB::COS_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::DIV_F32,
- RTLIB::DIV_F64,
- RTLIB::DIV_F80,
- RTLIB::DIV_PPCF128),
- NVT, Ops, 2, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::EXP_F32,
- RTLIB::EXP_F64,
- RTLIB::EXP_F80,
- RTLIB::EXP_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32,
+ RTLIB::EXP_F64,
+ RTLIB::EXP_F80,
+ RTLIB::EXP_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::EXP2_F32,
- RTLIB::EXP2_F64,
- RTLIB::EXP2_F80,
- RTLIB::EXP2_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32,
+ RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80,
+ RTLIB::EXP2_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::FLOOR_F32,
- RTLIB::FLOOR_F64,
- RTLIB::FLOOR_F80,
- RTLIB::FLOOR_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32,
+ RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80,
+ RTLIB::FLOOR_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::LOG_F32,
- RTLIB::LOG_F64,
- RTLIB::LOG_F80,
- RTLIB::LOG_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32,
+ RTLIB::LOG_F64,
+ RTLIB::LOG_F80,
+ RTLIB::LOG_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::LOG2_F32,
- RTLIB::LOG2_F64,
- RTLIB::LOG2_F80,
- RTLIB::LOG2_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32,
+ RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80,
+ RTLIB::LOG2_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::LOG10_F32,
- RTLIB::LOG10_F64,
- RTLIB::LOG10_F80,
- RTLIB::LOG10_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32,
+ RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80,
+ RTLIB::LOG10_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
@@ -307,35 +307,35 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)),
GetSoftenedFloat(N->getOperand(2)) };
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::FMA_F32,
- RTLIB::FMA_F64,
- RTLIB::FMA_F80,
- RTLIB::FMA_PPCF128),
- NVT, Ops, 3, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_PPCF128),
+ NVT, Ops, 3, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::MUL_F32,
- RTLIB::MUL_F64,
- RTLIB::MUL_F80,
- RTLIB::MUL_PPCF128),
- NVT, Ops, 2, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::NEARBYINT_F32,
- RTLIB::NEARBYINT_F64,
- RTLIB::NEARBYINT_F80,
- RTLIB::NEARBYINT_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
@@ -343,12 +343,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
// Expand Y = FNEG(X) -> Y = SUB -0.0, X
SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)),
GetSoftenedFloat(N->getOperand(0)) };
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::SUB_F32,
- RTLIB::SUB_F64,
- RTLIB::SUB_F80,
- RTLIB::SUB_PPCF128),
- NVT, Ops, 2, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
@@ -356,7 +356,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
SDValue Op = N->getOperand(0);
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
- return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, N->getDebugLoc());
}
// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
@@ -364,8 +364,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = N->getOperand(0);
- return MakeLibCall(RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false,
- N->getDebugLoc());
+ return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false,
+ N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
@@ -373,19 +373,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
SDValue Op = N->getOperand(0);
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
- return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::POW_F32,
- RTLIB::POW_F64,
- RTLIB::POW_F80,
- RTLIB::POW_PPCF128),
- NVT, Ops, 2, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32,
+ RTLIB::POW_F64,
+ RTLIB::POW_F80,
+ RTLIB::POW_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
@@ -393,80 +393,80 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
"Unsupported power type!");
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) };
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::POWI_F32,
- RTLIB::POWI_F64,
- RTLIB::POWI_F80,
- RTLIB::POWI_PPCF128),
- NVT, Ops, 2, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32,
+ RTLIB::POWI_F64,
+ RTLIB::POWI_F80,
+ RTLIB::POWI_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::REM_F32,
- RTLIB::REM_F64,
- RTLIB::REM_F80,
- RTLIB::REM_PPCF128),
- NVT, Ops, 2, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32,
+ RTLIB::REM_F64,
+ RTLIB::REM_F80,
+ RTLIB::REM_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::RINT_F32,
- RTLIB::RINT_F64,
- RTLIB::RINT_F80,
- RTLIB::RINT_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32,
+ RTLIB::RINT_F64,
+ RTLIB::RINT_F80,
+ RTLIB::RINT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::SIN_F32,
- RTLIB::SIN_F64,
- RTLIB::SIN_F80,
- RTLIB::SIN_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32,
+ RTLIB::SIN_F64,
+ RTLIB::SIN_F80,
+ RTLIB::SIN_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::SQRT_F32,
- RTLIB::SQRT_F64,
- RTLIB::SQRT_F80,
- RTLIB::SQRT_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32,
+ RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80,
+ RTLIB::SQRT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::SUB_F32,
- RTLIB::SUB_F64,
- RTLIB::SUB_F80,
- RTLIB::SUB_PPCF128),
- NVT, Ops, 2, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::TRUNC_F32,
- RTLIB::TRUNC_F64,
- RTLIB::TRUNC_F80,
- RTLIB::TRUNC_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32,
+ RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80,
+ RTLIB::TRUNC_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
@@ -559,8 +559,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
// Sign/zero extend the argument if the libcall takes a larger type.
SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
NVT, N->getOperand(0));
- return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
- &Op, 1, false, dl);
+ return TLI.makeLibCall(DAG, LC,
+ TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
+ &Op, 1, false, dl);
}
@@ -607,92 +608,6 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
return false;
}
-/// SoftenSetCCOperands - Soften the operands of a comparison. This code is
-/// shared among BR_CC, SELECT_CC, and SETCC handlers.
-void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
- ISD::CondCode &CCCode, DebugLoc dl) {
- SDValue LHSInt = GetSoftenedFloat(NewLHS);
- SDValue RHSInt = GetSoftenedFloat(NewRHS);
- EVT VT = NewLHS.getValueType();
-
- assert((VT == MVT::f32 || VT == MVT::f64) && "Unsupported setcc type!");
-
- // Expand into one or more soft-fp libcall(s).
- RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
- switch (CCCode) {
- case ISD::SETEQ:
- case ISD::SETOEQ:
- LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
- break;
- case ISD::SETNE:
- case ISD::SETUNE:
- LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : RTLIB::UNE_F64;
- break;
- case ISD::SETGE:
- case ISD::SETOGE:
- LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
- break;
- case ISD::SETLT:
- case ISD::SETOLT:
- LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
- break;
- case ISD::SETLE:
- case ISD::SETOLE:
- LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
- break;
- case ISD::SETGT:
- case ISD::SETOGT:
- LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
- break;
- case ISD::SETUO:
- LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
- break;
- case ISD::SETO:
- LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : RTLIB::O_F64;
- break;
- default:
- LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
- switch (CCCode) {
- case ISD::SETONE:
- // SETONE = SETOLT | SETOGT
- LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
- // Fallthrough
- case ISD::SETUGT:
- LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
- break;
- case ISD::SETUGE:
- LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
- break;
- case ISD::SETULT:
- LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
- break;
- case ISD::SETULE:
- LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
- break;
- case ISD::SETUEQ:
- LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
- break;
- default: llvm_unreachable("Do not know how to soften this setcc!");
- }
- }
-
- // Use the target specific return value for comparions lib calls.
- EVT RetVT = TLI.getCmpLibcallReturnType();
- SDValue Ops[2] = { LHSInt, RHSInt };
- NewLHS = MakeLibCall(LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
- NewRHS = DAG.getConstant(0, RetVT);
- CCCode = TLI.getCmpLibcallCC(LC1);
- if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
- SDValue Tmp = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT),
- NewLHS, NewRHS, DAG.getCondCode(CCCode));
- NewLHS = MakeLibCall(LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
- NewLHS = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), NewLHS,
- NewRHS, DAG.getCondCode(TLI.getCmpLibcallCC(LC2)));
- NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
- NewRHS = SDValue();
- }
-}
-
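The helper removed here survives as TargetLowering::softenSetCCOperands, called below with the operands pre-softened. The subtle part of the mapping is the two-libcall fallback: an unordered predicate such as SETUEQ is split into an unordered test (LC1) and an ordered-equal test (LC2), and the two setcc results are ORed together. A minimal scalar model of that case in plain C++ rather than the DAG API:

    #include <cmath>

    // SETUEQ ("unordered or equal") as the pair of soft-float comparisons
    // the code above emits: LC1 tests for unordered, LC2 for ordered-equal,
    // and the results are ORed, mirroring the ISD::OR built after the
    // second SETCC.
    static bool setueq(float A, float B) {
      bool Unordered = std::isnan(A) || std::isnan(B); // what UO_F32 tests
      bool OrderedEq = A == B;                         // what OEQ_F32 tests
      return Unordered || OrderedEq;
    }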
SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
GetSoftenedFloat(N->getOperand(0)));
@@ -706,15 +621,19 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
- SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
- // If SoftenSetCCOperands returned a scalar, we need to compare the result
+ EVT VT = NewLHS.getValueType();
+ NewLHS = GetSoftenedFloat(NewLHS);
+ NewRHS = GetSoftenedFloat(NewRHS);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (NewRHS.getNode() == 0) {
NewRHS = DAG.getConstant(0, NewLHS.getValueType());
@@ -733,7 +652,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
@@ -741,22 +660,26 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) {
EVT RVT = N->getValueType(0);
RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16;
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
- SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
- // If SoftenSetCCOperands returned a scalar, we need to compare the result
+ EVT VT = NewLHS.getValueType();
+ NewLHS = GetSoftenedFloat(NewLHS);
+ NewRHS = GetSoftenedFloat(NewRHS);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (NewRHS.getNode() == 0) {
NewRHS = DAG.getConstant(0, NewLHS.getValueType());
@@ -773,9 +696,13 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
- SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
- // If SoftenSetCCOperands returned a scalar, use it.
+ EVT VT = NewLHS.getValueType();
+ NewLHS = GetSoftenedFloat(NewLHS);
+ NewRHS = GetSoftenedFloat(NewRHS);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If softenSetCCOperands returned a scalar, use it.
if (NewRHS.getNode() == 0) {
assert(NewLHS.getValueType() == N->getValueType(0) &&
"Unexpected setcc expansion!");
@@ -886,9 +813,11 @@ void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
assert(NVT.getSizeInBits() == integerPartWidth &&
"Do not know how to expand this float constant!");
APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt();
- Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, C.getRawData()[1])),
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(integerPartWidth, C.getRawData()[1])),
NVT);
- Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, C.getRawData()[0])),
+ Hi = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(integerPartWidth, C.getRawData()[0])),
NVT);
}
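A ppcf128 value is a pair of IEEE doubles that sum to the represented number, so expanding a constant is just a matter of reinterpreting the two 64-bit halves of the 128-bit pattern; the change here makes the f64 semantics explicit to APFloat instead of leaving them inferred from the bit width. A host-side sketch of the same split:

    #include <cstdint>
    #include <cstring>

    // Split a ppcf128 bit pattern (two raw 64-bit words, as returned by
    // APInt::getRawData above) into its Hi/Lo component doubles.
    static void splitPPCF128(const uint64_t Raw[2], double &Hi, double &Lo) {
      std::memcpy(&Hi, &Raw[0], sizeof(double)); // C.getRawData()[0]
      std::memcpy(&Lo, &Raw[1], sizeof(double)); // C.getRawData()[1]
    }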
@@ -947,13 +876,13 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::DIV_F32,
- RTLIB::DIV_F64,
- RTLIB::DIV_F80,
- RTLIB::DIV_PPCF128),
- N->getValueType(0), Ops, 2, false,
- N->getDebugLoc());
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
GetPairElements(Call, Lo, Hi);
}
@@ -1014,26 +943,26 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
- SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::FMA_F32,
- RTLIB::FMA_F64,
- RTLIB::FMA_F80,
- RTLIB::FMA_PPCF128),
- N->getValueType(0), Ops, 3, false,
- N->getDebugLoc());
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_PPCF128),
+ N->getValueType(0), Ops, 3, false,
+ N->getDebugLoc());
GetPairElements(Call, Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::MUL_F32,
- RTLIB::MUL_F64,
- RTLIB::MUL_F80,
- RTLIB::MUL_PPCF128),
- N->getValueType(0), Ops, 2, false,
- N->getDebugLoc());
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
GetPairElements(Call, Lo, Hi);
}
@@ -1060,7 +989,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0));
- Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(NVT.getSizeInBits(), 0)), NVT);
}
void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N,
@@ -1111,13 +1041,13 @@ void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::SUB_F32,
- RTLIB::SUB_F64,
- RTLIB::SUB_F80,
- RTLIB::SUB_PPCF128),
- N->getValueType(0), Ops, 2, false,
- N->getDebugLoc());
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
GetPairElements(Call, Lo, Hi);
}
@@ -1155,7 +1085,8 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
Chain = Hi.getValue(1);
// The low part is zero.
- Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(NVT.getSizeInBits(), 0)), NVT);
// Modified the chain - switch anything that used the old chain to use the
// new one.
@@ -1179,7 +1110,8 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
// The integer can be represented exactly in an f64.
Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
MVT::i32, Src);
- Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(NVT.getSizeInBits(), 0)), NVT);
Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src);
} else {
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
@@ -1193,7 +1125,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
}
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
- Hi = MakeLibCall(LC, VT, &Src, 1, true, dl);
+ Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl);
GetPairElements(Hi, Lo, Hi);
}
@@ -1225,7 +1157,8 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
}
Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
- DAG.getConstantFP(APFloat(APInt(128, Parts)),
+ DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble,
+ APInt(128, Parts)),
MVT::ppcf128));
Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT),
Lo, Hi, DAG.getCondCode(ISD::SETLT));
@@ -1364,7 +1297,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
- return MakeLibCall(LC, RVT, &N->getOperand(0), 1, false, dl);
+ return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl);
}
SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
@@ -1377,7 +1310,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
"Logic only correct for ppcf128!");
const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
- APFloat APF = APFloat(APInt(128, TwoE31));
+ APFloat APF = APFloat(APFloat::PPCDoubleDouble, APInt(128, TwoE31));
SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128);
// X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
// FIXME: generated code sucks.
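The SELECT_CC sequence built here implements the conversion spelled out in the comment. As a scalar sketch, valid for inputs that fit in 32 bits:

    #include <cstdint>

    // X >= 2^31 ? (int)(X - 2^31) + 0x80000000 : (int)X
    static uint32_t fptoui32(double X) {
      const double TwoE31 = 2147483648.0; // the 0x41e0000000000000 constant
      if (X >= TwoE31)
        return (uint32_t)(int32_t)(X - TwoE31) + 0x80000000u;
      return (uint32_t)(int32_t)X;
    }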
@@ -1396,7 +1329,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
- return MakeLibCall(LC, N->getValueType(0), &N->getOperand(0), 1, false, dl);
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), &N->getOperand(0), 1,
+ false, dl);
}
SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 5e33ef1..182b7f3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1767,7 +1767,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
SDValue Op = N->getOperand(0);
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
- SplitInteger(MakeLibCall(LC, VT, &Op, 1, true/*irrelevant*/, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, dl),
+ Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
@@ -1777,7 +1778,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
SDValue Op = N->getOperand(0);
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
- SplitInteger(MakeLibCall(LC, VT, &Op, 1, false/*irrelevant*/, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, dl),
+ Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
@@ -1992,7 +1994,8 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, dl),
+ Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
@@ -2054,7 +2057,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
@@ -2092,9 +2095,19 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
// Expand the subcomponents.
SDValue LHSL, LHSH;
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
-
- SDValue Ops[] = { LHSL, LHSH, N->getOperand(1) };
EVT VT = LHSL.getValueType();
+
+ // If the shift amount operand is coming from a vector legalization it may
+ // have an illegal type. Fix that first by casting the operand, otherwise
+ // the new SHL_PARTS operation would need further legalization.
+ SDValue ShiftOp = N->getOperand(1);
+ MVT ShiftTy = TLI.getShiftAmountTy(VT);
+ assert(ShiftTy.getSizeInBits() >= Log2_32_Ceil(VT.getSizeInBits()) &&
+ "ShiftAmountTy is too small to cover the range of this type!");
+ if (ShiftOp.getValueType() != ShiftTy)
+ ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy);
+
+ SDValue Ops[] = { LHSL, LHSH, ShiftOp };
Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3);
Hi = Lo.getValue(1);
return;
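The zext-or-trunc is needed because a shift amount produced by vector legalization can arrive as, say, an i64 even when the target's shift-amount type is narrower; the assert encodes the only real requirement, that the shift-amount type can represent every shift distance for VT. The check in isolation:

    // A shift-amount type of B bits can encode distances 0..2^B-1, so it
    // covers a W-bit value type iff B >= Log2_32_Ceil(W), which is what
    // the assert above verifies.
    static bool shiftTyCovers(unsigned ShiftTyBits, unsigned VTBits) {
      unsigned Need = 0;
      while ((1u << Need) < VTBits)
        ++Need;                       // Log2_32_Ceil(VTBits)
      return ShiftTyBits >= Need;
    }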
@@ -2138,7 +2151,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(MakeLibCall(LC, VT, Ops, 2, isSigned, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl), Lo, Hi);
return;
}
@@ -2221,7 +2234,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
@@ -2361,7 +2374,7 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
@@ -2381,7 +2394,7 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
@@ -2668,7 +2681,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this SINT_TO_FP!");
- return MakeLibCall(LC, DstVT, &Op, 1, true, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -2764,17 +2777,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL);
}
-static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unknown FP format");
- case MVT::f32: return &APFloat::IEEEsingle;
- case MVT::f64: return &APFloat::IEEEdouble;
- case MVT::f80: return &APFloat::x87DoubleExtended;
- case MVT::f128: return &APFloat::IEEEquad;
- case MVT::ppcf128: return &APFloat::PPCDoubleDouble;
- }
-}
-
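This static copy (and an identical one with an extra f16 case, removed from SelectionDAG.cpp further down) is folded into a single helper on SelectionDAG that returns a reference rather than a pointer, so call sites such as DAG.EVTToAPFloatSemantics(DstVT) below drop the dereference. Its shape, inferred from the two removed copies:

    // Presumably a static member of SelectionDAG (the diff shows both
    // DAG. and SelectionDAG:: call spellings); the f16 case comes from
    // the copy removed from SelectionDAG.cpp below.
    static const fltSemantics &EVTToAPFloatSemantics(EVT VT) {
      switch (VT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unknown FP format");
      case MVT::f16:     return APFloat::IEEEhalf;
      case MVT::f32:     return APFloat::IEEEsingle;
      case MVT::f64:     return APFloat::IEEEdouble;
      case MVT::f80:     return APFloat::x87DoubleExtended;
      case MVT::f128:    return APFloat::IEEEquad;
      case MVT::ppcf128: return APFloat::PPCDoubleDouble;
      }
    }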
SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
SDValue Op = N->getOperand(0);
EVT SrcVT = Op.getValueType();
@@ -2784,8 +2786,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
// The following optimization is valid only if every value in SrcVT (when
// treated as signed) is representable in DstVT. Check that the mantissa
  // size of DstVT is >= the number of bits in SrcVT - 1.
- const fltSemantics *sem = EVTToAPFloatSemantics(DstVT);
- if (APFloat::semanticsPrecision(*sem) >= SrcVT.getSizeInBits()-1 &&
+ const fltSemantics &sem = DAG.EVTToAPFloatSemantics(DstVT);
+ if (APFloat::semanticsPrecision(sem) >= SrcVT.getSizeInBits()-1 &&
TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
// Do a signed conversion then adjust the result.
SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
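Concretely, for a u32 to f64 conversion the precision of IEEEdouble is 53 and SrcVT has 32 bits, so 53 >= 31 holds: every u32 value, reinterpreted as a signed i32, converts exactly, and only the apparent sign needs fixing up afterwards. The guard in isolation:

    // Signed conversion is exact (and correctable) when the destination
    // mantissa can hold every source magnitude of SrcBits - 1 bits.
    static bool signedConvIsExact(unsigned MantissaBits, unsigned SrcBits) {
      return MantissaBits >= SrcBits - 1; // e.g. 53 >= 31 for u32 -> f64
    }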
@@ -2846,7 +2848,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this UINT_TO_FP!");
- return MakeLibCall(LC, DstVT, &Op, 1, true, dl);
+ return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl);
}
SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 6aea2d8..e26d165 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -1020,50 +1020,20 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
unsigned NumOps = N->getNumOperands();
DebugLoc dl = N->getDebugLoc();
if (NumOps == 0) {
- return MakeLibCall(LC, N->getValueType(0), 0, 0, isSigned, dl);
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, dl);
} else if (NumOps == 1) {
SDValue Op = N->getOperand(0);
- return MakeLibCall(LC, N->getValueType(0), &Op, 1, isSigned, dl);
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, dl);
} else if (NumOps == 2) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- return MakeLibCall(LC, N->getValueType(0), Ops, 2, isSigned, dl);
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, dl);
}
SmallVector<SDValue, 8> Ops(NumOps);
for (unsigned i = 0; i < NumOps; ++i)
Ops[i] = N->getOperand(i);
- return MakeLibCall(LC, N->getValueType(0), &Ops[0], NumOps, isSigned, dl);
-}
-
-/// MakeLibCall - Generate a libcall taking the given operands as arguments and
-/// returning a result of type RetVT.
-SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
- const SDValue *Ops, unsigned NumOps,
- bool isSigned, DebugLoc dl) {
- TargetLowering::ArgListTy Args;
- Args.reserve(NumOps);
-
- TargetLowering::ArgListEntry Entry;
- for (unsigned i = 0; i != NumOps; ++i) {
- Entry.Node = Ops[i];
- Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
- Args.push_back(Entry);
- }
- SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy());
-
- Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
- TargetLowering::
- CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
- false, 0, TLI.getLibcallCallingConv(LC),
- /*isTailCall=*/false,
- /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
- Callee, Args, DAG, dl);
- std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI);
-
- return CallInfo.first;
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0),
+ &Ops[0], NumOps, isSigned, dl);
}
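The removed body above is the whole story of this migration: the same argument-marshalling and LowerCallTo sequence now lives in TargetLowering::makeLibCall, which takes the DAG as an explicit first parameter. A typical converted call site, as seen throughout the patch:

    // One-operand libcall through the new TargetLowering entry point.
    SDValue Op = GetSoftenedFloat(N->getOperand(0));
    return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, /*isSigned=*/false,
                           N->getDebugLoc());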
// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 8c53ba3..7de42ea 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -80,35 +80,35 @@ private:
/// PromotedIntegers - For integer nodes that are below legal width, this map
/// indicates what promoted value to use.
- DenseMap<SDValue, SDValue> PromotedIntegers;
+ SmallDenseMap<SDValue, SDValue, 8> PromotedIntegers;
/// ExpandedIntegers - For integer nodes that need to be expanded this map
/// indicates which operands are the expanded version of the input.
- DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedIntegers;
+ SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedIntegers;
/// SoftenedFloats - For floating point nodes converted to integers of
/// the same size, this map indicates the converted value to use.
- DenseMap<SDValue, SDValue> SoftenedFloats;
+ SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats;
/// ExpandedFloats - For float nodes that need to be expanded this map
/// indicates which operands are the expanded version of the input.
- DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedFloats;
+ SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats;
/// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the
/// scalar value of type 'ty' to use.
- DenseMap<SDValue, SDValue> ScalarizedVectors;
+ SmallDenseMap<SDValue, SDValue, 8> ScalarizedVectors;
/// SplitVectors - For nodes that need to be split this map indicates
/// which operands are the expanded version of the input.
- DenseMap<SDValue, std::pair<SDValue, SDValue> > SplitVectors;
+ SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> SplitVectors;
/// WidenedVectors - For vector nodes that need to be widened, indicates
/// the widened value to use.
- DenseMap<SDValue, SDValue> WidenedVectors;
+ SmallDenseMap<SDValue, SDValue, 8> WidenedVectors;
/// ReplacedValues - For values that have been replaced with another,
/// indicates the replacement value to use.
- DenseMap<SDValue, SDValue> ReplacedValues;
+ SmallDenseMap<SDValue, SDValue, 8> ReplacedValues;
/// Worklist - This defines a worklist of nodes to process. In order to be
/// pushed onto this worklist, all operands of a node must have already been
@@ -159,9 +159,6 @@ private:
SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
SDValue JoinIntegers(SDValue Lo, SDValue Hi);
SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
- SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
- const SDValue *Ops, unsigned NumOps, bool isSigned,
- DebugLoc dl);
std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node, bool isSigned);
@@ -433,9 +430,6 @@ private:
SDValue SoftenFloatOp_SETCC(SDNode *N);
SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
- void SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
- ISD::CondCode &CCCode, DebugLoc dl);
-
//===--------------------------------------------------------------------===//
// Float Expansion Support: LegalizeFloatTypes.cpp
//===--------------------------------------------------------------------===//
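Switching these maps to SmallDenseMap with 8 inline buckets keeps the common case, functions that legalize only a handful of values, off the heap entirely; the map spills to a normal DenseMap allocation only once it outgrows the inline buffer. A self-contained illustration:

    #include "llvm/ADT/DenseMap.h"

    int main() {
      // Starts with 8 buckets allocated inline in the object itself.
      llvm::SmallDenseMap<unsigned, unsigned, 8> M;
      for (unsigned I = 0; I != 4; ++I)
        M[I] = I * 2;                  // stays in the inline buffer
      return M.lookup(3) == 6 ? 0 : 1;
    }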
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index de6bbe3..c6e066e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -40,7 +40,7 @@ class VectorLegalizer {
/// LegalizedNodes - For nodes that are of legal width, and that have more
/// than one use, this map indicates what regularized operand to use. This
/// allows us to avoid legalizing the same thing more than once.
- DenseMap<SDValue, SDValue> LegalizedNodes;
+ SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
// Adds a node to the translation cache
void AddLegalizedOperand(SDValue From, SDValue To) {
@@ -61,6 +61,8 @@ class VectorLegalizer {
// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
// SINT_TO_FLOAT and SHR on vectors isn't legal.
SDValue ExpandUINT_TO_FLOAT(SDValue Op);
+ // Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
+ SDValue ExpandSEXTINREG(SDValue Op);
// Implement vselect in terms of XOR, AND, OR when blend is not supported
// by the target.
SDValue ExpandVSELECT(SDValue Op);
@@ -83,6 +85,25 @@ class VectorLegalizer {
};
bool VectorLegalizer::Run() {
+ // Before we start legalizing vector nodes, check if there are any vectors.
+ bool HasVectors = false;
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) {
+ // Check if the values of the nodes contain vectors. We don't need to check
+ // the operands because we are going to check their values at some point.
+ for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
+ J != E; ++J)
+ HasVectors |= J->isVector();
+
+ // If we found a vector node we can start the legalization.
+ if (HasVectors)
+ break;
+ }
+
+ // If this basic block has no vectors, there is nothing to legalize.
+ if (!HasVectors)
+ return false;
+
// The legalize process is inherently a bottom-up recursive process (users
// legalize their uses before themselves). Given infinite stack space, we
// could just start legalizing on the root and traverse the whole graph. In
@@ -262,7 +283,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
// FALL THROUGH
}
case TargetLowering::Expand:
- if (Node->getOpcode() == ISD::VSELECT)
+ if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG)
+ Result = ExpandSEXTINREG(Op);
+ else if (Node->getOpcode() == ISD::VSELECT)
Result = ExpandVSELECT(Op);
else if (Node->getOpcode() == ISD::SELECT)
Result = ExpandSELECT(Op);
@@ -359,30 +382,135 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
EVT SrcVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = LD->getExtensionType();
- SmallVector<SDValue, 8> LoadVals;
+ SmallVector<SDValue, 8> Vals;
SmallVector<SDValue, 8> LoadChains;
unsigned NumElem = SrcVT.getVectorNumElements();
- unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
- for (unsigned Idx=0; Idx<NumElem; Idx++) {
- SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl,
- Op.getNode()->getValueType(0).getScalarType(),
- Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
- SrcVT.getScalarType(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
+ EVT SrcEltVT = SrcVT.getScalarType();
+ EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType();
+
+ if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
+ // When elements in a vector is not byte-addressable, we cannot directly
+ // load each element by advancing pointer, which could only address bytes.
+ // Instead, we load all significant words, mask bits off, and concatenate
+ // them to form each element. Finally, they are extended to destination
+ // scalar type to build the destination vector.
+ EVT WideVT = TLI.getPointerTy();
+
+ assert(WideVT.isRound() &&
+ "Could not handle the sophisticated case when the widest integer is"
+ " not power of 2.");
+ assert(WideVT.bitsGE(SrcEltVT) &&
+ "Type is not legalized?");
+
+ unsigned WideBytes = WideVT.getStoreSize();
+ unsigned Offset = 0;
+ unsigned RemainingBytes = SrcVT.getStoreSize();
+ SmallVector<SDValue, 8> LoadVals;
+
+ while (RemainingBytes > 0) {
+ SDValue ScalarLoad;
+ unsigned LoadBytes = WideBytes;
+
+ if (RemainingBytes >= LoadBytes) {
+ ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR,
+ LD->getPointerInfo().getWithOffset(Offset),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
+ } else {
+ EVT LoadVT = WideVT;
+ while (RemainingBytes < LoadBytes) {
+ LoadBytes >>= 1; // Reduce the load size by half.
+ LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
+ }
+ ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
+ LD->getPointerInfo().getWithOffset(Offset),
+ LoadVT, LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+ }
- BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
- DAG.getIntPtrConstant(Stride));
+ RemainingBytes -= LoadBytes;
+ Offset += LoadBytes;
+ BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
+ DAG.getIntPtrConstant(LoadBytes));
+
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+
+ // Extract bits, pack and extend/trunc them into destination type.
+ unsigned SrcEltBits = SrcEltVT.getSizeInBits();
+ SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, WideVT);
+
+ unsigned BitOffset = 0;
+ unsigned WideIdx = 0;
+ unsigned WideBits = WideVT.getSizeInBits();
+
+ for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
+ SDValue Lo, Hi, ShAmt;
+
+ if (BitOffset < WideBits) {
+ ShAmt = DAG.getConstant(BitOffset, TLI.getShiftAmountTy(WideVT));
+ Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
+ Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
+ }
- LoadVals.push_back(ScalarLoad.getValue(0));
- LoadChains.push_back(ScalarLoad.getValue(1));
+ BitOffset += SrcEltBits;
+ if (BitOffset >= WideBits) {
+ WideIdx++;
+ Offset -= WideBits;
+ if (Offset > 0) {
+ ShAmt = DAG.getConstant(SrcEltBits - Offset,
+ TLI.getShiftAmountTy(WideVT));
+ Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
+ Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
+ }
+ }
+
+ if (Hi.getNode())
+ Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
+
+ switch (ExtType) {
+ default: llvm_unreachable("Unknown extended-load op!");
+ case ISD::EXTLOAD:
+ Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT);
+ break;
+ case ISD::ZEXTLOAD:
+ Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
+ break;
+ case ISD::SEXTLOAD:
+ ShAmt = DAG.getConstant(WideBits - SrcEltBits,
+ TLI.getShiftAmountTy(WideVT));
+ Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
+ Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
+ Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
+ break;
+ }
+ Vals.push_back(Lo);
+ }
+ } else {
+ unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl,
+ Op.getNode()->getValueType(0).getScalarType(),
+ Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcVT.getScalarType(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+
+ BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
+ DAG.getIntPtrConstant(Stride));
+
+ Vals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
}
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&LoadChains[0], LoadChains.size());
SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
- Op.getNode()->getValueType(0), &LoadVals[0], LoadVals.size());
+ Op.getNode()->getValueType(0), &Vals[0], Vals.size());
AddLegalizedOperand(Op.getValue(0), Value);
AddLegalizedOperand(Op.getValue(1), NewChain);
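In the non-byte-sized path, each element is recovered from the loaded words with a shift and a mask, then widened according to the extension kind. A scalar model of the common case where an element does not straddle two words (the Hi half above handles the straddling case):

    #include <cstdint>

    // Extract element Idx of an <N x iEltBits> vector packed into Word,
    // sign-extending as a SEXTLOAD would (SRL+AND, then the SHL/SRA pair).
    static int64_t extractSExt(uint64_t Word, unsigned Idx,
                               unsigned EltBits) {
      uint64_t Mask = ((uint64_t)1 << EltBits) - 1;   // SrcEltBitMask
      uint64_t Lo = (Word >> (Idx * EltBits)) & Mask; // SRL + AND
      unsigned ShAmt = 64 - EltBits;                  // WideBits - SrcEltBits
      return (int64_t)(Lo << ShAmt) >> ShAmt;         // SHL then SRA
    }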
@@ -501,6 +629,26 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
}
+SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
+ EVT VT = Op.getValueType();
+
+ // Make sure that the SRA and SHL instructions are available.
+ if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
+
+ unsigned BW = VT.getScalarType().getSizeInBits();
+ unsigned OrigBW = OrigTy.getScalarType().getSizeInBits();
+ SDValue ShiftSz = DAG.getConstant(BW - OrigBW, VT);
+
+ Op = Op.getOperand(0);
+ Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
+ return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
+}
+
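SIGN_EXTEND_INREG on vectors becomes the standard shift pair: move the narrow value to the top of the lane, then arithmetic-shift it back down. The scalar equivalent, for BW = 32 and OrigBW = 8:

    #include <cstdint>

    static int32_t signExtendInReg8(int32_t X) {
      const unsigned ShiftSz = 32 - 8; // BW - OrigBW, as computed above
      // Shift left as unsigned to avoid UB; the arithmetic right shift
      // then replicates bit 7 across the upper 24 bits.
      return (int32_t)((uint32_t)X << ShiftSz) >> ShiftSz;
    }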
SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// Implement VSELECT in terms of XOR, AND, OR
// on platforms which do not support blend natively.
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 31b9bf3..addfccb 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/DataLayout.h"
@@ -142,6 +143,12 @@ private:
std::vector<SUnit*> LiveRegDefs;
std::vector<SUnit*> LiveRegGens;
+ // Collect interferences between physical register use/defs.
+ // Each interference is an SUnit and set of physical registers.
+ SmallVector<SUnit*, 4> Interferences;
+ typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT;
+ LRegsMapT LRegsMap;
+
/// Topo - A topological ordering for SUnits which permits fast IsReachable
/// and similar queries.
ScheduleDAGTopologicalSort Topo;
@@ -225,6 +232,8 @@ private:
SmallVector<SUnit*, 2>&);
bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+ void releaseInterferences(unsigned Reg = 0);
+
SUnit *PickNodeToScheduleBottomUp();
void ListScheduleBottomUp();
@@ -274,8 +283,17 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
// the expansion of custom DAG-to-DAG patterns.
if (VT == MVT::Untyped) {
const SDNode *Node = RegDefPos.GetNode();
- unsigned Opcode = Node->getMachineOpcode();
+ // Special handling for CopyFromReg of untyped values.
+ if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
+ RegClass = RC->getID();
+ Cost = 1;
+ return;
+ }
+
+ unsigned Opcode = Node->getMachineOpcode();
if (Opcode == TargetOpcode::REG_SEQUENCE) {
unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
@@ -312,6 +330,7 @@ void ScheduleDAGRRList::Schedule() {
LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL);
LiveRegGens.resize(TRI->getNumRegs() + 1, NULL);
CallSeqEndForStart.clear();
+ assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
// Build the scheduling graph.
BuildSchedGraph(NULL);
@@ -725,6 +744,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
--NumLiveRegs;
LiveRegDefs[I->getReg()] = NULL;
LiveRegGens[I->getReg()] = NULL;
+ releaseInterferences(I->getReg());
}
}
// Release the special call resource dependence, if this is the beginning
@@ -739,6 +759,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
--NumLiveRegs;
LiveRegDefs[CallResource] = NULL;
LiveRegGens[CallResource] = NULL;
+ releaseInterferences(CallResource);
}
}
@@ -794,6 +815,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
--NumLiveRegs;
LiveRegDefs[I->getReg()] = NULL;
LiveRegGens[I->getReg()] = NULL;
+ releaseInterferences(I->getReg());
}
}
@@ -821,6 +843,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
--NumLiveRegs;
LiveRegDefs[CallResource] = NULL;
LiveRegGens[CallResource] = NULL;
+ releaseInterferences(CallResource);
}
}
@@ -1305,34 +1328,58 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
return !LRegs.empty();
}
+void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = Interferences.size(); i > 0; --i) {
+ SUnit *SU = Interferences[i-1];
+ LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
+ if (Reg) {
+ SmallVector<unsigned, 4> &LRegs = LRegsPos->second;
+ if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end())
+ continue;
+ }
+ SU->isPending = false;
+ // The interfering node may no longer be available due to backtracking.
+ // Furthermore, it may have been made available again, in which case it is
+ // now already in the AvailableQueue.
+ if (SU->isAvailable && !SU->NodeQueueId) {
+ DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
+ AvailableQueue->push(SU);
+ }
+ if (i < Interferences.size())
+ Interferences[i-1] = Interferences.back();
+ Interferences.pop_back();
+ LRegsMap.erase(LRegsPos);
+ }
+}
+
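The backwards walk lets the loop erase in O(1) by moving the last element into the vacated slot, without invalidating the indices still to be visited. The same idiom in isolation:

    #include <vector>

    // Remove every element equal to Bad, swap-with-back style, exactly as
    // the Interferences loop above does.
    static void eraseMatching(std::vector<int> &V, int Bad) {
      for (unsigned I = V.size(); I > 0; --I) {
        if (V[I - 1] != Bad)
          continue;
        if (I < V.size())
          V[I - 1] = V.back();
        V.pop_back();
      }
    }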
/// Return a node that can be scheduled in this cycle. Requirements:
/// (1) Ready: latency has been satisfied
/// (2) No Hazards: resources are available
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
- SmallVector<SUnit*, 4> Interferences;
- DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
-
- SUnit *CurSU = AvailableQueue->pop();
+ SUnit *CurSU = AvailableQueue->empty() ? 0 : AvailableQueue->pop();
while (CurSU) {
SmallVector<unsigned, 4> LRegs;
if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
break;
- LRegsMap.insert(std::make_pair(CurSU, LRegs));
-
- CurSU->isPending = true; // This SU is not in AvailableQueue right now.
- Interferences.push_back(CurSU);
+ DEBUG(dbgs() << " Interfering reg " << TRI->getName(LRegs[0])
+ << " SU #" << CurSU->NodeNum << '\n');
+ std::pair<LRegsMapT::iterator, bool> LRegsPair =
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+ if (LRegsPair.second) {
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ Interferences.push_back(CurSU);
+ }
+ else {
+ assert(CurSU->isPending && "Interferences are pending");
+ // Update the interference with current live regs.
+ LRegsPair.first->second = LRegs;
+ }
CurSU = AvailableQueue->pop();
}
- if (CurSU) {
- // Add the nodes that aren't ready back onto the available list.
- for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
- Interferences[i]->isPending = false;
- assert(Interferences[i]->isAvailable && "must still be available");
- AvailableQueue->push(Interferences[i]);
- }
+ if (CurSU)
return CurSU;
- }
// All candidates are delayed due to live physical reg dependencies.
// Try backtracking, code duplication, or inserting cross class copies
@@ -1353,6 +1400,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
}
}
if (!WillCreateCycle(TrySU, BtSU)) {
+ // BacktrackBottomUp mutates Interferences!
BacktrackBottomUp(TrySU, BtSU);
// Force the current node to be scheduled before the node that
@@ -1362,19 +1410,19 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
if (!BtSU->isPending)
AvailableQueue->remove(BtSU);
}
+ DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU("
+ << TrySU->NodeNum << ")\n");
AddPred(TrySU, SDep(BtSU, SDep::Artificial));
// If one or more successors has been unscheduled, then the current
- // node is no longer avaialable. Schedule a successor that's now
- // available instead.
- if (!TrySU->isAvailable) {
+ // node is no longer available.
+ if (!TrySU->isAvailable)
CurSU = AvailableQueue->pop();
- }
else {
+ AvailableQueue->remove(TrySU);
CurSU = TrySU;
- TrySU->isPending = false;
- Interferences.erase(Interferences.begin()+i);
}
+ // Interferences has been mutated. We must break.
break;
}
}
@@ -1425,17 +1473,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
TrySU->isAvailable = false;
CurSU = NewDef;
}
-
assert(CurSU && "Unable to resolve live physical register dependencies!");
-
- // Add the nodes that aren't ready back onto the available list.
- for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
- Interferences[i]->isPending = false;
- // May no longer be available due to backtracking.
- if (Interferences[i]->isAvailable) {
- AvailableQueue->push(Interferences[i]);
- }
- }
return CurSU;
}
@@ -1456,7 +1494,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
// While Available queue is not empty, grab the node with the highest
// priority. If it is not ready put it back. Schedule the node.
Sequence.reserve(SUnits.size());
- while (!AvailableQueue->empty()) {
+ while (!AvailableQueue->empty() || !Interferences.empty()) {
DEBUG(dbgs() << "\nExamining Available:\n";
AvailableQueue->dump(this));
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 76067a1..2ff37e0 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -15,8 +15,8 @@
#ifndef SCHEDULEDAGSDNODES_H
#define SCHEDULEDAGSDNODES_H
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/SelectionDAG.h"
namespace llvm {
/// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 6c29c67..db8ae6e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -60,18 +60,6 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
return Res;
}
-static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unknown FP format");
- case MVT::f16: return &APFloat::IEEEhalf;
- case MVT::f32: return &APFloat::IEEEsingle;
- case MVT::f64: return &APFloat::IEEEdouble;
- case MVT::f80: return &APFloat::x87DoubleExtended;
- case MVT::f128: return &APFloat::IEEEquad;
- case MVT::ppcf128: return &APFloat::PPCDoubleDouble;
- }
-}
-
// Default null implementations of the callbacks.
void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
@@ -95,7 +83,8 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,
// convert modifies in place, so make a copy.
APFloat Val2 = APFloat(Val);
bool losesInfo;
- (void) Val2.convert(*EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
+ (void) Val2.convert(SelectionDAG::EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven,
&losesInfo);
return !losesInfo;
}
@@ -1081,7 +1070,7 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
EltVT==MVT::f16) {
bool ignored;
APFloat apf = APFloat(Val);
- apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
+ apf.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
&ignored);
return getConstantFP(apf, VT, isTarget);
} else
@@ -2442,7 +2431,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT);
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
- APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
+ APFloat apf(EVTToAPFloatSemantics(VT),
+ APInt::getNullValue(VT.getSizeInBits()));
(void)apf.convertFromAPInt(Val,
Opcode==ISD::SINT_TO_FP,
APFloat::rmNearestTiesToEven);
@@ -2450,9 +2440,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
}
case ISD::BITCAST:
if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
- return getConstantFP(APFloat(Val), VT);
+ return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT);
else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
- return getConstantFP(APFloat(Val), VT);
+ return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT);
break;
case ISD::BSWAP:
return getConstant(Val.byteSwap(), VT);
@@ -2499,7 +2489,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
bool ignored;
// This can return overflow, underflow, or inexact; we don't care.
// FIXME need to be more flexible about rounding mode.
- (void)V.convert(*EVTToAPFloatSemantics(VT),
+ (void)V.convert(EVTToAPFloatSemantics(VT),
APFloat::rmNearestTiesToEven, &ignored);
return getConstantFP(V, VT);
}
@@ -2690,44 +2680,117 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
return SDValue(N, 0);
}
-SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode,
- EVT VT,
- ConstantSDNode *Cst1,
- ConstantSDNode *Cst2) {
- const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue();
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
+ SDNode *Cst1, SDNode *Cst2) {
+ SmallVector<std::pair<ConstantSDNode *, ConstantSDNode *>, 4> Inputs;
+ SmallVector<SDValue, 4> Outputs;
+ EVT SVT = VT.getScalarType();
- switch (Opcode) {
- case ISD::ADD: return getConstant(C1 + C2, VT);
- case ISD::SUB: return getConstant(C1 - C2, VT);
- case ISD::MUL: return getConstant(C1 * C2, VT);
- case ISD::UDIV:
- if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT);
- break;
- case ISD::UREM:
- if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT);
- break;
- case ISD::SDIV:
- if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT);
- break;
- case ISD::SREM:
- if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT);
- break;
- case ISD::AND: return getConstant(C1 & C2, VT);
- case ISD::OR: return getConstant(C1 | C2, VT);
- case ISD::XOR: return getConstant(C1 ^ C2, VT);
- case ISD::SHL: return getConstant(C1 << C2, VT);
- case ISD::SRL: return getConstant(C1.lshr(C2), VT);
- case ISD::SRA: return getConstant(C1.ashr(C2), VT);
- case ISD::ROTL: return getConstant(C1.rotl(C2), VT);
- case ISD::ROTR: return getConstant(C1.rotr(C2), VT);
- default: break;
+ ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1);
+ ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2);
+ if (Scalar1 && Scalar2) {
+ // Scalar instruction.
+ Inputs.push_back(std::make_pair(Scalar1, Scalar2));
+ } else {
+ // For vectors extract each constant element into Inputs so we can constant
+ // fold them individually.
+ BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
+ BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
+ if (!BV1 || !BV2)
+ return SDValue();
+
+ assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!");
+
+ for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
+ ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I));
+ ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I));
+ if (!V1 || !V2) // Not a constant, bail.
+ return SDValue();
+
+ // Avoid BUILD_VECTOR nodes that perform implicit truncation.
+ // FIXME: This is valid and could be handled by truncating the APInts.
+ if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
+ return SDValue();
+
+ Inputs.push_back(std::make_pair(V1, V2));
+ }
}
- return SDValue();
+ // We have a number of constant values; constant fold them element by element.
+ for (unsigned I = 0, E = Inputs.size(); I != E; ++I) {
+ const APInt &C1 = Inputs[I].first->getAPIntValue();
+ const APInt &C2 = Inputs[I].second->getAPIntValue();
+
+ switch (Opcode) {
+ case ISD::ADD:
+ Outputs.push_back(getConstant(C1 + C2, SVT));
+ break;
+ case ISD::SUB:
+ Outputs.push_back(getConstant(C1 - C2, SVT));
+ break;
+ case ISD::MUL:
+ Outputs.push_back(getConstant(C1 * C2, SVT));
+ break;
+ case ISD::UDIV:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.udiv(C2), SVT));
+ break;
+ case ISD::UREM:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.urem(C2), SVT));
+ break;
+ case ISD::SDIV:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.sdiv(C2), SVT));
+ break;
+ case ISD::SREM:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.srem(C2), SVT));
+ break;
+ case ISD::AND:
+ Outputs.push_back(getConstant(C1 & C2, SVT));
+ break;
+ case ISD::OR:
+ Outputs.push_back(getConstant(C1 | C2, SVT));
+ break;
+ case ISD::XOR:
+ Outputs.push_back(getConstant(C1 ^ C2, SVT));
+ break;
+ case ISD::SHL:
+ Outputs.push_back(getConstant(C1 << C2, SVT));
+ break;
+ case ISD::SRL:
+ Outputs.push_back(getConstant(C1.lshr(C2), SVT));
+ break;
+ case ISD::SRA:
+ Outputs.push_back(getConstant(C1.ashr(C2), SVT));
+ break;
+ case ISD::ROTL:
+ Outputs.push_back(getConstant(C1.rotl(C2), SVT));
+ break;
+ case ISD::ROTR:
+ Outputs.push_back(getConstant(C1.rotr(C2), SVT));
+ break;
+ default:
+ return SDValue();
+ }
+ }
+
+ // Handle the scalar case first.
+ if (Outputs.size() == 1)
+ return Outputs.back();
+
+ // Otherwise build a big vector out of the scalar elements we generated.
+ return getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, Outputs.data(),
+ Outputs.size());
}
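The rewrite generalizes scalar constant folding to BUILD_VECTOR operands by folding lane by lane and rebuilding the vector, so <4 x i32> <1,2,3,4> add <10,20,30,40> folds directly to <11,22,33,44>. A scalar model of the per-lane loop:

    #include <cstdint>
    #include <vector>

    // Lane-by-lane fold for ISD::ADD; each Out element corresponds to one
    // getConstant(C1 + C2, SVT) pushed onto Outputs above.
    static std::vector<int32_t> foldAdd(const std::vector<int32_t> &A,
                                        const std::vector<int32_t> &B) {
      std::vector<int32_t> Out;
      for (size_t I = 0, E = A.size(); I != E; ++I)
        Out.push_back(A[I] + B[I]);
      return Out;
    }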
-SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
- SDValue N1, SDValue N2) {
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1,
+ SDValue N2) {
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
switch (Opcode) {
@@ -3023,16 +3086,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
}
}
- if (N1C) {
- if (N2C) {
- SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C);
- if (SV.getNode()) return SV;
- } else { // Cannonicalize constant to RHS if commutative
- if (isCommutativeBinOp(Opcode)) {
- std::swap(N1C, N2C);
- std::swap(N1, N2);
- }
- }
+ // Perform trivial constant folding.
+ SDValue SV = FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode());
+ if (SV.getNode()) return SV;
+
+ // Canonicalize constant to RHS if commutative.
+ if (N1C && !N2C && isCommutativeBinOp(Opcode)) {
+ std::swap(N1C, N2C);
+ std::swap(N1, N2);
}
// Constant fold FP operations.
@@ -3040,7 +3101,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
if (N1CFP) {
if (!N2CFP && isCommutativeBinOp(Opcode)) {
- // Cannonicalize constant to RHS if commutative
+ // Canonicalize constant to RHS if commutative.
std::swap(N1CFP, N2CFP);
std::swap(N1, N2);
} else if (N2CFP) {
@@ -3084,7 +3145,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
bool ignored;
// This can return overflow, underflow, or inexact; we don't care.
// FIXME need to be more flexible about rounding mode.
- (void)V.convert(*EVTToAPFloatSemantics(VT),
+ (void)V.convert(EVTToAPFloatSemantics(VT),
APFloat::rmNearestTiesToEven, &ignored);
return getConstantFP(V, VT);
}
@@ -3316,17 +3377,6 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
&ArgChains[0], ArgChains.size());
}
-/// SplatByte - Distribute ByteVal over NumBits bits.
-static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
- APInt Val = APInt(NumBits, ByteVal);
- unsigned Shift = 8;
- for (unsigned i = NumBits; i > 8; i >>= 1) {
- Val = (Val << Shift) | Val;
- Shift <<= 1;
- }
- return Val;
-}
-
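SplatByte is subsumed by APInt::getSplat, which replicates a pattern of any width rather than just bytes. What the removed doubling loop computed, in 32-bit scalar form:

    #include <cstdint>

    // SplatByte(32, 0x2A) == APInt::getSplat(32, APInt(8, 0x2A))
    //                     == 0x2A2A2A2A
    static uint32_t splatByte32(uint8_t B) {
      uint32_t V = B;
      V |= V << 8;   // 0x00002A2A
      V |= V << 16;  // 0x2A2A2A2A
      return V;
    }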
/// getMemsetValue - Vectorized representation of the memset value
/// operand.
static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
@@ -3335,17 +3385,18 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
unsigned NumBits = VT.getScalarType().getSizeInBits();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
- APInt Val = SplatByte(NumBits, C->getZExtValue() & 255);
+ assert(C->getAPIntValue().getBitWidth() == 8);
+ APInt Val = APInt::getSplat(NumBits, C->getAPIntValue());
if (VT.isInteger())
return DAG.getConstant(Val, VT);
- return DAG.getConstantFP(APFloat(Val), VT);
+ return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), VT);
}
Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
if (NumBits > 8) {
// Use a multiplication with 0x010101... to extend the input to the
// required length.
- APInt Magic = SplatByte(NumBits, 0x01);
+ APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));
}
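For a non-constant memset value, the same replication happens at run time with a single multiply, since each set bit of the 0x01010101... magic copies the byte into another position:

    #include <cstdint>

    // B * 0x01010101 places a copy of B in every byte: 0x2A -> 0x2A2A2A2A.
    static uint32_t splatRuntimeByte(uint8_t B) {
      return (uint32_t)B * 0x01010101u;
    }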
@@ -3374,10 +3425,11 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
}
assert(!VT.isVector() && "Can't handle vector type here!");
- unsigned NumVTBytes = VT.getSizeInBits() / 8;
+ unsigned NumVTBits = VT.getSizeInBits();
+ unsigned NumVTBytes = NumVTBits / 8;
unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size()));
- APInt Val(NumBytes*8, 0);
+ APInt Val(NumVTBits, 0);
if (TLI.isLittleEndian()) {
for (unsigned i = 0; i != NumBytes; ++i)
Val |= (uint64_t)(unsigned char)Str[i] << i*8;
@@ -3570,6 +3622,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment.
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ if (!TRI->needsStackRealignment(MF))
+ while (NewAlign > Align &&
+ TLI.getDataLayout()->exceedsNaturalStackAlignment(NewAlign))
+ NewAlign /= 2;
+
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
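The new loop backs a proposed alignment off, one power of two at a time, until it no longer exceeds what the stack can provide without dynamic realignment. Isolated, with NaturalAlign standing in for exceedsNaturalStackAlignment's threshold (an illustrative parameter, not the real API):

    static unsigned clampAlign(unsigned NewAlign, unsigned Align,
                               unsigned NaturalAlign) {
      while (NewAlign > Align && NewAlign > NaturalAlign)
        NewAlign /= 2;             // halve until realignment is unneeded
      return NewAlign;
    }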
@@ -3806,6 +3867,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
unsigned Align, bool isVol, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
// Check to see if we should lower the memcpy to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
@@ -3873,6 +3935,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
unsigned Align, bool isVol,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
// Check to see if we should lower the memmove to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
@@ -3927,6 +3990,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol,
MachinePointerInfo DstPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
// Check to see if we should lower the memset to stores first.
// For cases within the target-specified limits, this is the best choice.
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8c22db3..b8ab2a9 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -15,9 +15,9 @@
#include "SelectionDAGBuilder.h"
#include "SDNodeDbgValue.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
@@ -3519,7 +3519,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
EVT VT = TLI.getValueType(I.getType());
- if (I.getAlignment() * 8 < VT.getSizeInBits())
+ if (I.getAlignment() < VT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic load");
SDValue L =
@@ -3549,7 +3549,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
EVT VT = TLI.getValueType(I.getValueOperand()->getType());
- if (I.getAlignment() * 8 < VT.getSizeInBits())
+ if (I.getAlignment() < VT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic store");
if (TLI.getInsertFencesForAtomic())
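
Both checks were flipped from multiplying the alignment to dividing the size, presumably to keep the comparison overflow-safe: with a 32-bit unsigned, an IR alignment of 1 << 29 makes Align * 8 wrap to 0, and the old test misfires. The overflow-free form:

    // Alignment check without the Align * 8 overflow hazard; SizeInBits is
    // a multiple of 8 for the atomic types that reach this path.
    bool isUnderAlignedAtomic(unsigned Align, unsigned SizeInBits) {
      return Align < SizeInBits / 8;
    }
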
@@ -3663,7 +3663,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
///
/// Op = (Op & 0x007fffff) | 0x3f800000;
///
-/// where Op is the hexidecimal representation of floating point value.
+/// where Op is the hexadecimal representation of floating point value.
static SDValue
GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
@@ -3677,7 +3677,7 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
///
/// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
///
-/// where Op is the hexidecimal representation of floating point value.
+/// where Op is the hexadecimal representation of floating point value.
static SDValue
GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
DebugLoc dl) {
@@ -3693,7 +3693,8 @@ GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
/// getF32Constant - Get 32-bit floating point constant.
static SDValue
getF32Constant(SelectionDAG &DAG, unsigned Flt) {
- return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
+ return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)),
+ MVT::f32);
}
/// expandExp - Lower an exp intrinsic. Handles the special sequences for
@@ -4466,6 +4467,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ if (!Align)
+ Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment.
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
MachinePointerInfo(I.getArgOperand(0)),
@@ -4482,6 +4485,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ if (!Align)
+ Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment.
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
MachinePointerInfo(I.getArgOperand(0))));
@@ -4499,6 +4504,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ if (!Align)
+ Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment.
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
MachinePointerInfo(I.getArgOperand(0)),
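
Each of the three intrinsic visitors now canonicalizes the IR-level alignment before crossing into the SDAG layer: the @llvm.mem* intrinsics treat 0 and 1 interchangeably as "no alignment", while getMemcpy, getMemmove, and getMemset above assert a non-zero value. The boundary contract, reduced to a one-line helper:

    // IR allows align 0 on the mem* intrinsics; the SDAG layer reserves 0,
    // so callers normalize at the boundary.
    unsigned normalizeMemIntrinsicAlign(unsigned IRAlign) {
      return IRAlign ? IRAlign : 1;
    }
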
@@ -5168,6 +5175,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2);
DAG.setRoot(Res);
}
+ return 0;
}
case Intrinsic::invariant_start:
// Discard region information.
@@ -5276,8 +5284,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// Check if target-independent constraints permit a tail call here.
// Target-dependent constraints are checked within TLI.LowerCallTo.
- if (isTailCall &&
- !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI))
+ if (isTailCall && !isInTailCallPosition(CS, TLI))
isTailCall = false;
TargetLowering::
@@ -5947,6 +5954,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ OpInfo.Type == InlineAsm::isClobber)
+ continue;
+
// If this is a memory input, and if the operand is not indirect, do what we
// need to in order to provide an address for the memory input.
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
@@ -6050,6 +6061,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
ExtraInfo |= InlineAsm::Extra_MayLoad;
else if (OpInfo.Type == InlineAsm::isOutput)
ExtraInfo |= InlineAsm::Extra_MayStore;
+ else if (OpInfo.Type == InlineAsm::isClobber)
+ ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
}
}
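
Taken together, the two additions let a bare memory clobber act as a full compiler barrier: the first skips address materialization for clobber-only memory constraints, and the second conservatively marks the asm as both loading and storing. The classic source construct that exercises this path, in GNU inline-asm form:

    // A compiler barrier: no outputs, no inputs, only a "memory" clobber.
    // After the change above it is flagged MayLoad | MayStore rather than
    // rejected for lacking an operand address.
    inline void compilerBarrier() {
      asm volatile("" ::: "memory");
    }
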
@@ -6584,10 +6597,6 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
const DataLayout *TD = TLI.getDataLayout();
SmallVector<ISD::InputArg, 16> Ins;
- // Check whether the function can return without sret-demotion.
- SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
-
if (!FuncInfo->CanLowerReturn) {
// Put in an sret pointer parameter before all the other parameters.
SmallVector<EVT, 1> ValueVTs;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 5701b13..3b5823b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -140,6 +140,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FSQRT: return "fsqrt";
case ISD::FSIN: return "fsin";
case ISD::FCOS: return "fcos";
+ case ISD::FSINCOS: return "fsincos";
case ISD::FTRUNC: return "ftrunc";
case ISD::FFLOOR: return "ffloor";
case ISD::FCEIL: return "fceil";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index d4e9a50..acae58c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -144,7 +144,12 @@ EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
"instruction selector"));
static cl::opt<bool>
EnableFastISelAbort("fast-isel-abort", cl::Hidden,
- cl::desc("Enable abort calls when \"fast\" instruction fails"));
+ cl::desc("Enable abort calls when \"fast\" instruction selection "
+ "fails to lower an instruction"));
+static cl::opt<bool>
+EnableFastISelAbortArgs("fast-isel-abort-args", cl::Hidden,
+ cl::desc("Enable abort calls when \"fast\" instruction selection "
+ "fails to lower a formal argument"));
static cl::opt<bool>
UseMBPI("use-mbpi",
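
The two aborts now fire at different granularities: -fast-isel-abort when FastISel cannot select an individual instruction, and the new -fast-isel-abort-args when it cannot lower a formal argument. Assuming a plain .ll input, smoke-testing a target's FastISel coverage would look something like:

    llc -fast-isel -fast-isel-abort -fast-isel-abort-args test.ll

where test.ll is a placeholder input file.
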
@@ -354,6 +359,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TTI = getAnalysisIfAvailable<TargetTransformInfo>();
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0;
+ TargetSubtargetInfo &ST =
+ const_cast<TargetSubtargetInfo&>(TM.getSubtarget<TargetSubtargetInfo>());
+ ST.resetSubtargetFeatures(MF);
+
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
@@ -368,6 +377,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
SDB->init(GFI, *AA, LibInfo);
+ MF->setHasMSInlineAsm(false);
SelectAllBasicBlocks(Fn);
// If the first basic block in the function has live ins that need to be
@@ -438,24 +448,26 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Determine if there are any calls in this machine function.
MachineFrameInfo *MFI = MF->getFrameInfo();
- if (!MFI->hasCalls()) {
- for (MachineFunction::const_iterator
- I = MF->begin(), E = MF->end(); I != E; ++I) {
- const MachineBasicBlock *MBB = I;
- for (MachineBasicBlock::const_iterator
- II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
- const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode());
-
- if ((MCID.isCall() && !MCID.isReturn()) ||
- II->isStackAligningInlineAsm()) {
- MFI->setHasCalls(true);
- goto done;
- }
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
+ ++I) {
+
+ if (MFI->hasCalls() && MF->hasMSInlineAsm())
+ break;
+
+ const MachineBasicBlock *MBB = I;
+ for (MachineBasicBlock::const_iterator II = MBB->begin(), IE = MBB->end();
+ II != IE; ++II) {
+ const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode());
+ if ((MCID.isCall() && !MCID.isReturn()) ||
+ II->isStackAligningInlineAsm()) {
+ MFI->setHasCalls(true);
+ }
+ if (II->isMSInlineAsm()) {
+ MF->setHasMSInlineAsm(true);
}
}
}
- done:
// Determine if there is a call to setjmp in the machine function.
MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice());
@@ -1032,10 +1044,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (FuncInfo->MBB->isLandingPad())
PrepareEHLandingPad();
- // Lower any arguments needed in this block if this is the entry block.
- if (LLVMBB == &Fn.getEntryBlock())
- LowerArguments(LLVMBB);
-
// Before doing SelectionDAG ISel, see if FastISel has been requested.
if (FastIS) {
FastIS->startNewBlock();
@@ -1043,9 +1051,21 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Emit code for any incoming arguments. This must happen before
// beginning FastISel on the entry block.
if (LLVMBB == &Fn.getEntryBlock()) {
- CurDAG->setRoot(SDB->getControlRoot());
- SDB->clear();
- CodeGenAndEmitDAG();
+ // Lower any arguments needed in this block if this is the entry block.
+ if (!FastIS->LowerArguments()) {
+
+ if (EnableFastISelAbortArgs)
+ // The "fast" selector couldn't lower these arguments. For the
+ // purpose of debugging, just abort.
+ llvm_unreachable("FastISel didn't lower all arguments");
+
+ // Call target independent SDISel argument lowering code if the target
+ // specific routine is not successful.
+ LowerArguments(LLVMBB);
+ CurDAG->setRoot(SDB->getControlRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
// If we inserted any instructions at the beginning, make a note of
// where they are, so we can be sure to emit subsequent instructions
@@ -1156,6 +1176,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
FastIS->recomputeInsertPt();
+ } else {
+ // Lower any arguments needed in this block if this is the entry block.
+ if (LLVMBB == &Fn.getEntryBlock())
+ LowerArguments(LLVMBB);
}
if (Begin != BI)
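
The entry-block restructuring follows a try-fast-then-general pattern: FastISel's target-specific argument lowering runs first, and the SelectionDAG path is built only on failure (or, under -fast-isel-abort-args, the failure aborts). The control shape, extracted into a standalone sketch with hypothetical callables:

    #include <cstdlib>
    #include <functional>

    // Try the fast path; abort or fall back to the general path on failure.
    bool lowerFormalArgs(const std::function<bool()> &FastPath,
                         const std::function<void()> &GeneralPath,
                         bool AbortOnFailure) {
      if (FastPath())
        return true;      // FastISel lowered every formal argument.
      if (AbortOnFailure)
        std::abort();     // -fast-isel-abort-args behavior.
      GeneralPath();      // Target-independent SDISel lowering.
      return false;
    }
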
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 34f3bc9..f5fc66c 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -33,1114 +33,168 @@
#include <cctype>
using namespace llvm;
-/// InitLibcallNames - Set default libcall names.
-///
-static void InitLibcallNames(const char **Names) {
- Names[RTLIB::SHL_I16] = "__ashlhi3";
- Names[RTLIB::SHL_I32] = "__ashlsi3";
- Names[RTLIB::SHL_I64] = "__ashldi3";
- Names[RTLIB::SHL_I128] = "__ashlti3";
- Names[RTLIB::SRL_I16] = "__lshrhi3";
- Names[RTLIB::SRL_I32] = "__lshrsi3";
- Names[RTLIB::SRL_I64] = "__lshrdi3";
- Names[RTLIB::SRL_I128] = "__lshrti3";
- Names[RTLIB::SRA_I16] = "__ashrhi3";
- Names[RTLIB::SRA_I32] = "__ashrsi3";
- Names[RTLIB::SRA_I64] = "__ashrdi3";
- Names[RTLIB::SRA_I128] = "__ashrti3";
- Names[RTLIB::MUL_I8] = "__mulqi3";
- Names[RTLIB::MUL_I16] = "__mulhi3";
- Names[RTLIB::MUL_I32] = "__mulsi3";
- Names[RTLIB::MUL_I64] = "__muldi3";
- Names[RTLIB::MUL_I128] = "__multi3";
- Names[RTLIB::MULO_I32] = "__mulosi4";
- Names[RTLIB::MULO_I64] = "__mulodi4";
- Names[RTLIB::MULO_I128] = "__muloti4";
- Names[RTLIB::SDIV_I8] = "__divqi3";
- Names[RTLIB::SDIV_I16] = "__divhi3";
- Names[RTLIB::SDIV_I32] = "__divsi3";
- Names[RTLIB::SDIV_I64] = "__divdi3";
- Names[RTLIB::SDIV_I128] = "__divti3";
- Names[RTLIB::UDIV_I8] = "__udivqi3";
- Names[RTLIB::UDIV_I16] = "__udivhi3";
- Names[RTLIB::UDIV_I32] = "__udivsi3";
- Names[RTLIB::UDIV_I64] = "__udivdi3";
- Names[RTLIB::UDIV_I128] = "__udivti3";
- Names[RTLIB::SREM_I8] = "__modqi3";
- Names[RTLIB::SREM_I16] = "__modhi3";
- Names[RTLIB::SREM_I32] = "__modsi3";
- Names[RTLIB::SREM_I64] = "__moddi3";
- Names[RTLIB::SREM_I128] = "__modti3";
- Names[RTLIB::UREM_I8] = "__umodqi3";
- Names[RTLIB::UREM_I16] = "__umodhi3";
- Names[RTLIB::UREM_I32] = "__umodsi3";
- Names[RTLIB::UREM_I64] = "__umoddi3";
- Names[RTLIB::UREM_I128] = "__umodti3";
-
- // These are generally not available.
- Names[RTLIB::SDIVREM_I8] = 0;
- Names[RTLIB::SDIVREM_I16] = 0;
- Names[RTLIB::SDIVREM_I32] = 0;
- Names[RTLIB::SDIVREM_I64] = 0;
- Names[RTLIB::SDIVREM_I128] = 0;
- Names[RTLIB::UDIVREM_I8] = 0;
- Names[RTLIB::UDIVREM_I16] = 0;
- Names[RTLIB::UDIVREM_I32] = 0;
- Names[RTLIB::UDIVREM_I64] = 0;
- Names[RTLIB::UDIVREM_I128] = 0;
-
- Names[RTLIB::NEG_I32] = "__negsi2";
- Names[RTLIB::NEG_I64] = "__negdi2";
- Names[RTLIB::ADD_F32] = "__addsf3";
- Names[RTLIB::ADD_F64] = "__adddf3";
- Names[RTLIB::ADD_F80] = "__addxf3";
- Names[RTLIB::ADD_F128] = "__addtf3";
- Names[RTLIB::ADD_PPCF128] = "__gcc_qadd";
- Names[RTLIB::SUB_F32] = "__subsf3";
- Names[RTLIB::SUB_F64] = "__subdf3";
- Names[RTLIB::SUB_F80] = "__subxf3";
- Names[RTLIB::SUB_F128] = "__subtf3";
- Names[RTLIB::SUB_PPCF128] = "__gcc_qsub";
- Names[RTLIB::MUL_F32] = "__mulsf3";
- Names[RTLIB::MUL_F64] = "__muldf3";
- Names[RTLIB::MUL_F80] = "__mulxf3";
- Names[RTLIB::MUL_F128] = "__multf3";
- Names[RTLIB::MUL_PPCF128] = "__gcc_qmul";
- Names[RTLIB::DIV_F32] = "__divsf3";
- Names[RTLIB::DIV_F64] = "__divdf3";
- Names[RTLIB::DIV_F80] = "__divxf3";
- Names[RTLIB::DIV_F128] = "__divtf3";
- Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv";
- Names[RTLIB::REM_F32] = "fmodf";
- Names[RTLIB::REM_F64] = "fmod";
- Names[RTLIB::REM_F80] = "fmodl";
- Names[RTLIB::REM_F128] = "fmodl";
- Names[RTLIB::REM_PPCF128] = "fmodl";
- Names[RTLIB::FMA_F32] = "fmaf";
- Names[RTLIB::FMA_F64] = "fma";
- Names[RTLIB::FMA_F80] = "fmal";
- Names[RTLIB::FMA_F128] = "fmal";
- Names[RTLIB::FMA_PPCF128] = "fmal";
- Names[RTLIB::POWI_F32] = "__powisf2";
- Names[RTLIB::POWI_F64] = "__powidf2";
- Names[RTLIB::POWI_F80] = "__powixf2";
- Names[RTLIB::POWI_F128] = "__powitf2";
- Names[RTLIB::POWI_PPCF128] = "__powitf2";
- Names[RTLIB::SQRT_F32] = "sqrtf";
- Names[RTLIB::SQRT_F64] = "sqrt";
- Names[RTLIB::SQRT_F80] = "sqrtl";
- Names[RTLIB::SQRT_F128] = "sqrtl";
- Names[RTLIB::SQRT_PPCF128] = "sqrtl";
- Names[RTLIB::LOG_F32] = "logf";
- Names[RTLIB::LOG_F64] = "log";
- Names[RTLIB::LOG_F80] = "logl";
- Names[RTLIB::LOG_F128] = "logl";
- Names[RTLIB::LOG_PPCF128] = "logl";
- Names[RTLIB::LOG2_F32] = "log2f";
- Names[RTLIB::LOG2_F64] = "log2";
- Names[RTLIB::LOG2_F80] = "log2l";
- Names[RTLIB::LOG2_F128] = "log2l";
- Names[RTLIB::LOG2_PPCF128] = "log2l";
- Names[RTLIB::LOG10_F32] = "log10f";
- Names[RTLIB::LOG10_F64] = "log10";
- Names[RTLIB::LOG10_F80] = "log10l";
- Names[RTLIB::LOG10_F128] = "log10l";
- Names[RTLIB::LOG10_PPCF128] = "log10l";
- Names[RTLIB::EXP_F32] = "expf";
- Names[RTLIB::EXP_F64] = "exp";
- Names[RTLIB::EXP_F80] = "expl";
- Names[RTLIB::EXP_F128] = "expl";
- Names[RTLIB::EXP_PPCF128] = "expl";
- Names[RTLIB::EXP2_F32] = "exp2f";
- Names[RTLIB::EXP2_F64] = "exp2";
- Names[RTLIB::EXP2_F80] = "exp2l";
- Names[RTLIB::EXP2_F128] = "exp2l";
- Names[RTLIB::EXP2_PPCF128] = "exp2l";
- Names[RTLIB::SIN_F32] = "sinf";
- Names[RTLIB::SIN_F64] = "sin";
- Names[RTLIB::SIN_F80] = "sinl";
- Names[RTLIB::SIN_F128] = "sinl";
- Names[RTLIB::SIN_PPCF128] = "sinl";
- Names[RTLIB::COS_F32] = "cosf";
- Names[RTLIB::COS_F64] = "cos";
- Names[RTLIB::COS_F80] = "cosl";
- Names[RTLIB::COS_F128] = "cosl";
- Names[RTLIB::COS_PPCF128] = "cosl";
- Names[RTLIB::POW_F32] = "powf";
- Names[RTLIB::POW_F64] = "pow";
- Names[RTLIB::POW_F80] = "powl";
- Names[RTLIB::POW_F128] = "powl";
- Names[RTLIB::POW_PPCF128] = "powl";
- Names[RTLIB::CEIL_F32] = "ceilf";
- Names[RTLIB::CEIL_F64] = "ceil";
- Names[RTLIB::CEIL_F80] = "ceill";
- Names[RTLIB::CEIL_F128] = "ceill";
- Names[RTLIB::CEIL_PPCF128] = "ceill";
- Names[RTLIB::TRUNC_F32] = "truncf";
- Names[RTLIB::TRUNC_F64] = "trunc";
- Names[RTLIB::TRUNC_F80] = "truncl";
- Names[RTLIB::TRUNC_F128] = "truncl";
- Names[RTLIB::TRUNC_PPCF128] = "truncl";
- Names[RTLIB::RINT_F32] = "rintf";
- Names[RTLIB::RINT_F64] = "rint";
- Names[RTLIB::RINT_F80] = "rintl";
- Names[RTLIB::RINT_F128] = "rintl";
- Names[RTLIB::RINT_PPCF128] = "rintl";
- Names[RTLIB::NEARBYINT_F32] = "nearbyintf";
- Names[RTLIB::NEARBYINT_F64] = "nearbyint";
- Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
- Names[RTLIB::NEARBYINT_F128] = "nearbyintl";
- Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
- Names[RTLIB::FLOOR_F32] = "floorf";
- Names[RTLIB::FLOOR_F64] = "floor";
- Names[RTLIB::FLOOR_F80] = "floorl";
- Names[RTLIB::FLOOR_F128] = "floorl";
- Names[RTLIB::FLOOR_PPCF128] = "floorl";
- Names[RTLIB::COPYSIGN_F32] = "copysignf";
- Names[RTLIB::COPYSIGN_F64] = "copysign";
- Names[RTLIB::COPYSIGN_F80] = "copysignl";
- Names[RTLIB::COPYSIGN_F128] = "copysignl";
- Names[RTLIB::COPYSIGN_PPCF128] = "copysignl";
- Names[RTLIB::FPEXT_F64_F128] = "__extenddftf2";
- Names[RTLIB::FPEXT_F32_F128] = "__extendsftf2";
- Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
- Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee";
- Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee";
- Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
- Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
- Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2";
- Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2";
- Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
- Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2";
- Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
- Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi";
- Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi";
- Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
- Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
- Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
- Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi";
- Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi";
- Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
- Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
- Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti";
- Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi";
- Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi";
- Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti";
- Names[RTLIB::FPTOSINT_F128_I32] = "__fixtfsi";
- Names[RTLIB::FPTOSINT_F128_I64] = "__fixtfdi";
- Names[RTLIB::FPTOSINT_F128_I128] = "__fixtfti";
- Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
- Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
- Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
- Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi";
- Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi";
- Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
- Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
- Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
- Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi";
- Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi";
- Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
- Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
- Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
- Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi";
- Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi";
- Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti";
- Names[RTLIB::FPTOUINT_F128_I32] = "__fixunstfsi";
- Names[RTLIB::FPTOUINT_F128_I64] = "__fixunstfdi";
- Names[RTLIB::FPTOUINT_F128_I128] = "__fixunstfti";
- Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi";
- Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi";
- Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti";
- Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
- Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
- Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf";
- Names[RTLIB::SINTTOFP_I32_F128] = "__floatsitf";
- Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf";
- Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
- Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
- Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf";
- Names[RTLIB::SINTTOFP_I64_F128] = "__floatditf";
- Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf";
- Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf";
- Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf";
- Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf";
- Names[RTLIB::SINTTOFP_I128_F128] = "__floattitf";
- Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf";
- Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
- Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
- Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf";
- Names[RTLIB::UINTTOFP_I32_F128] = "__floatunsitf";
- Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf";
- Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
- Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
- Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf";
- Names[RTLIB::UINTTOFP_I64_F128] = "__floatunditf";
- Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf";
- Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf";
- Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf";
- Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf";
- Names[RTLIB::UINTTOFP_I128_F128] = "__floatuntitf";
- Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf";
- Names[RTLIB::OEQ_F32] = "__eqsf2";
- Names[RTLIB::OEQ_F64] = "__eqdf2";
- Names[RTLIB::OEQ_F128] = "__eqtf2";
- Names[RTLIB::UNE_F32] = "__nesf2";
- Names[RTLIB::UNE_F64] = "__nedf2";
- Names[RTLIB::UNE_F128] = "__netf2";
- Names[RTLIB::OGE_F32] = "__gesf2";
- Names[RTLIB::OGE_F64] = "__gedf2";
- Names[RTLIB::OGE_F128] = "__getf2";
- Names[RTLIB::OLT_F32] = "__ltsf2";
- Names[RTLIB::OLT_F64] = "__ltdf2";
- Names[RTLIB::OLT_F128] = "__lttf2";
- Names[RTLIB::OLE_F32] = "__lesf2";
- Names[RTLIB::OLE_F64] = "__ledf2";
- Names[RTLIB::OLE_F128] = "__letf2";
- Names[RTLIB::OGT_F32] = "__gtsf2";
- Names[RTLIB::OGT_F64] = "__gtdf2";
- Names[RTLIB::OGT_F128] = "__gttf2";
- Names[RTLIB::UO_F32] = "__unordsf2";
- Names[RTLIB::UO_F64] = "__unorddf2";
- Names[RTLIB::UO_F128] = "__unordtf2";
- Names[RTLIB::O_F32] = "__unordsf2";
- Names[RTLIB::O_F64] = "__unorddf2";
- Names[RTLIB::O_F128] = "__unordtf2";
- Names[RTLIB::MEMCPY] = "memcpy";
- Names[RTLIB::MEMMOVE] = "memmove";
- Names[RTLIB::MEMSET] = "memset";
- Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
- Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
- Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
- Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4";
- Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8";
- Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1";
- Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2";
- Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4";
- Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8";
- Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1";
- Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2";
- Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4";
- Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8";
- Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1";
- Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2";
- Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4";
- Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8";
- Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1";
- Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2";
- Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4";
- Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8";
- Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1";
- Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2";
- Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4";
- Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8";
- Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1";
- Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2";
- Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4";
- Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8";
- Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1";
- Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
- Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4";
- Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8";
-}
-
-/// InitLibcallCallingConvs - Set default libcall CallingConvs.
-///
-static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
- for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
- CCs[i] = CallingConv::C;
- }
-}
-
-/// getFPEXT - Return the FPEXT_*_* value for the given types, or
-/// UNKNOWN_LIBCALL if there is none.
-RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
- if (OpVT == MVT::f32) {
- if (RetVT == MVT::f64)
- return FPEXT_F32_F64;
- if (RetVT == MVT::f128)
- return FPEXT_F32_F128;
- } else if (OpVT == MVT::f64) {
- if (RetVT == MVT::f128)
- return FPEXT_F64_F128;
- }
-
- return UNKNOWN_LIBCALL;
-}
-
-/// getFPROUND - Return the FPROUND_*_* value for the given types, or
-/// UNKNOWN_LIBCALL if there is none.
-RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
- if (RetVT == MVT::f32) {
- if (OpVT == MVT::f64)
- return FPROUND_F64_F32;
- if (OpVT == MVT::f80)
- return FPROUND_F80_F32;
- if (OpVT == MVT::f128)
- return FPROUND_F128_F32;
- if (OpVT == MVT::ppcf128)
- return FPROUND_PPCF128_F32;
- } else if (RetVT == MVT::f64) {
- if (OpVT == MVT::f80)
- return FPROUND_F80_F64;
- if (OpVT == MVT::f128)
- return FPROUND_F128_F64;
- if (OpVT == MVT::ppcf128)
- return FPROUND_PPCF128_F64;
- }
-
- return UNKNOWN_LIBCALL;
-}
-
-/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
-/// UNKNOWN_LIBCALL if there is none.
-RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
- if (OpVT == MVT::f32) {
- if (RetVT == MVT::i8)
- return FPTOSINT_F32_I8;
- if (RetVT == MVT::i16)
- return FPTOSINT_F32_I16;
- if (RetVT == MVT::i32)
- return FPTOSINT_F32_I32;
- if (RetVT == MVT::i64)
- return FPTOSINT_F32_I64;
- if (RetVT == MVT::i128)
- return FPTOSINT_F32_I128;
- } else if (OpVT == MVT::f64) {
- if (RetVT == MVT::i8)
- return FPTOSINT_F64_I8;
- if (RetVT == MVT::i16)
- return FPTOSINT_F64_I16;
- if (RetVT == MVT::i32)
- return FPTOSINT_F64_I32;
- if (RetVT == MVT::i64)
- return FPTOSINT_F64_I64;
- if (RetVT == MVT::i128)
- return FPTOSINT_F64_I128;
- } else if (OpVT == MVT::f80) {
- if (RetVT == MVT::i32)
- return FPTOSINT_F80_I32;
- if (RetVT == MVT::i64)
- return FPTOSINT_F80_I64;
- if (RetVT == MVT::i128)
- return FPTOSINT_F80_I128;
- } else if (OpVT == MVT::f128) {
- if (RetVT == MVT::i32)
- return FPTOSINT_F128_I32;
- if (RetVT == MVT::i64)
- return FPTOSINT_F128_I64;
- if (RetVT == MVT::i128)
- return FPTOSINT_F128_I128;
- } else if (OpVT == MVT::ppcf128) {
- if (RetVT == MVT::i32)
- return FPTOSINT_PPCF128_I32;
- if (RetVT == MVT::i64)
- return FPTOSINT_PPCF128_I64;
- if (RetVT == MVT::i128)
- return FPTOSINT_PPCF128_I128;
- }
- return UNKNOWN_LIBCALL;
-}
-
-/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
-/// UNKNOWN_LIBCALL if there is none.
-RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
- if (OpVT == MVT::f32) {
- if (RetVT == MVT::i8)
- return FPTOUINT_F32_I8;
- if (RetVT == MVT::i16)
- return FPTOUINT_F32_I16;
- if (RetVT == MVT::i32)
- return FPTOUINT_F32_I32;
- if (RetVT == MVT::i64)
- return FPTOUINT_F32_I64;
- if (RetVT == MVT::i128)
- return FPTOUINT_F32_I128;
- } else if (OpVT == MVT::f64) {
- if (RetVT == MVT::i8)
- return FPTOUINT_F64_I8;
- if (RetVT == MVT::i16)
- return FPTOUINT_F64_I16;
- if (RetVT == MVT::i32)
- return FPTOUINT_F64_I32;
- if (RetVT == MVT::i64)
- return FPTOUINT_F64_I64;
- if (RetVT == MVT::i128)
- return FPTOUINT_F64_I128;
- } else if (OpVT == MVT::f80) {
- if (RetVT == MVT::i32)
- return FPTOUINT_F80_I32;
- if (RetVT == MVT::i64)
- return FPTOUINT_F80_I64;
- if (RetVT == MVT::i128)
- return FPTOUINT_F80_I128;
- } else if (OpVT == MVT::f128) {
- if (RetVT == MVT::i32)
- return FPTOUINT_F128_I32;
- if (RetVT == MVT::i64)
- return FPTOUINT_F128_I64;
- if (RetVT == MVT::i128)
- return FPTOUINT_F128_I128;
- } else if (OpVT == MVT::ppcf128) {
- if (RetVT == MVT::i32)
- return FPTOUINT_PPCF128_I32;
- if (RetVT == MVT::i64)
- return FPTOUINT_PPCF128_I64;
- if (RetVT == MVT::i128)
- return FPTOUINT_PPCF128_I128;
- }
- return UNKNOWN_LIBCALL;
-}
-
-/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
-/// UNKNOWN_LIBCALL if there is none.
-RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
- if (OpVT == MVT::i32) {
- if (RetVT == MVT::f32)
- return SINTTOFP_I32_F32;
- if (RetVT == MVT::f64)
- return SINTTOFP_I32_F64;
- if (RetVT == MVT::f80)
- return SINTTOFP_I32_F80;
- if (RetVT == MVT::f128)
- return SINTTOFP_I32_F128;
- if (RetVT == MVT::ppcf128)
- return SINTTOFP_I32_PPCF128;
- } else if (OpVT == MVT::i64) {
- if (RetVT == MVT::f32)
- return SINTTOFP_I64_F32;
- if (RetVT == MVT::f64)
- return SINTTOFP_I64_F64;
- if (RetVT == MVT::f80)
- return SINTTOFP_I64_F80;
- if (RetVT == MVT::f128)
- return SINTTOFP_I64_F128;
- if (RetVT == MVT::ppcf128)
- return SINTTOFP_I64_PPCF128;
- } else if (OpVT == MVT::i128) {
- if (RetVT == MVT::f32)
- return SINTTOFP_I128_F32;
- if (RetVT == MVT::f64)
- return SINTTOFP_I128_F64;
- if (RetVT == MVT::f80)
- return SINTTOFP_I128_F80;
- if (RetVT == MVT::f128)
- return SINTTOFP_I128_F128;
- if (RetVT == MVT::ppcf128)
- return SINTTOFP_I128_PPCF128;
- }
- return UNKNOWN_LIBCALL;
-}
-
-/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
-/// UNKNOWN_LIBCALL if there is none.
-RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
- if (OpVT == MVT::i32) {
- if (RetVT == MVT::f32)
- return UINTTOFP_I32_F32;
- if (RetVT == MVT::f64)
- return UINTTOFP_I32_F64;
- if (RetVT == MVT::f80)
- return UINTTOFP_I32_F80;
- if (RetVT == MVT::f128)
- return UINTTOFP_I32_F128;
- if (RetVT == MVT::ppcf128)
- return UINTTOFP_I32_PPCF128;
- } else if (OpVT == MVT::i64) {
- if (RetVT == MVT::f32)
- return UINTTOFP_I64_F32;
- if (RetVT == MVT::f64)
- return UINTTOFP_I64_F64;
- if (RetVT == MVT::f80)
- return UINTTOFP_I64_F80;
- if (RetVT == MVT::f128)
- return UINTTOFP_I64_F128;
- if (RetVT == MVT::ppcf128)
- return UINTTOFP_I64_PPCF128;
- } else if (OpVT == MVT::i128) {
- if (RetVT == MVT::f32)
- return UINTTOFP_I128_F32;
- if (RetVT == MVT::f64)
- return UINTTOFP_I128_F64;
- if (RetVT == MVT::f80)
- return UINTTOFP_I128_F80;
- if (RetVT == MVT::f128)
- return UINTTOFP_I128_F128;
- if (RetVT == MVT::ppcf128)
- return UINTTOFP_I128_PPCF128;
- }
- return UNKNOWN_LIBCALL;
-}
-
-/// InitCmpLibcallCCs - Set default comparison libcall CC.
-///
-static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
- memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
- CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
- CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
- CCs[RTLIB::OEQ_F128] = ISD::SETEQ;
- CCs[RTLIB::UNE_F32] = ISD::SETNE;
- CCs[RTLIB::UNE_F64] = ISD::SETNE;
- CCs[RTLIB::UNE_F128] = ISD::SETNE;
- CCs[RTLIB::OGE_F32] = ISD::SETGE;
- CCs[RTLIB::OGE_F64] = ISD::SETGE;
- CCs[RTLIB::OGE_F128] = ISD::SETGE;
- CCs[RTLIB::OLT_F32] = ISD::SETLT;
- CCs[RTLIB::OLT_F64] = ISD::SETLT;
- CCs[RTLIB::OLT_F128] = ISD::SETLT;
- CCs[RTLIB::OLE_F32] = ISD::SETLE;
- CCs[RTLIB::OLE_F64] = ISD::SETLE;
- CCs[RTLIB::OLE_F128] = ISD::SETLE;
- CCs[RTLIB::OGT_F32] = ISD::SETGT;
- CCs[RTLIB::OGT_F64] = ISD::SETGT;
- CCs[RTLIB::OGT_F128] = ISD::SETGT;
- CCs[RTLIB::UO_F32] = ISD::SETNE;
- CCs[RTLIB::UO_F64] = ISD::SETNE;
- CCs[RTLIB::UO_F128] = ISD::SETNE;
- CCs[RTLIB::O_F32] = ISD::SETEQ;
- CCs[RTLIB::O_F64] = ISD::SETEQ;
- CCs[RTLIB::O_F128] = ISD::SETEQ;
-}
-
/// NOTE: The constructor takes ownership of TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm,
const TargetLoweringObjectFile *tlof)
- : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) {
- // All operations default to being supported.
- memset(OpActions, 0, sizeof(OpActions));
- memset(LoadExtActions, 0, sizeof(LoadExtActions));
- memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
- memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
- memset(CondCodeActions, 0, sizeof(CondCodeActions));
-
- // Set default actions for various operations.
- for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
- // Default all indexed load / store to expand.
- for (unsigned IM = (unsigned)ISD::PRE_INC;
- IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
- setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand);
- setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
- }
-
- // These operations default to expand.
- setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
- }
+ : TargetLoweringBase(tm, tlof) {}
- // Most targets ignore the @llvm.prefetch intrinsic.
- setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
-
- // ConstantFP nodes default to expand. Targets can either change this to
- // Legal, in which case all fp constants are legal, or use isFPImmLegal()
- // to optimize expansions for certain constants.
- setOperationAction(ISD::ConstantFP, MVT::f16, Expand);
- setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
- setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
- setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
- setOperationAction(ISD::ConstantFP, MVT::f128, Expand);
-
- // These library functions default to expand.
- setOperationAction(ISD::FLOG , MVT::f16, Expand);
- setOperationAction(ISD::FLOG2, MVT::f16, Expand);
- setOperationAction(ISD::FLOG10, MVT::f16, Expand);
- setOperationAction(ISD::FEXP , MVT::f16, Expand);
- setOperationAction(ISD::FEXP2, MVT::f16, Expand);
- setOperationAction(ISD::FFLOOR, MVT::f16, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand);
- setOperationAction(ISD::FCEIL, MVT::f16, Expand);
- setOperationAction(ISD::FRINT, MVT::f16, Expand);
- setOperationAction(ISD::FTRUNC, MVT::f16, Expand);
- setOperationAction(ISD::FLOG , MVT::f32, Expand);
- setOperationAction(ISD::FLOG2, MVT::f32, Expand);
- setOperationAction(ISD::FLOG10, MVT::f32, Expand);
- setOperationAction(ISD::FEXP , MVT::f32, Expand);
- setOperationAction(ISD::FEXP2, MVT::f32, Expand);
- setOperationAction(ISD::FFLOOR, MVT::f32, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand);
- setOperationAction(ISD::FCEIL, MVT::f32, Expand);
- setOperationAction(ISD::FRINT, MVT::f32, Expand);
- setOperationAction(ISD::FTRUNC, MVT::f32, Expand);
- setOperationAction(ISD::FLOG , MVT::f64, Expand);
- setOperationAction(ISD::FLOG2, MVT::f64, Expand);
- setOperationAction(ISD::FLOG10, MVT::f64, Expand);
- setOperationAction(ISD::FEXP , MVT::f64, Expand);
- setOperationAction(ISD::FEXP2, MVT::f64, Expand);
- setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
- setOperationAction(ISD::FCEIL, MVT::f64, Expand);
- setOperationAction(ISD::FRINT, MVT::f64, Expand);
- setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
- setOperationAction(ISD::FLOG , MVT::f128, Expand);
- setOperationAction(ISD::FLOG2, MVT::f128, Expand);
- setOperationAction(ISD::FLOG10, MVT::f128, Expand);
- setOperationAction(ISD::FEXP , MVT::f128, Expand);
- setOperationAction(ISD::FEXP2, MVT::f128, Expand);
- setOperationAction(ISD::FFLOOR, MVT::f128, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand);
- setOperationAction(ISD::FCEIL, MVT::f128, Expand);
- setOperationAction(ISD::FRINT, MVT::f128, Expand);
- setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
-
- // Default ISD::TRAP to expand (which turns it into abort).
- setOperationAction(ISD::TRAP, MVT::Other, Expand);
-
- // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand"
- // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP.
- //
- setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
-
- IsLittleEndian = TD->isLittleEndian();
- PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0));
- memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
- memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
- maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
- maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize
- = maxStoresPerMemmoveOptSize = 4;
- benefitFromCodePlacementOpt = false;
- UseUnderscoreSetJmp = false;
- UseUnderscoreLongJmp = false;
- SelectIsExpensive = false;
- IntDivIsCheap = false;
- Pow2DivIsCheap = false;
- JumpIsExpensive = false;
- predictableSelectIsExpensive = false;
- StackPointerRegisterToSaveRestore = 0;
- ExceptionPointerRegister = 0;
- ExceptionSelectorRegister = 0;
- BooleanContents = UndefinedBooleanContent;
- BooleanVectorContents = UndefinedBooleanContent;
- SchedPreferenceInfo = Sched::ILP;
- JumpBufSize = 0;
- JumpBufAlignment = 0;
- MinFunctionAlignment = 0;
- PrefFunctionAlignment = 0;
- PrefLoopAlignment = 0;
- MinStackArgumentAlignment = 1;
- ShouldFoldAtomicFences = false;
- InsertFencesForAtomic = false;
- SupportJumpTables = true;
- MinimumJumpTableEntries = 4;
-
- InitLibcallNames(LibcallRoutineNames);
- InitCmpLibcallCCs(CmpLibcallCCs);
- InitLibcallCallingConvs(LibcallCallingConvs);
-}
-
-TargetLowering::~TargetLowering() {
- delete &TLOF;
-}
-
-MVT TargetLowering::getShiftAmountTy(EVT LHSTy) const {
- return MVT::getIntegerVT(8*TD->getPointerSize(0));
+const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ return NULL;
}
-/// canOpTrap - Returns true if the operation can trap for the value type.
-/// VT must be a legal type.
-bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const {
- assert(isTypeLegal(VT));
- switch (Op) {
- default:
+/// Check whether a given call node is in tail position within its function. If
+/// so, it sets Chain to the input chain of the tail call.
+bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
+ SDValue &Chain) const {
+ const Function *F = DAG.getMachineFunction().getFunction();
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ AttributeSet CallerAttrs = F->getAttributes();
+ if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex)
+ .removeAttribute(Attribute::NoAlias).hasAttributes())
return false;
- case ISD::FDIV:
- case ISD::FREM:
- case ISD::SDIV:
- case ISD::UDIV:
- case ISD::SREM:
- case ISD::UREM:
- return true;
- }
-}
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
+ CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ return false;
-static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
- unsigned &NumIntermediates,
- MVT &RegisterVT,
- TargetLowering *TLI) {
- // Figure out the right, legal destination reg to copy into.
- unsigned NumElts = VT.getVectorNumElements();
- MVT EltTy = VT.getVectorElementType();
-
- unsigned NumVectorRegs = 1;
-
- // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
- // could break down into LHS/RHS like LegalizeDAG does.
- if (!isPowerOf2_32(NumElts)) {
- NumVectorRegs = NumElts;
- NumElts = 1;
- }
-
- // Divide the input until we get to a supported size. This will always
- // end with a scalar if the target doesn't support vectors.
- while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
- NumElts >>= 1;
- NumVectorRegs <<= 1;
- }
-
- NumIntermediates = NumVectorRegs;
-
- MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
- if (!TLI->isTypeLegal(NewVT))
- NewVT = EltTy;
- IntermediateVT = NewVT;
-
- unsigned NewVTSize = NewVT.getSizeInBits();
-
- // Convert sizes such as i33 to i64.
- if (!isPowerOf2_32(NewVTSize))
- NewVTSize = NextPowerOf2(NewVTSize);
-
- MVT DestVT = TLI->getRegisterType(NewVT);
- RegisterVT = DestVT;
- if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
- return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
-
- // Otherwise, promotion or legal types use the same number of registers as
- // the vector decimated to the appropriate level.
- return NumVectorRegs;
-}
-
-/// isLegalRC - Return true if the value types that can be represented by the
-/// specified register class are all legal.
-bool TargetLowering::isLegalRC(const TargetRegisterClass *RC) const {
- for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
- I != E; ++I) {
- if (isTypeLegal(*I))
- return true;
- }
- return false;
-}
-
-/// findRepresentativeClass - Return the largest legal super-reg register class
-/// of the register class for the specified type and its associated "cost".
-std::pair<const TargetRegisterClass*, uint8_t>
-TargetLowering::findRepresentativeClass(MVT VT) const {
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
- if (!RC)
- return std::make_pair(RC, 0);
-
- // Compute the set of all super-register classes.
- BitVector SuperRegRC(TRI->getNumRegClasses());
- for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI)
- SuperRegRC.setBitsInMask(RCI.getMask());
-
- // Find the first legal register class with the largest spill size.
- const TargetRegisterClass *BestRC = RC;
- for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) {
- const TargetRegisterClass *SuperRC = TRI->getRegClass(i);
- // We want the largest possible spill size.
- if (SuperRC->getSize() <= BestRC->getSize())
- continue;
- if (!isLegalRC(SuperRC))
- continue;
- BestRC = SuperRC;
- }
- return std::make_pair(BestRC, 1);
-}
-
-/// computeRegisterProperties - Once all of the register classes are added,
-/// this allows us to compute derived properties we expose.
-void TargetLowering::computeRegisterProperties() {
- assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
- "Too many value types for ValueTypeActions to hold!");
-
- // Everything defaults to needing one register.
- for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
- NumRegistersForVT[i] = 1;
- RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
- }
- // ...except isVoid, which doesn't need any registers.
- NumRegistersForVT[MVT::isVoid] = 0;
-
- // Find the largest integer register class.
- unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
- for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
- assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
-
- // Every integer value type larger than this largest register takes twice as
- // many registers to represent as the previous ValueType.
- for (unsigned ExpandedReg = LargestIntReg + 1;
- ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) {
- NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
- RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
- TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
- ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg,
- TypeExpandInteger);
- }
-
- // Inspect all of the ValueType's smaller than the largest integer
- // register to see which ones need promotion.
- unsigned LegalIntReg = LargestIntReg;
- for (unsigned IntReg = LargestIntReg - 1;
- IntReg >= (unsigned)MVT::i1; --IntReg) {
- MVT IVT = (MVT::SimpleValueType)IntReg;
- if (isTypeLegal(IVT)) {
- LegalIntReg = IntReg;
- } else {
- RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
- (const MVT::SimpleValueType)LegalIntReg;
- ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
- }
- }
-
- // ppcf128 type is really two f64's.
- if (!isTypeLegal(MVT::ppcf128)) {
- NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
- RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
- TransformToType[MVT::ppcf128] = MVT::f64;
- ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
- }
-
- // Decide how to handle f64. If the target does not have native f64 support,
- // expand it to i64 and we will be generating soft float library calls.
- if (!isTypeLegal(MVT::f64)) {
- NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
- RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
- TransformToType[MVT::f64] = MVT::i64;
- ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat);
- }
-
- // Decide how to handle f32. If the target does not have native support for
- // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
- if (!isTypeLegal(MVT::f32)) {
- if (isTypeLegal(MVT::f64)) {
- NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
- RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
- TransformToType[MVT::f32] = MVT::f64;
- ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger);
- } else {
- NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
- RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
- TransformToType[MVT::f32] = MVT::i32;
- ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
- }
- }
-
- // Loop over all of the vector value types to see which need transformations.
- for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
- i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
- if (isTypeLegal(VT)) continue;
-
- // Determine if there is a legal wider type. If so, we should promote to
- // that wider vector type.
- MVT EltVT = VT.getVectorElementType();
- unsigned NElts = VT.getVectorNumElements();
- if (NElts != 1 && !shouldSplitVectorElementType(EltVT)) {
- bool IsLegalWiderType = false;
- // First try to promote the elements of integer vectors. If no legal
- // promotion was found, fallback to the widen-vector method.
- for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
- MVT SVT = (MVT::SimpleValueType)nVT;
- // Promote vectors of integers to vectors with the same number
- // of elements, with a wider element type.
- if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
- && SVT.getVectorNumElements() == NElts &&
- isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
- TransformToType[i] = SVT;
- RegisterTypeForVT[i] = SVT;
- NumRegistersForVT[i] = 1;
- ValueTypeActions.setTypeAction(VT, TypePromoteInteger);
- IsLegalWiderType = true;
- break;
- }
- }
-
- if (IsLegalWiderType) continue;
-
- // Try to widen the vector.
- for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
- MVT SVT = (MVT::SimpleValueType)nVT;
- if (SVT.getVectorElementType() == EltVT &&
- SVT.getVectorNumElements() > NElts &&
- isTypeLegal(SVT)) {
- TransformToType[i] = SVT;
- RegisterTypeForVT[i] = SVT;
- NumRegistersForVT[i] = 1;
- ValueTypeActions.setTypeAction(VT, TypeWidenVector);
- IsLegalWiderType = true;
- break;
- }
- }
- if (IsLegalWiderType) continue;
- }
-
- MVT IntermediateVT;
- MVT RegisterVT;
- unsigned NumIntermediates;
- NumRegistersForVT[i] =
- getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
- RegisterVT, this);
- RegisterTypeForVT[i] = RegisterVT;
-
- MVT NVT = VT.getPow2VectorType();
- if (NVT == VT) {
- // Type is already a power of 2. The default action is to split.
- TransformToType[i] = MVT::Other;
- unsigned NumElts = VT.getVectorNumElements();
- ValueTypeActions.setTypeAction(VT,
- NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
- } else {
- TransformToType[i] = NVT;
- ValueTypeActions.setTypeAction(VT, TypeWidenVector);
- }
- }
-
- // Determine the 'representative' register class for each value type.
- // An representative register class is the largest (meaning one which is
- // not a sub-register class / subreg register class) legal register class for
- // a group of value types. For example, on i386, i8, i16, and i32
- // representative would be GR32; while on x86_64 it's GR64.
- for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
- const TargetRegisterClass* RRC;
- uint8_t Cost;
- tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i);
- RepRegClassForVT[i] = RRC;
- RepRegClassCostForVT[i] = Cost;
- }
+ // Check if the only use is a function return node.
+ return isUsedByReturnOnly(Node, Chain);
}
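
The attribute screen is the interesting half: any caller return attribute other than noalias, and in particular a required sign or zero extension of the return value, means work remains after the call, so it cannot be folded into a tail call. The same test, pulled out against the AttributeSet API used above:

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    // True when the caller's return attributes leave nothing to do after
    // the call; noalias is ignored since it doesn't affect the sequence.
    bool retAttrsPermitTailCall(const Function &F) {
      AttributeSet CallerAttrs = F.getAttributes();
      if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex)
              .removeAttribute(Attribute::NoAlias).hasAttributes())
        return false;
      return !CallerAttrs.hasAttribute(AttributeSet::ReturnIndex,
                                       Attribute::ZExt) &&
             !CallerAttrs.hasAttribute(AttributeSet::ReturnIndex,
                                       Attribute::SExt);
    }
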
-const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
- return NULL;
-}
-EVT TargetLowering::getSetCCResultType(EVT VT) const {
- assert(!VT.isVector() && "No default SetCC type for vectors!");
- return getPointerTy(0).SimpleTy;
+/// Generate a libcall taking the given operands as arguments and returning a
+/// result of type RetVT.
+SDValue TargetLowering::makeLibCall(SelectionDAG &DAG,
+ RTLIB::Libcall LC, EVT RetVT,
+ const SDValue *Ops, unsigned NumOps,
+ bool isSigned, DebugLoc dl) const {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumOps);
+
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ Entry.Node = Ops[i];
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy());
+
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ TargetLowering::
+ CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ false, 0, getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue,SDValue> CallInfo = LowerCallTo(CLI);
+
+ return CallInfo.first;
}
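
makeLibCall folds the ArgListTy boilerplate and the LowerCallTo invocation into a single call, so a target can expand a node into a runtime routine in a few lines. A hedged sketch of a custom-lowering hook built on it (LowerFREM is a hypothetical helper; the makeLibCall signature matches the one above):

    // Hypothetical: expand a 32-bit FREM node into a call to fmodf via the
    // RTLIB table (RTLIB::REM_F32 -> "fmodf").
    SDValue LowerFREM(SDValue Op, SelectionDAG &DAG,
                      const TargetLowering &TLI) {
      SDValue Ops[2] = { Op.getOperand(0), Op.getOperand(1) };
      return TLI.makeLibCall(DAG, RTLIB::REM_F32, MVT::f32, Ops, 2,
                             /*isSigned=*/false, Op.getDebugLoc());
    }
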
-MVT::SimpleValueType TargetLowering::getCmpLibcallReturnType() const {
- return MVT::i32; // return the default value
-}
-/// getVectorTypeBreakdown - Vector types are broken down into some number of
-/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
-/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
-/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
-///
-/// This method returns the number of registers needed, and the VT for each
-/// register. It also returns the VT and quantity of the intermediate values
-/// before they are promoted/expanded.
-///
-unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
- EVT &IntermediateVT,
- unsigned &NumIntermediates,
- MVT &RegisterVT) const {
- unsigned NumElts = VT.getVectorNumElements();
-
- // If there is a wider vector type with the same element type as this one,
- // or a promoted vector type that has the same number of elements which
- // are wider, then we should convert to that legal vector type.
- // This handles things like <2 x float> -> <4 x float> and
- // <4 x i1> -> <4 x i32>.
- LegalizeTypeAction TA = getTypeAction(Context, VT);
- if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
- EVT RegisterEVT = getTypeToTransformTo(Context, VT);
- if (isTypeLegal(RegisterEVT)) {
- IntermediateVT = RegisterEVT;
- RegisterVT = RegisterEVT.getSimpleVT();
- NumIntermediates = 1;
- return 1;
+/// softenSetCCOperands - Soften the operands of a comparison. This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
+ SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ DebugLoc dl) const {
+ assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128)
+ && "Unsupported setcc type!");
+
+ // Expand into one or more soft-fp libcall(s).
+ RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
+ switch (CCCode) {
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
+ (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128;
+ break;
+ case ISD::SETNE:
+ case ISD::SETUNE:
+ LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
+ (VT == MVT::f64) ? RTLIB::UNE_F64 : RTLIB::UNE_F128;
+ break;
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
+ (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128;
+ break;
+ case ISD::SETLT:
+ case ISD::SETOLT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+ break;
+ case ISD::SETLE:
+ case ISD::SETOLE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
+ (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128;
+ break;
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+ (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
+ break;
+ case ISD::SETUO:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
+ (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128;
+ break;
+ case ISD::SETO:
+ LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
+ (VT == MVT::f64) ? RTLIB::O_F64 : RTLIB::O_F128;
+ break;
+ default:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
+ (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128;
+ switch (CCCode) {
+ case ISD::SETONE:
+ // SETONE = SETOLT | SETOGT
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+ // Fallthrough
+ case ISD::SETUGT:
+ LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+ (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
+ break;
+ case ISD::SETUGE:
+ LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
+ (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128;
+ break;
+ case ISD::SETULT:
+ LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+ break;
+ case ISD::SETULE:
+ LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
+ (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128;
+ break;
+ case ISD::SETUEQ:
+ LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
+ (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128;
+ break;
+ default: llvm_unreachable("Do not know how to soften this setcc!");
}
}
- // Figure out the right, legal destination reg to copy into.
- EVT EltTy = VT.getVectorElementType();
-
- unsigned NumVectorRegs = 1;
-
- // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
- // could break down into LHS/RHS like LegalizeDAG does.
- if (!isPowerOf2_32(NumElts)) {
- NumVectorRegs = NumElts;
- NumElts = 1;
+ // Use the target-specific return value for comparison libcalls.
+ EVT RetVT = getCmpLibcallReturnType();
+ SDValue Ops[2] = { NewLHS, NewRHS };
+ NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+ NewRHS = DAG.getConstant(0, RetVT);
+ CCCode = getCmpLibcallCC(LC1);
+ if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
+ SDValue Tmp = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(RetVT),
+ NewLHS, NewRHS, DAG.getCondCode(CCCode));
+ NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+ NewLHS = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(RetVT), NewLHS,
+ NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
+ NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
+ NewRHS = SDValue();
}
-
- // Divide the input until we get to a supported size. This will always
- // end with a scalar if the target doesn't support vectors.
- while (NumElts > 1 && !isTypeLegal(
- EVT::getVectorVT(Context, EltTy, NumElts))) {
- NumElts >>= 1;
- NumVectorRegs <<= 1;
- }
-
- NumIntermediates = NumVectorRegs;
-
- EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
- if (!isTypeLegal(NewVT))
- NewVT = EltTy;
- IntermediateVT = NewVT;
-
- MVT DestVT = getRegisterType(Context, NewVT);
- RegisterVT = DestVT;
- unsigned NewVTSize = NewVT.getSizeInBits();
-
- // Convert sizes such as i33 to i64.
- if (!isPowerOf2_32(NewVTSize))
- NewVTSize = NextPowerOf2(NewVTSize);
-
- if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
- return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
-
- // Otherwise, promotion or legal types use the same number of registers as
- // the vector decimated to the appropriate level.
- return NumVectorRegs;
-}
-
-/// Get the EVTs and ArgFlags collections that represent the legalized return
-/// type of the given function. This does not require a DAG or a return value,
-/// and is suitable for use before any DAGs for the function are constructed.
-/// TODO: Move this out of TargetLowering.cpp.
-void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr,
- SmallVectorImpl<ISD::OutputArg> &Outs,
- const TargetLowering &TLI) {
- SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, ReturnType, ValueVTs);
- unsigned NumValues = ValueVTs.size();
- if (NumValues == 0) return;
-
- for (unsigned j = 0, f = NumValues; j != f; ++j) {
- EVT VT = ValueVTs[j];
- ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
-
- if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
- ExtendKind = ISD::SIGN_EXTEND;
- else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
- ExtendKind = ISD::ZERO_EXTEND;
-
- // FIXME: C calling convention requires the return type to be promoted to
- // at least 32-bit. But this is not necessary for non-C calling
- // conventions. The frontend should mark functions whose return values
- // require promoting with signext or zeroext attributes.
- if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
- MVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
- if (VT.bitsLT(MinVT))
- VT = MinVT;
- }
-
- unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
- MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
-
- // 'inreg' on function refers to return value
- ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::InReg))
- Flags.setInReg();
-
- // Propagate extension type if any
- if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
- Flags.setSExt();
- else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
- Flags.setZExt();
-
- for (unsigned i = 0; i < NumParts; ++i)
- Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0));
- }
-}
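For orientation, a trace of the promotion above (a sketch against the LLVM headers of this era, mirroring the loop's locals, not standalone code): a function returning signext i8 under the C convention ends up as one i32 part with the SExt flag set.

    // Sketch: what one iteration computes for a `signext i8` return on a
    // target whose minimum integer register type is i32.
    EVT VT = MVT::i8;                             // from ComputeValueVTs
    ISD::NodeType ExtendKind = ISD::SIGN_EXTEND;  // Attribute::SExt was present
    MVT MinVT = MVT::i32;                         // TLI.getRegisterType(Ctx, MVT::i32)
    if (ExtendKind != ISD::ANY_EXTEND && VT.bitsLT(MinVT))
      VT = MinVT;                                 // i8 is promoted to i32
    // Result: NumParts == 1, PartVT == MVT::i32, and Flags.setSExt() is called.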
-
-/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
-/// function arguments in the caller parameter area. This is the actual
-/// alignment, not its logarithm.
-unsigned TargetLowering::getByValTypeAlignment(Type *Ty) const {
- return TD->getCallFrameTypeAlignment(Ty);
}
/// getJumpTableEncoding - Return the entry encoding for a jump table in the
@@ -1199,103 +253,6 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
}
//===----------------------------------------------------------------------===//
-// TargetTransformInfo Helpers
-//===----------------------------------------------------------------------===//
-
-int TargetLowering::InstructionOpcodeToISD(unsigned Opcode) const {
- enum InstructionOpcodes {
-#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
-#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
-#include "llvm/IR/Instruction.def"
- };
- switch (static_cast<InstructionOpcodes>(Opcode)) {
- case Ret: return 0;
- case Br: return 0;
- case Switch: return 0;
- case IndirectBr: return 0;
- case Invoke: return 0;
- case Resume: return 0;
- case Unreachable: return 0;
- case Add: return ISD::ADD;
- case FAdd: return ISD::FADD;
- case Sub: return ISD::SUB;
- case FSub: return ISD::FSUB;
- case Mul: return ISD::MUL;
- case FMul: return ISD::FMUL;
- case UDiv: return ISD::UDIV;
- case SDiv: return ISD::SDIV;
- case FDiv: return ISD::FDIV;
- case URem: return ISD::UREM;
- case SRem: return ISD::SREM;
- case FRem: return ISD::FREM;
- case Shl: return ISD::SHL;
- case LShr: return ISD::SRL;
- case AShr: return ISD::SRA;
- case And: return ISD::AND;
- case Or: return ISD::OR;
- case Xor: return ISD::XOR;
- case Alloca: return 0;
- case Load: return ISD::LOAD;
- case Store: return ISD::STORE;
- case GetElementPtr: return 0;
- case Fence: return 0;
- case AtomicCmpXchg: return 0;
- case AtomicRMW: return 0;
- case Trunc: return ISD::TRUNCATE;
- case ZExt: return ISD::ZERO_EXTEND;
- case SExt: return ISD::SIGN_EXTEND;
- case FPToUI: return ISD::FP_TO_UINT;
- case FPToSI: return ISD::FP_TO_SINT;
- case UIToFP: return ISD::UINT_TO_FP;
- case SIToFP: return ISD::SINT_TO_FP;
- case FPTrunc: return ISD::FP_ROUND;
- case FPExt: return ISD::FP_EXTEND;
- case PtrToInt: return ISD::BITCAST;
- case IntToPtr: return ISD::BITCAST;
- case BitCast: return ISD::BITCAST;
- case ICmp: return ISD::SETCC;
- case FCmp: return ISD::SETCC;
- case PHI: return 0;
- case Call: return 0;
- case Select: return ISD::SELECT;
- case UserOp1: return 0;
- case UserOp2: return 0;
- case VAArg: return 0;
- case ExtractElement: return ISD::EXTRACT_VECTOR_ELT;
- case InsertElement: return ISD::INSERT_VECTOR_ELT;
- case ShuffleVector: return ISD::VECTOR_SHUFFLE;
- case ExtractValue: return ISD::MERGE_VALUES;
- case InsertValue: return ISD::MERGE_VALUES;
- case LandingPad: return 0;
- }
-
- llvm_unreachable("Unknown instruction type encountered!");
-}
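A hypothetical consumer of this mapping (in-tree it feeds the cost model): opcodes with a direct SelectionDAG counterpart get that node; control-flow-like opcodes get 0 and must be costed separately.

    int ISDAdd = TLI->InstructionOpcodeToISD(Instruction::Add);
    assert(ISDAdd == ISD::ADD && "arithmetic maps 1:1 onto DAG nodes");
    assert(TLI->InstructionOpcodeToISD(Instruction::PHI) == 0 &&
           "no single DAG node models control flow");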
-
-std::pair<unsigned, MVT>
-TargetLowering::getTypeLegalizationCost(Type *Ty) const {
- LLVMContext &C = Ty->getContext();
- EVT MTy = getValueType(Ty);
-
- unsigned Cost = 1;
- // We keep legalizing the type until we find a legal kind. We assume that
- // the only operation that costs anything is the split. After splitting
- // we need to handle two types.
- while (true) {
- LegalizeKind LK = getTypeConversion(C, MTy);
-
- if (LK.first == TypeLegal)
- return std::make_pair(Cost, MTy.getSimpleVT());
-
- if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger)
- Cost *= 2;
-
- // Keep legalizing the type.
- MTy = LK.second;
- }
-}
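A worked example of the doubling loop above, assuming a 32-bit target where i64 is not legal: getTypeConversion reports TypeExpandInteger (i64 -> i32), the cost doubles once, and i32 is legal, so the result is {2, MVT::i32}; an already-legal type costs {1, itself}.

    std::pair<unsigned, MVT> C =
        TLI->getTypeLegalizationCost(Type::getInt64Ty(Ctx)); // Ctx: an LLVMContext
    assert(C.first == 2 && C.second == MVT::i32);            // assumed 32-bit target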
-
-//===----------------------------------------------------------------------===//
// Optimization Methods
//===----------------------------------------------------------------------===//
@@ -2239,7 +1196,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
APInt newMask = APInt::getLowBitsSet(maskWidth, width);
for (unsigned offset=0; offset<origWidth/width; offset++) {
if ((newMask & Mask) == Mask) {
- if (!TD->isLittleEndian())
+ if (!getDataLayout()->isLittleEndian())
bestOffset = (origWidth/width - offset - 1) * (width/8);
else
bestOffset = (uint64_t)offset * (width/8);
@@ -2913,7 +1870,9 @@ PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
TargetLowering::ConstraintType
TargetLowering::getConstraintType(const std::string &Constraint) const {
- if (Constraint.size() == 1) {
+ unsigned S = Constraint.size();
+
+ if (S == 1) {
switch (Constraint[0]) {
default: break;
case 'r': return C_RegisterClass;
@@ -2942,9 +1901,11 @@ TargetLowering::getConstraintType(const std::string &Constraint) const {
}
}
- if (Constraint.size() > 1 && Constraint[0] == '{' &&
- Constraint[Constraint.size()-1] == '}')
+ if (S > 1 && Constraint[0] == '{' && Constraint[S-1] == '}') {
+ if (S == 8 && !Constraint.compare(1, 6, "memory", 6)) // "{memory}"
+ return C_Memory;
return C_Register;
+ }
return C_Unknown;
}
@@ -3040,7 +2001,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
// Figure out which register class contains this reg.
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetRegisterInfo *RI = getTargetMachine().getRegisterInfo();
for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
E = RI->regclass_end(); RCI != E; ++RCI) {
const TargetRegisterClass *RC = *RCI;
@@ -3077,7 +2038,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
/// a matching constraint like "4".
bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
assert(!ConstraintCode.empty() && "No known constraint!");
- return isdigit(ConstraintCode[0]);
+ return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
}
/// getMatchedOperand - If this is an input matching constraint, this method
@@ -3164,7 +2125,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
// If OpTy is not a single value, it may be a struct/union that we
// can tile with integers.
if (!OpTy->isSingleValueType() && OpTy->isSized()) {
- unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+ unsigned BitSize = getDataLayout()->getTypeSizeInBits(OpTy);
switch (BitSize) {
default: break;
case 1:
@@ -3179,7 +2140,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
}
} else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
OpInfo.ConstraintVT = MVT::getIntegerVT(
- 8*TD->getPointerSize(PT->getAddressSpace()));
+ 8*getDataLayout()->getPointerSize(PT->getAddressSpace()));
} else {
OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
}
@@ -3474,44 +2435,6 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
}
}
-//===----------------------------------------------------------------------===//
-// Loop Strength Reduction hooks
-//===----------------------------------------------------------------------===//
-
-/// isLegalAddressingMode - Return true if the addressing mode represented
-/// by AM is legal for this target, for a load/store of the specified type.
-bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
- // The default implementation of this implements a conservative RISCy, r+r and
- // r+i addr mode.
-
- // Allows a sign-extended 16-bit immediate field.
- if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
- return false;
-
- // No global is ever allowed as a base.
- if (AM.BaseGV)
- return false;
-
- // Only support r+r,
- switch (AM.Scale) {
- case 0: // "r+i" or just "i", depending on HasBaseReg.
- break;
- case 1:
- if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
- return false;
- // Otherwise we have r+r or r+i.
- break;
- case 2:
- if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
- return false;
- // Allow 2*r as r+r.
- break;
- }
-
- return true;
-}
-
/// BuildExactDiv - Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
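The identity behind this helper, as a standalone numeric sketch (hypothetical helper name; the in-tree version also handles even divisors by shifting out their power-of-two factor first): if x == d*q exactly and d is odd, then q == x * d^-1 (mod 2^N), so one mul replaces the sdiv.

    #include <cassert>
    #include <cstdint>

    // Multiplicative inverse of an odd 32-bit constant via Newton's iteration;
    // the seed D is correct to 3 bits (D*D == 1 mod 8 for odd D) and each step
    // doubles the number of correct low bits, so 5 steps cover 32 bits.
    static uint32_t modInverse(uint32_t D) {
      uint32_t X = D;
      for (int I = 0; I < 5; ++I)
        X *= 2 - D * X;
      return X;
    }

    int main() {
      int32_t D = 7, Q = -12345;
      int32_t X = D * Q;                          // exact division by construction
      uint32_t Inv = modInverse((uint32_t)D);
      assert((int32_t)((uint32_t)X * Inv) == Q);  // the mul replaces the sdiv
      return 0;
    }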
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 09e923c..b58bb85 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -43,7 +43,7 @@ STATISTIC(NumSpilled, "Number of registers live across unwind edges");
namespace {
class SjLjEHPrepare : public FunctionPass {
- const TargetLowering *TLI;
+ const TargetLoweringBase *TLI;
Type *FunctionContextTy;
Constant *RegisterFn;
Constant *UnregisterFn;
@@ -58,7 +58,7 @@ namespace {
AllocaInst *FuncCtx;
public:
static char ID; // Pass identification, replacement for typeid
- explicit SjLjEHPrepare(const TargetLowering *tli = NULL)
+ explicit SjLjEHPrepare(const TargetLoweringBase *tli = NULL)
: FunctionPass(ID), TLI(tli) { }
bool doInitialization(Module &M);
bool runOnFunction(Function &F);
@@ -82,7 +82,7 @@ namespace {
char SjLjEHPrepare::ID = 0;
// Public Interface To the SjLjEHPrepare pass.
-FunctionPass *llvm::createSjLjEHPreparePass(const TargetLowering *TLI) {
+FunctionPass *llvm::createSjLjEHPreparePass(const TargetLoweringBase *TLI) {
return new SjLjEHPrepare(TLI);
}
// doInitialization - Set up declarations and types needed to process
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index 95faafa..20049a8 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -142,6 +142,76 @@ void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {
++NumLocalRenum;
}
+// Repair indexes after adding and removing instructions.
+void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End) {
+ // FIXME: Is this really necessary? The only caller repairIntervalsForRange()
+ // does the same thing.
+ // Find anchor points, which are at the beginning/end of blocks or at
+ // instructions that already have indexes.
+ while (Begin != MBB->begin() && !hasIndex(Begin))
+ --Begin;
+ while (End != MBB->end() && !hasIndex(End))
+ ++End;
+
+ bool includeStart = (Begin == MBB->begin());
+ SlotIndex startIdx;
+ if (includeStart)
+ startIdx = getMBBStartIdx(MBB);
+ else
+ startIdx = getInstructionIndex(Begin);
+
+ SlotIndex endIdx;
+ if (End == MBB->end())
+ endIdx = getMBBEndIdx(MBB);
+ else
+ endIdx = getInstructionIndex(End);
+
+ // FIXME: Conceptually, this code is implementing an iterator on MBB that
+ // optionally includes an additional position prior to MBB->begin(), indicated
+ // by the includeStart flag. This is done so that we can iterate MIs in a MBB
+ // in parallel with SlotIndexes, but there should be a better way to do this.
+ IndexList::iterator ListB = startIdx.listEntry();
+ IndexList::iterator ListI = endIdx.listEntry();
+ MachineBasicBlock::iterator MBBI = End;
+ bool pastStart = false;
+ while (ListI != ListB || MBBI != Begin || (includeStart && !pastStart)) {
+ assert(ListI->getIndex() >= startIdx.getIndex() &&
+ (includeStart || !pastStart) &&
+ "Decremented past the beginning of region to repair.");
+
+ MachineInstr *SlotMI = ListI->getInstr();
+ MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? MBBI : 0;
+ bool MBBIAtBegin = MBBI == Begin && (!includeStart || pastStart);
+
+ if (SlotMI == MI && !MBBIAtBegin) {
+ --ListI;
+ if (MBBI != Begin)
+ --MBBI;
+ else
+ pastStart = true;
+ } else if (MI && mi2iMap.find(MI) == mi2iMap.end()) {
+ if (MBBI != Begin)
+ --MBBI;
+ else
+ pastStart = true;
+ } else {
+ --ListI;
+ if (SlotMI)
+ removeMachineInstrFromMaps(SlotMI);
+ }
+ }
+
+ // In theory this could be combined with the previous loop, but it is tricky
+ // to update the IndexList while we are iterating it.
+ for (MachineBasicBlock::iterator I = End; I != Begin;) {
+ --I;
+ MachineInstr *MI = I;
+ if (!MI->isDebugValue() && mi2iMap.find(MI) == mi2iMap.end())
+ insertMachineInstrInMaps(MI);
+ }
+}
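A hypothetical caller, to show the intended protocol: a pass splices instructions in or out of [Begin, End) of MBB and then asks SlotIndexes to catch up. The first loop above walks backwards pruning mappings for instructions that disappeared; the second inserts indexes for the new, non-debug instructions. In-tree, the repairIntervalsForRange() mentioned in the FIXME drives this.

    // Sketch (names as in the function above):
    // ... pass mutates MBB between Begin and End ...
    Indexes->repairIndexesInRange(MBB, Begin, End);
    // Every surviving non-debug MI in the range has a SlotIndex again.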
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void SlotIndexes::dump() const {
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index 42502eb..ec44b8c 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -23,7 +23,6 @@
#define DEBUG_TYPE "stackcoloring"
#include "llvm/CodeGen/Passes.h"
-#include "MachineTraceMetrics.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
@@ -103,12 +102,13 @@ class StackColoring : public MachineFunctionPass {
};
/// Maps active slots (per bit) for each basic block.
- DenseMap<MachineBasicBlock*, BlockLifetimeInfo> BlockLiveness;
+ typedef DenseMap<const MachineBasicBlock*, BlockLifetimeInfo> LivenessMap;
+ LivenessMap BlockLiveness;
/// Maps serial numbers to basic blocks.
- DenseMap<MachineBasicBlock*, int> BasicBlocks;
+ DenseMap<const MachineBasicBlock*, int> BasicBlocks;
/// Maps basic blocks to a serial number.
- SmallVector<MachineBasicBlock*, 8> BasicBlockNumbering;
+ SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering;
/// Maps liveness intervals for each slot.
SmallVector<LiveInterval*, 16> Intervals;
@@ -145,7 +145,7 @@ public:
private:
/// Debug.
- void dump();
+ void dump() const;
/// Removes all of the lifetime marker instructions from the function.
/// \returns true if any markers were removed.
@@ -200,31 +200,35 @@ void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-void StackColoring::dump() {
+void StackColoring::dump() const {
for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF);
FI != FE; ++FI) {
- unsigned Num = BasicBlocks[*FI];
- DEBUG(dbgs()<<"Inspecting block #"<<Num<<" ["<<FI->getName()<<"]\n");
- Num = 0;
+ DEBUG(dbgs()<<"Inspecting block #"<<BasicBlocks.lookup(*FI)<<
+ " ["<<FI->getName()<<"]\n");
+
+ LivenessMap::const_iterator BI = BlockLiveness.find(*FI);
+ assert(BI != BlockLiveness.end() && "Block not found");
+ const BlockLifetimeInfo &BlockInfo = BI->second;
+
DEBUG(dbgs()<<"BEGIN : {");
- for (unsigned i=0; i < BlockLiveness[*FI].Begin.size(); ++i)
- DEBUG(dbgs()<<BlockLiveness[*FI].Begin.test(i)<<" ");
+ for (unsigned i=0; i < BlockInfo.Begin.size(); ++i)
+ DEBUG(dbgs()<<BlockInfo.Begin.test(i)<<" ");
DEBUG(dbgs()<<"}\n");
DEBUG(dbgs()<<"END : {");
- for (unsigned i=0; i < BlockLiveness[*FI].End.size(); ++i)
- DEBUG(dbgs()<<BlockLiveness[*FI].End.test(i)<<" ");
+ for (unsigned i=0; i < BlockInfo.End.size(); ++i)
+ DEBUG(dbgs()<<BlockInfo.End.test(i)<<" ");
DEBUG(dbgs()<<"}\n");
DEBUG(dbgs()<<"LIVE_IN: {");
- for (unsigned i=0; i < BlockLiveness[*FI].LiveIn.size(); ++i)
- DEBUG(dbgs()<<BlockLiveness[*FI].LiveIn.test(i)<<" ");
+ for (unsigned i=0; i < BlockInfo.LiveIn.size(); ++i)
+ DEBUG(dbgs()<<BlockInfo.LiveIn.test(i)<<" ");
DEBUG(dbgs()<<"}\n");
DEBUG(dbgs()<<"LIVEOUT: {");
- for (unsigned i=0; i < BlockLiveness[*FI].LiveOut.size(); ++i)
- DEBUG(dbgs()<<BlockLiveness[*FI].LiveOut.test(i)<<" ");
+ for (unsigned i=0; i < BlockInfo.LiveOut.size(); ++i)
+ DEBUG(dbgs()<<BlockInfo.LiveOut.test(i)<<" ");
DEBUG(dbgs()<<"}\n");
}
}
@@ -242,8 +246,11 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
BasicBlocks[*FI] = BasicBlockNumbering.size();
BasicBlockNumbering.push_back(*FI);
- BlockLiveness[*FI].Begin.resize(NumSlot);
- BlockLiveness[*FI].End.resize(NumSlot);
+ // Keep a reference to avoid repeated lookups.
+ BlockLifetimeInfo &BlockInfo = BlockLiveness[*FI];
+
+ BlockInfo.Begin.resize(NumSlot);
+ BlockInfo.End.resize(NumSlot);
for (MachineBasicBlock::iterator BI = (*FI)->begin(), BE = (*FI)->end();
BI != BE; ++BI) {
@@ -255,7 +262,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
Markers.push_back(BI);
bool IsStart = BI->getOpcode() == TargetOpcode::LIFETIME_START;
- MachineOperand &MI = BI->getOperand(0);
+ const MachineOperand &MI = BI->getOperand(0);
unsigned Slot = MI.getIndex();
MarkersFound++;
@@ -267,15 +274,15 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
}
if (IsStart) {
- BlockLiveness[*FI].Begin.set(Slot);
+ BlockInfo.Begin.set(Slot);
} else {
- if (BlockLiveness[*FI].Begin.test(Slot)) {
+ if (BlockInfo.Begin.test(Slot)) {
// Allocas that start and end within a single block are handled
// specially when computing the LiveIntervals to avoid pessimizing
// the liveness propagation.
- BlockLiveness[*FI].Begin.reset(Slot);
+ BlockInfo.Begin.reset(Slot);
} else {
- BlockLiveness[*FI].End.set(Slot);
+ BlockInfo.End.set(Slot);
}
}
}
@@ -292,47 +299,58 @@ void StackColoring::calculateLocalLiveness() {
// formulation, and END is equivalent to GEN. The result of this computation
// is a map from blocks to bitvectors where the bitvectors represent which
// allocas are live in/out of that block.
- SmallPtrSet<MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(),
- BasicBlockNumbering.end());
+ SmallPtrSet<const MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(),
+ BasicBlockNumbering.end());
unsigned NumSSMIters = 0;
bool changed = true;
while (changed) {
changed = false;
++NumSSMIters;
- SmallPtrSet<MachineBasicBlock*, 8> NextBBSet;
+ SmallPtrSet<const MachineBasicBlock*, 8> NextBBSet;
- for (SmallVector<MachineBasicBlock*, 8>::iterator
+ for (SmallVector<const MachineBasicBlock*, 8>::iterator
PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end();
PI != PE; ++PI) {
- MachineBasicBlock *BB = *PI;
+ const MachineBasicBlock *BB = *PI;
if (!BBSet.count(BB)) continue;
+ // Use an iterator to avoid repeated lookups.
+ LivenessMap::iterator BI = BlockLiveness.find(BB);
+ assert(BI != BlockLiveness.end() && "Block not found");
+ BlockLifetimeInfo &BlockInfo = BI->second;
+
BitVector LocalLiveIn;
BitVector LocalLiveOut;
// Forward propagation from begins to ends.
- for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
- PE = BB->pred_end(); PI != PE; ++PI)
- LocalLiveIn |= BlockLiveness[*PI].LiveOut;
- LocalLiveIn |= BlockLiveness[BB].End;
- LocalLiveIn.reset(BlockLiveness[BB].Begin);
+ for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
+ PE = BB->pred_end(); PI != PE; ++PI) {
+ LivenessMap::const_iterator I = BlockLiveness.find(*PI);
+ assert(I != BlockLiveness.end() && "Predecessor not found");
+ LocalLiveIn |= I->second.LiveOut;
+ }
+ LocalLiveIn |= BlockInfo.End;
+ LocalLiveIn.reset(BlockInfo.Begin);
// Reverse propagation from ends to begins.
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
- SE = BB->succ_end(); SI != SE; ++SI)
- LocalLiveOut |= BlockLiveness[*SI].LiveIn;
- LocalLiveOut |= BlockLiveness[BB].Begin;
- LocalLiveOut.reset(BlockLiveness[BB].End);
+ for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI) {
+ LivenessMap::const_iterator I = BlockLiveness.find(*SI);
+ assert(I != BlockLiveness.end() && "Successor not found");
+ LocalLiveOut |= I->second.LiveIn;
+ }
+ LocalLiveOut |= BlockInfo.Begin;
+ LocalLiveOut.reset(BlockInfo.End);
LocalLiveIn |= LocalLiveOut;
LocalLiveOut |= LocalLiveIn;
// After adopting the live bits, we need to turn off the bits which
// are deactivated in this block.
- LocalLiveOut.reset(BlockLiveness[BB].End);
- LocalLiveIn.reset(BlockLiveness[BB].Begin);
+ LocalLiveOut.reset(BlockInfo.End);
+ LocalLiveIn.reset(BlockInfo.Begin);
// If we have both BEGIN and END markers in the same basic block then
// we know that the BEGIN marker comes after the END, because we already
@@ -341,25 +359,25 @@ void StackColoring::calculateLocalLiveness() {
// Want to enable the LIVE_IN and LIVE_OUT of slots that have both
// BEGIN and END because it means that the value lives before and after
// this basic block.
- BitVector LocalEndBegin = BlockLiveness[BB].End;
- LocalEndBegin &= BlockLiveness[BB].Begin;
+ BitVector LocalEndBegin = BlockInfo.End;
+ LocalEndBegin &= BlockInfo.Begin;
LocalLiveIn |= LocalEndBegin;
LocalLiveOut |= LocalEndBegin;
- if (LocalLiveIn.test(BlockLiveness[BB].LiveIn)) {
+ if (LocalLiveIn.test(BlockInfo.LiveIn)) {
changed = true;
- BlockLiveness[BB].LiveIn |= LocalLiveIn;
+ BlockInfo.LiveIn |= LocalLiveIn;
- for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
PE = BB->pred_end(); PI != PE; ++PI)
NextBBSet.insert(*PI);
}
- if (LocalLiveOut.test(BlockLiveness[BB].LiveOut)) {
+ if (LocalLiveOut.test(BlockInfo.LiveOut)) {
changed = true;
- BlockLiveness[BB].LiveOut |= LocalLiveOut;
+ BlockInfo.LiveOut |= LocalLiveOut;
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
NextBBSet.insert(*SI);
}
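The transfer functions implemented above, restated as a compact sketch (assumed simplified Block record; the in-tree loop additionally cross-merges the two sets and re-adds the End&Begin slots before testing for growth): BEGIN plays the KILL role and END plays GEN, iterated to a fixed point.

    #include "llvm/ADT/BitVector.h"
    #include "llvm/ADT/SmallVector.h"

    struct Block {
      llvm::BitVector Begin, End, LiveIn, LiveOut;
      llvm::SmallVector<Block *, 4> Preds, Succs;
    };

    // One fixed-point step; returns true if LiveIn/LiveOut grew.
    static bool updateLiveness(Block &B) {
      llvm::BitVector In(B.Begin.size()), Out(B.Begin.size());
      for (unsigned I = 0, E = B.Preds.size(); I != E; ++I)
        In |= B.Preds[I]->LiveOut;                // forward join over preds
      In |= B.End;                                // GEN
      In.reset(B.Begin);                          // KILL
      for (unsigned I = 0, E = B.Succs.size(); I != E; ++I)
        Out |= B.Succs[I]->LiveIn;                // backward join over succs
      Out |= B.Begin;
      Out.reset(B.End);
      bool Changed = In.test(B.LiveIn) || Out.test(B.LiveOut);
      B.LiveIn |= In;                             // adopt any new bits
      B.LiveOut |= Out;
      return Changed;
    }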
@@ -383,9 +401,9 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
Finishes.resize(NumSlots);
// Create the interval for the basic blocks with lifetime markers in them.
- for (SmallVector<MachineInstr*, 8>::iterator it = Markers.begin(),
+ for (SmallVectorImpl<MachineInstr*>::const_iterator it = Markers.begin(),
e = Markers.end(); it != e; ++it) {
- MachineInstr *MI = *it;
+ const MachineInstr *MI = *it;
if (MI->getParent() != MBB)
continue;
@@ -394,7 +412,7 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
"Invalid Lifetime marker");
bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START;
- MachineOperand &Mo = MI->getOperand(0);
+ const MachineOperand &Mo = MI->getOperand(0);
int Slot = Mo.getIndex();
assert(Slot >= 0 && "Invalid slot");
@@ -481,7 +499,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
// Keep a list of *allocas* which need to be remapped.
DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
- for (DenseMap<int, int>::iterator it = SlotRemap.begin(),
+ for (DenseMap<int, int>::const_iterator it = SlotRemap.begin(),
e = SlotRemap.end(); it != e; ++it) {
const AllocaInst *From = MFI->getObjectAllocation(it->first);
const AllocaInst *To = MFI->getObjectAllocation(it->second);
@@ -576,8 +594,8 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
}
void StackColoring::removeInvalidSlotRanges() {
- MachineFunction::iterator BB, BBE;
- MachineBasicBlock::iterator I, IE;
+ MachineFunction::const_iterator BB, BBE;
+ MachineBasicBlock::const_iterator I, IE;
for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB)
for (I = BB->begin(), IE = BB->end(); I != IE; ++I) {
@@ -596,7 +614,7 @@ void StackColoring::removeInvalidSlotRanges() {
// Check all of the machine operands.
for (unsigned i = 0 ; i < I->getNumOperands(); ++i) {
- MachineOperand &MO = I->getOperand(i);
+ const MachineOperand &MO = I->getOperand(i);
if (!MO.isFI())
continue;
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 665388b..f3be37c 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -16,6 +16,8 @@
#define DEBUG_TYPE "stack-protector"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/IR/Attributes.h"
@@ -32,17 +34,27 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+STATISTIC(NumFunProtected, "Number of functions protected");
+STATISTIC(NumAddrTaken, "Number of local variables that have their address"
+ " taken.");
+
namespace {
class StackProtector : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// target type sizes.
- const TargetLowering *TLI;
+ const TargetLoweringBase *TLI;
Function *F;
Module *M;
DominatorTree *DT;
+ /// VisitedPHIs - The set of PHI nodes visited when determining
+ /// if a variable's address has been taken. This set
+ /// is maintained to ensure we don't visit the same PHI node multiple
+ /// times.
+ SmallPtrSet<const PHINode*, 16> VisitedPHIs;
+
/// InsertStackProtectors - Insert code into the prologue and epilogue of
/// the function.
///
@@ -58,17 +70,21 @@ namespace {
/// ContainsProtectableArray - Check whether the type either is an array or
/// contains an array of sufficient size so that we need stack protectors
/// for it.
- bool ContainsProtectableArray(Type *Ty, bool InStruct = false) const;
+ bool ContainsProtectableArray(Type *Ty, bool Strong = false,
+ bool InStruct = false) const;
+
+ /// \brief Check whether a stack allocation has its address taken.
+ bool HasAddressTaken(const Instruction *AI);
/// RequiresStackProtector - Check whether or not this function needs a
/// stack protector based upon the stack protector level.
- bool RequiresStackProtector() const;
+ bool RequiresStackProtector();
public:
static char ID; // Pass identification, replacement for typeid.
StackProtector() : FunctionPass(ID), TLI(0) {
initializeStackProtectorPass(*PassRegistry::getPassRegistry());
}
- StackProtector(const TargetLowering *tli)
+ StackProtector(const TargetLoweringBase *tli)
: FunctionPass(ID), TLI(tli) {
initializeStackProtectorPass(*PassRegistry::getPassRegistry());
}
@@ -85,7 +101,7 @@ char StackProtector::ID = 0;
INITIALIZE_PASS(StackProtector, "stack-protector",
"Insert stack protectors", false, false)
-FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) {
+FunctionPass *llvm::createStackProtectorPass(const TargetLoweringBase *tli) {
return new StackProtector(tli);
}
@@ -96,15 +112,21 @@ bool StackProtector::runOnFunction(Function &Fn) {
if (!RequiresStackProtector()) return false;
+ ++NumFunProtected;
return InsertStackProtectors();
}
/// ContainsProtectableArray - Check whether the type either is an array or
/// contains a char array of sufficient size so that we need stack protectors
/// for it.
-bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const {
+bool StackProtector::ContainsProtectableArray(Type *Ty, bool Strong,
+ bool InStruct) const {
if (!Ty) return false;
if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ // In strong mode, any array, regardless of type and size, triggers a
+ // protector.
+ if (Strong)
+ return true;
const TargetMachine &TM = TLI->getTargetMachine();
if (!AT->getElementType()->isIntegerTy(8)) {
Triple Trip(TM.getTargetTriple());
@@ -126,39 +148,103 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const {
for (StructType::element_iterator I = ST->element_begin(),
E = ST->element_end(); I != E; ++I)
- if (ContainsProtectableArray(*I, true))
+ if (ContainsProtectableArray(*I, Strong, true))
return true;
return false;
}
-/// RequiresStackProtector - Check whether or not this function needs a stack
-/// protector based upon the stack protector level. The heuristic we use is to
-/// add a guard variable to functions that call alloca, and functions with
-/// buffers larger than SSPBufferSize bytes.
-bool StackProtector::RequiresStackProtector() const {
+bool StackProtector::HasAddressTaken(const Instruction *AI) {
+ for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE; ++UI) {
+ const User *U = *UI;
+ if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (AI == SI->getValueOperand())
+ return true;
+ } else if (const PtrToIntInst *SI = dyn_cast<PtrToIntInst>(U)) {
+ if (AI == SI->getOperand(0))
+ return true;
+ } else if (isa<CallInst>(U)) {
+ return true;
+ } else if (isa<InvokeInst>(U)) {
+ return true;
+ } else if (const SelectInst *SI = dyn_cast<SelectInst>(U)) {
+ if (HasAddressTaken(SI))
+ return true;
+ } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
+ // Keep track of what PHI nodes we have already visited to ensure
+ // they are only visited once.
+ if (VisitedPHIs.insert(PN))
+ if (HasAddressTaken(PN))
+ return true;
+ } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ if (HasAddressTaken(GEP))
+ return true;
+ } else if (const BitCastInst *BI = dyn_cast<BitCastInst>(U)) {
+ if (HasAddressTaken(BI))
+ return true;
+ }
+ }
+ return false;
+}
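A source-level illustration (simplified, hypothetical names) of what HasAddressTaken flags: the local's address reaches a call, so under sspstrong a protector is required even with no array in sight.

    extern void sink(int *);   // assumed external consumer

    int addr_taken(void) {
      int Local = 0;
      sink(&Local);            // the alloca feeds a call => address taken
      return Local;
    }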
+
+/// \brief Check whether or not this function needs a stack protector based
+/// upon the stack protector level.
+///
+/// We use two heuristics: a standard (ssp) and a strong (sspstrong) one.
+/// The standard heuristic adds a guard variable to functions that call alloca
+/// with either a variable size or a size >= SSPBufferSize, to functions with
+/// character buffers larger than SSPBufferSize, and to functions with
+/// aggregates containing character buffers larger than SSPBufferSize. The
+/// strong heuristic adds a guard variable to functions that call alloca
+/// regardless of size, to functions with any buffer regardless of type and
+/// size, to functions with aggregates that contain any buffer regardless of
+/// type and size, and to functions that contain stack-based variables that
+/// have had their address taken.
+bool StackProtector::RequiresStackProtector() {
+ bool Strong = false;
if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::StackProtectReq))
return true;
-
- if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtect))
+ else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectStrong))
+ Strong = true;
+ else if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtect))
return false;
for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
BasicBlock *BB = I;
for (BasicBlock::iterator
- II = BB->begin(), IE = BB->end(); II != IE; ++II)
+ II = BB->begin(), IE = BB->end(); II != IE; ++II) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
- if (AI->isArrayAllocation())
- // This is a call to alloca with a variable size. Emit stack
- // protectors.
+ if (AI->isArrayAllocation()) {
+ // SSP-Strong: Enable protectors for any call to alloca, regardless
+ // of size.
+ if (Strong)
+ return true;
+
+ if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(AI->getArraySize())) {
+ unsigned BufferSize = TLI->getTargetMachine().Options.SSPBufferSize;
+ if (CI->getLimitedValue(BufferSize) >= BufferSize)
+ // A call to alloca with size >= SSPBufferSize requires
+ // stack protectors.
+ return true;
+ } else // A call to alloca with a variable size requires protectors.
+ return true;
+ }
+
+ if (ContainsProtectableArray(AI->getAllocatedType(), Strong))
return true;
- if (ContainsProtectableArray(AI->getAllocatedType()))
+ if (Strong && HasAddressTaken(AI)) {
+ ++NumAddrTaken;
return true;
+ }
}
+ }
}
return false;
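Source-level illustrations of the two triggers (hypothetical names; SSPBufferSize defaults to 8):

    extern void use(char *);

    char standard_trigger(void) { // plain ssp: constant char buffer >= 8 bytes
      char Buf[16];
      use(Buf);
      return Buf[0];
    }

    char strong_trigger(void) {   // sspstrong only: any array, however small
      char Two[2];                // below SSPBufferSize, so plain ssp skips it
      use(Two);
      return Two[0];
    }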
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index d5fbf14..20eb918 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -80,7 +80,7 @@ unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
strlen(MAI.getSeparatorString())) == 0)
atInsnStart = true;
- if (atInsnStart && !std::isspace(*Str)) {
+ if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
Length += MAI.getMaxInstLength();
atInsnStart = false;
}
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
new file mode 100644
index 0000000..2a02f6a
--- /dev/null
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -0,0 +1,1290 @@
+//===-- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLoweringBase class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cctype>
+using namespace llvm;
+
+/// InitLibcallNames - Set default libcall names.
+///
+static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
+ Names[RTLIB::SHL_I16] = "__ashlhi3";
+ Names[RTLIB::SHL_I32] = "__ashlsi3";
+ Names[RTLIB::SHL_I64] = "__ashldi3";
+ Names[RTLIB::SHL_I128] = "__ashlti3";
+ Names[RTLIB::SRL_I16] = "__lshrhi3";
+ Names[RTLIB::SRL_I32] = "__lshrsi3";
+ Names[RTLIB::SRL_I64] = "__lshrdi3";
+ Names[RTLIB::SRL_I128] = "__lshrti3";
+ Names[RTLIB::SRA_I16] = "__ashrhi3";
+ Names[RTLIB::SRA_I32] = "__ashrsi3";
+ Names[RTLIB::SRA_I64] = "__ashrdi3";
+ Names[RTLIB::SRA_I128] = "__ashrti3";
+ Names[RTLIB::MUL_I8] = "__mulqi3";
+ Names[RTLIB::MUL_I16] = "__mulhi3";
+ Names[RTLIB::MUL_I32] = "__mulsi3";
+ Names[RTLIB::MUL_I64] = "__muldi3";
+ Names[RTLIB::MUL_I128] = "__multi3";
+ Names[RTLIB::MULO_I32] = "__mulosi4";
+ Names[RTLIB::MULO_I64] = "__mulodi4";
+ Names[RTLIB::MULO_I128] = "__muloti4";
+ Names[RTLIB::SDIV_I8] = "__divqi3";
+ Names[RTLIB::SDIV_I16] = "__divhi3";
+ Names[RTLIB::SDIV_I32] = "__divsi3";
+ Names[RTLIB::SDIV_I64] = "__divdi3";
+ Names[RTLIB::SDIV_I128] = "__divti3";
+ Names[RTLIB::UDIV_I8] = "__udivqi3";
+ Names[RTLIB::UDIV_I16] = "__udivhi3";
+ Names[RTLIB::UDIV_I32] = "__udivsi3";
+ Names[RTLIB::UDIV_I64] = "__udivdi3";
+ Names[RTLIB::UDIV_I128] = "__udivti3";
+ Names[RTLIB::SREM_I8] = "__modqi3";
+ Names[RTLIB::SREM_I16] = "__modhi3";
+ Names[RTLIB::SREM_I32] = "__modsi3";
+ Names[RTLIB::SREM_I64] = "__moddi3";
+ Names[RTLIB::SREM_I128] = "__modti3";
+ Names[RTLIB::UREM_I8] = "__umodqi3";
+ Names[RTLIB::UREM_I16] = "__umodhi3";
+ Names[RTLIB::UREM_I32] = "__umodsi3";
+ Names[RTLIB::UREM_I64] = "__umoddi3";
+ Names[RTLIB::UREM_I128] = "__umodti3";
+
+ // These are generally not available.
+ Names[RTLIB::SDIVREM_I8] = 0;
+ Names[RTLIB::SDIVREM_I16] = 0;
+ Names[RTLIB::SDIVREM_I32] = 0;
+ Names[RTLIB::SDIVREM_I64] = 0;
+ Names[RTLIB::SDIVREM_I128] = 0;
+ Names[RTLIB::UDIVREM_I8] = 0;
+ Names[RTLIB::UDIVREM_I16] = 0;
+ Names[RTLIB::UDIVREM_I32] = 0;
+ Names[RTLIB::UDIVREM_I64] = 0;
+ Names[RTLIB::UDIVREM_I128] = 0;
+
+ Names[RTLIB::NEG_I32] = "__negsi2";
+ Names[RTLIB::NEG_I64] = "__negdi2";
+ Names[RTLIB::ADD_F32] = "__addsf3";
+ Names[RTLIB::ADD_F64] = "__adddf3";
+ Names[RTLIB::ADD_F80] = "__addxf3";
+ Names[RTLIB::ADD_F128] = "__addtf3";
+ Names[RTLIB::ADD_PPCF128] = "__gcc_qadd";
+ Names[RTLIB::SUB_F32] = "__subsf3";
+ Names[RTLIB::SUB_F64] = "__subdf3";
+ Names[RTLIB::SUB_F80] = "__subxf3";
+ Names[RTLIB::SUB_F128] = "__subtf3";
+ Names[RTLIB::SUB_PPCF128] = "__gcc_qsub";
+ Names[RTLIB::MUL_F32] = "__mulsf3";
+ Names[RTLIB::MUL_F64] = "__muldf3";
+ Names[RTLIB::MUL_F80] = "__mulxf3";
+ Names[RTLIB::MUL_F128] = "__multf3";
+ Names[RTLIB::MUL_PPCF128] = "__gcc_qmul";
+ Names[RTLIB::DIV_F32] = "__divsf3";
+ Names[RTLIB::DIV_F64] = "__divdf3";
+ Names[RTLIB::DIV_F80] = "__divxf3";
+ Names[RTLIB::DIV_F128] = "__divtf3";
+ Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv";
+ Names[RTLIB::REM_F32] = "fmodf";
+ Names[RTLIB::REM_F64] = "fmod";
+ Names[RTLIB::REM_F80] = "fmodl";
+ Names[RTLIB::REM_F128] = "fmodl";
+ Names[RTLIB::REM_PPCF128] = "fmodl";
+ Names[RTLIB::FMA_F32] = "fmaf";
+ Names[RTLIB::FMA_F64] = "fma";
+ Names[RTLIB::FMA_F80] = "fmal";
+ Names[RTLIB::FMA_F128] = "fmal";
+ Names[RTLIB::FMA_PPCF128] = "fmal";
+ Names[RTLIB::POWI_F32] = "__powisf2";
+ Names[RTLIB::POWI_F64] = "__powidf2";
+ Names[RTLIB::POWI_F80] = "__powixf2";
+ Names[RTLIB::POWI_F128] = "__powitf2";
+ Names[RTLIB::POWI_PPCF128] = "__powitf2";
+ Names[RTLIB::SQRT_F32] = "sqrtf";
+ Names[RTLIB::SQRT_F64] = "sqrt";
+ Names[RTLIB::SQRT_F80] = "sqrtl";
+ Names[RTLIB::SQRT_F128] = "sqrtl";
+ Names[RTLIB::SQRT_PPCF128] = "sqrtl";
+ Names[RTLIB::LOG_F32] = "logf";
+ Names[RTLIB::LOG_F64] = "log";
+ Names[RTLIB::LOG_F80] = "logl";
+ Names[RTLIB::LOG_F128] = "logl";
+ Names[RTLIB::LOG_PPCF128] = "logl";
+ Names[RTLIB::LOG2_F32] = "log2f";
+ Names[RTLIB::LOG2_F64] = "log2";
+ Names[RTLIB::LOG2_F80] = "log2l";
+ Names[RTLIB::LOG2_F128] = "log2l";
+ Names[RTLIB::LOG2_PPCF128] = "log2l";
+ Names[RTLIB::LOG10_F32] = "log10f";
+ Names[RTLIB::LOG10_F64] = "log10";
+ Names[RTLIB::LOG10_F80] = "log10l";
+ Names[RTLIB::LOG10_F128] = "log10l";
+ Names[RTLIB::LOG10_PPCF128] = "log10l";
+ Names[RTLIB::EXP_F32] = "expf";
+ Names[RTLIB::EXP_F64] = "exp";
+ Names[RTLIB::EXP_F80] = "expl";
+ Names[RTLIB::EXP_F128] = "expl";
+ Names[RTLIB::EXP_PPCF128] = "expl";
+ Names[RTLIB::EXP2_F32] = "exp2f";
+ Names[RTLIB::EXP2_F64] = "exp2";
+ Names[RTLIB::EXP2_F80] = "exp2l";
+ Names[RTLIB::EXP2_F128] = "exp2l";
+ Names[RTLIB::EXP2_PPCF128] = "exp2l";
+ Names[RTLIB::SIN_F32] = "sinf";
+ Names[RTLIB::SIN_F64] = "sin";
+ Names[RTLIB::SIN_F80] = "sinl";
+ Names[RTLIB::SIN_F128] = "sinl";
+ Names[RTLIB::SIN_PPCF128] = "sinl";
+ Names[RTLIB::COS_F32] = "cosf";
+ Names[RTLIB::COS_F64] = "cos";
+ Names[RTLIB::COS_F80] = "cosl";
+ Names[RTLIB::COS_F128] = "cosl";
+ Names[RTLIB::COS_PPCF128] = "cosl";
+ Names[RTLIB::POW_F32] = "powf";
+ Names[RTLIB::POW_F64] = "pow";
+ Names[RTLIB::POW_F80] = "powl";
+ Names[RTLIB::POW_F128] = "powl";
+ Names[RTLIB::POW_PPCF128] = "powl";
+ Names[RTLIB::CEIL_F32] = "ceilf";
+ Names[RTLIB::CEIL_F64] = "ceil";
+ Names[RTLIB::CEIL_F80] = "ceill";
+ Names[RTLIB::CEIL_F128] = "ceill";
+ Names[RTLIB::CEIL_PPCF128] = "ceill";
+ Names[RTLIB::TRUNC_F32] = "truncf";
+ Names[RTLIB::TRUNC_F64] = "trunc";
+ Names[RTLIB::TRUNC_F80] = "truncl";
+ Names[RTLIB::TRUNC_F128] = "truncl";
+ Names[RTLIB::TRUNC_PPCF128] = "truncl";
+ Names[RTLIB::RINT_F32] = "rintf";
+ Names[RTLIB::RINT_F64] = "rint";
+ Names[RTLIB::RINT_F80] = "rintl";
+ Names[RTLIB::RINT_F128] = "rintl";
+ Names[RTLIB::RINT_PPCF128] = "rintl";
+ Names[RTLIB::NEARBYINT_F32] = "nearbyintf";
+ Names[RTLIB::NEARBYINT_F64] = "nearbyint";
+ Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
+ Names[RTLIB::NEARBYINT_F128] = "nearbyintl";
+ Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
+ Names[RTLIB::FLOOR_F32] = "floorf";
+ Names[RTLIB::FLOOR_F64] = "floor";
+ Names[RTLIB::FLOOR_F80] = "floorl";
+ Names[RTLIB::FLOOR_F128] = "floorl";
+ Names[RTLIB::FLOOR_PPCF128] = "floorl";
+ Names[RTLIB::COPYSIGN_F32] = "copysignf";
+ Names[RTLIB::COPYSIGN_F64] = "copysign";
+ Names[RTLIB::COPYSIGN_F80] = "copysignl";
+ Names[RTLIB::COPYSIGN_F128] = "copysignl";
+ Names[RTLIB::COPYSIGN_PPCF128] = "copysignl";
+ Names[RTLIB::FPEXT_F64_F128] = "__extenddftf2";
+ Names[RTLIB::FPEXT_F32_F128] = "__extendsftf2";
+ Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
+ Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee";
+ Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee";
+ Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
+ Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
+ Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2";
+ Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2";
+ Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
+ Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2";
+ Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
+ Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi";
+ Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi";
+ Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
+ Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
+ Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
+ Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi";
+ Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi";
+ Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
+ Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
+ Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti";
+ Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi";
+ Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi";
+ Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti";
+ Names[RTLIB::FPTOSINT_F128_I32] = "__fixtfsi";
+ Names[RTLIB::FPTOSINT_F128_I64] = "__fixtfdi";
+ Names[RTLIB::FPTOSINT_F128_I128] = "__fixtfti";
+ Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
+ Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
+ Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
+ Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi";
+ Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi";
+ Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
+ Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
+ Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
+ Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi";
+ Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi";
+ Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
+ Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
+ Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
+ Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi";
+ Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi";
+ Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti";
+ Names[RTLIB::FPTOUINT_F128_I32] = "__fixunstfsi";
+ Names[RTLIB::FPTOUINT_F128_I64] = "__fixunstfdi";
+ Names[RTLIB::FPTOUINT_F128_I128] = "__fixunstfti";
+ Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi";
+ Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi";
+ Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti";
+ Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
+ Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
+ Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf";
+ Names[RTLIB::SINTTOFP_I32_F128] = "__floatsitf";
+ Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf";
+ Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
+ Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
+ Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf";
+ Names[RTLIB::SINTTOFP_I64_F128] = "__floatditf";
+ Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf";
+ Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf";
+ Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf";
+ Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf";
+ Names[RTLIB::SINTTOFP_I128_F128] = "__floattitf";
+ Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf";
+ Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
+ Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
+ Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf";
+ Names[RTLIB::UINTTOFP_I32_F128] = "__floatunsitf";
+ Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf";
+ Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
+ Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
+ Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf";
+ Names[RTLIB::UINTTOFP_I64_F128] = "__floatunditf";
+ Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf";
+ Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf";
+ Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf";
+ Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf";
+ Names[RTLIB::UINTTOFP_I128_F128] = "__floatuntitf";
+ Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf";
+ Names[RTLIB::OEQ_F32] = "__eqsf2";
+ Names[RTLIB::OEQ_F64] = "__eqdf2";
+ Names[RTLIB::OEQ_F128] = "__eqtf2";
+ Names[RTLIB::UNE_F32] = "__nesf2";
+ Names[RTLIB::UNE_F64] = "__nedf2";
+ Names[RTLIB::UNE_F128] = "__netf2";
+ Names[RTLIB::OGE_F32] = "__gesf2";
+ Names[RTLIB::OGE_F64] = "__gedf2";
+ Names[RTLIB::OGE_F128] = "__getf2";
+ Names[RTLIB::OLT_F32] = "__ltsf2";
+ Names[RTLIB::OLT_F64] = "__ltdf2";
+ Names[RTLIB::OLT_F128] = "__lttf2";
+ Names[RTLIB::OLE_F32] = "__lesf2";
+ Names[RTLIB::OLE_F64] = "__ledf2";
+ Names[RTLIB::OLE_F128] = "__letf2";
+ Names[RTLIB::OGT_F32] = "__gtsf2";
+ Names[RTLIB::OGT_F64] = "__gtdf2";
+ Names[RTLIB::OGT_F128] = "__gttf2";
+ Names[RTLIB::UO_F32] = "__unordsf2";
+ Names[RTLIB::UO_F64] = "__unorddf2";
+ Names[RTLIB::UO_F128] = "__unordtf2";
+ Names[RTLIB::O_F32] = "__unordsf2";
+ Names[RTLIB::O_F64] = "__unorddf2";
+ Names[RTLIB::O_F128] = "__unordtf2";
+ Names[RTLIB::MEMCPY] = "memcpy";
+ Names[RTLIB::MEMMOVE] = "memmove";
+ Names[RTLIB::MEMSET] = "memset";
+ Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8";
+ Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1";
+ Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2";
+ Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4";
+ Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8";
+ Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1";
+ Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2";
+ Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4";
+ Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8";
+
+ if (Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU) {
+ Names[RTLIB::SINCOS_F32] = "sincosf";
+ Names[RTLIB::SINCOS_F64] = "sincos";
+ Names[RTLIB::SINCOS_F80] = "sincosl";
+ Names[RTLIB::SINCOS_F128] = "sincosl";
+ Names[RTLIB::SINCOS_PPCF128] = "sincosl";
+ } else {
+ // These are generally not available.
+ Names[RTLIB::SINCOS_F32] = 0;
+ Names[RTLIB::SINCOS_F64] = 0;
+ Names[RTLIB::SINCOS_F80] = 0;
+ Names[RTLIB::SINCOS_F128] = 0;
+ Names[RTLIB::SINCOS_PPCF128] = 0;
+ }
+}
+
+/// InitLibcallCallingConvs - Set default libcall CallingConvs.
+///
+static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
+ for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
+ CCs[i] = CallingConv::C;
+ }
+}
+
+/// getFPEXT - Return the FPEXT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::f64)
+ return FPEXT_F32_F64;
+ if (RetVT == MVT::f128)
+ return FPEXT_F32_F128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::f128)
+ return FPEXT_F64_F128;
+ }
+
+ return UNKNOWN_LIBCALL;
+}
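A hypothetical lookup, for orientation; pairs outside the table fall through to UNKNOWN_LIBCALL:

    assert(RTLIB::getFPEXT(MVT::f32, MVT::f128) == RTLIB::FPEXT_F32_F128);
    assert(RTLIB::getFPEXT(MVT::f80, MVT::f128) == RTLIB::UNKNOWN_LIBCALL);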
+
+/// getFPROUND - Return the FPROUND_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
+ if (RetVT == MVT::f32) {
+ if (OpVT == MVT::f64)
+ return FPROUND_F64_F32;
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F32;
+ if (OpVT == MVT::f128)
+ return FPROUND_F128_F32;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F32;
+ } else if (RetVT == MVT::f64) {
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F64;
+ if (OpVT == MVT::f128)
+ return FPROUND_F128_F64;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F64;
+ }
+
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i8)
+ return FPTOSINT_F32_I8;
+ if (RetVT == MVT::i16)
+ return FPTOSINT_F32_I16;
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F32_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F32_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F32_I128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i8)
+ return FPTOSINT_F64_I8;
+ if (RetVT == MVT::i16)
+ return FPTOSINT_F64_I16;
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F64_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F64_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F64_I128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F80_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F80_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F80_I128;
+ } else if (OpVT == MVT::f128) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F128_I128;
+ } else if (OpVT == MVT::ppcf128) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_PPCF128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_PPCF128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_PPCF128_I128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i8)
+ return FPTOUINT_F32_I8;
+ if (RetVT == MVT::i16)
+ return FPTOUINT_F32_I16;
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F32_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F32_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F32_I128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i8)
+ return FPTOUINT_F64_I8;
+ if (RetVT == MVT::i16)
+ return FPTOUINT_F64_I16;
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F64_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F64_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F64_I128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F80_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F80_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F80_I128;
+ } else if (OpVT == MVT::f128) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F128_I128;
+ } else if (OpVT == MVT::ppcf128) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_PPCF128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_PPCF128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_PPCF128_I128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I32_F32;
+ if (RetVT == MVT::f64)
+ return SINTTOFP_I32_F64;
+ if (RetVT == MVT::f80)
+ return SINTTOFP_I32_F80;
+ if (RetVT == MVT::f128)
+ return SINTTOFP_I32_F128;
+ if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I32_PPCF128;
+ } else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I64_F32;
+ if (RetVT == MVT::f64)
+ return SINTTOFP_I64_F64;
+ if (RetVT == MVT::f80)
+ return SINTTOFP_I64_F80;
+ if (RetVT == MVT::f128)
+ return SINTTOFP_I64_F128;
+ if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I64_PPCF128;
+ } else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I128_F32;
+ if (RetVT == MVT::f64)
+ return SINTTOFP_I128_F64;
+ if (RetVT == MVT::f80)
+ return SINTTOFP_I128_F80;
+ if (RetVT == MVT::f128)
+ return SINTTOFP_I128_F128;
+ if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I128_PPCF128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I32_F32;
+ if (RetVT == MVT::f64)
+ return UINTTOFP_I32_F64;
+ if (RetVT == MVT::f80)
+ return UINTTOFP_I32_F80;
+ if (RetVT == MVT::f128)
+ return UINTTOFP_I32_F128;
+ if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I32_PPCF128;
+ } else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I64_F32;
+ if (RetVT == MVT::f64)
+ return UINTTOFP_I64_F64;
+ if (RetVT == MVT::f80)
+ return UINTTOFP_I64_F80;
+ if (RetVT == MVT::f128)
+ return UINTTOFP_I64_F128;
+ if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I64_PPCF128;
+ } else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I128_F32;
+ if (RetVT == MVT::f64)
+ return UINTTOFP_I128_F64;
+ if (RetVT == MVT::f80)
+ return UINTTOFP_I128_F80;
+ if (RetVT == MVT::f128)
+ return UINTTOFP_I128_F128;
+ if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I128_PPCF128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// InitCmpLibcallCCs - Set default comparison libcall CC.
+///
+static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
+ memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
+ CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
+ CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
+ CCs[RTLIB::OEQ_F128] = ISD::SETEQ;
+ CCs[RTLIB::UNE_F32] = ISD::SETNE;
+ CCs[RTLIB::UNE_F64] = ISD::SETNE;
+ CCs[RTLIB::UNE_F128] = ISD::SETNE;
+ CCs[RTLIB::OGE_F32] = ISD::SETGE;
+ CCs[RTLIB::OGE_F64] = ISD::SETGE;
+ CCs[RTLIB::OGE_F128] = ISD::SETGE;
+ CCs[RTLIB::OLT_F32] = ISD::SETLT;
+ CCs[RTLIB::OLT_F64] = ISD::SETLT;
+ CCs[RTLIB::OLT_F128] = ISD::SETLT;
+ CCs[RTLIB::OLE_F32] = ISD::SETLE;
+ CCs[RTLIB::OLE_F64] = ISD::SETLE;
+ CCs[RTLIB::OLE_F128] = ISD::SETLE;
+ CCs[RTLIB::OGT_F32] = ISD::SETGT;
+ CCs[RTLIB::OGT_F64] = ISD::SETGT;
+ CCs[RTLIB::OGT_F128] = ISD::SETGT;
+ CCs[RTLIB::UO_F32] = ISD::SETNE;
+ CCs[RTLIB::UO_F64] = ISD::SETNE;
+ CCs[RTLIB::UO_F128] = ISD::SETNE;
+ CCs[RTLIB::O_F32] = ISD::SETEQ;
+ CCs[RTLIB::O_F64] = ISD::SETEQ;
+ CCs[RTLIB::O_F128] = ISD::SETEQ;
+}
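How a consumer reads this table (a sketch mirroring the softened-setcc code earlier in this patch, not a new API): each entry names the integer condition that, tested against zero, recovers the FP predicate from the libcall's return value.

    // __eqsf2(a, b) returns 0 iff a == b and both are ordered, so:
    RTLIB::Libcall LC = RTLIB::OEQ_F32;
    ISD::CondCode CC = TLI.getCmpLibcallCC(LC);  // ISD::SETEQ
    // Lowered form: setcc(makeLibCall(LC, a, b), 0, CC)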
+
+/// NOTE: The constructor takes ownership of TLOF.
+TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
+ const TargetLoweringObjectFile *tlof)
+ : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) {
+ // All operations default to being supported.
+ memset(OpActions, 0, sizeof(OpActions));
+ memset(LoadExtActions, 0, sizeof(LoadExtActions));
+ memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
+ memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
+ memset(CondCodeActions, 0, sizeof(CondCodeActions));
+
+ // Set default actions for various operations.
+ for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
+ // Default all indexed load / store to expand.
+ for (unsigned IM = (unsigned)ISD::PRE_INC;
+ IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
+ setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand);
+ setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ // These operations default to expand.
+ setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ // Most targets ignore the @llvm.prefetch intrinsic.
+ setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
+
+ // ConstantFP nodes default to expand. Targets can either change this to
+ // Legal, in which case all fp constants are legal, or use isFPImmLegal()
+ // to optimize expansions for certain constants.
+ setOperationAction(ISD::ConstantFP, MVT::f16, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f128, Expand);
+
+ // These library functions default to expand.
+ setOperationAction(ISD::FLOG , MVT::f16, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f16, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f16, Expand);
+ setOperationAction(ISD::FEXP , MVT::f16, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f16, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f16, Expand);
+ setOperationAction(ISD::FRINT, MVT::f16, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Expand);
+ setOperationAction(ISD::FLOG , MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP , MVT::f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f32, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f32, Expand);
+ setOperationAction(ISD::FRINT, MVT::f32, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG , MVT::f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f64, Expand);
+ setOperationAction(ISD::FEXP , MVT::f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG , MVT::f128, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f128, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f128, Expand);
+ setOperationAction(ISD::FEXP , MVT::f128, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f128, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f128, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f128, Expand);
+ setOperationAction(ISD::FRINT, MVT::f128, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
+
+ // Default ISD::TRAP to expand (which turns it into abort).
+ setOperationAction(ISD::TRAP, MVT::Other, Expand);
+
+  // On most systems, DEBUGTRAP and TRAP are no different. "Expand" here
+  // tells the DAG legalizer to replace DEBUGTRAP with TRAP.
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
+
+ IsLittleEndian = TD->isLittleEndian();
+ PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0));
+  memset(RegClassForVT, 0, MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
+ memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
+ MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
+ MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
+ = MaxStoresPerMemmoveOptSize = 4;
+ BenefitFromCodePlacementOpt = false;
+ UseUnderscoreSetJmp = false;
+ UseUnderscoreLongJmp = false;
+ SelectIsExpensive = false;
+ IntDivIsCheap = false;
+ Pow2DivIsCheap = false;
+ JumpIsExpensive = false;
+ PredictableSelectIsExpensive = false;
+ StackPointerRegisterToSaveRestore = 0;
+ ExceptionPointerRegister = 0;
+ ExceptionSelectorRegister = 0;
+ BooleanContents = UndefinedBooleanContent;
+ BooleanVectorContents = UndefinedBooleanContent;
+ SchedPreferenceInfo = Sched::ILP;
+ JumpBufSize = 0;
+ JumpBufAlignment = 0;
+ MinFunctionAlignment = 0;
+ PrefFunctionAlignment = 0;
+ PrefLoopAlignment = 0;
+ MinStackArgumentAlignment = 1;
+ ShouldFoldAtomicFences = false;
+ InsertFencesForAtomic = false;
+ SupportJumpTables = true;
+ MinimumJumpTableEntries = 4;
+
+ InitLibcallNames(LibcallRoutineNames, TM);
+ InitCmpLibcallCCs(CmpLibcallCCs);
+ InitLibcallCallingConvs(LibcallCallingConvs);
+}
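
A sketch of how a backend consumes these defaults: a lowering subclass registers its legal types, overrides individual actions, and then calls computeRegisterProperties() (defined later in this file). The class name MyTargetLowering and the register class below are assumptions for illustration, not part of this patch.

MyTargetLowering::MyTargetLowering(const TargetMachine &TM,
                                   const TargetLoweringObjectFile *TLOF)
    : TargetLowering(TM, TLOF) {
  // Declare which register class holds each legal type.
  addRegisterClass(MVT::i32, &MyTarget::GPR32RegClass);
  // Override a default: this target has no integer divide instruction.
  setOperationAction(ISD::SDIV, MVT::i32, Expand);
  // Recompute the derived tables once all register classes are added.
  computeRegisterProperties();
}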
+
+TargetLoweringBase::~TargetLoweringBase() {
+ delete &TLOF;
+}
+
+MVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const {
+ return MVT::getIntegerVT(8*TD->getPointerSize(0));
+}
+
+/// canOpTrap - Returns true if the operation can trap for the value type.
+/// VT must be a legal type.
+bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
+ assert(isTypeLegal(VT));
+ switch (Op) {
+ default:
+ return false;
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ return true;
+ }
+}
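
A hedged usage sketch: speculation-style transforms can consult this hook before hoisting an operation past a guard. The helper below is hypothetical; only canOpTrap() itself comes from this patch.

// Hypothetical helper, assuming VT has already been legalized.
static bool safeToSpeculate(const llvm::TargetLoweringBase &TLI,
                            unsigned Opcode, llvm::EVT VT) {
  // FADD never traps and may be hoisted; SDIV may trap on a zero
  // divisor and may not.
  return !TLI.canOpTrap(Opcode, VT);
}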
+
+
+static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ MVT &RegisterVT,
+ TargetLoweringBase *TLI) {
+ // Figure out the right, legal destination reg to copy into.
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT EltTy = VT.getVectorElementType();
+
+ unsigned NumVectorRegs = 1;
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+ // could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(NumElts)) {
+ NumVectorRegs = NumElts;
+ NumElts = 1;
+ }
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
+ while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
+ NumElts >>= 1;
+ NumVectorRegs <<= 1;
+ }
+
+ NumIntermediates = NumVectorRegs;
+
+ MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
+ if (!TLI->isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ unsigned NewVTSize = NewVT.getSizeInBits();
+
+ // Convert sizes such as i33 to i64.
+ if (!isPowerOf2_32(NewVTSize))
+ NewVTSize = NextPowerOf2(NewVTSize);
+
+ MVT DestVT = TLI->getRegisterType(NewVT);
+ RegisterVT = DestVT;
+ if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
+
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+}
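
A worked trace of the halving loop above, assuming a target where v4f32 is the widest legal vector type:

//   VT = v8f32:  NumElts = 8, EltTy = f32, NumVectorRegs = 1
//   v8f32 illegal -> NumElts = 4, NumVectorRegs = 2
//   v4f32 legal   -> loop exits
//   IntermediateVT = v4f32, NumIntermediates = 2, RegisterVT = v4f32
//   returns 2: one v8f32 value occupies two v4f32 registers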
+
+/// isLegalRC - Return true if at least one of the value types that can be
+/// represented by the specified register class is legal.
+bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const {
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (isTypeLegal(*I))
+ return true;
+ }
+ return false;
+}
+
+/// findRepresentativeClass - Return the largest legal super-reg register class
+/// of the register class for the specified type and its associated "cost".
+std::pair<const TargetRegisterClass*, uint8_t>
+TargetLoweringBase::findRepresentativeClass(MVT VT) const {
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
+ if (!RC)
+ return std::make_pair(RC, 0);
+
+ // Compute the set of all super-register classes.
+ BitVector SuperRegRC(TRI->getNumRegClasses());
+ for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI)
+ SuperRegRC.setBitsInMask(RCI.getMask());
+
+ // Find the first legal register class with the largest spill size.
+ const TargetRegisterClass *BestRC = RC;
+ for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) {
+ const TargetRegisterClass *SuperRC = TRI->getRegClass(i);
+ // We want the largest possible spill size.
+ if (SuperRC->getSize() <= BestRC->getSize())
+ continue;
+ if (!isLegalRC(SuperRC))
+ continue;
+ BestRC = SuperRC;
+ }
+ return std::make_pair(BestRC, 1);
+}
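
For intuition, a hedged x86-flavored trace (class sizes as in the x86 backend):

//   VT = i8 -> RC = GR8 (spill size 1).
//   The super-register classes of GR8 include GR16, GR32, and, in
//   64-bit mode, GR64. Scanning them for the largest legal spill size
//   selects GR32 on i386 and GR64 on x86_64, each returned with cost 1.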
+
+/// computeRegisterProperties - Once all of the register classes are added,
+/// this allows us to compute derived properties we expose.
+void TargetLoweringBase::computeRegisterProperties() {
+ assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
+ "Too many value types for ValueTypeActions to hold!");
+
+ // Everything defaults to needing one register.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ NumRegistersForVT[i] = 1;
+ RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
+ }
+ // ...except isVoid, which doesn't need any registers.
+ NumRegistersForVT[MVT::isVoid] = 0;
+
+ // Find the largest integer register class.
+ unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
+ for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
+ assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
+
+ // Every integer value type larger than this largest register takes twice as
+ // many registers to represent as the previous ValueType.
+ for (unsigned ExpandedReg = LargestIntReg + 1;
+ ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) {
+ NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
+ RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
+ TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
+ ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg,
+ TypeExpandInteger);
+ }
+
+ // Inspect all of the ValueType's smaller than the largest integer
+ // register to see which ones need promotion.
+ unsigned LegalIntReg = LargestIntReg;
+ for (unsigned IntReg = LargestIntReg - 1;
+ IntReg >= (unsigned)MVT::i1; --IntReg) {
+ MVT IVT = (MVT::SimpleValueType)IntReg;
+ if (isTypeLegal(IVT)) {
+ LegalIntReg = IntReg;
+ } else {
+ RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
+ (const MVT::SimpleValueType)LegalIntReg;
+ ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
+ }
+ }
+
+ // ppcf128 type is really two f64's.
+ if (!isTypeLegal(MVT::ppcf128)) {
+ NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
+ TransformToType[MVT::ppcf128] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
+ }
+
+ // Decide how to handle f64. If the target does not have native f64 support,
+ // expand it to i64 and we will be generating soft float library calls.
+ if (!isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
+ RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
+ TransformToType[MVT::f64] = MVT::i64;
+ ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat);
+ }
+
+ // Decide how to handle f32. If the target does not have native support for
+ // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
+ if (!isTypeLegal(MVT::f32)) {
+ if (isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
+ TransformToType[MVT::f32] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger);
+ } else {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
+ TransformToType[MVT::f32] = MVT::i32;
+ ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
+ }
+ }
+
+ // Loop over all of the vector value types to see which need transformations.
+ for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+ if (isTypeLegal(VT)) continue;
+
+ // Determine if there is a legal wider type. If so, we should promote to
+ // that wider vector type.
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ if (NElts != 1 && !shouldSplitVectorElementType(EltVT)) {
+ bool IsLegalWiderType = false;
+ // First try to promote the elements of integer vectors. If no legal
+    // promotion was found, fall back to the widen-vector method.
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType)nVT;
+ // Promote vectors of integers to vectors with the same number
+ // of elements, with a wider element type.
+ if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
+ && SVT.getVectorNumElements() == NElts &&
+ isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypePromoteInteger);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+
+ if (IsLegalWiderType) continue;
+
+ // Try to widen the vector.
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType)nVT;
+ if (SVT.getVectorElementType() == EltVT &&
+ SVT.getVectorNumElements() > NElts &&
+ isTypeLegal(SVT)) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+ if (IsLegalWiderType) continue;
+ }
+
+ MVT IntermediateVT;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ NumRegistersForVT[i] =
+ getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
+ RegisterVT, this);
+ RegisterTypeForVT[i] = RegisterVT;
+
+ MVT NVT = VT.getPow2VectorType();
+ if (NVT == VT) {
+ // Type is already a power of 2. The default action is to split.
+ TransformToType[i] = MVT::Other;
+ unsigned NumElts = VT.getVectorNumElements();
+ ValueTypeActions.setTypeAction(VT,
+ NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
+ } else {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ }
+ }
+
+ // Determine the 'representative' register class for each value type.
+  // A representative register class is the largest legal register class
+  // that is not a sub-register class of any other (i.e. a super-register
+  // class) for a group of value types. For example, on i386 the
+  // representative class for i8, i16, and i32 would be GR32; on x86_64
+  // it is GR64.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ const TargetRegisterClass* RRC;
+ uint8_t Cost;
+ tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i);
+ RepRegClassForVT[i] = RRC;
+ RepRegClassCostForVT[i] = Cost;
+ }
+}
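
The integer-expansion loop above produces a doubling chain. Assuming a target whose largest legal integer type is i32:

//   NumRegistersForVT[i64]  = 2, RegisterTypeForVT[i64]  = i32,
//   TransformToType[i64]  = i32  (action: TypeExpandInteger)
//   NumRegistersForVT[i128] = 4, RegisterTypeForVT[i128] = i32,
//   TransformToType[i128] = i64  (expansion peels one step at a time)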
+
+EVT TargetLoweringBase::getSetCCResultType(EVT VT) const {
+ assert(!VT.isVector() && "No default SetCC type for vectors!");
+ return getPointerTy(0).SimpleTy;
+}
+
+MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const {
+ return MVT::i32; // return the default value
+}
+
+/// getVectorTypeBreakdown - Vector types are broken down into some number of
+/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
+/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
+/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
+///
+/// This method returns the number of registers needed, and the VT for each
+/// register. It also returns the VT and quantity of the intermediate values
+/// before they are promoted/expanded.
+///
+unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
+ EVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ MVT &RegisterVT) const {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // If there is a wider vector type with the same element type as this one,
+ // or a promoted vector type that has the same number of elements which
+ // are wider, then we should convert to that legal vector type.
+ // This handles things like <2 x float> -> <4 x float> and
+ // <4 x i1> -> <4 x i32>.
+ LegalizeTypeAction TA = getTypeAction(Context, VT);
+ if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
+ EVT RegisterEVT = getTypeToTransformTo(Context, VT);
+ if (isTypeLegal(RegisterEVT)) {
+ IntermediateVT = RegisterEVT;
+ RegisterVT = RegisterEVT.getSimpleVT();
+ NumIntermediates = 1;
+ return 1;
+ }
+ }
+
+ // Figure out the right, legal destination reg to copy into.
+ EVT EltTy = VT.getVectorElementType();
+
+ unsigned NumVectorRegs = 1;
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+ // could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(NumElts)) {
+ NumVectorRegs = NumElts;
+ NumElts = 1;
+ }
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
+ while (NumElts > 1 && !isTypeLegal(
+ EVT::getVectorVT(Context, EltTy, NumElts))) {
+ NumElts >>= 1;
+ NumVectorRegs <<= 1;
+ }
+
+ NumIntermediates = NumVectorRegs;
+
+ EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
+ if (!isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ MVT DestVT = getRegisterType(Context, NewVT);
+ RegisterVT = DestVT;
+ unsigned NewVTSize = NewVT.getSizeInBits();
+
+ // Convert sizes such as i33 to i64.
+ if (!isPowerOf2_32(NewVTSize))
+ NewVTSize = NextPowerOf2(NewVTSize);
+
+ if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
+
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+}
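
A trace of the widen/promote early-out at the top of this function, assuming v4f32 is legal and v2f32 is marked TypeWidenVector:

//   VT = v2f32, getTypeAction(Context, VT) == TypeWidenVector
//   getTypeToTransformTo(Context, v2f32) == v4f32, which is legal, so:
//   IntermediateVT = v4f32, RegisterVT = v4f32, NumIntermediates = 1
//   returns 1: the value lives in a single widened register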
+
+/// Get the EVTs and ArgFlags collections that represent the legalized return
+/// type of the given function. This does not require a DAG or a return value,
+/// and is suitable for use before any DAGs for the function are constructed.
+/// TODO: Move this out of TargetLowering.cpp.
+void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr,
+ SmallVectorImpl<ISD::OutputArg> &Outs,
+ const TargetLowering &TLI) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, ReturnType, ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ EVT VT = ValueVTs[j];
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // FIXME: C calling convention requires the return type to be promoted to
+ // at least 32-bit. But this is not necessary for non-C calling
+ // conventions. The frontend should mark functions whose return values
+ // require promoting with signext or zeroext attributes.
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+ MVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
+ if (VT.bitsLT(MinVT))
+ VT = MinVT;
+ }
+
+ unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
+ MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::InReg))
+ Flags.setInReg();
+
+ // Propagate extension type if any
+ if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ Flags.setSExt();
+ else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i)
+ Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0));
+ }
+}
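
A worked instance of the promotion logic above, assuming a function declared signext i8 @f() on a target whose register type for i32 is i32:

//   ValueVTs = { i8 }, ExtendKind = SIGN_EXTEND (from the signext attr)
//   i8 is integer and bitsLT(i32), so VT is raised to MinVT = i32
//   NumParts = 1, PartVT = i32, Flags.setSExt()
//   Outs receives a single sign-extend-flagged i32 OutputArg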
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area. This is the actual
+/// alignment, not its logarithm.
+unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty) const {
+ return TD->getCallFrameTypeAlignment(Ty);
+}
+
+//===----------------------------------------------------------------------===//
+// TargetTransformInfo Helpers
+//===----------------------------------------------------------------------===//
+
+int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
+ enum InstructionOpcodes {
+#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
+#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
+#include "llvm/IR/Instruction.def"
+ };
+ switch (static_cast<InstructionOpcodes>(Opcode)) {
+ case Ret: return 0;
+ case Br: return 0;
+ case Switch: return 0;
+ case IndirectBr: return 0;
+ case Invoke: return 0;
+ case Resume: return 0;
+ case Unreachable: return 0;
+ case Add: return ISD::ADD;
+ case FAdd: return ISD::FADD;
+ case Sub: return ISD::SUB;
+ case FSub: return ISD::FSUB;
+ case Mul: return ISD::MUL;
+ case FMul: return ISD::FMUL;
+ case UDiv: return ISD::UDIV;
+  case SDiv:           return ISD::SDIV;
+ case FDiv: return ISD::FDIV;
+ case URem: return ISD::UREM;
+ case SRem: return ISD::SREM;
+ case FRem: return ISD::FREM;
+ case Shl: return ISD::SHL;
+ case LShr: return ISD::SRL;
+ case AShr: return ISD::SRA;
+ case And: return ISD::AND;
+ case Or: return ISD::OR;
+ case Xor: return ISD::XOR;
+ case Alloca: return 0;
+ case Load: return ISD::LOAD;
+ case Store: return ISD::STORE;
+ case GetElementPtr: return 0;
+ case Fence: return 0;
+ case AtomicCmpXchg: return 0;
+ case AtomicRMW: return 0;
+ case Trunc: return ISD::TRUNCATE;
+ case ZExt: return ISD::ZERO_EXTEND;
+ case SExt: return ISD::SIGN_EXTEND;
+ case FPToUI: return ISD::FP_TO_UINT;
+ case FPToSI: return ISD::FP_TO_SINT;
+ case UIToFP: return ISD::UINT_TO_FP;
+ case SIToFP: return ISD::SINT_TO_FP;
+ case FPTrunc: return ISD::FP_ROUND;
+ case FPExt: return ISD::FP_EXTEND;
+ case PtrToInt: return ISD::BITCAST;
+ case IntToPtr: return ISD::BITCAST;
+ case BitCast: return ISD::BITCAST;
+ case ICmp: return ISD::SETCC;
+ case FCmp: return ISD::SETCC;
+ case PHI: return 0;
+ case Call: return 0;
+ case Select: return ISD::SELECT;
+ case UserOp1: return 0;
+ case UserOp2: return 0;
+ case VAArg: return 0;
+ case ExtractElement: return ISD::EXTRACT_VECTOR_ELT;
+ case InsertElement: return ISD::INSERT_VECTOR_ELT;
+ case ShuffleVector: return ISD::VECTOR_SHUFFLE;
+ case ExtractValue: return ISD::MERGE_VALUES;
+ case InsertValue: return ISD::MERGE_VALUES;
+ case LandingPad: return 0;
+ }
+
+ llvm_unreachable("Unknown instruction type encountered!");
+}
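
A hedged sketch of how a cost model might combine this mapping with getTypeLegalizationCost() (defined next). The helper name is an assumption; the two TargetLoweringBase calls are real.

#include "llvm/IR/Instruction.h"
#include "llvm/Target/TargetLowering.h"

static unsigned approximateCost(const llvm::Instruction *I,
                                const llvm::TargetLoweringBase &TLI) {
  int ISDOpcode = TLI.InstructionOpcodeToISD(I->getOpcode());
  if (ISDOpcode == 0)
    return 1;                       // No ISD equivalent; treat as cheap.
  std::pair<unsigned, llvm::MVT> LT =
      TLI.getTypeLegalizationCost(I->getType());
  return LT.first;                  // Each split doubles the cost.
}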
+
+std::pair<unsigned, MVT>
+TargetLoweringBase::getTypeLegalizationCost(Type *Ty) const {
+ LLVMContext &C = Ty->getContext();
+ EVT MTy = getValueType(Ty);
+
+ unsigned Cost = 1;
+ // We keep legalizing the type until we find a legal kind. We assume that
+ // the only operation that costs anything is the split. After splitting
+ // we need to handle two types.
+ while (true) {
+ LegalizeKind LK = getTypeConversion(C, MTy);
+
+ if (LK.first == TypeLegal)
+ return std::make_pair(Cost, MTy.getSimpleVT());
+
+ if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger)
+ Cost *= 2;
+
+ // Keep legalizing the type.
+ MTy = LK.second;
+ }
+}
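
A worked trace, assuming v4i32 is the widest legal vector type:

//   Ty = <16 x i32>:  MTy = v16i32, Cost = 1
//   v16i32 -> TypeSplitVector -> MTy = v8i32, Cost = 2
//   v8i32  -> TypeSplitVector -> MTy = v4i32, Cost = 4
//   v4i32 is legal -> returns {4, v4i32}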
+
+//===----------------------------------------------------------------------===//
+// Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool TargetLoweringBase::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+  // The default implementation supports a conservative RISC-style r+r and
+  // r+i addressing mode.
+
+ // Allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+  // Only support r+r.
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ }
+
+ return true;
+}
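
Hedged examples of what this default policy accepts, written in terms of the AddrMode fields used above:

//   {BaseGV=0, BaseOffs=8, HasBaseReg=1, Scale=0} -> true   (r+i)
//   {BaseGV=0, BaseOffs=8, HasBaseReg=1, Scale=1} -> false  (r+r+i)
//   {BaseGV=0, BaseOffs=0, HasBaseReg=1, Scale=1} -> true   (r+r)
//   {BaseGV=0, BaseOffs=0, HasBaseReg=0, Scale=2} -> true   (2*r as r+r)
//   {BaseGV=G, ...}                               -> false  (global base)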
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 76c2546..1170bf2 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -406,14 +406,14 @@ TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) {
// MachO
//===----------------------------------------------------------------------===//
-/// emitModuleFlags - Emit the module flags that specify the garbage collection
-/// information.
+/// emitModuleFlags - Perform code emission for module flags.
void TargetLoweringObjectFileMachO::
emitModuleFlags(MCStreamer &Streamer,
ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
Mangler *Mang, const TargetMachine &TM) const {
unsigned VersionVal = 0;
unsigned ImageInfoFlags = 0;
+ MDNode *LinkerOptions = 0;
StringRef SectionVal;
for (ArrayRef<Module::ModuleFlagEntry>::iterator
@@ -427,14 +427,33 @@ emitModuleFlags(MCStreamer &Streamer,
StringRef Key = MFE.Key->getString();
Value *Val = MFE.Val;
- if (Key == "Objective-C Image Info Version")
+ if (Key == "Objective-C Image Info Version") {
VersionVal = cast<ConstantInt>(Val)->getZExtValue();
- else if (Key == "Objective-C Garbage Collection" ||
- Key == "Objective-C GC Only" ||
- Key == "Objective-C Is Simulated")
+ } else if (Key == "Objective-C Garbage Collection" ||
+ Key == "Objective-C GC Only" ||
+ Key == "Objective-C Is Simulated") {
ImageInfoFlags |= cast<ConstantInt>(Val)->getZExtValue();
- else if (Key == "Objective-C Image Info Section")
+ } else if (Key == "Objective-C Image Info Section") {
SectionVal = cast<MDString>(Val)->getString();
+ } else if (Key == "Linker Options") {
+ LinkerOptions = cast<MDNode>(Val);
+ }
+ }
+
+ // Emit the linker options if present.
+ if (LinkerOptions) {
+ for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) {
+ MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i));
+ SmallVector<std::string, 4> StrOptions;
+
+ // Convert to strings.
+ for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) {
+ MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii));
+ StrOptions.push_back(MDOption->getString());
+ }
+
+ Streamer.EmitLinkerOptions(StrOptions);
+ }
}
// The section is mandatory. If we don't have it, then we don't have GC info.
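
For reference, a hedged sketch of the module-flag shape the new code consumes; the option string is illustrative:

//   "Linker Options" flag value:  !{ !{!"-lz"} }
//   Each inner MDNode becomes one StrOptions vector, which is passed to
//   Streamer.EmitLinkerOptions() as a single logical linker directive.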
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index 9b776d1..84b4bfc 100644
--- a/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -17,7 +17,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 8e6f809..26c5fe4 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -67,7 +67,6 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
const InstrItineraryData *InstrItins;
MachineRegisterInfo *MRI;
LiveVariables *LV;
- SlotIndexes *Indexes;
LiveIntervals *LIS;
AliasAnalysis *AA;
CodeGenOpt::Level OptLevel;
@@ -121,7 +120,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
bool tryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned SrcIdx, unsigned DstIdx,
- unsigned Dist);
+ unsigned Dist, bool shouldOnlyCommute);
void scanUses(unsigned DstReg);
@@ -164,6 +163,8 @@ INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction",
char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
+static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS);
+
/// sink3AddrInstruction - A two-address instruction has been converted to a
/// three-address instruction to avoid clobbering a register. Try to sink it
/// past the instruction that would kill the above mentioned register to reduce
@@ -205,14 +206,29 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
// Find the instruction that kills SavedReg.
MachineInstr *KillMI = NULL;
- for (MachineRegisterInfo::use_nodbg_iterator
- UI = MRI->use_nodbg_begin(SavedReg),
- UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
- MachineOperand &UseMO = UI.getOperand();
- if (!UseMO.isKill())
- continue;
- KillMI = UseMO.getParent();
- break;
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(SavedReg);
+ assert(LI.end() != LI.begin() &&
+ "Reg should not have empty live interval.");
+
+ SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot();
+ LiveInterval::const_iterator I = LI.find(MBBEndIdx);
+ if (I != LI.end() && I->start < MBBEndIdx)
+ return false;
+
+ --I;
+ KillMI = LIS->getInstructionFromIndex(I->end);
+ }
+ if (!KillMI) {
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SavedReg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ if (!UseMO.isKill())
+ continue;
+ KillMI = UseMO.getParent();
+ break;
+ }
}
// If we find the instruction that kills SavedReg, and it is in an
@@ -251,7 +267,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
if (DefReg == MOReg)
return false;
- if (MO.isKill()) {
+ if (MO.isKill() || (LIS && isPlainlyKilled(OtherMI, MOReg, LIS))) {
if (OtherMI == KillMI && MOReg == SavedReg)
// Save the operand that kills the register. We want to unset the kill
// marker if we can sink MI past it.
@@ -264,13 +280,15 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
}
assert(KillMO && "Didn't find kill");
- // Update kill and LV information.
- KillMO->setIsKill(false);
- KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI);
- KillMO->setIsKill(true);
+ if (!LIS) {
+ // Update kill and LV information.
+ KillMO->setIsKill(false);
+ KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI);
+ KillMO->setIsKill(true);
- if (LV)
- LV->replaceKillInstruction(SavedReg, KillMI, MI);
+ if (LV)
+ LV->replaceKillInstruction(SavedReg, KillMI, MI);
+ }
// Move instruction to its destination.
MBB->remove(MI);
@@ -331,6 +349,33 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
return true;
}
+/// isPlainlyKilled - Test if the given register value, which is used by the
+/// given instruction, is killed by the given instruction.
+static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg,
+ LiveIntervals *LIS) {
+ if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ !LIS->isNotInMIMap(MI)) {
+ // FIXME: Sometimes tryInstructionTransform() will add instructions and
+ // test whether they can be folded before keeping them. In this case it
+ // sets a kill before recursively calling tryInstructionTransform() again.
+ // If there is no interval available, we assume that this instruction is
+ // one of those. A kill flag is manually inserted on the operand so the
+ // check below will handle it.
+ LiveInterval &LI = LIS->getInterval(Reg);
+ // This is to match the kill flag version where undefs don't have kill
+ // flags.
+ if (!LI.hasAtLeastOneValue())
+ return false;
+
+ SlotIndex useIdx = LIS->getInstructionIndex(MI);
+ LiveInterval::const_iterator I = LI.find(useIdx);
+ assert(I != LI.end() && "Reg must be live-in to use.");
+ return !I->end.isBlock() && SlotIndex::isSameInstr(I->end, useIdx);
+ }
+
+ return MI->killsRegister(Reg);
+}
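
A worked reading of the interval test above (the slot indexes are assumed):

//   Suppose Reg is live in [32r, 48r) and MI sits at index 48. find()
//   returns that segment; its end 48r is not a block boundary and
//   isSameInstr(48r, 48) holds, so MI is the kill -> true.
//   If the segment were [32r, 64B) instead (live out of the block),
//   I->end.isBlock() is true and the function returns false.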
+
/// isKilled - Test if the given register value, which is used by the given
/// instruction, is killed by the given instruction. This looks through
/// coalescable copies to see if the original value is potentially not killed.
@@ -346,12 +391,20 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
/// normal heuristics commute the (two-address) add, which lets
/// coalescing eliminate the extra copy.
///
+/// If allowFalsePositives is true then likely kills are treated as kills
+/// even if they cannot be proven to be kills.
static bool isKilled(MachineInstr &MI, unsigned Reg,
const MachineRegisterInfo *MRI,
- const TargetInstrInfo *TII) {
+ const TargetInstrInfo *TII,
+ LiveIntervals *LIS,
+ bool allowFalsePositives) {
MachineInstr *DefMI = &MI;
for (;;) {
- if (!DefMI->killsRegister(Reg))
+ // All uses of physical registers are likely to be kills.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ (allowFalsePositives || MRI->hasOneUse(Reg)))
+ return true;
+ if (!isPlainlyKilled(DefMI, Reg, LIS))
return false;
if (TargetRegisterInfo::isPhysicalRegister(Reg))
return true;
@@ -472,7 +525,7 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
// insert => %reg1030<def> = MOV8rr %reg1029
// %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>
- if (!MI->killsRegister(regC))
+ if (!isPlainlyKilled(MI, regC, LIS))
return false;
// Ok, we have something like:
@@ -528,19 +581,9 @@ commuteInstruction(MachineBasicBlock::iterator &mi,
}
DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI);
- // If the instruction changed to commute it, update livevar.
- if (NewMI != MI) {
- if (LV)
- // Update live variables
- LV->replaceKillInstruction(RegC, MI, NewMI);
- if (Indexes)
- Indexes->replaceMachineInstrInMaps(MI, NewMI);
-
- MBB->insert(mi, NewMI); // Insert the new inst
- MBB->erase(mi); // Nuke the old inst.
- mi = NewMI;
- DistanceMap.insert(std::make_pair(NewMI, Dist));
- }
+ assert(NewMI == MI &&
+ "TargetInstrInfo::commuteInstruction() should not return a new "
+ "instruction unless it was requested.");
// Update source register map.
unsigned FromRegC = getMappedReg(RegC, SrcRegMap);
@@ -587,8 +630,8 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
bool Sunk = false;
- if (Indexes)
- Indexes->replaceMachineInstrInMaps(mi, NewMI);
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(mi, NewMI);
if (NewMI->findRegisterUseOperand(RegB, false, TRI))
// FIXME: Temporary workaround. If the new instruction doesn't
@@ -700,9 +743,9 @@ bool TwoAddressInstructionPass::
rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned Reg) {
- // Bail immediately if we don't have LV available. We use it to find kills
- // efficiently.
- if (!LV)
+ // Bail immediately if we don't have LV or LIS available. We use them to find
+ // kills efficiently.
+ if (!LV && !LIS)
return false;
MachineInstr *MI = &*mi;
@@ -711,7 +754,22 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
// Must be created from unfolded load. Don't waste time trying this.
return false;
- MachineInstr *KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ MachineInstr *KillMI = 0;
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(Reg);
+ assert(LI.end() != LI.begin() &&
+ "Reg should not have empty live interval.");
+
+ SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot();
+ LiveInterval::const_iterator I = LI.find(MBBEndIdx);
+ if (I != LI.end() && I->start < MBBEndIdx)
+ return false;
+
+ --I;
+ KillMI = LIS->getInstructionFromIndex(I->end);
+ } else {
+ KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ }
if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike())
// Don't mess with copies, they may be coalesced later.
return false;
@@ -747,24 +805,27 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
Defs.insert(MOReg);
else {
Uses.insert(MOReg);
- if (MO.isKill() && MOReg != Reg)
+ if (MOReg != Reg && (MO.isKill() ||
+ (LIS && isPlainlyKilled(MI, MOReg, LIS))))
Kills.insert(MOReg);
}
}
// Move the copies connected to MI down as well.
- MachineBasicBlock::iterator From = MI;
- MachineBasicBlock::iterator To = llvm::next(From);
- while (To->isCopy() && Defs.count(To->getOperand(1).getReg())) {
- Defs.insert(To->getOperand(0).getReg());
- ++To;
+ MachineBasicBlock::iterator Begin = MI;
+ MachineBasicBlock::iterator AfterMI = llvm::next(Begin);
+
+ MachineBasicBlock::iterator End = AfterMI;
+ while (End->isCopy() && Defs.count(End->getOperand(1).getReg())) {
+ Defs.insert(End->getOperand(0).getReg());
+ ++End;
}
  // Check that rescheduling will not break dependencies.
unsigned NumVisited = 0;
MachineBasicBlock::iterator KillPos = KillMI;
++KillPos;
- for (MachineBasicBlock::iterator I = To; I != KillPos; ++I) {
+ for (MachineBasicBlock::iterator I = End; I != KillPos; ++I) {
MachineInstr *OtherMI = I;
// DBG_VALUE cannot be counted against the limit.
if (OtherMI->isDebugValue())
@@ -795,11 +856,13 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
} else {
if (Defs.count(MOReg))
return false;
+ bool isKill = MO.isKill() ||
+ (LIS && isPlainlyKilled(OtherMI, MOReg, LIS));
if (MOReg != Reg &&
- ((MO.isKill() && Uses.count(MOReg)) || Kills.count(MOReg)))
+ ((isKill && Uses.count(MOReg)) || Kills.count(MOReg)))
// Don't want to extend other live ranges and update kills.
return false;
- if (MOReg == Reg && !MO.isKill())
+ if (MOReg == Reg && !isKill)
// We can't schedule across a use of the register in question.
return false;
      // Ensure that if this is the register in question, it's the kill we expect.
@@ -810,19 +873,35 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
}
// Move debug info as well.
- while (From != MBB->begin() && llvm::prior(From)->isDebugValue())
- --From;
+ while (Begin != MBB->begin() && llvm::prior(Begin)->isDebugValue())
+ --Begin;
+
+ nmi = End;
+ MachineBasicBlock::iterator InsertPos = KillPos;
+ if (LIS) {
+ // We have to move the copies first so that the MBB is still well-formed
+ // when calling handleMove().
+ for (MachineBasicBlock::iterator MBBI = AfterMI; MBBI != End;) {
+ MachineInstr *CopyMI = MBBI;
+ ++MBBI;
+ MBB->splice(InsertPos, MBB, CopyMI);
+ LIS->handleMove(CopyMI);
+ InsertPos = CopyMI;
+ }
+ End = llvm::next(MachineBasicBlock::iterator(MI));
+ }
// Copies following MI may have been moved as well.
- nmi = To;
- MBB->splice(KillPos, MBB, From, To);
+ MBB->splice(InsertPos, MBB, Begin, End);
DistanceMap.erase(DI);
// Update live variables
- LV->removeVirtualRegisterKilled(Reg, KillMI);
- LV->addVirtualRegisterKilled(Reg, MI);
- if (LIS)
+ if (LIS) {
LIS->handleMove(MI);
+ } else {
+ LV->removeVirtualRegisterKilled(Reg, KillMI);
+ LV->addVirtualRegisterKilled(Reg, MI);
+ }
DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI);
return true;
@@ -858,9 +937,9 @@ bool TwoAddressInstructionPass::
rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned Reg) {
- // Bail immediately if we don't have LV available. We use it to find kills
- // efficiently.
- if (!LV)
+ // Bail immediately if we don't have LV or LIS available. We use them to find
+ // kills efficiently.
+ if (!LV && !LIS)
return false;
MachineInstr *MI = &*mi;
@@ -869,7 +948,22 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
// Must be created from unfolded load. Don't waste time trying this.
return false;
- MachineInstr *KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ MachineInstr *KillMI = 0;
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(Reg);
+ assert(LI.end() != LI.begin() &&
+ "Reg should not have empty live interval.");
+
+ SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot();
+ LiveInterval::const_iterator I = LI.find(MBBEndIdx);
+ if (I != LI.end() && I->start < MBBEndIdx)
+ return false;
+
+ --I;
+ KillMI = LIS->getInstructionFromIndex(I->end);
+ } else {
+ KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ }
if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike())
// Don't mess with copies, they may be coalesced later.
return false;
@@ -896,10 +990,11 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
continue;
if (isDefTooClose(MOReg, DI->second, MI))
return false;
- if (MOReg == Reg && !MO.isKill())
+ bool isKill = MO.isKill() || (LIS && isPlainlyKilled(KillMI, MOReg, LIS));
+ if (MOReg == Reg && !isKill)
return false;
Uses.insert(MOReg);
- if (MO.isKill() && MOReg != Reg)
+ if (isKill && MOReg != Reg)
Kills.insert(MOReg);
} else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) {
Defs.insert(MOReg);
@@ -939,7 +1034,8 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
if (Kills.count(MOReg))
// Don't want to extend other live ranges and update kills.
return false;
- if (OtherMI != MI && MOReg == Reg && !MO.isKill())
+ if (OtherMI != MI && MOReg == Reg &&
+ !(MO.isKill() || (LIS && isPlainlyKilled(OtherMI, MOReg, LIS))))
// We can't schedule across a use of the register in question.
return false;
} else {
@@ -973,10 +1069,12 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
DistanceMap.erase(DI);
// Update live variables
- LV->removeVirtualRegisterKilled(Reg, KillMI);
- LV->addVirtualRegisterKilled(Reg, MI);
- if (LIS)
+ if (LIS) {
LIS->handleMove(KillMI);
+ } else {
+ LV->removeVirtualRegisterKilled(Reg, KillMI);
+ LV->addVirtualRegisterKilled(Reg, MI);
+ }
DEBUG(dbgs() << "\trescheduled kill: " << *KillMI);
return true;
@@ -987,11 +1085,13 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
/// either eliminate the tied operands or improve the opportunities for
/// coalescing away the register copy. Returns true if no copy needs to be
/// inserted to untie mi's operands (either because they were untied, or
-/// because mi was rescheduled, and will be visited again later).
+/// because mi was rescheduled, and will be visited again later). If the
+/// shouldOnlyCommute flag is true, only instruction commutation is attempted.
bool TwoAddressInstructionPass::
tryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
- unsigned SrcIdx, unsigned DstIdx, unsigned Dist) {
+ unsigned SrcIdx, unsigned DstIdx,
+ unsigned Dist, bool shouldOnlyCommute) {
if (OptLevel == CodeGenOpt::None)
return false;
@@ -1001,7 +1101,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
assert(TargetRegisterInfo::isVirtualRegister(regB) &&
"cannot make instruction into two-address form");
- bool regBKilled = isKilled(MI, regB, MRI, TII);
+ bool regBKilled = isKilled(MI, regB, MRI, TII, LIS, true);
if (TargetRegisterInfo::isVirtualRegister(regA))
scanUses(regA);
@@ -1021,7 +1121,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
if (regCIdx != ~0U) {
regC = MI.getOperand(regCIdx).getReg();
- if (!regBKilled && isKilled(MI, regC, MRI, TII))
+ if (!regBKilled && isKilled(MI, regC, MRI, TII, LIS, false))
// If C dies but B does not, swap the B and C operands.
// This makes the live ranges of A and C joinable.
TryCommute = true;
@@ -1040,6 +1140,9 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
return false;
}
+ if (shouldOnlyCommute)
+ return false;
+
// If there is one more use of regB later in the same MBB, consider
// re-schedule this MI below it.
if (rescheduleMIBelowKill(mi, nmi, regB)) {
@@ -1115,10 +1218,12 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA);
unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB);
MachineBasicBlock::iterator NewMI = NewMIs[1];
- bool TransformSuccess =
- tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist);
- if (TransformSuccess ||
- NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
+ bool TransformResult =
+ tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist, true);
+ (void)TransformResult;
+ assert(!TransformResult &&
+ "tryInstructionTransform() should return false.");
+ if (NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
// Success, or at least we made an improvement. Keep the unfolded
// instructions and discard the original.
if (LV) {
@@ -1149,10 +1254,26 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
}
LV->addVirtualRegisterKilled(Reg, NewMIs[1]);
}
+
+ SmallVector<unsigned, 4> OrigRegs;
+ if (LIS) {
+ for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
+ MOE = MI.operands_end(); MOI != MOE; ++MOI) {
+ if (MOI->isReg())
+ OrigRegs.push_back(MOI->getReg());
+ }
+ }
+
MI.eraseFromParent();
+
+ // Update LiveIntervals.
+ if (LIS) {
+ MachineBasicBlock::iterator Begin(NewMIs[0]);
+ MachineBasicBlock::iterator End(NewMIs[1]);
+ LIS->repairIntervalsInRange(MBB, Begin, End, OrigRegs);
+ }
+
mi = NewMIs[1];
- if (TransformSuccess)
- return true;
} else {
// Transforming didn't eliminate the tie and didn't lead to an
// improvement. Clean up the unfolded instructions and keep the
@@ -1215,9 +1336,15 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
TiedPairList &TiedPairs,
unsigned &Dist) {
bool IsEarlyClobber = false;
+ for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
+ const MachineOperand &DstMO = MI->getOperand(TiedPairs[tpi].second);
+ IsEarlyClobber |= DstMO.isEarlyClobber();
+ }
+
bool RemovedKillFlag = false;
bool AllUsesCopied = true;
unsigned LastCopiedReg = 0;
+ SlotIndex LastCopyIdx;
unsigned RegB = 0;
for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
unsigned SrcIdx = TiedPairs[tpi].first;
@@ -1225,7 +1352,6 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
const MachineOperand &DstMO = MI->getOperand(DstIdx);
unsigned RegA = DstMO.getReg();
- IsEarlyClobber |= DstMO.isEarlyClobber();
// Grab RegB from the instruction because it may have changed if the
// instruction was commuted.
@@ -1263,9 +1389,17 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
DistanceMap.insert(std::make_pair(PrevMI, Dist));
DistanceMap[MI] = ++Dist;
- SlotIndex CopyIdx;
- if (Indexes)
- CopyIdx = Indexes->insertMachineInstrInMaps(PrevMI).getRegSlot();
+ if (LIS) {
+ LastCopyIdx = LIS->InsertMachineInstrInMaps(PrevMI).getRegSlot();
+
+ if (TargetRegisterInfo::isVirtualRegister(RegA)) {
+ LiveInterval &LI = LIS->getInterval(RegA);
+ VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
+ SlotIndex endIdx =
+ LIS->getInstructionIndex(MI).getRegSlot(IsEarlyClobber);
+ LI.addRange(LiveRange(LastCopyIdx, endIdx, VNI));
+ }
+ }
DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI);
@@ -1311,6 +1445,18 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
LV->addVirtualRegisterKilled(RegB, PrevMI);
}
+ // Update LiveIntervals.
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(RegB);
+ SlotIndex MIIdx = LIS->getInstructionIndex(MI);
+ LiveInterval::const_iterator I = LI.find(MIIdx);
+ assert(I != LI.end() && "RegB must be live-in to use.");
+
+ SlotIndex UseIdx = MIIdx.getRegSlot(IsEarlyClobber);
+ if (I->end == UseIdx)
+ LI.removeRange(LastCopyIdx, UseIdx);
+ }
+
} else if (RemovedKillFlag) {
// Some tied uses of regB matched their destination registers, so
// regB is still used in this instruction, but a kill flag was
@@ -1335,7 +1481,6 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
InstrItins = TM.getInstrItineraryData();
- Indexes = getAnalysisIfAvailable<SlotIndexes>();
LV = getAnalysisIfAvailable<LiveVariables>();
LIS = getAnalysisIfAvailable<LiveIntervals>();
AA = &getAnalysis<AliasAnalysis>();
@@ -1399,7 +1544,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
unsigned SrcReg = mi->getOperand(SrcIdx).getReg();
unsigned DstReg = mi->getOperand(DstIdx).getReg();
if (SrcReg != DstReg &&
- tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist)) {
+ tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist, false)) {
// The tied operands have been eliminated or shifted further down the
// block to ease elimination. Continue processing with 'nmi'.
TiedOperands.clear();
@@ -1437,6 +1582,9 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
}
}
+ if (LIS)
+ MF->verify(this, "After two-address instruction pass");
+
return MadeChange;
}
@@ -1462,6 +1610,13 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
llvm_unreachable(0);
}
+ SmallVector<unsigned, 4> OrigRegs;
+ if (LIS) {
+ OrigRegs.push_back(MI->getOperand(0).getReg());
+ for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2)
+ OrigRegs.push_back(MI->getOperand(i).getReg());
+ }
+
bool DefEmitted = false;
for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
MachineOperand &UseMO = MI->getOperand(i);
@@ -1505,6 +1660,9 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
DEBUG(dbgs() << "Inserted: " << *CopyMI);
}
+ MachineBasicBlock::iterator EndMBBI =
+ llvm::next(MachineBasicBlock::iterator(MI));
+
if (!DefEmitted) {
DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF");
MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
@@ -1514,4 +1672,8 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
DEBUG(dbgs() << "Eliminated: " << *MI);
MI->eraseFromParent();
}
+
+  // Update LiveIntervals.
+ if (LIS)
+ LIS->repairIntervalsInRange(MBB, MBBI, EndMBBI, OrigRegs);
}
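
To make the final hunks concrete, a hedged sketch of the rewrite eliminateRegSequence performs (the subregister names are illustrative):

//   Before:  %vreg3<def> = REG_SEQUENCE %vreg1, sub_lo, %vreg2, sub_hi
//   After:   %vreg3:sub_lo<def> = COPY %vreg1
//            %vreg3:sub_hi<def> = COPY %vreg2
//   With LiveIntervals available, repairIntervalsInRange() then rebuilds
//   the intervals for %vreg1, %vreg2, and %vreg3 over [MBBI, EndMBBI).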