Diffstat (limited to 'lib')
 lib/Analysis/BranchProbabilityInfo.cpp                      |   32
 lib/Analysis/ConstantFolding.cpp                            |   35
 lib/Analysis/InlineCost.cpp                                 |   30
 lib/Analysis/InstructionSimplify.cpp                        |   28
 lib/Analysis/MemoryBuiltins.cpp                             |   39
 lib/Analysis/MemoryDependenceAnalysis.cpp                   |    9
 lib/Analysis/RegionInfo.cpp                                 |   16
 lib/Analysis/ScalarEvolution.cpp                            |    6
 lib/Archive/ArchiveReader.cpp                               |    7
 lib/Archive/ArchiveWriter.cpp                               |    7
 lib/AsmParser/LLLexer.cpp                                   |    4
 lib/AsmParser/LLParser.cpp                                  |   18
 lib/AsmParser/LLToken.h                                     |    7
 lib/Bitcode/Reader/BitcodeReader.cpp                        |    2
 lib/Bitcode/Writer/BitcodeWriter.cpp                        |    2
 lib/CodeGen/AsmPrinter/AsmPrinter.cpp                       |    4
 lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp                 |    2
 lib/CodeGen/CMakeLists.txt                                  |    1
 lib/CodeGen/EarlyIfConversion.cpp                           |  233
 lib/CodeGen/ExpandPostRAPseudos.cpp                         |   11
 lib/CodeGen/LiveInterval.cpp                                |   20
 lib/CodeGen/LiveIntervalAnalysis.cpp                        |   89
 lib/CodeGen/LiveRangeCalc.cpp                               |    4
 lib/CodeGen/LiveRangeEdit.cpp                               |   36
 lib/CodeGen/MachineBasicBlock.cpp                           |   74
 lib/CodeGen/MachineBlockPlacement.cpp                       |   80
 lib/CodeGen/MachineBranchProbabilityInfo.cpp                |   20
 lib/CodeGen/MachineCSE.cpp                                  |   49
 lib/CodeGen/MachineInstr.cpp                                |  165
 lib/CodeGen/MachineRegisterInfo.cpp                         |   86
 lib/CodeGen/MachineSSAUpdater.cpp                           |    2
 lib/CodeGen/MachineSink.cpp                                 |   17
 lib/CodeGen/MachineTraceMetrics.cpp                         | 1153
 lib/CodeGen/MachineTraceMetrics.h                           |  341
 lib/CodeGen/MachineVerifier.cpp                             |  662
 lib/CodeGen/Passes.cpp                                      |   12
 lib/CodeGen/PeepholeOptimizer.cpp                           |  108
 lib/CodeGen/RegAllocFast.cpp                                |   12
 lib/CodeGen/RegAllocGreedy.cpp                              |    2
 lib/CodeGen/RegisterCoalescer.cpp                           |   43
 lib/CodeGen/ScheduleDAGInstrs.cpp                           |    9
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp                    |   96
 lib/CodeGen/SelectionDAG/FastISel.cpp                       |   18
 lib/CodeGen/SelectionDAG/InstrEmitter.cpp                   |    4
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp                    |    7
 lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h               |    1
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp                   |   65
 lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp            |  221
 lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h              |    1
 lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp             |    5
 lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp               |    2
 lib/CodeGen/SelectionDAG/TargetLowering.cpp                 |    9
 lib/CodeGen/SplitKit.cpp                                    |    9
 lib/CodeGen/StackProtector.cpp                              |   70
 lib/CodeGen/StrongPHIElimination.cpp                        |    4
 lib/CodeGen/TargetInstrInfoImpl.cpp                         |   14
 lib/CodeGen/TwoAddressInstructionPass.cpp                   |  585
 lib/DebugInfo/DWARFContext.cpp                              |    4
 lib/DebugInfo/DWARFDebugLine.cpp                            |  117
 lib/DebugInfo/DWARFDebugLine.h                              |   61
 lib/ExecutionEngine/ExecutionEngine.cpp                     |    3
 lib/ExecutionEngine/JIT/JIT.cpp                             |   11
 lib/ExecutionEngine/JIT/JITMemoryManager.cpp                |    3
 lib/ExecutionEngine/MCJIT/MCJIT.cpp                         |   61
 lib/ExecutionEngine/MCJIT/MCJIT.h                           |   23
 lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp             |   27
 lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp          |  104
 lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h            |    6
 lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h           |    2
 lib/Linker/LinkModules.cpp                                  |    9
 lib/MC/CMakeLists.txt                                       |    1
 lib/MC/MCAssembler.cpp                                      |    5
 lib/MC/MCDwarf.cpp                                          |   22
 lib/MC/MCObjectFileInfo.cpp                                 |    6
 lib/MC/MCObjectWriter.cpp                                   |   34
 lib/MC/MCParser/AsmParser.cpp                               |   87
 lib/MC/MCParser/DarwinAsmParser.cpp                         |   41
 lib/MC/MCRegisterInfo.cpp                                   |   71
 lib/MC/MCStreamer.cpp                                       |    5
 lib/MC/MCWin64EH.cpp                                        |    6
 lib/Support/APFloat.cpp                                     |   56
 lib/Support/CMakeLists.txt                                  |    1
 lib/Support/DataExtractor.cpp                               |    6
 lib/Support/Debug.cpp                                       |   10
 lib/Support/FileOutputBuffer.cpp                            |  148
 lib/Support/Mutex.cpp                                       |    3
 lib/Support/Triple.cpp                                      |    2
 lib/Support/Unix/Path.inc                                   |    4
 lib/Support/Unix/PathV2.inc                                 |  118
 lib/Support/Unix/Process.inc                                |    7
 lib/Support/Windows/PathV2.inc                              |  199
 lib/TableGen/TGParser.cpp                                   |   32
 lib/Target/ARM/ARM.td                                       |    2
 lib/Target/ARM/ARMAsmPrinter.cpp                            |   28
 lib/Target/ARM/ARMBaseInstrInfo.cpp                         |  293
 lib/Target/ARM/ARMBaseInstrInfo.h                           |   12
 lib/Target/ARM/ARMBaseRegisterInfo.cpp                      |   12
 lib/Target/ARM/ARMCallingConv.td                            |   31
 lib/Target/ARM/ARMCodeEmitter.cpp                           |  100
 lib/Target/ARM/ARMExpandPseudoInsts.cpp                     |    2
 lib/Target/ARM/ARMFastISel.cpp                              |  164
 lib/Target/ARM/ARMFrameLowering.cpp                         |   10
 lib/Target/ARM/ARMISelDAGToDAG.cpp                          |  137
 lib/Target/ARM/ARMISelLowering.cpp                          |  367
 lib/Target/ARM/ARMISelLowering.h                            |   15
 lib/Target/ARM/ARMInstrInfo.td                              |   78
 lib/Target/ARM/ARMInstrNEON.td                              |   34
 lib/Target/ARM/ARMInstrThumb2.td                            |  168
 lib/Target/ARM/ARMInstrVFP.td                               |   31
 lib/Target/ARM/ARMJITInfo.cpp                               |    6
 lib/Target/ARM/ARMLoadStoreOptimizer.cpp                    |    6
 lib/Target/ARM/ARMRegisterInfo.td                           |    8
 lib/Target/ARM/ARMScheduleA8.td                             |    1
 lib/Target/ARM/ARMScheduleA9.td                             |    1
 lib/Target/ARM/ARMSubtarget.cpp                             |   13
 lib/Target/ARM/ARMSubtarget.h                               |    5
 lib/Target/ARM/AsmParser/ARMAsmParser.cpp                   |  116
 lib/Target/ARM/Disassembler/ARMDisassembler.cpp             |  861
 lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp               |   50
 lib/Target/ARM/InstPrinter/ARMInstPrinter.h                 |    1
 lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h                   |  106
 lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp            |   90
 lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp         |   11
 lib/Target/ARM/Thumb2InstrInfo.cpp                          |   42
 lib/Target/ARM/Thumb2InstrInfo.h                            |    5
 lib/Target/CppBackend/CPPBackend.cpp                        |    4
 lib/Target/Hexagon/HexagonHardwareLoops.cpp                 |    5
 lib/Target/Hexagon/HexagonInstrInfo.td                      |   12
 lib/Target/Hexagon/HexagonSubtarget.cpp                     |   33
 lib/Target/Mangler.cpp                                      |    3
 lib/Target/Mips/AsmParser/CMakeLists.txt                    |    1
 lib/Target/Mips/AsmParser/MipsAsmParser.cpp                 |   68
 lib/Target/Mips/CMakeLists.txt                              |    8
 lib/Target/Mips/Disassembler/MipsDisassembler.cpp           |   20
 lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp             |   15
 lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp        |   17
 lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h               |    6
 lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp          |    6
 lib/Target/Mips/Makefile                                    |    4
 lib/Target/Mips/Mips.td                                     |   17
 lib/Target/Mips/Mips16FrameLowering.cpp                     |   87
 lib/Target/Mips/Mips16FrameLowering.h                       |   43
 lib/Target/Mips/Mips16InstrInfo.cpp                         |  132
 lib/Target/Mips/Mips16InstrInfo.h                           |   76
 lib/Target/Mips/Mips16InstrInfo.td                          |  294
 lib/Target/Mips/Mips16RegisterInfo.cpp                      |  111
 lib/Target/Mips/Mips16RegisterInfo.h                        |   37
 lib/Target/Mips/Mips64InstrInfo.td                          |   23
 lib/Target/Mips/MipsCallingConv.td                          |   12
 lib/Target/Mips/MipsELFWriterInfo.cpp                       |   92
 lib/Target/Mips/MipsELFWriterInfo.h                         |   59
 lib/Target/Mips/MipsFrameLowering.cpp                       |  224
 lib/Target/Mips/MipsFrameLowering.h                         |   25
 lib/Target/Mips/MipsISelDAGToDAG.cpp                        |   68
 lib/Target/Mips/MipsISelLowering.cpp                        |  169
 lib/Target/Mips/MipsISelLowering.h                          |    1
 lib/Target/Mips/MipsInstrFPU.td                             |   56
 lib/Target/Mips/MipsInstrFormats.td                         |   36
 lib/Target/Mips/MipsInstrInfo.cpp                           |  309
 lib/Target/Mips/MipsInstrInfo.h                             |   98
 lib/Target/Mips/MipsInstrInfo.td                            |   52
 lib/Target/Mips/MipsJITInfo.cpp                             |   47
 lib/Target/Mips/MipsLongBranch.cpp                          |    2
 lib/Target/Mips/MipsMachineFunction.h                       |   33
 lib/Target/Mips/MipsRegisterInfo.cpp                        |   74
 lib/Target/Mips/MipsRegisterInfo.h                          |   13
 lib/Target/Mips/MipsRegisterInfo.td                         |    3
 lib/Target/Mips/MipsSEFrameLowering.cpp                     |  210
 lib/Target/Mips/MipsSEFrameLowering.h                       |   44
 lib/Target/Mips/MipsSEInstrInfo.cpp                         |  320
 lib/Target/Mips/MipsSEInstrInfo.h                           |   86
 lib/Target/Mips/MipsSERegisterInfo.cpp                      |  138
 lib/Target/Mips/MipsSERegisterInfo.h                        |   39
 lib/Target/Mips/MipsSubtarget.h                             |    4
 lib/Target/Mips/MipsTargetMachine.cpp                       |   31
 lib/Target/Mips/MipsTargetMachine.h                         |  129
 lib/Target/PowerPC/PPCCTRLoops.cpp                          |    5
 lib/Target/PowerPC/PPCISelLowering.cpp                      |    6
 lib/Target/PowerPC/PPCInstr64Bit.td                         |    9
 lib/Target/PowerPC/TargetInfo/Makefile                      |    2
 lib/Target/README.txt                                       |    5
 lib/Target/Sparc/SparcRegisterInfo.cpp                      |    3
 lib/Target/TargetLibraryInfo.cpp                            |  105
 lib/Target/X86/AsmParser/X86AsmParser.cpp                   |   59
 lib/Target/X86/Disassembler/X86Disassembler.cpp             |    9
 lib/Target/X86/Disassembler/X86Disassembler.h               |   10
 lib/Target/X86/Disassembler/X86DisassemblerDecoder.c        |   16
 lib/Target/X86/Disassembler/X86DisassemblerDecoder.h        |   71
 lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h  |   25
 lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp                |    7
 lib/Target/X86/X86.h                                        |    2
 lib/Target/X86/X86.td                                       |    8
 lib/Target/X86/X86AsmPrinter.h                              |    8
 lib/Target/X86/X86COFFMachineModuleInfo.cpp                 |    1
 lib/Target/X86/X86COFFMachineModuleInfo.h                   |    4
 lib/Target/X86/X86FastISel.cpp                              |   82
 lib/Target/X86/X86FloatingPoint.cpp                         |   18
 lib/Target/X86/X86ISelDAGToDAG.cpp                          |  192
 lib/Target/X86/X86ISelLowering.cpp                          | 1460
 lib/Target/X86/X86ISelLowering.h                            |   39
 lib/Target/X86/X86InstrArithmetic.td                        |    2
 lib/Target/X86/X86InstrExtension.td                         |    8
 lib/Target/X86/X86InstrFMA.td                               |  364
 lib/Target/X86/X86InstrFormats.td                           |   12
 lib/Target/X86/X86InstrFragmentsSIMD.td                     |   45
 lib/Target/X86/X86InstrInfo.cpp                             |  460
 lib/Target/X86/X86InstrInfo.h                               |   12
 lib/Target/X86/X86InstrMMX.td                               |   18
 lib/Target/X86/X86InstrSSE.td                               |  934
 lib/Target/X86/X86JITInfo.cpp                               |   17
 lib/Target/X86/X86JITInfo.h                                 |    2
 lib/Target/X86/X86MCInstLower.cpp                           |   62
 lib/Target/X86/X86MCInstLower.h                             |    6
 lib/Target/X86/X86MachineFunctionInfo.h                     |    6
 lib/Target/X86/X86RegisterInfo.cpp                          |    8
 lib/Target/X86/X86RegisterInfo.td                           |    7
 lib/Target/X86/X86Relocations.h                             |    2
 lib/Target/X86/X86SelectionDAGInfo.cpp                      |    2
 lib/Target/X86/X86Subtarget.cpp                             |   39
 lib/Target/X86/X86Subtarget.h                               |    4
 lib/Target/X86/X86VZeroUpper.cpp                            |    2
 lib/Target/XCore/XCoreFrameLowering.cpp                     |    5
 lib/Target/XCore/XCoreFrameLowering.h                       |    2
 lib/Transforms/IPO/GlobalOpt.cpp                            |    7
 lib/Transforms/IPO/StripSymbols.cpp                         |    5
 lib/Transforms/InstCombine/InstCombineCalls.cpp             |   12
 lib/Transforms/InstCombine/InstCombineCompares.cpp          |   41
 lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp   |  172
 lib/Transforms/InstCombine/InstCombineSelect.cpp            |   15
 lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp  |   29
 lib/Transforms/Instrumentation/AddressSanitizer.cpp         |  372
 lib/Transforms/Instrumentation/MaximumSpanningTree.h        |   53
 lib/Transforms/Scalar/CodeGenPrepare.cpp                    |   70
 lib/Transforms/Scalar/DeadStoreElimination.cpp              |   13
 lib/Transforms/Scalar/GVN.cpp                               |  103
 lib/Transforms/Scalar/LICM.cpp                              |   37
 lib/Transforms/Scalar/LoopStrengthReduce.cpp                |    3
 lib/Transforms/Scalar/Reassociate.cpp                       |    2
 lib/Transforms/Scalar/ScalarReplAggregates.cpp              |  189
 lib/Transforms/Scalar/SimplifyLibCalls.cpp                  |  120
 lib/Transforms/Utils/BasicBlockUtils.cpp                    |   24
 lib/Transforms/Utils/BuildLibCalls.cpp                      |  126
 lib/Transforms/Utils/SSAUpdater.cpp                         |    7
 lib/VMCore/AsmWriter.cpp                                    |   11
 lib/VMCore/Attributes.cpp                                   |    3
 lib/VMCore/CMakeLists.txt                                   |    1
 lib/VMCore/Core.cpp                                         |   10
 lib/VMCore/Dominators.cpp                                   |   90
 lib/VMCore/Metadata.cpp                                     |    2
 lib/VMCore/Module.cpp                                       |  140
 lib/VMCore/Type.cpp                                         |   30
 lib/VMCore/TypeFinder.cpp                                   |  148
 lib/VMCore/ValueTypes.cpp                                   |    8
 lib/VMCore/Verifier.cpp                                     |   47
 254 files changed, 11957 insertions(+), 6183 deletions(-)
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 2730ce6..b255ce6 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -1,4 +1,4 @@
-//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -*- C++ -*-===//
+//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -78,6 +78,19 @@ static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
static const uint32_t FPH_TAKEN_WEIGHT = 20;
static const uint32_t FPH_NONTAKEN_WEIGHT = 12;
+/// \brief Invoke-terminating normal branch taken weight
+///
+/// This is the weight for branching to the normal destination of an invoke
+/// instruction. We expect this to happen most of the time. Set the weight to an
+/// absurdly high value so that nested loops subsume it.
+static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1;
+
+/// \brief Invoke-terminating normal branch not-taken weight.
+///
+/// This is the weight for branching to the unwind destination of an invoke
+/// instruction. This is essentially never taken.
+static const uint32_t IH_NONTAKEN_WEIGHT = 1;
+
// Standard weight value. Used when none of the heuristics set weight for
// the edge.
static const uint32_t NORMAL_WEIGHT = 16;
@@ -371,6 +384,19 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) {
return true;
}
+bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) {
+ InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator());
+ if (!II)
+ return false;
+
+ BasicBlock *Normal = II->getNormalDest();
+ BasicBlock *Unwind = II->getUnwindDest();
+
+ setEdgeWeight(BB, Normal, IH_TAKEN_WEIGHT);
+ setEdgeWeight(BB, Unwind, IH_NONTAKEN_WEIGHT);
+ return true;
+}
+
void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LoopInfo>();
AU.setPreservesAll();
@@ -397,7 +423,9 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
continue;
if (calcZeroHeuristics(*I))
continue;
- calcFloatingPointHeuristics(*I);
+ if (calcFloatingPointHeuristics(*I))
+ continue;
+ calcInvokeHeuristics(*I);
}
PostDominatedByUnreachable.clear();
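The invoke heuristic above assigns raw edge weights; BranchProbabilityInfo later turns the weights on a block's outgoing edges into probabilities by dividing each weight by the sum. A minimal standalone C++ sketch of that normalization, using the two constants introduced here (not part of the patch):

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1; // normal destination
  const uint32_t IH_NONTAKEN_WEIGHT = 1;            // unwind destination

  // An edge's probability is its weight over the sum of all outgoing weights.
  double Sum = double(IH_TAKEN_WEIGHT) + double(IH_NONTAKEN_WEIGHT);
  std::printf("P(normal) ~= %f\n", IH_TAKEN_WEIGHT / Sum);
  std::printf("P(unwind) ~= %f\n", IH_NONTAKEN_WEIGHT / Sum);
  return 0;
}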
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 7ced848..f5e619c 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -358,17 +358,20 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
NumElts = AT->getNumElements();
else
NumElts = cast<VectorType>(C->getType())->getNumElements();
-
+
for (; Index != NumElts; ++Index) {
if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
BytesLeft, TD))
return false;
- if (EltSize >= BytesLeft)
+
+ uint64_t BytesWritten = EltSize - Offset;
+ assert(BytesWritten <= EltSize && "Not indexing into this element?");
+ if (BytesWritten >= BytesLeft)
return true;
-
+
Offset = 0;
- BytesLeft -= EltSize;
- CurPtr += EltSize;
+ BytesLeft -= BytesWritten;
+ CurPtr += BytesWritten;
}
return true;
}
@@ -600,6 +603,22 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
return C;
}
+/// Strip the pointer casts, but preserve the address space information.
+static Constant* StripPtrCastKeepAS(Constant* Ptr) {
+ assert(Ptr->getType()->isPointerTy() && "Not a pointer type");
+ PointerType *OldPtrTy = cast<PointerType>(Ptr->getType());
+ Ptr = cast<Constant>(Ptr->stripPointerCasts());
+ PointerType *NewPtrTy = cast<PointerType>(Ptr->getType());
+
+ // Preserve the address space number of the pointer.
+ if (NewPtrTy->getAddressSpace() != OldPtrTy->getAddressSpace()) {
+ NewPtrTy = NewPtrTy->getElementType()->getPointerTo(
+ OldPtrTy->getAddressSpace());
+ Ptr = ConstantExpr::getBitCast(Ptr, NewPtrTy);
+ }
+ return Ptr;
+}
+
/// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP
/// constant expression, do so.
static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
@@ -636,13 +655,13 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
}
return 0;
}
-
+
unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy);
APInt Offset =
APInt(BitWidth, TD->getIndexedOffset(Ptr->getType(),
makeArrayRef((Value **)Ops.data() + 1,
Ops.size() - 1)));
- Ptr = cast<Constant>(Ptr->stripPointerCasts());
+ Ptr = StripPtrCastKeepAS(Ptr);
// If this is a GEP of a GEP, fold it all into a single GEP.
while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
@@ -661,7 +680,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
Ptr = cast<Constant>(GEP->getOperand(0));
Offset += APInt(BitWidth,
TD->getIndexedOffset(Ptr->getType(), NestedOps));
- Ptr = cast<Constant>(Ptr->stripPointerCasts());
+ Ptr = StripPtrCastKeepAS(Ptr);
}
// If the base value for this address is a literal integer value, fold the
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index a6bf4a8..bc1ecd2 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -797,9 +797,33 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
FiftyPercentVectorBonus = Threshold;
TenPercentVectorBonus = Threshold / 2;
- // Subtract off one instruction per call argument as those will be free after
- // inlining.
- Cost -= CS.arg_size() * InlineConstants::InstrCost;
+ // Give out bonuses per argument, as the instructions setting them up will
+ // be gone after inlining.
+ for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
+ if (TD && CS.isByValArgument(I)) {
+ // We approximate the number of loads and stores needed by dividing the
+ // size of the byval type by the target's pointer size.
+ PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
+ unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType());
+ unsigned PointerSize = TD->getPointerSizeInBits();
+ // Ceiling division.
+ unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
+
+ // If it generates more than 8 stores it is likely to be expanded as an
+ // inline memcpy so we take that as an upper bound. Otherwise we assume
+ // one load and one store per word copied.
+ // FIXME: The maxStoresPerMemcpy setting from the target should be used
+ // here instead of a magic number of 8, but it's not available via
+ // TargetData.
+ NumStores = std::min(NumStores, 8U);
+
+ Cost -= 2 * NumStores * InlineConstants::InstrCost;
+ } else {
+ // For non-byval arguments subtract off one instruction per call
+ // argument.
+ Cost -= InlineConstants::InstrCost;
+ }
+ }
// If there is only one call of the function, and it has internal linkage,
// the cost of inlining it drops dramatically.
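The byval bonus above amounts to one estimated load/store pair per pointer-sized word copied, capped at eight stores as a stand-in for an inline memcpy expansion. A standalone sketch of the same arithmetic with made-up sizes (not part of the patch; the InstrCost value is an assumption):

#include <algorithm>
#include <cstdio>

int main() {
  const unsigned InstrCost = 5;    // assumed value of InlineConstants::InstrCost
  unsigned TypeSizeBits = 256;     // hypothetical byval aggregate size
  unsigned PointerSizeBits = 64;   // hypothetical target pointer size

  // Ceiling division: pointer-sized stores needed to copy the aggregate.
  unsigned NumStores = (TypeSizeBits + PointerSizeBits - 1) / PointerSizeBits;
  NumStores = std::min(NumStores, 8u); // cap; beyond this an inline memcpy is assumed

  unsigned Bonus = 2 * NumStores * InstrCost; // one load + one store per word
  std::printf("byval argument bonus: %u\n", Bonus); // prints 40 for these sizes
  return 0;
}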
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 16a9a04..379a35a 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -1719,10 +1719,13 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return ConstantInt::get(ITy, false);
// A local identified object (alloca or noalias call) can't equal any
- // incoming argument, unless they're both null.
- if (isa<Instruction>(LHSPtr) && isa<Argument>(RHSPtr) &&
- Pred == CmpInst::ICMP_EQ)
- return ConstantInt::get(ITy, false);
+ // incoming argument, unless they're both null or they belong to
+ // different functions. The latter happens during inlining.
+ if (Instruction *LHSInst = dyn_cast<Instruction>(LHSPtr))
+ if (Argument *RHSArg = dyn_cast<Argument>(RHSPtr))
+ if (LHSInst->getParent()->getParent() == RHSArg->getParent() &&
+ Pred == CmpInst::ICMP_EQ)
+ return ConstantInt::get(ITy, false);
}
// Assume that the constant null is on the right.
@@ -1732,14 +1735,17 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
else if (Pred == CmpInst::ICMP_NE)
return ConstantInt::get(ITy, true);
}
- } else if (isa<Argument>(LHSPtr)) {
+ } else if (Argument *LHSArg = dyn_cast<Argument>(LHSPtr)) {
RHSPtr = RHSPtr->stripInBoundsOffsets();
- // An alloca can't be equal to an argument.
- if (isa<AllocaInst>(RHSPtr)) {
- if (Pred == CmpInst::ICMP_EQ)
- return ConstantInt::get(ITy, false);
- else if (Pred == CmpInst::ICMP_NE)
- return ConstantInt::get(ITy, true);
+ // An alloca can't be equal to an argument unless they come from separate
+ // functions via inlining.
+ if (AllocaInst *RHSInst = dyn_cast<AllocaInst>(RHSPtr)) {
+ if (LHSArg->getParent() == RHSInst->getParent()->getParent()) {
+ if (Pred == CmpInst::ICMP_EQ)
+ return ConstantInt::get(ITy, false);
+ else if (Pred == CmpInst::ICMP_NE)
+ return ConstantInt::get(ITy, true);
+ }
}
}
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 8d99ec3..b986b32 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -64,7 +64,7 @@ static const AllocFnsTy AllocationFnData[] = {
{"realloc", ReallocLike, 2, 1, -1},
{"reallocf", ReallocLike, 2, 1, -1},
{"strdup", StrDupLike, 1, -1, -1},
- {"strndup", StrDupLike, 2, -1, -1}
+ {"strndup", StrDupLike, 2, 1, -1}
};
@@ -358,11 +358,16 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const TargetData *TD,
SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
V = V->stripPointerCasts();
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ // If we have already seen this instruction, bail out. Cycles can happen in
+ // unreachable code after constant propagation.
+ if (!SeenInsts.insert(I))
+ return unknown();
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
- return visitGEPOperator(*GEP);
- if (Instruction *I = dyn_cast<Instruction>(V))
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
+ return visitGEPOperator(*GEP);
return visit(*I);
+ }
if (Argument *A = dyn_cast<Argument>(V))
return visitArgument(*A);
if (ConstantPointerNull *P = dyn_cast<ConstantPointerNull>(V))
@@ -371,9 +376,12 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
return visitGlobalVariable(*GV);
if (UndefValue *UV = dyn_cast<UndefValue>(V))
return visitUndefValue(*UV);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
if (CE->getOpcode() == Instruction::IntToPtr)
return unknown(); // clueless
+ if (CE->getOpcode() == Instruction::GetElementPtr)
+ return visitGEPOperator(cast<GEPOperator>(*CE));
+ }
DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V
<< '\n');
@@ -414,8 +422,21 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) {
// handle strdup-like functions separately
if (FnData->AllocTy == StrDupLike) {
- // TODO
- return unknown();
+ APInt Size(IntTyBits, GetStringLength(CS.getArgument(0)));
+ if (!Size)
+ return unknown();
+
+ // strndup limits strlen
+ if (FnData->FstParam > 0) {
+ ConstantInt *Arg= dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam));
+ if (!Arg)
+ return unknown();
+
+ APInt MaxSize = Arg->getValue().zextOrSelf(IntTyBits);
+ if (Size.ugt(MaxSize))
+ Size = MaxSize + 1;
+ }
+ return std::make_pair(Size, Zero);
}
ConstantInt *Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam));
@@ -512,8 +533,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) {
ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const TargetData *TD,
LLVMContext &Context)
-: TD(TD), Context(Context), Builder(Context, TargetFolder(TD)),
-Visitor(TD, Context) {
+: TD(TD), Context(Context), Builder(Context, TargetFolder(TD)) {
IntTy = TD->getIntPtrType(Context);
Zero = ConstantInt::get(IntTy, 0);
}
@@ -538,6 +558,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
+ ObjectSizeOffsetVisitor Visitor(TD, Context);
SizeOffsetType Const = Visitor.compute(V);
if (Visitor.bothKnown(Const))
return std::make_pair(ConstantInt::get(Context, Const.first),
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 7fb154d..059e574 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -227,13 +227,18 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
// Otherwise if the two calls don't interact (e.g. InstCS is readnone)
// keep scanning.
- break;
+ continue;
default:
return MemDepResult::getClobber(Inst);
}
}
+
+ // If we could not obtain a pointer for the instruction and the instruction
+ // touches memory then assume that this is a dependency.
+ if (MR != AliasAnalysis::NoModRef)
+ return MemDepResult::getClobber(Inst);
}
-
+
// No dependence found. If this is the entry block of the function, it is
// unknown, otherwise it is non-local.
if (BB != &BB->getParent()->getEntryBlock())
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index 5f4458b..868f483 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -262,22 +262,6 @@ Region::const_block_node_iterator Region::block_node_end() const {
return GraphTraits<FlatIt<const Region*> >::nodes_end(this);
}
-Region::block_iterator Region::block_begin() {
- return block_node_begin();
-}
-
-Region::block_iterator Region::block_end() {
- return block_node_end();
-}
-
-Region::const_block_iterator Region::block_begin() const {
- return block_node_begin();
-}
-
-Region::const_block_iterator Region::block_end() const {
- return block_node_end();
-}
-
Region::element_iterator Region::element_begin() {
return GraphTraits<Region*>::nodes_begin(this);
}
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index f0f3b1c..a654648 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -5370,6 +5370,12 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
SqrtTerm *= B;
SqrtTerm -= Four * (A * C);
+ if (SqrtTerm.isNegative()) {
+ // The loop is provably infinite.
+ const SCEV *CNC = SE.getCouldNotCompute();
+ return std::make_pair(CNC, CNC);
+ }
+
// Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
// integer value or else APInt::sqrt() will assert.
APInt SqrtVal(SqrtTerm.sqrt());
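The early exit added above works because SqrtTerm is the discriminant B^2 - 4*A*C of the quadratic being solved; when it is negative there is no real root, which the patch treats as a provably infinite loop. A tiny standalone illustration (not part of the patch, hypothetical coefficients):

#include <cstdio>

int main() {
  long long A = 1, B = 2, C = 5;            // hypothetical coefficients
  long long SqrtTerm = B * B - 4 * A * C;   // discriminant of A*x^2 + B*x + C
  if (SqrtTerm < 0)
    std::printf("discriminant %lld < 0: no real root, bail out\n", SqrtTerm);
  return 0;
}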
diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp
index 68873e2..5cfc810 100644
--- a/lib/Archive/ArchiveReader.cpp
+++ b/lib/Archive/ArchiveReader.cpp
@@ -82,14 +82,9 @@ Archive::parseMemberHeader(const char*& At, const char* End, std::string* error)
ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At;
At += sizeof(ArchiveMemberHeader);
- // Extract the size and determine if the file is
- // compressed or not (negative length).
int flags = 0;
int MemberSize = atoi(Hdr->size);
- if (MemberSize < 0) {
- flags |= ArchiveMember::CompressedFlag;
- MemberSize = -MemberSize;
- }
+ assert(MemberSize >= 0);
// Check the size of the member for sanity
if (At + MemberSize > End) {
diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp
index 9ef2943..ec6b4b8 100644
--- a/lib/Archive/ArchiveWriter.cpp
+++ b/lib/Archive/ArchiveWriter.cpp
@@ -204,7 +204,6 @@ Archive::writeMember(
std::ofstream& ARFile,
bool CreateSymbolTable,
bool TruncateNames,
- bool ShouldCompress,
std::string* ErrMsg
) {
@@ -349,7 +348,7 @@ Archive::writeSymbolTable(std::ofstream& ARFile) {
// table, flattening the file names (no directories, 15 chars max) and
// compressing each archive member.
bool
-Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
+Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames,
std::string* ErrMsg)
{
// Make sure they haven't opened up the file, not loaded it,
@@ -394,7 +393,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
// builds the symbol table, symTab.
for (MembersList::iterator I = begin(), E = end(); I != E; ++I) {
if (writeMember(*I, ArchiveFile, CreateSymbolTable,
- TruncateNames, Compress, ErrMsg)) {
+ TruncateNames, ErrMsg)) {
TmpArchive.eraseFromDisk();
ArchiveFile.close();
return true;
@@ -446,7 +445,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
// compatibility with other ar(1) implementations as well as allowing the
// archive to store both native .o and LLVM .bc files, both indexed.
if (foreignST) {
- if (writeMember(*foreignST, FinalFile, false, false, false, ErrMsg)) {
+ if (writeMember(*foreignST, FinalFile, false, false, ErrMsg)) {
FinalFile.close();
TmpArchive.eraseFromDisk();
return true;
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 670c1bb..e045804 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -456,11 +456,12 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(private);
KEYWORD(linker_private);
KEYWORD(linker_private_weak);
- KEYWORD(linker_private_weak_def_auto);
+ KEYWORD(linker_private_weak_def_auto); // FIXME: For backwards compatibility.
KEYWORD(internal);
KEYWORD(available_externally);
KEYWORD(linkonce);
KEYWORD(linkonce_odr);
+ KEYWORD(linkonce_odr_auto_hide);
KEYWORD(weak);
KEYWORD(weak_odr);
KEYWORD(appending);
@@ -553,6 +554,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(naked);
KEYWORD(nonlazybind);
KEYWORD(address_safety);
+ KEYWORD(ia_nsdialect);
KEYWORD(type);
KEYWORD(opaque);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 095b7c5..a9c7e98 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -184,12 +184,13 @@ bool LLParser::ParseTopLevelEntities() {
case lltok::kw_private: // OptionalLinkage
case lltok::kw_linker_private: // OptionalLinkage
case lltok::kw_linker_private_weak: // OptionalLinkage
- case lltok::kw_linker_private_weak_def_auto: // OptionalLinkage
+ case lltok::kw_linker_private_weak_def_auto: // FIXME: backwards compat.
case lltok::kw_internal: // OptionalLinkage
case lltok::kw_weak: // OptionalLinkage
case lltok::kw_weak_odr: // OptionalLinkage
case lltok::kw_linkonce: // OptionalLinkage
case lltok::kw_linkonce_odr: // OptionalLinkage
+ case lltok::kw_linkonce_odr_auto_hide: // OptionalLinkage
case lltok::kw_appending: // OptionalLinkage
case lltok::kw_dllexport: // OptionalLinkage
case lltok::kw_common: // OptionalLinkage
@@ -576,8 +577,7 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
Linkage != GlobalValue::InternalLinkage &&
Linkage != GlobalValue::PrivateLinkage &&
Linkage != GlobalValue::LinkerPrivateLinkage &&
- Linkage != GlobalValue::LinkerPrivateWeakLinkage &&
- Linkage != GlobalValue::LinkerPrivateWeakDefAutoLinkage)
+ Linkage != GlobalValue::LinkerPrivateWeakLinkage)
return Error(LinkageLoc, "invalid linkage type for alias");
Constant *Aliasee;
@@ -962,6 +962,7 @@ bool LLParser::ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind) {
case lltok::kw_naked: Attrs |= Attribute::Naked; break;
case lltok::kw_nonlazybind: Attrs |= Attribute::NonLazyBind; break;
case lltok::kw_address_safety: Attrs |= Attribute::AddressSafety; break;
+ case lltok::kw_ia_nsdialect: Attrs |= Attribute::IANSDialect; break;
case lltok::kw_alignstack: {
unsigned Alignment;
@@ -989,12 +990,12 @@ bool LLParser::ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind) {
/// ::= 'private'
/// ::= 'linker_private'
/// ::= 'linker_private_weak'
-/// ::= 'linker_private_weak_def_auto'
/// ::= 'internal'
/// ::= 'weak'
/// ::= 'weak_odr'
/// ::= 'linkonce'
/// ::= 'linkonce_odr'
+/// ::= 'linkonce_odr_auto_hide'
/// ::= 'available_externally'
/// ::= 'appending'
/// ::= 'dllexport'
@@ -1011,14 +1012,15 @@ bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) {
case lltok::kw_linker_private_weak:
Res = GlobalValue::LinkerPrivateWeakLinkage;
break;
- case lltok::kw_linker_private_weak_def_auto:
- Res = GlobalValue::LinkerPrivateWeakDefAutoLinkage;
- break;
case lltok::kw_internal: Res = GlobalValue::InternalLinkage; break;
case lltok::kw_weak: Res = GlobalValue::WeakAnyLinkage; break;
case lltok::kw_weak_odr: Res = GlobalValue::WeakODRLinkage; break;
case lltok::kw_linkonce: Res = GlobalValue::LinkOnceAnyLinkage; break;
case lltok::kw_linkonce_odr: Res = GlobalValue::LinkOnceODRLinkage; break;
+ case lltok::kw_linkonce_odr_auto_hide:
+ case lltok::kw_linker_private_weak_def_auto: // FIXME: For backwards compat.
+ Res = GlobalValue::LinkOnceODRAutoHideLinkage;
+ break;
case lltok::kw_available_externally:
Res = GlobalValue::AvailableExternallyLinkage;
break;
@@ -2652,11 +2654,11 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
case GlobalValue::PrivateLinkage:
case GlobalValue::LinkerPrivateLinkage:
case GlobalValue::LinkerPrivateWeakLinkage:
- case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
case GlobalValue::InternalLinkage:
case GlobalValue::AvailableExternallyLinkage:
case GlobalValue::LinkOnceAnyLinkage:
case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::LinkOnceODRAutoHideLinkage:
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
case GlobalValue::DLLExportLinkage:
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 0461e7b..9fd63f2 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -37,8 +37,10 @@ namespace lltok {
kw_global, kw_constant,
kw_private, kw_linker_private, kw_linker_private_weak,
- kw_linker_private_weak_def_auto, kw_internal,
- kw_linkonce, kw_linkonce_odr, kw_weak, kw_weak_odr, kw_appending,
+ kw_linker_private_weak_def_auto, // FIXME: For backwards compatibility.
+ kw_internal,
+ kw_linkonce, kw_linkonce_odr, kw_linkonce_odr_auto_hide,
+ kw_weak, kw_weak_odr, kw_appending,
kw_dllimport, kw_dllexport, kw_common, kw_available_externally,
kw_default, kw_hidden, kw_protected,
kw_unnamed_addr,
@@ -105,6 +107,7 @@ namespace lltok {
kw_naked,
kw_nonlazybind,
kw_address_safety,
+ kw_ia_nsdialect,
kw_type,
kw_opaque,
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 4ffee38..65fd52e 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -89,7 +89,7 @@ static GlobalValue::LinkageTypes GetDecodedLinkage(unsigned Val) {
case 12: return GlobalValue::AvailableExternallyLinkage;
case 13: return GlobalValue::LinkerPrivateLinkage;
case 14: return GlobalValue::LinkerPrivateWeakLinkage;
- case 15: return GlobalValue::LinkerPrivateWeakDefAutoLinkage;
+ case 15: return GlobalValue::LinkOnceODRAutoHideLinkage;
}
}
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 5b1725f..1d2dfc3 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -365,7 +365,7 @@ static unsigned getEncodedLinkage(const GlobalValue *GV) {
case GlobalValue::AvailableExternallyLinkage: return 12;
case GlobalValue::LinkerPrivateLinkage: return 13;
case GlobalValue::LinkerPrivateWeakLinkage: return 14;
- case GlobalValue::LinkerPrivateWeakDefAutoLinkage: return 15;
+ case GlobalValue::LinkOnceODRAutoHideLinkage: return 15;
}
llvm_unreachable("Invalid linkage");
}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index b48b5af..7364f42 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -220,16 +220,16 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
case GlobalValue::CommonLinkage:
case GlobalValue::LinkOnceAnyLinkage:
case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::LinkOnceODRAutoHideLinkage:
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
case GlobalValue::LinkerPrivateWeakLinkage:
- case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
if (MAI->getWeakDefDirective() != 0) {
// .globl _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
if ((GlobalValue::LinkageTypes)Linkage !=
- GlobalValue::LinkerPrivateWeakDefAutoLinkage)
+ GlobalValue::LinkOnceODRAutoHideLinkage)
// .weak_definition _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
else
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index d231665..d30e5bb 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains support for writing dwarf compile unit.
+// This file contains support for constructing a dwarf compile unit.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index d240389..2e189ad 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -61,6 +61,7 @@ add_llvm_library(LLVMCodeGen
MachineSSAUpdater.cpp
MachineScheduler.cpp
MachineSink.cpp
+ MachineTraceMetrics.cpp
MachineVerifier.cpp
OcamlGC.cpp
OptimizePHIs.cpp
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index 9840a40..f9347ef 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -17,12 +17,14 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "early-ifcvt"
+#include "MachineTraceMetrics.h"
#include "llvm/Function.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -30,6 +32,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
@@ -48,7 +51,10 @@ BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden,
static cl::opt<bool> Stress("stress-early-ifcvt", cl::Hidden,
cl::desc("Turn all knobs to 11"));
-typedef SmallSetVector<MachineBasicBlock*, 8> BlockSetVector;
+STATISTIC(NumDiamondsSeen, "Number of diamonds");
+STATISTIC(NumDiamondsConv, "Number of diamonds converted");
+STATISTIC(NumTrianglesSeen, "Number of triangles");
+STATISTIC(NumTrianglesConv, "Number of triangles converted");
//===----------------------------------------------------------------------===//
// SSAIfConv
@@ -94,6 +100,12 @@ public:
/// equal to Tail.
bool isTriangle() const { return TBB == Tail || FBB == Tail; }
+ /// Returns the Tail predecessor for the True side.
+ MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; }
+
+ /// Returns the Tail predecessor for the False side.
+ MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; }
+
/// Information about each phi in the Tail block.
struct PHIInfo {
MachineInstr *PHI;
@@ -132,6 +144,12 @@ private:
/// Find a valid insertion point in Head.
bool findInsertionPoint();
+ /// Replace PHI instructions in Tail with selects.
+ void replacePHIInstrs();
+
+ /// Insert selects and rewrite PHI operands to use them.
+ void rewritePHIOperands();
+
public:
/// runOnMachineFunction - Initialize per-function data structures.
void runOnMachineFunction(MachineFunction &MF) {
@@ -335,11 +353,7 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1)
return false;
- // We could support additional Tail predecessors by updating phis instead of
- // eliminating them. Let's see an example where it matters first.
Tail = Succ0->succ_begin()[0];
- if (Tail->pred_size() != 2)
- return false;
// This is not a triangle.
if (Tail != Succ1) {
@@ -389,8 +403,8 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
// Any phis in the tail block must be convertible to selects.
PHIs.clear();
- MachineBasicBlock *TPred = TBB == Tail ? Head : TBB;
- MachineBasicBlock *FPred = FBB == Tail ? Head : FBB;
+ MachineBasicBlock *TPred = getTPred();
+ MachineBasicBlock *FPred = getFPred();
for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end();
I != E && I->isPHI(); ++I) {
PHIs.push_back(&*I);
@@ -426,24 +440,18 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
if (!findInsertionPoint())
return false;
+ if (isTriangle())
+ ++NumTrianglesSeen;
+ else
+ ++NumDiamondsSeen;
return true;
}
-
-/// convertIf - Execute the if conversion after canConvertIf has determined the
-/// feasibility.
-///
-/// Any basic blocks erased will be added to RemovedBlocks.
-///
-void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
- assert(Head && Tail && TBB && FBB && "Call canConvertIf first.");
-
- // Move all instructions into Head, except for the terminators.
- if (TBB != Tail)
- Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator());
- if (FBB != Tail)
- Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator());
-
+/// replacePHIInstrs - Completely replace PHI instructions with selects.
+/// This is possible when the only Tail predecessors are the if-converted
+/// blocks.
+void SSAIfConv::replacePHIInstrs() {
+ assert(Tail->pred_size() == 2 && "Cannot replace PHIs");
MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
assert(FirstTerm != Head->end() && "No terminators");
DebugLoc HeadDL = FirstTerm->getDebugLoc();
@@ -459,6 +467,66 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
PI.PHI->eraseFromParent();
PI.PHI = 0;
}
+}
+
+/// rewritePHIOperands - When there are additional Tail predecessors, insert
+/// select instructions in Head and rewrite PHI operands to use the selects.
+/// Keep the PHI instructions in Tail to handle the other predecessors.
+void SSAIfConv::rewritePHIOperands() {
+ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+ assert(FirstTerm != Head->end() && "No terminators");
+ DebugLoc HeadDL = FirstTerm->getDebugLoc();
+
+ // Convert all PHIs to select instructions inserted before FirstTerm.
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
+ PHIInfo &PI = PHIs[i];
+ DEBUG(dbgs() << "If-converting " << *PI.PHI);
+ unsigned PHIDst = PI.PHI->getOperand(0).getReg();
+ unsigned DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst));
+ TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
+ DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm));
+
+ // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred.
+ for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) {
+ MachineBasicBlock *MBB = PI.PHI->getOperand(i-1).getMBB();
+ if (MBB == getTPred()) {
+ PI.PHI->getOperand(i-1).setMBB(Head);
+ PI.PHI->getOperand(i-2).setReg(DstReg);
+ } else if (MBB == getFPred()) {
+ PI.PHI->RemoveOperand(i-1);
+ PI.PHI->RemoveOperand(i-2);
+ }
+ }
+ DEBUG(dbgs() << " --> " << *PI.PHI);
+ }
+}
+
+/// convertIf - Execute the if conversion after canConvertIf has determined the
+/// feasibility.
+///
+/// Any basic blocks erased will be added to RemovedBlocks.
+///
+void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
+ assert(Head && Tail && TBB && FBB && "Call canConvertIf first.");
+
+ // Update statistics.
+ if (isTriangle())
+ ++NumTrianglesConv;
+ else
+ ++NumDiamondsConv;
+
+ // Move all instructions into Head, except for the terminators.
+ if (TBB != Tail)
+ Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator());
+ if (FBB != Tail)
+ Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator());
+
+ // Are there extra Tail predecessors?
+ bool ExtraPreds = Tail->pred_size() != 2;
+ if (ExtraPreds)
+ rewritePHIOperands();
+ else
+ replacePHIInstrs();
// Fix up the CFG, temporarily leave Head without any successors.
Head->removeSuccessor(TBB);
@@ -470,6 +538,7 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
// Fix up Head's terminators.
// It should become a single branch or a fallthrough.
+ DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc();
TII->RemoveBranch(*Head);
// Erase the now empty conditional blocks. It is likely that Head can fall
@@ -484,7 +553,7 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
}
assert(Head->succ_empty() && "Additional head successors?");
- if (Head->isLayoutSuccessor(Tail)) {
+ if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) {
// Splice Tail onto the end of Head.
DEBUG(dbgs() << "Joining tail BB#" << Tail->getNumber()
<< " into head BB#" << Head->getNumber() << '\n');
@@ -512,9 +581,12 @@ namespace {
class EarlyIfConverter : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const MCSchedModel *SchedModel;
MachineRegisterInfo *MRI;
MachineDominatorTree *DomTree;
MachineLoopInfo *Loops;
+ MachineTraceMetrics *Traces;
+ MachineTraceMetrics::Ensemble *MinInstr;
SSAIfConv IfConv;
public:
@@ -527,6 +599,8 @@ private:
bool tryConvertIf(MachineBasicBlock*);
void updateDomTree(ArrayRef<MachineBasicBlock*> Removed);
void updateLoops(ArrayRef<MachineBasicBlock*> Removed);
+ void invalidateTraces();
+ bool shouldConvertIf();
};
} // end anonymous namespace
@@ -537,6 +611,7 @@ INITIALIZE_PASS_BEGIN(EarlyIfConverter,
"early-ifcvt", "Early If Converter", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
INITIALIZE_PASS_END(EarlyIfConverter,
"early-ifcvt", "Early If Converter", false, false)
@@ -546,6 +621,8 @@ void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineTraceMetrics>();
+ AU.addPreserved<MachineTraceMetrics>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -576,12 +653,117 @@ void EarlyIfConverter::updateLoops(ArrayRef<MachineBasicBlock*> Removed) {
Loops->removeBlock(Removed[i]);
}
+/// Invalidate MachineTraceMetrics before if-conversion.
+void EarlyIfConverter::invalidateTraces() {
+ Traces->verifyAnalysis();
+ Traces->invalidate(IfConv.Head);
+ Traces->invalidate(IfConv.Tail);
+ Traces->invalidate(IfConv.TBB);
+ Traces->invalidate(IfConv.FBB);
+ Traces->verifyAnalysis();
+}
+
+// Adjust cycles with downward saturation.
+static unsigned adjCycles(unsigned Cyc, int Delta) {
+ if (Delta < 0 && Cyc + Delta > Cyc)
+ return 0;
+ return Cyc + Delta;
+}
+
+/// Apply cost model and heuristics to the if-conversion in IfConv.
+/// Return true if the conversion is a good idea.
+///
+bool EarlyIfConverter::shouldConvertIf() {
+ // Stress testing mode disables all cost considerations.
+ if (Stress)
+ return true;
+
+ if (!MinInstr)
+ MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+
+ MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred());
+ MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred());
+ DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace);
+ unsigned MinCrit = std::min(TBBTrace.getCriticalPath(),
+ FBBTrace.getCriticalPath());
+
+ // Set a somewhat arbitrary limit on the critical path extension we accept.
+ unsigned CritLimit = SchedModel->MispredictPenalty/2;
+
+ // If-conversion only makes sense when there is unexploited ILP. Compute the
+ // maximum-ILP resource length of the trace after if-conversion. Compare it
+ // to the shortest critical path.
+ SmallVector<const MachineBasicBlock*, 1> ExtraBlocks;
+ if (IfConv.TBB != IfConv.Tail)
+ ExtraBlocks.push_back(IfConv.TBB);
+ unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks);
+ DEBUG(dbgs() << "Resource length " << ResLength
+ << ", minimal critical path " << MinCrit << '\n');
+ if (ResLength > MinCrit + CritLimit) {
+ DEBUG(dbgs() << "Not enough available ILP.\n");
+ return false;
+ }
+
+ // Assume that the depth of the first head terminator will also be the depth
+ // of the select instruction inserted, as determined by the flag dependency.
+ // TBB / FBB data dependencies may delay the select even more.
+ MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head);
+ unsigned BranchDepth =
+ HeadTrace.getInstrCycles(IfConv.Head->getFirstTerminator()).Depth;
+ DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n');
+
+ // Look at all the tail phis, and compute the critical path extension caused
+ // by inserting select instructions.
+ MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail);
+ for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) {
+ SSAIfConv::PHIInfo &PI = IfConv.PHIs[i];
+ unsigned Slack = TailTrace.getInstrSlack(PI.PHI);
+ unsigned MaxDepth = Slack + TailTrace.getInstrCycles(PI.PHI).Depth;
+ DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI);
+
+ // The condition is pulled into the critical path.
+ unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles);
+ if (CondDepth > MaxDepth) {
+ unsigned Extra = CondDepth - MaxDepth;
+ DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n");
+ if (Extra > CritLimit) {
+ DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ return false;
+ }
+ }
+
+ // The TBB value is pulled into the critical path.
+ unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(PI.PHI), PI.TCycles);
+ if (TDepth > MaxDepth) {
+ unsigned Extra = TDepth - MaxDepth;
+ DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n");
+ if (Extra > CritLimit) {
+ DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ return false;
+ }
+ }
+
+ // The FBB value is pulled into the critical path.
+ unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(PI.PHI), PI.FCycles);
+ if (FDepth > MaxDepth) {
+ unsigned Extra = FDepth - MaxDepth;
+ DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n");
+ if (Extra > CritLimit) {
+ DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
/// Attempt repeated if-conversion on MBB, return true if successful.
///
bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
bool Changed = false;
- while (IfConv.canConvertIf(MBB)) {
+ while (IfConv.canConvertIf(MBB) && shouldConvertIf()) {
// If-convert MBB and update analyses.
+ invalidateTraces();
SmallVector<MachineBasicBlock*, 4> RemovedBlocks;
IfConv.convertIf(RemovedBlocks);
Changed = true;
@@ -597,9 +779,12 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
<< ((Value*)MF.getFunction())->getName() << '\n');
TII = MF.getTarget().getInstrInfo();
TRI = MF.getTarget().getRegisterInfo();
+ SchedModel = MF.getTarget().getInstrItineraryData()->SchedModel;
MRI = &MF.getRegInfo();
DomTree = &getAnalysis<MachineDominatorTree>();
Loops = getAnalysisIfAvailable<MachineLoopInfo>();
+ Traces = &getAnalysis<MachineTraceMetrics>();
+ MinInstr = 0;
bool Changed = false;
IfConv.runOnMachineFunction(MF);
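The adjCycles helper introduced above relies on unsigned wrap-around to saturate at zero when a negative delta would underflow the cycle count. A standalone copy of that logic with a small test harness (the harness itself is not part of the patch):

#include <cassert>
#include <cstdio>

// Adjust cycles with downward saturation (same logic as the patch's adjCycles).
static unsigned adjCycles(unsigned Cyc, int Delta) {
  if (Delta < 0 && Cyc + Delta > Cyc) // unsigned wrap-around detected
    return 0;
  return Cyc + Delta;
}

int main() {
  assert(adjCycles(10, -3) == 7); // ordinary subtraction
  assert(adjCycles(2, -5) == 0);  // saturates at zero instead of wrapping
  std::printf("adjCycles(2, -5) = %u\n", adjCycles(2, -5));
  return 0;
}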
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index b14afc2..7a17331 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -131,13 +131,16 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
} else {
TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg,
MI->getOperand(2).isKill());
+
+ // Implicitly define DstReg for subsequent uses.
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+ CopyMI->addRegisterDefined(DstReg);
+
// Transfer the kill/dead flags, if needed.
if (MI->getOperand(0).isDead())
TransferDeadFlag(MI, DstSubReg, TRI);
- DEBUG({
- MachineBasicBlock::iterator dMI = MI;
- dbgs() << "subreg: " << *(--dMI);
- });
+ DEBUG(dbgs() << "subreg: " << *CopyMI);
}
DEBUG(dbgs() << '\n');
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 01077db..0a795e6 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -160,7 +160,7 @@ void LiveInterval::markValNoForDeletion(VNInfo *ValNo) {
valnos.pop_back();
} while (!valnos.empty() && valnos.back()->isUnused());
} else {
- ValNo->setIsUnused(true);
+ ValNo->markUnused();
}
}
@@ -667,9 +667,6 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
}
}
- // Merge the relevant flags.
- V2->mergeFlags(V1);
-
// Now that V1 is dead, remove it.
markValNoForDeletion(V1);
@@ -737,9 +734,7 @@ void LiveInterval::print(raw_ostream &OS) const {
} else {
OS << vni->def;
if (vni->isPHIDef())
- OS << "-phidef";
- if (vni->hasPHIKill())
- OS << "-phikill";
+ OS << "-phi";
}
}
}
@@ -827,14 +822,11 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
MachineOperand &MO = RI.getOperand();
MachineInstr *MI = MO.getParent();
++RI;
- if (MO.isUse() && MO.isUndef())
- continue;
// DBG_VALUE instructions should have been eliminated earlier.
- SlotIndex Idx = LIS.getInstructionIndex(MI);
- Idx = Idx.getRegSlot(MO.isUse());
- const VNInfo *VNI = LI.getVNInfoAt(Idx);
- // FIXME: We should be able to assert(VNI) here, but the coalescer leaves
- // dangling defs around.
+ LiveRangeQuery LRQ(LI, LIS.getInstructionIndex(MI));
+ const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined();
+ // In the case of an <undef> use that isn't tied to any def, VNI will be
+ // NULL. If the use is tied to a def, VNI will be the defined value.
if (!VNI)
continue;
MO.setReg(LIV[getEqClass(VNI)]->reg);
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 819707f..d0f8ae1 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -27,6 +27,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -38,7 +39,13 @@
#include <cmath>
using namespace llvm;
+// Switch to the new experimental algorithm for computing live intervals.
+static cl::opt<bool>
+NewLiveIntervals("new-live-intervals", cl::Hidden,
+ cl::desc("Use new algorithm forcomputing live intervals"));
+
char LiveIntervals::ID = 0;
+char &llvm::LiveIntervalsID = LiveIntervals::ID;
INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
"Live Interval Analysis", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
@@ -105,7 +112,19 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
AllocatableRegs = TRI->getAllocatableSet(fn);
ReservedRegs = TRI->getReservedRegs(fn);
- computeIntervals();
+ // Allocate space for all virtual registers.
+ VirtRegIntervals.resize(MRI->getNumVirtRegs());
+
+ if (NewLiveIntervals) {
+ // This is the new way of computing live intervals.
+ // It is independent of LiveVariables, and it can run at any time.
+ computeVirtRegs();
+ computeRegMasks();
+ } else {
+ // This is the old way of computing live intervals.
+ // It depends on LiveVariables.
+ computeIntervals();
+ }
computeLiveInRegUnits();
DEBUG(dump());
@@ -238,7 +257,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// new valno in the killing blocks.
assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks");
DEBUG(dbgs() << " phi-join");
- ValNo->setHasPHIKill(true);
} else {
// Iterate over all of the blocks that the variable is completely
// live in, adding [insrtIndex(begin), instrIndex(end)+4) to the
@@ -266,7 +284,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
assert(getInstructionFromIndex(Start) == 0 &&
"PHI def index points at actual instruction.");
ValNo = interval.getNextValue(Start, VNInfoAllocator);
- ValNo->setIsPHIDef(true);
}
LiveRange LR(Start, killIdx, ValNo);
interval.addRange(LR);
@@ -340,7 +357,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
SlotIndex killIndex = getMBBEndIdx(mbb);
LiveRange LR(defIndex, killIndex, ValNo);
interval.addRange(LR);
- ValNo->setHasPHIKill(true);
DEBUG(dbgs() << " phi-join +" << LR);
} else {
llvm_unreachable("Multiply defined register");
@@ -442,6 +458,49 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) {
}
+/// computeVirtRegInterval - Compute the live interval of a virtual register,
+/// based on defs and uses.
+void LiveIntervals::computeVirtRegInterval(LiveInterval *LI) {
+ assert(LRCalc && "LRCalc not initialized.");
+ assert(LI->empty() && "Should only compute empty intervals.");
+ LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+ LRCalc->createDeadDefs(LI);
+ LRCalc->extendToUses(LI);
+}
+
+void LiveIntervals::computeVirtRegs() {
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ LiveInterval *LI = createInterval(Reg);
+ VirtRegIntervals[Reg] = LI;
+ computeVirtRegInterval(LI);
+ }
+}
+
+void LiveIntervals::computeRegMasks() {
+ RegMaskBlocks.resize(MF->getNumBlockIDs());
+
+ // Find all instructions with regmask operands.
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()];
+ RMB.first = RegMaskSlots.size();
+ for (MachineBasicBlock::iterator MI = MBB->begin(), ME = MBB->end();
+ MI != ME; ++MI)
+ for (MIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isRegMask())
+ continue;
+ RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot());
+ RegMaskBits.push_back(MO->getRegMask());
+ }
+ // Compute the number of register mask instructions in this block.
+ RMB.second = RegMaskSlots.size() - RMB.first;
+ }
+}
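A minimal sketch, not part of the patch, of how the (first index, count) pairs filled in by computeRegMasks() are meant to be consumed: slicing the function-wide RegMaskSlots array down to a single block. The accessor name regMaskSlotsInBlock is made up for illustration; RegMaskBlocks and RegMaskSlots are the members used above.

  // Sketch only: regmask slot indices recorded for block number MBBNum.
  ArrayRef<SlotIndex> regMaskSlotsInBlock(unsigned MBBNum) const {
    std::pair<unsigned, unsigned> P = RegMaskBlocks[MBBNum];
    return ArrayRef<SlotIndex>(RegMaskSlots).slice(P.first, P.second);
  }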
+
//===----------------------------------------------------------------------===//
// Register Unit Liveness
//===----------------------------------------------------------------------===//
@@ -648,7 +707,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
continue;
if (VNI->isPHIDef()) {
// This is a dead PHI. Remove it.
- VNI->setIsUnused(true);
+ VNI->markUnused();
NewLI.removeRange(*LII);
DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
CanSeparate = true;
@@ -720,6 +779,25 @@ LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
return MBB1 == MBB2 ? MBB1 : NULL;
}
+bool
+LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const {
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I != E; ++I) {
+ const VNInfo *PHI = *I;
+ if (PHI->isUnused() || !PHI->isPHIDef())
+ continue;
+ const MachineBasicBlock *PHIMBB = getMBBFromIndex(PHI->def);
+ // Conservatively return true instead of scanning huge predecessor lists.
+ if (PHIMBB->pred_size() > 100)
+ return true;
+ for (MachineBasicBlock::const_pred_iterator
+ PI = PHIMBB->pred_begin(), PE = PHIMBB->pred_end(); PI != PE; ++PI)
+ if (VNI == LI.getVNInfoBefore(Indexes->getMBBEndIdx(*PI)))
+ return true;
+ }
+ return false;
+}
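Since the PHI-kill flag no longer lives on VNInfo (the setHasPHIKill() calls are removed above), a caller that used to test VNI->hasPHIKill() would now query the analysis instead. A hedged sketch, where LIS, LI and VNI are whatever the caller already has in scope:

  // Sketch: replaces the old per-VNInfo flag test.
  if (LIS.hasPHIKill(LI, VNI)) {
    // ... the value reaches a PHI in some successor; handle as before ...
  }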
+
float
LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
// Limit the loop depth ridiculousness.
@@ -744,7 +822,6 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
VNInfo* VN = Interval.getNextValue(
SlotIndex(getInstructionIndex(startInst).getRegSlot()),
getVNInfoAllocator());
- VN->setHasPHIKill(true);
LiveRange LR(
SlotIndex(getInstructionIndex(startInst).getRegSlot()),
getMBBEndIdx(startInst->getParent()), VN);
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index 9384075..d828f25 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -54,8 +54,7 @@ void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) {
.getRegSlot(I.getOperand().isEarlyClobber());
// Create the def in LI. This may find an existing def.
- VNInfo *VNI = LI->createDeadDef(Idx, *Alloc);
- VNI->setIsPHIDef(MI->isPHI());
+ LI->createDeadDef(Idx, *Alloc);
}
}
@@ -320,7 +319,6 @@ void LiveRangeCalc::updateSSA() {
SlotIndex Start, End;
tie(Start, End) = Indexes->getMBBRange(MBB);
VNInfo *VNI = I->LI->getNextValue(Start, *Alloc);
- VNI->setIsPHIDef(true);
I->Value = VNI;
// This block is done, we know the final value.
I->DomNode = 0;
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 896fdbf..b4ce9aa 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -239,6 +239,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
// Collect virtual registers to be erased after MI is gone.
SmallVector<unsigned, 8> RegsToErase;
+ bool ReadsPhysRegs = false;
// Check for live intervals that may shrink
for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
@@ -246,8 +247,12 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
if (!MOI->isReg())
continue;
unsigned Reg = MOI->getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // Check if MI reads any unreserved physregs.
+ if (Reg && MOI->readsReg() && !LIS.isReserved(Reg))
+ ReadsPhysRegs = true;
continue;
+ }
LiveInterval &LI = LIS.getInterval(Reg);
// Shrink read registers, unless it is likely to be expensive and
@@ -271,11 +276,30 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
}
}
- if (TheDelegate)
- TheDelegate->LRE_WillEraseInstruction(MI);
- LIS.RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- ++NumDCEDeleted;
+ // Currently, we don't support DCE of physreg live ranges. If MI reads
+ // any unreserved physregs, don't erase the instruction, but turn it into
+ // a KILL instead. This way, the physreg live ranges don't end up
+ // dangling.
+ // FIXME: It would be better to have something like shrinkToUses() for
+ // physregs. That could potentially enable more DCE and it would free up
+ // the physreg. It would not happen often, though.
+ if (ReadsPhysRegs) {
+ MI->setDesc(TII.get(TargetOpcode::KILL));
+ // Remove all operands that aren't physregs.
+ for (unsigned i = MI->getNumOperands(); i; --i) {
+ const MachineOperand &MO = MI->getOperand(i-1);
+ if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ continue;
+ MI->RemoveOperand(i-1);
+ }
+ DEBUG(dbgs() << "Converted physregs to:\t" << *MI);
+ } else {
+ if (TheDelegate)
+ TheDelegate->LRE_WillEraseInstruction(MI);
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ ++NumDCEDeleted;
+ }
// Erase any virtregs that are now empty and unused. There may be <undef>
// uses around. Keep the empty live range in that case.
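For illustration only, with hypothetical operands: a dead copy such as %vreg7<def> = COPY %EAX is not erased outright by this path; it is rewritten to KILL %EAX, dropping the virtual def but keeping the physreg read so the %EAX live range is not left dangling.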
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index ecc1e95..cf13dbd 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -109,7 +109,8 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
assert(N->getParent() != 0 && "machine instruction not in a basic block");
// Remove from the use/def lists.
- N->RemoveRegOperandsFromUseLists();
+ if (MachineFunction *MF = N->getParent()->getParent())
+ N->RemoveRegOperandsFromUseLists(MF->getRegInfo());
N->setParent(0);
@@ -310,8 +311,11 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
if (!succ_empty()) {
if (Indexes) OS << '\t';
OS << " Successors according to CFG:";
- for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI)
+ for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) {
OS << " BB#" << (*SI)->getNumber();
+ if (!Weights.empty())
+ OS << '(' << *getWeightIterator(SI) << ')';
+ }
OS << '\n';
}
}
@@ -477,18 +481,42 @@ MachineBasicBlock::removeSuccessor(succ_iterator I) {
void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
MachineBasicBlock *New) {
- uint32_t weight = 0;
- succ_iterator SI = std::find(Successors.begin(), Successors.end(), Old);
+ if (Old == New)
+ return;
- // If Weight list is empty it means we don't use it (disabled optimization).
- if (!Weights.empty()) {
- weight_iterator WI = getWeightIterator(SI);
- weight = *WI;
+ succ_iterator E = succ_end();
+ succ_iterator NewI = E;
+ succ_iterator OldI = E;
+ for (succ_iterator I = succ_begin(); I != E; ++I) {
+ if (*I == Old) {
+ OldI = I;
+ if (NewI != E)
+ break;
+ }
+ if (*I == New) {
+ NewI = I;
+ if (OldI != E)
+ break;
+ }
}
+ assert(OldI != E && "Old is not a successor of this block");
+ Old->removePredecessor(this);
- // Update the successor information.
- removeSuccessor(SI);
- addSuccessor(New, weight);
+ // If New isn't already a successor, let it take Old's place.
+ if (NewI == E) {
+ New->addPredecessor(this);
+ *OldI = New;
+ return;
+ }
+
+ // New is already a successor.
+ // Update its weight instead of adding a duplicate edge.
+ if (!Weights.empty()) {
+ weight_iterator OldWI = getWeightIterator(OldI);
+ *getWeightIterator(NewI) += *OldWI;
+ Weights.erase(OldWI);
+ }
+ Successors.erase(OldI);
}
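For illustration: if the edge to Old carried weight 3 and an existing edge to New carried weight 5, the surviving single edge to New ends up with weight 8, and Old's entries are erased from both Successors and Weights.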
void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
@@ -507,14 +535,13 @@ void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) {
while (!fromMBB->succ_empty()) {
MachineBasicBlock *Succ = *fromMBB->succ_begin();
- uint32_t weight = 0;
-
+ uint32_t Weight = 0;
// If Weight list is empty it means we don't use it (disabled optimization).
if (!fromMBB->Weights.empty())
- weight = *fromMBB->Weights.begin();
+ Weight = *fromMBB->Weights.begin();
- addSuccessor(Succ, weight);
+ addSuccessor(Succ, Weight);
fromMBB->removeSuccessor(Succ);
}
}
@@ -526,7 +553,10 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
while (!fromMBB->succ_empty()) {
MachineBasicBlock *Succ = *fromMBB->succ_begin();
- addSuccessor(Succ);
+ uint32_t Weight = 0;
+ if (!fromMBB->Weights.empty())
+ Weight = *fromMBB->Weights.begin();
+ addSuccessor(Succ, Weight);
fromMBB->removeSuccessor(Succ);
// Fix up any PHI nodes in the successor.
@@ -540,9 +570,12 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
}
}
+bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const {
+ return std::find(pred_begin(), pred_end(), MBB) != pred_end();
+}
+
bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
- const_succ_iterator I = std::find(Successors.begin(), Successors.end(), MBB);
- return I != Successors.end();
+ return std::find(succ_begin(), succ_end(), MBB) != succ_end();
}
bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
@@ -909,12 +942,11 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
/// getSuccWeight - Return weight of the edge from this block to MBB.
///
-uint32_t MachineBasicBlock::getSuccWeight(const MachineBasicBlock *succ) const {
+uint32_t MachineBasicBlock::getSuccWeight(const_succ_iterator Succ) const {
if (Weights.empty())
return 0;
- const_succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
- return *getWeightIterator(I);
+ return *getWeightIterator(Succ);
}
/// getWeightIterator - Return weight iterator corresponding to the I successor
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 5a15f92..c4dca2c 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -985,8 +985,22 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// boiler plate.
Cond.clear();
MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
- if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond))
+ if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
+ // If PrevBB has a two-way branch, try to re-order the branches
+ // such that we branch to the successor with higher weight first.
+ if (TBB && !Cond.empty() && FBB &&
+ MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) &&
+ !TII->ReverseBranchCondition(Cond)) {
+ DEBUG(dbgs() << "Reverse order of the two branches: "
+ << getBlockName(PrevBB) << "\n");
+ DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB)
+ << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n");
+ DebugLoc dl; // FIXME: this is nowhere
+ TII->RemoveBranch(*PrevBB);
+ TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl);
+ }
PrevBB->updateTerminator();
+ }
}
// Fixup the last block.
@@ -997,29 +1011,63 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// Walk through the backedges of the function now that we have fully laid out
// the basic blocks and align the destination of each backedge. We don't rely
- // on the loop info here so that we can align backedges in unnatural CFGs and
- // backedges that were introduced purely because of the loop rotations done
- // during this layout pass.
- // FIXME: This isn't quite right, we shouldn't align backedges that result
- // from blocks being sunken below the exit block for the function.
+ // exclusively on the loop info here so that we can align backedges in
+ // unnatural CFGs and backedges that were introduced purely because of the
+ // loop rotations done during this layout pass.
if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
return;
unsigned Align = TLI->getPrefLoopAlignment();
if (!Align)
return; // Don't care about loop alignment.
+ if (FunctionChain.begin() == FunctionChain.end())
+ return; // Empty chain.
- SmallPtrSet<MachineBasicBlock *, 16> PreviousBlocks;
- for (BlockChain::iterator BI = FunctionChain.begin(),
+ const BranchProbability ColdProb(1, 5); // 20%
+ BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin());
+ BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
+ for (BlockChain::iterator BI = llvm::next(FunctionChain.begin()),
BE = FunctionChain.end();
BI != BE; ++BI) {
- PreviousBlocks.insert(*BI);
- // Set alignment on the destination of all the back edges in the new
- // ordering.
- for (MachineBasicBlock::succ_iterator SI = (*BI)->succ_begin(),
- SE = (*BI)->succ_end();
- SI != SE; ++SI)
- if (PreviousBlocks.count(*SI))
- (*SI)->setAlignment(Align);
+ // Don't align non-looping basic blocks. These are unlikely to execute
+ // enough times to matter in practice. Note that we'll still handle
+ // unnatural CFGs inside of a natural outer loop (the common case) and
+ // rotated loops.
+ MachineLoop *L = MLI->getLoopFor(*BI);
+ if (!L)
+ continue;
+
+ // If the block is cold relative to the function entry don't waste space
+ // aligning it.
+ BlockFrequency Freq = MBFI->getBlockFreq(*BI);
+ if (Freq < WeightedEntryFreq)
+ continue;
+
+ // If the block is cold relative to its loop header, don't align it
+ // regardless of what edges into the block exist.
+ MachineBasicBlock *LoopHeader = L->getHeader();
+ BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
+ if (Freq < (LoopHeaderFreq * ColdProb))
+ continue;
+
+ // Check for the existence of a non-layout predecessor which would benefit
+ // from aligning this block.
+ MachineBasicBlock *LayoutPred = *llvm::prior(BI);
+
+ // Force alignment if all the predecessors are jumps. We already checked
+ // that the block isn't cold above.
+ if (!LayoutPred->isSuccessor(*BI)) {
+ (*BI)->setAlignment(Align);
+ continue;
+ }
+
+ // Align this block if the layout predecessor's edge into this block is
+ // cold relative to the block. When this is true, other predecessors make up
+ // all of the hot entries into the block and thus alignment is likely to be
+ // important.
+ BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI);
+ BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
+ if (LayoutEdgeFreq <= (Freq * ColdProb))
+ (*BI)->setAlignment(Align);
}
}
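A rough worked example of the ColdProb (20%) tests above, with made-up frequencies: if the entry block's frequency is 1000, any in-loop block below 200 is skipped outright; a block at frequency 400 whose loop header sits at 2500 is also skipped, since 400 < 2500 * 0.2 = 500; a block surviving both tests is then aligned either when its layout predecessor is not a CFG predecessor at all, or when the fall-through edge contributes at most 20% of the block's frequency, i.e. the jump edges carry most of the entries.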
diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index 0cc1af0..4479211 100644
--- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -38,7 +38,7 @@ getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const {
Scale = 1;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
- uint32_t Weight = getEdgeWeight(MBB, *I);
+ uint32_t Weight = getEdgeWeight(MBB, I);
Sum += Weight;
}
@@ -53,22 +53,30 @@ getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const {
Sum = 0;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
- uint32_t Weight = getEdgeWeight(MBB, *I);
+ uint32_t Weight = getEdgeWeight(MBB, I);
Sum += Weight / Scale;
}
assert(Sum <= UINT32_MAX);
return Sum;
}
-uint32_t
-MachineBranchProbabilityInfo::getEdgeWeight(const MachineBasicBlock *Src,
- const MachineBasicBlock *Dst) const {
+uint32_t MachineBranchProbabilityInfo::
+getEdgeWeight(const MachineBasicBlock *Src,
+ MachineBasicBlock::const_succ_iterator Dst) const {
uint32_t Weight = Src->getSuccWeight(Dst);
if (!Weight)
return DEFAULT_WEIGHT;
return Weight;
}
+uint32_t MachineBranchProbabilityInfo::
+getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
+ // This is a linear search. Try to use the const_succ_iterator version when
+ // possible.
+ return getEdgeWeight(Src, std::find(Src->succ_begin(), Src->succ_end(), Dst));
+}
+
bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src,
MachineBasicBlock *Dst) const {
// Hot probability is at least 4/5 = 80%
@@ -82,7 +90,7 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
MachineBasicBlock *MaxSucc = 0;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
- uint32_t Weight = getEdgeWeight(MBB, *I);
+ uint32_t Weight = getEdgeWeight(MBB, I);
if (Weight > MaxWeight) {
MaxWeight = Weight;
MaxSucc = *I;
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 9cfe9ab..896461f 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -215,8 +215,10 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
if (MO.isDef() &&
(MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end())))
continue;
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- PhysRefs.insert(*AI);
+ // Reading constant physregs is ok.
+ if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ PhysRefs.insert(*AI);
if (MO.isDef())
PhysDefs.push_back(Reg);
}
@@ -324,6 +326,29 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
MachineInstr *CSMI, MachineInstr *MI) {
// FIXME: Heuristics that work around the lack of live range splitting.
+ // If CSReg is used at all uses of Reg, CSE should not increase register
+ // pressure of CSReg.
+ bool MayIncreasePressure = true;
+ if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
+ TargetRegisterInfo::isVirtualRegister(Reg)) {
+ MayIncreasePressure = false;
+ SmallPtrSet<MachineInstr*, 8> CSUses;
+ for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *Use = &*I;
+ CSUses.insert(Use);
+ }
+ for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *Use = &*I;
+ if (!CSUses.count(Use)) {
+ MayIncreasePressure = true;
+ break;
+ }
+ }
+ }
+ if (!MayIncreasePressure) return true;
+
// Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
// an immediate predecessor. We don't want to increase register pressure and
// end up causing other computation to be spilled.
@@ -394,6 +419,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
bool Changed = false;
SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
+ SmallVector<unsigned, 2> ImplicitDefsToUpdate;
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
MachineInstr *MI = &*I;
++I;
@@ -463,15 +489,24 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Check if it's profitable to perform this CSE.
bool DoCSE = true;
- unsigned NumDefs = MI->getDesc().getNumDefs();
+ unsigned NumDefs = MI->getDesc().getNumDefs() +
+ MI->getDesc().getNumImplicitDefs();
+
for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
unsigned OldReg = MO.getReg();
unsigned NewReg = CSMI->getOperand(i).getReg();
- if (OldReg == NewReg)
+
+ // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
+ // we should make sure it is not dead at CSMI.
+ if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
+ ImplicitDefsToUpdate.push_back(i);
+ if (OldReg == NewReg) {
+ --NumDefs;
continue;
+ }
assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
TargetRegisterInfo::isVirtualRegister(NewReg) &&
@@ -503,6 +538,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
MRI->clearKillFlags(CSEPairs[i].second);
}
+ // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
+ // we should make sure it is not dead at CSMI.
+ for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i)
+ CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false);
+
if (CrossMBBPhysDef) {
// Add physical register defs now coming in from a predecessor to MBB
// livein list.
@@ -526,6 +566,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
Exps.push_back(MI);
}
CSEPairs.clear();
+ ImplicitDefsToUpdate.clear();
}
return Changed;
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 8dada05..b166849 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -47,55 +47,6 @@ using namespace llvm;
// MachineOperand Implementation
//===----------------------------------------------------------------------===//
-/// AddRegOperandToRegInfo - Add this register operand to the specified
-/// MachineRegisterInfo. If it is null, then the next/prev fields should be
-/// explicitly nulled out.
-void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) {
- assert(isReg() && "Can only add reg operand to use lists");
-
- // If the reginfo pointer is null, just explicitly null out or next/prev
- // pointers, to ensure they are not garbage.
- if (RegInfo == 0) {
- Contents.Reg.Prev = 0;
- Contents.Reg.Next = 0;
- return;
- }
-
- // Otherwise, add this operand to the head of the registers use/def list.
- MachineOperand **Head = &RegInfo->getRegUseDefListHead(getReg());
-
- // For SSA values, we prefer to keep the definition at the start of the list.
- // we do this by skipping over the definition if it is at the head of the
- // list.
- if (*Head && (*Head)->isDef())
- Head = &(*Head)->Contents.Reg.Next;
-
- Contents.Reg.Next = *Head;
- if (Contents.Reg.Next) {
- assert(getReg() == Contents.Reg.Next->getReg() &&
- "Different regs on the same list!");
- Contents.Reg.Next->Contents.Reg.Prev = &Contents.Reg.Next;
- }
-
- Contents.Reg.Prev = Head;
- *Head = this;
-}
-
-/// RemoveRegOperandFromRegInfo - Remove this register operand from the
-/// MachineRegisterInfo it is linked with.
-void MachineOperand::RemoveRegOperandFromRegInfo() {
- assert(isOnRegUseList() && "Reg operand is not on a use list");
- // Unlink this from the doubly linked list of operands.
- MachineOperand *NextOp = Contents.Reg.Next;
- *Contents.Reg.Prev = NextOp;
- if (NextOp) {
- assert(NextOp->getReg() == getReg() && "Corrupt reg use/def chain!");
- NextOp->Contents.Reg.Prev = Contents.Reg.Prev;
- }
- Contents.Reg.Prev = 0;
- Contents.Reg.Next = 0;
-}
-
void MachineOperand::setReg(unsigned Reg) {
if (getReg() == Reg) return; // No change.
@@ -105,9 +56,10 @@ void MachineOperand::setReg(unsigned Reg) {
if (MachineInstr *MI = getParent())
if (MachineBasicBlock *MBB = MI->getParent())
if (MachineFunction *MF = MBB->getParent()) {
- RemoveRegOperandFromRegInfo();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MRI.removeRegOperandFromUseList(this);
SmallContents.RegNo = Reg;
- AddRegOperandToRegInfo(&MF->getRegInfo());
+ MRI.addRegOperandToUseList(this);
return;
}
@@ -136,15 +88,36 @@ void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
setReg(Reg);
}
+/// Change a def to a use, or a use to a def.
+void MachineOperand::setIsDef(bool Val) {
+ assert(isReg() && "Wrong MachineOperand accessor");
+ assert((!Val || !isDebug()) && "Marking a debug operation as def");
+ if (IsDef == Val)
+ return;
+ // MRI may keep uses and defs in different list positions.
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent()) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MRI.removeRegOperandFromUseList(this);
+ IsDef = Val;
+ MRI.addRegOperandToUseList(this);
+ return;
+ }
+ IsDef = Val;
+}
+
/// ChangeToImmediate - Replace this operand with a new immediate operand of
/// the specified value. If an operand is known to be an immediate already,
/// the setImm method should be used.
void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
// If this operand is currently a register operand, and if this is in a
// function, deregister the operand from the register's use/def list.
- if (isReg() && getParent() && getParent()->getParent() &&
- getParent()->getParent()->getParent())
- RemoveRegOperandFromRegInfo();
+ if (isReg() && isOnRegUseList())
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent())
+ MF->getRegInfo().removeRegOperandFromUseList(this);
OpKind = MO_Immediate;
Contents.ImmVal = ImmVal;
@@ -156,24 +129,20 @@ void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
bool isKill, bool isDead, bool isUndef,
bool isDebug) {
- // If this operand is already a register operand, use setReg to update the
+ MachineRegisterInfo *RegInfo = 0;
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent())
+ RegInfo = &MF->getRegInfo();
+ // If this operand is already a register operand, remove it from the
// register's use/def lists.
- if (isReg()) {
- assert(!isEarlyClobber());
- setReg(Reg);
- } else {
- // Otherwise, change this to a register and set the reg#.
- OpKind = MO_Register;
- SmallContents.RegNo = Reg;
-
- // If this operand is embedded in a function, add the operand to the
- // register's use/def list.
- if (MachineInstr *MI = getParent())
- if (MachineBasicBlock *MBB = MI->getParent())
- if (MachineFunction *MF = MBB->getParent())
- AddRegOperandToRegInfo(&MF->getRegInfo());
- }
+ if (RegInfo && isReg())
+ RegInfo->removeRegOperandFromUseList(this);
+ // Change this to a register and set the reg#.
+ OpKind = MO_Register;
+ SmallContents.RegNo = Reg;
+ SubReg = 0;
IsDef = isDef;
IsImp = isImp;
IsKill = isKill;
@@ -182,7 +151,13 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
IsInternalRead = false;
IsEarlyClobber = false;
IsDebug = isDebug;
- SubReg = 0;
+ // Ensure isOnRegUseList() returns false.
+ Contents.Reg.Prev = 0;
+
+ // If this operand is embedded in a function, add the operand to the
+ // register's use/def list.
+ if (RegInfo)
+ RegInfo->addRegOperandToUseList(this);
}
/// isIdenticalTo - Return true if this operand is identical to the specified
@@ -208,6 +183,7 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
case MachineOperand::MO_FrameIndex:
return getIndex() == Other.getIndex();
case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
return getIndex() == Other.getIndex() && getOffset() == Other.getOffset();
case MachineOperand::MO_JumpTableIndex:
return getIndex() == Other.getIndex();
@@ -245,6 +221,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
case MachineOperand::MO_FrameIndex:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(),
MO.getOffset());
case MachineOperand::MO_JumpTableIndex:
@@ -353,6 +330,11 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
if (getOffset()) OS << "+" << getOffset();
OS << '>';
break;
+ case MachineOperand::MO_TargetIndex:
+ OS << "<ti#" << getIndex();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
case MachineOperand::MO_JumpTableIndex:
OS << "<jt#" << getIndex() << '>';
break;
@@ -650,24 +632,21 @@ MachineRegisterInfo *MachineInstr::getRegInfo() {
/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
/// this instruction from their respective use lists. This requires that the
/// operands already be on their use lists.
-void MachineInstr::RemoveRegOperandsFromUseLists() {
- for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) {
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
if (Operands[i].isReg())
- Operands[i].RemoveRegOperandFromRegInfo();
- }
+ MRI.removeRegOperandFromUseList(&Operands[i]);
}
/// AddRegOperandsToUseLists - Add all of the register operands in
/// this instruction from their respective use lists. This requires that the
/// operands not be on their use lists yet.
-void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo) {
- for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) {
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
if (Operands[i].isReg())
- Operands[i].AddRegOperandToRegInfo(&RegInfo);
- }
+ MRI.addRegOperandToUseList(&Operands[i]);
}
-
/// addOperand - Add the specified operand to the instruction. If it is an
/// implicit operand, it is added to the end of the operand list. If it is
/// an explicit operand it is added at the end of the explicit operand list
@@ -695,7 +674,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) {
--OpNo;
if (RegInfo)
- Operands[OpNo].RemoveRegOperandFromRegInfo();
+ RegInfo->removeRegOperandFromUseList(&Operands[OpNo]);
}
}
@@ -712,7 +691,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
if (Reallocate)
for (unsigned i = 0; i != OpNo; ++i)
if (Operands[i].isReg())
- Operands[i].RemoveRegOperandFromRegInfo();
+ RegInfo->removeRegOperandFromUseList(&Operands[i]);
// Insert the new operand at OpNo.
Operands.insert(Operands.begin() + OpNo, Op);
@@ -723,13 +702,15 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
if (Reallocate)
for (unsigned i = 0; i != OpNo; ++i)
if (Operands[i].isReg())
- Operands[i].AddRegOperandToRegInfo(RegInfo);
+ RegInfo->addRegOperandToUseList(&Operands[i]);
// When adding a register operand, tell RegInfo about it.
if (Operands[OpNo].isReg()) {
- // Add the new operand to RegInfo, even when RegInfo is NULL.
- // This will initialize the linked list pointers.
- Operands[OpNo].AddRegOperandToRegInfo(RegInfo);
+ // Ensure isOnRegUseList() returns false, regardless of Op's status.
+ Operands[OpNo].Contents.Reg.Prev = 0;
+ // Add the new operand to RegInfo.
+ if (RegInfo)
+ RegInfo->addRegOperandToUseList(&Operands[OpNo]);
// If the register operand is flagged as early, mark the operand as such.
if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
Operands[OpNo].setIsEarlyClobber(true);
@@ -739,7 +720,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
if (RegInfo) {
for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i) {
assert(Operands[i].isReg() && "Should only be an implicit reg!");
- Operands[i].AddRegOperandToRegInfo(RegInfo);
+ RegInfo->addRegOperandToUseList(&Operands[i]);
}
}
}
@@ -749,12 +730,13 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
///
void MachineInstr::RemoveOperand(unsigned OpNo) {
assert(OpNo < Operands.size() && "Invalid operand number");
+ MachineRegisterInfo *RegInfo = getRegInfo();
// Special case removing the last one.
if (OpNo == Operands.size()-1) {
// If needed, remove from the reg def/use list.
- if (Operands.back().isReg() && Operands.back().isOnRegUseList())
- Operands.back().RemoveRegOperandFromRegInfo();
+ if (RegInfo && Operands.back().isReg() && Operands.back().isOnRegUseList())
+ RegInfo->removeRegOperandFromUseList(&Operands.back());
Operands.pop_back();
return;
@@ -763,11 +745,10 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
// Otherwise, we are removing an interior operand. If we have reginfo to
// update, remove all operands that will be shifted down from their reg lists,
// move everything down, then re-add them.
- MachineRegisterInfo *RegInfo = getRegInfo();
if (RegInfo) {
for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
if (Operands[i].isReg())
- Operands[i].RemoveRegOperandFromRegInfo();
+ RegInfo->removeRegOperandFromUseList(&Operands[i]);
}
}
@@ -776,7 +757,7 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
if (RegInfo) {
for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
if (Operands[i].isReg())
- Operands[i].AddRegOperandToRegInfo(RegInfo);
+ RegInfo->addRegOperandToUseList(&Operands[i]);
}
}
}
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 82e1235..5fb938f 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -102,17 +102,9 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
// New virtual register number.
unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
-
- // Add a reg, but keep track of whether the vector reallocated or not.
- const unsigned FirstVirtReg = TargetRegisterInfo::index2VirtReg(0);
- void *ArrayBase = getNumVirtRegs() == 0 ? 0 : &VRegInfo[FirstVirtReg];
VRegInfo.grow(Reg);
VRegInfo[Reg].first = RegClass;
RegAllocHints.grow(Reg);
-
- if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase)
- // The vector reallocated, handle this now.
- HandleVRegListReallocation();
return Reg;
}
@@ -126,21 +118,68 @@ void MachineRegisterInfo::clearVirtRegs() {
VRegInfo.clear();
}
-/// HandleVRegListReallocation - We just added a virtual register to the
-/// VRegInfo info list and it reallocated. Update the use/def lists info
-/// pointers.
-void MachineRegisterInfo::HandleVRegListReallocation() {
- // The back pointers for the vreg lists point into the previous vector.
- // Update them to point to their correct slots.
- for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- MachineOperand *List = VRegInfo[Reg].second;
- if (!List) continue;
- // Update the back-pointer to be accurate once more.
- List->Contents.Reg.Prev = &VRegInfo[Reg].second;
+/// Add MO to the linked list of operands for its register.
+void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) {
+ assert(!MO->isOnRegUseList() && "Already on list");
+ MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg());
+ MachineOperand *const Head = HeadRef;
+
+ // Head points to the first list element.
+ // Next is NULL on the last list element.
+ // Prev pointers are circular, so Head->Prev == Last.
+
+ // Head is NULL for an empty list.
+ if (!Head) {
+ MO->Contents.Reg.Prev = MO;
+ MO->Contents.Reg.Next = 0;
+ HeadRef = MO;
+ return;
+ }
+ assert(MO->getReg() == Head->getReg() && "Different regs on the same list!");
+
+ // Insert MO between Last and Head in the circular Prev chain.
+ MachineOperand *Last = Head->Contents.Reg.Prev;
+ assert(Last && "Inconsistent use list");
+ assert(MO->getReg() == Last->getReg() && "Different regs on the same list!");
+ Head->Contents.Reg.Prev = MO;
+ MO->Contents.Reg.Prev = Last;
+
+ // Def operands always precede uses. This allows def_iterator to stop early.
+ // Insert def operands at the front, and use operands at the back.
+ if (MO->isDef()) {
+ // Insert def at the front.
+ MO->Contents.Reg.Next = Head;
+ HeadRef = MO;
+ } else {
+ // Insert use at the end.
+ MO->Contents.Reg.Next = 0;
+ Last->Contents.Reg.Next = MO;
}
}
+/// Remove MO from its use-def list.
+void MachineRegisterInfo::removeRegOperandFromUseList(MachineOperand *MO) {
+ assert(MO->isOnRegUseList() && "Operand not on use list");
+ MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg());
+ MachineOperand *const Head = HeadRef;
+ assert(Head && "List already empty");
+
+ // Unlink this from the doubly linked list of operands.
+ MachineOperand *Next = MO->Contents.Reg.Next;
+ MachineOperand *Prev = MO->Contents.Reg.Prev;
+
+ // Prev links are circular, next link is NULL instead of looping back to Head.
+ if (MO == Head)
+ HeadRef = Next;
+ else
+ Prev->Contents.Reg.Next = Next;
+
+ (Next ? Next : Head)->Contents.Reg.Prev = Prev;
+
+ MO->Contents.Reg.Prev = 0;
+ MO->Contents.Reg.Next = 0;
+}
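A minimal sketch, not part of the patch, of what the "defs first" ordering maintained by addRegOperandToUseList() buys a client: in SSA form the single def, if present, is the head of the list, so def_begin() finds it without walking any uses. The helper name is made up; def_begin()/def_end() are the existing MachineRegisterInfo iterators.

  // Sketch: constant-time lookup of the SSA def, relying on defs being
  // linked ahead of uses on the register's use/def list.
  static MachineInstr *getSSADef(MachineRegisterInfo &MRI, unsigned Reg) {
    MachineRegisterInfo::def_iterator I = MRI.def_begin(Reg);
    return I == MRI.def_end() ? 0 : &*I;
  }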
+
/// replaceRegWith - Replace all instances of FromReg with ToReg in the
/// machine function. This is like llvm-level X->replaceAllUsesWith(Y),
/// except that it also changes any definitions of the register as well.
@@ -178,13 +217,6 @@ MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const {
return &*I;
}
-bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const {
- use_iterator UI = use_begin(RegNo);
- if (UI == use_end())
- return false;
- return ++UI == use_end();
-}
-
bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
use_nodbg_iterator UI = use_nodbg_begin(RegNo);
if (UI == use_nodbg_end())
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index acb1ee6..076547a 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -42,7 +42,7 @@ MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
}
MachineSSAUpdater::~MachineSSAUpdater() {
- delete &getAvailableVals(AV);
+ delete static_cast<AvailableValsTy*>(AV);
}
/// Initialize - Reset this object to get ready for a new set of SSA
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 1ce546b..bc383cb 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -99,6 +99,16 @@ namespace {
bool PerformTrivialForwardCoalescing(MachineInstr *MI,
MachineBasicBlock *MBB);
};
+
+ // SuccessorSorter - Sort Successors according to their loop depth.
+ struct SuccessorSorter {
+ SuccessorSorter(MachineLoopInfo *LoopInfo) : LI(LoopInfo) {}
+ bool operator()(const MachineBasicBlock *LHS,
+ const MachineBasicBlock *RHS) const {
+ return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS);
+ }
+ MachineLoopInfo *LI;
+ };
} // end anonymous namespace
char MachineSinking::ID = 0;
@@ -526,8 +536,11 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
// Otherwise, we should look at all the successors and decide which one
// we should sink to.
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- E = MBB->succ_end(); SI != E; ++SI) {
+ // We give successors with smaller loop depth higher priority.
+ SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end());
+ std::stable_sort(Succs.begin(), Succs.end(), SuccessorSorter(LI));
+ for (SmallVector<MachineBasicBlock*, 4>::iterator SI = Succs.begin(),
+ E = Succs.end(); SI != E; ++SI) {
MachineBasicBlock *SuccBlock = *SI;
bool LocalUse = false;
if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
new file mode 100644
index 0000000..1a3aa60
--- /dev/null
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -0,0 +1,1153 @@
+//===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-trace-metrics"
+#include "MachineTraceMetrics.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SparseSet.h"
+
+using namespace llvm;
+
+char MachineTraceMetrics::ID = 0;
+char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
+
+INITIALIZE_PASS_BEGIN(MachineTraceMetrics,
+ "machine-trace-metrics", "Machine Trace Metrics", false, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineTraceMetrics,
+ "machine-trace-metrics", "Machine Trace Metrics", false, true)
+
+MachineTraceMetrics::MachineTraceMetrics()
+ : MachineFunctionPass(ID), MF(0), TII(0), TRI(0), MRI(0), Loops(0) {
+ std::fill(Ensembles, array_endof(Ensembles), (Ensemble*)0);
+}
+
+void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
+ MF = &Func;
+ TII = MF->getTarget().getInstrInfo();
+ TRI = MF->getTarget().getRegisterInfo();
+ ItinData = MF->getTarget().getInstrItineraryData();
+ MRI = &MF->getRegInfo();
+ Loops = &getAnalysis<MachineLoopInfo>();
+ BlockInfo.resize(MF->getNumBlockIDs());
+ return false;
+}
+
+void MachineTraceMetrics::releaseMemory() {
+ MF = 0;
+ BlockInfo.clear();
+ for (unsigned i = 0; i != TS_NumStrategies; ++i) {
+ delete Ensembles[i];
+ Ensembles[i] = 0;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Fixed block information
+//===----------------------------------------------------------------------===//
+//
+// The number of instructions in a basic block and the CPU resources used by
+// those instructions don't depend on any given trace strategy.
+
+/// Compute the resource usage in basic block MBB.
+const MachineTraceMetrics::FixedBlockInfo*
+MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
+ assert(MBB && "No basic block");
+ FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
+ if (FBI->hasResources())
+ return FBI;
+
+ // Compute resource usage in the block.
+ // FIXME: Compute per-functional unit counts.
+ FBI->HasCalls = false;
+ unsigned InstrCount = 0;
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ const MachineInstr *MI = I;
+ if (MI->isTransient())
+ continue;
+ ++InstrCount;
+ if (MI->isCall())
+ FBI->HasCalls = true;
+ }
+ FBI->InstrCount = InstrCount;
+ return FBI;
+}
+
+//===----------------------------------------------------------------------===//
+// Ensemble utility functions
+//===----------------------------------------------------------------------===//
+
+MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
+ : MTM(*ct) {
+ BlockInfo.resize(MTM.BlockInfo.size());
+}
+
+// Virtual destructor serves as an anchor.
+MachineTraceMetrics::Ensemble::~Ensemble() {}
+
+const MachineLoop*
+MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
+ return MTM.Loops->getLoopFor(MBB);
+}
+
+// Update resource-related information in the TraceBlockInfo for MBB.
+// Only update resources related to the trace above MBB.
+void MachineTraceMetrics::Ensemble::
+computeDepthResources(const MachineBasicBlock *MBB) {
+ TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+
+ // Compute resources from trace above. The top block is simple.
+ if (!TBI->Pred) {
+ TBI->InstrDepth = 0;
+ TBI->Head = MBB->getNumber();
+ return;
+ }
+
+ // Compute from the block above. A post-order traversal ensures the
+ // predecessor is always computed first.
+ TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()];
+ assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
+ const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
+ TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
+ TBI->Head = PredTBI->Head;
+}
+
+// Update resource-related information in the TraceBlockInfo for MBB.
+// Only update resources related to the trace below MBB.
+void MachineTraceMetrics::Ensemble::
+computeHeightResources(const MachineBasicBlock *MBB) {
+ TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+
+ // Compute resources for the current block.
+ TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
+
+ // The trace tail is done.
+ if (!TBI->Succ) {
+ TBI->Tail = MBB->getNumber();
+ return;
+ }
+
+ // Compute from the block below. A post-order traversal ensures the
+ // successor is always computed first.
+ TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()];
+ assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
+ TBI->InstrHeight += SuccTBI->InstrHeight;
+ TBI->Tail = SuccTBI->Tail;
+}
+
+// Check if depth resources for MBB are valid and return the TBI.
+// Return NULL if the resources have been invalidated.
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getDepthResources(const MachineBasicBlock *MBB) const {
+ const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ return TBI->hasValidDepth() ? TBI : 0;
+}
+
+// Check if height resources for MBB are valid and return the TBI.
+// Return NULL if the resources have been invalidated.
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getHeightResources(const MachineBasicBlock *MBB) const {
+ const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ return TBI->hasValidHeight() ? TBI : 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Trace Selection Strategies
+//===----------------------------------------------------------------------===//
+//
+// A trace selection strategy is implemented as a sub-class of Ensemble. The
+// trace through a block B is computed by two DFS traversals of the CFG
+// starting from B. One upwards, and one downwards. During the upwards DFS,
+// pickTracePred() is called on the post-ordered blocks. During the downwards
+// DFS, pickTraceSucc() is called in a post-order.
+//
+
+// We never allow traces that leave loops, but we do allow traces to enter
+// nested loops. We also never allow traces to contain back-edges.
+//
+// This means that a loop header can never appear above the center block of a
+// trace, except as the trace head. Below the center block, loop exiting edges
+// are banned.
+//
+// Return true if an edge from the From loop to the To loop is leaving a loop.
+// Either of To and From can be null.
+static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
+ return From && !From->contains(To);
+}
+
+// MinInstrCountEnsemble - Pick the trace that executes the least number of
+// instructions.
+namespace {
+class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
+ const char *getName() const { return "MinInstr"; }
+ const MachineBasicBlock *pickTracePred(const MachineBasicBlock*);
+ const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*);
+
+public:
+ MinInstrCountEnsemble(MachineTraceMetrics *mtm)
+ : MachineTraceMetrics::Ensemble(mtm) {}
+};
+}
+
+// Select the preferred predecessor for MBB.
+const MachineBasicBlock*
+MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
+ if (MBB->pred_empty())
+ return 0;
+ const MachineLoop *CurLoop = getLoopFor(MBB);
+ // Don't leave loops, and never follow back-edges.
+ if (CurLoop && MBB == CurLoop->getHeader())
+ return 0;
+ unsigned CurCount = MTM.getResources(MBB)->InstrCount;
+ const MachineBasicBlock *Best = 0;
+ unsigned BestDepth = 0;
+ for (MachineBasicBlock::const_pred_iterator
+ I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
+ const MachineBasicBlock *Pred = *I;
+ const MachineTraceMetrics::TraceBlockInfo *PredTBI =
+ getDepthResources(Pred);
+ // Ignore cycles that aren't natural loops.
+ if (!PredTBI)
+ continue;
+ // Pick the predecessor that would give this block the smallest InstrDepth.
+ unsigned Depth = PredTBI->InstrDepth + CurCount;
+ if (!Best || Depth < BestDepth)
+ Best = Pred, BestDepth = Depth;
+ }
+ return Best;
+}
+
+// Select the preferred successor for MBB.
+const MachineBasicBlock*
+MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
+ if (MBB->pred_empty())
+ return 0;
+ const MachineLoop *CurLoop = getLoopFor(MBB);
+ const MachineBasicBlock *Best = 0;
+ unsigned BestHeight = 0;
+ for (MachineBasicBlock::const_succ_iterator
+ I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
+ const MachineBasicBlock *Succ = *I;
+ // Don't consider back-edges.
+ if (CurLoop && Succ == CurLoop->getHeader())
+ continue;
+ // Don't consider successors exiting CurLoop.
+ if (isExitingLoop(CurLoop, getLoopFor(Succ)))
+ continue;
+ const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
+ getHeightResources(Succ);
+ // Ignore cycles that aren't natural loops.
+ if (!SuccTBI)
+ continue;
+ // Pick the successor that would give this block the smallest InstrHeight.
+ unsigned Height = SuccTBI->InstrHeight;
+ if (!Best || Height < BestHeight)
+ Best = Succ, BestHeight = Height;
+ }
+ return Best;
+}
+
+// Get an Ensemble sub-class for the requested trace strategy.
+MachineTraceMetrics::Ensemble *
+MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
+ assert(strategy < TS_NumStrategies && "Invalid trace strategy enum");
+ Ensemble *&E = Ensembles[strategy];
+ if (E)
+ return E;
+
+ // Allocate new Ensemble on demand.
+ switch (strategy) {
+ case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this));
+ default: llvm_unreachable("Invalid trace strategy enum");
+ }
+}
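A hedged sketch of how a client pass is expected to use this, based only on the signatures above (such a pass would also need to addRequired<MachineTraceMetrics>() in its getAnalysisUsage):

  // Sketch: fetch the analysis and the minimum-instruction-count ensemble.
  MachineTraceMetrics &MTM = getAnalysis<MachineTraceMetrics>();
  MachineTraceMetrics::Ensemble *MinInstr =
    MTM.getEnsemble(MachineTraceMetrics::TS_MinInstrCount);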
+
+void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n');
+ BlockInfo[MBB->getNumber()].invalidate();
+ for (unsigned i = 0; i != TS_NumStrategies; ++i)
+ if (Ensembles[i])
+ Ensembles[i]->invalidate(MBB);
+}
+
+void MachineTraceMetrics::verifyAnalysis() const {
+ if (!MF)
+ return;
+#ifndef NDEBUG
+ assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
+ for (unsigned i = 0; i != TS_NumStrategies; ++i)
+ if (Ensembles[i])
+ Ensembles[i]->verify();
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Trace building
+//===----------------------------------------------------------------------===//
+//
+// Traces are built by two CFG traversals. To avoid recomputing too much, use a
+// set abstraction that confines the search to the current loop, and doesn't
+// revisit blocks.
+
+namespace {
+struct LoopBounds {
+ MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
+ SmallPtrSet<const MachineBasicBlock*, 8> Visited;
+ const MachineLoopInfo *Loops;
+ bool Downward;
+ LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
+ const MachineLoopInfo *loops)
+ : Blocks(blocks), Loops(loops), Downward(false) {}
+};
+}
+
+// Specialize po_iterator_storage in order to prune the post-order traversal so
+// it is limited to the current loop and doesn't traverse the loop back edges.
+namespace llvm {
+template<>
+class po_iterator_storage<LoopBounds, true> {
+ LoopBounds &LB;
+public:
+ po_iterator_storage(LoopBounds &lb) : LB(lb) {}
+ void finishPostorder(const MachineBasicBlock*) {}
+
+ bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) {
+ // Skip already visited To blocks.
+ MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
+ if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
+ return false;
+ // From is null once when To is the trace center block.
+ if (From) {
+ if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) {
+ // Don't follow backedges, don't leave FromLoop when going upwards.
+ if ((LB.Downward ? To : From) == FromLoop->getHeader())
+ return false;
+ // Don't leave FromLoop.
+ if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
+ return false;
+ }
+ }
+ // To is a new block. Mark the block as visited in case the CFG has cycles
+ // that MachineLoopInfo didn't recognize as a natural loop.
+ return LB.Visited.insert(To);
+ }
+};
+}
+
+/// Compute the trace through MBB.
+void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Computing " << getName() << " trace through BB#"
+ << MBB->getNumber() << '\n');
+ // Set up loop bounds for the backwards post-order traversal.
+ LoopBounds Bounds(BlockInfo, MTM.Loops);
+
+ // Run an upwards post-order search for the trace start.
+ Bounds.Downward = false;
+ Bounds.Visited.clear();
+ typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO;
+ for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds);
+ I != E; ++I) {
+ DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": ");
+ TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
+ // All the predecessors have been visited, pick the preferred one.
+ TBI.Pred = pickTracePred(*I);
+ DEBUG({
+ if (TBI.Pred)
+ dbgs() << "BB#" << TBI.Pred->getNumber() << '\n';
+ else
+ dbgs() << "null\n";
+ });
+ // The trace leading to I is now known, compute the depth resources.
+ computeDepthResources(*I);
+ }
+
+ // Run a downwards post-order search for the trace end.
+ Bounds.Downward = true;
+ Bounds.Visited.clear();
+ typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO;
+ for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds);
+ I != E; ++I) {
+ DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": ");
+ TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
+ // All the successors have been visited, pick the preferred one.
+ TBI.Succ = pickTraceSucc(*I);
+ DEBUG({
+ if (TBI.Succ)
+ dbgs() << "BB#" << TBI.Succ->getNumber() << '\n';
+ else
+ dbgs() << "null\n";
+ });
+ // The trace leaving I is now known, compute the height resources.
+ computeHeightResources(*I);
+ }
+}
+
+/// Invalidate traces through BadMBB.
+void
+MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
+ SmallVector<const MachineBasicBlock*, 16> WorkList;
+ TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
+
+ // Invalidate height resources of blocks above MBB.
+ if (BadTBI.hasValidHeight()) {
+ BadTBI.invalidateHeight();
+ WorkList.push_back(BadMBB);
+ do {
+ const MachineBasicBlock *MBB = WorkList.pop_back_val();
+ DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
+ << " height.\n");
+ // Find any MBB predecessors that have MBB as their preferred successor.
+ // They are the only ones that need to be invalidated.
+ for (MachineBasicBlock::const_pred_iterator
+ I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
+ TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()];
+ if (!TBI.hasValidHeight())
+ continue;
+ if (TBI.Succ == MBB) {
+ TBI.invalidateHeight();
+ WorkList.push_back(*I);
+ continue;
+ }
+ // Verify that TBI.Succ is actually a *I successor.
+ assert((!TBI.Succ || (*I)->isSuccessor(TBI.Succ)) && "CFG changed");
+ }
+ } while (!WorkList.empty());
+ }
+
+ // Invalidate depth resources of blocks below MBB.
+ if (BadTBI.hasValidDepth()) {
+ BadTBI.invalidateDepth();
+ WorkList.push_back(BadMBB);
+ do {
+ const MachineBasicBlock *MBB = WorkList.pop_back_val();
+ DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
+ << " depth.\n");
+ // Find any MBB successors that have MBB as their preferred predecessor.
+ // They are the only ones that need to be invalidated.
+ for (MachineBasicBlock::const_succ_iterator
+ I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
+ TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()];
+ if (!TBI.hasValidDepth())
+ continue;
+ if (TBI.Pred == MBB) {
+ TBI.invalidateDepth();
+ WorkList.push_back(*I);
+ continue;
+ }
+ // Verify that TBI.Pred is actually a *I predecessor.
+ assert((!TBI.Pred || (*I)->isPredecessor(TBI.Pred)) && "CFG changed");
+ }
+ } while (!WorkList.empty());
+ }
+
+ // Clear any per-instruction data. We only have to do this for BadMBB itself
+ // because the instructions in that block may change. Other blocks may be
+ // invalidated, but their instructions will stay the same, so there is no
+ // need to erase the Cycle entries. They will be overwritten when we
+ // recompute.
+ for (MachineBasicBlock::const_iterator I = BadMBB->begin(), E = BadMBB->end();
+ I != E; ++I)
+ Cycles.erase(I);
+}
+
+void MachineTraceMetrics::Ensemble::verify() const {
+#ifndef NDEBUG
+ assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
+ "Outdated BlockInfo size");
+ for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
+ const TraceBlockInfo &TBI = BlockInfo[Num];
+ if (TBI.hasValidDepth() && TBI.Pred) {
+ const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
+ assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
+ assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
+ "Trace is broken, depth should have been invalidated.");
+ const MachineLoop *Loop = getLoopFor(MBB);
+ assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
+ }
+ if (TBI.hasValidHeight() && TBI.Succ) {
+ const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
+ assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
+ assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
+ "Trace is broken, height should have been invalidated.");
+ const MachineLoop *Loop = getLoopFor(MBB);
+ const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
+ assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
+ "Trace contains backedge");
+ }
+ }
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Data Dependencies
+//===----------------------------------------------------------------------===//
+//
+// Compute the depth and height of each instruction based on data dependencies
+// and instruction latencies. These cycle numbers assume that the CPU can issue
+// an infinite number of instructions per cycle as long as their dependencies
+// are ready.
+
+// A data dependency is represented as a defining MI and operand numbers on the
+// defining and using MI.
+namespace {
+struct DataDep {
+ const MachineInstr *DefMI;
+ unsigned DefOp;
+ unsigned UseOp;
+
+ DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
+ : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}
+
+ /// Create a DataDep from an SSA form virtual register.
+ DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
+ : UseOp(UseOp) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg));
+ MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
+ assert(!DefI.atEnd() && "Register has no defs");
+ DefMI = &*DefI;
+ DefOp = DefI.getOperandNo();
+ assert((++DefI).atEnd() && "Register has multiple defs");
+ }
+};
+}
+
+// Get the input data dependencies that must be ready before UseMI can issue.
+// Return true if UseMI has any physreg operands.
+static bool getDataDeps(const MachineInstr *UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ const MachineRegisterInfo *MRI) {
+ bool HasPhysRegs = false;
+ for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ HasPhysRegs = true;
+ continue;
+ }
+ // Collect virtual register reads.
+ if (MO->readsReg())
+ Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
+ }
+ return HasPhysRegs;
+}
+
+// Get the input data dependencies of a PHI instruction, using Pred as the
+// preferred predecessor.
+// This will add at most one dependency to Deps.
+static void getPHIDeps(const MachineInstr *UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ const MachineBasicBlock *Pred,
+ const MachineRegisterInfo *MRI) {
+ // No predecessor at the beginning of a trace. Ignore dependencies.
+ if (!Pred)
+ return;
+ assert(UseMI->isPHI() && UseMI->getNumOperands() % 2 && "Bad PHI");
+ for (unsigned i = 1; i != UseMI->getNumOperands(); i += 2) {
+ if (UseMI->getOperand(i + 1).getMBB() == Pred) {
+ unsigned Reg = UseMI->getOperand(i).getReg();
+ Deps.push_back(DataDep(MRI, Reg, i));
+ return;
+ }
+ }
+}
+
+// Keep track of physreg data dependencies by recording each live register unit.
+// Associate each regunit with an instruction operand. Depending on the
+// direction instructions are scanned, it could be the operand that defined the
+// regunit, or the highest operand to read the regunit.
+namespace {
+struct LiveRegUnit {
+ unsigned RegUnit;
+ unsigned Cycle;
+ const MachineInstr *MI;
+ unsigned Op;
+
+ unsigned getSparseSetIndex() const { return RegUnit; }
+
+ LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(0), Op(0) {}
+};
+}
+
+// Identify physreg dependencies for UseMI, and update the live regunit
+// tracking set when scanning instructions downwards.
+static void updatePhysDepsDownwards(const MachineInstr *UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ SparseSet<LiveRegUnit> &RegUnits,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 8> Kills;
+ SmallVector<unsigned, 8> LiveDefOps;
+
+ for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ // Track live defs and kills for updating RegUnits.
+ if (MO->isDef()) {
+ if (MO->isDead())
+ Kills.push_back(Reg);
+ else
+ LiveDefOps.push_back(MO.getOperandNo());
+ } else if (MO->isKill())
+ Kills.push_back(Reg);
+ // Identify dependencies.
+ if (!MO->readsReg())
+ continue;
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
+ if (I == RegUnits.end())
+ continue;
+ Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
+ break;
+ }
+ }
+
+ // Update RegUnits to reflect live registers after UseMI.
+ // First kills.
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units)
+ RegUnits.erase(*Units);
+
+ // Second, live defs.
+ for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) {
+ unsigned DefOp = LiveDefOps[i];
+ for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI);
+ Units.isValid(); ++Units) {
+ LiveRegUnit &LRU = RegUnits[*Units];
+ LRU.MI = UseMI;
+ LRU.Op = DefOp;
+ }
+ }
+}
+
+/// The length of the critical path through a trace is the maximum of two path
+/// lengths:
+///
+/// 1. The maximum height+depth over all instructions in the trace center block.
+///
+/// 2. The longest cross-block dependency chain. For small blocks, it is
+/// possible that the critical path through the trace doesn't include any
+/// instructions in the block.
+///
+/// This function computes the second number from the live-in list of the
+/// center block.
+unsigned MachineTraceMetrics::Ensemble::
+computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
+ assert(TBI.HasValidInstrDepths && "Missing depth info");
+ assert(TBI.HasValidInstrHeights && "Missing height info");
+ unsigned MaxLen = 0;
+ for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
+ const LiveInReg &LIR = TBI.LiveIns[i];
+ if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg))
+ continue;
+ const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
+ // Ignore dependencies outside the current trace.
+ const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
+ if (!DefTBI.hasValidDepth() || DefTBI.Head != TBI.Head)
+ continue;
+ unsigned Len = LIR.Height + Cycles[DefMI].Depth;
+ MaxLen = std::max(MaxLen, Len);
+ }
+ return MaxLen;
+}
+
+/// Compute instruction depths for all instructions above or in MBB in its
+/// trace. This assumes that the trace through MBB has already been computed.
+void MachineTraceMetrics::Ensemble::
+computeInstrDepths(const MachineBasicBlock *MBB) {
+ // The top of the trace may already be computed, and HasValidInstrDepths
+ // implies Head->HasValidInstrDepths, so we only need to start from the first
+ // block in the trace that needs to be recomputed.
+ SmallVector<const MachineBasicBlock*, 8> Stack;
+ do {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ assert(TBI.hasValidDepth() && "Incomplete trace");
+ if (TBI.HasValidInstrDepths)
+ break;
+ Stack.push_back(MBB);
+ MBB = TBI.Pred;
+ } while (MBB);
+
+ // FIXME: If MBB is non-null at this point, it is the last pre-computed block
+ // in the trace. We should track any live-out physregs that were defined in
+ // the trace. This is quite rare in SSA form, typically created by CSE
+ // hoisting a compare.
+ SparseSet<LiveRegUnit> RegUnits;
+ RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
+
+ // Go through trace blocks in top-down order, stopping after the center block.
+ SmallVector<DataDep, 8> Deps;
+ while (!Stack.empty()) {
+ MBB = Stack.pop_back_val();
+ DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n");
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ TBI.HasValidInstrDepths = true;
+ TBI.CriticalPath = 0;
+
+ // Also compute the critical path length through MBB when possible.
+ if (TBI.HasValidInstrHeights)
+ TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
+
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ const MachineInstr *UseMI = I;
+
+ // Collect all data dependencies.
+ Deps.clear();
+ if (UseMI->isPHI())
+ getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
+ else if (getDataDeps(UseMI, Deps, MTM.MRI))
+ updatePhysDepsDownwards(UseMI, Deps, RegUnits, MTM.TRI);
+
+ // Filter and process dependencies, computing the earliest issue cycle.
+ unsigned Cycle = 0;
+ for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
+ const DataDep &Dep = Deps[i];
+ const TraceBlockInfo &DepTBI =
+ BlockInfo[Dep.DefMI->getParent()->getNumber()];
+ // Ignore dependencies from outside the current trace.
+ if (!DepTBI.hasValidDepth() || DepTBI.Head != TBI.Head)
+ continue;
+ assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
+ unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
+ // Add latency if DefMI is a real instruction. Transients get latency 0.
+ if (!Dep.DefMI->isTransient())
+ DepCycle += MTM.TII->computeOperandLatency(MTM.ItinData,
+ Dep.DefMI, Dep.DefOp,
+ UseMI, Dep.UseOp,
+ /* FindMin = */ false);
+ Cycle = std::max(Cycle, DepCycle);
+ }
+ // Remember the instruction depth.
+ InstrCycles &MICycles = Cycles[UseMI];
+ MICycles.Depth = Cycle;
+
+ if (!TBI.HasValidInstrHeights) {
+ DEBUG(dbgs() << Cycle << '\t' << *UseMI);
+ continue;
+ }
+ // Update critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
+ DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *UseMI);
+ }
+ }
+}
+
+// Identify physreg dependencies for MI when scanning instructions upwards.
+// Return the issue height of MI after considering any live regunits.
+// Height is the issue height computed from virtual register dependencies alone.
+static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
+ SparseSet<LiveRegUnit> &RegUnits,
+ const InstrItineraryData *ItinData,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 8> ReadOps;
+ for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (MO->readsReg())
+ ReadOps.push_back(MO.getOperandNo());
+ if (!MO->isDef())
+ continue;
+ // This is a def of Reg. Remove corresponding entries from RegUnits, and
+ // update MI Height to consider the physreg dependencies.
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
+ if (I == RegUnits.end())
+ continue;
+ unsigned DepHeight = I->Cycle;
+ if (!MI->isTransient()) {
+ // We may not know the UseMI of this dependency, if it came from the
+ // live-in list.
+ if (I->MI)
+ DepHeight += TII->computeOperandLatency(ItinData,
+ MI, MO.getOperandNo(),
+ I->MI, I->Op);
+ else
+ // No UseMI. Just use the MI latency instead.
+ DepHeight += TII->getInstrLatency(ItinData, MI);
+ }
+ Height = std::max(Height, DepHeight);
+ // This regunit is dead above MI.
+ RegUnits.erase(I);
+ }
+ }
+
+ // Now we know the height of MI. Update any regunits read.
+ for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) {
+ unsigned Reg = MI->getOperand(ReadOps[i]).getReg();
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ LiveRegUnit &LRU = RegUnits[*Units];
+ // Set the height to the highest reader of the unit.
+ if (LRU.Cycle <= Height && LRU.MI != MI) {
+ LRU.Cycle = Height;
+ LRU.MI = MI;
+ LRU.Op = ReadOps[i];
+ }
+ }
+ }
+
+ return Height;
+}
+
+
+typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;
+
+// Push the height of DefMI upwards if required to match UseMI.
+// Return true if this is the first time DefMI was seen.
+static bool pushDepHeight(const DataDep &Dep,
+ const MachineInstr *UseMI, unsigned UseHeight,
+ MIHeightMap &Heights,
+ const InstrItineraryData *ItinData,
+ const TargetInstrInfo *TII) {
+ // Adjust height by Dep.DefMI latency.
+ if (!Dep.DefMI->isTransient())
+ UseHeight += TII->computeOperandLatency(ItinData, Dep.DefMI, Dep.DefOp,
+ UseMI, Dep.UseOp);
+
+ // Update Heights[DefMI] to be the maximum height seen.
+ MIHeightMap::iterator I;
+ bool New;
+ tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
+ if (New)
+ return true;
+
+ // DefMI has been pushed before. Give it the max height.
+ if (I->second < UseHeight)
+ I->second = UseHeight;
+ return false;
+}
+
+/// Assuming that DefMI was used by Trace.back(), add it to the live-in lists
+/// of all the blocks in Trace. Stop when reaching the block that contains
+/// DefMI.
+void MachineTraceMetrics::Ensemble::
+addLiveIns(const MachineInstr *DefMI,
+ ArrayRef<const MachineBasicBlock*> Trace) {
+ assert(!Trace.empty() && "Trace should contain at least one block");
+ unsigned Reg = DefMI->getOperand(0).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ const MachineBasicBlock *DefMBB = DefMI->getParent();
+
+ // Reg is live-in to all blocks in Trace that follow DefMBB.
+ for (unsigned i = Trace.size(); i; --i) {
+ const MachineBasicBlock *MBB = Trace[i-1];
+ if (MBB == DefMBB)
+ return;
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ // Just add the register. The height will be updated later.
+ TBI.LiveIns.push_back(Reg);
+ }
+}
+
+/// Compute instruction heights in the trace through MBB. This updates MBB and
+/// the blocks below it in the trace. It is assumed that the trace has already
+/// been computed.
+void MachineTraceMetrics::Ensemble::
+computeInstrHeights(const MachineBasicBlock *MBB) {
+ // The bottom of the trace may already be computed.
+ // Find the blocks that need updating.
+ SmallVector<const MachineBasicBlock*, 8> Stack;
+ do {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ assert(TBI.hasValidHeight() && "Incomplete trace");
+ if (TBI.HasValidInstrHeights)
+ break;
+ Stack.push_back(MBB);
+ TBI.LiveIns.clear();
+ MBB = TBI.Succ;
+ } while (MBB);
+
+ // As we move upwards in the trace, keep track of instructions that are
+ // required by deeper trace instructions. Map MI -> height required so far.
+ MIHeightMap Heights;
+
+ // For physregs, the def isn't known when we see the use.
+ // Instead, keep track of the highest use of each regunit.
+ SparseSet<LiveRegUnit> RegUnits;
+ RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
+
+ // If the bottom of the trace was already precomputed, initialize heights
+ // from its live-in list.
+ // MBB is the highest precomputed block in the trace.
+ if (MBB) {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
+ LiveInReg LI = TBI.LiveIns[i];
+ if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) {
+ // For virtual registers, the def latency is included.
+ unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
+ if (Height < LI.Height)
+ Height = LI.Height;
+ } else {
+ // For register units, the def latency is not included because we don't
+ // know the def yet.
+ RegUnits[LI.Reg].Cycle = LI.Height;
+ }
+ }
+ }
+
+ // Go through the trace blocks in bottom-up order.
+ SmallVector<DataDep, 8> Deps;
+ for (;!Stack.empty(); Stack.pop_back()) {
+ MBB = Stack.back();
+ DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n");
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ TBI.HasValidInstrHeights = true;
+ TBI.CriticalPath = 0;
+
+ // Get dependencies from PHIs in the trace successor.
+ const MachineBasicBlock *Succ = TBI.Succ;
+ // If MBB is the last block in the trace, and it has a back-edge to the
+ // loop header, get loop-carried dependencies from PHIs in the header. For
+ // that purpose, pretend that all the loop header PHIs have height 0.
+ if (!Succ)
+ if (const MachineLoop *Loop = getLoopFor(MBB))
+ if (MBB->isSuccessor(Loop->getHeader()))
+ Succ = Loop->getHeader();
+
+ if (Succ) {
+ for (MachineBasicBlock::const_iterator I = Succ->begin(), E = Succ->end();
+ I != E && I->isPHI(); ++I) {
+ const MachineInstr *PHI = I;
+ Deps.clear();
+ getPHIDeps(PHI, Deps, MBB, MTM.MRI);
+ if (!Deps.empty()) {
+ // Loop header PHI heights are all 0.
+ unsigned Height = TBI.Succ ? Cycles.lookup(PHI).Height : 0;
+ DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI);
+ if (pushDepHeight(Deps.front(), PHI, Height,
+ Heights, MTM.ItinData, MTM.TII))
+ addLiveIns(Deps.front().DefMI, Stack);
+ }
+ }
+ }
+
+ // Go through the block backwards.
+ for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin();
+ BI != BB;) {
+ const MachineInstr *MI = --BI;
+
+ // Find the MI height as determined by virtual register uses in the
+ // trace below.
+ unsigned Cycle = 0;
+ MIHeightMap::iterator HeightI = Heights.find(MI);
+ if (HeightI != Heights.end()) {
+ Cycle = HeightI->second;
+ // We won't be seeing any more MI uses.
+ Heights.erase(HeightI);
+ }
+
+ // Don't process PHI deps. They depend on the specific predecessor, and
+ // we'll get them when visiting the predecessor.
+ Deps.clear();
+ bool HasPhysRegs = !MI->isPHI() && getDataDeps(MI, Deps, MTM.MRI);
+
+ // There may also be regunit dependencies to include in the height.
+ if (HasPhysRegs)
+ Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits,
+ MTM.ItinData, MTM.TII, MTM.TRI);
+
+ // Update the required height of any virtual registers read by MI.
+ for (unsigned i = 0, e = Deps.size(); i != e; ++i)
+ if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.ItinData, MTM.TII))
+ addLiveIns(Deps[i].DefMI, Stack);
+
+ InstrCycles &MICycles = Cycles[MI];
+ MICycles.Height = Cycle;
+ if (!TBI.HasValidInstrDepths) {
+ DEBUG(dbgs() << Cycle << '\t' << *MI);
+ continue;
+ }
+ // Update critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
+ DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *MI);
+ }
+
+ // Update virtual live-in heights. They were added by addLiveIns() with a 0
+ // height because the final height isn't known until now.
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:");
+ for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
+ LiveInReg &LIR = TBI.LiveIns[i];
+ const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
+ LIR.Height = Heights.lookup(DefMI);
+ DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height);
+ }
+
+ // Transfer the live regunits to the live-in list.
+ for (SparseSet<LiveRegUnit>::const_iterator
+ RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
+ TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
+ DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI)
+ << '@' << RI->Cycle);
+ }
+ DEBUG(dbgs() << '\n');
+
+ if (!TBI.HasValidInstrDepths)
+ continue;
+ // Add live-ins to the critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath,
+ computeCrossBlockCriticalPath(TBI));
+ DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
+ }
+}
+
+MachineTraceMetrics::Trace
+MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
+ // FIXME: Check cache tags, recompute as needed.
+ computeTrace(MBB);
+ computeInstrDepths(MBB);
+ computeInstrHeights(MBB);
+ return Trace(*this, BlockInfo[MBB->getNumber()]);
+}
+
+unsigned
+MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const {
+ assert(MI && "Not an instruction.");
+ assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) &&
+ "MI must be in the trace center block");
+ InstrCycles Cyc = getInstrCycles(MI);
+ return getCriticalPath() - (Cyc.Depth + Cyc.Height);
+}
+
+unsigned
+MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
+ const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
+ SmallVector<DataDep, 1> Deps;
+ getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
+ assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
+ DataDep &Dep = Deps.front();
+ unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth;
+ // Add latency if DefMI is a real instruction. Transients get latency 0.
+ if (!Dep.DefMI->isTransient())
+ DepCycle += TE.MTM.TII->computeOperandLatency(TE.MTM.ItinData,
+ Dep.DefMI, Dep.DefOp,
+ PHI, Dep.UseOp,
+ /* FindMin = */ false);
+ return DepCycle;
+}
+
+unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
+ // For now, we compute the resource depth from instruction count / issue
+ // width. Eventually, we should compute resource depth per functional unit
+ // and return the max.
+ unsigned Instrs = TBI.InstrDepth;
+ if (Bottom)
+ Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
+ if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel)
+ if (Model->IssueWidth != 0)
+ return Instrs / Model->IssueWidth;
+ // Assume issue width 1 without a schedule model.
+ return Instrs;
+}
+
+unsigned MachineTraceMetrics::Trace::
+getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {
+ unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
+ for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
+ Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
+ if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel)
+ if (Model->IssueWidth != 0)
+ return Instrs / Model->IssueWidth;
+ // Assume issue width 1 without a schedule model.
+ return Instrs;
+}
+
+void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
+ OS << getName() << " ensemble:\n";
+ for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
+ OS << " BB#" << i << '\t';
+ BlockInfo[i].print(OS);
+ OS << '\n';
+ }
+}
+
+void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
+ if (hasValidDepth()) {
+ OS << "depth=" << InstrDepth;
+ if (Pred)
+ OS << " pred=BB#" << Pred->getNumber();
+ else
+ OS << " pred=null";
+ OS << " head=BB#" << Head;
+ if (HasValidInstrDepths)
+ OS << " +instrs";
+ } else
+ OS << "depth invalid";
+ OS << ", ";
+ if (hasValidHeight()) {
+ OS << "height=" << InstrHeight;
+ if (Succ)
+ OS << " succ=BB#" << Succ->getNumber();
+ else
+ OS << " succ=null";
+ OS << " tail=BB#" << Tail;
+ if (HasValidInstrHeights)
+ OS << " +instrs";
+ } else
+ OS << "height invalid";
+ if (HasValidInstrDepths && HasValidInstrHeights)
+ OS << ", crit=" << CriticalPath;
+}
+
+void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
+ unsigned MBBNum = &TBI - &TE.BlockInfo[0];
+
+ OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum
+ << " --> BB#" << TBI.Tail << ':';
+ if (TBI.hasValidHeight() && TBI.hasValidDepth())
+ OS << ' ' << getInstrCount() << " instrs.";
+ if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
+ OS << ' ' << TBI.CriticalPath << " cycles.";
+
+ const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
+ OS << "\nBB#" << MBBNum;
+ while (Block->hasValidDepth() && Block->Pred) {
+ unsigned Num = Block->Pred->getNumber();
+ OS << " <- BB#" << Num;
+ Block = &TE.BlockInfo[Num];
+ }
+
+ Block = &TBI;
+ OS << "\n ";
+ while (Block->hasValidHeight() && Block->Succ) {
+ unsigned Num = Block->Succ->getNumber();
+ OS << " -> BB#" << Num;
+ Block = &TE.BlockInfo[Num];
+ }
+ OS << '\n';
+}
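
The per-instruction numbers produced above (depth, height, critical path, slack) can be illustrated with a small self-contained sketch. This is an editorial illustration, not part of the patch: the four-instruction trace, its dependency edges, and the latencies are invented, but the forward and backward sweeps follow the same recurrences as computeInstrDepths(), pushDepHeight()/computeInstrHeights(), and Trace::getInstrSlack().

  // Toy model of the per-instruction numbers MachineTraceMetrics computes.
  // The trace is the chain I0 -> I1 -> I2 plus a side use I0 -> I3; all
  // latencies are made up for the example.
  #include <algorithm>
  #include <cstdio>

  int main() {
    struct Edge { unsigned Def, Use, Latency; };
    // Edges are listed in trace (top-down) order, so a forward sweep can
    // compute depths and a backward sweep heights.
    const Edge Edges[] = {{0, 1, 2}, {1, 2, 3}, {0, 3, 1}};
    const unsigned NumInstrs = 4;

    // Depth: earliest issue cycle, as in computeInstrDepths().
    unsigned Depth[NumInstrs] = {};
    for (unsigned i = 0; i != 3; ++i)
      Depth[Edges[i].Use] =
          std::max(Depth[Edges[i].Use], Depth[Edges[i].Def] + Edges[i].Latency);

    // Height: cycles from issue to the end of the trace, as in pushDepHeight().
    unsigned Height[NumInstrs] = {};
    for (unsigned i = 3; i != 0; --i)
      Height[Edges[i-1].Def] =
          std::max(Height[Edges[i-1].Def],
                   Height[Edges[i-1].Use] + Edges[i-1].Latency);

    // Critical path: longest Depth + Height anywhere in the trace.
    unsigned CriticalPath = 0;
    for (unsigned i = 0; i != NumInstrs; ++i)
      CriticalPath = std::max(CriticalPath, Depth[i] + Height[i]);

    // Slack, as in Trace::getInstrSlack(). The chain I0 -> I1 -> I2 is
    // critical (5 cycles), so the side use I3 ends up with depth 1, height 0,
    // and 4 cycles of slack.
    for (unsigned i = 0; i != NumInstrs; ++i)
      std::printf("I%u: depth=%u height=%u slack=%u\n", i, Depth[i], Height[i],
                  CriticalPath - (Depth[i] + Height[i]));
    return 0;
  }
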
diff --git a/lib/CodeGen/MachineTraceMetrics.h b/lib/CodeGen/MachineTraceMetrics.h
new file mode 100644
index 0000000..c5b86f3
--- /dev/null
+++ b/lib/CodeGen/MachineTraceMetrics.h
@@ -0,0 +1,341 @@
+//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the MachineTraceMetrics analysis pass
+// that estimates CPU resource usage and critical data dependency paths through
+// preferred traces. This is useful for super-scalar CPUs where execution speed
+// can be limited both by data dependencies and by limited execution resources.
+//
+// Out-of-order CPUs will often be executing instructions from multiple basic
+// blocks at the same time. This makes it difficult to estimate the resource
+// usage accurately in a single basic block. Resources can be estimated better
+// by looking at a trace through the current basic block.
+//
+// For every block, the MachineTraceMetrics pass will pick a preferred trace
+// that passes through the block. The trace is chosen based on loop structure,
+// branch probabilities, and resource usage. The intention is to pick likely
+// traces that would be the most affected by code transformations.
+//
+// It is expensive to compute a full arbitrary trace for every block, so to
+// save some computations, traces are chosen to be convergent. This means that
+// if the traces through basic blocks A and B ever cross when moving away from
+// A and B, they never diverge again. This applies in both directions - if the
+// traces meet above A and B, they won't diverge when going further back.
+//
+// Traces tend to align with loops. The trace through a block in an inner loop
+// will begin at the loop entry block and end at a back edge. If there are
+// nested loops, the trace may begin and end at those instead.
+//
+// For each trace, we compute the critical path length, which is the number of
+// cycles required to execute the trace when execution is limited by data
+// dependencies only. We also compute the resource height, which is the number
+// of cycles required to execute all instructions in the trace when ignoring
+// data dependencies.
+//
+// Every instruction in the current block has a slack - the number of cycles
+// execution of the instruction can be delayed without extending the critical
+// path.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
+#define LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class InstrItineraryData;
+class MachineBasicBlock;
+class MachineInstr;
+class MachineLoop;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+class raw_ostream;
+
+class MachineTraceMetrics : public MachineFunctionPass {
+ const MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const InstrItineraryData *ItinData;
+ const MachineRegisterInfo *MRI;
+ const MachineLoopInfo *Loops;
+
+public:
+ class Ensemble;
+ class Trace;
+ static char ID;
+ MachineTraceMetrics();
+ void getAnalysisUsage(AnalysisUsage&) const;
+ bool runOnMachineFunction(MachineFunction&);
+ void releaseMemory();
+ void verifyAnalysis() const;
+
+ friend class Ensemble;
+ friend class Trace;
+
+ /// Per-basic block information that doesn't depend on the trace through the
+ /// block.
+ struct FixedBlockInfo {
+ /// The number of non-trivial instructions in the block.
+ /// Doesn't count PHI and COPY instructions that are likely to be removed.
+ unsigned InstrCount;
+
+ /// True when the block contains calls.
+ bool HasCalls;
+
+ FixedBlockInfo() : InstrCount(~0u), HasCalls(false) {}
+
+ /// Returns true when resource information for this block has been computed.
+ bool hasResources() const { return InstrCount != ~0u; }
+
+ /// Invalidate resource information.
+ void invalidate() { InstrCount = ~0u; }
+ };
+
+ /// Get the fixed resource information about MBB. Compute it on demand.
+ const FixedBlockInfo *getResources(const MachineBasicBlock*);
+
+ /// A virtual register or regunit required by a basic block or its trace
+ /// successors.
+ struct LiveInReg {
+ /// The virtual register required, or a register unit.
+ unsigned Reg;
+
+ /// For virtual registers: Minimum height of the defining instruction.
+ /// For regunits: Height of the highest user in the trace.
+ unsigned Height;
+
+ LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
+ };
+
+ /// Per-basic block information that relates to a specific trace through the
+ /// block. Convergent traces mean that only one of these is required per
+ /// block in a trace ensemble.
+ struct TraceBlockInfo {
+ /// Trace predecessor, or NULL for the first block in the trace.
+ /// Valid when hasValidDepth().
+ const MachineBasicBlock *Pred;
+
+ /// Trace successor, or NULL for the last block in the trace.
+ /// Valid when hasValidHeight().
+ const MachineBasicBlock *Succ;
+
+ /// The block number of the head of the trace. (When hasValidDepth()).
+ unsigned Head;
+
+ /// The block number of the tail of the trace. (When hasValidHeight()).
+ unsigned Tail;
+
+ /// Accumulated number of instructions in the trace above this block.
+ /// Does not include instructions in this block.
+ unsigned InstrDepth;
+
+ /// Accumulated number of instructions in the trace below this block.
+ /// Includes instructions in this block.
+ unsigned InstrHeight;
+
+ TraceBlockInfo() :
+ Pred(0), Succ(0),
+ InstrDepth(~0u), InstrHeight(~0u),
+ HasValidInstrDepths(false), HasValidInstrHeights(false) {}
+
+ /// Returns true if the depth resources have been computed from the trace
+ /// above this block.
+ bool hasValidDepth() const { return InstrDepth != ~0u; }
+
+ /// Returns true if the height resources have been computed from the trace
+ /// below this block.
+ bool hasValidHeight() const { return InstrHeight != ~0u; }
+
+ /// Invalidate depth resources when some block above this one has changed.
+ void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
+
+ /// Invalidate height resources when a block below this one has changed.
+ void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
+
+ // Data-dependency-related information. Per-instruction depth and height
+ // are computed from data dependencies in the current trace, using
+ // itinerary data.
+
+ /// Instruction depths have been computed. This implies hasValidDepth().
+ bool HasValidInstrDepths;
+
+ /// Instruction heights have been computed. This implies hasValidHeight().
+ bool HasValidInstrHeights;
+
+ /// Critical path length. This is the number of cycles in the longest data
+ /// dependency chain through the trace. This is only valid when both
+ /// HasValidInstrDepths and HasValidInstrHeights are set.
+ unsigned CriticalPath;
+
+ /// Live-in registers. These registers are defined above the current block
+ /// and used by this block or a block below it.
+ /// This does not include PHI uses in the current block, but it does
+ /// include PHI uses in deeper blocks.
+ SmallVector<LiveInReg, 4> LiveIns;
+
+ void print(raw_ostream&) const;
+ };
+
+ /// InstrCycles represents the cycle height and depth of an instruction in a
+ /// trace.
+ struct InstrCycles {
+ /// Earliest issue cycle as determined by data dependencies and instruction
+ /// latencies from the beginning of the trace. Data dependencies from
+ /// before the trace are not included.
+ unsigned Depth;
+
+ /// Minimum number of cycles from when this instruction is issued to the end
+ /// of the trace, as determined by data dependencies and instruction latencies.
+ unsigned Height;
+ };
+
+ /// A trace represents a plausible sequence of executed basic blocks that
+ /// passes through the current basic block once. The Trace class serves as a
+ /// handle to internal cached data structures.
+ class Trace {
+ Ensemble &TE;
+ TraceBlockInfo &TBI;
+
+ unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
+
+ public:
+ explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
+ void print(raw_ostream&) const;
+
+ /// Compute the total number of instructions in the trace.
+ unsigned getInstrCount() const {
+ return TBI.InstrDepth + TBI.InstrHeight;
+ }
+
+ /// Return the resource depth of the top/bottom of the trace center block.
+ /// This is the number of cycles required to execute all instructions from
+ /// the trace head to the trace center block. The resource depth only
+ /// considers execution resources, it ignores data dependencies.
+ /// When Bottom is set, instructions in the trace center block are included.
+ unsigned getResourceDepth(bool Bottom) const;
+
+ /// Return the resource length of the trace. This is the number of cycles
+ /// required to execute the instructions in the trace if they were all
+ /// independent, exposing the maximum instruction-level parallelism.
+ ///
+ /// Any blocks in Extrablocks are included as if they were part of the
+ /// trace.
+ unsigned getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks =
+ ArrayRef<const MachineBasicBlock*>()) const;
+
+ /// Return the length of the (data dependency) critical path through the
+ /// trace.
+ unsigned getCriticalPath() const { return TBI.CriticalPath; }
+
+ /// Return the depth and height of MI. The depth is only valid for
+ /// instructions in or above the trace center block. The height is only
+ /// valid for instructions in or below the trace center block.
+ InstrCycles getInstrCycles(const MachineInstr *MI) const {
+ return TE.Cycles.lookup(MI);
+ }
+
+ /// Return the slack of MI. This is the number of cycles MI can be delayed
+ /// before the critical path becomes longer.
+ /// MI must be an instruction in the trace center block.
+ unsigned getInstrSlack(const MachineInstr *MI) const;
+
+ /// Return the Depth of a PHI instruction in a trace center block successor.
+ /// The PHI does not have to be part of the trace.
+ unsigned getPHIDepth(const MachineInstr *PHI) const;
+ };
+
+ /// A trace ensemble is a collection of traces selected using the same
+ /// strategy, for example 'minimum resource height'. There is one trace for
+ /// every block in the function.
+ class Ensemble {
+ SmallVector<TraceBlockInfo, 4> BlockInfo;
+ DenseMap<const MachineInstr*, InstrCycles> Cycles;
+ friend class Trace;
+
+ void computeTrace(const MachineBasicBlock*);
+ void computeDepthResources(const MachineBasicBlock*);
+ void computeHeightResources(const MachineBasicBlock*);
+ unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
+ void computeInstrDepths(const MachineBasicBlock*);
+ void computeInstrHeights(const MachineBasicBlock*);
+ void addLiveIns(const MachineInstr *DefMI,
+ ArrayRef<const MachineBasicBlock*> Trace);
+
+ protected:
+ MachineTraceMetrics &MTM;
+ virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0;
+ virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0;
+ explicit Ensemble(MachineTraceMetrics*);
+ const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
+ const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const;
+ const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const;
+
+ public:
+ virtual ~Ensemble();
+ virtual const char *getName() const =0;
+ void print(raw_ostream&) const;
+ void invalidate(const MachineBasicBlock *MBB);
+ void verify() const;
+
+ /// Get the trace that passes through MBB.
+ /// The trace is computed on demand.
+ Trace getTrace(const MachineBasicBlock *MBB);
+ };
+
+ /// Strategies for selecting traces.
+ enum Strategy {
+ /// Select the trace through a block that has the fewest instructions.
+ TS_MinInstrCount,
+
+ TS_NumStrategies
+ };
+
+ /// Get the trace ensemble representing the given trace selection strategy.
+ /// The returned Ensemble object is owned by the MachineTraceMetrics analysis,
+ /// and valid for the lifetime of the analysis pass.
+ Ensemble *getEnsemble(Strategy);
+
+ /// Invalidate cached information about MBB. This must be called *before* MBB
+ /// is erased, or the CFG is otherwise changed.
+ ///
+ /// This invalidates per-block information about resource usage for MBB only,
+ /// and it invalidates per-trace information for any trace that passes
+ /// through MBB.
+ ///
+ /// Call Ensemble::getTrace() again to update any trace handles.
+ void invalidate(const MachineBasicBlock *MBB);
+
+private:
+ // One entry per basic block, indexed by block number.
+ SmallVector<FixedBlockInfo, 4> BlockInfo;
+
+ // One ensemble per strategy.
+ Ensemble* Ensembles[TS_NumStrategies];
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS,
+ const MachineTraceMetrics::Trace &Tr) {
+ Tr.print(OS);
+ return OS;
+}
+
+inline raw_ostream &operator<<(raw_ostream &OS,
+ const MachineTraceMetrics::Ensemble &En) {
+ En.print(OS);
+ return OS;
+}
+} // end namespace llvm
+
+#endif
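
Before moving on to the verifier changes, here is a minimal sketch of how a hypothetical client pass could consume the interface declared in this header. It is not part of the patch: the pass name and the printed statistics are invented, pass registration/initialization is omitted, and the client is assumed to live in lib/CodeGen so it can include the private header directly; the Ensemble and Trace calls are the ones declared above.

  #include "MachineTraceMetrics.h"
  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineFunctionPass.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  namespace {
  // Hypothetical consumer that prints trace statistics for every block.
  class TraceMetricsClient : public MachineFunctionPass {
  public:
    static char ID;
    TraceMetricsClient() : MachineFunctionPass(ID) {}

    void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<MachineTraceMetrics>();
      AU.setPreservesAll();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    bool runOnMachineFunction(MachineFunction &MF) {
      MachineTraceMetrics &MTM = getAnalysis<MachineTraceMetrics>();
      // Pick the ensemble of minimum-instruction-count traces; traces are
      // computed lazily the first time a block is queried.
      MachineTraceMetrics::Ensemble *E =
          MTM.getEnsemble(MachineTraceMetrics::TS_MinInstrCount);

      for (MachineFunction::const_iterator I = MF.begin(), End = MF.end();
           I != End; ++I) {
        MachineTraceMetrics::Trace Tr = E->getTrace(I);
        errs() << "BB#" << I->getNumber()
               << ": " << Tr.getInstrCount() << " instrs, crit path "
               << Tr.getCriticalPath() << ", resource length "
               << Tr.getResourceLength() << '\n';
      }
      return false; // Analysis only; nothing changed.
    }
  };
  char TraceMetricsClient::ID = 0;
  } // end anonymous namespace

A transformation pass could combine getInstrSlack() and getResourceLength() in the same way to judge whether adding instructions to a trace is profitable.
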
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index d8dece6..852c169 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -73,8 +73,10 @@ namespace {
typedef SmallVector<const uint32_t*, 4> RegMaskVector;
typedef DenseSet<unsigned> RegSet;
typedef DenseMap<unsigned, const MachineInstr*> RegMap;
+ typedef SmallPtrSet<const MachineBasicBlock*, 8> BlockSet;
const MachineInstr *FirstTerminator;
+ BlockSet FunctionBlocks;
BitVector regsReserved;
BitVector regsAllocatable;
@@ -117,6 +119,9 @@ namespace {
// block. This set is disjoint from regsLiveOut.
RegSet vregsRequired;
+ // Set versions of block's predecessor and successor lists.
+ BlockSet Preds, Succs;
+
BBInfo() : reachable(false) {}
// Add register to vregsPassed if it belongs there. Return true if
@@ -203,6 +208,10 @@ namespace {
void report(const char *msg, const MachineBasicBlock *MBB);
void report(const char *msg, const MachineInstr *MI);
void report(const char *msg, const MachineOperand *MO, unsigned MONum);
+ void report(const char *msg, const MachineFunction *MF,
+ const LiveInterval &LI);
+ void report(const char *msg, const MachineBasicBlock *MBB,
+ const LiveInterval &LI);
void checkLiveness(const MachineOperand *MO, unsigned MONum);
void markReachable(const MachineBasicBlock *MBB);
@@ -212,6 +221,10 @@ namespace {
void calcRegsRequired();
void verifyLiveVariables();
void verifyLiveIntervals();
+ void verifyLiveInterval(const LiveInterval&);
+ void verifyLiveIntervalValue(const LiveInterval&, VNInfo*);
+ void verifyLiveIntervalSegment(const LiveInterval&,
+ LiveInterval::const_iterator);
};
struct MachineVerifierPass : public MachineFunctionPass {
@@ -350,9 +363,9 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
assert(MBB);
report(msg, MBB->getParent());
- *OS << "- basic block: " << MBB->getName()
- << " " << (void*)MBB
- << " (BB#" << MBB->getNumber() << ")";
+ *OS << "- basic block: BB#" << MBB->getNumber()
+ << ' ' << MBB->getName()
+ << " (" << (void*)MBB << ')';
if (Indexes)
*OS << " [" << Indexes->getMBBStartIdx(MBB)
<< ';' << Indexes->getMBBEndIdx(MBB) << ')';
@@ -377,6 +390,28 @@ void MachineVerifier::report(const char *msg,
*OS << "\n";
}
+void MachineVerifier::report(const char *msg, const MachineFunction *MF,
+ const LiveInterval &LI) {
+ report(msg, MF);
+ *OS << "- interval: ";
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg))
+ *OS << PrintReg(LI.reg, TRI);
+ else
+ *OS << PrintRegUnit(LI.reg, TRI);
+ *OS << ' ' << LI << '\n';
+}
+
+void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB,
+ const LiveInterval &LI) {
+ report(msg, MBB);
+ *OS << "- interval: ";
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg))
+ *OS << PrintReg(LI.reg, TRI);
+ else
+ *OS << PrintRegUnit(LI.reg, TRI);
+ *OS << ' ' << LI << '\n';
+}
+
void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
BBInfo &MInfo = MBBInfoMap[MBB];
if (!MInfo.reachable) {
@@ -404,6 +439,22 @@ void MachineVerifier::visitMachineFunctionBefore() {
regsAllocatable = TRI->getAllocatableSet(*MF);
markReachable(&MF->front());
+
+ // Build a set of the basic blocks in the function.
+ FunctionBlocks.clear();
+ for (MachineFunction::const_iterator
+ I = MF->begin(), E = MF->end(); I != E; ++I) {
+ FunctionBlocks.insert(I);
+ BBInfo &MInfo = MBBInfoMap[I];
+
+ MInfo.Preds.insert(I->pred_begin(), I->pred_end());
+ if (MInfo.Preds.size() != I->pred_size())
+ report("MBB has duplicate entries in its predecessor list.", I);
+
+ MInfo.Succs.insert(I->succ_begin(), I->succ_end());
+ if (MInfo.Succs.size() != I->succ_size())
+ report("MBB has duplicate entries in its successor list.", I);
+ }
}
// Does iterator point to a and b as the first two elements?
@@ -440,6 +491,25 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
E = MBB->succ_end(); I != E; ++I) {
if ((*I)->isLandingPad())
LandingPadSuccs.insert(*I);
+ if (!FunctionBlocks.count(*I))
+ report("MBB has successor that isn't part of the function.", MBB);
+ if (!MBBInfoMap[*I].Preds.count(MBB)) {
+ report("Inconsistent CFG", MBB);
+ *OS << "MBB is not in the predecessor list of the successor BB#"
+ << (*I)->getNumber() << ".\n";
+ }
+ }
+
+ // Check the predecessor list.
+ for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(),
+ E = MBB->pred_end(); I != E; ++I) {
+ if (!FunctionBlocks.count(*I))
+ report("MBB has predecessor that isn't part of the function.", MBB);
+ if (!MBBInfoMap[*I].Succs.count(MBB)) {
+ report("Inconsistent CFG", MBB);
+ *OS << "MBB is not in the successor list of the predecessor BB#"
+ << (*I)->getNumber() << ".\n";
+ }
}
const MCAsmInfo *AsmInfo = TM->getMCAsmInfo();
@@ -510,7 +580,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
++MBBI;
if (MBBI == MF->end()) {
report("MBB conditionally falls through out of function!", MBB);
- } if (MBB->succ_size() != 2) {
+ } else if (MBB->succ_size() == 1) {
+ // A conditional branch with only one successor is weird, but allowed.
+ if (&*MBBI != TBB)
+ report("MBB exits via conditional branch/fall-through but only has "
+ "one CFG successor!", MBB);
+ else if (TBB != *MBB->succ_begin())
+ report("MBB exits via conditional branch/fall-through but the CFG "
+ "successor doesn't match the actual successor!", MBB);
+ } else if (MBB->succ_size() != 2) {
report("MBB exits via conditional branch/fall-through but doesn't have "
"exactly two CFG successors!", MBB);
} else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) {
@@ -530,7 +608,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
} else if (TBB && FBB) {
// Block conditionally branches somewhere, otherwise branches
// somewhere else.
- if (MBB->succ_size() != 2) {
+ if (MBB->succ_size() == 1) {
+ // A conditional branch with only one successor is weird, but allowed.
+ if (FBB != TBB)
+ report("MBB exits via conditional branch/branch through but only has "
+ "one CFG successor!", MBB);
+ else if (TBB != *MBB->succ_begin())
+ report("MBB exits via conditional branch/branch through but the CFG "
+ "successor doesn't match the actual successor!", MBB);
+ } else if (MBB->succ_size() != 2) {
report("MBB exits via conditional branch/branch but doesn't have "
"exactly two CFG successors!", MBB);
} else if (!matchPair(MBB->succ_begin(), TBB, FBB)) {
@@ -651,10 +737,10 @@ void
MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
const MachineInstr *MI = MO->getParent();
const MCInstrDesc &MCID = MI->getDesc();
- const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
// The first MCID.NumDefs operands must be explicit register defines
if (MONum < MCID.getNumDefs()) {
+ const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
if (!MO->isReg())
report("Explicit definition must be a register", MO, MONum);
else if (!MO->isDef() && !MCOI.isOptionalDef())
@@ -662,6 +748,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
else if (MO->isImplicit())
report("Explicit definition marked as implicit", MO, MONum);
} else if (MONum < MCID.getNumOperands()) {
+ const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
// Don't check if it's the last operand in a variadic instruction. See,
// e.g., LDM_RET in the arm back end.
if (MO->isReg() &&
@@ -685,6 +772,12 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
if (MRI->tracksLiveness() && !MI->isDebugValue())
checkLiveness(MO, MONum);
+ // Verify two-address constraints after leaving SSA form.
+ unsigned DefIdx;
+ if (!MRI->isSSA() && MO->isUse() &&
+ MI->isRegTiedToDefOperand(MONum, &DefIdx) &&
+ Reg != MI->getOperand(DefIdx).getReg())
+ report("Two-address instruction operands must be identical", MO, MONum);
// Check register classes.
if (MONum < MCID.getNumOperands() && !MO->isImplicit()) {
@@ -786,20 +879,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (MO->readsReg()) {
regsLiveInButUnused.erase(Reg);
- bool isKill = false;
- unsigned defIdx;
- if (MI->isRegTiedToDefOperand(MONum, &defIdx)) {
- // A two-addr use counts as a kill if use and def are the same.
- unsigned DefReg = MI->getOperand(defIdx).getReg();
- if (Reg == DefReg)
- isKill = true;
- else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- report("Two-address instruction operands must be identical", MO, MONum);
- }
- } else
- isKill = MO->isKill();
-
- if (isKill)
+ if (MO->isKill())
addRegWithSubRegs(regsKilled, Reg);
// Check that LiveVars knows this kill.
@@ -811,23 +891,44 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
}
// Check LiveInts liveness and kill.
- if (TargetRegisterInfo::isVirtualRegister(Reg) &&
- LiveInts && !LiveInts->isNotInMIMap(MI)) {
- SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getRegSlot(true);
- if (LiveInts->hasInterval(Reg)) {
- const LiveInterval &LI = LiveInts->getInterval(Reg);
- if (!LI.liveAt(UseIdx)) {
- report("No live range at use", MO, MONum);
- *OS << UseIdx << " is not live in " << LI << '\n';
+ if (LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex UseIdx = LiveInts->getInstructionIndex(MI);
+ // Check the cached regunit intervals.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) {
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ if (const LiveInterval *LI = LiveInts->getCachedRegUnit(*Units)) {
+ LiveRangeQuery LRQ(*LI, UseIdx);
+ if (!LRQ.valueIn()) {
+ report("No live range at use", MO, MONum);
+ *OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI)
+ << ' ' << *LI << '\n';
+ }
+ if (MO->isKill() && !LRQ.isKill()) {
+ report("Live range continues after kill flag", MO, MONum);
+ *OS << PrintRegUnit(*Units, TRI) << ' ' << *LI << '\n';
+ }
+ }
}
- // Check for extra kill flags.
- // Note that we allow missing kill flags for now.
- if (MO->isKill() && !LI.killedAt(UseIdx.getRegSlot())) {
- report("Live range continues after kill flag", MO, MONum);
- *OS << "Live range: " << LI << '\n';
+ }
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (LiveInts->hasInterval(Reg)) {
+ // This is a virtual register interval.
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ LiveRangeQuery LRQ(LI, UseIdx);
+ if (!LRQ.valueIn()) {
+ report("No live range at use", MO, MONum);
+ *OS << UseIdx << " is not live in " << LI << '\n';
+ }
+ // Check for extra kill flags.
+ // Note that we allow missing kill flags for now.
+ if (MO->isKill() && !LRQ.isKill()) {
+ report("Live range continues after kill flag", MO, MONum);
+ *OS << "Live range: " << LI << '\n';
+ }
+ } else {
+ report("Virtual register has no live interval", MO, MONum);
}
- } else {
- report("Virtual register has no Live interval", MO, MONum);
}
}
@@ -1124,281 +1225,282 @@ void MachineVerifier::verifyLiveIntervals() {
const LiveInterval &LI = LiveInts->getInterval(Reg);
assert(Reg == LI.reg && "Invalid reg to interval mapping");
+ verifyLiveInterval(LI);
+ }
- for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
- I!=E; ++I) {
- VNInfo *VNI = *I;
- const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def);
+ // Verify all the cached regunit intervals.
+ for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i)
+ if (const LiveInterval *LI = LiveInts->getCachedRegUnit(i))
+ verifyLiveInterval(*LI);
+}
- if (!DefVNI) {
- if (!VNI->isUnused()) {
- report("Valno not live at def and not marked unused", MF);
- *OS << "Valno #" << VNI->id << " in " << LI << '\n';
- }
- continue;
- }
+void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI,
+ VNInfo *VNI) {
+ if (VNI->isUnused())
+ return;
- if (VNI->isUnused())
- continue;
+ const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def);
- if (DefVNI != VNI) {
- report("Live range at def has different valno", MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " where valno #" << DefVNI->id << " is live in " << LI << '\n';
- continue;
- }
+ if (!DefVNI) {
+ report("Valno not live at def and not marked unused", MF, LI);
+ *OS << "Valno #" << VNI->id << '\n';
+ return;
+ }
- const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
- if (!MBB) {
- report("Invalid definition index", MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " in " << LI << '\n';
- continue;
- }
+ if (DefVNI != VNI) {
+ report("Live range at def has different valno", MF, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " where valno #" << DefVNI->id << " is live\n";
+ return;
+ }
- if (VNI->isPHIDef()) {
- if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
- report("PHIDef value is not defined at MBB start", MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << ", not at the beginning of BB#" << MBB->getNumber()
- << " in " << LI << '\n';
- }
- } else {
- // Non-PHI def.
- const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
- if (!MI) {
- report("No instruction at def index", MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " in " << LI << '\n';
- continue;
- }
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
+ if (!MBB) {
+ report("Invalid definition index", MF, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ return;
+ }
- bool hasDef = false;
- bool isEarlyClobber = false;
- for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
- if (!MOI->isReg() || !MOI->isDef())
- continue;
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
- if (MOI->getReg() != LI.reg)
- continue;
- } else {
- if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) ||
- !TRI->regsOverlap(LI.reg, MOI->getReg()))
- continue;
- }
- hasDef = true;
- if (MOI->isEarlyClobber())
- isEarlyClobber = true;
- }
+ if (VNI->isPHIDef()) {
+ if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
+ report("PHIDef value is not defined at MBB start", MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << ", not at the beginning of BB#" << MBB->getNumber() << '\n';
+ }
+ return;
+ }
- if (!hasDef) {
- report("Defining instruction does not modify register", MI);
- *OS << "Valno #" << VNI->id << " in " << LI << '\n';
- }
+ // Non-PHI def.
+ const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
+ if (!MI) {
+ report("No instruction at def index", MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ return;
+ }
- // Early clobber defs begin at USE slots, but other defs must begin at
- // DEF slots.
- if (isEarlyClobber) {
- if (!VNI->def.isEarlyClobber()) {
- report("Early clobber def must be at an early-clobber slot", MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " in " << LI << '\n';
- }
- } else if (!VNI->def.isRegister()) {
- report("Non-PHI, non-early clobber def must be at a register slot",
- MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " in " << LI << '\n';
- }
- }
+ bool hasDef = false;
+ bool isEarlyClobber = false;
+ for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef())
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ if (MOI->getReg() != LI.reg)
+ continue;
+ } else {
+ if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) ||
+ !TRI->hasRegUnit(MOI->getReg(), LI.reg))
+ continue;
}
+ hasDef = true;
+ if (MOI->isEarlyClobber())
+ isEarlyClobber = true;
+ }
- for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) {
- const VNInfo *VNI = I->valno;
- assert(VNI && "Live range has no valno");
+ if (!hasDef) {
+ report("Defining instruction does not modify register", MI);
+ *OS << "Valno #" << VNI->id << " in " << LI << '\n';
+ }
- if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) {
- report("Foreign valno in live range", MF);
- I->print(*OS);
- *OS << " has a valno not in " << LI << '\n';
- }
+ // Early clobber defs begin at USE slots, but other defs must begin at
+ // DEF slots.
+ if (isEarlyClobber) {
+ if (!VNI->def.isEarlyClobber()) {
+ report("Early clobber def must be at an early-clobber slot", MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ }
+ } else if (!VNI->def.isRegister()) {
+ report("Non-PHI, non-early clobber def must be at a register slot",
+ MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ }
+}
- if (VNI->isUnused()) {
- report("Live range valno is marked unused", MF);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- }
+void
+MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI,
+ LiveInterval::const_iterator I) {
+ const VNInfo *VNI = I->valno;
+ assert(VNI && "Live range has no valno");
+
+ if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) {
+ report("Foreign valno in live range", MF, LI);
+ *OS << *I << " has a bad valno\n";
+ }
- const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start);
- if (!MBB) {
- report("Bad start of live segment, no basic block", MF);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- continue;
- }
- SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
- if (I->start != MBBStartIdx && I->start != VNI->def) {
- report("Live segment must begin at MBB entry or valno def", MBB);
- I->print(*OS);
- *OS << " in " << LI << '\n' << "Basic block starts at "
- << MBBStartIdx << '\n';
- }
+ if (VNI->isUnused()) {
+ report("Live range valno is marked unused", MF, LI);
+ *OS << *I << '\n';
+ }
- const MachineBasicBlock *EndMBB =
- LiveInts->getMBBFromIndex(I->end.getPrevSlot());
- if (!EndMBB) {
- report("Bad end of live segment, no basic block", MF);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- continue;
- }
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start);
+ if (!MBB) {
+ report("Bad start of live segment, no basic block", MF, LI);
+ *OS << *I << '\n';
+ return;
+ }
+ SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
+ if (I->start != MBBStartIdx && I->start != VNI->def) {
+ report("Live segment must begin at MBB entry or valno def", MBB, LI);
+ *OS << *I << '\n';
+ }
- // No more checks for live-out segments.
- if (I->end == LiveInts->getMBBEndIdx(EndMBB))
- continue;
+ const MachineBasicBlock *EndMBB =
+ LiveInts->getMBBFromIndex(I->end.getPrevSlot());
+ if (!EndMBB) {
+ report("Bad end of live segment, no basic block", MF, LI);
+ *OS << *I << '\n';
+ return;
+ }
- // The live segment is ending inside EndMBB
- const MachineInstr *MI =
- LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
- if (!MI) {
- report("Live segment doesn't end at a valid instruction", EndMBB);
- I->print(*OS);
- *OS << " in " << LI << '\n' << "Basic block starts at "
- << MBBStartIdx << '\n';
+ // No more checks for live-out segments.
+ if (I->end == LiveInts->getMBBEndIdx(EndMBB))
+ return;
+
+ // RegUnit intervals are allowed dead phis.
+ if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && VNI->isPHIDef() &&
+ I->start == VNI->def && I->end == VNI->def.getDeadSlot())
+ return;
+
+ // The live segment is ending inside EndMBB
+ const MachineInstr *MI =
+ LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
+ if (!MI) {
+ report("Live segment doesn't end at a valid instruction", EndMBB, LI);
+ *OS << *I << '\n';
+ return;
+ }
+
+ // The block slot must refer to a basic block boundary.
+ if (I->end.isBlock()) {
+ report("Live segment ends at B slot of an instruction", EndMBB, LI);
+ *OS << *I << '\n';
+ }
+
+ if (I->end.isDead()) {
+ // Segment ends on the dead slot.
+ // That means there must be a dead def.
+ if (!SlotIndex::isSameInstr(I->start, I->end)) {
+ report("Live segment ending at dead slot spans instructions", EndMBB, LI);
+ *OS << *I << '\n';
+ }
+ }
+
+ // A live segment can only end at an early-clobber slot if it is being
+ // redefined by an early-clobber def.
+ if (I->end.isEarlyClobber()) {
+ if (I+1 == LI.end() || (I+1)->start != I->end) {
+ report("Live segment ending at early clobber slot must be "
+ "redefined by an EC def in the same instruction", EndMBB, LI);
+ *OS << *I << '\n';
+ }
+ }
+
+ // The following checks only apply to virtual registers. Physreg liveness
+ // is too weird to check.
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ // A live range can end with either a redefinition, a kill flag on a
+ // use, or a dead flag on a def.
+ bool hasRead = false;
+ bool hasDeadDef = false;
+ for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || MOI->getReg() != LI.reg)
continue;
- }
+ if (MOI->readsReg())
+ hasRead = true;
+ if (MOI->isDef() && MOI->isDead())
+ hasDeadDef = true;
+ }
- // The block slot must refer to a basic block boundary.
- if (I->end.isBlock()) {
- report("Live segment ends at B slot of an instruction", MI);
+ if (I->end.isDead()) {
+ if (!hasDeadDef) {
+ report("Instruction doesn't have a dead def operand", MI);
I->print(*OS);
*OS << " in " << LI << '\n';
}
-
- if (I->end.isDead()) {
- // Segment ends on the dead slot.
- // That means there must be a dead def.
- if (!SlotIndex::isSameInstr(I->start, I->end)) {
- report("Live segment ending at dead slot spans instructions", MI);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- }
- }
-
- // A live segment can only end at an early-clobber slot if it is being
- // redefined by an early-clobber def.
- if (I->end.isEarlyClobber()) {
- if (I+1 == E || (I+1)->start != I->end) {
- report("Live segment ending at early clobber slot must be "
- "redefined by an EC def in the same instruction", MI);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- }
+ } else {
+ if (!hasRead) {
+ report("Instruction ending live range doesn't read the register", MI);
+ *OS << *I << " in " << LI << '\n';
}
+ }
+ }
- // The following checks only apply to virtual registers. Physreg liveness
- // is too weird to check.
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
- // A live range can end with either a redefinition, a kill flag on a
- // use, or a dead flag on a def.
- bool hasRead = false;
- bool hasDeadDef = false;
- for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
- if (!MOI->isReg() || MOI->getReg() != LI.reg)
- continue;
- if (MOI->readsReg())
- hasRead = true;
- if (MOI->isDef() && MOI->isDead())
- hasDeadDef = true;
- }
-
- if (I->end.isDead()) {
- if (!hasDeadDef) {
- report("Instruction doesn't have a dead def operand", MI);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- }
- } else {
- if (!hasRead) {
- report("Instruction ending live range doesn't read the register",
- MI);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- }
- }
- }
+ // Now check all the basic blocks in this live segment.
+ MachineFunction::const_iterator MFI = MBB;
+ // Is this live range the beginning of a non-PHIDef VN?
+ if (I->start == VNI->def && !VNI->isPHIDef()) {
+ // Not live-in to any blocks.
+ if (MBB == EndMBB)
+ return;
+ // Skip this block.
+ ++MFI;
+ }
+ for (;;) {
+ assert(LiveInts->isLiveInToMBB(LI, MFI));
+ // We don't know how to track physregs into a landing pad.
+ if (!TargetRegisterInfo::isVirtualRegister(LI.reg) &&
+ MFI->isLandingPad()) {
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ continue;
+ }
- // Now check all the basic blocks in this live segment.
- MachineFunction::const_iterator MFI = MBB;
- // Is this live range the beginning of a non-PHIDef VN?
- if (I->start == VNI->def && !VNI->isPHIDef()) {
- // Not live-in to any blocks.
- if (MBB == EndMBB)
- continue;
- // Skip this block.
- ++MFI;
+ // Is VNI a PHI-def in the current block?
+ bool IsPHI = VNI->isPHIDef() &&
+ VNI->def == LiveInts->getMBBStartIdx(MFI);
+
+ // Check that VNI is live-out of all predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
+ PE = MFI->pred_end(); PI != PE; ++PI) {
+ SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI);
+ const VNInfo *PVNI = LI.getVNInfoBefore(PEnd);
+
+ // All predecessors must have a live-out value.
+ if (!PVNI) {
+ report("Register not marked live out of predecessor", *PI, LI);
+ *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
+ << PEnd << '\n';
+ continue;
}
- for (;;) {
- assert(LiveInts->isLiveInToMBB(LI, MFI));
- // We don't know how to track physregs into a landing pad.
- if (TargetRegisterInfo::isPhysicalRegister(LI.reg) &&
- MFI->isLandingPad()) {
- if (&*MFI == EndMBB)
- break;
- ++MFI;
- continue;
- }
- // Is VNI a PHI-def in the current block?
- bool IsPHI = VNI->isPHIDef() &&
- VNI->def == LiveInts->getMBBStartIdx(MFI);
-
- // Check that VNI is live-out of all predecessors.
- for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
- PE = MFI->pred_end(); PI != PE; ++PI) {
- SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI);
- const VNInfo *PVNI = LI.getVNInfoBefore(PEnd);
-
- // All predecessors must have a live-out value.
- if (!PVNI) {
- report("Register not marked live out of predecessor", *PI);
- *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
- << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
- << PEnd << " in " << LI << '\n';
- continue;
- }
-
- // Only PHI-defs can take different predecessor values.
- if (!IsPHI && PVNI != VNI) {
- report("Different value live out of predecessor", *PI);
- *OS << "Valno #" << PVNI->id << " live out of BB#"
- << (*PI)->getNumber() << '@' << PEnd
- << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
- << '@' << LiveInts->getMBBStartIdx(MFI) << " in "
- << PrintReg(Reg) << ": " << LI << '\n';
- }
- }
- if (&*MFI == EndMBB)
- break;
- ++MFI;
+ // Only PHI-defs can take different predecessor values.
+ if (!IsPHI && PVNI != VNI) {
+ report("Different value live out of predecessor", *PI, LI);
+ *OS << "Valno #" << PVNI->id << " live out of BB#"
+ << (*PI)->getNumber() << '@' << PEnd
+ << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << '\n';
}
}
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ }
+}
- // Check the LI only has one connected component.
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
- ConnectedVNInfoEqClasses ConEQ(*LiveInts);
- unsigned NumComp = ConEQ.Classify(&LI);
- if (NumComp > 1) {
- report("Multiple connected components in live interval", MF);
- *OS << NumComp << " components in " << LI << '\n';
- for (unsigned comp = 0; comp != NumComp; ++comp) {
- *OS << comp << ": valnos";
- for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
- E = LI.vni_end(); I!=E; ++I)
- if (comp == ConEQ.getEqClass(*I))
- *OS << ' ' << (*I)->id;
- *OS << '\n';
- }
+void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I!=E; ++I)
+ verifyLiveIntervalValue(LI, *I);
+
+ for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I)
+ verifyLiveIntervalSegment(LI, I);
+
+ // Check the LI only has one connected component.
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ ConnectedVNInfoEqClasses ConEQ(*LiveInts);
+ unsigned NumComp = ConEQ.Classify(&LI);
+ if (NumComp > 1) {
+ report("Multiple connected components in live interval", MF, LI);
+ for (unsigned comp = 0; comp != NumComp; ++comp) {
+ *OS << comp << ": valnos";
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
+ E = LI.vni_end(); I!=E; ++I)
+ if (comp == ConEQ.getEqClass(*I))
+ *OS << ' ' << (*I)->id;
+ *OS << '\n';
}
}
}
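A rough sketch of the per-segment invariants the verifier enforces above, using made-up Seg/verifySegments names rather than the real LiveInterval and SlotIndex classes: segments must be non-empty and non-overlapping, and a segment ending at an early-clobber slot must be redefined by the very next segment.

    #include <cstddef>
    #include <vector>

    struct Seg { unsigned Start, End; bool EndsAtEarlyClobber; };

    // Toy check mirroring the verifier's segment rules (illustration only).
    bool verifySegments(const std::vector<Seg> &Segs) {
      for (std::size_t i = 0; i < Segs.size(); ++i) {
        if (Segs[i].Start >= Segs[i].End)
          return false;                        // empty or inverted segment
        if (i + 1 < Segs.size() && Segs[i].End > Segs[i + 1].Start)
          return false;                        // segments overlap
        if (Segs[i].EndsAtEarlyClobber &&
            (i + 1 == Segs.size() || Segs[i + 1].Start != Segs[i].End))
          return false;                        // EC end without a redefinition
      }
      return true;
    }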
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 69d6d00..56526f2 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -88,6 +88,10 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
cl::desc("Print machine instrs"),
cl::value_desc("pass-name"), cl::init("option-unspecified"));
+// Experimental option to run live interval analysis early.
+static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
+ cl::desc("Run live interval analysis earlier in the pipeline"));
+
/// Allow standard passes to be disabled by command line options. This supports
/// simple binary flags that either suppress the pass or do nothing.
/// i.e. -disable-mypass=false has no effect.
@@ -452,7 +456,8 @@ void TargetPassConfig::addMachinePasses() {
printAndVerify("After Instruction Selection");
// Expand pseudo-instructions emitted by ISel.
- addPass(&ExpandISelPseudosID);
+ if (addPass(&ExpandISelPseudosID))
+ printAndVerify("After ExpandISelPseudos");
// Add passes that optimize machine instructions in SSA form.
if (getOptLevel() != CodeGenOpt::None) {
@@ -648,6 +653,11 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
addPass(&MachineLoopInfoID);
addPass(&PHIEliminationID);
}
+
+ // Eventually, we want to run LiveIntervals before PHI elimination.
+ if (EarlyLiveIntervals)
+ addPass(&LiveIntervalsID);
+
addPass(&TwoAddressInstructionPassID);
if (EnableStrongPHIElim)
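For context, a hidden flag like -early-live-intervals above is an ordinary cl::opt global that later code simply reads as a bool; a minimal sketch of the same pattern, with an invented flag name:

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    // Hidden experimental switch, off by default ("enable-my-experiment" is a
    // made-up name for illustration).
    static cl::opt<bool> EnableMyExperiment(
        "enable-my-experiment", cl::Hidden,
        cl::desc("Run the experimental pass earlier in the pipeline"));

    static void configurePipeline() {
      if (EnableMyExperiment) {
        // ...schedule the extra pass here, as addOptimizedRegAlloc does above.
      }
    }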
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 91c33c4..9099862 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -78,6 +78,8 @@ STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
+STATISTIC(NumLoadFold, "Number of loads folded");
+STATISTIC(NumSelects, "Number of selects optimized");
namespace {
class PeepholeOptimizer : public MachineFunctionPass {
@@ -108,12 +110,14 @@ namespace {
bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+ bool optimizeSelect(MachineInstr *MI);
bool isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg);
};
}
@@ -384,6 +388,47 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
return false;
}
+/// Optimize a select instruction.
+bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) {
+ unsigned TrueOp = 0;
+ unsigned FalseOp = 0;
+ bool Optimizable = false;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
+ return false;
+ if (!Optimizable)
+ return false;
+ if (!TII->optimizeSelect(MI))
+ return false;
+ MI->eraseFromParent();
+ ++NumSelects;
+ return true;
+}
+
+/// isLoadFoldable - Check whether MI is a candidate for folding into a later
+/// instruction. We only fold loads into virtual registers, and only when the
+/// defined virtual register has a single use.
+bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
+ unsigned &FoldAsLoadDefReg) {
+ if (!MI->canFoldAsLoad() || !MI->mayLoad())
+ return false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.getNumDefs() != 1)
+ return false;
+
+ unsigned Reg = MI->getOperand(0).getReg();
+  // To reduce compilation time, we check MRI->hasOneUse here, when the load
+  // is recorded as a fold candidate. The check should be repeated when the
+  // load's uses are processed, since uses can be removed during peephole.
+ if (!MI->getOperand(0).getSubReg() &&
+ TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MRI->hasOneUse(Reg)) {
+ FoldAsLoadDefReg = Reg;
+ return true;
+ }
+ return false;
+}
+
bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
@@ -441,6 +486,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
SmallPtrSet<MachineInstr*, 8> LocalMIs;
SmallSet<unsigned, 4> ImmDefRegs;
DenseMap<unsigned, MachineInstr*> ImmDefMIs;
+ unsigned FoldAsLoadDefReg;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
MachineBasicBlock *MBB = &*I;
@@ -448,37 +494,33 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
LocalMIs.clear();
ImmDefRegs.clear();
ImmDefMIs.clear();
+ FoldAsLoadDefReg = 0;
- bool First = true;
- MachineBasicBlock::iterator PMII;
for (MachineBasicBlock::iterator
MII = I->begin(), MIE = I->end(); MII != MIE; ) {
MachineInstr *MI = &*MII;
+ // We may be erasing MI below, increment MII now.
+ ++MII;
LocalMIs.insert(MI);
+      // If the instruction belongs to one of the following categories, the
+      // pending load candidate is discarded.
if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
MI->hasUnmodeledSideEffects()) {
- ++MII;
+ FoldAsLoadDefReg = 0;
continue;
}
-
- if (MI->isBitcast()) {
- if (optimizeBitcastInstr(MI, MBB)) {
- // MI is deleted.
- LocalMIs.erase(MI);
- Changed = true;
- MII = First ? I->begin() : llvm::next(PMII);
- continue;
- }
- } else if (MI->isCompare()) {
- if (optimizeCmpInstr(MI, MBB)) {
- // MI is deleted.
- LocalMIs.erase(MI);
- Changed = true;
- MII = First ? I->begin() : llvm::next(PMII);
- continue;
- }
+ if (MI->mayStore() || MI->isCall())
+ FoldAsLoadDefReg = 0;
+
+ if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) ||
+ (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
+ (MI->isSelect() && optimizeSelect(MI))) {
+ // MI is deleted.
+ LocalMIs.erase(MI);
+ Changed = true;
+ continue;
}
if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
@@ -489,9 +531,29 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
}
- First = false;
- PMII = MII;
- ++MII;
+ // Check whether MI is a load candidate for folding into a later
+ // instruction. If MI is not a candidate, check whether we can fold an
+ // earlier load into MI.
+ if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) {
+        // We need to fold the load after optimizeCmpInstr, since
+        // optimizeCmpInstr can enable folding by converting SUB to CMP.
+ MachineInstr *DefMI = 0;
+ MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
+ FoldAsLoadDefReg, DefMI);
+ if (FoldMI) {
+ // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
+ LocalMIs.erase(MI);
+ LocalMIs.erase(DefMI);
+ LocalMIs.insert(FoldMI);
+ MI->eraseFromParent();
+ DefMI->eraseFromParent();
+ ++NumLoadFold;
+
+ // MI is replaced with FoldMI.
+ Changed = true;
+ continue;
+ }
+ }
}
}
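The FoldAsLoadDefReg handling added above is a single-pending-candidate scheme: remember at most one foldable load, drop it at any barrier (store, call, label, and so on), and try to consume it at a later instruction. A generic sketch of that control flow, with invented names and none of the LLVM classes:

    #include <vector>

    struct Insn {
      int  Id;
      bool DefinesFoldableLoad; // would pass an isLoadFoldable-style check
      bool IsBarrier;           // store, call, label, inline asm, ...
      bool CanUsePending;       // could fold the remembered load into this insn
    };

    // Returns the ids of the instructions a pending load was folded into.
    std::vector<int> foldPendingLoads(const std::vector<Insn> &Block) {
      std::vector<int> Folded;
      int Pending = -1;                        // -1 means "no candidate"
      for (const Insn &I : Block) {
        if (I.IsBarrier) { Pending = -1; continue; }
        if (Pending != -1 && I.CanUsePending) {
          Folded.push_back(I.Id);              // fold and consume the candidate
          Pending = -1;
          continue;
        }
        if (I.DefinesFoldableLoad)
          Pending = I.Id;                      // remember the new candidate
      }
      return Folded;
    }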
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 8325f20..6b3a48e 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -201,20 +201,16 @@ int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
/// its virtual register, and it is guaranteed to be a block-local register.
///
bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) {
- // Check for non-debug uses or defs following MO.
- // This is the most likely way to fail - fast path it.
- MachineOperand *Next = &MO;
- while ((Next = Next->getNextOperandForReg()))
- if (!Next->isDebug())
- return false;
-
// If the register has ever been spilled or reloaded, we conservatively assume
// it is a global register used in multiple blocks.
if (StackSlotForVirtReg[MO.getReg()] != -1)
return false;
// Check that the use/def chain has exactly one operand - MO.
- return &MRI->reg_nodbg_begin(MO.getReg()).getOperand() == &MO;
+ MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg());
+ if (&I.getOperand() != &MO)
+ return false;
+ return ++I == MRI->reg_nodbg_end();
}
/// addKillFlag - Set kill flags on last use of a virtual register.
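The rewritten check above amounts to "the use/def list contains exactly MO and nothing else"; with any forward iterator the same test looks like this (a generic sketch, not the MachineRegisterInfo API):

    // True if the range [First, Last) holds exactly one element equal to Wanted.
    template <typename Iter, typename T>
    bool isOnlyElement(Iter First, Iter Last, const T &Wanted) {
      if (First == Last || !(*First == Wanted))
        return false;          // empty list, or the first entry is not Wanted
      return ++First == Last;  // and no second entry may follow
    }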
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 6ac5428..d0cff48 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -1747,7 +1747,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
<< "********** Function: "
- << ((Value*)mf.getFunction())->getName() << '\n');
+ << mf.getFunction()->getName() << '\n');
MF = &mf;
if (VerifyEnabled)
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 733312f..9906334 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -460,14 +460,8 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
// Okay, merge "B1" into the same value number as "B0".
- if (BValNo != ValLR->valno) {
- // If B1 is killed by a PHI, then the merged live range must also be killed
- // by the same PHI, as B0 and B1 can not overlap.
- bool HasPHIKill = BValNo->hasPHIKill();
+ if (BValNo != ValLR->valno)
IntB.MergeValueNumberInto(BValNo, ValLR->valno);
- if (HasPHIKill)
- ValLR->valno->setHasPHIKill(true);
- }
DEBUG(dbgs() << " result = " << IntB << '\n');
// If the source instruction was killing the source register before the
@@ -494,6 +488,11 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
LiveInterval &IntB,
VNInfo *AValNo,
VNInfo *BValNo) {
+ // If AValNo has PHI kills, conservatively assume that IntB defs can reach
+ // the PHI values.
+ if (LIS->hasPHIKill(IntA, AValNo))
+ return true;
+
for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
AI != AE; ++AI) {
if (AI->valno != AValNo) continue;
@@ -558,10 +557,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// AValNo is the value number in A that defines the copy, A3 in the example.
VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
assert(AValNo && "COPY source not live");
-
- // If other defs can reach uses of this def, then it's not safe to perform
- // the optimization.
- if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill())
+ if (AValNo->isPHIDef() || AValNo->isUnused())
return false;
MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
if (!DefMI)
@@ -657,6 +653,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
if (ULR == IntA.end() || ULR->valno != AValNo)
continue;
+ // Kill flags are no longer accurate. They are recomputed after RA.
+ UseMO.setIsKill(false);
if (TargetRegisterInfo::isPhysicalRegister(NewReg))
UseMO.substPhysReg(NewReg, *TRI);
else
@@ -1093,6 +1091,11 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
// register live range doesn't need to be accurate as long as all the
// defs are there.
+ // Delete the identity copy.
+ MachineInstr *CopyMI = MRI->getVRegDef(RHS.reg);
+ LIS->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI->eraseFromParent();
+
// We don't track kills for reserved registers.
MRI->clearKillFlags(CP.getSrcReg());
@@ -1382,24 +1385,6 @@ bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
++J;
}
- // Update kill info. Some live ranges are extended due to copy coalescing.
- for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(),
- E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
- VNInfo *VNI = I->first;
- unsigned LHSValID = LHSValNoAssignments[VNI->id];
- if (VNI->hasPHIKill())
- NewVNInfo[LHSValID]->setHasPHIKill(true);
- }
-
- // Update kill info. Some live ranges are extended due to copy coalescing.
- for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
- E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
- VNInfo *VNI = I->first;
- unsigned RHSValID = RHSValNoAssignments[VNI->id];
- if (VNI->hasPHIKill())
- NewVNInfo[RHSValID]->setHasPHIKill(true);
- }
-
// Clear kill flags where live ranges are extended.
while (!LHSOldKills.empty())
LHSOldKills.pop_back_val()->clearRegisterKills(LHS.reg, TRI);
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 110f478..9c1dba3 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -411,12 +411,11 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
const MachineInstr *MI = SU->getInstr();
unsigned Reg = MI->getOperand(OperIdx).getReg();
- // SSA defs do not have output/anti dependencies.
+ // Singly defined vregs do not have output/anti dependencies.
// The current operand is a def, so we have at least one.
- //
- // FIXME: This optimization is disabled pending PR13112.
- //if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end())
- // return;
+ // Check here if there are any others...
+ if (MRI.hasOneDef(Reg))
+ return;
// Add output dependence to the next nearest def of this vreg.
//
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 747bc44..1c485a0 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -228,6 +228,9 @@ namespace {
SDValue visitFP_EXTEND(SDNode *N);
SDValue visitFNEG(SDNode *N);
SDValue visitFABS(SDNode *N);
+ SDValue visitFCEIL(SDNode *N);
+ SDValue visitFTRUNC(SDNode *N);
+ SDValue visitFFLOOR(SDNode *N);
SDValue visitBRCOND(SDNode *N);
SDValue visitBR_CC(SDNode *N);
SDValue visitLOAD(SDNode *N);
@@ -1140,6 +1143,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FP_EXTEND: return visitFP_EXTEND(N);
case ISD::FNEG: return visitFNEG(N);
case ISD::FABS: return visitFABS(N);
+ case ISD::FFLOOR: return visitFFLOOR(N);
+ case ISD::FCEIL: return visitFCEIL(N);
+ case ISD::FTRUNC: return visitFTRUNC(N);
case ISD::BRCOND: return visitBRCOND(N);
case ISD::BR_CC: return visitBR_CC(N);
case ISD::LOAD: return visitLOAD(N);
@@ -5679,7 +5685,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
DAG.getTarget().Options.UnsafeFPMath) &&
DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
- TLI.isOperationLegal(ISD::FMA, VT)) {
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
@@ -5704,6 +5710,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
// fold vector ops
if (VT.isVector()) {
@@ -5724,11 +5731,11 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
+ return DAG.getNode(ISD::FNEG, dl, VT, N1);
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
- return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
+ return DAG.getNode(ISD::FADD, dl, VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
// If 'unsafe math' is enabled, fold
@@ -5756,23 +5763,34 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
DAG.getTarget().Options.UnsafeFPMath) &&
DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
- TLI.isOperationLegal(ISD::FMA, VT)) {
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
- return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+ return DAG.getNode(ISD::FMA, dl, VT,
N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, N1));
+ DAG.getNode(ISD::FNEG, dl, VT, N1));
}
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
// Note: Commutes FSUB operands.
if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
- return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
- DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT,
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT,
N1.getOperand(0)),
N1.getOperand(1), N0);
}
+
+ // fold (fsub (-(fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (N0.getOpcode() == ISD::FNEG &&
+ N0.getOperand(0).getOpcode() == ISD::FMUL &&
+ N0->hasOneUse() && N0.getOperand(0).hasOneUse()) {
+ SDValue N00 = N0.getOperand(0).getOperand(0);
+ SDValue N01 = N0.getOperand(0).getOperand(1);
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
+ DAG.getNode(ISD::FNEG, dl, VT, N1));
+ }
}
return SDValue();
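The fadd/fsub combines above contract a multiply plus add/subtract into one fused operation whose intermediate product is not rounded, which is why they are gated on fast/unsafe FP math. A standalone numeric illustration with std::fma (not DAG code); build with FP contraction disabled (e.g. -ffp-contract=off) so the first expression really rounds twice:

    #include <cmath>
    #include <cstdio>

    int main() {
      double x = 1.0 + std::ldexp(1.0, -29); // exactly representable
      double c = 1.0 + std::ldexp(1.0, -28); // the rounded value of x*x
      double separate = x * x - c;           // x*x rounds to c, so this is 0
      double fused = std::fma(x, x, -c);     // keeps the 2^-58 term
      std::printf("%g vs %g\n", separate, fused); // 0 vs ~3.47e-18
      return 0;
    }

The fsub pattern (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) relies on exactly this single-rounding behaviour.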
@@ -6231,6 +6249,42 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFCEIL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fceil c1) -> fceil(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ftrunc c1) -> ftrunc(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ffloor c1) -> ffloor(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
@@ -7822,9 +7876,29 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
return SDValue();
- // Widen the input vector by adding undef values.
- VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
- VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ // If the element type of the input vector is not the same as
+ // the output element type, make concat_vectors based on input element
+ // type and then bitcast it to the output vector type.
+ //
+  // In other words, avoid nodes like this:
+ // <NODE> v16i8 = concat_vectors v4i16 v4i16
+ // Replace it with this one:
+ // <NODE0> v8i16 = concat_vectors v4i16 v4i16
+ // <NODE1> v16i8 = bitcast NODE0
+ EVT ItemType = VecIn1.getValueType().getVectorElementType();
+ if (ItemType != VT.getVectorElementType()) {
+ EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(),
+ ItemType,
+ VecIn1.getValueType().getVectorNumElements()*2);
+ // Widen the input vector by adding undef values.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT,
+ VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ VecIn1 = DAG.getNode(ISD::BITCAST, dl, VT, VecIn1);
+ } else
+ // Widen the input vector by adding undef values.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+
}
// If VecIn2 is unused then change it to undef.
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index e5ea6e6..683fac6 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -55,6 +55,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -789,6 +790,17 @@ FastISel::SelectInstruction(const Instruction *I) {
MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt;
+ // As a special case, don't handle calls to builtin library functions that
+ // may be translated directly to target instructions.
+ if (const CallInst *Call = dyn_cast<CallInst>(I)) {
+ const Function *F = Call->getCalledFunction();
+ LibFunc::Func Func;
+ if (F && !F->hasLocalLinkage() && F->hasName() &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func))
+ return false;
+ }
+
// First, try doing target-independent selection.
if (SelectOperator(I, I->getOpcode())) {
++NumFastIselSuccessIndependent;
@@ -1040,7 +1052,8 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
}
}
-FastISel::FastISel(FunctionLoweringInfo &funcInfo)
+FastISel::FastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo)
: FuncInfo(funcInfo),
MRI(FuncInfo.MF->getRegInfo()),
MFI(*FuncInfo.MF->getFrameInfo()),
@@ -1049,7 +1062,8 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo)
TD(*TM.getTargetData()),
TII(*TM.getInstrInfo()),
TLI(*TM.getTargetLowering()),
- TRI(*TM.getRegisterInfo()) {
+ TRI(*TM.getRegisterInfo()),
+ LibInfo(libInfo) {
}
FastISel::~FastISel() {}
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 936c126..4488d27 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -411,6 +411,10 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
} else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(),
BA->getTargetFlags()));
+ } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateTargetIndex(TI->getIndex(),
+ TI->getOffset(),
+ TI->getTargetFlags()));
} else {
assert(Op.getValueType() != MVT::Other &&
Op.getValueType() != MVT::Glue &&
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index b0776af..908ebb9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -428,7 +428,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
DebugLoc dl = LD->getDebugLoc();
if (VT.isFloatingPoint() || VT.isVector()) {
EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
- if (TLI.isTypeLegal(intVT)) {
+ if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) {
// Expand to a (misaligned) integer load of the same size,
// then bitconvert to floating point or vector.
SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(),
@@ -436,8 +436,9 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
LD->isNonTemporal(),
LD->isInvariant(), LD->getAlignment());
SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
- if (VT.isFloatingPoint() && LoadedVT != VT)
- Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
+ if (LoadedVT != VT)
+ Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
+ ISD::ANY_EXTEND, dl, VT, Result);
ValResult = Result;
ChainResult = Chain;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 5384576..84e41fc 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -61,6 +61,7 @@ namespace llvm {
if (isa<BasicBlockSDNode>(Node)) return true;
if (isa<FrameIndexSDNode>(Node)) return true;
if (isa<ConstantPoolSDNode>(Node)) return true;
+ if (isa<TargetIndexSDNode>(Node)) return true;
if (isa<JumpTableSDNode>(Node)) return true;
if (isa<ExternalSymbolSDNode>(Node)) return true;
if (isa<BlockAddressSDNode>(Node)) return true;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b971b69..f4fe892 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -403,6 +403,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddPointer(GA->getGlobal());
ID.AddInteger(GA->getOffset());
ID.AddInteger(GA->getTargetFlags());
+ ID.AddInteger(GA->getAddressSpace());
break;
}
case ISD::BasicBlock:
@@ -438,16 +439,25 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(CP->getTargetFlags());
break;
}
+ case ISD::TargetIndex: {
+ const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N);
+ ID.AddInteger(TI->getIndex());
+ ID.AddInteger(TI->getOffset());
+ ID.AddInteger(TI->getTargetFlags());
+ break;
+ }
case ISD::LOAD: {
const LoadSDNode *LD = cast<LoadSDNode>(N);
ID.AddInteger(LD->getMemoryVT().getRawBits());
ID.AddInteger(LD->getRawSubclassData());
+ ID.AddInteger(LD->getPointerInfo().getAddrSpace());
break;
}
case ISD::STORE: {
const StoreSDNode *ST = cast<StoreSDNode>(N);
ID.AddInteger(ST->getMemoryVT().getRawBits());
ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
break;
}
case ISD::ATOMIC_CMP_SWAP:
@@ -467,6 +477,12 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
const AtomicSDNode *AT = cast<AtomicSDNode>(N);
ID.AddInteger(AT->getMemoryVT().getRawBits());
ID.AddInteger(AT->getRawSubclassData());
+ ID.AddInteger(AT->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::PREFETCH: {
+ const MemSDNode *PF = cast<MemSDNode>(N);
+ ID.AddInteger(PF->getPointerInfo().getAddrSpace());
break;
}
case ISD::VECTOR_SHUFFLE: {
@@ -483,6 +499,10 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
break;
}
} // end switch (N->getOpcode())
+
+ // Target specific memory nodes could also have address spaces to check.
+ if (N->isTargetMemoryOpcode())
+ ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace());
}
/// AddNodeIDNode - Generic routine for adding a nodes info to the NodeID
@@ -1100,6 +1120,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL,
ID.AddPointer(GV);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
+ ID.AddInteger(GV->getType()->getAddressSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -1199,6 +1220,24 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
return SDValue(N, 0);
}
+SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
+ unsigned char TargetFlags) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), 0, 0);
+ ID.AddInteger(Index);
+ ID.AddInteger(Offset);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset,
+ TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0);
@@ -2444,6 +2483,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
case ISD::FABS:
V.clearSign();
return getConstantFP(V, VT);
+ case ISD::FCEIL: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, VT);
+ break;
+ }
+ case ISD::FTRUNC: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, VT);
+ break;
+ }
+ case ISD::FFLOOR: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, VT);
+ break;
+ }
case ISD::FP_EXTEND: {
bool ignored;
// This can return overflow, underflow, or inexact; we don't care.
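The three new folds above pick a directed rounding mode for APFloat::roundToIntegral: FCEIL rounds toward +infinity, FFLOOR toward -infinity, FTRUNC toward zero. The sign of the input is what distinguishes them, as this small libm-based check (illustration only, not the APFloat code) shows:

    #include <cassert>
    #include <cmath>

    int main() {
      assert(std::ceil(-1.5)  == -1.0);  // toward +infinity
      assert(std::floor(-1.5) == -2.0);  // toward -infinity
      assert(std::trunc(-1.5) == -1.0);  // toward zero
      assert(std::trunc(1.5)  ==  1.0);
      return 0;
    }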
@@ -3901,6 +3958,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
AddNodeIDNode(ID, Opcode, VTs, Ops, 4);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void* IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<AtomicSDNode>(E)->refineAlignment(MMO);
@@ -3973,6 +4031,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr, Val};
AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void* IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<AtomicSDNode>(E)->refineAlignment(MMO);
@@ -4029,6 +4088,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr};
AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void* IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<AtomicSDNode>(E)->refineAlignment(MMO);
@@ -4106,6 +4166,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
@@ -4225,6 +4286,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
MMO->isNonTemporal(),
MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<LoadSDNode>(E)->refineAlignment(MMO);
@@ -4314,6 +4376,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
ID.AddInteger(VT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -4381,6 +4444,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
ID.AddInteger(SVT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -4405,6 +4469,7 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
ID.AddInteger(ST->getMemoryVT().getRawBits());
ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
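The AddrSpace additions above all serve one purpose: two memory nodes that are identical except for their address space must not be CSE'd into one node, so the address space has to be part of the FoldingSet identity. A generic sketch of the idea using a plain tuple key (not the FoldingSetNodeID API):

    #include <cassert>
    #include <set>
    #include <tuple>

    int main() {
      // CSE key for a memory node: (opcode, value type, flags, address space).
      // Without the last field, the two entries below would wrongly collapse.
      using Key = std::tuple<unsigned, unsigned, unsigned, unsigned>;
      std::set<Key> CSEMap;
      CSEMap.insert(Key{/*LOAD*/1, /*i32*/7, /*flags*/0, /*addrspace*/0});
      bool Inserted = CSEMap.insert(Key{1, 7, 0, /*addrspace*/1}).second;
      assert(Inserted && "distinct address spaces must stay distinct");
      return 0;
    }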
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8cbe818..f3cf758 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1601,7 +1601,10 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
// Update successor info
addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight);
- addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
+ // TrueBB and FalseBB are always different unless the incoming IR is
+ // degenerate. This only happens when running llc on weird IR.
+ if (CB.TrueBB != CB.FalseBB)
+ addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
@@ -3460,7 +3463,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
SDValue InChain = getRoot();
- EVT VT = EVT::getEVT(I.getType());
+ EVT VT = TLI.getValueType(I.getType());
if (I.getAlignment() * 8 < VT.getSizeInBits())
report_fatal_error("Cannot generate unaligned atomic load");
@@ -3490,7 +3493,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
SDValue InChain = getRoot();
- EVT VT = EVT::getEVT(I.getValueOperand()->getType());
+ EVT VT = TLI.getValueType(I.getValueOperand()->getType());
if (I.getAlignment() * 8 < VT.getSizeInBits())
report_fatal_error("Cannot generate unaligned atomic store");
@@ -4929,6 +4932,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
return 0;
+ case Intrinsic::floor:
+ setValue(&I, DAG.getNode(ISD::FFLOOR, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return 0;
case Intrinsic::fma:
setValue(&I, DAG.getNode(ISD::FMA, dl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -5506,6 +5514,22 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
return false;
}
+/// visitUnaryFloatCall - If a call instruction is a unary floating-point
+/// operation (as expected), translate it to an SDNode with the specified opcode
+/// and return true.
+bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
+ unsigned Opcode) {
+ // Sanity check that it really is a unary floating-point call.
+ if (I.getNumArgOperands() != 1 ||
+ !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+ I.getType() != I.getArgOperand(0)->getType() ||
+ !I.onlyReadsMemory())
+ return false;
+
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), Tmp.getValueType(), Tmp));
+ return true;
+}
void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Handle inline assembly differently.
@@ -5536,150 +5560,97 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Check for well-known libc/libm calls. If the function is internal, it
// can't be a library call.
- if (!F->hasLocalLinkage() && F->hasName()) {
- StringRef Name = F->getName();
- if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") ||
- (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") ||
- (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) {
+ LibFunc::Func Func;
+ if (!F->hasLocalLinkage() && F->hasName() &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func)) {
+ switch (Func) {
+ default: break;
+ case LibFunc::copysign:
+ case LibFunc::copysignf:
+ case LibFunc::copysignl:
if (I.getNumArgOperands() == 2 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
- I.getType() == I.getArgOperand(1)->getType()) {
+ I.getType() == I.getArgOperand(1)->getType() &&
+ I.onlyReadsMemory()) {
SDValue LHS = getValue(I.getArgOperand(0));
SDValue RHS = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
LHS.getValueType(), LHS, RHS));
return;
}
- } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") ||
- (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") ||
- (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::fabs:
+ case LibFunc::fabsf:
+ case LibFunc::fabsl:
+ if (visitUnaryFloatCall(I, ISD::FABS))
return;
- }
- } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") ||
- (LibInfo->has(LibFunc::sinf) && Name == "sinf") ||
- (LibInfo->has(LibFunc::sinl) && Name == "sinl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::sin:
+ case LibFunc::sinf:
+ case LibFunc::sinl:
+ if (visitUnaryFloatCall(I, ISD::FSIN))
return;
- }
- } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") ||
- (LibInfo->has(LibFunc::cosf) && Name == "cosf") ||
- (LibInfo->has(LibFunc::cosl) && Name == "cosl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::cos:
+ case LibFunc::cosf:
+ case LibFunc::cosl:
+ if (visitUnaryFloatCall(I, ISD::FCOS))
return;
- }
- } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") ||
- (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") ||
- (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::sqrt:
+ case LibFunc::sqrtf:
+ case LibFunc::sqrtl:
+ if (visitUnaryFloatCall(I, ISD::FSQRT))
return;
- }
- } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") ||
- (LibInfo->has(LibFunc::floorf) && Name == "floorf") ||
- (LibInfo->has(LibFunc::floorl) && Name == "floorl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::floor:
+ case LibFunc::floorf:
+ case LibFunc::floorl:
+ if (visitUnaryFloatCall(I, ISD::FFLOOR))
return;
- }
- } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") ||
- (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") ||
- (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::nearbyint:
+ case LibFunc::nearbyintf:
+ case LibFunc::nearbyintl:
+ if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
return;
- }
- } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") ||
- (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") ||
- (LibInfo->has(LibFunc::ceill) && Name == "ceill")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::ceil:
+ case LibFunc::ceilf:
+ case LibFunc::ceill:
+ if (visitUnaryFloatCall(I, ISD::FCEIL))
return;
- }
- } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") ||
- (LibInfo->has(LibFunc::rintf) && Name == "rintf") ||
- (LibInfo->has(LibFunc::rintl) && Name == "rintl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::rint:
+ case LibFunc::rintf:
+ case LibFunc::rintl:
+ if (visitUnaryFloatCall(I, ISD::FRINT))
return;
- }
- } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") ||
- (LibInfo->has(LibFunc::truncf) && Name == "truncf") ||
- (LibInfo->has(LibFunc::truncl) && Name == "truncl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::trunc:
+ case LibFunc::truncf:
+ case LibFunc::truncl:
+ if (visitUnaryFloatCall(I, ISD::FTRUNC))
return;
- }
- } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") ||
- (LibInfo->has(LibFunc::log2f) && Name == "log2f") ||
- (LibInfo->has(LibFunc::log2l) && Name == "log2l")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::log2:
+ case LibFunc::log2f:
+ case LibFunc::log2l:
+ if (visitUnaryFloatCall(I, ISD::FLOG2))
return;
- }
- } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") ||
- (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") ||
- (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ case LibFunc::exp2l:
+ if (visitUnaryFloatCall(I, ISD::FEXP2))
return;
- }
- } else if (Name == "memcmp") {
+ break;
+ case LibFunc::memcmp:
if (visitMemCmpCall(I))
return;
+ break;
}
}
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index d0fde6f..4090002 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -520,6 +520,7 @@ private:
void visitPHI(const PHINode &I);
void visitCall(const CallInst &I);
bool visitMemCmpCall(const CallInst &I);
+ bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode);
void visitAtomicLoad(const LoadInst &I);
void visitAtomicStore(const StoreInst &I);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 9fc225f..13cd011 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -100,6 +100,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
case ISD::ConstantPool: return "ConstantPool";
+ case ISD::TargetIndex: return "TargetIndex";
case ISD::ExternalSymbol: return "ExternalSymbol";
case ISD::BlockAddress: return "BlockAddress";
case ISD::INTRINSIC_WO_CHAIN:
@@ -409,6 +410,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << " " << offset;
if (unsigned int TF = CP->getTargetFlags())
OS << " [TF=" << TF << ']';
+ } else if (const TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(this)) {
+ OS << "<" << TI->getIndex() << '+' << TI->getOffset() << ">";
+ if (unsigned TF = TI->getTargetFlags())
+ OS << " [TF=" << TF << ']';
} else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
OS << "<";
const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 287c679..4e5e3ba 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -979,7 +979,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = 0;
if (TM.Options.EnableFastISel)
- FastIS = TLI.createFastISel(*FuncInfo);
+ FastIS = TLI.createFastISel(*FuncInfo, LibInfo);
// Iterate over all basic blocks in the function.
ReversePostOrderTraversal<const Function*> RPOT(&Fn);
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index dff9b2c..6820175 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2303,7 +2303,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getOpcode() == ISD::AND)
if (ConstantSDNode *AndRHS =
dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- EVT ShiftTy = DCI.isBeforeLegalize() ?
+ EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
getPointerTy() : getShiftAmountTy(N0.getValueType());
if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
// Perform the xform if the AND RHS is a single bit.
@@ -2333,7 +2333,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
const APInt &AndRHSC = AndRHS->getAPIntValue();
if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
unsigned ShiftBits = AndRHSC.countTrailingZeros();
- EVT ShiftTy = DCI.isBeforeLegalize() ?
+ EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
getPointerTy() : getShiftAmountTy(N0.getValueType());
EVT CmpTy = N0.getValueType();
SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
@@ -2361,7 +2361,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
NewC = NewC.lshr(ShiftBits);
if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) {
- EVT ShiftTy = DCI.isBeforeLegalize() ?
+ EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
getPointerTy() : getShiftAmountTy(N0.getValueType());
EVT CmpTy = N0.getValueType();
SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
@@ -2464,7 +2464,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
// if it is not already.
ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
- if (NewCond != Cond)
+ if (NewCond != Cond && (DCI.isBeforeLegalizeOps() ||
+ getCondCodeAction(NewCond, N0.getValueType()) == Legal))
return DAG.getSetCC(dl, VT, N0, N1, NewCond);
}
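The isBeforeLegalizeOps changes above all guard the same family of setcc rewrites; the comment in the first hunk gives the canonical case, (X & 8) != 0 --> (X & 8) >> 3, which is only valid because the AND mask is a single bit. A tiny standalone check of that equivalence (illustration only):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X < 64; ++X) {
        bool asCompare = (X & 8u) != 0;    // original setcc form
        uint32_t asShift = (X & 8u) >> 3;  // shifted form, already 0 or 1
        assert(asShift <= 1 && asCompare == (asShift != 0));
      }
      return 0;
    }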
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 9a751c1..4a2b7ec 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -652,7 +652,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
// Adjust RegAssign if a register assignment is killed at VNI->def. We
// want to avoid calculating the live range of the source register if
// possible.
- AssignI.find(VNI->def.getPrevSlot());
+ AssignI.find(Def.getPrevSlot());
if (!AssignI.valid() || AssignI.start() >= Def)
continue;
// If MI doesn't kill the assigned register, just leave it.
@@ -739,6 +739,8 @@ void SplitEditor::hoistCopiesForSize() {
for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end();
VI != VE; ++VI) {
VNInfo *VNI = *VI;
+ if (VNI->isUnused())
+ continue;
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
assert(ParentVNI && "Parent not live at complement def");
@@ -812,6 +814,8 @@ void SplitEditor::hoistCopiesForSize() {
for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end();
VI != VE; ++VI) {
VNInfo *VNI = *VI;
+ if (VNI->isUnused())
+ continue;
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
const DomPair &Dom = NearestDom[ParentVNI->id];
if (!Dom.first || Dom.second == VNI->def)
@@ -1047,8 +1051,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
if (ParentVNI->isUnused())
continue;
unsigned RegIdx = RegAssign.lookup(ParentVNI->def);
- VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def);
- VNI->setIsPHIDef(ParentVNI->isPHIDef());
+ defValue(RegIdx, ParentVNI, ParentVNI->def);
// Force rematted values to be recomputed everywhere.
// The new live ranges may be truncated.
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 43a6ad8..a04ac3f 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -28,15 +28,10 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Triple.h"
using namespace llvm;
-// SSPBufferSize - The lower bound for a buffer to be considered for stack
-// smashing protection.
-static cl::opt<unsigned>
-SSPBufferSize("stack-protector-buffer-size", cl::init(8),
- cl::desc("Lower bound for a buffer to be considered for "
- "stack protection"));
-
namespace {
class StackProtector : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
@@ -46,7 +41,7 @@ namespace {
Function *F;
Module *M;
- DominatorTree* DT;
+ DominatorTree *DT;
/// InsertStackProtectors - Insert code into the prologue and epilogue of
/// the function.
@@ -60,6 +55,11 @@ namespace {
/// check fails.
BasicBlock *CreateFailBB();
+ /// ContainsProtectableArray - Check whether the type either is an array or
+ /// contains an array of sufficient size so that we need stack protectors
+ /// for it.
+ bool ContainsProtectableArray(Type *Ty, bool InStruct = false) const;
+
/// RequiresStackProtector - Check whether or not this function needs a
/// stack protector based upon the stack protector level.
bool RequiresStackProtector() const;
@@ -70,8 +70,8 @@ namespace {
}
StackProtector(const TargetLowering *tli)
: FunctionPass(ID), TLI(tli) {
- initializeStackProtectorPass(*PassRegistry::getPassRegistry());
- }
+ initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
@@ -95,10 +95,43 @@ bool StackProtector::runOnFunction(Function &Fn) {
DT = getAnalysisIfAvailable<DominatorTree>();
if (!RequiresStackProtector()) return false;
-
+
return InsertStackProtectors();
}
+/// ContainsProtectableArray - Check whether the type either is an array or
+/// contains a char array of sufficient size so that we need stack protectors
+/// for it.
+bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const {
+ if (!Ty) return false;
+ if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ const TargetMachine &TM = TLI->getTargetMachine();
+ if (!AT->getElementType()->isIntegerTy(8)) {
+ Triple Trip(TM.getTargetTriple());
+
+ // If we're on a non-Darwin platform or we're inside of a structure, don't
+ // add stack protectors unless the array is a character array.
+ if (InStruct || !Trip.isOSDarwin())
+ return false;
+ }
+
+ // If an array has more than SSPBufferSize bytes of allocated space, then we
+ // emit stack protectors.
+ if (TM.Options.SSPBufferSize <= TLI->getTargetData()->getTypeAllocSize(AT))
+ return true;
+ }
+
+ const StructType *ST = dyn_cast<StructType>(Ty);
+ if (!ST) return false;
+
+ for (StructType::element_iterator I = ST->element_begin(),
+ E = ST->element_end(); I != E; ++I)
+ if (ContainsProtectableArray(*I, true))
+ return true;
+
+ return false;
+}
+
/// RequiresStackProtector - Check whether or not this function needs a stack
/// protector based upon the stack protector level. The heuristic we use is to
/// add a guard variable to functions that call alloca, and functions with
@@ -110,8 +143,6 @@ bool StackProtector::RequiresStackProtector() const {
if (!F->hasFnAttr(Attribute::StackProtect))
return false;
- const TargetData *TD = TLI->getTargetData();
-
for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
BasicBlock *BB = I;
@@ -123,11 +154,8 @@ bool StackProtector::RequiresStackProtector() const {
// protectors.
return true;
- if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType()))
- // If an array has more than SSPBufferSize bytes of allocated space,
- // then we emit stack protectors.
- if (SSPBufferSize <= TD->getTypeAllocSize(AT))
- return true;
+ if (ContainsProtectableArray(AI->getAllocatedType()))
+ return true;
}
}
@@ -159,17 +187,17 @@ bool StackProtector::InsertStackProtectors() {
// StackGuardSlot = alloca i8*
// StackGuard = load __stack_chk_guard
// call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
- //
+ //
PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
unsigned AddressSpace, Offset;
if (TLI->getStackCookieLocation(AddressSpace, Offset)) {
Constant *OffsetVal =
ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset);
-
+
StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal,
PointerType::get(PtrTy, AddressSpace));
} else {
- StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
+ StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
}
BasicBlock &Entry = F->getEntryBlock();
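For context, the recursion added above treats an array as protectable when it is at least SSPBufferSize bytes and is either a char array or, on Darwin and only at the top level, any array; struct types are searched element by element. A minimal standalone sketch of that decision over a simplified type model (hypothetical types, not the LLVM Type hierarchy):

    #include <cstdint>
    #include <vector>

    struct Ty {
      enum Kind { Int8Array, OtherArray, Struct, Scalar } kind;
      uint64_t allocSize;                 // bytes this type occupies
      std::vector<const Ty *> elements;   // populated only for Struct
    };

    // Mirrors the shape of ContainsProtectableArray: char arrays of at least
    // SSPBufferSize bytes always count; other arrays count only at the top
    // level on Darwin; structs are searched recursively.
    bool containsProtectableArray(const Ty &T, uint64_t SSPBufferSize,
                                  bool IsDarwin, bool InStruct = false) {
      if (T.kind == Ty::Int8Array || T.kind == Ty::OtherArray) {
        if (T.kind != Ty::Int8Array && (InStruct || !IsDarwin))
          return false;
        if (T.allocSize >= SSPBufferSize)
          return true;
      }
      if (T.kind != Ty::Struct)
        return false;
      for (const Ty *Elt : T.elements)
        if (containsProtectableArray(*Elt, SSPBufferSize, IsDarwin,
                                     /*InStruct=*/true))
          return true;
      return false;
    }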
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index c6fdc73..5b06195 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -672,8 +672,8 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
LiveInterval &SrcInterval = LI->getInterval(SrcReg);
SlotIndex PredIndex = LI->getMBBEndIdx(PredBB);
VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex);
+ (void)SrcVNI;
assert(SrcVNI);
- SrcVNI->setHasPHIKill(true);
continue;
}
@@ -744,7 +744,6 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot());
assert(DestVNI);
- DestVNI->setIsPHIDef(true);
// Prior to PHI elimination, the live ranges of PHIs begin at their defining
// instruction. After PHI elimination, PHI instructions are replaced by VNs
@@ -777,7 +776,6 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr);
VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex,
LI->getVNInfoAllocator());
- CopyVNI->setIsPHIDef(true);
CopyLI.addRange(LiveRange(MBBStartIndex,
DestCopyIndex.getRegSlot(),
CopyVNI));
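The `(void)SrcVNI;` added above is the usual idiom for a variable whose only remaining use is inside an assert: it keeps -Wunused-variable quiet in release builds, where NDEBUG compiles the assert away. A tiny sketch of the pattern:

    #include <cassert>

    int computeValue();   // stand-in for a call whose result is only checked

    void example() {
      int V = computeValue();
      (void)V;            // marks V as used even when the assert below vanishes
      assert(V >= 0);     // compiled out entirely when NDEBUG is defined
    }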
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index a3d6771..ddee6b2 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -570,12 +570,12 @@ TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData,
}
/// Return the default expected latency for a def based on its opcode.
-unsigned TargetInstrInfo::defaultDefLatency(const InstrItineraryData *ItinData,
+unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel,
const MachineInstr *DefMI) const {
if (DefMI->mayLoad())
- return ItinData->SchedModel->LoadLatency;
+ return SchedModel->LoadLatency;
if (isHighLatencyDef(DefMI->getOpcode()))
- return ItinData->SchedModel->HighLatency;
+ return SchedModel->HighLatency;
return 1;
}
@@ -638,7 +638,7 @@ static int computeDefOperandLatency(
return 1;
}
else if(ItinData->isEmpty())
- return TII->defaultDefLatency(ItinData, DefMI);
+ return TII->defaultDefLatency(ItinData->SchedModel, DefMI);
// ...operand lookup required
return -1;
@@ -669,7 +669,8 @@ computeOperandLatency(const InstrItineraryData *ItinData,
// Expected latency is the max of the stage latency and itinerary props.
if (!FindMin)
- InstrLatency = std::max(InstrLatency, defaultDefLatency(ItinData, DefMI));
+ InstrLatency = std::max(InstrLatency,
+ defaultDefLatency(ItinData->SchedModel, DefMI));
return InstrLatency;
}
@@ -742,6 +743,7 @@ computeOperandLatency(const InstrItineraryData *ItinData,
// Expected latency is the max of the stage latency and itinerary props.
if (!FindMin)
- InstrLatency = std::max(InstrLatency, defaultDefLatency(ItinData, DefMI));
+ InstrLatency = std::max(InstrLatency,
+ defaultDefLatency(ItinData->SchedModel, DefMI));
return InstrLatency;
}
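defaultDefLatency now takes the MCSchedModel directly instead of reaching through the itinerary data. The selection rule itself is simple; a sketch with simplified stand-in types (not the real MCSchedModel or MachineInstr API):

    struct SchedModel { unsigned LoadLatency; unsigned HighLatency; };
    struct Instr { bool MayLoad; bool IsHighLatencyDef; };

    // Loads get the model's LoadLatency, known expensive defs get HighLatency,
    // and everything else defaults to a latency of 1 cycle.
    unsigned defaultDefLatency(const SchedModel &SM, const Instr &MI) {
      if (MI.MayLoad)
        return SM.LoadLatency;
      if (MI.IsHighLatencyDef)
        return SM.HighLatency;
      return 1;
    }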
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index e4c0119..aa601af 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -30,6 +30,7 @@
#define DEBUG_TYPE "twoaddrinstr"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -55,18 +56,19 @@ STATISTIC(NumCommuted , "Number of instructions commuted to coalesce");
STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted");
STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk");
-STATISTIC(NumReMats, "Number of instructions re-materialized");
-STATISTIC(NumDeletes, "Number of dead instructions deleted");
STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up");
STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down");
namespace {
class TwoAddressInstructionPass : public MachineFunctionPass {
+ MachineFunction *MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const InstrItineraryData *InstrItins;
MachineRegisterInfo *MRI;
LiveVariables *LV;
+ SlotIndexes *Indexes;
+ LiveIntervals *LIS;
AliasAnalysis *AA;
CodeGenOpt::Level OptLevel;
@@ -92,16 +94,9 @@ namespace {
unsigned Reg,
MachineBasicBlock::iterator OldPos);
- bool isProfitableToReMat(unsigned Reg, const TargetRegisterClass *RC,
- MachineInstr *MI, MachineInstr *DefMI,
- MachineBasicBlock *MBB, unsigned Loc);
-
bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist,
unsigned &LastDef);
- MachineInstr *FindLastUseInMBB(unsigned Reg, MachineBasicBlock *MBB,
- unsigned Dist);
-
bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
MachineInstr *MI, MachineBasicBlock *MBB,
unsigned Dist);
@@ -117,14 +112,6 @@ namespace {
MachineFunction::iterator &mbbi,
unsigned RegA, unsigned RegB, unsigned Dist);
- typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill;
- bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
- SmallVector<NewKill, 4> &NewKills,
- MachineBasicBlock *MBB, unsigned Dist);
- bool DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- MachineFunction::iterator &mbbi, unsigned Dist);
-
bool isDefTooClose(unsigned Reg, unsigned Dist,
MachineInstr *MI, MachineBasicBlock *MBB);
@@ -150,6 +137,11 @@ namespace {
void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &Processed);
+ typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList;
+ typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap;
+ bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&);
+ void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist);
+
void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg);
/// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
@@ -167,6 +159,8 @@ namespace {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
AU.addPreserved<LiveVariables>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
MachineFunctionPass::getAnalysisUsage(AU);
@@ -241,7 +235,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
// appropriate location, we can try to sink the current instruction
// past it.
if (!KillMI || KillMI->getParent() != MBB || KillMI == MI ||
- KillMI->isTerminator())
+ KillMI == OldPos || KillMI->isTerminator())
return false;
// If any of the definitions are used by another instruction between the
@@ -284,6 +278,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
}
}
}
+ assert(KillMO && "Didn't find kill");
// Update kill and LV information.
KillMO->setIsKill(false);
@@ -297,59 +292,13 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
MBB->remove(MI);
MBB->insert(KillPos, MI);
+ if (LIS)
+ LIS->handleMove(MI);
+
++Num3AddrSunk;
return true;
}
-/// isTwoAddrUse - Return true if the specified MI is using the specified
-/// register as a two-address operand.
-static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) {
- const MCInstrDesc &MCID = UseMI->getDesc();
- for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = UseMI->getOperand(i);
- if (MO.isReg() && MO.getReg() == Reg &&
- (MO.isDef() || UseMI->isRegTiedToDefOperand(i)))
- // Earlier use is a two-address one.
- return true;
- }
- return false;
-}
-
-/// isProfitableToReMat - Return true if the heuristics determines it is likely
-/// to be profitable to re-materialize the definition of Reg rather than copy
-/// the register.
-bool
-TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg,
- const TargetRegisterClass *RC,
- MachineInstr *MI, MachineInstr *DefMI,
- MachineBasicBlock *MBB, unsigned Loc) {
- bool OtherUse = false;
- for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg),
- UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
- MachineOperand &UseMO = UI.getOperand();
- MachineInstr *UseMI = UseMO.getParent();
- MachineBasicBlock *UseMBB = UseMI->getParent();
- if (UseMBB == MBB) {
- DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
- if (DI != DistanceMap.end() && DI->second == Loc)
- continue; // Current use.
- OtherUse = true;
- // There is at least one other use in the MBB that will clobber the
- // register.
- if (isTwoAddrUse(UseMI, Reg))
- return true;
- }
- }
-
- // If other uses in MBB are not two-address uses, then don't remat.
- if (OtherUse)
- return false;
-
- // No other uses in the same block, remat if it's defined in the same
- // block so it does not unnecessarily extend the live range.
- return MBB == DefMI->getParent();
-}
-
/// NoUseAfterLastDef - Return true if there are no intervening uses between the
/// last instruction in the MBB that defines the specified register and the
/// two-address instruction which is being processed. It also returns the last
@@ -377,31 +326,6 @@ bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg,
return !(LastUse > LastDef && LastUse < Dist);
}
-MachineInstr *TwoAddressInstructionPass::FindLastUseInMBB(unsigned Reg,
- MachineBasicBlock *MBB,
- unsigned Dist) {
- unsigned LastUseDist = 0;
- MachineInstr *LastUse = 0;
- for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
- E = MRI->reg_end(); I != E; ++I) {
- MachineOperand &MO = I.getOperand();
- MachineInstr *MI = MO.getParent();
- if (MI->getParent() != MBB || MI->isDebugValue())
- continue;
- DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
- if (DI == DistanceMap.end())
- continue;
- if (DI->second >= Dist)
- continue;
-
- if (MO.isUse() && DI->second > LastUseDist) {
- LastUse = DI->first;
- LastUseDist = DI->second;
- }
- }
- return LastUse;
-}
-
/// isCopyToReg - Return true if the specified MI is a copy instruction or
/// an extract_subreg instruction. It also returns the source and destination
/// registers and whether they are physical registers by reference.
@@ -538,7 +462,7 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
}
-/// isProfitableToReMat - Return true if it's potentially profitable to commute
+/// isProfitableToCommute - Return true if it's potentially profitable to commute
/// the two-address instruction that's being processed.
bool
TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB,
@@ -628,6 +552,8 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
if (LV)
// Update live variables
LV->replaceKillInstruction(RegC, MI, NewMI);
+ if (Indexes)
+ Indexes->replaceMachineInstrInMaps(MI, NewMI);
mbbi->insert(mi, NewMI); // Insert the new inst
mbbi->erase(mi); // Nuke the old inst.
@@ -676,6 +602,9 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
bool Sunk = false;
+ if (Indexes)
+ Indexes->replaceMachineInstrInMaps(mi, NewMI);
+
if (NewMI->findRegisterUseOperand(RegB, false, TRI))
// FIXME: Temporary workaround. If the new instruction doesn't
// uses RegB, convertToThreeAddress must have created more
@@ -785,92 +714,6 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
return;
}
-/// isSafeToDelete - If the specified instruction does not produce any side
-/// effects and all of its defs are dead, then it's safe to delete.
-static bool isSafeToDelete(MachineInstr *MI,
- const TargetInstrInfo *TII,
- SmallVector<unsigned, 4> &Kills) {
- if (MI->mayStore() || MI->isCall())
- return false;
- if (MI->isTerminator() || MI->hasUnmodeledSideEffects())
- return false;
-
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- if (MO.isDef() && !MO.isDead())
- return false;
- if (MO.isUse() && MO.isKill())
- Kills.push_back(MO.getReg());
- }
- return true;
-}
-
-/// canUpdateDeletedKills - Check if all the registers listed in Kills are
-/// killed by instructions in MBB preceding the current instruction at
-/// position Dist. If so, return true and record information about the
-/// preceding kills in NewKills.
-bool TwoAddressInstructionPass::
-canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
- SmallVector<NewKill, 4> &NewKills,
- MachineBasicBlock *MBB, unsigned Dist) {
- while (!Kills.empty()) {
- unsigned Kill = Kills.back();
- Kills.pop_back();
- if (TargetRegisterInfo::isPhysicalRegister(Kill))
- return false;
-
- MachineInstr *LastKill = FindLastUseInMBB(Kill, MBB, Dist);
- if (!LastKill)
- return false;
-
- bool isModRef = LastKill->definesRegister(Kill);
- NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef),
- LastKill));
- }
- return true;
-}
-
-/// DeleteUnusedInstr - If an instruction with a tied register operand can
-/// be safely deleted, just delete it.
-bool
-TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- MachineFunction::iterator &mbbi,
- unsigned Dist) {
- // Check if the instruction has no side effects and if all its defs are dead.
- SmallVector<unsigned, 4> Kills;
- if (!isSafeToDelete(mi, TII, Kills))
- return false;
-
- // If this instruction kills some virtual registers, we need to
- // update the kill information. If it's not possible to do so,
- // then bail out.
- SmallVector<NewKill, 4> NewKills;
- if (!canUpdateDeletedKills(Kills, NewKills, &*mbbi, Dist))
- return false;
-
- if (LV) {
- while (!NewKills.empty()) {
- MachineInstr *NewKill = NewKills.back().second;
- unsigned Kill = NewKills.back().first.first;
- bool isDead = NewKills.back().first.second;
- NewKills.pop_back();
- if (LV->removeVirtualRegisterKilled(Kill, mi)) {
- if (isDead)
- LV->addVirtualRegisterDead(Kill, NewKill);
- else
- LV->addVirtualRegisterKilled(Kill, NewKill);
- }
- }
- }
-
- mbbi->erase(mi); // Nuke the old inst.
- mi = nmi;
- return true;
-}
-
/// RescheduleMIBelowKill - If there is one more local instruction that reads
/// 'Reg' and it kills 'Reg', consider moving the instruction below the kill
/// instruction in order to eliminate the need for the copy.
@@ -1000,6 +843,8 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
// Update live variables
LV->removeVirtualRegisterKilled(Reg, KillMI);
LV->addVirtualRegisterKilled(Reg, MI);
+ if (LIS)
+ LIS->handleMove(MI);
DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI);
return true;
@@ -1154,6 +999,8 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
// Update live variables
LV->removeVirtualRegisterKilled(Reg, KillMI);
LV->addVirtualRegisterKilled(Reg, MI);
+ if (LIS)
+ LIS->handleMove(KillMI);
DEBUG(dbgs() << "\trescheduled kill: " << *KillMI);
return true;
@@ -1180,16 +1027,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
assert(TargetRegisterInfo::isVirtualRegister(regB) &&
"cannot make instruction into two-address form");
-
- // If regA is dead and the instruction can be deleted, just delete
- // it so it doesn't clobber regB.
bool regBKilled = isKilled(MI, regB, MRI, TII);
- if (!regBKilled && MI.getOperand(DstIdx).isDead() &&
- DeleteUnusedInstr(mi, nmi, mbbi, Dist)) {
- ++NumDeletes;
- DEBUG(dbgs() << "\tdeleted unused instruction.\n");
- return true; // Done with this instruction.
- }
if (TargetRegisterInfo::isVirtualRegister(regA))
ScanUses(regA, &*mbbi, Processed);
@@ -1273,16 +1111,14 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
if (NewOpc != 0) {
const MCInstrDesc &UnfoldMCID = TII->get(NewOpc);
if (UnfoldMCID.getNumDefs() == 1) {
- MachineFunction &MF = *mbbi->getParent();
-
// Unfold the load.
DEBUG(dbgs() << "2addr: UNFOLDING: " << MI);
const TargetRegisterClass *RC =
TRI->getAllocatableClass(
- TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, MF));
+ TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF));
unsigned Reg = MRI->createVirtualRegister(RC);
SmallVector<MachineInstr *, 2> NewMIs;
- if (!TII->unfoldMemoryOperand(MF, &MI, Reg,
+ if (!TII->unfoldMemoryOperand(*MF, &MI, Reg,
/*UnfoldLoad=*/true,/*UnfoldStore=*/false,
NewMIs)) {
DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
@@ -1359,15 +1195,177 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
return false;
}
+// Collect tied operands of MI that need to be handled.
+// Rewrite trivial cases immediately.
+// Return true if any tied operands were found, including the trivial ones.
+bool TwoAddressInstructionPass::
+collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ bool AnyOps = false;
+ unsigned NumOps = MI->isInlineAsm() ?
+ MI->getNumOperands() : MCID.getNumOperands();
+
+ for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
+ unsigned DstIdx = 0;
+ if (!MI->isRegTiedToDefOperand(SrcIdx, &DstIdx))
+ continue;
+ AnyOps = true;
+ MachineOperand &SrcMO = MI->getOperand(SrcIdx);
+ MachineOperand &DstMO = MI->getOperand(DstIdx);
+ unsigned SrcReg = SrcMO.getReg();
+ unsigned DstReg = DstMO.getReg();
+ // Tied constraint already satisfied?
+ if (SrcReg == DstReg)
+ continue;
+
+ assert(SrcReg && SrcMO.isUse() && "two address instruction invalid");
+
+ // Deal with <undef> uses immediately - simply rewrite the src operand.
+ if (SrcMO.isUndef()) {
+ // Constrain the DstReg register class if required.
+ if (TargetRegisterInfo::isVirtualRegister(DstReg))
+ if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
+ TRI, *MF))
+ MRI->constrainRegClass(DstReg, RC);
+ SrcMO.setReg(DstReg);
+ DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI);
+ continue;
+ }
+ TiedOperands[SrcReg].push_back(std::make_pair(SrcIdx, DstIdx));
+ }
+ return AnyOps;
+}
+
+// Process a list of tied MI operands that all use the same source register.
+// The tied pairs are of the form (SrcIdx, DstIdx).
+void
+TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
+ TiedPairList &TiedPairs,
+ unsigned &Dist) {
+ bool IsEarlyClobber = false;
+ bool RemovedKillFlag = false;
+ bool AllUsesCopied = true;
+ unsigned LastCopiedReg = 0;
+ unsigned RegB = 0;
+ for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
+ unsigned SrcIdx = TiedPairs[tpi].first;
+ unsigned DstIdx = TiedPairs[tpi].second;
+
+ const MachineOperand &DstMO = MI->getOperand(DstIdx);
+ unsigned RegA = DstMO.getReg();
+ IsEarlyClobber |= DstMO.isEarlyClobber();
+
+ // Grab RegB from the instruction because it may have changed if the
+ // instruction was commuted.
+ RegB = MI->getOperand(SrcIdx).getReg();
+
+ if (RegA == RegB) {
+ // The register is tied to multiple destinations (or else we would
+ // not have continued this far), but this use of the register
+ // already matches the tied destination. Leave it.
+ AllUsesCopied = false;
+ continue;
+ }
+ LastCopiedReg = RegA;
+
+ assert(TargetRegisterInfo::isVirtualRegister(RegB) &&
+ "cannot make instruction into two-address form");
+
+#ifndef NDEBUG
+ // First, verify that we don't have a use of "a" in the instruction
+ // (a = b + a for example) because our transformation will not
+ // work. This should never occur because we are in SSA form.
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i)
+ assert(i == DstIdx ||
+ !MI->getOperand(i).isReg() ||
+ MI->getOperand(i).getReg() != RegA);
+#endif
+
+ // Emit a copy.
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), RegA).addReg(RegB);
+
+ // Update DistanceMap.
+ MachineBasicBlock::iterator PrevMI = MI;
+ --PrevMI;
+ DistanceMap.insert(std::make_pair(PrevMI, Dist));
+ DistanceMap[MI] = ++Dist;
+
+ SlotIndex CopyIdx;
+ if (Indexes)
+ CopyIdx = Indexes->insertMachineInstrInMaps(PrevMI).getRegSlot();
+
+ DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI);
+
+ MachineOperand &MO = MI->getOperand(SrcIdx);
+ assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() &&
+ "inconsistent operand info for 2-reg pass");
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+
+ // Make sure regA is a legal regclass for the SrcIdx operand.
+ if (TargetRegisterInfo::isVirtualRegister(RegA) &&
+ TargetRegisterInfo::isVirtualRegister(RegB))
+ MRI->constrainRegClass(RegA, MRI->getRegClass(RegB));
+
+ MO.setReg(RegA);
+
+ // Propagate SrcRegMap.
+ SrcRegMap[RegA] = RegB;
+ }
+
+
+ if (AllUsesCopied) {
+ if (!IsEarlyClobber) {
+ // Replace other (un-tied) uses of regB with LastCopiedReg.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+ MO.setReg(LastCopiedReg);
+ }
+ }
+ }
+
+ // Update live variables for regB.
+ if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(MI)) {
+ MachineBasicBlock::iterator PrevMI = MI;
+ --PrevMI;
+ LV->addVirtualRegisterKilled(RegB, PrevMI);
+ }
+
+ } else if (RemovedKillFlag) {
+ // Some tied uses of regB matched their destination registers, so
+ // regB is still used in this instruction, but a kill flag was
+ // removed from a different tied use of regB, so now we need to add
+ // a kill flag to one of the remaining uses of regB.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ MO.setIsKill(true);
+ break;
+ }
+ }
+ }
+}
+
/// runOnMachineFunction - Reduce two-address instructions to two operands.
///
-bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
- const TargetMachine &TM = MF.getTarget();
- MRI = &MF.getRegInfo();
+bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
+ MF = &Func;
+ const TargetMachine &TM = MF->getTarget();
+ MRI = &MF->getRegInfo();
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
InstrItins = TM.getInstrItineraryData();
+ Indexes = getAnalysisIfAvailable<SlotIndexes>();
LV = getAnalysisIfAvailable<LiveVariables>();
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
AA = &getAnalysis<AliasAnalysis>();
OptLevel = TM.getOptLevel();
@@ -1375,20 +1373,15 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
DEBUG(dbgs() << "********** Function: "
- << MF.getFunction()->getName() << '\n');
+ << MF->getFunction()->getName() << '\n');
// This pass takes the function out of SSA form.
MRI->leaveSSA();
- // ReMatRegs - Keep track of the registers whose def's are remat'ed.
- BitVector ReMatRegs(MRI->getNumVirtRegs());
-
- typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> >
- TiedOperandMap;
- TiedOperandMap TiedOperands(4);
+ TiedOperandMap TiedOperands;
SmallPtrSet<MachineInstr*, 8> Processed;
- for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
+ for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end();
mbbi != mbbe; ++mbbi) {
unsigned Dist = 0;
DistanceMap.clear();
@@ -1407,50 +1400,21 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
if (mi->isRegSequence())
RegSequences.push_back(&*mi);
- const MCInstrDesc &MCID = mi->getDesc();
- bool FirstTied = true;
-
DistanceMap.insert(std::make_pair(mi, ++Dist));
ProcessCopy(&*mi, &*mbbi, Processed);
// First scan through all the tied register uses in this instruction
// and record a list of pairs of tied operands for each register.
- unsigned NumOps = mi->isInlineAsm()
- ? mi->getNumOperands() : MCID.getNumOperands();
- for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
- unsigned DstIdx = 0;
- if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx))
- continue;
-
- if (FirstTied) {
- FirstTied = false;
- ++NumTwoAddressInstrs;
- DEBUG(dbgs() << '\t' << *mi);
- }
-
- assert(mi->getOperand(SrcIdx).isReg() &&
- mi->getOperand(SrcIdx).getReg() &&
- mi->getOperand(SrcIdx).isUse() &&
- "two address instruction invalid");
-
- unsigned regB = mi->getOperand(SrcIdx).getReg();
-
- // Deal with <undef> uses immediately - simply rewrite the src operand.
- if (mi->getOperand(SrcIdx).isUndef()) {
- unsigned DstReg = mi->getOperand(DstIdx).getReg();
- // Constrain the DstReg register class if required.
- if (TargetRegisterInfo::isVirtualRegister(DstReg))
- if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
- TRI, MF))
- MRI->constrainRegClass(DstReg, RC);
- mi->getOperand(SrcIdx).setReg(DstReg);
- DEBUG(dbgs() << "\t\trewrite undef:\t" << *mi);
- continue;
- }
- TiedOperands[regB].push_back(std::make_pair(SrcIdx, DstIdx));
+ if (!collectTiedOperands(mi, TiedOperands)) {
+ mi = nmi;
+ continue;
}
+ ++NumTwoAddressInstrs;
+ MadeChange = true;
+ DEBUG(dbgs() << '\t' << *mi);
+
// If the instruction has a single pair of tied operands, try some
// transformations that may either eliminate the tied operands or
// improve the opportunities for coalescing away the register copy.
@@ -1477,139 +1441,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
// Now iterate over the information collected above.
for (TiedOperandMap::iterator OI = TiedOperands.begin(),
OE = TiedOperands.end(); OI != OE; ++OI) {
- SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second;
-
- bool IsEarlyClobber = false;
- bool RemovedKillFlag = false;
- bool AllUsesCopied = true;
- unsigned LastCopiedReg = 0;
- unsigned regB = OI->first;
- for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
- unsigned SrcIdx = TiedPairs[tpi].first;
- unsigned DstIdx = TiedPairs[tpi].second;
-
- const MachineOperand &DstMO = mi->getOperand(DstIdx);
- unsigned regA = DstMO.getReg();
- IsEarlyClobber |= DstMO.isEarlyClobber();
-
- // Grab regB from the instruction because it may have changed if the
- // instruction was commuted.
- regB = mi->getOperand(SrcIdx).getReg();
-
- if (regA == regB) {
- // The register is tied to multiple destinations (or else we would
- // not have continued this far), but this use of the register
- // already matches the tied destination. Leave it.
- AllUsesCopied = false;
- continue;
- }
- LastCopiedReg = regA;
-
- assert(TargetRegisterInfo::isVirtualRegister(regB) &&
- "cannot make instruction into two-address form");
-
-#ifndef NDEBUG
- // First, verify that we don't have a use of "a" in the instruction
- // (a = b + a for example) because our transformation will not
- // work. This should never occur because we are in SSA form.
- for (unsigned i = 0; i != mi->getNumOperands(); ++i)
- assert(i == DstIdx ||
- !mi->getOperand(i).isReg() ||
- mi->getOperand(i).getReg() != regA);
-#endif
-
- // Emit a copy or rematerialize the definition.
- bool isCopy = false;
- const TargetRegisterClass *rc = MRI->getRegClass(regB);
- MachineInstr *DefMI = MRI->getUniqueVRegDef(regB);
- // If it's safe and profitable, remat the definition instead of
- // copying it.
- if (DefMI &&
- DefMI->isAsCheapAsAMove() &&
- DefMI->isSafeToReMat(TII, AA, regB) &&
- isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
- DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
- unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
- TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI);
- ReMatRegs.set(TargetRegisterInfo::virtReg2Index(regB));
- ++NumReMats;
- } else {
- BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY),
- regA).addReg(regB);
- isCopy = true;
- }
-
- // Update DistanceMap.
- MachineBasicBlock::iterator prevMI = prior(mi);
- DistanceMap.insert(std::make_pair(prevMI, Dist));
- DistanceMap[mi] = ++Dist;
-
- DEBUG(dbgs() << "\t\tprepend:\t" << *prevMI);
-
- MachineOperand &MO = mi->getOperand(SrcIdx);
- assert(MO.isReg() && MO.getReg() == regB && MO.isUse() &&
- "inconsistent operand info for 2-reg pass");
- if (MO.isKill()) {
- MO.setIsKill(false);
- RemovedKillFlag = true;
- }
-
- // Make sure regA is a legal regclass for the SrcIdx operand.
- if (TargetRegisterInfo::isVirtualRegister(regA) &&
- TargetRegisterInfo::isVirtualRegister(regB))
- MRI->constrainRegClass(regA, MRI->getRegClass(regB));
-
- MO.setReg(regA);
-
- if (isCopy)
- // Propagate SrcRegMap.
- SrcRegMap[regA] = regB;
- }
-
- if (AllUsesCopied) {
- if (!IsEarlyClobber) {
- // Replace other (un-tied) uses of regB with LastCopiedReg.
- for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = mi->getOperand(i);
- if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
- if (MO.isKill()) {
- MO.setIsKill(false);
- RemovedKillFlag = true;
- }
- MO.setReg(LastCopiedReg);
- }
- }
- }
-
- // Update live variables for regB.
- if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi))
- LV->addVirtualRegisterKilled(regB, prior(mi));
-
- } else if (RemovedKillFlag) {
- // Some tied uses of regB matched their destination registers, so
- // regB is still used in this instruction, but a kill flag was
- // removed from a different tied use of regB, so now we need to add
- // a kill flag to one of the remaining uses of regB.
- for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = mi->getOperand(i);
- if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
- MO.setIsKill(true);
- break;
- }
- }
- }
-
- // We didn't change anything if there was a single tied pair, and that
- // pair didn't require copies.
- if (AllUsesCopied || TiedPairs.size() > 1) {
- MadeChange = true;
-
- // Schedule the source copy / remat inserted to form two-address
- // instruction. FIXME: Does it matter the distance map may not be
- // accurate after it's scheduled?
- TII->scheduleTwoAddrSource(prior(mi), mi, *TRI);
- }
-
+ processTiedPairs(mi, OI->second, Dist);
DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
}
@@ -1634,15 +1466,6 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
}
}
- // Some remat'ed instructions are dead.
- for (int i = ReMatRegs.find_first(); i != -1; i = ReMatRegs.find_next(i)) {
- unsigned VReg = TargetRegisterInfo::index2VirtReg(i);
- if (MRI->use_nodbg_empty(VReg)) {
- MachineInstr *DefMI = MRI->getVRegDef(VReg);
- DefMI->eraseFromParent();
- }
- }
-
// Eliminate REG_SEQUENCE instructions. Their whole purpose was to preserve
// SSA form. It's now safe to de-SSA.
MadeChange |= EliminateRegSequences();
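The refactoring above splits the old inline loop into collectTiedOperands, which groups each tied (use index, def index) pair by its source register, and processTiedPairs, which then inserts one COPY per group. A standalone sketch of the grouping step over a simplified operand list (hypothetical types, not the MachineInstr API):

    #include <map>
    #include <utility>
    #include <vector>

    // Hypothetical, simplified operand: a register plus the index of the def
    // operand it is tied to (-1 means "not tied").
    struct Operand { unsigned Reg; int TiedDefIdx; };

    using TiedPairList = std::vector<std::pair<unsigned, unsigned>>; // (SrcIdx, DstIdx)
    using TiedOperandMap = std::map<unsigned, TiedPairList>;         // keyed by source reg

    // Collect all tied (SrcIdx, DstIdx) pairs, grouped by source register.
    // Pairs whose source already equals the tied destination need no copy and
    // are skipped, mirroring the "constraint already satisfied" case.
    bool collectTiedOperands(const std::vector<Operand> &Ops,
                             TiedOperandMap &TiedOperands) {
      bool AnyOps = false;
      for (unsigned SrcIdx = 0; SrcIdx != Ops.size(); ++SrcIdx) {
        if (Ops[SrcIdx].TiedDefIdx < 0)
          continue;
        AnyOps = true;
        unsigned DstIdx = static_cast<unsigned>(Ops[SrcIdx].TiedDefIdx);
        if (Ops[SrcIdx].Reg == Ops[DstIdx].Reg)
          continue;
        TiedOperands[Ops[SrcIdx].Reg].push_back({SrcIdx, DstIdx});
      }
      return AnyOps;
    }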
diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp
index a4e0d8e..797662b 100644
--- a/lib/DebugInfo/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARFContext.cpp
@@ -167,9 +167,7 @@ DILineInfo DWARFContext::getLineInfoForAddress(uint64_t address,
const DWARFDebugLine::LineTable *lineTable = getLineTableForCompileUnit(cu);
if (lineTable) {
// Get the index of the row we're looking for in the line table.
- uint64_t hiPC = cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(
- cu, DW_AT_high_pc, -1ULL);
- uint32_t rowIndex = lineTable->lookupAddress(address, hiPC);
+ uint32_t rowIndex = lineTable->lookupAddress(address);
if (rowIndex != -1U) {
const DWARFDebugLine::Row &row = lineTable->Rows[rowIndex];
// Take file/line info from the line table.
diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp
index 117fa31..d99575d 100644
--- a/lib/DebugInfo/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARFDebugLine.cpp
@@ -95,14 +95,46 @@ void DWARFDebugLine::LineTable::dump(raw_ostream &OS) const {
DWARFDebugLine::State::~State() {}
void DWARFDebugLine::State::appendRowToMatrix(uint32_t offset) {
+ if (Sequence::Empty) {
+ // Record the beginning of instruction sequence.
+ Sequence::Empty = false;
+ Sequence::LowPC = Address;
+ Sequence::FirstRowIndex = row;
+ }
++row; // Increase the row number.
LineTable::appendRow(*this);
+ if (EndSequence) {
+ // Record the end of instruction sequence.
+ Sequence::HighPC = Address;
+ Sequence::LastRowIndex = row;
+ if (Sequence::isValid())
+ LineTable::appendSequence(*this);
+ Sequence::reset();
+ }
Row::postAppend();
}
+void DWARFDebugLine::State::finalize() {
+ row = DoneParsingLineTable;
+ if (!Sequence::Empty) {
+ fprintf(stderr, "warning: last sequence in debug line table is not"
+ "terminated!\n");
+ }
+ // Sort all sequences so that address lookup will work faster.
+ if (!Sequences.empty()) {
+ std::sort(Sequences.begin(), Sequences.end(), Sequence::orderByLowPC);
+ // Note: the instruction address ranges of sequences should not overlap
+ // (in shared objects and executables). If they do, the address lookup
+ // still works, but the result may be ambiguous. We don't report a
+ // warning in this case. For example, a .so compiled from multiple
+ // object files sometimes contains a few rudimentary sequences for
+ // address ranges [0x0, 0xsomething).
+ }
+}
+
DWARFDebugLine::DumpingState::~DumpingState() {}
-void DWARFDebugLine::DumpingState::finalize(uint32_t offset) {
+void DWARFDebugLine::DumpingState::finalize() {
LineTable::dump(OS);
}
@@ -180,8 +212,9 @@ DWARFDebugLine::parsePrologue(DataExtractor debug_line_data,
fprintf(stderr, "warning: parsing line table prologue at 0x%8.8x should"
" have ended at 0x%8.8x but it ended ad 0x%8.8x\n",
prologue_offset, end_prologue_offset, *offset_ptr);
+ return false;
}
- return end_prologue_offset;
+ return true;
}
bool
@@ -430,47 +463,53 @@ DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data,
}
}
- state.finalize(*offset_ptr);
+ state.finalize();
return end_offset;
}
-static bool findMatchingAddress(const DWARFDebugLine::Row& row1,
- const DWARFDebugLine::Row& row2) {
- return row1.Address < row2.Address;
-}
-
uint32_t
-DWARFDebugLine::LineTable::lookupAddress(uint64_t address,
- uint64_t cu_high_pc) const {
- uint32_t index = UINT32_MAX;
- if (!Rows.empty()) {
- // Use the lower_bound algorithm to perform a binary search since we know
- // that our line table data is ordered by address.
- DWARFDebugLine::Row row;
- row.Address = address;
- typedef std::vector<Row>::const_iterator iterator;
- iterator begin_pos = Rows.begin();
- iterator end_pos = Rows.end();
- iterator pos = std::lower_bound(begin_pos, end_pos, row,
- findMatchingAddress);
- if (pos == end_pos) {
- if (address < cu_high_pc)
- return Rows.size()-1;
- } else {
- // Rely on fact that we are using a std::vector and we can do
- // pointer arithmetic to find the row index (which will be one less
- // that what we found since it will find the first position after
- // the current address) since std::vector iterators are just
- // pointers to the container type.
- index = pos - begin_pos;
- if (pos->Address > address) {
- if (index > 0)
- --index;
- else
- index = UINT32_MAX;
- }
- }
+DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const {
+ uint32_t unknown_index = UINT32_MAX;
+ if (Sequences.empty())
+ return unknown_index;
+ // First, find an instruction sequence containing the given address.
+ DWARFDebugLine::Sequence sequence;
+ sequence.LowPC = address;
+ SequenceIter first_seq = Sequences.begin();
+ SequenceIter last_seq = Sequences.end();
+ SequenceIter seq_pos = std::lower_bound(first_seq, last_seq, sequence,
+ DWARFDebugLine::Sequence::orderByLowPC);
+ DWARFDebugLine::Sequence found_seq;
+ if (seq_pos == last_seq) {
+ found_seq = Sequences.back();
+ } else if (seq_pos->LowPC == address) {
+ found_seq = *seq_pos;
+ } else {
+ if (seq_pos == first_seq)
+ return unknown_index;
+ found_seq = *(seq_pos - 1);
+ }
+ if (!found_seq.containsPC(address))
+ return unknown_index;
+ // Search for the instruction address in the rows that describe the sequence.
+ // Rows are stored in a vector, so we can use arithmetic with their
+ // iterators.
+ DWARFDebugLine::Row row;
+ row.Address = address;
+ RowIter first_row = Rows.begin() + found_seq.FirstRowIndex;
+ RowIter last_row = Rows.begin() + found_seq.LastRowIndex;
+ RowIter row_pos = std::lower_bound(first_row, last_row, row,
+ DWARFDebugLine::Row::orderByAddress);
+ if (row_pos == last_row) {
+ return found_seq.LastRowIndex - 1;
+ }
+ uint32_t index = found_seq.FirstRowIndex + (row_pos - first_row);
+ if (row_pos->Address > address) {
+ if (row_pos == first_row)
+ return unknown_index;
+ else
+ index--;
}
- return index; // Failed to find address.
+ return index;
}
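The rewritten lookupAddress is a two-level binary search: first find the Sequence whose [LowPC, HighPC) range covers the address, then lower_bound within that sequence's slice of the row table. A self-contained sketch of the same lookup over plain structs, assuming the same sort order (not the DWARFDebugLine classes):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Seq { uint64_t LowPC, HighPC; unsigned FirstRow, LastRow; };
    struct Row { uint64_t Address; unsigned Line; };

    // Sequences are sorted by LowPC; rows [FirstRow, LastRow) of each sequence
    // are sorted by Address. Returns UINT32_MAX when no row covers addr.
    uint32_t lookupAddress(const std::vector<Seq> &Seqs,
                           const std::vector<Row> &Rows, uint64_t addr) {
      if (Seqs.empty())
        return UINT32_MAX;
      auto seqIt = std::lower_bound(Seqs.begin(), Seqs.end(), addr,
          [](const Seq &S, uint64_t A) { return S.LowPC < A; });
      // lower_bound gives the first sequence with LowPC >= addr; unless that is
      // an exact match, the containing sequence is the previous one.
      if (seqIt == Seqs.end() || seqIt->LowPC != addr) {
        if (seqIt == Seqs.begin())
          return UINT32_MAX;
        --seqIt;
      }
      if (!(seqIt->LowPC <= addr && addr < seqIt->HighPC))
        return UINT32_MAX;
      auto first = Rows.begin() + seqIt->FirstRow;
      auto last = Rows.begin() + seqIt->LastRow;
      auto rowIt = std::lower_bound(first, last, addr,
          [](const Row &R, uint64_t A) { return R.Address < A; });
      if (rowIt == last)
        return seqIt->LastRow - 1;   // addr falls after the last described row
      if (rowIt->Address > addr) {
        if (rowIt == first)
          return UINT32_MAX;
        --rowIt;
      }
      return static_cast<uint32_t>(rowIt - Rows.begin());
    }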
diff --git a/lib/DebugInfo/DWARFDebugLine.h b/lib/DebugInfo/DWARFDebugLine.h
index a8c0669..6382b45 100644
--- a/lib/DebugInfo/DWARFDebugLine.h
+++ b/lib/DebugInfo/DWARFDebugLine.h
@@ -88,6 +88,10 @@ public:
void reset(bool default_is_stmt);
void dump(raw_ostream &OS) const;
+ static bool orderByAddress(const Row& LHS, const Row& RHS) {
+ return LHS.Address < RHS.Address;
+ }
+
// The program-counter value corresponding to a machine instruction
// generated by the compiler.
uint64_t Address;
@@ -125,21 +129,63 @@ public:
EpilogueBegin:1;
};
+ // Represents a series of contiguous machine instructions. The line table for
+ // each compilation unit may consist of multiple sequences, which are not
+ // guaranteed to be in the order of ascending instruction address.
+ struct Sequence {
+ // Sequence describes instructions at address range [LowPC, HighPC)
+ // and is described by line table rows [FirstRowIndex, LastRowIndex).
+ uint64_t LowPC;
+ uint64_t HighPC;
+ unsigned FirstRowIndex;
+ unsigned LastRowIndex;
+ bool Empty;
+
+ Sequence() { reset(); }
+ void reset() {
+ LowPC = 0;
+ HighPC = 0;
+ FirstRowIndex = 0;
+ LastRowIndex = 0;
+ Empty = true;
+ }
+ static bool orderByLowPC(const Sequence& LHS, const Sequence& RHS) {
+ return LHS.LowPC < RHS.LowPC;
+ }
+ bool isValid() const {
+ return !Empty && (LowPC < HighPC) && (FirstRowIndex < LastRowIndex);
+ }
+ bool containsPC(uint64_t pc) const {
+ return (LowPC <= pc && pc < HighPC);
+ }
+ };
+
struct LineTable {
void appendRow(const DWARFDebugLine::Row &state) { Rows.push_back(state); }
+ void appendSequence(const DWARFDebugLine::Sequence &sequence) {
+ Sequences.push_back(sequence);
+ }
void clear() {
Prologue.clear();
Rows.clear();
+ Sequences.clear();
}
- uint32_t lookupAddress(uint64_t address, uint64_t cu_high_pc) const;
+ // Returns the index of the row with file/line info for a given address,
+ // or -1 if there is no such row.
+ uint32_t lookupAddress(uint64_t address) const;
void dump(raw_ostream &OS) const;
struct Prologue Prologue;
- std::vector<Row> Rows;
+ typedef std::vector<Row> RowVector;
+ typedef RowVector::const_iterator RowIter;
+ typedef std::vector<Sequence> SequenceVector;
+ typedef SequenceVector::const_iterator SequenceIter;
+ RowVector Rows;
+ SequenceVector Sequences;
};
- struct State : public Row, public LineTable {
+ struct State : public Row, public Sequence, public LineTable {
// Special row codes.
enum {
StartParsingLineTable = 0,
@@ -150,8 +196,11 @@ public:
virtual ~State();
virtual void appendRowToMatrix(uint32_t offset);
- virtual void finalize(uint32_t offset) { row = DoneParsingLineTable; }
- virtual void reset() { Row::reset(Prologue.DefaultIsStmt); }
+ virtual void finalize();
+ virtual void reset() {
+ Row::reset(Prologue.DefaultIsStmt);
+ Sequence::reset();
+ }
// The row number that starts at zero for the prologue, and increases for
// each row added to the matrix.
@@ -161,7 +210,7 @@ public:
struct DumpingState : public State {
DumpingState(raw_ostream &OS) : OS(OS) {}
virtual ~DumpingState();
- virtual void finalize(uint32_t offset);
+ virtual void finalize();
private:
raw_ostream &OS;
};
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index a744d0c..4afc900 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -501,7 +501,8 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
return 0;
}
- if ((WhichEngine & EngineKind::JIT) && ExecutionEngine::JITCtor == 0) {
+ if ((WhichEngine & EngineKind::JIT) && ExecutionEngine::JITCtor == 0 &&
+ ExecutionEngine::MCJITCtor == 0) {
if (ErrorStr)
*ErrorStr = "JIT has not been linked in.";
}
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index a942299..97995ad 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -361,7 +361,7 @@ bool JIT::removeModule(Module *M) {
MutexGuard locked(lock);
- if (jitstate->getModule() == M) {
+ if (jitstate && jitstate->getModule() == M) {
delete jitstate;
jitstate = 0;
}
@@ -433,13 +433,18 @@ GenericValue JIT::runFunction(Function *F,
}
break;
case 1:
- if (FTy->getNumParams() == 1 &&
- FTy->getParamType(0)->isIntegerTy(32)) {
+ if (FTy->getParamType(0)->isIntegerTy(32)) {
GenericValue rv;
int (*PF)(int) = (int(*)(int))(intptr_t)FPtr;
rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue()));
return rv;
}
+ if (FTy->getParamType(0)->isPointerTy()) {
+ GenericValue rv;
+ int (*PF)(char *) = (int(*)(char *))(intptr_t)FPtr;
+ rv.IntVal = APInt(32, PF((char*)GVTOP(ArgValues[0])));
+ return rv;
+ }
break;
}
}
diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
index 7be6ef8..61bc119 100644
--- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
+++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -461,6 +461,9 @@ namespace {
/// allocateCodeSection - Allocate memory for a code section.
uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID) {
+ // Grow the required block size to account for the block header
+ Size += sizeof(*CurBlock);
+
// FIXME: Alignment handling.
FreeRangeHeader* candidateBlock = FreeMemoryList;
FreeRangeHeader* head = FreeMemoryList;
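The one-line fix above grows the requested size by the block header so the carved-out region still has room for the header that precedes every block. A minimal sketch of the same header-prefixed allocation pattern, using plain malloc rather than the JIT memory manager's free list:

    #include <cstdint>
    #include <cstdlib>

    // Each managed block is preceded by a small header; requests must be grown
    // by the header size or the usable region comes up short.
    struct BlockHeader { std::size_t BlockSize; BlockHeader *Next; };

    uint8_t *allocateBlock(std::size_t Size) {
      Size += sizeof(BlockHeader);                 // account for the header up front
      auto *Hdr = static_cast<BlockHeader *>(std::malloc(Size));
      if (!Hdr)
        return nullptr;
      Hdr->BlockSize = Size;
      Hdr->Next = nullptr;
      return reinterpret_cast<uint8_t *>(Hdr + 1); // usable memory starts after the header
    }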
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index 84274c0..99c65ec 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -18,6 +18,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MutexGuard.h"
#include "llvm/Target/TargetData.h"
using namespace llvm;
@@ -43,20 +44,40 @@ ExecutionEngine *MCJIT::createJIT(Module *M,
// FIXME: Don't do this here.
sys::DynamicLibrary::LoadLibraryPermanently(0, NULL);
- // If the target supports JIT code generation, create the JIT.
- if (TargetJITInfo *TJ = TM->getJITInfo())
- return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM), GVsWithCode);
-
- if (ErrorStr)
- *ErrorStr = "target does not support JIT code generation";
- return 0;
+ return new MCJIT(M, TM, new MCJITMemoryManager(JMM), GVsWithCode);
}
-MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji,
- RTDyldMemoryManager *MM, bool AllocateGVsWithCode)
- : ExecutionEngine(m), TM(tm), MemMgr(MM), M(m), OS(Buffer), Dyld(MM) {
+MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM,
+ bool AllocateGVsWithCode)
+ : ExecutionEngine(m), TM(tm), Ctx(0), MemMgr(MM), Dyld(MM),
+ isCompiled(false), M(m), OS(Buffer) {
setTargetData(TM->getTargetData());
+}
+
+MCJIT::~MCJIT() {
+ delete MemMgr;
+ delete TM;
+}
+
+void MCJIT::emitObject(Module *m) {
+ /// Currently, MCJIT only supports a single module and the module passed to
+ /// this function call is expected to be the contained module. The module
+ /// is passed as a parameter here to prepare for multiple module support in
+ /// the future.
+ assert(M == m);
+
+ // Get a thread lock to make sure we aren't trying to compile multiple times
+ MutexGuard locked(lock);
+
+ // FIXME: Track compilation state on a per-module basis when multiple modules
+ // are supported.
+ // Re-compilation is not supported
+ if (isCompiled)
+ return;
+
+ PassManager PM;
+
PM.add(new TargetData(*TM->getTargetData()));
// Turn the machine code intermediate representation into bytes in memory
@@ -69,23 +90,22 @@ MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji,
// FIXME: When we support multiple modules, we'll want to move the code
// gen and finalization out of the constructor here and do it more
// on-demand as part of getPointerToFunction().
- PM.run(*M);
+ PM.run(*m);
// Flush the output buffer so the SmallVector gets its data.
OS.flush();
// Load the object into the dynamic linker.
- MemoryBuffer *MB = MemoryBuffer::getMemBuffer(StringRef(Buffer.data(),
+ MemoryBuffer* MB = MemoryBuffer::getMemBuffer(StringRef(Buffer.data(),
Buffer.size()),
"", false);
if (Dyld.loadObject(MB))
report_fatal_error(Dyld.getErrorString());
+
// Resolve any relocations.
Dyld.resolveRelocations();
-}
-MCJIT::~MCJIT() {
- delete MemMgr;
- delete TM;
+ // FIXME: Add support for per-module compilation state
+ isCompiled = true;
}
void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) {
@@ -93,6 +113,10 @@ void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) {
}
void *MCJIT::getPointerToFunction(Function *F) {
+ // FIXME: Add support for per-module compilation state
+ if (!isCompiled)
+ emitObject(M);
+
if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) {
bool AbortOnFailure = !F->hasExternalWeakLinkage();
void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure);
@@ -100,6 +124,7 @@ void *MCJIT::getPointerToFunction(Function *F) {
return Addr;
}
+ // FIXME: Should the Dyld be retaining module information? Probably not.
// FIXME: Should we be using the mangler for this? Probably.
StringRef BaseName = F->getName();
if (BaseName[0] == '\1')
@@ -218,6 +243,10 @@ GenericValue MCJIT::runFunction(Function *F,
void *MCJIT::getPointerToNamedFunction(const std::string &Name,
bool AbortOnFailure) {
+ // FIXME: Add support for per-module compilation state
+ if (!isCompiled)
+ emitObject(M);
+
if (!isSymbolSearchingDisabled() && MemMgr) {
void *ptr = MemMgr->getPointerToNamedFunction(Name, false);
if (ptr)
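emitObject moves code generation out of the constructor into a lazily triggered, compile-once path: each entry point checks isCompiled and, if needed, compiles under the lock. A minimal sketch of that guard pattern in plain C++ (hypothetical class; a production version would also want an atomic flag for the unlocked check):

    #include <mutex>

    class LazyJIT {
      std::mutex Lock;
      bool IsCompiled = false;

      void emitObject() {
        // Serialize with other threads that may request compilation concurrently.
        std::lock_guard<std::mutex> Guard(Lock);
        if (IsCompiled)    // re-compilation is not supported; do the work once
          return;
        // ... run codegen, load the object, resolve relocations ...
        IsCompiled = true;
      }

    public:
      void *getPointerToFunction(/*Function *F*/) {
        if (!IsCompiled)   // compile on first request instead of at construction
          emitObject();
        // ... look up the symbol in the generated object ...
        return nullptr;
      }
    };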
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h
index 2b3df98..138a7b6 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -23,23 +23,22 @@ namespace llvm {
// blah blah. Purely in get-it-up-and-limping mode for now.
class MCJIT : public ExecutionEngine {
- MCJIT(Module *M, TargetMachine *tm, TargetJITInfo &tji,
- RTDyldMemoryManager *MemMgr, bool AllocateGVsWithCode);
+ MCJIT(Module *M, TargetMachine *tm, RTDyldMemoryManager *MemMgr,
+ bool AllocateGVsWithCode);
TargetMachine *TM;
MCContext *Ctx;
RTDyldMemoryManager *MemMgr;
+ RuntimeDyld Dyld;
- // FIXME: These may need moved to a separate 'jitstate' member like the
- // non-MC JIT does for multithreading and such. Just keep them here for now.
- PassManager PM;
+ // FIXME: Add support for multiple modules
+ bool isCompiled;
Module *M;
- // FIXME: This really doesn't belong here.
+
+ // FIXME: Move these to a single container which manages JITed objects
SmallVector<char, 4096> Buffer; // Working buffer into which we JIT.
raw_svector_ostream OS;
- RuntimeDyld Dyld;
-
public:
~MCJIT();
@@ -91,6 +90,14 @@ public:
TargetMachine *TM);
// @}
+
+protected:
+ /// emitObject -- Generate a JITed object in memory from the specified module
+ /// Currently, MCJIT only supports a single module and the module passed to
+ /// this function call is expected to be the contained module. The module
+ /// is passed as a parameter here to prepare for multiple module support in
+ /// the future.
+ void emitObject(Module *M);
};
} // End llvm namespace
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index b464040..a98ddc0 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -108,7 +108,8 @@ bool RuntimeDyldImpl::loadObject(const MemoryBuffer *InputBuffer) {
CommonSymbols[*i] = Size;
} else {
if (SymType == object::SymbolRef::ST_Function ||
- SymType == object::SymbolRef::ST_Data) {
+ SymType == object::SymbolRef::ST_Data ||
+ SymType == object::SymbolRef::ST_Unknown) {
uint64_t FileOffset;
StringRef SectionData;
section_iterator si = obj->end_sections();
@@ -333,15 +334,31 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE,
}
uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) {
- // TODO: There is only ARM far stub now. We should add the Thumb stub,
- // and stubs for branches Thumb - ARM and ARM - Thumb.
if (Arch == Triple::arm) {
+ // TODO: There is only ARM far stub now. We should add the Thumb stub,
+ // and stubs for branches Thumb - ARM and ARM - Thumb.
uint32_t *StubAddr = (uint32_t*)Addr;
*StubAddr = 0xe51ff004; // ldr pc,<label>
return (uint8_t*)++StubAddr;
- }
- else
+ } else if (Arch == Triple::mipsel) {
+ uint32_t *StubAddr = (uint32_t*)Addr;
+ // 0: 3c190000 lui t9,%hi(addr).
+ // 4: 27390000 addiu t9,t9,%lo(addr).
+ // 8: 03200008 jr t9.
+ // c: 00000000 nop.
+ const unsigned LuiT9Instr = 0x3c190000, AdduiT9Instr = 0x27390000;
+ const unsigned JrT9Instr = 0x03200008, NopInstr = 0x0;
+
+ *StubAddr = LuiT9Instr;
+ StubAddr++;
+ *StubAddr = AdduiT9Instr;
+ StubAddr++;
+ *StubAddr = JrT9Instr;
+ StubAddr++;
+ *StubAddr = NopInstr;
return Addr;
+ }
+ return Addr;
}
// Assign an address to a symbol name and resolve all the relocations
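The mipsel stub emitted above is four fixed 32-bit instructions, lui t9 / addiu t9 / jr t9 / nop, whose %hi and %lo address halves start out as zero and are filled in later by R_MIPS_HI16/R_MIPS_LO16 relocations. A sketch of writing that 16-byte stub into a buffer, with the opcode constants taken from the patch:

    #include <cstdint>

    // Writes the 16-byte mipsel far-branch stub. The %hi/%lo halves of the
    // target address are left as zero here; the HI16/LO16 relocations created
    // by processRelocationRef patch them in afterwards.
    uint8_t *emitMipselStub(uint8_t *Addr) {
      uint32_t *Stub = reinterpret_cast<uint32_t *>(Addr);
      Stub[0] = 0x3c190000; // lui   t9, %hi(addr)
      Stub[1] = 0x27390000; // addiu t9, t9, %lo(addr)
      Stub[2] = 0x03200008; // jr    t9
      Stub[3] = 0x00000000; // nop (fills the branch delay slot)
      return Addr;          // the patch returns the stub start; fixups target it
    }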
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 39aed34..0aea598 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -55,7 +55,7 @@ public:
const MemoryBuffer& getBuffer() const { return *InputData; }
- // Methods for type inquiry through isa, cast, and dyn_cast
+ // Methods for type inquiry through isa, cast and dyn_cast
static inline bool classof(const Binary *v) {
return (isa<ELFObjectFile<target_endianness, is64Bits> >(v)
&& classof(cast<ELFObjectFile<target_endianness, is64Bits> >(v)));
@@ -208,10 +208,9 @@ void RuntimeDyldELF::resolveX86_64Relocation(uint8_t *LocalAddress,
case ELF::R_X86_64_32:
case ELF::R_X86_64_32S: {
Value += Addend;
- // FIXME: Handle the possibility of this assertion failing
- assert((Type == ELF::R_X86_64_32 && !(Value & 0xFFFFFFFF00000000ULL)) ||
- (Type == ELF::R_X86_64_32S &&
- (Value & 0xFFFFFFFF00000000ULL) == 0xFFFFFFFF00000000ULL));
+ assert((Type == ELF::R_X86_64_32 && (Value <= UINT32_MAX)) ||
+ (Type == ELF::R_X86_64_32S &&
+ ((int64_t)Value <= INT32_MAX && (int64_t)Value >= INT32_MIN)));
uint32_t TruncatedAddr = (Value & 0xFFFFFFFF);
uint32_t *Target = reinterpret_cast<uint32_t*>(LocalAddress);
*Target = TruncatedAddr;
@@ -220,7 +219,7 @@ void RuntimeDyldELF::resolveX86_64Relocation(uint8_t *LocalAddress,
case ELF::R_X86_64_PC32: {
uint32_t *Placeholder = reinterpret_cast<uint32_t*>(LocalAddress);
int64_t RealOffset = *Placeholder + Value + Addend - FinalAddress;
- assert(RealOffset <= 214783647 && RealOffset >= -214783648);
+ assert(RealOffset <= INT32_MAX && RealOffset >= INT32_MIN);
int32_t TruncOffset = (RealOffset & 0xFFFFFFFF);
*Placeholder = TruncOffset;
break;
@@ -248,7 +247,7 @@ void RuntimeDyldELF::resolveX86Relocation(uint8_t *LocalAddress,
}
default:
// There are other relocation types, but it appears these are the
- // only ones currently used by the LLVM ELF object writer
+ // only ones currently used by the LLVM ELF object writer
llvm_unreachable("Relocation type not implemented yet!");
break;
}
@@ -307,6 +306,44 @@ void RuntimeDyldELF::resolveARMRelocation(uint8_t *LocalAddress,
}
}
+void RuntimeDyldELF::resolveMIPSRelocation(uint8_t *LocalAddress,
+ uint32_t FinalAddress,
+ uint32_t Value,
+ uint32_t Type,
+ int32_t Addend) {
+ uint32_t* TargetPtr = (uint32_t*)LocalAddress;
+ Value += Addend;
+
+ DEBUG(dbgs() << "resolveMipselocation, LocalAddress: " << LocalAddress
+ << " FinalAddress: " << format("%p",FinalAddress)
+ << " Value: " << format("%x",Value)
+ << " Type: " << format("%x",Type)
+ << " Addend: " << format("%x",Addend)
+ << "\n");
+
+ switch(Type) {
+ default:
+ llvm_unreachable("Not implemented relocation type!");
+ break;
+ case ELF::R_MIPS_32:
+ *TargetPtr = Value + (*TargetPtr);
+ break;
+ case ELF::R_MIPS_26:
+ *TargetPtr = ((*TargetPtr) & 0xfc000000) | (( Value & 0x0fffffff) >> 2);
+ break;
+ case ELF::R_MIPS_HI16:
+ // Get the higher 16-bits. Also add 1 if bit 15 is 1.
+ Value += ((*TargetPtr) & 0x0000ffff) << 16;
+ *TargetPtr = ((*TargetPtr) & 0xffff0000) |
+ (((Value + 0x8000) >> 16) & 0xffff);
+ break;
+ case ELF::R_MIPS_LO16:
+ Value += ((*TargetPtr) & 0x0000ffff);
+ *TargetPtr = ((*TargetPtr) & 0xffff0000) | (Value & 0xffff);
+ break;
+ }
+}
+
void RuntimeDyldELF::resolveRelocation(uint8_t *LocalAddress,
uint64_t FinalAddress,
uint64_t Value,
@@ -327,6 +364,12 @@ void RuntimeDyldELF::resolveRelocation(uint8_t *LocalAddress,
(uint32_t)(Value & 0xffffffffL), Type,
(uint32_t)(Addend & 0xffffffffL));
break;
+ case Triple::mips: // Fall through.
+ case Triple::mipsel:
+ resolveMIPSRelocation(LocalAddress, (uint32_t)(FinalAddress & 0xffffffffL),
+ (uint32_t)(Value & 0xffffffffL), Type,
+ (uint32_t)(Addend & 0xffffffffL));
+ break;
default: llvm_unreachable("Unsupported CPU type!");
}
}
@@ -424,6 +467,53 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
Section.StubOffset, RelType, 0);
Section.StubOffset += getMaxStubSize();
}
+ } else if (Arch == Triple::mipsel && RelType == ELF::R_MIPS_26) {
+ // This is a MIPS branch relocation; it needs to go through a stub function.
+ DEBUG(dbgs() << "\t\tThis is a MIPS branch relocation.");
+ SectionEntry &Section = Sections[Rel.SectionID];
+ uint8_t *Target = Section.Address + Rel.Offset;
+ uint32_t *TargetAddress = (uint32_t *)Target;
+
+ // Extract the addend from the instruction.
+ uint32_t Addend = ((*TargetAddress) & 0x03ffffff) << 2;
+
+ Value.Addend += Addend;
+
+ // Look for an existing stub.
+ StubMap::const_iterator i = Stubs.find(Value);
+ if (i != Stubs.end()) {
+ resolveRelocation(Target, (uint64_t)Target,
+ (uint64_t)Section.Address +
+ i->second, RelType, 0);
+ DEBUG(dbgs() << " Stub function found\n");
+ } else {
+ // Create a new stub function.
+ DEBUG(dbgs() << " Create a new stub function\n");
+ Stubs[Value] = Section.StubOffset;
+ uint8_t *StubTargetAddr = createStubFunction(Section.Address +
+ Section.StubOffset);
+
+ // Create Hi and Lo relocations for the stub instructions just emitted.
+ RelocationEntry REHi(Rel.SectionID,
+ StubTargetAddr - Section.Address,
+ ELF::R_MIPS_HI16, Value.Addend);
+ RelocationEntry RELo(Rel.SectionID,
+ StubTargetAddr - Section.Address + 4,
+ ELF::R_MIPS_LO16, Value.Addend);
+
+ if (Value.SymbolName) {
+ addRelocationForSymbol(REHi, Value.SymbolName);
+ addRelocationForSymbol(RELo, Value.SymbolName);
+ } else {
+ addRelocationForSection(REHi, Value.SectionID);
+ addRelocationForSection(RELo, Value.SectionID);
+ }
+
+ resolveRelocation(Target, (uint64_t)Target,
+ (uint64_t)Section.Address +
+ Section.StubOffset, RelType, 0);
+ Section.StubOffset += getMaxStubSize();
+ }
} else {
RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend);
if (Value.SymbolName)
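The R_MIPS_HI16 handler adds 0x8000 before taking the upper half so that the sign-extended LO16 half recombines to the original value when the lui/addiu pair executes. A small worked sketch of the split and recombination:

    #include <cassert>
    #include <cstdint>

    // Split a 32-bit value into the %hi/%lo halves used by R_MIPS_HI16/LO16.
    // The +0x8000 compensates for %lo being sign-extended when the two halves
    // are added back together by the lui/addiu pair.
    void splitHiLo(uint32_t Value, uint16_t &Hi, uint16_t &Lo) {
      Hi = static_cast<uint16_t>((Value + 0x8000) >> 16);
      Lo = static_cast<uint16_t>(Value & 0xffff);
    }

    int main() {
      uint16_t Hi, Lo;
      splitHiLo(0x00418FF4, Hi, Lo);   // arbitrary example where bit 15 is set
      // addiu sign-extends its 16-bit immediate, so recombine accordingly:
      // Hi becomes 0x42 (rounded up), Lo becomes a negative offset.
      uint32_t Recombined = (uint32_t(Hi) << 16) + int16_t(Lo);
      assert(Recombined == 0x00418FF4);
      (void)Recombined;
      return 0;
    }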
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index e413f78..eade49e 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -42,6 +42,12 @@ protected:
uint32_t Type,
int32_t Addend);
+ void resolveMIPSRelocation(uint8_t *LocalAddress,
+ uint32_t FinalAddress,
+ uint32_t Value,
+ uint32_t Type,
+ int32_t Addend);
+
virtual void resolveRelocation(uint8_t *LocalAddress,
uint64_t FinalAddress,
uint64_t Value,
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index c38ca69..3d89994 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -161,6 +161,8 @@ protected:
inline unsigned getMaxStubSize() {
if (Arch == Triple::arm || Arch == Triple::thumb)
return 8; // 32-bit instruction and 32-bit address
+ else if (Arch == Triple::mipsel)
+ return 16;
else
return 0;
}
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index afba2e8..a6599bf 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -16,6 +16,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Module.h"
+#include "llvm/TypeFinder.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
@@ -595,13 +596,13 @@ void ModuleLinker::computeTypeMapping() {
// At this point, the destination module may have a type "%foo = { i32 }" for
// example. When the source module got loaded into the same LLVMContext, if
// it had the same type, it would have been renamed to "%foo.42 = { i32 }".
- std::vector<StructType*> SrcStructTypes;
- SrcM->findUsedStructTypes(SrcStructTypes, true);
+ TypeFinder SrcStructTypes;
+ SrcStructTypes.run(*SrcM, true);
SmallPtrSet<StructType*, 32> SrcStructTypesSet(SrcStructTypes.begin(),
SrcStructTypes.end());
- std::vector<StructType*> DstStructTypes;
- DstM->findUsedStructTypes(DstStructTypes, true);
+ TypeFinder DstStructTypes;
+ DstStructTypes.run(*DstM, true);
SmallPtrSet<StructType*, 32> DstStructTypesSet(DstStructTypes.begin(),
DstStructTypes.end());
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index f11e686..99bff96 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -28,6 +28,7 @@ add_llvm_library(LLVMMC
MCObjectStreamer.cpp
MCObjectWriter.cpp
MCPureStreamer.cpp
+ MCRegisterInfo.cpp
MCSection.cpp
MCSectionCOFF.cpp
MCSectionELF.cpp
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 0aa0c98..b7d2c28 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -27,6 +27,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/LEB128.h"
using namespace llvm;
@@ -719,9 +720,9 @@ bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
Data.clear();
raw_svector_ostream OSE(Data);
if (LF.isSigned())
- MCObjectWriter::EncodeSLEB128(Value, OSE);
+ encodeSLEB128(Value, OSE);
else
- MCObjectWriter::EncodeULEB128(Value, OSE);
+ encodeULEB128(Value, OSE);
OSE.flush();
return OldSize != LF.getContents().size();
}
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 75eaf80..4c63e43 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/ADT/Hashing.h"
@@ -361,7 +362,7 @@ void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta,
OS << char(dwarf::DW_LNS_const_add_pc);
else {
OS << char(dwarf::DW_LNS_advance_pc);
- MCObjectWriter::EncodeULEB128(AddrDelta, OS);
+ encodeULEB128(AddrDelta, OS);
}
OS << char(dwarf::DW_LNS_extended_op);
OS << char(1);
@@ -376,7 +377,7 @@ void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta,
// it with DW_LNS_advance_line.
if (Temp >= DWARF2_LINE_RANGE) {
OS << char(dwarf::DW_LNS_advance_line);
- MCObjectWriter::EncodeSLEB128(LineDelta, OS);
+ encodeSLEB128(LineDelta, OS);
LineDelta = 0;
Temp = 0 - DWARF2_LINE_BASE;
@@ -412,7 +413,7 @@ void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta,
// Otherwise use DW_LNS_advance_pc.
OS << char(dwarf::DW_LNS_advance_pc);
- MCObjectWriter::EncodeULEB128(AddrDelta, OS);
+ encodeULEB128(AddrDelta, OS);
if (NeedCopy)
OS << char(dwarf::DW_LNS_copy);
@@ -1293,20 +1294,17 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
streamer.EmitSymbolValue(&cieStart, 4);
}
- unsigned fdeEncoding = MOFI->getFDEEncoding(UsingCFI);
- unsigned size = getSizeForEncoding(streamer, fdeEncoding);
-
// PC Begin
- unsigned PCBeginEncoding = IsEH ? fdeEncoding :
- (unsigned)dwarf::DW_EH_PE_absptr;
- unsigned PCBeginSize = getSizeForEncoding(streamer, PCBeginEncoding);
- EmitSymbol(streamer, *frame.Begin, PCBeginEncoding, "FDE initial location");
+ unsigned PCEncoding = IsEH ? MOFI->getFDEEncoding(UsingCFI)
+ : (unsigned)dwarf::DW_EH_PE_absptr;
+ unsigned PCSize = getSizeForEncoding(streamer, PCEncoding);
+ EmitSymbol(streamer, *frame.Begin, PCEncoding, "FDE initial location");
// PC Range
const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin,
*frame.End, 0);
if (verboseAsm) streamer.AddComment("FDE address range");
- streamer.EmitAbsValue(Range, size);
+ streamer.EmitAbsValue(Range, PCSize);
if (IsEH) {
// Augmentation Data Length
@@ -1329,7 +1327,7 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
EmitCFIInstructions(streamer, frame.Instructions, frame.Begin);
// Padding
- streamer.EmitValueToAlignment(PCBeginSize);
+ streamer.EmitValueToAlignment(PCSize);
return fdeEnd;
}
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 4e6a1b9..29b4a94 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -507,15 +507,13 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
PDataSection =
Ctx->getCOFFSection(".pdata",
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getDataRel());
XDataSection =
Ctx->getCOFFSection(".xdata",
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getDataRel());
TLSDataSection =
Ctx->getCOFFSection(".tls$",
diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp
index 030f247..94d7cd6 100644
--- a/lib/MC/MCObjectWriter.cpp
+++ b/lib/MC/MCObjectWriter.cpp
@@ -17,40 +17,6 @@ using namespace llvm;
MCObjectWriter::~MCObjectWriter() {
}
-/// Utility function to encode a SLEB128 value.
-void MCObjectWriter::EncodeSLEB128(int64_t Value, raw_ostream &OS) {
- bool More;
- do {
- uint8_t Byte = Value & 0x7f;
- // NOTE: this assumes that this signed shift is an arithmetic right shift.
- Value >>= 7;
- More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) ||
- ((Value == -1) && ((Byte & 0x40) != 0))));
- if (More)
- Byte |= 0x80; // Mark this byte that that more bytes will follow.
- OS << char(Byte);
- } while (More);
-}
-
-/// Utility function to encode a ULEB128 value.
-void MCObjectWriter::EncodeULEB128(uint64_t Value, raw_ostream &OS,
- unsigned Padding) {
- do {
- uint8_t Byte = Value & 0x7f;
- Value >>= 7;
- if (Value != 0 || Padding != 0)
- Byte |= 0x80; // Mark this byte that that more bytes will follow.
- OS << char(Byte);
- } while (Value != 0);
-
- // Pad with 0x80 and emit a null byte at the end.
- if (Padding != 0) {
- for (; Padding != 1; --Padding)
- OS << '\x80';
- OS << '\x00';
- }
-}
-
bool
MCObjectWriter::IsSymbolRefDifferenceFullyResolved(const MCAssembler &Asm,
const MCSymbolRefExpr *A,
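
The two encoders removed here are superseded by the free functions encodeSLEB128/encodeULEB128 from llvm/Support/LEB128.h, which the updated call sites in MCAssembler, MCDwarf and MCStreamer use. As a reminder of the format, here is a stand-alone ULEB128 encoder with the worked example from the DWARF specification; the function name is mine, not the library's.

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // ULEB128: emit 7 data bits per byte, least-significant group first,
    // setting the high bit of every byte except the last.
    static std::vector<uint8_t> uleb128(uint64_t Value) {
      std::vector<uint8_t> Out;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // more bytes follow
        Out.push_back(Byte);
      } while (Value != 0);
      return Out;
    }

    int main() {
      std::vector<uint8_t> Bytes = uleb128(624485); // DWARF spec example
      assert(Bytes.size() == 3);
      assert(Bytes[0] == 0xe5 && Bytes[1] == 0x8e && Bytes[2] == 0x26);
      return 0;
    }
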
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 2daad0a..240c10b 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -46,14 +46,17 @@ namespace {
/// \brief Helper class for tracking macro definitions.
typedef std::vector<AsmToken> MacroArgument;
+typedef std::vector<MacroArgument> MacroArguments;
+typedef StringRef MacroParameter;
+typedef std::vector<MacroParameter> MacroParameters;
struct Macro {
StringRef Name;
StringRef Body;
- std::vector<StringRef> Parameters;
+ MacroParameters Parameters;
public:
- Macro(StringRef N, StringRef B, const std::vector<StringRef> &P) :
+ Macro(StringRef N, StringRef B, const MacroParameters &P) :
Name(N), Body(B), Parameters(P) {}
};
@@ -181,8 +184,8 @@ private:
bool HandleMacroEntry(StringRef Name, SMLoc NameLoc, const Macro *M);
bool expandMacro(raw_svector_ostream &OS, StringRef Body,
- const std::vector<StringRef> &Parameters,
- const std::vector<MacroArgument> &A,
+ const MacroParameters &Parameters,
+ const MacroArguments &A,
const SMLoc &L);
void HandleMacroExit();
@@ -207,7 +210,7 @@ private:
void EatToEndOfStatement();
bool ParseMacroArgument(MacroArgument &MA);
- bool ParseMacroArguments(const Macro *M, std::vector<MacroArgument> &A);
+ bool ParseMacroArguments(const Macro *M, MacroArguments &A);
 /// \brief Parse up to the end of statement and return the contents from the
/// current token until the end of the statement; the current token on exit
@@ -1451,9 +1454,17 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
NewDiag.print(0, OS);
}
+// FIXME: This is mostly duplicated from the function in AsmLexer.cpp. The
+// difference being that that function accepts '@' as part of identifiers and
+// we can't do that. AsmLexer.cpp should probably be changed to handle
+// '@' as a special case when needed.
+static bool isIdentifierChar(char c) {
+ return isalnum(c) || c == '_' || c == '$' || c == '.';
+}
+
bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
- const std::vector<StringRef> &Parameters,
- const std::vector<MacroArgument> &A,
+ const MacroParameters &Parameters,
+ const MacroArguments &A,
const SMLoc &L) {
unsigned NParameters = Parameters.size();
if (NParameters != 0 && NParameters != A.size())
@@ -1515,7 +1526,7 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
Pos += 2;
} else {
unsigned I = Pos + 1;
- while (isalnum(Body[I]) && I + 1 != End)
+ while (isIdentifierChar(Body[I]) && I + 1 != End)
++I;
const char *Begin = Body.data() + Pos +1;
@@ -1555,8 +1566,6 @@ bool AsmParser::ParseMacroArgument(MacroArgument &MA) {
unsigned ParenLevel = 0;
for (;;) {
- SMLoc LastTokenLoc;
-
if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal))
return TokError("unexpected token in macro instantiation");
@@ -1578,13 +1587,12 @@ bool AsmParser::ParseMacroArgument(MacroArgument &MA) {
Lex();
}
if (ParenLevel != 0)
- return TokError("unbalanced parenthesises in macro argument");
+ return TokError("unbalanced parentheses in macro argument");
return false;
}
// Parse the macro instantiation arguments.
-bool AsmParser::ParseMacroArguments(const Macro *M,
- std::vector<MacroArgument> &A) {
+bool AsmParser::ParseMacroArguments(const Macro *M, MacroArguments &A) {
const unsigned NParameters = M ? M->Parameters.size() : 0;
// Parse two kinds of macro invocations:
@@ -1597,8 +1605,8 @@ bool AsmParser::ParseMacroArguments(const Macro *M,
if (ParseMacroArgument(MA))
return true;
- if (!MA.empty())
- A.push_back(MA);
+ A.push_back(MA);
+
if (Lexer.is(AsmToken::EndOfStatement))
return false;
@@ -1615,17 +1623,23 @@ bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc,
if (ActiveMacros.size() == 20)
return TokError("macros cannot be nested more than 20 levels deep");
- std::vector<MacroArgument> MacroArguments;
- if (ParseMacroArguments(M, MacroArguments))
+ MacroArguments A;
+ if (ParseMacroArguments(M, A))
return true;
+ // Remove any trailing empty arguments. Do this after-the-fact as we have
+ // to keep empty arguments in the middle of the list, or the positional
+ // matching of arguments to parameters gets thrown off. e.g., "foo 1, , 2" vs. "foo 1, 2,"
+ while (!A.empty() && A.back().empty())
+ A.pop_back();
+
// Macro instantiation is lexical, unfortunately. We construct a new buffer
// to hold the macro body with substitutions.
SmallString<256> Buf;
StringRef Body = M->Body;
raw_svector_ostream OS(Buf);
- if (expandMacro(OS, Body, M->Parameters, MacroArguments, getTok().getLoc()))
+ if (expandMacro(OS, Body, M->Parameters, A, getTok().getLoc()))
return true;
 // We include the .endmacro in the buffer as our cue to exit the macro
@@ -3065,14 +3079,14 @@ bool GenericAsmParser::ParseDirectiveMacro(StringRef Directive,
SMLoc DirectiveLoc) {
StringRef Name;
if (getParser().ParseIdentifier(Name))
- return TokError("expected identifier in directive");
+ return TokError("expected identifier in '.macro' directive");
- std::vector<StringRef> Parameters;
+ MacroParameters Parameters;
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- for(;;) {
- StringRef Parameter;
+ for (;;) {
+ MacroParameter Parameter;
if (getParser().ParseIdentifier(Parameter))
- return TokError("expected identifier in directive");
+ return TokError("expected identifier in '.macro' directive");
Parameters.push_back(Parameter);
if (getLexer().isNot(AsmToken::Comma))
@@ -3126,7 +3140,7 @@ bool GenericAsmParser::ParseDirectiveMacro(StringRef Directive,
/// ::= .endm
/// ::= .endmacro
bool GenericAsmParser::ParseDirectiveEndMacro(StringRef Directive,
- SMLoc DirectiveLoc) {
+ SMLoc DirectiveLoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '" + Directive + "' directive");
@@ -3224,7 +3238,7 @@ Macro *AsmParser::ParseMacroLikeBody(SMLoc DirectiveLoc) {
// We Are Anonymous.
StringRef Name;
- std::vector<StringRef> Parameters;
+ MacroParameters Parameters;
return new Macro(Name, Body, Parameters);
}
@@ -3270,8 +3284,8 @@ bool AsmParser::ParseDirectiveRept(SMLoc DirectiveLoc) {
// Macro instantiation is lexical, unfortunately. We construct a new buffer
// to hold the macro body with substitutions.
SmallString<256> Buf;
- std::vector<StringRef> Parameters;
- const std::vector<MacroArgument> A;
+ MacroParameters Parameters;
+ MacroArguments A;
raw_svector_ostream OS(Buf);
while (Count--) {
if (expandMacro(OS, M->Body, Parameters, A, getTok().getLoc()))
@@ -3285,8 +3299,8 @@ bool AsmParser::ParseDirectiveRept(SMLoc DirectiveLoc) {
/// ParseDirectiveIrp
/// ::= .irp symbol,values
bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) {
- std::vector<StringRef> Parameters;
- StringRef Parameter;
+ MacroParameters Parameters;
+ MacroParameter Parameter;
if (ParseIdentifier(Parameter))
return TokError("expected identifier in '.irp' directive");
@@ -3298,7 +3312,7 @@ bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) {
Lex();
- std::vector<MacroArgument> A;
+ MacroArguments A;
if (ParseMacroArguments(0, A))
return true;
@@ -3315,9 +3329,8 @@ bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) {
SmallString<256> Buf;
raw_svector_ostream OS(Buf);
- for (std::vector<MacroArgument>::iterator i = A.begin(), e = A.end(); i != e;
- ++i) {
- std::vector<MacroArgument> Args;
+ for (MacroArguments::iterator i = A.begin(), e = A.end(); i != e; ++i) {
+ MacroArguments Args;
Args.push_back(*i);
if (expandMacro(OS, M->Body, Parameters, Args, getTok().getLoc()))
@@ -3332,8 +3345,8 @@ bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) {
/// ParseDirectiveIrpc
/// ::= .irpc symbol,values
bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) {
- std::vector<StringRef> Parameters;
- StringRef Parameter;
+ MacroParameters Parameters;
+ MacroParameter Parameter;
if (ParseIdentifier(Parameter))
return TokError("expected identifier in '.irpc' directive");
@@ -3345,7 +3358,7 @@ bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) {
Lex();
- std::vector<MacroArgument> A;
+ MacroArguments A;
if (ParseMacroArguments(0, A))
return true;
@@ -3371,7 +3384,7 @@ bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) {
MacroArgument Arg;
Arg.push_back(AsmToken(AsmToken::Identifier, Values.slice(I, I+1)));
- std::vector<MacroArgument> Args;
+ MacroArguments Args;
Args.push_back(Arg);
if (expandMacro(OS, M->Body, Parameters, Args, getTok().getLoc()))
diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp
index 5662fea..18033d0 100644
--- a/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -50,6 +50,9 @@ public:
AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".dump");
AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".load");
AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSection>(".section");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePushSection>(".pushsection");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePopSection>(".popsection");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePrevious>(".previous");
AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogUnique>(
".secure_log_unique");
AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogReset>(
@@ -112,6 +115,9 @@ public:
bool ParseDirectiveDumpOrLoad(StringRef, SMLoc);
bool ParseDirectiveLsym(StringRef, SMLoc);
bool ParseDirectiveSection(StringRef, SMLoc);
+ bool ParseDirectivePushSection(StringRef, SMLoc);
+ bool ParseDirectivePopSection(StringRef, SMLoc);
+ bool ParseDirectivePrevious(StringRef, SMLoc);
bool ParseDirectiveSecureLogReset(StringRef, SMLoc);
bool ParseDirectiveSecureLogUnique(StringRef, SMLoc);
bool ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc);
@@ -297,7 +303,7 @@ public:
};
-}
+} // end anonymous namespace
bool DarwinAsmParser::ParseSectionSwitch(const char *Segment,
const char *Section,
@@ -457,6 +463,37 @@ bool DarwinAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
return false;
}
+/// ParseDirectivePushSection:
+/// ::= .pushsection identifier (',' identifier)*
+bool DarwinAsmParser::ParseDirectivePushSection(StringRef S, SMLoc Loc) {
+ getStreamer().PushSection();
+
+ if (ParseDirectiveSection(S, Loc)) {
+ getStreamer().PopSection();
+ return true;
+ }
+
+ return false;
+}
+
+/// ParseDirectivePopSection:
+/// ::= .popsection
+bool DarwinAsmParser::ParseDirectivePopSection(StringRef, SMLoc) {
+ if (!getStreamer().PopSection())
+ return TokError(".popsection without corresponding .pushsection");
+ return false;
+}
+
+/// ParseDirectivePrevious:
+/// ::= .previous
+bool DarwinAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
+ const MCSection *PreviousSection = getStreamer().getPreviousSection();
+ if (PreviousSection == NULL)
+ return TokError(".previous without corresponding .section");
+ getStreamer().SwitchSection(PreviousSection);
+ return false;
+}
+
/// ParseDirectiveSecureLogUnique
/// ::= .secure_log_unique ... message ...
bool DarwinAsmParser::ParseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) {
@@ -707,4 +744,4 @@ MCAsmParserExtension *createDarwinAsmParser() {
return new DarwinAsmParser;
}
-}
+} // end llvm namespace
diff --git a/lib/MC/MCRegisterInfo.cpp b/lib/MC/MCRegisterInfo.cpp
new file mode 100644
index 0000000..4d1aff3
--- /dev/null
+++ b/lib/MC/MCRegisterInfo.cpp
@@ -0,0 +1,71 @@
+//=== MC/MCRegisterInfo.cpp - Target Register Description -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements MCRegisterInfo functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCRegisterInfo.h"
+
+using namespace llvm;
+
+unsigned MCRegisterInfo::getMatchingSuperReg(unsigned Reg, unsigned SubIdx,
+ const MCRegisterClass *RC) const {
+ for (MCSuperRegIterator Supers(Reg, this); Supers.isValid(); ++Supers)
+ if (RC->contains(*Supers) && Reg == getSubReg(*Supers, SubIdx))
+ return *Supers;
+ return 0;
+}
+
+unsigned MCRegisterInfo::getSubReg(unsigned Reg, unsigned Idx) const {
+ // Get a pointer to the corresponding SubRegIndices list. This list has the
+ // name of each sub-register in the same order as MCSubRegIterator.
+ const uint16_t *SRI = SubRegIndices + get(Reg).SubRegIndices;
+ for (MCSubRegIterator Subs(Reg, this); Subs.isValid(); ++Subs, ++SRI)
+ if (*SRI == Idx)
+ return *Subs;
+ return 0;
+}
+
+unsigned MCRegisterInfo::getSubRegIndex(unsigned Reg, unsigned SubReg) const {
+ // Get a pointer to the corresponding SubRegIndices list. This list has the
+ // name of each sub-register in the same order as MCSubRegIterator.
+ const uint16_t *SRI = SubRegIndices + get(Reg).SubRegIndices;
+ for (MCSubRegIterator Subs(Reg, this); Subs.isValid(); ++Subs, ++SRI)
+ if (*Subs == SubReg)
+ return *SRI;
+ return 0;
+}
+
+int MCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ const DwarfLLVMRegPair *M = isEH ? EHL2DwarfRegs : L2DwarfRegs;
+ unsigned Size = isEH ? EHL2DwarfRegsSize : L2DwarfRegsSize;
+
+ DwarfLLVMRegPair Key = { RegNum, 0 };
+ const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key);
+ if (I == M+Size || I->FromReg != RegNum)
+ return -1;
+ return I->ToReg;
+}
+
+int MCRegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const {
+ const DwarfLLVMRegPair *M = isEH ? EHDwarf2LRegs : Dwarf2LRegs;
+ unsigned Size = isEH ? EHDwarf2LRegsSize : Dwarf2LRegsSize;
+
+ DwarfLLVMRegPair Key = { RegNum, 0 };
+ const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key);
+ assert(I != M+Size && I->FromReg == RegNum && "Invalid RegNum");
+ return I->ToReg;
+}
+
+int MCRegisterInfo::getSEHRegNum(unsigned RegNum) const {
+ const DenseMap<unsigned, int>::const_iterator I = L2SEHRegs.find(RegNum);
+ if (I == L2SEHRegs.end()) return (int)RegNum;
+ return I->second;
+}
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index e363f28..0bac24d 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -15,6 +15,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include <cstdlib>
@@ -94,7 +95,7 @@ void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace,
unsigned Padding) {
SmallString<128> Tmp;
raw_svector_ostream OSE(Tmp);
- MCObjectWriter::EncodeULEB128(Value, OSE, Padding);
+ encodeULEB128(Value, OSE, Padding);
EmitBytes(OSE.str(), AddrSpace);
}
@@ -103,7 +104,7 @@ void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace,
void MCStreamer::EmitSLEB128IntValue(int64_t Value, unsigned AddrSpace) {
SmallString<128> Tmp;
raw_svector_ostream OSE(Tmp);
- MCObjectWriter::EncodeSLEB128(Value, OSE);
+ encodeSLEB128(Value, OSE);
EmitBytes(OSE.str(), AddrSpace);
}
diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp
index 79e66fc..c05b4b1 100644
--- a/lib/MC/MCWin64EH.cpp
+++ b/lib/MC/MCWin64EH.cpp
@@ -228,8 +228,7 @@ static const MCSection *getWin64EHTableSection(StringRef suffix,
return context.getCOFFSection((".xdata"+suffix).str(),
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getDataRel());
}
@@ -239,8 +238,7 @@ static const MCSection *getWin64EHFuncTableSection(StringRef suffix,
return context.getObjectFileInfo()->getPDataSection();
return context.getCOFFSection((".pdata"+suffix).str(),
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getDataRel());
}
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 409d4fb..ed261a4 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -1765,6 +1765,50 @@ APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
return fs;
}
+/* Rounding-mode correct round to integral value. */
+APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
+ opStatus fs;
+ assertArithmeticOK(*semantics);
+
+ // If the exponent is large enough, we know that this value is already
+ // integral, and the arithmetic below would potentially cause it to saturate
+ // to +/-Inf. Bail out early instead.
+ if (exponent+1 >= (int)semanticsPrecision(*semantics))
+ return opOK;
+
+ // The algorithm here is quite simple: we add 2^(p-1), where p is the
+ // precision of our format, and then subtract it back off again. The choice
+ // of rounding modes for the addition/subtraction determines the rounding mode
+ // for our integral rounding as well.
+ // NOTE: When the input value is negative, we do subtraction followed by
+ // addition instead.
+ APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
+ IntegerConstant <<= semanticsPrecision(*semantics)-1;
+ APFloat MagicConstant(*semantics);
+ fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
+ rmNearestTiesToEven);
+ MagicConstant.copySign(*this);
+
+ if (fs != opOK)
+ return fs;
+
+ // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
+ bool inputSign = isNegative();
+
+ fs = add(MagicConstant, rounding_mode);
+ if (fs != opOK && fs != opInexact)
+ return fs;
+
+ fs = subtract(MagicConstant, rounding_mode);
+
+ // Restore the input sign.
+ if (inputSign != isNegative())
+ changeSign();
+
+ return fs;
+}
+
+
/* Comparison requires normalized numbers. */
APFloat::cmpResult
APFloat::compare(const APFloat &rhs) const
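
The add-then-subtract scheme in roundToIntegral above is the classic magic-constant trick: adding 2^(p-1) pushes the fractional bits below the representable precision so the addition itself rounds them away under the chosen mode, and subtracting the constant back leaves the rounded integral value. Below is a hedged sketch of the same idea for hardware doubles (p = 53, so the constant is 2^52), assuming default IEEE arithmetic, round-to-nearest-even, and |x| < 2^52; the APFloat code handles the general case and all rounding modes.

    #include <cassert>
    #include <cmath>

    // Round x to an integral value with the 2^52 magic-constant trick.
    // Only meaningful for |x| < 2^52; larger magnitudes are already integral.
    static double roundToIntegralNearest(double x) {
      const double Magic = 4503599627370496.0; // 2^52
      double M = std::copysign(Magic, x);      // mirror the copySign() above
      double R = (x + M) - M;                  // rounds under the current FP mode
      return std::copysign(R, x);              // preserve the sign of +/-0.0
    }

    int main() {
      assert(roundToIntegralNearest(2.5) == 2.0);   // ties round to even
      assert(roundToIntegralNearest(-3.7) == -4.0);
      return 0;
    }
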
@@ -3278,16 +3322,8 @@ APFloat::APFloat(double d) : exponent2(0), sign2(0) {
}
namespace {
- static void append(SmallVectorImpl<char> &Buffer,
- unsigned N, const char *Str) {
- unsigned Start = Buffer.size();
- Buffer.set_size(Start + N);
- memcpy(&Buffer[Start], Str, N);
- }
-
- template <unsigned N>
- void append(SmallVectorImpl<char> &Buffer, const char (&Str)[N]) {
- append(Buffer, N, Str);
+ void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
+ Buffer.append(Str.begin(), Str.end());
}
/// Removes data from the given significand until it is no more
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index 9103327..83baf60 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -23,6 +23,7 @@ add_llvm_library(LLVMSupport
Dwarf.cpp
ErrorHandling.cpp
FileUtilities.cpp
+ FileOutputBuffer.cpp
FoldingSet.cpp
FormattedStream.cpp
GraphWriter.cpp
diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp
index dc21155..3d5cce0 100644
--- a/lib/Support/DataExtractor.cpp
+++ b/lib/Support/DataExtractor.cpp
@@ -139,7 +139,7 @@ uint64_t DataExtractor::getULEB128(uint32_t *offset_ptr) const {
while (isValidOffset(offset)) {
byte = Data[offset++];
- result |= (byte & 0x7f) << shift;
+ result |= uint64_t(byte & 0x7f) << shift;
shift += 7;
if ((byte & 0x80) == 0)
break;
@@ -160,7 +160,7 @@ int64_t DataExtractor::getSLEB128(uint32_t *offset_ptr) const {
while (isValidOffset(offset)) {
byte = Data[offset++];
- result |= (byte & 0x7f) << shift;
+ result |= uint64_t(byte & 0x7f) << shift;
shift += 7;
if ((byte & 0x80) == 0)
break;
@@ -168,7 +168,7 @@ int64_t DataExtractor::getSLEB128(uint32_t *offset_ptr) const {
// Sign bit of byte is 2nd high order bit (0x40)
if (shift < 64 && (byte & 0x40))
- result |= -(1 << shift);
+ result |= -(1ULL << shift);
*offset_ptr = offset;
return result;
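
The uint64_t casts added above are the whole fix: `byte & 0x7f` is promoted to int, so once shift reaches 28 or more (a LEB value spanning five or more bytes) shifting the group as an int can overflow or drop bits, and `-(1 << shift)` has the same problem when building the sign mask. A hedged, stand-alone sketch of the corrected SLEB128 decode loop follows (not the DataExtractor interface itself).

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Decode one SLEB128 value from Data, advancing *Offset past it.
    static int64_t decodeSLEB128(const uint8_t *Data, size_t *Offset) {
      int64_t Result = 0;
      unsigned Shift = 0;
      uint8_t Byte;
      do {
        Byte = Data[(*Offset)++];
        Result |= uint64_t(Byte & 0x7f) << Shift; // widen before shifting
        Shift += 7;
      } while (Byte & 0x80);
      // Sign bit of the final byte is 0x40; extend it across the upper bits.
      if (Shift < 64 && (Byte & 0x40))
        Result |= -(1ULL << Shift);
      return Result;
    }

    int main() {
      const uint8_t MinusOne[] = { 0x7f };
      size_t Offset = 0;
      assert(decodeSLEB128(MinusOne, &Offset) == -1 && Offset == 1);

      const uint8_t MinusOneTwentyEight[] = { 0x80, 0x7f };
      Offset = 0;
      assert(decodeSLEB128(MinusOneTwentyEight, &Offset) == -128);
      return 0;
    }
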
diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp
index 9fdb12e..c8e8900 100644
--- a/lib/Support/Debug.cpp
+++ b/lib/Support/Debug.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements a handle way of adding debugging information to your
+// This file implements a handy way of adding debugging information to your
// code, without it being enabled all of the time, and without having to add
// command line options to enable it.
//
@@ -18,8 +18,8 @@
// can specify '-debug-only=foo' to enable JUST the debug information for the
// foo class.
//
-// When compiling in release mode, the -debug-* options and all code in DEBUG()
-// statements disappears, so it does not effect the runtime of the code.
+// When compiling without assertions, the -debug-* options and all code in
+// DEBUG() statements disappears, so it does not affect the runtime of the code.
//
//===----------------------------------------------------------------------===//
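
A hedged usage sketch of the mechanism described in the comment above: the pass tag and the helper are made up; the point is only that DEBUG_TYPE selects which -debug-only=... tag enables the output, and that the statements vanish entirely in builds without assertions.

    #define DEBUG_TYPE "mypass"            // hypothetical tag for -debug-only=mypass
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    static void reportBlockCount(unsigned NumBlocks) {
      // Emitted only when the tool is run with -debug (all tags) or
      // -debug-only=mypass, and only in builds with assertions enabled.
      DEBUG(dbgs() << "mypass: visiting " << NumBlocks << " blocks\n");
    }
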
@@ -89,11 +89,11 @@ bool llvm::isCurrentDebugType(const char *DebugType) {
return CurrentDebugType.empty() || DebugType == CurrentDebugType;
}
-/// SetCurrentDebugType - Set the current debug type, as if the -debug-only=X
+/// setCurrentDebugType - Set the current debug type, as if the -debug-only=X
/// option were specified. Note that DebugFlag also needs to be set to true for
/// debug output to be produced.
///
-void llvm::SetCurrentDebugType(const char *Type) {
+void llvm::setCurrentDebugType(const char *Type) {
CurrentDebugType = Type;
}
diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp
new file mode 100644
index 0000000..7dc9587
--- /dev/null
+++ b/lib/Support/FileOutputBuffer.cpp
@@ -0,0 +1,148 @@
+//===- FileOutputBuffer.cpp - File Output Buffer ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility for creating an in-memory buffer that will be written to a file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/FileOutputBuffer.h"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+
+
+namespace llvm {
+
+
+FileOutputBuffer::FileOutputBuffer(uint8_t *Start, uint8_t *End,
+ StringRef Path, StringRef TmpPath)
+ : BufferStart(Start), BufferEnd(End) {
+ FinalPath.assign(Path);
+ TempPath.assign(TmpPath);
+}
+
+
+FileOutputBuffer::~FileOutputBuffer() {
+ // If not already committed, delete the buffer and remove the temp file.
+ if ( BufferStart != NULL ) {
+ sys::fs::unmap_file_pages((void*)BufferStart, getBufferSize());
+ bool Existed;
+ sys::fs::remove(Twine(TempPath), Existed);
+ }
+}
+
+
+error_code FileOutputBuffer::create(StringRef FilePath,
+ size_t Size,
+ OwningPtr<FileOutputBuffer> &Result,
+ unsigned Flags) {
+ // If file already exists, it must be a regular file (to be mappable).
+ sys::fs::file_status Stat;
+ error_code EC = sys::fs::status(FilePath, Stat);
+ switch (Stat.type()) {
+ case sys::fs::file_type::file_not_found:
+ // If file does not exist, we'll create one.
+ break;
+ case sys::fs::file_type::regular_file: {
+ // If file is not currently writable, error out.
+ // FIXME: There is no sys::fs:: API for checking this.
+ // FIXME: On POSIX, the access() call is used to check this.
+ }
+ break;
+ default:
+ if (EC)
+ return EC;
+ else
+ return make_error_code(errc::operation_not_permitted);
+ }
+
+ // Delete target file.
+ bool Existed;
+ EC = sys::fs::remove(FilePath, Existed);
+ if (EC)
+ return EC;
+
+ // Create new file in same directory but with random name.
+ SmallString<128> TempFilePath;
+ int FD;
+ EC = sys::fs::unique_file(Twine(FilePath) + ".tmp%%%%%%%",
+ FD, TempFilePath, false, 0644);
+ if (EC)
+ return EC;
+
+ // The unique_file() interface leaks lower layers and returns a file
+ // descriptor. There is no way to directly close it, so use this hack
+ // to hand it off to raw_fd_ostream to close for us.
+ {
+ raw_fd_ostream Dummy(FD, /*shouldClose=*/true);
+ }
+
+ // Resize file to requested initial size
+ EC = sys::fs::resize_file(Twine(TempFilePath), Size);
+ if (EC)
+ return EC;
+
+ // If requested, make the output file executable.
+ if ( Flags & F_executable ) {
+ sys::fs::file_status Stat2;
+ EC = sys::fs::status(Twine(TempFilePath), Stat2);
+ if (EC)
+ return EC;
+
+ sys::fs::perms new_perms = Stat2.permissions();
+ if ( new_perms & sys::fs::owner_read )
+ new_perms |= sys::fs::owner_exe;
+ if ( new_perms & sys::fs::group_read )
+ new_perms |= sys::fs::group_exe;
+ if ( new_perms & sys::fs::others_read )
+ new_perms |= sys::fs::others_exe;
+ new_perms |= sys::fs::add_perms;
+ EC = sys::fs::permissions(Twine(TempFilePath), new_perms);
+ if (EC)
+ return EC;
+ }
+
+ // Memory map new file.
+ void *Base;
+ EC = sys::fs::map_file_pages(Twine(TempFilePath), 0, Size, true, Base);
+ if (EC)
+ return EC;
+
+ // Create FileOutputBuffer object to own mapped range.
+ uint8_t *Start = reinterpret_cast<uint8_t*>(Base);
+ Result.reset(new FileOutputBuffer(Start, Start+Size, FilePath, TempFilePath));
+
+ return error_code::success();
+}
+
+
+error_code FileOutputBuffer::commit(int64_t NewSmallerSize) {
+ // Unmap buffer, letting OS flush dirty pages to file on disk.
+ void *Start = reinterpret_cast<void*>(BufferStart);
+ error_code EC = sys::fs::unmap_file_pages(Start, getBufferSize());
+ if (EC)
+ return EC;
+
+ // If requested, resize file as part of commit.
+ if ( NewSmallerSize != -1 ) {
+ EC = sys::fs::resize_file(Twine(TempPath), NewSmallerSize);
+ if (EC)
+ return EC;
+ }
+
+ // Rename file to final name.
+ return sys::fs::rename(Twine(TempPath), Twine(FinalPath));
+}
+
+
+} // namespace
+
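
A hedged sketch of the intended workflow for the new class: create() maps a temporary file of the final size, the caller fills the memory, and commit() renames it into place; destroying the buffer without committing removes the temp file. getBufferStart() is assumed from FileOutputBuffer.h, which is not part of this hunk.

    #include "llvm/ADT/OwningPtr.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/FileOutputBuffer.h"
    #include "llvm/Support/system_error.h"
    #include <cstring>

    using namespace llvm;

    // Write Size bytes of Data to Path through a memory-mapped temporary file.
    static error_code writeWholeFile(StringRef Path, const char *Data,
                                     size_t Size) {
      OwningPtr<FileOutputBuffer> Buffer;
      if (error_code EC = FileOutputBuffer::create(Path, Size, Buffer, 0))
        return EC;
      // getBufferStart() is assumed to expose the mapped range (see header).
      memcpy(Buffer->getBufferStart(), Data, Size);
      return Buffer->commit(-1); // -1: keep the full size, per commit() above
    }
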
diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp
index da5baab..4e4a026 100644
--- a/lib/Support/Mutex.cpp
+++ b/lib/Support/Mutex.cpp
@@ -59,7 +59,8 @@ MutexImpl::MutexImpl( bool recursive)
errorcode = pthread_mutexattr_settype(&attr, kind);
assert(errorcode == 0);
-#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__)
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && \
+ !defined(__DragonFly__) && !defined(__Bitrig__)
// Make it a process local mutex
errorcode = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE);
assert(errorcode == 0);
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 7b26ea9..cca549d 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -124,6 +124,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
case RTEMS: return "rtems";
case NativeClient: return "nacl";
case CNK: return "cnk";
+ case Bitrig: return "bitrig";
}
llvm_unreachable("Invalid OSType");
@@ -293,6 +294,7 @@ static Triple::OSType parseOS(StringRef OSName) {
.StartsWith("rtems", Triple::RTEMS)
.StartsWith("nacl", Triple::NativeClient)
.StartsWith("cnk", Triple::CNK)
+ .StartsWith("bitrig", Triple::Bitrig)
.Default(Triple::UnknownOS);
}
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index b41390a..6bddbdf 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -260,7 +260,7 @@ Path::GetCurrentDirectory() {
return Path(pathname);
}
-#if defined(__FreeBSD__) || defined (__NetBSD__) || \
+#if defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \
defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__)
static int
test_dir(char buf[PATH_MAX], char ret[PATH_MAX],
@@ -329,7 +329,7 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
if (realpath(exe_path, link_path))
return Path(link_path);
}
-#elif defined(__FreeBSD__) || defined (__NetBSD__) || \
+#elif defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \
defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__)
char exe_path[PATH_MAX];
diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc
index 93ccd1a..f59551e 100644
--- a/lib/Support/Unix/PathV2.inc
+++ b/lib/Support/Unix/PathV2.inc
@@ -50,6 +50,12 @@
#include <limits.h>
#endif
+// Both stdio.h and cstdio are included via different paths and
+// stdcxx's cstdio doesn't include stdio.h, so it doesn't #undef the macros
+// either.
+#undef ferror
+#undef feof
+
// For GNU Hurd
#if defined(__GNU__) && !defined(PATH_MAX)
# define PATH_MAX 4096
@@ -461,6 +467,118 @@ rety_open_create:
return error_code::success();
}
+error_code mapped_file_region::init(int fd, uint64_t offset) {
+ AutoFD FD(fd);
+
+ // Figure out how large the file is.
+ struct stat FileInfo;
+ if (fstat(fd, &FileInfo) == -1)
+ return error_code(errno, system_category());
+ uint64_t FileSize = FileInfo.st_size;
+
+ if (Size == 0)
+ Size = FileSize;
+ else if (FileSize < Size) {
+ // We need to grow the file.
+ if (ftruncate(fd, Size) == -1)
+ return error_code(errno, system_category());
+ }
+
+ int flags = (Mode == readwrite) ? MAP_SHARED : MAP_PRIVATE;
+ int prot = (Mode == readonly) ? PROT_READ : (PROT_READ | PROT_WRITE);
+#ifdef MAP_FILE
+ flags |= MAP_FILE;
+#endif
+ Mapping = ::mmap(0, Size, prot, flags, fd, offset);
+ if (Mapping == MAP_FAILED)
+ return error_code(errno, system_category());
+ return error_code::success();
+}
+
+mapped_file_region::mapped_file_region(const Twine &path,
+ mapmode mode,
+ uint64_t length,
+ uint64_t offset,
+ error_code &ec)
+ : Mode(mode)
+ , Size(length)
+ , Mapping() {
+ // Make sure that the requested size fits within SIZE_T.
+ if (length > std::numeric_limits<size_t>::max()) {
+ ec = make_error_code(errc::invalid_argument);
+ return;
+ }
+
+ SmallString<128> path_storage;
+ StringRef name = path.toNullTerminatedStringRef(path_storage);
+ int oflags = (mode == readonly) ? O_RDONLY : O_RDWR;
+ int ofd = ::open(name.begin(), oflags);
+ if (ofd == -1) {
+ ec = error_code(errno, system_category());
+ return;
+ }
+
+ ec = init(ofd, offset);
+ if (ec)
+ Mapping = 0;
+}
+
+mapped_file_region::mapped_file_region(int fd,
+ mapmode mode,
+ uint64_t length,
+ uint64_t offset,
+ error_code &ec)
+ : Mode(mode)
+ , Size(length)
+ , Mapping() {
+ // Make sure that the requested size fits within SIZE_T.
+ if (length > std::numeric_limits<size_t>::max()) {
+ ec = make_error_code(errc::invalid_argument);
+ return;
+ }
+
+ ec = init(fd, offset);
+ if (ec)
+ Mapping = 0;
+}
+
+mapped_file_region::~mapped_file_region() {
+ if (Mapping)
+ ::munmap(Mapping, Size);
+}
+
+#if LLVM_USE_RVALUE_REFERENCES
+mapped_file_region::mapped_file_region(mapped_file_region &&other)
+ : Mode(other.Mode), Size(other.Size), Mapping(other.Mapping) {
+ other.Mapping = 0;
+}
+#endif
+
+mapped_file_region::mapmode mapped_file_region::flags() const {
+ assert(Mapping && "Mapping failed but used anyway!");
+ return Mode;
+}
+
+uint64_t mapped_file_region::size() const {
+ assert(Mapping && "Mapping failed but used anyway!");
+ return Size;
+}
+
+char *mapped_file_region::data() const {
+ assert(Mapping && "Mapping failed but used anyway!");
+ assert(Mode != readonly && "Cannot get non const data for readonly mapping!");
+ return reinterpret_cast<char*>(Mapping);
+}
+
+const char *mapped_file_region::const_data() const {
+ assert(Mapping && "Mapping failed but used anyway!");
+ return reinterpret_cast<const char*>(Mapping);
+}
+
+int mapped_file_region::alignment() {
+ return Process::GetPageSize();
+}
+
error_code detail::directory_iterator_construct(detail::DirIterState &it,
StringRef path){
SmallString<128> path_null(path);
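
For completeness, a hedged sketch of how the new mapped_file_region is meant to be used; the class is assumed to live in llvm::sys::fs with the readonly/readwrite/priv modes seen in both implementations, and a length of 0 maps the whole file, as the init() code above shows.

    #include "llvm/ADT/Twine.h"
    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/Support/system_error.h"

    using namespace llvm;

    // Map Path read-only and print its size and first byte; the destructor
    // unmaps the region when it goes out of scope.
    static error_code dumpFirstByte(const Twine &Path) {
      error_code EC;
      sys::fs::mapped_file_region Region(Path,
                                         sys::fs::mapped_file_region::readonly,
                                         /*length=*/0, /*offset=*/0, EC);
      if (EC)
        return EC;
      outs() << "size: " << Region.size() << "\n";
      if (Region.size() > 0)
        outs() << "first byte: "
               << (unsigned)(unsigned char)Region.const_data()[0] << "\n";
      return error_code::success();
    }
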
diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc
index 174112e..5204147 100644
--- a/lib/Support/Unix/Process.inc
+++ b/lib/Support/Unix/Process.inc
@@ -20,9 +20,10 @@
#ifdef HAVE_SYS_RESOURCE_H
#include <sys/resource.h>
#endif
-// DragonFly BSD has deprecated <malloc.h> for <stdlib.h> instead,
-// Unix.h includes this for us already.
-#if defined(HAVE_MALLOC_H) && !defined(__DragonFly__)
+// DragonFlyBSD, OpenBSD, and Bitrig have deprecated <malloc.h> in favor of
+// <stdlib.h>. Unix.h includes this for us already.
+#if defined(HAVE_MALLOC_H) && !defined(__DragonFly__) && \
+ !defined(__OpenBSD__) && !defined(__Bitrig__)
#include <malloc.h>
#endif
#ifdef HAVE_MALLOC_MALLOC_H
diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc
index 66eeab0..696768b 100644
--- a/lib/Support/Windows/PathV2.inc
+++ b/lib/Support/Windows/PathV2.inc
@@ -22,6 +22,8 @@
#include <sys/stat.h>
#include <sys/types.h>
+#undef max
+
// MinGW doesn't define this.
#ifndef _ERRNO_T_DEFINED
#define _ERRNO_T_DEFINED
@@ -703,6 +705,203 @@ error_code get_magic(const Twine &path, uint32_t len,
return error_code::success();
}
+error_code mapped_file_region::init(int FD, uint64_t Offset) {
+ FileDescriptor = FD;
+ // Make sure that the requested size fits within SIZE_T.
+ if (Size > std::numeric_limits<SIZE_T>::max()) {
+ if (FileDescriptor)
+ _close(FileDescriptor);
+ else
+ ::CloseHandle(FileHandle);
+ return make_error_code(errc::invalid_argument);
+ }
+
+ DWORD flprotect;
+ switch (Mode) {
+ case readonly: flprotect = PAGE_READONLY; break;
+ case readwrite: flprotect = PAGE_READWRITE; break;
+ case priv: flprotect = PAGE_WRITECOPY; break;
+ default: llvm_unreachable("invalid mapping mode");
+ }
+
+ FileMappingHandle = ::CreateFileMapping(FileHandle,
+ 0,
+ flprotect,
+ Size >> 32,
+ Size & 0xffffffff,
+ 0);
+ if (FileMappingHandle == NULL) {
+ error_code ec = windows_error(GetLastError());
+ if (FileDescriptor)
+ _close(FileDescriptor);
+ else
+ ::CloseHandle(FileHandle);
+ return ec;
+ }
+
+ DWORD dwDesiredAccess;
+ switch (Mode) {
+ case readonly: dwDesiredAccess = FILE_MAP_READ; break;
+ case readwrite: dwDesiredAccess = FILE_MAP_WRITE; break;
+ case priv: dwDesiredAccess = FILE_MAP_COPY; break;
+ default: llvm_unreachable("invalid mapping mode");
+ }
+ Mapping = ::MapViewOfFile(FileMappingHandle,
+ dwDesiredAccess,
+ Offset >> 32,
+ Offset & 0xffffffff,
+ Size);
+ if (Mapping == NULL) {
+ error_code ec = windows_error(GetLastError());
+ ::CloseHandle(FileMappingHandle);
+ if (FileDescriptor)
+ _close(FileDescriptor);
+ else
+ ::CloseHandle(FileHandle);
+ return ec;
+ }
+
+ if (Size == 0) {
+ MEMORY_BASIC_INFORMATION mbi;
+ SIZE_T Result = VirtualQuery(Mapping, &mbi, sizeof(mbi));
+ if (Result == 0) {
+ error_code ec = windows_error(GetLastError());
+ ::UnmapViewOfFile(Mapping);
+ ::CloseHandle(FileMappingHandle);
+ if (FileDescriptor)
+ _close(FileDescriptor);
+ else
+ ::CloseHandle(FileHandle);
+ return ec;
+ }
+ Size = mbi.RegionSize;
+ }
+ return error_code::success();
+}
+
+mapped_file_region::mapped_file_region(const Twine &path,
+ mapmode mode,
+ uint64_t length,
+ uint64_t offset,
+ error_code &ec)
+ : Mode(mode)
+ , Size(length)
+ , Mapping()
+ , FileDescriptor()
+ , FileHandle(INVALID_HANDLE_VALUE)
+ , FileMappingHandle() {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ // Convert path to UTF-16.
+ if (ec = UTF8ToUTF16(path.toStringRef(path_storage), path_utf16))
+ return;
+
+ // Get file handle for creating a file mapping.
+ FileHandle = ::CreateFileW(c_str(path_utf16),
+ Mode == readonly ? GENERIC_READ
+ : GENERIC_READ | GENERIC_WRITE,
+ Mode == readonly ? FILE_SHARE_READ
+ : 0,
+ 0,
+ Mode == readonly ? OPEN_EXISTING
+ : OPEN_ALWAYS,
+ Mode == readonly ? FILE_ATTRIBUTE_READONLY
+ : FILE_ATTRIBUTE_NORMAL,
+ 0);
+ if (FileHandle == INVALID_HANDLE_VALUE) {
+ ec = windows_error(::GetLastError());
+ return;
+ }
+
+ FileDescriptor = 0;
+ ec = init(FileDescriptor, offset);
+ if (ec) {
+ Mapping = FileMappingHandle = 0;
+ FileHandle = INVALID_HANDLE_VALUE;
+ FileDescriptor = 0;
+ }
+}
+
+mapped_file_region::mapped_file_region(int fd,
+ mapmode mode,
+ uint64_t length,
+ uint64_t offset,
+ error_code &ec)
+ : Mode(mode)
+ , Size(length)
+ , Mapping()
+ , FileDescriptor(fd)
+ , FileHandle(INVALID_HANDLE_VALUE)
+ , FileMappingHandle() {
+ FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
+ if (FileHandle == INVALID_HANDLE_VALUE) {
+ _close(FileDescriptor);
+ FileDescriptor = 0;
+ ec = make_error_code(errc::bad_file_descriptor);
+ return;
+ }
+
+ ec = init(FileDescriptor, offset);
+ if (ec) {
+ Mapping = FileMappingHandle = 0;
+ FileHandle = INVALID_HANDLE_VALUE;
+ FileDescriptor = 0;
+ }
+}
+
+mapped_file_region::~mapped_file_region() {
+ if (Mapping)
+ ::UnmapViewOfFile(Mapping);
+ if (FileMappingHandle)
+ ::CloseHandle(FileMappingHandle);
+ if (FileDescriptor)
+ _close(FileDescriptor);
+ else if (FileHandle != INVALID_HANDLE_VALUE)
+ ::CloseHandle(FileHandle);
+}
+
+#if LLVM_USE_RVALUE_REFERENCES
+mapped_file_region::mapped_file_region(mapped_file_region &&other)
+ : Mode(other.Mode)
+ , Size(other.Size)
+ , Mapping(other.Mapping)
+ , FileDescriptor(other.FileDescriptor)
+ , FileHandle(other.FileHandle)
+ , FileMappingHandle(other.FileMappingHandle) {
+ other.Mapping = other.FileMappingHandle = 0;
+ other.FileHandle = INVALID_HANDLE_VALUE;
+ other.FileDescriptor = 0;
+}
+#endif
+
+mapped_file_region::mapmode mapped_file_region::flags() const {
+ assert(Mapping && "Mapping failed but used anyway!");
+ return Mode;
+}
+
+uint64_t mapped_file_region::size() const {
+ assert(Mapping && "Mapping failed but used anyway!");
+ return Size;
+}
+
+char *mapped_file_region::data() const {
+ assert(Mode != readonly && "Cannot get non const data for readonly mapping!");
+ assert(Mapping && "Mapping failed but used anyway!");
+ return reinterpret_cast<char*>(Mapping);
+}
+
+const char *mapped_file_region::const_data() const {
+ assert(Mapping && "Mapping failed but used anyway!");
+ return reinterpret_cast<const char*>(Mapping);
+}
+
+int mapped_file_region::alignment() {
+ SYSTEM_INFO SysInfo;
+ ::GetSystemInfo(&SysInfo);
+ return SysInfo.dwAllocationGranularity;
+}
+
error_code detail::directory_iterator_construct(detail::DirIterState &it,
StringRef path){
SmallVector<wchar_t, 128> path_utf16;
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index 9424677..b9c7ff6 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -2284,23 +2284,33 @@ InstantiateMulticlassDef(MultiClass &MC,
Ref.Rec = DefProto;
AddSubClass(CurRec, Ref);
- if (DefNameString == 0) {
- // We must resolve references to NAME.
- if (SetValue(CurRec, Ref.RefLoc, "NAME", std::vector<unsigned>(),
- DefmPrefix)) {
- Error(DefmPrefixLoc, "Could not resolve "
- + CurRec->getNameInitAsString() + ":NAME to '"
- + DefmPrefix->getAsUnquotedString() + "'");
- return 0;
- }
+ // Set the value for NAME. We don't resolve references to it 'til later,
+ // though, so that uses in nested multiclass names don't get
+ // confused.
+ if (SetValue(CurRec, Ref.RefLoc, "NAME", std::vector<unsigned>(),
+ DefmPrefix)) {
+ Error(DefmPrefixLoc, "Could not resolve "
+ + CurRec->getNameInitAsString() + ":NAME to '"
+ + DefmPrefix->getAsUnquotedString() + "'");
+ return 0;
+ }
+ // If the DefNameString didn't resolve, we probably have a reference to
+ // NAME and need to replace it. We need to do at least this much greedily,
+ // otherwise nested multiclasses will end up with incorrect NAME expansions.
+ if (DefNameString == 0) {
RecordVal *DefNameRV = CurRec->getValue("NAME");
CurRec->resolveReferencesTo(DefNameRV);
}
if (!CurMultiClass) {
- // We do this after resolving NAME because before resolution, many
- // multiclass defs will have the same name expression. If we are
+ // Now that we're at the top level, resolve all NAME references
+ // in the resultant defs that weren't in the def names themselves.
+ RecordVal *DefNameRV = CurRec->getValue("NAME");
+ CurRec->resolveReferencesTo(DefNameRV);
+
+ // Now that NAME references are resolved and we're at the top level of
+ // any multiclass expansions, add the record to the RecordKeeper. If we are
// currently in a multiclass, it means this defm appears inside a
// multiclass and its name won't be fully resolvable until we see
// the top-level defm. Therefore, we don't add this to the
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index cd3c0e0..69e2346 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -224,7 +224,7 @@ def : ProcNoItin<"cortex-m3", [HasV7Ops,
def : ProcNoItin<"cortex-m4", [HasV7Ops,
FeatureThumb2, FeatureNoARM, FeatureDB,
FeatureHWDiv, FeatureDSPThumb2,
- FeatureT2XtPk, FeatureVFP2,
+ FeatureT2XtPk, FeatureVFP4,
FeatureVFPOnlySP, FeatureMClass]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 9a1ce06..e9e2803 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -529,10 +529,24 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return false;
}
- // These modifiers are not yet supported.
+ // This modifier is not yet supported.
case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
- case 'H': // The highest-numbered register of a pair.
return true;
+ case 'H': { // The highest-numbered register of a pair.
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (!MO.isReg())
+ return true;
+ const TargetRegisterClass &RC = ARM::GPRRegClass;
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+
+ unsigned RegIdx = TRI->getEncodingValue(MO.getReg());
+ RegIdx |= 1; // The odd register is also the higher-numbered one of a pair.
+
+ unsigned Reg = RC.getRegister(RegIdx);
+ O << ARMInstPrinter::getRegisterName(Reg);
+ return false;
+ }
}
}
@@ -1136,8 +1150,14 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
assert(SrcReg == ARM::SP &&
"Only stack pointer as a source reg is supported");
for (unsigned i = StartOp, NumOps = MI->getNumOperands() - NumOffset;
- i != NumOps; ++i)
- RegList.push_back(MI->getOperand(i).getReg());
+ i != NumOps; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ // Actually, there should never be any impdef stuff here. Skip it
+ // temporarily to work around PR11902.
+ if (MO.isImplicit())
+ continue;
+ RegList.push_back(MO.getReg());
+ }
break;
case ARM::STR_PRE_IMM:
case ARM::STR_PRE_REG:
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 714238a..29033e5 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -795,8 +795,28 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else
llvm_unreachable("Unknown reg class!");
break;
+ case 24:
+ if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
+ // Use aligned spills if the stack can be realigned.
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
case 32:
- if (ARM::QQPRRegClass.hasSubClassEq(RC)) {
+ if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
// FIXME: It's possible to only store part of the QQ register if the
// spilled def has a sub-register index.
@@ -868,6 +888,8 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
}
break;
case ARM::VST1q64:
+ case ARM::VST1d64TPseudo:
+ case ARM::VST1d64QPseudo:
if (MI->getOperand(0).isFI() &&
MI->getOperand(2).getSubReg() == 0) {
FrameIndex = MI->getOperand(0).getIndex();
@@ -942,8 +964,28 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else
llvm_unreachable("Unknown reg class!");
break;
- case 32:
- if (ARM::QQPRRegClass.hasSubClassEq(RC)) {
+ case 24:
+ if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
+ .addFrameIndex(FI).addImm(16)
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO));
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 32:
+ if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
.addFrameIndex(FI).addImm(16)
@@ -1016,6 +1058,8 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
}
break;
case ARM::VLD1q64:
+ case ARM::VLD1d64TPseudo:
+ case ARM::VLD1d64QPseudo:
if (MI->getOperand(1).isFI() &&
MI->getOperand(0).getSubReg() == 0) {
FrameIndex = MI->getOperand(1).getIndex();
@@ -1524,6 +1568,139 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
}
+/// Identify instructions that can be folded into a MOVCC instruction, and
+/// return the corresponding opcode for the predicated pseudo-instruction.
+static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI,
+ const MachineRegisterInfo &MRI) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return 0;
+ if (!MRI.hasOneNonDBGUse(Reg))
+ return 0;
+ MI = MRI.getVRegDef(Reg);
+ if (!MI)
+ return 0;
+ // Check if MI has any non-dead defs or physreg uses. This also detects
+ // predicated instructions which will be reading CPSR.
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ // Reject frame index operands, PEI can't handle the predicated pseudos.
+ if (MO.isFI() || MO.isCPI() || MO.isJTI())
+ return 0;
+ if (!MO.isReg())
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ return 0;
+ if (MO.isDef() && !MO.isDead())
+ return 0;
+ }
+ switch (MI->getOpcode()) {
+ default: return 0;
+ case ARM::ANDri: return ARM::ANDCCri;
+ case ARM::ANDrr: return ARM::ANDCCrr;
+ case ARM::ANDrsi: return ARM::ANDCCrsi;
+ case ARM::ANDrsr: return ARM::ANDCCrsr;
+ case ARM::t2ANDri: return ARM::t2ANDCCri;
+ case ARM::t2ANDrr: return ARM::t2ANDCCrr;
+ case ARM::t2ANDrs: return ARM::t2ANDCCrs;
+ case ARM::EORri: return ARM::EORCCri;
+ case ARM::EORrr: return ARM::EORCCrr;
+ case ARM::EORrsi: return ARM::EORCCrsi;
+ case ARM::EORrsr: return ARM::EORCCrsr;
+ case ARM::t2EORri: return ARM::t2EORCCri;
+ case ARM::t2EORrr: return ARM::t2EORCCrr;
+ case ARM::t2EORrs: return ARM::t2EORCCrs;
+ case ARM::ORRri: return ARM::ORRCCri;
+ case ARM::ORRrr: return ARM::ORRCCrr;
+ case ARM::ORRrsi: return ARM::ORRCCrsi;
+ case ARM::ORRrsr: return ARM::ORRCCrsr;
+ case ARM::t2ORRri: return ARM::t2ORRCCri;
+ case ARM::t2ORRrr: return ARM::t2ORRCCrr;
+ case ARM::t2ORRrs: return ARM::t2ORRCCrs;
+
+ // ARM ADD/SUB
+ case ARM::ADDri: return ARM::ADDCCri;
+ case ARM::ADDrr: return ARM::ADDCCrr;
+ case ARM::ADDrsi: return ARM::ADDCCrsi;
+ case ARM::ADDrsr: return ARM::ADDCCrsr;
+ case ARM::SUBri: return ARM::SUBCCri;
+ case ARM::SUBrr: return ARM::SUBCCrr;
+ case ARM::SUBrsi: return ARM::SUBCCrsi;
+ case ARM::SUBrsr: return ARM::SUBCCrsr;
+
+ // Thumb2 ADD/SUB
+ case ARM::t2ADDri: return ARM::t2ADDCCri;
+ case ARM::t2ADDri12: return ARM::t2ADDCCri12;
+ case ARM::t2ADDrr: return ARM::t2ADDCCrr;
+ case ARM::t2ADDrs: return ARM::t2ADDCCrs;
+ case ARM::t2SUBri: return ARM::t2SUBCCri;
+ case ARM::t2SUBri12: return ARM::t2SUBCCri12;
+ case ARM::t2SUBrr: return ARM::t2SUBCCrr;
+ case ARM::t2SUBrs: return ARM::t2SUBCCrs;
+ }
+}
+
+bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ unsigned &TrueOp, unsigned &FalseOp,
+ bool &Optimizable) const {
+ assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
+ "Unknown select instruction");
+ // MOVCC operands:
+ // 0: Def.
+ // 1: True use.
+ // 2: False use.
+ // 3: Condition code.
+ // 4: CPSR use.
+ TrueOp = 1;
+ FalseOp = 2;
+ Cond.push_back(MI->getOperand(3));
+ Cond.push_back(MI->getOperand(4));
+ // We can always fold a def.
+ Optimizable = true;
+ return false;
+}
+
+MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
+ bool PreferFalse) const {
+ assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
+ "Unknown select instruction");
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MachineInstr *DefMI = 0;
+ unsigned Opc = canFoldIntoMOVCC(MI->getOperand(2).getReg(), DefMI, MRI);
+ bool Invert = !Opc;
+ if (!Opc)
+ Opc = canFoldIntoMOVCC(MI->getOperand(1).getReg(), DefMI, MRI);
+ if (!Opc)
+ return 0;
+
+ // Create a new predicated version of DefMI.
+ // Rfalse is the first use.
+ MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ get(Opc), MI->getOperand(0).getReg())
+ .addOperand(MI->getOperand(Invert ? 2 : 1));
+
+ // Copy all the DefMI operands, excluding its (null) predicate.
+ const MCInstrDesc &DefDesc = DefMI->getDesc();
+ for (unsigned i = 1, e = DefDesc.getNumOperands();
+ i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
+ NewMI.addOperand(DefMI->getOperand(i));
+
+ unsigned CondCode = MI->getOperand(3).getImm();
+ if (Invert)
+ NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
+ else
+ NewMI.addImm(CondCode);
+ NewMI.addOperand(MI->getOperand(4));
+
+ // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
+ if (NewMI->hasOptionalDef())
+ AddDefaultCC(NewMI);
+
+ // The caller will erase MI, but not DefMI.
+ DefMI->eraseFromParent();
+ return NewMI;
+}
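The two hooks above follow the select-optimization contract declared in the ARMBaseInstrInfo.h hunk further down: analyzeSelect returns false once it has described the MOVCC, and optimizeSelect builds the predicated replacement and erases DefMI, leaving the original select for its caller to remove. A minimal sketch of such a caller, assuming only that these virtuals override the corresponding TargetInstrInfo hooks with the signatures shown in that hunk (hypothetical helper, not code from this patch):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/Target/TargetInstrInfo.h"
    using namespace llvm;

    // Hypothetical peephole-style caller of analyzeSelect/optimizeSelect.
    static bool foldSelectIntoPredicatedOp(MachineInstr *MI,
                                           const TargetInstrInfo *TII) {
      SmallVector<MachineOperand, 4> Cond;
      unsigned TrueOp = 0, FalseOp = 0;
      bool Optimizable = false;
      // analyzeSelect returns true when it cannot understand the select.
      if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
        return false;
      if (!Optimizable)
        return false;
      // optimizeSelect creates the predicated instruction and erases DefMI.
      if (!TII->optimizeSelect(MI, /*PreferFalse=*/false))
        return false;
      // The original MOVCC is left for the caller to erase.
      MI->eraseFromParent();
      return true;
    }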
+
/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
/// instruction is encoded with an 'S' bit is determined by the optional CPSR
/// def operand.
@@ -3180,11 +3357,18 @@ enum ARMExeDomain {
//
std::pair<uint16_t, uint16_t>
ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
- // VMOVD is a VFP instruction, but can be changed to NEON if it isn't
- // predicated.
+ // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
+ // if they are not predicated.
if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
+ // Cortex-A9 is particularly picky about mixing the two and wants these
+ // converted.
+ if (Subtarget.isCortexA9() && !isPredicated(MI) &&
+ (MI->getOpcode() == ARM::VMOVRS ||
+ MI->getOpcode() == ARM::VMOVSR))
+ return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
+
// No other instructions can be swizzled, so just determine their domain.
unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
@@ -3204,22 +3388,95 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
void
ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
- // We only know how to change VMOVD into VORR.
- assert(MI->getOpcode() == ARM::VMOVD && "Can only swizzle VMOVD");
- if (Domain != ExeNEON)
- return;
+ unsigned DstReg, SrcReg, DReg;
+ unsigned Lane;
+ MachineInstrBuilder MIB(MI);
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ bool isKill;
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("cannot handle opcode!");
+ break;
+ case ARM::VMOVD:
+ if (Domain != ExeNEON)
+ break;
- // Zap the predicate operands.
- assert(!isPredicated(MI) && "Cannot predicate a VORRd");
- MI->RemoveOperand(3);
- MI->RemoveOperand(2);
+ // Zap the predicate operands.
+ assert(!isPredicated(MI) && "Cannot predicate a VORRd");
+ MI->RemoveOperand(3);
+ MI->RemoveOperand(2);
- // Change to a VORRd which requires two identical use operands.
- MI->setDesc(get(ARM::VORRd));
+ // Change to a VORRd which requires two identical use operands.
+ MI->setDesc(get(ARM::VORRd));
+
+ // Add the extra source operand and new predicates.
+ // This will go before any implicit ops.
+ AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
+ break;
+ case ARM::VMOVRS:
+ if (Domain != ExeNEON)
+ break;
+ assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
+
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+
+ DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, &ARM::DPRRegClass);
+ Lane = 0;
+ if (DReg == ARM::NoRegister) {
+ DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_1, &ARM::DPRRegClass);
+ Lane = 1;
+ assert(DReg && "S-register with no D super-register?");
+ }
+
+ MI->RemoveOperand(3);
+ MI->RemoveOperand(2);
+ MI->RemoveOperand(1);
+
+ MI->setDesc(get(ARM::VGETLNi32));
+ MIB.addReg(DReg);
+ MIB.addImm(Lane);
+
+ MIB->getOperand(1).setIsUndef();
+ MIB.addReg(SrcReg, RegState::Implicit);
+
+ AddDefaultPred(MIB);
+ break;
+ case ARM::VMOVSR:
+ if (Domain != ExeNEON)
+ break;
+ assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
+
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_0, &ARM::DPRRegClass);
+ Lane = 0;
+ if (DReg == ARM::NoRegister) {
+ DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_1, &ARM::DPRRegClass);
+ Lane = 1;
+ assert(DReg && "S-register with no D super-register?");
+ }
+ isKill = MI->getOperand(0).isKill();
+
+ MI->RemoveOperand(3);
+ MI->RemoveOperand(2);
+ MI->RemoveOperand(1);
+ MI->RemoveOperand(0);
+
+ MI->setDesc(get(ARM::VSETLNi32));
+ MIB.addReg(DReg, RegState::Define);
+ MIB.addReg(DReg, RegState::Undef);
+ MIB.addReg(SrcReg);
+ MIB.addImm(Lane);
+
+ if (isKill)
+ MIB->addRegisterKilled(DstReg, TRI, true);
+ MIB->addRegisterDefined(DstReg, TRI);
+
+ AddDefaultPred(MIB);
+ break;
+ }
- // Add the extra source operand and new predicates.
- // This will go before any implicit ops.
- AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
}
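Both the VMOVRS and VMOVSR cases above repeat the same lookup: find the D register containing the S register and the lane (0 or 1) it occupies within it. A minimal sketch of that shared step, written as if it lived in this file (hypothetical helper, not part of the patch):

    // Returns the D super-register of SReg and sets Lane to 0 or 1.
    static unsigned getDPRLaneForSPR(unsigned SReg, unsigned &Lane,
                                     const TargetRegisterInfo *TRI) {
      unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0,
                                               &ARM::DPRRegClass);
      Lane = 0;
      if (DReg == ARM::NoRegister) {
        DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
        Lane = 1;
        assert(DReg && "S-register with no D super-register?");
      }
      return DReg;
    }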
bool ARMBaseInstrInfo::hasNOP() const {
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 1a10a4a..92e5ee8 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -202,6 +202,13 @@ public:
unsigned SrcReg2, int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const;
+ virtual bool analyzeSelect(const MachineInstr *MI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ unsigned &TrueOp, unsigned &FalseOp,
+ bool &Optimizable) const;
+
+ virtual MachineInstr *optimizeSelect(MachineInstr *MI, bool) const;
+
/// FoldImmediate - 'Reg' is known to be defined by a move immediate
/// instruction, try to fold the immediate into the use instruction.
virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
@@ -352,6 +359,11 @@ ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
int getMatchingCondBranchOpcode(int Opc);
+/// Determine if MI can be folded into an ARM MOVCC instruction, and return the
+/// opcode of the SSA instruction representing the conditional MI.
+unsigned canFoldARMInstrIntoMOVCC(unsigned Reg,
+ MachineInstr *&MI,
+ const MachineRegisterInfo &MRI);
/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether
/// the instruction is encoded with an 'S' bit is determined by the optional
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 231bd26..9deb96e 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -62,8 +62,20 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
const uint16_t*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+  bool ghcCall = false;
+
+  if (MF) {
+    const Function *F = MF->getFunction();
+    ghcCall = (F && F->getCallingConv() == CallingConv::GHC);
+  }
+
+  if (ghcCall)
+    return CSR_GHC_SaveList;
+
  return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
    ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
}
const uint32_t*
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index b9a2512..bda1517 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -79,6 +79,25 @@ def RetFastCC_ARM_APCS : CallingConv<[
CCDelegateTo<RetCC_ARM_APCS>
]>;
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention for GHC
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_APCS_GHC : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
+ CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>,
+ CCIfType<[f32], CCAssignToReg<[S16, S17, S18, S19, S20, S21, S22, S23]>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, SpLim
+ CCIfType<[i32], CCAssignToReg<[R4, R5, R6, R7, R8, R9, R10, R11]>>
+]>;
//===----------------------------------------------------------------------===//
// ARM AAPCS (EABI) Calling Convention, common parts
@@ -113,6 +132,9 @@ def RetCC_ARM_AAPCS_Common : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_ARM_AAPCS : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
@@ -138,6 +160,9 @@ def RetCC_ARM_AAPCS : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_ARM_AAPCS_VFP : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
@@ -171,3 +196,9 @@ def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
// iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register.
// Also save R7-R4 first to match the stack frame fixed spill areas.
def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
+
+// The GHC set of callee-saved registers is empty, as all of those registers
+// are used for passing STG registers around.
+// The 'add' is a workaround for not being able to compile an empty list:
+// def CSR_GHC : CalleeSavedRegs<()>;
+def CSR_GHC : CalleeSavedRegs<(add)>;
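Read together with CC_ARM_APCS_GHC above: if the STG registers are marshalled in the order the comment lists them, Base lands in R4, Sp in R5, Hp in R6, R1-R4 in R7-R10 and SpLim in R11, with f32 values in S16-S23 and f64 values in D8-D11. That mapping is an inference from the CCAssignToReg order, not something the patch states explicitly.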
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index af260a5..132b81f 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -264,7 +264,7 @@ namespace {
emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
return 0;
}
- unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg());
int32_t Imm12 = MO1.getImm();
uint32_t Binary;
Binary = Imm12 & 0xfff;
@@ -314,18 +314,24 @@ namespace {
// {7-0} = imm8
uint32_t Binary = 0;
const MachineOperand &MO = MI.getOperand(Op);
- uint32_t Reg = getMachineOpValue(MI, MO);
- Binary |= (Reg << 9);
-
- // If there is a non-zero immediate offset, encode it.
- if (MO.isReg()) {
- const MachineOperand &MO1 = MI.getOperand(Op + 1);
- if (uint32_t ImmOffs = ARM_AM::getAM5Offset(MO1.getImm())) {
- if (ARM_AM::getAM5Op(MO1.getImm()) == ARM_AM::add)
- Binary |= 1 << 8;
- Binary |= ImmOffs & 0xff;
- return Binary;
- }
+ const MachineOperand &MO1 = MI.getOperand(Op + 1);
+ if (!MO.isReg()) {
+ emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
+ return 0;
+ }
+ unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg());
+ int32_t Imm12 = MO1.getImm();
+
+ // Special value for #-0
+ if (Imm12 == INT32_MIN)
+ Imm12 = 0;
+
+ // Immediate is always encoded as positive. The 'U' bit controls add vs
+ // sub.
+ bool isAdd = true;
+ if (Imm12 < 0) {
+ Imm12 = -Imm12;
+ isAdd = false;
}
// If immediate offset is omitted, default to +0.
@@ -367,6 +373,12 @@ namespace {
void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc,
intptr_t JTBase = 0) const;
+ unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) const;
};
}
@@ -455,7 +467,7 @@ unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI,
unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
const MachineOperand &MO) const {
if (MO.isReg())
- return getARMRegisterNumbering(MO.getReg());
+ return II->getRegisterInfo().getEncodingValue(MO.getReg());
else if (MO.isImm())
return static_cast<unsigned>(MO.getImm());
else if (MO.isFPImm())
@@ -816,7 +828,7 @@ void ARMCodeEmitter::emitLEApcrelInstruction(const MachineInstr &MI) {
Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
// Encode Rn which is PC.
- Binary |= getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+ Binary |= II->getRegisterInfo().getEncodingValue(ARM::PC) << ARMII::RegRnShift;
// Encode the displacement which is a so_imm.
// Set bit I(25) to identify this is the immediate form of <shifter_op>
@@ -844,7 +856,7 @@ void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) {
Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
// Encode Rn which is PC.
- Binary |= getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+ Binary |= II->getRegisterInfo().getEncodingValue(ARM::PC) << ARMII::RegRnShift;
// Encode the displacement.
Binary |= 1 << ARMII::I_BitShift;
@@ -1045,7 +1057,7 @@ unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI,
if (Rs) {
// Encode Rs bit[11:8].
assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
- return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+ return Binary | (II->getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift);
}
// Encode shift_imm bit[11:7].
@@ -1101,7 +1113,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
else if (ImplicitRd)
// Special handling for implicit use (e.g. PC).
- Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift);
if (MCID.Opcode == ARM::MOVi16) {
// Get immediate from MI.
@@ -1151,7 +1163,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
if (!isUnary) {
if (ImplicitRn)
// Special handling for implicit use (e.g. PC).
- Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift);
else {
Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift;
++OpIdx;
@@ -1168,7 +1180,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
if (MO.isReg()) {
// Encode register Rm.
- emitWordLE(Binary | getARMRegisterNumbering(MO.getReg()));
+ emitWordLE(Binary | II->getRegisterInfo().getEncodingValue(MO.getReg()));
return;
}
@@ -1217,14 +1229,14 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
// Set first operand
if (ImplicitRd)
// Special handling for implicit use (e.g. PC).
- Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift);
else
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
// Set second operand
if (ImplicitRn)
// Special handling for implicit use (e.g. PC).
- Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift);
else
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
@@ -1251,7 +1263,7 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
Binary |= 1 << ARMII::I_BitShift;
assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg()));
// Set bit[3:0] to the corresponding Rm register
- Binary |= getARMRegisterNumbering(MO2.getReg());
+ Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg());
// If this instr is in scaled register offset/index instruction, set
// shift_immed(bit[11:7]) and shift(bit[6:5]) fields.
@@ -1295,7 +1307,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
// Set second operand
if (ImplicitRn)
// Special handling for implicit use (e.g. PC).
- Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift);
else
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
@@ -1314,7 +1326,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
// If this instr is in register offset/index encoding, set bit[3:0]
// to the corresponding Rm register.
if (MO2.getReg()) {
- Binary |= getARMRegisterNumbering(MO2.getReg());
+ Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg());
emitWordLE(Binary);
return;
}
@@ -1385,7 +1397,7 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || MO.isImplicit())
break;
- unsigned RegNum = getARMRegisterNumbering(MO.getReg());
+ unsigned RegNum = II->getRegisterInfo().getEncodingValue(MO.getReg());
assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
RegNum < 16);
Binary |= 0x1 << RegNum;
@@ -1632,7 +1644,7 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR)
// The return register is LR.
- Binary |= getARMRegisterNumbering(ARM::LR);
+ Binary |= II->getRegisterInfo().getEncodingValue(ARM::LR);
else
// otherwise, set the return register
Binary |= getMachineOpValue(MI, 0);
@@ -1640,11 +1652,12 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
emitWordLE(Binary);
}
-static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeVFPRd(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegD = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
bool isSPVFP = ARM::SPRRegClass.contains(RegD);
- RegD = getARMRegisterNumbering(RegD);
+ RegD = II->getRegisterInfo().getEncodingValue(RegD);
if (!isSPVFP) {
Binary |= (RegD & 0x0F) << ARMII::RegRdShift;
Binary |= ((RegD & 0x10) >> 4) << ARMII::D_BitShift;
@@ -1655,11 +1668,12 @@ static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) {
return Binary;
}
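For the double-precision case above, the register's encoding value is split across the four-bit Rd field and the separate D bit. A tiny worked example (illustration only, not part of the patch):

    // d17 has encoding value 17 = 0b10001: low four bits go in Rd, bit 4 is D.
    static void splitDPREncoding(unsigned RegD, unsigned &RdField,
                                 unsigned &DBit) {
      RdField = RegD & 0x0F;        // 17 -> 0b0001
      DBit    = (RegD & 0x10) >> 4; // 17 -> 1
    }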
-static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeVFPRn(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegN = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
bool isSPVFP = ARM::SPRRegClass.contains(RegN);
- RegN = getARMRegisterNumbering(RegN);
+ RegN = II->getRegisterInfo().getEncodingValue(RegN);
if (!isSPVFP) {
Binary |= (RegN & 0x0F) << ARMII::RegRnShift;
Binary |= ((RegN & 0x10) >> 4) << ARMII::N_BitShift;
@@ -1670,11 +1684,12 @@ static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) {
return Binary;
}
-static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeVFPRm(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegM = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
bool isSPVFP = ARM::SPRRegClass.contains(RegM);
- RegM = getARMRegisterNumbering(RegM);
+ RegM = II->getRegisterInfo().getEncodingValue(RegM);
if (!isSPVFP) {
Binary |= (RegM & 0x0F);
Binary |= ((RegM & 0x10) >> 4) << ARMII::M_BitShift;
@@ -1885,28 +1900,31 @@ void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) {
emitWordLE(Binary);
}
-static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeNEONRd(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegD = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- RegD = getARMRegisterNumbering(RegD);
+ RegD = II->getRegisterInfo().getEncodingValue(RegD);
Binary |= (RegD & 0xf) << ARMII::RegRdShift;
Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift;
return Binary;
}
-static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeNEONRn(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegN = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- RegN = getARMRegisterNumbering(RegN);
+ RegN = II->getRegisterInfo().getEncodingValue(RegN);
Binary |= (RegN & 0xf) << ARMII::RegRnShift;
Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift;
return Binary;
}
-static unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeNEONRm(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegM = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- RegM = getARMRegisterNumbering(RegM);
+ RegM = II->getRegisterInfo().getEncodingValue(RegM);
Binary |= (RegM & 0xf);
Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift;
return Binary;
@@ -1940,7 +1958,7 @@ void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) {
Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
unsigned RegT = MI.getOperand(RegTOpIdx).getReg();
- RegT = getARMRegisterNumbering(RegT);
+ RegT = II->getRegisterInfo().getEncodingValue(RegT);
Binary |= (RegT << ARMII::RegRdShift);
Binary |= encodeNEONRn(MI, RegNOpIdx);
@@ -1969,7 +1987,7 @@ void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) {
Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
unsigned RegT = MI.getOperand(1).getReg();
- RegT = getARMRegisterNumbering(RegT);
+ RegT = II->getRegisterInfo().getEncodingValue(RegT);
Binary |= (RegT << ARMII::RegRdShift);
Binary |= encodeNEONRn(MI, 0);
emitWordLE(Binary);
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index a242b13..15bb32e 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1009,7 +1009,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
unsigned OpIdx = 0;
unsigned SrcReg = MI.getOperand(1).getReg();
- unsigned Lane = getARMRegisterNumbering(SrcReg) & 1;
+ unsigned Lane = TRI->getEncodingValue(SrcReg) & 1;
unsigned DReg = TRI->getMatchingSuperReg(SrcReg,
Lane & 1 ? ARM::ssub_1 : ARM::ssub_0,
&ARM::DPR_VFP2RegClass);
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index b96395f..5a5ca1b 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -87,8 +87,9 @@ class ARMFastISel : public FastISel {
LLVMContext *Context;
public:
- explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
- : FastISel(funcInfo),
+ explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo)
+ : FastISel(funcInfo, libInfo),
TM(funcInfo.MF->getTarget()),
TII(*TM.getInstrInfo()),
TLI(*TM.getTargetLowering()) {
@@ -99,51 +100,53 @@ class ARMFastISel : public FastISel {
}
// Code from FastISel.cpp.
- virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC);
- virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill);
- virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill);
- virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill,
- unsigned Op2, bool Op2IsKill);
- virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- uint64_t Imm);
- virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- const ConstantFP *FPImm);
- virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill,
- uint64_t Imm);
- virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- uint64_t Imm);
- virtual unsigned FastEmitInst_ii(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- uint64_t Imm1, uint64_t Imm2);
-
- virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
- unsigned Op0, bool Op0IsKill,
- uint32_t Idx);
+ private:
+ unsigned FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC);
+ unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill);
+ unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill);
+ unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm);
+ unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm);
+ unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm);
+ unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm);
+ unsigned FastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm1, uint64_t Imm2);
+
+ unsigned FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx);
// Backend specific FastISel code.
+ private:
virtual bool TargetSelectInstruction(const Instruction *I);
virtual unsigned TargetMaterializeConstant(const Constant *C);
virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI);
-
+ private:
#include "ARMGenFastISel.inc"
// Instruction selection routines.
@@ -167,6 +170,7 @@ class ARMFastISel : public FastISel {
bool SelectRet(const Instruction *I);
bool SelectTrunc(const Instruction *I);
bool SelectIntExt(const Instruction *I);
+ bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);
// Utility routines.
private:
@@ -1819,9 +1823,12 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
default:
llvm_unreachable("Unsupported calling convention");
case CallingConv::Fast:
- // Ignore fastcc. Silence compiler warnings.
- (void)RetFastCC_ARM_APCS;
- (void)FastCC_ARM_APCS;
+ if (Subtarget->hasVFP2() && !isVarArg) {
+ if (!Subtarget->isAAPCS_ABI())
+ return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
+ // For AAPCS ABI targets, just use VFP variant of the calling convention.
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ }
// Fallthrough
case CallingConv::C:
// Use target triple & subtarget features to do actual dispatch.
@@ -1842,6 +1849,11 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ case CallingConv::GHC:
+ if (Return)
+ llvm_unreachable("Can't return in GHC call convention");
+ else
+ return CC_ARM_APCS_GHC;
}
}
@@ -2608,6 +2620,61 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) {
return true;
}
+bool ARMFastISel::SelectShift(const Instruction *I,
+ ARM_AM::ShiftOpc ShiftTy) {
+  // Thumb2 mode is handled by the target-independent selector
+  // or by SelectionDAG ISel.
+ if (isThumb2)
+ return false;
+
+ // Only handle i32 now.
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+ if (DestVT != MVT::i32)
+ return false;
+
+ unsigned Opc = ARM::MOVsr;
+ unsigned ShiftImm;
+ Value *Src2Value = I->getOperand(1);
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) {
+ ShiftImm = CI->getZExtValue();
+
+    // Fall back to SelectionDAG isel if the shift amount
+    // is zero or at least the width of the value type.
+    if (ShiftImm == 0 || ShiftImm >= 32)
+ return false;
+
+ Opc = ARM::MOVsi;
+ }
+
+ Value *Src1Value = I->getOperand(0);
+ unsigned Reg1 = getRegForValue(Src1Value);
+ if (Reg1 == 0) return false;
+
+ unsigned Reg2;
+ if (Opc == ARM::MOVsr) {
+ Reg2 = getRegForValue(Src2Value);
+ if (Reg2 == 0) return false;
+ }
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+  if (ResultReg == 0) return false;
+
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(Reg1);
+
+ if (Opc == ARM::MOVsi)
+ MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm));
+ else if (Opc == ARM::MOVsr) {
+ MIB.addReg(Reg2);
+ MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0));
+ }
+
+ AddOptionalDefs(MIB);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
// TODO: SoftFP support.
bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
@@ -2668,6 +2735,12 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
case Instruction::ZExt:
case Instruction::SExt:
return SelectIntExt(I);
+ case Instruction::Shl:
+ return SelectShift(I, ARM_AM::lsl);
+ case Instruction::LShr:
+ return SelectShift(I, ARM_AM::lsr);
+ case Instruction::AShr:
+ return SelectShift(I, ARM_AM::asr);
default: break;
}
return false;
@@ -2720,14 +2793,15 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
}
namespace llvm {
- FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
+ FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) {
// Completely untested on non-iOS.
const TargetMachine &TM = funcInfo.MF->getTarget();
// Darwin and thumb1 only for now.
const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only())
- return new ARMFastISel(funcInfo);
+ return new ARMFastISel(funcInfo, libInfo);
return 0;
}
}
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 2629496..aee72d2 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -15,6 +15,8 @@
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Function.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -151,6 +153,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
int FramePtrSpillFI = 0;
int D8SpillFI = 0;
+ // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue.
+ if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+ return;
+
// Allocate the vararg register save area. This is not counted in NumBytes.
if (VARegSaveSize)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize,
@@ -354,6 +360,10 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
int NumBytes = (int)MFI->getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue.
+ if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+ return;
+
if (!AFI->hasStackFrame()) {
if (NumBytes != 0)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 1953192..c6f9d15 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -47,11 +47,6 @@ CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
cl::desc("Check fp vmla / vmls hazard at isel time"),
cl::init(true));
-static cl::opt<bool>
-DisableARMIntABS("disable-arm-int-abs", cl::Hidden,
- cl::desc("Enable / disable ARM integer abs transform"),
- cl::init(false));
-
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
@@ -244,7 +239,6 @@ private:
/// SelectCMOVOp - Select CMOV instructions for ARM.
SDNode *SelectCMOVOp(SDNode *N);
- SDNode *SelectConditionalOp(SDNode *N);
SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
@@ -2368,115 +2362,6 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
}
-SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) {
- SDValue FalseVal = N->getOperand(0);
- SDValue TrueVal = N->getOperand(1);
- ARMCC::CondCodes CCVal =
- (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
- SDValue CCR = N->getOperand(3);
- assert(CCR.getOpcode() == ISD::Register);
- SDValue InFlag = N->getOperand(4);
- SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
- SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
-
- if (Subtarget->isThumb()) {
- SDValue CPTmp0;
- SDValue CPTmp1;
- if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) {
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ARMISD::CAND: Opc = ARM::t2ANDCCrs; break;
- case ARMISD::COR: Opc = ARM::t2ORRCCrs; break;
- case ARMISD::CXOR: Opc = ARM::t2EORCCrs; break;
- }
- SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CC, CCR, Reg0, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
- }
-
- ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
- if (T) {
- unsigned TrueImm = T->getZExtValue();
- if (is_t2_so_imm(TrueImm)) {
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ARMISD::CAND: Opc = ARM::t2ANDCCri; break;
- case ARMISD::COR: Opc = ARM::t2ORRCCri; break;
- case ARMISD::CXOR: Opc = ARM::t2EORCCri; break;
- }
- SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
- SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
- }
- }
-
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ARMISD::CAND: Opc = ARM::t2ANDCCrr; break;
- case ARMISD::COR: Opc = ARM::t2ORRCCrr; break;
- case ARMISD::CXOR: Opc = ARM::t2EORCCrr; break;
- }
- SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
- }
-
- SDValue CPTmp0;
- SDValue CPTmp1;
- SDValue CPTmp2;
- if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) {
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ARMISD::CAND: Opc = ARM::ANDCCrsi; break;
- case ARMISD::COR: Opc = ARM::ORRCCrsi; break;
- case ARMISD::CXOR: Opc = ARM::EORCCrsi; break;
- }
- SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, Reg0, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
- }
-
- if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ARMISD::CAND: Opc = ARM::ANDCCrsr; break;
- case ARMISD::COR: Opc = ARM::ORRCCrsr; break;
- case ARMISD::CXOR: Opc = ARM::EORCCrsr; break;
- }
- SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, Reg0, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8);
- }
-
- ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
- if (T) {
- unsigned TrueImm = T->getZExtValue();
- if (is_so_imm(TrueImm)) {
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ARMISD::CAND: Opc = ARM::ANDCCri; break;
- case ARMISD::COR: Opc = ARM::ORRCCri; break;
- case ARMISD::CXOR: Opc = ARM::EORCCri; break;
- }
- SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
- SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
- }
- }
-
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ARMISD::CAND: Opc = ARM::ANDCCrr; break;
- case ARMISD::COR: Opc = ARM::ORRCCrr; break;
- case ARMISD::CXOR: Opc = ARM::EORCCrr; break;
- }
- SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
-}
-
/// Target-specific DAG combining for ISD::XOR.
/// Target-independent combining lowers SELECT_CC nodes of the form
/// select_cc setg[ge] X, 0, X, -X
@@ -2492,14 +2377,10 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
SDValue XORSrc1 = N->getOperand(1);
EVT VT = N->getValueType(0);
- if (DisableARMIntABS)
- return NULL;
-
if (Subtarget->isThumb1Only())
return NULL;
- if (XORSrc0.getOpcode() != ISD::ADD ||
- XORSrc1.getOpcode() != ISD::SRA)
+ if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
return NULL;
SDValue ADDSrc0 = XORSrc0.getOperand(0);
@@ -2510,16 +2391,10 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
EVT XType = SRASrc0.getValueType();
unsigned Size = XType.getSizeInBits() - 1;
- if (ADDSrc1 == XORSrc1 &&
- ADDSrc0 == SRASrc0 &&
- XType.isInteger() &&
- SRAConstant != NULL &&
+ if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
+ XType.isInteger() && SRAConstant != NULL &&
Size == SRAConstant->getZExtValue()) {
-
- unsigned Opcode = ARM::ABS;
- if (Subtarget->isThumb2())
- Opcode = ARM::t2ABS;
-
+ unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
}
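The DAG shape matched above is the branchless absolute-value idiom produced by the target-independent combine. A minimal illustration of the identity (not from the patch; it assumes an arithmetic right shift on signed values, and INT32_MIN wraps back to itself):

    #include <cstdint>

    // Mirrors (xor (add X, (sra X, 31)), (sra X, 31)) for 32-bit X.
    static int32_t absViaAddSraXor(int32_t x) {
      int32_t mask = x >> 31;     // 0 for non-negative x, -1 for negative x
      return (x + mask) ^ mask;   // |x| for every value except INT32_MIN
    }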
@@ -2814,10 +2689,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::CMOV:
return SelectCMOVOp(N);
- case ARMISD::CAND:
- case ARMISD::COR:
- case ARMISD::CXOR:
- return SelectConditionalOp(N);
case ARMISD::VZIP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 04370c0..df4039b 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -90,75 +90,70 @@ static const uint16_t GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
-void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
- EVT PromotedBitwiseVT) {
+void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
+ MVT PromotedBitwiseVT) {
if (VT != PromotedLdStVT) {
- setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
- AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
- PromotedLdStVT.getSimpleVT());
+ setOperationAction(ISD::LOAD, VT, Promote);
+ AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
- setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
- AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
- PromotedLdStVT.getSimpleVT());
+ setOperationAction(ISD::STORE, VT, Promote);
+ AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
}
- EVT ElemTy = VT.getVectorElementType();
+ MVT ElemTy = VT.getVectorElementType();
if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
- setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
if (ElemTy == MVT::i32) {
- setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::UINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT, Custom);
} else {
- setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
- }
- setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
- setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::UINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::FP_TO_SINT, VT, Expand);
+ setOperationAction(ISD::FP_TO_UINT, VT, Expand);
+ }
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
+ setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT.isInteger()) {
- setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
}
// Promote all bit-wise operations.
if (VT.isInteger() && VT != PromotedBitwiseVT) {
- setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
- AddPromotedToType (ISD::AND, VT.getSimpleVT(),
- PromotedBitwiseVT.getSimpleVT());
- setOperationAction(ISD::OR, VT.getSimpleVT(), Promote);
- AddPromotedToType (ISD::OR, VT.getSimpleVT(),
- PromotedBitwiseVT.getSimpleVT());
- setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
- AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
- PromotedBitwiseVT.getSimpleVT());
+ setOperationAction(ISD::AND, VT, Promote);
+ AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
+ setOperationAction(ISD::OR, VT, Promote);
+ AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
+ setOperationAction(ISD::XOR, VT, Promote);
+ AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
}
// Neon does not support vector divide/remainder operations.
- setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
}
-void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
+void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
addRegisterClass(VT, &ARM::DPRRegClass);
addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}
-void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
+void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
addRegisterClass(VT, &ARM::QPRRegClass);
addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
@@ -903,9 +898,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
case ARMISD::CMOV: return "ARMISD::CMOV";
- case ARMISD::CAND: return "ARMISD::CAND";
- case ARMISD::COR: return "ARMISD::COR";
- case ARMISD::CXOR: return "ARMISD::CXOR";
case ARMISD::RBIT: return "ARMISD::RBIT";
@@ -1041,8 +1033,9 @@ const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
// Create a fast isel object.
FastISel *
-ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
- return ARM::createFastISel(funcInfo);
+ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const {
+ return ARM::createFastISel(funcInfo, libInfo);
}
/// getMaximalGlobalOffset - Returns the maximal possible offset which can
@@ -1171,6 +1164,8 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ case CallingConv::GHC:
+ return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
}
}
@@ -4271,6 +4266,10 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// Record this extraction against the appropriate vector if possible...
SDValue SourceVec = V.getOperand(0);
+ // If the element number isn't a constant, we can't effectively
+ // analyze what's going on.
+ if (!isa<ConstantSDNode>(V.getOperand(1)))
+ return SDValue();
unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
bool FoundSource = false;
for (unsigned j = 0; j < SourceVecs.size(); ++j) {
@@ -6152,13 +6151,12 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
}
// Add the jump table entries as successors to the MBB.
- MachineBasicBlock *PrevMBB = 0;
+ SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
for (std::vector<MachineBasicBlock*>::iterator
I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
MachineBasicBlock *CurMBB = *I;
- if (PrevMBB != CurMBB)
+ if (SeenMBBs.insert(CurMBB))
DispContBB->addSuccessor(CurMBB);
- PrevMBB = CurMBB;
}
// N.B. the order the invoke BBs are processed in doesn't matter here.
@@ -6971,62 +6969,137 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// ARM Optimization Hooks
//===----------------------------------------------------------------------===//
+// Helper function that checks if N is a null or all ones constant.
+static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+ if (!C)
+ return false;
+ return AllOnes ? C->isAllOnesValue() : C->isNullValue();
+}
+
+// Return true if N is conditionally 0 or all ones.
+// Detects these expressions where cc is an i1 value:
+//
+// (select cc 0, y) [AllOnes=0]
+// (select cc y, 0) [AllOnes=0]
+// (zext cc) [AllOnes=0]
+// (sext cc) [AllOnes=0/1]
+// (select cc -1, y) [AllOnes=1]
+// (select cc y, -1) [AllOnes=1]
+//
+// Invert is set when N is the null/all-ones constant for CC == false.
+// OtherOp is set to the alternative value of N.
+static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
+ SDValue &CC, bool &Invert,
+ SDValue &OtherOp,
+ SelectionDAG &DAG) {
+ switch (N->getOpcode()) {
+ default: return false;
+ case ISD::SELECT: {
+ CC = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ if (isZeroOrAllOnes(N1, AllOnes)) {
+ Invert = false;
+ OtherOp = N2;
+ return true;
+ }
+ if (isZeroOrAllOnes(N2, AllOnes)) {
+ Invert = true;
+ OtherOp = N1;
+ return true;
+ }
+ return false;
+ }
+ case ISD::ZERO_EXTEND:
+ // (zext cc) can never be the all ones value.
+ if (AllOnes)
+ return false;
+ // Fall through.
+ case ISD::SIGN_EXTEND: {
+ EVT VT = N->getValueType(0);
+ CC = N->getOperand(0);
+ if (CC.getValueType() != MVT::i1)
+ return false;
+ Invert = !AllOnes;
+ if (AllOnes)
+ // When looking for an AllOnes constant, N is an sext, and the 'other'
+ // value is 0.
+ OtherOp = DAG.getConstant(0, VT);
+ else if (N->getOpcode() == ISD::ZERO_EXTEND)
+ // When looking for a 0 constant, N can be zext or sext.
+ OtherOp = DAG.getConstant(1, VT);
+ else
+ OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+ return true;
+ }
+ }
+}
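For instance, with AllOnes unset, (select cc, y, 0) is matched by the second if above and yields CC = cc, Invert = true, OtherOp = y; (zext cc) likewise yields Invert = true with OtherOp = constant 1, since the zext is zero exactly when cc is false.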
+
+// Combine a constant select operand into its use:
+//
+// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
+// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
+// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
+// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
+// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
+//
+// The transform is rejected if the select doesn't have a constant operand that
+// is null, or all ones when AllOnes is set.
+//
+// Also recognize sext/zext from i1:
+//
+// (add (zext cc), x) -> (select cc (add x, 1), x)
+// (add (sext cc), x) -> (select cc (add x, -1), x)
+//
+// These transformations eventually create predicated instructions.
+//
+// @param N The node to transform.
+// @param Slct The N operand that is a select.
+// @param OtherOp The other N operand (x above).
+// @param DCI Context.
+// @param AllOnes Require the select constant to be all ones instead of null.
+// @returns The new node, or SDValue() on failure.
static
SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ bool AllOnes = false) {
SelectionDAG &DAG = DCI.DAG;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = N->getValueType(0);
- unsigned Opc = N->getOpcode();
- bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
- SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
- SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
- ISD::CondCode CC = ISD::SETCC_INVALID;
-
- if (isSlctCC) {
- CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
- } else {
- SDValue CCOp = Slct.getOperand(0);
- if (CCOp.getOpcode() == ISD::SETCC)
- CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
- }
-
- bool DoXform = false;
- bool InvCC = false;
- assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
- "Bad input!");
-
- if (LHS.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(LHS)->isNullValue()) {
- DoXform = true;
- } else if (CC != ISD::SETCC_INVALID &&
- RHS.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(RHS)->isNullValue()) {
- std::swap(LHS, RHS);
- SDValue Op0 = Slct.getOperand(0);
- EVT OpVT = isSlctCC ? Op0.getValueType() :
- Op0.getOperand(0).getValueType();
- bool isInt = OpVT.isInteger();
- CC = ISD::getSetCCInverse(CC, isInt);
-
- if (!TLI.isCondCodeLegal(CC, OpVT))
- return SDValue(); // Inverse operator isn't legal.
-
- DoXform = true;
- InvCC = true;
- }
-
- if (DoXform) {
- SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
- if (isSlctCC)
- return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
- Slct.getOperand(0), Slct.getOperand(1), CC);
- SDValue CCOp = Slct.getOperand(0);
- if (InvCC)
- CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
- CCOp.getOperand(0), CCOp.getOperand(1), CC);
- return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
- CCOp, OtherOp, Result);
+ SDValue NonConstantVal;
+ SDValue CCOp;
+ bool SwapSelectOps;
+ if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
+ NonConstantVal, DAG))
+ return SDValue();
+
+  // Slct is now known to be the desired identity constant when CC is true.
+ SDValue TrueVal = OtherOp;
+ SDValue FalseVal = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT,
+ OtherOp, NonConstantVal);
+ // Unless SwapSelectOps says CC should be false.
+ if (SwapSelectOps)
+ std::swap(TrueVal, FalseVal);
+
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+ CCOp, TrueVal, FalseVal);
+}
+
+// Attempt combineSelectAndUse on each operand of a commutative operator N.
+static
+SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (N0.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes);
+ if (Result.getNode())
+ return Result;
+ }
+ if (N1.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes);
+ if (Result.getNode())
+ return Result;
}
return SDValue();
}
@@ -7134,7 +7207,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
return Result;
// fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
- if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
+ if (N0.getNode()->hasOneUse()) {
SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
if (Result.getNode()) return Result;
}
@@ -7166,7 +7239,7 @@ static SDValue PerformSUBCombine(SDNode *N,
SDValue N1 = N->getOperand(1);
// fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
- if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
+ if (N1.getNode()->hasOneUse()) {
SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
if (Result.getNode()) return Result;
}
@@ -7294,49 +7367,6 @@ static SDValue PerformMULCombine(SDNode *N,
return SDValue();
}
-static bool isCMOVWithZeroOrAllOnesLHS(SDValue N, bool AllOnes) {
- if (N.getOpcode() != ARMISD::CMOV || !N.getNode()->hasOneUse())
- return false;
-
- SDValue FalseVal = N.getOperand(0);
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(FalseVal);
- if (!C)
- return false;
- if (AllOnes)
- return C->isAllOnesValue();
- return C->isNullValue();
-}
-
-/// formConditionalOp - Combine an operation with a conditional move operand
-/// to form a conditional op. e.g. (or x, (cmov 0, y, cond)) => (or.cond x, y)
-/// (and x, (cmov -1, y, cond)) => (and.cond, x, y)
-static SDValue formConditionalOp(SDNode *N, SelectionDAG &DAG,
- bool Commutable) {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
-
- bool isAND = N->getOpcode() == ISD::AND;
- bool isCand = isCMOVWithZeroOrAllOnesLHS(N1, isAND);
- if (!isCand && Commutable) {
- isCand = isCMOVWithZeroOrAllOnesLHS(N0, isAND);
- if (isCand)
- std::swap(N0, N1);
- }
- if (!isCand)
- return SDValue();
-
- unsigned Opc = 0;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ISD::AND: Opc = ARMISD::CAND; break;
- case ISD::OR: Opc = ARMISD::COR; break;
- case ISD::XOR: Opc = ARMISD::CXOR; break;
- }
- return DAG.getNode(Opc, N->getDebugLoc(), N->getValueType(0), N0,
- N1.getOperand(1), N1.getOperand(2), N1.getOperand(3),
- N1.getOperand(4));
-}
-
static SDValue PerformANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
@@ -7371,10 +7401,10 @@ static SDValue PerformANDCombine(SDNode *N,
}
if (!Subtarget->isThumb1Only()) {
- // (and x, (cmov -1, y, cond)) => (and.cond x, y)
- SDValue CAND = formConditionalOp(N, DAG, true);
- if (CAND.getNode())
- return CAND;
+ // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
+ SDValue Result = combineSelectAndUseCommutative(N, true, DCI);
+ if (Result.getNode())
+ return Result;
}
return SDValue();
@@ -7414,14 +7444,17 @@ static SDValue PerformORCombine(SDNode *N,
}
if (!Subtarget->isThumb1Only()) {
- // (or x, (cmov 0, y, cond)) => (or.cond x, y)
- SDValue COR = formConditionalOp(N, DAG, true);
- if (COR.getNode())
- return COR;
+ // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
+ SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
+ if (Result.getNode())
+ return Result;
}
+ // The code below optimizes (or (and X, Y), Z).
+ // The AND operand needs to have a single user to make these optimizations
+ // profitable.
SDValue N0 = N->getOperand(0);
- if (N0.getOpcode() != ISD::AND)
+ if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
return SDValue();
SDValue N1 = N->getOperand(1);
@@ -7578,10 +7611,10 @@ static SDValue PerformXORCombine(SDNode *N,
return SDValue();
if (!Subtarget->isThumb1Only()) {
- // (xor x, (cmov 0, y, cond)) => (xor.cond x, y)
- SDValue CXOR = formConditionalOp(N, DAG, true);
- if (CXOR.getNode())
- return CXOR;
+ // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
+ SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
+ if (Result.getNode())
+ return Result;
}
return SDValue();
@@ -8802,6 +8835,8 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
case MVT::i16:
case MVT::i32:
return true;
+ case MVT::f64:
+ return Subtarget->hasNEON();
// FIXME: VLD1 etc with standard alignment is legal.
}
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 7ad48b9..13b83de 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -63,9 +63,6 @@ namespace llvm {
FMSTAT, // ARM fmstat instruction.
CMOV, // ARM conditional move instructions.
- CAND, // ARM conditional and instructions.
- COR, // ARM conditional or instructions.
- CXOR, // ARM conditional xor instructions.
BCC_i64,
@@ -361,7 +358,8 @@ namespace llvm {
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
- virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
+ virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const;
Sched::Preference getSchedulingPreference(SDNode *N) const;
@@ -393,9 +391,9 @@ namespace llvm {
///
unsigned ARMPCLabelIndex;
- void addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT PromotedBitwiseVT);
- void addDRTypeForNEON(EVT VT);
- void addQRTypeForNEON(EVT VT);
+ void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT);
+ void addDRTypeForNEON(MVT VT);
+ void addQRTypeForNEON(MVT VT);
typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector;
void PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
@@ -544,7 +542,8 @@ namespace llvm {
namespace ARM {
- FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo);
}
}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 1b8fc3f..992aba5 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -242,6 +242,9 @@ def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || "
"Subtarget->isTargetDarwin()">;
+def IsLE : Predicate<"TLI.isLittleEndian()">;
+def IsBE : Predicate<"TLI.isBigEndian()">;
+
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -416,8 +419,11 @@ def pclabel : Operand<i32> {
}
// ADR instruction labels.
+def AdrLabelAsmOperand : AsmOperandClass { let Name = "AdrLabel"; }
def adrlabel : Operand<i32> {
let EncoderMethod = "getAdrLabelOpValue";
+ let ParserMatchClass = AdrLabelAsmOperand;
+ let PrintMethod = "printAdrLabelOperand";
}
def neon_vcvt_imm32 : Operand<i32> {
@@ -968,7 +974,7 @@ include "ARMInstrFormats.td"
let TwoOperandAliasConstraint = "$Rn = $Rd" in
multiclass AsI1_bin_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, string baseOpc, bit Commutable = 0> {
+ PatFrag opnode, bit Commutable = 0> {
// The register-immediate version is re-materializable. This is useful
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
@@ -1037,7 +1043,7 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
let TwoOperandAliasConstraint = "$Rn = $Rd" in
multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, string baseOpc, bit Commutable = 0> {
+ PatFrag opnode, bit Commutable = 0> {
// The register-immediate version is re-materializable. This is useful
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
@@ -1285,7 +1291,7 @@ class AI_exta_rrot_np<bits<8> opcod, string opc>
/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
let TwoOperandAliasConstraint = "$Rn = $Rd" in
multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
- string baseOpc, bit Commutable = 0> {
+ bit Commutable = 0> {
let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
@@ -1351,8 +1357,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
/// AI1_rsc_irs - Define instructions and patterns for rsc
let TwoOperandAliasConstraint = "$Rn = $Rd" in
-multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode,
- string baseOpc> {
+multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
@@ -2816,9 +2821,6 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
let Inst{15-12} = Rd;
}
-def : ARMInstAlias<"movs${p} $Rd, $Rm",
- (MOVr GPR:$Rd, GPR:$Rm, pred:$p, CPSR)>;
-
// A version for the smaller set of tail call registers.
let neverHasSideEffects = 1 in
def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
@@ -3029,10 +3031,10 @@ def UBFX : I<(outs GPR:$Rd),
defm ADD : AsI1_bin_irs<0b0100, "add",
IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(add node:$LHS, node:$RHS)>, "ADD", 1>;
+ BinOpFrag<(add node:$LHS, node:$RHS)>, 1>;
defm SUB : AsI1_bin_irs<0b0010, "sub",
IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(sub node:$LHS, node:$RHS)>, "SUB">;
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
// ADD and SUB with 's' bit set.
//
@@ -3050,15 +3052,13 @@ defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
defm ADC : AI1_adde_sube_irs<0b0101, "adc",
- BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>,
- "ADC", 1>;
+ BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, 1>;
defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
- BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>,
- "SBC">;
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>;
-defm RSB : AsI1_rbin_irs <0b0011, "rsb",
- IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(sub node:$LHS, node:$RHS)>, "RSB">;
+defm RSB : AsI1_rbin_irs<0b0011, "rsb",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
// FIXME: Eliminate them if we can write def : Pat patterns which defines
// CPSR and the implicit def of CPSR is not needed.
@@ -3066,8 +3066,7 @@ defm RSBS : AsI1_rbin_s_is<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
defm RSC : AI1_rsc_irs<0b0111, "rsc",
- BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>,
- "RSC">;
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>;
// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
// The assume-no-carry-in form uses the negation of the input since add/sub
@@ -3276,16 +3275,16 @@ def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm:$pos),
defm AND : AsI1_bin_irs<0b0000, "and",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(and node:$LHS, node:$RHS)>, "AND", 1>;
+ BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
defm ORR : AsI1_bin_irs<0b1100, "orr",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(or node:$LHS, node:$RHS)>, "ORR", 1>;
+ BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
defm EOR : AsI1_bin_irs<0b0001, "eor",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(xor node:$LHS, node:$RHS)>, "EOR", 1>;
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
defm BIC : AsI1_bin_irs<0b1110, "bic",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(and node:$LHS, (not node:$RHS))>, "BIC">;
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
// FIXME: bf_inv_mask_imm should be two operands, the lsb and the msb, just
// like in the actual instruction encoding. The complexity of mapping the mask
@@ -3940,7 +3939,7 @@ def BCCZi64 : PseudoInst<(outs),
// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
-let isCommutable = 1 in
+let isCommutable = 1, isSelect = 1 in
def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
4, IIC_iCMOVr,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
@@ -3993,25 +3992,29 @@ multiclass AsI1_bincc_irs<Instruction iri, Instruction irr, Instruction irsi,
InstrItinClass iii, InstrItinClass iir,
InstrItinClass iis> {
def ri : ARMPseudoExpand<(outs GPR:$Rd),
- (ins GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s),
+ (ins GPR:$Rfalse, GPR:$Rn, so_imm:$imm,
+ pred:$p, cc_out:$s),
4, iii, [],
(iri GPR:$Rd, GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rn = $Rd">;
+ RegConstraint<"$Rfalse = $Rd">;
def rr : ARMPseudoExpand<(outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ (ins GPR:$Rfalse, GPR:$Rn, GPR:$Rm,
+ pred:$p, cc_out:$s),
4, iir, [],
(irr GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rn = $Rd">;
+ RegConstraint<"$Rfalse = $Rd">;
def rsi : ARMPseudoExpand<(outs GPR:$Rd),
- (ins GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s),
+ (ins GPR:$Rfalse, GPR:$Rn, so_reg_imm:$shift,
+ pred:$p, cc_out:$s),
4, iis, [],
(irsi GPR:$Rd, GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rn = $Rd">;
+ RegConstraint<"$Rfalse = $Rd">;
def rsr : ARMPseudoExpand<(outs GPRnopc:$Rd),
- (ins GPRnopc:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s),
+ (ins GPRnopc:$Rfalse, GPRnopc:$Rn, so_reg_reg:$shift,
+ pred:$p, cc_out:$s),
4, iis, [],
(irsr GPR:$Rd, GPR:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rn = $Rd">;
+ RegConstraint<"$Rfalse = $Rd">;
}
defm ANDCC : AsI1_bincc_irs<ANDri, ANDrr, ANDrsi, ANDrsr,
@@ -4020,6 +4023,10 @@ defm ORRCC : AsI1_bincc_irs<ORRri, ORRrr, ORRrsi, ORRrsr,
IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
defm EORCC : AsI1_bincc_irs<EORri, EORrr, EORrsi, EORrsr,
IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+defm ADDCC : AsI1_bincc_irs<ADDri, ADDrr, ADDrsi, ADDrsr,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+defm SUBCC : AsI1_bincc_irs<SUBri, SUBrr, SUBrsi, SUBrsr,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
} // neverHasSideEffects
@@ -4068,11 +4075,8 @@ def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
// Pseudo instruction that combines movs + predicated rsbmi
// to implement integer ABS
-let usesCustomInserter = 1, Defs = [CPSR] in {
-def ABS : ARMPseudoInst<
- (outs GPR:$dst), (ins GPR:$src),
- 8, NoItinerary, []>;
-}
+let usesCustomInserter = 1, Defs = [CPSR] in
+def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
let usesCustomInserter = 1 in {
let Defs = [CPSR] in {
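
The AsI1_bincc_irs changes above give each predicated binary pseudo an explicit $Rfalse input tied to $Rd, rather than tying $Rd to $Rn. A minimal standalone C++ sketch of the intended semantics (illustrative only, not part of the patch; the helper name is made up):

// Illustrative sketch: Rd = pred ? (Rn <op> Op2) : Rfalse, with Rfalse tied
// to Rd so the "false" value survives when the predicate is not taken.
#include <cassert>
#include <cstdint>
#include <functional>

static uint32_t bincc(bool pred, uint32_t rfalse, uint32_t rn, uint32_t op2,
                      const std::function<uint32_t(uint32_t, uint32_t)> &op) {
  return pred ? op(rn, op2) : rfalse;
}

int main() {
  auto andOp = [](uint32_t a, uint32_t b) { return a & b; };
  // Predicate taken: the computed value is selected.
  assert(bincc(true, 7u, 0xffu, 0x0fu, andOp) == 0x0fu);
  // Predicate not taken: the tied false value flows through unchanged.
  assert(bincc(false, 7u, 0xffu, 0x0fu, andOp) == 7u);
  return 0;
}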
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 3134088..048d340 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -398,6 +398,27 @@ def VecListFourQWordIndexed : Operand<i32> {
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
+def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() == 2;
+}]>;
+def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() == 2;
+}]>;
+def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() == 1;
+}]>;
+def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() == 1;
+}]>;
+def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
+def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() < 4;
+}]>;
//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
@@ -2238,6 +2259,19 @@ def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+// Use vld1/vst1 for unaligned f64 load / store
+def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
+ (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>;
+def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
+ (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
+def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
+ (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>;
+def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
+ (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
+def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
+ (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>;
+def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
+ (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;
//===----------------------------------------------------------------------===//
// NEON pattern fragments
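
The new PatFrags and patterns above bucket f64 accesses by alignment: on little-endian targets a halfword- or byte-aligned access is lowered to VLD1/VST1 with the matching element size, while big-endian targets use VLD1.64/VST1.64 for anything below word alignment. A toy C++ classifier sketching that decision (illustrative only, not the TableGen patterns themselves):

// Illustrative sketch: which instruction an unaligned f64 load maps to,
// keyed on alignment and endianness, mirroring the new pattern predicates.
#include <cassert>
#include <string>

static std::string pickF64Load(unsigned Align, bool IsLittleEndian) {
  if (Align >= 4)
    return "VLDRD";        // word aligned: the plain VFP load suffices
  if (!IsLittleEndian)
    return "VLD1d64";      // big-endian: keep the doubleword element order
  if (Align == 2)
    return "VLD1d16";      // halfword aligned
  return "VLD1d8";         // byte aligned
}

int main() {
  assert(pickF64Load(8, true) == "VLDRD");
  assert(pickF64Load(2, true) == "VLD1d16");
  assert(pickF64Load(1, true) == "VLD1d8");
  assert(pickF64Load(2, false) == "VLD1d64");
  return 0;
}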
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index d83530a..8ecf009 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -172,6 +172,7 @@ def t2ldr_pcrel_imm12 : Operand<i32> {
// ADR instruction labels.
def t2adrlabel : Operand<i32> {
let EncoderMethod = "getT2AdrLabelOpValue";
+ let PrintMethod = "printAdrLabelOperand";
}
@@ -529,7 +530,7 @@ class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
/// changed to modify CPSR.
multiclass T2I_bin_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, string baseOpc, bit Commutable = 0,
+ PatFrag opnode, bit Commutable = 0,
string wide = ""> {
// shifted imm
def ri : T2sTwoRegImm<
@@ -565,15 +566,15 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
// Assembly aliases for optional destination operand when it's the same
// as the source operand.
def : t2InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn,
t2_so_imm:$imm, pred:$p,
cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn,
rGPR:$Rm, pred:$p,
cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn,
+ (!cast<Instruction>(NAME#"rs") rGPR:$Rdn, rGPR:$Rdn,
t2_so_reg:$shift, pred:$p,
cc_out:$s)>;
}
@@ -582,36 +583,30 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
// the ".w" suffix to indicate that they are wide.
multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, string baseOpc, bit Commutable = 0> :
- T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w"> {
+ PatFrag opnode, bit Commutable = 0> :
+ T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, Commutable, ".w"> {
// Assembler aliases w/ the ".w" suffix.
def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rd, $Rn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn,
- t2_so_imm:$imm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
// Assembler aliases w/o the ".w" suffix.
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn,
- rGPR:$Rm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rd, rGPR:$Rn,
- t2_so_reg:$shift, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rs") rGPR:$Rd, rGPR:$Rn, t2_so_reg:$shift,
+ pred:$p, cc_out:$s)>;
// and with the optional destination operand, too.
def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
- t2_so_imm:$imm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm,
+ pred:$p, cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
- rGPR:$Rm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn,
- t2_so_reg:$shift, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rs") rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$shift,
+ pred:$p, cc_out:$s)>;
}
/// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are
@@ -762,6 +757,33 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
let Inst{24} = 1;
let Inst{23-21} = op23_21;
}
+
+ // Predicated versions.
+ def CCri : t2PseudoExpand<(outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_imm:$imm,
+ pred:$p, cc_out:$s), 4, IIC_iALUi, [],
+ (!cast<Instruction>(NAME#ri) GPRnopc:$Rd,
+ GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rfalse = $Rd">;
+ def CCri12 : t2PseudoExpand<(outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rfalse, GPR:$Rn, imm0_4095:$imm,
+ pred:$p),
+ 4, IIC_iALUi, [],
+ (!cast<Instruction>(NAME#ri12) GPRnopc:$Rd,
+ GPR:$Rn, imm0_4095:$imm, pred:$p)>,
+ RegConstraint<"$Rfalse = $Rd">;
+ def CCrr : t2PseudoExpand<(outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rfalse, GPRnopc:$Rn, rGPR:$Rm,
+ pred:$p, cc_out:$s), 4, IIC_iALUr, [],
+ (!cast<Instruction>(NAME#rr) GPRnopc:$Rd,
+ GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rfalse = $Rd">;
+ def CCrs : t2PseudoExpand<(outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_reg:$Rm,
+ pred:$p, cc_out:$s), 4, IIC_iALUsi, [],
+ (!cast<Instruction>(NAME#rs) GPRnopc:$Rd,
+ GPRnopc:$Rn, t2_so_reg:$Rm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rfalse = $Rd">;
}
/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns
@@ -808,8 +830,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
/// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift /
// rotate operation that produces a value.
-multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode,
- string baseOpc> {
+multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode> {
// 5-bit imm
def ri : T2sTwoRegShiftImm<
(outs rGPR:$Rd), (ins rGPR:$Rm, ty:$imm), IIC_iMOVsi,
@@ -834,33 +855,27 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode,
// Optional destination register
def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
- ty:$imm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, ty:$imm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
- rGPR:$Rm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
// Assembler aliases w/o the ".w" suffix.
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn,
- ty:$imm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rd, rGPR:$Rn, ty:$imm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn,
- rGPR:$Rm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
// and with the optional destination operand, too.
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
- ty:$imm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, ty:$imm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
- rGPR:$Rm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
}
/// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
@@ -868,7 +883,7 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode,
/// a explicit result, only implicitly set CPSR.
multiclass T2I_cmp_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, string baseOpc> {
+ PatFrag opnode> {
let isCompare = 1, Defs = [CPSR] in {
// shifted imm
def ri : T2OneRegCmpImm<
@@ -913,12 +928,9 @@ let isCompare = 1, Defs = [CPSR] in {
// No alias here for 'rr' version as not all instantiations of this
// multiclass want one (CMP in particular, does not).
def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPRnopc:$Rn,
- t2_so_imm:$imm, pred:$p)>;
+ (!cast<Instruction>(NAME#"ri") GPRnopc:$Rn, t2_so_imm:$imm, pred:$p)>;
def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPRnopc:$Rn,
- t2_so_reg:$shift,
- pred:$p)>;
+ (!cast<Instruction>(NAME#"rs") GPRnopc:$Rn, t2_so_reg:$shift, pred:$p)>;
}
/// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
@@ -2152,13 +2164,13 @@ def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>;
//
defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31,
- BinOpFrag<(shl node:$LHS, node:$RHS)>, "t2LSL">;
+ BinOpFrag<(shl node:$LHS, node:$RHS)>>;
defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr,
- BinOpFrag<(srl node:$LHS, node:$RHS)>, "t2LSR">;
+ BinOpFrag<(srl node:$LHS, node:$RHS)>>;
defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr,
- BinOpFrag<(sra node:$LHS, node:$RHS)>, "t2ASR">;
+ BinOpFrag<(sra node:$LHS, node:$RHS)>>;
defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31,
- BinOpFrag<(rotr node:$LHS, node:$RHS)>, "t2ROR">;
+ BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
// (rotr x, (and y, 0x...1f)) ==> (ROR x, y)
def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
@@ -2214,18 +2226,17 @@ def t2MOVsra_flag : T2TwoRegShiftImm<
defm t2AND : T2I_bin_w_irs<0b0000, "and",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(and node:$LHS, node:$RHS)>, "t2AND", 1>;
+ BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
defm t2ORR : T2I_bin_w_irs<0b0010, "orr",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(or node:$LHS, node:$RHS)>, "t2ORR", 1>;
+ BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
defm t2EOR : T2I_bin_w_irs<0b0100, "eor",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(xor node:$LHS, node:$RHS)>, "t2EOR", 1>;
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
defm t2BIC : T2I_bin_w_irs<0b0001, "bic",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(and node:$LHS, (not node:$RHS))>,
- "t2BIC">;
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
class T2BitFI<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -2305,8 +2316,7 @@ let Constraints = "$src = $Rd" in {
defm t2ORN : T2I_bin_irs<0b0011, "orn",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(or node:$LHS, (not node:$RHS))>,
- "t2ORN", 0, "">;
+ BinOpFrag<(or node:$LHS, (not node:$RHS))>, 0, "">;
/// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
/// unary operation that produces a value. These are predicable and can be
@@ -2878,7 +2888,7 @@ def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
//
defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
- BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>, "t2CMP">;
+ BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_imm:$imm),
(t2CMPri GPRnopc:$lhs, t2_so_imm:$imm)>;
@@ -2932,13 +2942,10 @@ let isCompare = 1, Defs = [CPSR] in {
// Assembler aliases w/o the ".w" suffix.
// No alias here for 'rr' version as not all instantiations of this multiclass
// want one (CMP in particular, does not).
-def : t2InstAlias<!strconcat("cmn", "${p}", " $Rn, $imm"),
- (!cast<Instruction>(!strconcat("t2CMN", "ri")) GPRnopc:$Rn,
- t2_so_imm:$imm, pred:$p)>;
-def : t2InstAlias<!strconcat("cmn", "${p}", " $Rn, $shift"),
- (!cast<Instruction>(!strconcat("t2CMNz", "rs")) GPRnopc:$Rn,
- t2_so_reg:$shift,
- pred:$p)>;
+def : t2InstAlias<"cmn${p} $Rn, $imm",
+ (t2CMNri GPRnopc:$Rn, t2_so_imm:$imm, pred:$p)>;
+def : t2InstAlias<"cmn${p} $Rn, $shift",
+ (t2CMNzrs GPRnopc:$Rn, t2_so_reg:$shift, pred:$p)>;
def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm),
(t2CMNri GPR:$src, t2_so_imm_neg:$imm)>;
@@ -2948,19 +2955,17 @@ def : T2Pat<(ARMcmpZ GPRnopc:$src, t2_so_imm_neg:$imm),
defm t2TST : T2I_cmp_irs<0b0000, "tst",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
- BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>,
- "t2TST">;
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>;
defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
- BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>,
- "t2TEQ">;
+ BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>;
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
-let isCommutable = 1 in
+let isCommutable = 1, isSelect = 1 in
def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
(ins rGPR:$false, rGPR:$Rm, pred:$p),
4, IIC_iCMOVr,
@@ -3048,22 +3053,25 @@ multiclass T2I_bincc_irs<Instruction iri, Instruction irr, Instruction irs,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> {
// shifted imm
def ri : t2PseudoExpand<(outs rGPR:$Rd),
- (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s),
+ (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_imm:$imm,
+ pred:$p, cc_out:$s),
4, iii, [],
(iri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rn = $Rd">;
+ RegConstraint<"$Rfalse = $Rd">;
// register
def rr : t2PseudoExpand<(outs rGPR:$Rd),
- (ins rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s),
+ (ins rGPR:$Rfalse, rGPR:$Rn, rGPR:$Rm,
+ pred:$p, cc_out:$s),
4, iir, [],
(irr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rn = $Rd">;
+ RegConstraint<"$Rfalse = $Rd">;
// shifted register
def rs : t2PseudoExpand<(outs rGPR:$Rd),
- (ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s),
+ (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s),
4, iis, [],
(irs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rn = $Rd">;
+ RegConstraint<"$Rfalse = $Rd">;
} // T2I_bincc_irs
defm t2ANDCC : T2I_bincc_irs<t2ANDri, t2ANDrr, t2ANDrs,
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 23c132e..7d6692f 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -61,6 +61,15 @@ def vfp_f64imm : Operand<f64>,
let ParserMatchClass = FPImmOperand;
}
+def alignedload32 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+
+def alignedstore32 : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+
// The VCVT to/from fixed-point instructions encode the 'fbits' operand
// (the number of fixed bits) differently than it appears in the assembly
// source. It's encoded as "Size - fbits" where Size is the size of the
@@ -86,7 +95,7 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
IIC_fpLoad64, "vldr", "\t$Dd, $addr",
- [(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>;
+ [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>;
def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
IIC_fpLoad32, "vldr", "\t$Sd, $addr",
@@ -100,7 +109,7 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
IIC_fpStore64, "vstr", "\t$Dd, $addr",
- [(store (f64 DPR:$Dd), addrmode5:$addr)]>;
+ [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>;
def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
IIC_fpStore32, "vstr", "\t$Sd, $addr",
@@ -433,25 +442,25 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
// Between half-precision and single-precision. For disassembly only.
// FIXME: Verify encoding after integrated assembler is working.
-def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
-def : ARMPat<(f32_to_f16 SPR:$a),
- (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
-
-def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
-def : ARMPat<(f16_to_f32 GPR:$a),
- (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+def : Pat<(f32_to_f16 SPR:$a),
+ (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
+
+def : Pat<(f16_to_f32 GPR:$a),
+ (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
-def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
-def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index c5db211..357fc3f 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -291,9 +291,9 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry)
ResultPtr = ResultPtr >> 2;
*((intptr_t*)RelocPos) |= ResultPtr;
- // Set register Rn to PC.
- *((intptr_t*)RelocPos) |=
- getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+ // Set register Rn to PC (which is register 15 on all architectures).
+ // FIXME: This avoids the need for register info in the JIT class.
+ *((intptr_t*)RelocPos) |= 15 << ARMII::RegRnShift;
break;
}
case ARM::reloc_arm_so_imm_cp_entry: {
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index cb1b2a2..897ceb6 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -456,8 +456,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
DebugLoc dl = Loc->getDebugLoc();
const MachineOperand &PMO = Loc->getOperand(0);
unsigned PReg = PMO.getReg();
- unsigned PRegNum = PMO.isUndef() ? UINT_MAX
- : getARMRegisterNumbering(PReg);
+ unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
unsigned Count = 1;
unsigned Limit = ~0U;
@@ -483,8 +482,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
int NewOffset = MemOps[i].Offset;
const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
unsigned Reg = MO.getReg();
- unsigned RegNum = MO.isUndef() ? UINT_MAX
- : getARMRegisterNumbering(Reg);
+ unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
// Register numbers must be in ascending order. For VFP / NEON load and
// store multiples, the registers must also be consecutive and within the
// limit on the number of registers per instruction.
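
The load/store optimizer now queries TRI->getEncodingValue() instead of getARMRegisterNumbering(), and the ascending-order requirement for forming a multiple load/store is unchanged. A toy standalone check of that rule (illustrative only, using plain encoding values in place of the register info query):

// Illustrative sketch: a register list is only mergeable into LDM/STM if the
// hardware encodings are strictly ascending; duplicates or descending order
// break the merge.
#include <cassert>
#include <vector>

static bool isAscendingRegList(const std::vector<unsigned> &Encodings) {
  for (size_t i = 1; i < Encodings.size(); ++i)
    if (Encodings[i] <= Encodings[i - 1])
      return false;
  return true;
}

int main() {
  assert(isAscendingRegList({0, 1, 2, 3}));   // r0-r3: mergeable
  assert(!isAscendingRegList({2, 1}));        // descending: rejected
  assert(!isAscendingRegList({4, 4}));        // duplicate: rejected
  return 0;
}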
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 3857647..6f974fd 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -12,16 +12,16 @@
//===----------------------------------------------------------------------===//
// Registers are identified with 4-bit ID numbers.
-class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> {
- field bits<4> Num;
+class ARMReg<bits<16> Enc, string n, list<Register> subregs = []> : Register<n> {
+ let HWEncoding = Enc;
let Namespace = "ARM";
let SubRegs = subregs;
// All bits of ARM registers with sub-registers are covered by sub-registers.
let CoveredBySubRegs = 1;
}
-class ARMFReg<bits<6> num, string n> : Register<n> {
- field bits<6> Num;
+class ARMFReg<bits<16> Enc, string n> : Register<n> {
+ let HWEncoding = Enc;
let Namespace = "ARM";
}
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index 56197d4..2c63825 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -1069,6 +1069,7 @@ def CortexA8Model : SchedMachineModel {
let LoadLatency = 2; // Optimistic load latency assuming bypass.
// This is overriden by OperandCycles if the
// Itineraries are queried instead.
+ let MispredictPenalty = 13; // Based on estimate of pipeline depth.
let Itineraries = CortexA8Itineraries;
}
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 738974e..7bc590f 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1886,6 +1886,7 @@ def CortexA9Model : SchedMachineModel {
let LoadLatency = 2; // Optimistic load latency assuming bypass.
// This is overriden by OperandCycles if the
// Itineraries are queried instead.
+ let MispredictPenalty = 8; // Based on estimate of pipeline depth.
let Itineraries = CortexA9Itineraries;
}
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index e067a9f..89e29ad 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -97,6 +97,9 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
if (!HasV6T2Ops && hasThumb2())
HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6T2Ops = true;
+ // Keep a pointer to static instruction cost data for the specified CPU.
+ SchedModel = getSchedModelForCPU(CPUString);
+
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
@@ -179,15 +182,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
}
unsigned ARMSubtarget::getMispredictionPenalty() const {
- // If we have a reasonable estimate of the pipeline depth, then we can
- // estimate the penalty of a misprediction based on that.
- if (isCortexA8())
- return 13;
- else if (isCortexA9())
- return 8;
-
- // Otherwise, just return a sensible default.
- return 10;
+ return SchedModel->MispredictPenalty;
}
bool ARMSubtarget::enablePostRAScheduler(
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index e72b06f..b394061 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -74,7 +74,7 @@ protected:
/// HasThumb2 - True if Thumb2 instructions are supported.
bool HasThumb2;
- /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs -
+ /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs -
/// v6m, v7m for example.
bool IsMClass;
@@ -155,6 +155,9 @@ protected:
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
+ /// SchedModel - Processor specific instruction costs.
+ const MCSchedModel *SchedModel;
+
/// Selected instruction itineraries (one entry per itinerary class.)
InstrItineraryData InstrItins;
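
With the subtarget changes above, the misprediction penalty comes from a per-CPU scheduling model pointer instead of isCortexA8()/isCortexA9() checks, and the A8/A9 models now carry MispredictPenalty values of 13 and 8. A small standalone C++ sketch of that lookup (illustrative only; the table is a stand-in for the TableGen-generated SchedMachineModel data):

// Illustrative sketch: static per-CPU model data with a lookup by CPU name.
#include <cassert>
#include <cstring>

struct SchedModel {
  const char *CPU;
  unsigned MispredictPenalty;
};

static const SchedModel Models[] = {
  {"cortex-a8", 13},  // matches the new CortexA8Model entry
  {"cortex-a9", 8},   // matches the new CortexA9Model entry
  {"generic",   10},  // the previous hard-coded fallback
};

static const SchedModel *lookupSchedModel(const char *CPU) {
  for (const SchedModel &M : Models)
    if (std::strcmp(M.CPU, CPU) == 0)
      return &M;
  return &Models[2];  // default model
}

int main() {
  assert(lookupSchedModel("cortex-a8")->MispredictPenalty == 13);
  assert(lookupSchedModel("cortex-a9")->MispredictPenalty == 8);
  assert(lookupSchedModel("other")->MispredictPenalty == 10);
  return 0;
}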
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 4497720..3a5957b 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -796,6 +796,13 @@ public:
int64_t Value = CE->getValue();
return Value > 0 && Value <= 32;
}
+ bool isAdrLabel() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, but it can't fit
+ // into shift immediate encoding, we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm())) return true;
+ else return (isARMSOImm() || isARMSOImmNeg());
+ }
bool isARMSOImm() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -1033,7 +1040,8 @@ public:
// Immediate offset a multiple of 4 in range [-1020, 1020].
if (!Memory.OffsetImm) return true;
int64_t Val = Memory.OffsetImm->getValue();
- return Val >= -1020 && Val <= 1020 && (Val & 3) == 0;
+ // Special case, #-0 is INT32_MIN.
+ return (Val >= -1020 && Val <= 1020 && (Val & 3) == 0) || Val == INT32_MIN;
}
bool isMemImm0_1020s4Offset() const {
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
@@ -1644,6 +1652,22 @@ public:
Inst.addOperand(MCOperand::CreateImm(Imm));
}
+ void addAdrLabelOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ assert(isImm() && "Not an immediate!");
+
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup.
+ if (!isa<MCConstantExpr>(getImm())) {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ return;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ int Val = CE->getValue();
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
void addAlignedMemoryOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
@@ -2884,7 +2908,7 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!RC->contains(EndReg))
return Error(EndLoc, "invalid register in register list");
// Ranges must go from low to high.
- if (getARMRegisterNumbering(Reg) > getARMRegisterNumbering(EndReg))
+ if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg))
return Error(EndLoc, "bad range in register list");
// Add all the registers in the range to the register list.
@@ -2911,13 +2935,13 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!RC->contains(Reg))
return Error(RegLoc, "invalid register in register list");
// List must be monotonically increasing.
- if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg)) {
+ if (MRI->getEncodingValue(Reg) < MRI->getEncodingValue(OldReg)) {
if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
Warning(RegLoc, "register list not in ascending order");
else
return Error(RegLoc, "register list not in ascending order");
}
- if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) {
+ if (MRI->getEncodingValue(Reg) == MRI->getEncodingValue(OldReg)) {
Warning(RegLoc, "duplicated register (" + RegTok.getString() +
") in register list");
continue;
@@ -3256,29 +3280,59 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
- if (!Tok.is(AsmToken::Identifier))
- return MatchOperand_NoMatch;
- StringRef OptStr = Tok.getString();
-
- unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()).lower())
- .Case("sy", ARM_MB::SY)
- .Case("st", ARM_MB::ST)
- .Case("sh", ARM_MB::ISH)
- .Case("ish", ARM_MB::ISH)
- .Case("shst", ARM_MB::ISHST)
- .Case("ishst", ARM_MB::ISHST)
- .Case("nsh", ARM_MB::NSH)
- .Case("un", ARM_MB::NSH)
- .Case("nshst", ARM_MB::NSHST)
- .Case("unst", ARM_MB::NSHST)
- .Case("osh", ARM_MB::OSH)
- .Case("oshst", ARM_MB::OSHST)
- .Default(~0U);
+ unsigned Opt;
+
+ if (Tok.is(AsmToken::Identifier)) {
+ StringRef OptStr = Tok.getString();
+
+ Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()).lower())
+ .Case("sy", ARM_MB::SY)
+ .Case("st", ARM_MB::ST)
+ .Case("sh", ARM_MB::ISH)
+ .Case("ish", ARM_MB::ISH)
+ .Case("shst", ARM_MB::ISHST)
+ .Case("ishst", ARM_MB::ISHST)
+ .Case("nsh", ARM_MB::NSH)
+ .Case("un", ARM_MB::NSH)
+ .Case("nshst", ARM_MB::NSHST)
+ .Case("unst", ARM_MB::NSHST)
+ .Case("osh", ARM_MB::OSH)
+ .Case("oshst", ARM_MB::OSHST)
+ .Default(~0U);
- if (Opt == ~0U)
- return MatchOperand_NoMatch;
+ if (Opt == ~0U)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ } else if (Tok.is(AsmToken::Hash) ||
+ Tok.is(AsmToken::Dollar) ||
+ Tok.is(AsmToken::Integer)) {
+ if (Parser.getTok().isNot(AsmToken::Integer))
+ Parser.Lex(); // Eat the '#'.
+ SMLoc Loc = Parser.getTok().getLoc();
+
+ const MCExpr *MemBarrierID;
+ if (getParser().ParseExpression(MemBarrierID)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(MemBarrierID);
+ if (!CE) {
+ Error(Loc, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+
+ int Val = CE->getValue();
+ if (Val & ~0xf) {
+ Error(Loc, "immediate value out of range");
+ return MatchOperand_ParseFail;
+ }
+
+ Opt = ARM_MB::RESERVED_0 + Val;
+ } else
+ return MatchOperand_ParseFail;
- Parser.Lex(); // Eat identifier token.
Operands.push_back(ARMOperand::CreateMemBarrierOpt((ARM_MB::MemBOpt)Opt, S));
return MatchOperand_Success;
}
@@ -5250,8 +5304,8 @@ validateInstruction(MCInst &Inst,
case ARM::LDRD_POST:
case ARM::LDREXD: {
// Rt2 must be Rt + 1.
- unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg());
- unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg());
+ unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
+ unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
if (Rt2 != Rt + 1)
return Error(Operands[3]->getStartLoc(),
"destination operands must be sequential");
@@ -5259,8 +5313,8 @@ validateInstruction(MCInst &Inst,
}
case ARM::STRD: {
// Rt2 must be Rt + 1.
- unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg());
- unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg());
+ unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
+ unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
if (Rt2 != Rt + 1)
return Error(Operands[3]->getStartLoc(),
"source operands must be sequential");
@@ -5270,8 +5324,8 @@ validateInstruction(MCInst &Inst,
case ARM::STRD_POST:
case ARM::STREXD: {
// Rt2 must be Rt + 1.
- unsigned Rt = getARMRegisterNumbering(Inst.getOperand(1).getReg());
- unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(2).getReg());
+ unsigned Rt = MRI->getEncodingValue(Inst.getOperand(1).getReg());
+ unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(2).getReg());
if (Rt2 != Rt + 1)
return Error(Operands[3]->getStartLoc(),
"source operands must be sequential");
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 47cca2a..c90751d 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -18,10 +18,12 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
@@ -383,7 +385,6 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
#include "ARMGenDisassemblerTables.inc"
-#include "ARMGenInstrInfo.inc"
#include "ARMGenEDInfo.inc"
static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) {
@@ -427,7 +428,8 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
(bytes[0] << 0);
// Calling the auto-generated decoder function.
- DecodeStatus result = decodeARMInstruction32(MI, insn, Address, this, STI);
+ DecodeStatus result = decodeInstruction(DecoderTableARM32, MI, insn,
+ Address, this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
return result;
@@ -436,14 +438,15 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// VFP and NEON instructions, similarly, are shared between ARM
// and Thumb modes.
MI.clear();
- result = decodeVFPInstruction32(MI, insn, Address, this, STI);
+ result = decodeInstruction(DecoderTableVFP32, MI, insn, Address, this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
return result;
}
MI.clear();
- result = decodeNEONDataInstruction32(MI, insn, Address, this, STI);
+ result = decodeInstruction(DecoderTableNEONData32, MI, insn, Address,
+ this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
// Add a fake predicate operand, because we share these instruction
@@ -454,7 +457,8 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
MI.clear();
- result = decodeNEONLoadStoreInstruction32(MI, insn, Address, this, STI);
+ result = decodeInstruction(DecoderTableNEONLoadStore32, MI, insn, Address,
+ this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
// Add a fake predicate operand, because we share these instruction
@@ -465,7 +469,8 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
MI.clear();
- result = decodeNEONDupInstruction32(MI, insn, Address, this, STI);
+ result = decodeInstruction(DecoderTableNEONDup32, MI, insn, Address,
+ this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
// Add a fake predicate operand, because we share these instruction
@@ -765,7 +770,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
uint16_t insn16 = (bytes[1] << 8) | bytes[0];
- DecodeStatus result = decodeThumbInstruction16(MI, insn16, Address, this, STI);
+ DecodeStatus result = decodeInstruction(DecoderTableThumb16, MI, insn16,
+ Address, this, STI);
if (result != MCDisassembler::Fail) {
Size = 2;
Check(result, AddThumbPredicate(MI));
@@ -773,7 +779,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
MI.clear();
- result = decodeThumbSBitInstruction16(MI, insn16, Address, this, STI);
+ result = decodeInstruction(DecoderTableThumbSBit16, MI, insn16,
+ Address, this, STI);
if (result) {
Size = 2;
bool InITBlock = ITBlock.instrInITBlock();
@@ -783,7 +790,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
MI.clear();
- result = decodeThumb2Instruction16(MI, insn16, Address, this, STI);
+ result = decodeInstruction(DecoderTableThumb216, MI, insn16,
+ Address, this, STI);
if (result != MCDisassembler::Fail) {
Size = 2;
@@ -818,7 +826,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
(bytes[1] << 24) |
(bytes[0] << 16);
MI.clear();
- result = decodeThumbInstruction32(MI, insn32, Address, this, STI);
+ result = decodeInstruction(DecoderTableThumb32, MI, insn32, Address,
+ this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
bool InITBlock = ITBlock.instrInITBlock();
@@ -828,7 +837,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
MI.clear();
- result = decodeThumb2Instruction32(MI, insn32, Address, this, STI);
+ result = decodeInstruction(DecoderTableThumb232, MI, insn32, Address,
+ this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
Check(result, AddThumbPredicate(MI));
@@ -836,7 +846,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
MI.clear();
- result = decodeVFPInstruction32(MI, insn32, Address, this, STI);
+ result = decodeInstruction(DecoderTableVFP32, MI, insn32, Address, this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
UpdateThumbVFPPredicate(MI);
@@ -844,19 +854,21 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
MI.clear();
- result = decodeNEONDupInstruction32(MI, insn32, Address, this, STI);
+ result = decodeInstruction(DecoderTableNEONDup32, MI, insn32, Address,
+ this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
Check(result, AddThumbPredicate(MI));
return result;
}
- if (fieldFromInstruction32(insn32, 24, 8) == 0xF9) {
+ if (fieldFromInstruction(insn32, 24, 8) == 0xF9) {
MI.clear();
uint32_t NEONLdStInsn = insn32;
NEONLdStInsn &= 0xF0FFFFFF;
NEONLdStInsn |= 0x04000000;
- result = decodeNEONLoadStoreInstruction32(MI, NEONLdStInsn, Address, this, STI);
+ result = decodeInstruction(DecoderTableNEONLoadStore32, MI, NEONLdStInsn,
+ Address, this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
Check(result, AddThumbPredicate(MI));
@@ -864,13 +876,14 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
- if (fieldFromInstruction32(insn32, 24, 4) == 0xF) {
+ if (fieldFromInstruction(insn32, 24, 4) == 0xF) {
MI.clear();
uint32_t NEONDataInsn = insn32;
NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24
NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
NEONDataInsn |= 0x12000000; // Set bits 28 and 25
- result = decodeNEONDataInstruction32(MI, NEONDataInsn, Address, this, STI);
+ result = decodeInstruction(DecoderTableNEONData32, MI, NEONDataInsn,
+ Address, this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
Check(result, AddThumbPredicate(MI));
@@ -1117,9 +1130,9 @@ static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rm = fieldFromInstruction32(Val, 0, 4);
- unsigned type = fieldFromInstruction32(Val, 5, 2);
- unsigned imm = fieldFromInstruction32(Val, 7, 5);
+ unsigned Rm = fieldFromInstruction(Val, 0, 4);
+ unsigned type = fieldFromInstruction(Val, 5, 2);
+ unsigned imm = fieldFromInstruction(Val, 7, 5);
// Register-immediate
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
@@ -1154,9 +1167,9 @@ static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rm = fieldFromInstruction32(Val, 0, 4);
- unsigned type = fieldFromInstruction32(Val, 5, 2);
- unsigned Rs = fieldFromInstruction32(Val, 8, 4);
+ unsigned Rm = fieldFromInstruction(Val, 0, 4);
+ unsigned type = fieldFromInstruction(Val, 5, 2);
+ unsigned Rs = fieldFromInstruction(Val, 8, 4);
// Register-register
if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
@@ -1224,8 +1237,8 @@ static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Vd = fieldFromInstruction32(Val, 8, 5);
- unsigned regs = fieldFromInstruction32(Val, 0, 8);
+ unsigned Vd = fieldFromInstruction(Val, 8, 5);
+ unsigned regs = fieldFromInstruction(Val, 0, 8);
if (!Check(S, DecodeSPRRegisterClass(Inst, Vd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -1241,8 +1254,8 @@ static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Vd = fieldFromInstruction32(Val, 8, 5);
- unsigned regs = fieldFromInstruction32(Val, 0, 8);
+ unsigned Vd = fieldFromInstruction(Val, 8, 5);
+ unsigned regs = fieldFromInstruction(Val, 0, 8);
regs = regs >> 1;
@@ -1263,8 +1276,8 @@ static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val,
// the mask of all bits LSB-and-lower, and then xor them to create
// the mask of that's all ones on [msb, lsb]. Finally we not it to
// create the final mask.
- unsigned msb = fieldFromInstruction32(Val, 5, 5);
- unsigned lsb = fieldFromInstruction32(Val, 0, 5);
+ unsigned msb = fieldFromInstruction(Val, 5, 5);
+ unsigned lsb = fieldFromInstruction(Val, 0, 5);
DecodeStatus S = MCDisassembler::Success;
if (lsb > msb) Check(S, MCDisassembler::SoftFail);
@@ -1281,12 +1294,12 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- unsigned CRd = fieldFromInstruction32(Insn, 12, 4);
- unsigned coproc = fieldFromInstruction32(Insn, 8, 4);
- unsigned imm = fieldFromInstruction32(Insn, 0, 8);
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned U = fieldFromInstruction32(Insn, 23, 1);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned CRd = fieldFromInstruction(Insn, 12, 4);
+ unsigned coproc = fieldFromInstruction(Insn, 8, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 8);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned U = fieldFromInstruction(Insn, 23, 1);
switch (Inst.getOpcode()) {
case ARM::LDC_OFFSET:
@@ -1426,14 +1439,14 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned imm = fieldFromInstruction32(Insn, 0, 12);
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- unsigned reg = fieldFromInstruction32(Insn, 25, 1);
- unsigned P = fieldFromInstruction32(Insn, 24, 1);
- unsigned W = fieldFromInstruction32(Insn, 21, 1);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 12);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned reg = fieldFromInstruction(Insn, 25, 1);
+ unsigned P = fieldFromInstruction(Insn, 24, 1);
+ unsigned W = fieldFromInstruction(Insn, 21, 1);
// On stores, the writeback operand precedes Rt.
switch (Inst.getOpcode()) {
@@ -1476,7 +1489,7 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
ARM_AM::AddrOpc Op = ARM_AM::add;
- if (!fieldFromInstruction32(Insn, 23, 1))
+ if (!fieldFromInstruction(Insn, 23, 1))
Op = ARM_AM::sub;
bool writeback = (P == 0) || (W == 1);
@@ -1493,7 +1506,7 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
ARM_AM::ShiftOpc Opc = ARM_AM::lsl;
- switch( fieldFromInstruction32(Insn, 5, 2)) {
+ switch( fieldFromInstruction(Insn, 5, 2)) {
case 0:
Opc = ARM_AM::lsl;
break;
@@ -1509,7 +1522,7 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
default:
return MCDisassembler::Fail;
}
- unsigned amt = fieldFromInstruction32(Insn, 7, 5);
+ unsigned amt = fieldFromInstruction(Insn, 7, 5);
unsigned imm = ARM_AM::getAM2Opc(Op, amt, Opc, idx_mode);
Inst.addOperand(MCOperand::CreateImm(imm));
@@ -1529,11 +1542,11 @@ static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Val, 13, 4);
- unsigned Rm = fieldFromInstruction32(Val, 0, 4);
- unsigned type = fieldFromInstruction32(Val, 5, 2);
- unsigned imm = fieldFromInstruction32(Val, 7, 5);
- unsigned U = fieldFromInstruction32(Val, 12, 1);
+ unsigned Rn = fieldFromInstruction(Val, 13, 4);
+ unsigned Rm = fieldFromInstruction(Val, 0, 4);
+ unsigned type = fieldFromInstruction(Val, 5, 2);
+ unsigned imm = fieldFromInstruction(Val, 7, 5);
+ unsigned U = fieldFromInstruction(Val, 12, 1);
ARM_AM::ShiftOpc ShOp = ARM_AM::lsl;
switch (type) {
@@ -1570,15 +1583,15 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned type = fieldFromInstruction32(Insn, 22, 1);
- unsigned imm = fieldFromInstruction32(Insn, 8, 4);
- unsigned U = ((~fieldFromInstruction32(Insn, 23, 1)) & 1) << 8;
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- unsigned W = fieldFromInstruction32(Insn, 21, 1);
- unsigned P = fieldFromInstruction32(Insn, 24, 1);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned type = fieldFromInstruction(Insn, 22, 1);
+ unsigned imm = fieldFromInstruction(Insn, 8, 4);
+ unsigned U = ((~fieldFromInstruction(Insn, 23, 1)) & 1) << 8;
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned W = fieldFromInstruction(Insn, 21, 1);
+ unsigned P = fieldFromInstruction(Insn, 24, 1);
unsigned Rt2 = Rt + 1;
bool writeback = (W == 1) | (P == 0);
@@ -1609,7 +1622,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
S = MCDisassembler::SoftFail;
if (Rt2 == 15)
S = MCDisassembler::SoftFail;
- if (!type && fieldFromInstruction32(Insn, 8, 4))
+ if (!type && fieldFromInstruction(Insn, 8, 4))
S = MCDisassembler::SoftFail;
break;
case ARM::STRH:
@@ -1761,8 +1774,8 @@ static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned mode = fieldFromInstruction32(Insn, 23, 2);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned mode = fieldFromInstruction(Insn, 23, 2);
switch (mode) {
case 0:
@@ -1791,9 +1804,9 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- unsigned reglist = fieldFromInstruction32(Insn, 0, 16);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned reglist = fieldFromInstruction(Insn, 0, 16);
if (pred == 0xF) {
switch (Inst.getOpcode()) {
@@ -1850,9 +1863,9 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
}
// For stores (which become SRS's, the only operand is the mode.
- if (fieldFromInstruction32(Insn, 20, 1) == 0) {
+ if (fieldFromInstruction(Insn, 20, 1) == 0) {
Inst.addOperand(
- MCOperand::CreateImm(fieldFromInstruction32(Insn, 0, 4)));
+ MCOperand::CreateImm(fieldFromInstruction(Insn, 0, 4)));
return S;
}
@@ -1873,10 +1886,10 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
- unsigned imod = fieldFromInstruction32(Insn, 18, 2);
- unsigned M = fieldFromInstruction32(Insn, 17, 1);
- unsigned iflags = fieldFromInstruction32(Insn, 6, 3);
- unsigned mode = fieldFromInstruction32(Insn, 0, 5);
+ unsigned imod = fieldFromInstruction(Insn, 18, 2);
+ unsigned M = fieldFromInstruction(Insn, 17, 1);
+ unsigned iflags = fieldFromInstruction(Insn, 6, 3);
+ unsigned mode = fieldFromInstruction(Insn, 0, 5);
DecodeStatus S = MCDisassembler::Success;
@@ -1913,10 +1926,10 @@ static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
- unsigned imod = fieldFromInstruction32(Insn, 9, 2);
- unsigned M = fieldFromInstruction32(Insn, 8, 1);
- unsigned iflags = fieldFromInstruction32(Insn, 5, 3);
- unsigned mode = fieldFromInstruction32(Insn, 0, 5);
+ unsigned imod = fieldFromInstruction(Insn, 9, 2);
+ unsigned M = fieldFromInstruction(Insn, 8, 1);
+ unsigned iflags = fieldFromInstruction(Insn, 5, 3);
+ unsigned mode = fieldFromInstruction(Insn, 0, 5);
DecodeStatus S = MCDisassembler::Success;
@@ -1955,13 +1968,13 @@ static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 8, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 8, 4);
unsigned imm = 0;
- imm |= (fieldFromInstruction32(Insn, 0, 8) << 0);
- imm |= (fieldFromInstruction32(Insn, 12, 3) << 8);
- imm |= (fieldFromInstruction32(Insn, 16, 4) << 12);
- imm |= (fieldFromInstruction32(Insn, 26, 1) << 11);
+ imm |= (fieldFromInstruction(Insn, 0, 8) << 0);
+ imm |= (fieldFromInstruction(Insn, 12, 3) << 8);
+ imm |= (fieldFromInstruction(Insn, 16, 4) << 12);
+ imm |= (fieldFromInstruction(Insn, 26, 1) << 11);
if (Inst.getOpcode() == ARM::t2MOVTi16)
if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
@@ -1979,12 +1992,12 @@ static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
unsigned imm = 0;
- imm |= (fieldFromInstruction32(Insn, 0, 12) << 0);
- imm |= (fieldFromInstruction32(Insn, 16, 4) << 12);
+ imm |= (fieldFromInstruction(Insn, 0, 12) << 0);
+ imm |= (fieldFromInstruction(Insn, 16, 4) << 12);
if (Inst.getOpcode() == ARM::MOVTi16)
if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
@@ -2005,11 +2018,11 @@ static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rn = fieldFromInstruction32(Insn, 0, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 8, 4);
- unsigned Ra = fieldFromInstruction32(Insn, 12, 4);
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 8, 4);
+ unsigned Ra = fieldFromInstruction(Insn, 12, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
if (pred == 0xF)
return DecodeCPSInstruction(Inst, Insn, Address, Decoder);
@@ -2033,9 +2046,9 @@ static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned add = fieldFromInstruction32(Val, 12, 1);
- unsigned imm = fieldFromInstruction32(Val, 0, 12);
- unsigned Rn = fieldFromInstruction32(Val, 13, 4);
+ unsigned add = fieldFromInstruction(Val, 12, 1);
+ unsigned imm = fieldFromInstruction(Val, 0, 12);
+ unsigned Rn = fieldFromInstruction(Val, 13, 4);
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2053,9 +2066,9 @@ static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Val, 9, 4);
- unsigned U = fieldFromInstruction32(Val, 8, 1);
- unsigned imm = fieldFromInstruction32(Val, 0, 8);
+ unsigned Rn = fieldFromInstruction(Val, 9, 4);
+ unsigned U = fieldFromInstruction(Val, 8, 1);
+ unsigned imm = fieldFromInstruction(Val, 0, 8);
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2077,11 +2090,11 @@ static DecodeStatus
DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned imm = (fieldFromInstruction32(Insn, 0, 11) << 0) |
- (fieldFromInstruction32(Insn, 11, 1) << 18) |
- (fieldFromInstruction32(Insn, 13, 1) << 17) |
- (fieldFromInstruction32(Insn, 16, 6) << 11) |
- (fieldFromInstruction32(Insn, 26, 1) << 19);
+ unsigned imm = (fieldFromInstruction(Insn, 0, 11) << 0) |
+ (fieldFromInstruction(Insn, 11, 1) << 18) |
+ (fieldFromInstruction(Insn, 13, 1) << 17) |
+ (fieldFromInstruction(Insn, 16, 6) << 11) |
+ (fieldFromInstruction(Insn, 26, 1) << 19);
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<20>(imm<<1) + 4,
true, 4, Inst, Decoder))
Inst.addOperand(MCOperand::CreateImm(SignExtend32<20>(imm << 1)));
@@ -2093,12 +2106,12 @@ DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- unsigned imm = fieldFromInstruction32(Insn, 0, 24) << 2;
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 24) << 2;
if (pred == 0xF) {
Inst.setOpcode(ARM::BLXi);
- imm |= fieldFromInstruction32(Insn, 24, 1) << 1;
+ imm |= fieldFromInstruction(Insn, 24, 1) << 1;
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
true, 4, Inst, Decoder))
Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
@@ -2119,8 +2132,8 @@ static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rm = fieldFromInstruction32(Val, 0, 4);
- unsigned align = fieldFromInstruction32(Val, 4, 2);
+ unsigned Rm = fieldFromInstruction(Val, 0, 4);
+ unsigned align = fieldFromInstruction(Val, 4, 2);
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2136,12 +2149,12 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned wb = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- Rn |= fieldFromInstruction32(Insn, 4, 2) << 4;
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned wb = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ Rn |= fieldFromInstruction(Insn, 4, 2) << 4;
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
// First output register
switch (Inst.getOpcode()) {
@@ -2410,12 +2423,12 @@ static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned wb = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- Rn |= fieldFromInstruction32(Insn, 4, 2) << 4;
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned wb = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ Rn |= fieldFromInstruction(Insn, 4, 2) << 4;
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
// Writeback Operand
switch (Inst.getOpcode()) {
@@ -2681,12 +2694,12 @@ static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned align = fieldFromInstruction32(Insn, 4, 1);
- unsigned size = fieldFromInstruction32(Insn, 6, 2);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned align = fieldFromInstruction(Insn, 4, 1);
+ unsigned size = fieldFromInstruction(Insn, 6, 2);
align *= (1 << size);
@@ -2726,12 +2739,12 @@ static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned align = fieldFromInstruction32(Insn, 4, 1);
- unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned align = fieldFromInstruction(Insn, 4, 1);
+ unsigned size = 1 << fieldFromInstruction(Insn, 6, 2);
align *= 2*size;
switch (Inst.getOpcode()) {
@@ -2774,11 +2787,11 @@ static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned inc = fieldFromInstruction(Insn, 5, 1) + 1;
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2809,13 +2822,13 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned size = fieldFromInstruction32(Insn, 6, 2);
- unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
- unsigned align = fieldFromInstruction32(Insn, 4, 1);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned size = fieldFromInstruction(Insn, 6, 2);
+ unsigned inc = fieldFromInstruction(Insn, 5, 1) + 1;
+ unsigned align = fieldFromInstruction(Insn, 4, 1);
if (size == 0x3) {
size = 4;
@@ -2862,14 +2875,14 @@ DecodeNEONModImmInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned imm = fieldFromInstruction32(Insn, 0, 4);
- imm |= fieldFromInstruction32(Insn, 16, 3) << 4;
- imm |= fieldFromInstruction32(Insn, 24, 1) << 7;
- imm |= fieldFromInstruction32(Insn, 8, 4) << 8;
- imm |= fieldFromInstruction32(Insn, 5, 1) << 12;
- unsigned Q = fieldFromInstruction32(Insn, 6, 1);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned imm = fieldFromInstruction(Insn, 0, 4);
+ imm |= fieldFromInstruction(Insn, 16, 3) << 4;
+ imm |= fieldFromInstruction(Insn, 24, 1) << 7;
+ imm |= fieldFromInstruction(Insn, 8, 4) << 8;
+ imm |= fieldFromInstruction(Insn, 5, 1) << 12;
+ unsigned Q = fieldFromInstruction(Insn, 6, 1);
if (Q) {
if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
@@ -2907,11 +2920,11 @@ static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
- unsigned size = fieldFromInstruction32(Insn, 18, 2);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ Rm |= fieldFromInstruction(Insn, 5, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 18, 2);
if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2950,13 +2963,13 @@ static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- Rn |= fieldFromInstruction32(Insn, 7, 1) << 4;
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
- unsigned op = fieldFromInstruction32(Insn, 6, 1);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ Rn |= fieldFromInstruction(Insn, 7, 1) << 4;
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ Rm |= fieldFromInstruction(Insn, 5, 1) << 4;
+ unsigned op = fieldFromInstruction(Insn, 6, 1);
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2986,8 +2999,8 @@ static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned dst = fieldFromInstruction16(Insn, 8, 3);
- unsigned imm = fieldFromInstruction16(Insn, 0, 8);
+ unsigned dst = fieldFromInstruction(Insn, 8, 3);
+ unsigned imm = fieldFromInstruction(Insn, 0, 8);
if (!Check(S, DecodetGPRRegisterClass(Inst, dst, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3034,8 +3047,8 @@ static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Val, 0, 3);
- unsigned Rm = fieldFromInstruction32(Val, 3, 3);
+ unsigned Rn = fieldFromInstruction(Val, 0, 3);
+ unsigned Rm = fieldFromInstruction(Val, 3, 3);
if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3049,8 +3062,8 @@ static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Val, 0, 3);
- unsigned imm = fieldFromInstruction32(Val, 3, 5);
+ unsigned Rn = fieldFromInstruction(Val, 0, 3);
+ unsigned imm = fieldFromInstruction(Val, 3, 5);
if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3081,9 +3094,9 @@ static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Val, 6, 4);
- unsigned Rm = fieldFromInstruction32(Val, 2, 4);
- unsigned imm = fieldFromInstruction32(Val, 0, 2);
+ unsigned Rn = fieldFromInstruction(Val, 6, 4);
+ unsigned Rm = fieldFromInstruction(Val, 2, 4);
+ unsigned imm = fieldFromInstruction(Val, 0, 2);
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3104,13 +3117,13 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
case ARM::t2PLIs:
break;
default: {
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
return MCDisassembler::Fail;
}
}
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
if (Rn == 0xF) {
switch (Inst.getOpcode()) {
case ARM::t2LDRBs:
@@ -3133,16 +3146,16 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
}
- int imm = fieldFromInstruction32(Insn, 0, 12);
- if (!fieldFromInstruction32(Insn, 23, 1)) imm *= -1;
+ int imm = fieldFromInstruction(Insn, 0, 12);
+ if (!fieldFromInstruction(Insn, 23, 1)) imm *= -1;
Inst.addOperand(MCOperand::CreateImm(imm));
return S;
}
- unsigned addrmode = fieldFromInstruction32(Insn, 4, 2);
- addrmode |= fieldFromInstruction32(Insn, 0, 4) << 2;
- addrmode |= fieldFromInstruction32(Insn, 16, 4) << 6;
+ unsigned addrmode = fieldFromInstruction(Insn, 4, 2);
+ addrmode |= fieldFromInstruction(Insn, 0, 4) << 2;
+ addrmode |= fieldFromInstruction(Insn, 16, 4) << 6;
if (!Check(S, DecodeT2AddrModeSOReg(Inst, addrmode, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3151,9 +3164,14 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- int imm = Val & 0xFF;
- if (!(Val & 0x100)) imm *= -1;
- Inst.addOperand(MCOperand::CreateImm(imm << 2));
+ if (Val == 0)
+ Inst.addOperand(MCOperand::CreateImm(INT32_MIN));
+ else {
+ int imm = Val & 0xFF;
+
+ if (!(Val & 0x100)) imm *= -1;
+ Inst.addOperand(MCOperand::CreateImm(imm << 2));
+ }
return MCDisassembler::Success;
}
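The hunk above makes DecodeT2Imm8S4 keep the offset pre-scaled and reserve a sentinel for the "-0" encoding (U = 0, imm8 = 0), which the InstPrinter hunks later in this patch render as "#-0". A minimal sketch of the resulting decode rule, assuming Val carries U:imm8 exactly as in the code above (illustrative only, not a drop-in for the disassembler):

    // Sketch: mirrors the decoder logic in the hunk above.
    static int32_t decodeT2Imm8S4(unsigned Val) {
      if (Val == 0)
        return INT32_MIN;          // U=0, imm8=0 is the "-0" offset, printed as "#-0"
      int32_t Imm = Val & 0xFF;    // 8-bit magnitude
      if (!(Val & 0x100))          // U bit clear: negative offset
        Imm = -Imm;
      return Imm << 2;             // offsets are multiples of 4, stored pre-scaled
    }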
@@ -3162,8 +3180,8 @@ static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Val, 9, 4);
- unsigned imm = fieldFromInstruction32(Val, 0, 9);
+ unsigned Rn = fieldFromInstruction(Val, 9, 4);
+ unsigned imm = fieldFromInstruction(Val, 0, 9);
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3177,8 +3195,8 @@ static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Val, 8, 4);
- unsigned imm = fieldFromInstruction32(Val, 0, 8);
+ unsigned Rn = fieldFromInstruction(Val, 8, 4);
+ unsigned imm = fieldFromInstruction(Val, 0, 8);
if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3205,8 +3223,8 @@ static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Val, 9, 4);
- unsigned imm = fieldFromInstruction32(Val, 0, 9);
+ unsigned Rn = fieldFromInstruction(Val, 9, 4);
+ unsigned imm = fieldFromInstruction(Val, 0, 9);
// Some instructions always use an additive offset.
switch (Inst.getOpcode()) {
@@ -3236,12 +3254,12 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned addr = fieldFromInstruction32(Insn, 0, 8);
- addr |= fieldFromInstruction32(Insn, 9, 1) << 8;
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned addr = fieldFromInstruction(Insn, 0, 8);
+ addr |= fieldFromInstruction(Insn, 9, 1) << 8;
addr |= Rn << 9;
- unsigned load = fieldFromInstruction32(Insn, 20, 1);
+ unsigned load = fieldFromInstruction(Insn, 20, 1);
if (!load) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
@@ -3266,8 +3284,8 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Val, 13, 4);
- unsigned imm = fieldFromInstruction32(Val, 0, 12);
+ unsigned Rn = fieldFromInstruction(Val, 13, 4);
+ unsigned imm = fieldFromInstruction(Val, 0, 12);
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3279,7 +3297,7 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder) {
- unsigned imm = fieldFromInstruction16(Insn, 0, 7);
+ unsigned imm = fieldFromInstruction(Insn, 0, 7);
Inst.addOperand(MCOperand::CreateReg(ARM::SP));
Inst.addOperand(MCOperand::CreateReg(ARM::SP));
@@ -3293,8 +3311,8 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
DecodeStatus S = MCDisassembler::Success;
if (Inst.getOpcode() == ARM::tADDrSP) {
- unsigned Rdm = fieldFromInstruction16(Insn, 0, 3);
- Rdm |= fieldFromInstruction16(Insn, 7, 1) << 3;
+ unsigned Rdm = fieldFromInstruction(Insn, 0, 3);
+ Rdm |= fieldFromInstruction(Insn, 7, 1) << 3;
if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3302,7 +3320,7 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
return MCDisassembler::Fail;
} else if (Inst.getOpcode() == ARM::tADDspr) {
- unsigned Rm = fieldFromInstruction16(Insn, 3, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 3, 4);
Inst.addOperand(MCOperand::CreateReg(ARM::SP));
Inst.addOperand(MCOperand::CreateReg(ARM::SP));
@@ -3315,8 +3333,8 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder) {
- unsigned imod = fieldFromInstruction16(Insn, 4, 1) | 0x2;
- unsigned flags = fieldFromInstruction16(Insn, 0, 3);
+ unsigned imod = fieldFromInstruction(Insn, 4, 1) | 0x2;
+ unsigned flags = fieldFromInstruction(Insn, 0, 3);
Inst.addOperand(MCOperand::CreateImm(imod));
Inst.addOperand(MCOperand::CreateImm(flags));
@@ -3327,8 +3345,8 @@ static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned add = fieldFromInstruction32(Insn, 4, 1);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned add = fieldFromInstruction(Insn, 4, 1);
if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3375,8 +3393,8 @@ DecodeThumbTableBranch(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
if (Rn == ARM::SP) S = MCDisassembler::SoftFail;
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
@@ -3391,9 +3409,9 @@ DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned pred = fieldFromInstruction32(Insn, 22, 4);
+ unsigned pred = fieldFromInstruction(Insn, 22, 4);
if (pred == 0xE || pred == 0xF) {
- unsigned opc = fieldFromInstruction32(Insn, 4, 28);
+ unsigned opc = fieldFromInstruction(Insn, 4, 28);
switch (opc) {
default:
return MCDisassembler::Fail;
@@ -3408,15 +3426,15 @@ DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
break;
}
- unsigned imm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 4);
return DecodeMemBarrierOption(Inst, imm, Address, Decoder);
}
- unsigned brtarget = fieldFromInstruction32(Insn, 0, 11) << 1;
- brtarget |= fieldFromInstruction32(Insn, 11, 1) << 19;
- brtarget |= fieldFromInstruction32(Insn, 13, 1) << 18;
- brtarget |= fieldFromInstruction32(Insn, 16, 6) << 12;
- brtarget |= fieldFromInstruction32(Insn, 26, 1) << 20;
+ unsigned brtarget = fieldFromInstruction(Insn, 0, 11) << 1;
+ brtarget |= fieldFromInstruction(Insn, 11, 1) << 19;
+ brtarget |= fieldFromInstruction(Insn, 13, 1) << 18;
+ brtarget |= fieldFromInstruction(Insn, 16, 6) << 12;
+ brtarget |= fieldFromInstruction(Insn, 26, 1) << 20;
if (!Check(S, DecodeT2BROperand(Inst, brtarget, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3431,10 +3449,10 @@ DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
// a splat operation or a rotation.
static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- unsigned ctrl = fieldFromInstruction32(Val, 10, 2);
+ unsigned ctrl = fieldFromInstruction(Val, 10, 2);
if (ctrl == 0) {
- unsigned byte = fieldFromInstruction32(Val, 8, 2);
- unsigned imm = fieldFromInstruction32(Val, 0, 8);
+ unsigned byte = fieldFromInstruction(Val, 8, 2);
+ unsigned imm = fieldFromInstruction(Val, 0, 8);
switch (byte) {
case 0:
Inst.addOperand(MCOperand::CreateImm(imm));
@@ -3451,8 +3469,8 @@ static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
break;
}
} else {
- unsigned unrot = fieldFromInstruction32(Val, 0, 7) | 0x80;
- unsigned rot = fieldFromInstruction32(Val, 7, 5);
+ unsigned unrot = fieldFromInstruction(Val, 0, 7) | 0x80;
+ unsigned rot = fieldFromInstruction(Val, 7, 5);
unsigned imm = (unrot >> rot) | (unrot << ((32-rot)&31));
Inst.addOperand(MCOperand::CreateImm(imm));
}
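For the rotated form handled in the else branch above, the 12-bit value packs a 5-bit rotate amount (bits 7-11) over a 7-bit immediate whose top bit is forced to one. An illustrative recomputation of that branch (the helper name is made up, not LLVM API):

    static unsigned decodeRotatedT2SOImm(unsigned Val) {
      unsigned Unrot = (Val & 0x7F) | 0x80;   // fieldFromInstruction(Val, 0, 7) | 0x80
      unsigned Rot   = (Val >> 7) & 0x1F;     // fieldFromInstruction(Val, 7, 5)
      return (Unrot >> Rot) | (Unrot << ((32 - Rot) & 31));  // rotate right by Rot
    }
    // Example: Val = 0x400 gives Unrot = 0x80, Rot = 8, immediate = 0x80000000.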
@@ -3494,19 +3512,8 @@ static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- switch (Val) {
- default:
+ if (Val & ~0xf)
return MCDisassembler::Fail;
- case 0xF: // SY
- case 0xE: // ST
- case 0xB: // ISH
- case 0xA: // ISHST
- case 0x7: // NSH
- case 0x6: // NSHST
- case 0x3: // OSH
- case 0x2: // OSHST
- break;
- }
Inst.addOperand(MCOperand::CreateImm(Val));
return MCDisassembler::Success;
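With this change DecodeMemBarrierOption accepts every 4-bit option value rather than whitelisting the architecturally defined ones; reserved encodings survive decoding and are named by ARM_MB::MemBOptToString (extended in the ARMBaseInfo.h hunk below), which prints them as raw values such as "#0x4". A sketch of the relaxed acceptance rule, under the same assumption that only the low 4 bits are meaningful:

    // Only values that do not fit in 4 bits are rejected.
    static bool isValidMemBarrierOption(unsigned Val) {
      return (Val & ~0xFU) == 0;   // e.g. 15 ("sy") and 4 ("#0x4") both pass, 16 fails
    }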
@@ -3523,9 +3530,9 @@ static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail;
@@ -3546,10 +3553,10 @@ static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder){
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- unsigned Rt = fieldFromInstruction32(Insn, 0, 4);
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3573,12 +3580,12 @@ static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned imm = fieldFromInstruction32(Insn, 0, 12);
- imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
- imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 12);
+ imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
@@ -3598,13 +3605,13 @@ static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned imm = fieldFromInstruction32(Insn, 0, 12);
- imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
- imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 12);
+ imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
if (Rm == 0xF) S = MCDisassembler::SoftFail;
@@ -3626,12 +3633,12 @@ static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned imm = fieldFromInstruction32(Insn, 0, 12);
- imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
- imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 12);
+ imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
@@ -3651,12 +3658,12 @@ static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned imm = fieldFromInstruction32(Insn, 0, 12);
- imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
- imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 12);
+ imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
@@ -3676,11 +3683,11 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned size = fieldFromInstruction32(Insn, 10, 2);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
unsigned align = 0;
unsigned index = 0;
@@ -3688,22 +3695,22 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
default:
return MCDisassembler::Fail;
case 0:
- if (fieldFromInstruction32(Insn, 4, 1))
+ if (fieldFromInstruction(Insn, 4, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 5, 3);
+ index = fieldFromInstruction(Insn, 5, 3);
break;
case 1:
- if (fieldFromInstruction32(Insn, 5, 1))
+ if (fieldFromInstruction(Insn, 5, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 6, 2);
- if (fieldFromInstruction32(Insn, 4, 1))
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 4, 1))
align = 2;
break;
case 2:
- if (fieldFromInstruction32(Insn, 6, 1))
+ if (fieldFromInstruction(Insn, 6, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 7, 1);
- if (fieldFromInstruction32(Insn, 4, 2) != 0)
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 4, 2) != 0)
align = 4;
}
@@ -3735,11 +3742,11 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned size = fieldFromInstruction32(Insn, 10, 2);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
unsigned align = 0;
unsigned index = 0;
@@ -3747,22 +3754,22 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
default:
return MCDisassembler::Fail;
case 0:
- if (fieldFromInstruction32(Insn, 4, 1))
+ if (fieldFromInstruction(Insn, 4, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 5, 3);
+ index = fieldFromInstruction(Insn, 5, 3);
break;
case 1:
- if (fieldFromInstruction32(Insn, 5, 1))
+ if (fieldFromInstruction(Insn, 5, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 6, 2);
- if (fieldFromInstruction32(Insn, 4, 1))
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 4, 1))
align = 2;
break;
case 2:
- if (fieldFromInstruction32(Insn, 6, 1))
+ if (fieldFromInstruction(Insn, 6, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 7, 1);
- if (fieldFromInstruction32(Insn, 4, 2) != 0)
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 4, 2) != 0)
align = 4;
}
@@ -3793,11 +3800,11 @@ static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned size = fieldFromInstruction32(Insn, 10, 2);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
unsigned align = 0;
unsigned index = 0;
@@ -3806,24 +3813,24 @@ static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
default:
return MCDisassembler::Fail;
case 0:
- index = fieldFromInstruction32(Insn, 5, 3);
- if (fieldFromInstruction32(Insn, 4, 1))
+ index = fieldFromInstruction(Insn, 5, 3);
+ if (fieldFromInstruction(Insn, 4, 1))
align = 2;
break;
case 1:
- index = fieldFromInstruction32(Insn, 6, 2);
- if (fieldFromInstruction32(Insn, 4, 1))
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 4, 1))
align = 4;
- if (fieldFromInstruction32(Insn, 5, 1))
+ if (fieldFromInstruction(Insn, 5, 1))
inc = 2;
break;
case 2:
- if (fieldFromInstruction32(Insn, 5, 1))
+ if (fieldFromInstruction(Insn, 5, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 7, 1);
- if (fieldFromInstruction32(Insn, 4, 1) != 0)
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 4, 1) != 0)
align = 8;
- if (fieldFromInstruction32(Insn, 6, 1))
+ if (fieldFromInstruction(Insn, 6, 1))
inc = 2;
break;
}
@@ -3860,11 +3867,11 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned size = fieldFromInstruction32(Insn, 10, 2);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
unsigned align = 0;
unsigned index = 0;
@@ -3873,24 +3880,24 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
default:
return MCDisassembler::Fail;
case 0:
- index = fieldFromInstruction32(Insn, 5, 3);
- if (fieldFromInstruction32(Insn, 4, 1))
+ index = fieldFromInstruction(Insn, 5, 3);
+ if (fieldFromInstruction(Insn, 4, 1))
align = 2;
break;
case 1:
- index = fieldFromInstruction32(Insn, 6, 2);
- if (fieldFromInstruction32(Insn, 4, 1))
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 4, 1))
align = 4;
- if (fieldFromInstruction32(Insn, 5, 1))
+ if (fieldFromInstruction(Insn, 5, 1))
inc = 2;
break;
case 2:
- if (fieldFromInstruction32(Insn, 5, 1))
+ if (fieldFromInstruction(Insn, 5, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 7, 1);
- if (fieldFromInstruction32(Insn, 4, 1) != 0)
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 4, 1) != 0)
align = 8;
- if (fieldFromInstruction32(Insn, 6, 1))
+ if (fieldFromInstruction(Insn, 6, 1))
inc = 2;
break;
}
@@ -3924,11 +3931,11 @@ static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned size = fieldFromInstruction32(Insn, 10, 2);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
unsigned align = 0;
unsigned index = 0;
@@ -3937,22 +3944,22 @@ static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
default:
return MCDisassembler::Fail;
case 0:
- if (fieldFromInstruction32(Insn, 4, 1))
+ if (fieldFromInstruction(Insn, 4, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 5, 3);
+ index = fieldFromInstruction(Insn, 5, 3);
break;
case 1:
- if (fieldFromInstruction32(Insn, 4, 1))
+ if (fieldFromInstruction(Insn, 4, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 6, 2);
- if (fieldFromInstruction32(Insn, 5, 1))
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 5, 1))
inc = 2;
break;
case 2:
- if (fieldFromInstruction32(Insn, 4, 2))
+ if (fieldFromInstruction(Insn, 4, 2))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 7, 1);
- if (fieldFromInstruction32(Insn, 6, 1))
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 6, 1))
inc = 2;
break;
}
@@ -3994,11 +4001,11 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned size = fieldFromInstruction32(Insn, 10, 2);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
unsigned align = 0;
unsigned index = 0;
@@ -4007,22 +4014,22 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
default:
return MCDisassembler::Fail;
case 0:
- if (fieldFromInstruction32(Insn, 4, 1))
+ if (fieldFromInstruction(Insn, 4, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 5, 3);
+ index = fieldFromInstruction(Insn, 5, 3);
break;
case 1:
- if (fieldFromInstruction32(Insn, 4, 1))
+ if (fieldFromInstruction(Insn, 4, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 6, 2);
- if (fieldFromInstruction32(Insn, 5, 1))
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 5, 1))
inc = 2;
break;
case 2:
- if (fieldFromInstruction32(Insn, 4, 2))
+ if (fieldFromInstruction(Insn, 4, 2))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 7, 1);
- if (fieldFromInstruction32(Insn, 6, 1))
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 6, 1))
inc = 2;
break;
}
@@ -4058,11 +4065,11 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned size = fieldFromInstruction32(Insn, 10, 2);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
unsigned align = 0;
unsigned index = 0;
@@ -4071,22 +4078,22 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
default:
return MCDisassembler::Fail;
case 0:
- if (fieldFromInstruction32(Insn, 4, 1))
+ if (fieldFromInstruction(Insn, 4, 1))
align = 4;
- index = fieldFromInstruction32(Insn, 5, 3);
+ index = fieldFromInstruction(Insn, 5, 3);
break;
case 1:
- if (fieldFromInstruction32(Insn, 4, 1))
+ if (fieldFromInstruction(Insn, 4, 1))
align = 8;
- index = fieldFromInstruction32(Insn, 6, 2);
- if (fieldFromInstruction32(Insn, 5, 1))
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 5, 1))
inc = 2;
break;
case 2:
- if (fieldFromInstruction32(Insn, 4, 2))
- align = 4 << fieldFromInstruction32(Insn, 4, 2);
- index = fieldFromInstruction32(Insn, 7, 1);
- if (fieldFromInstruction32(Insn, 6, 1))
+ if (fieldFromInstruction(Insn, 4, 2))
+ align = 4 << fieldFromInstruction(Insn, 4, 2);
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 6, 1))
inc = 2;
break;
}
@@ -4132,11 +4139,11 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned size = fieldFromInstruction32(Insn, 10, 2);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
unsigned align = 0;
unsigned index = 0;
@@ -4145,22 +4152,22 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
default:
return MCDisassembler::Fail;
case 0:
- if (fieldFromInstruction32(Insn, 4, 1))
+ if (fieldFromInstruction(Insn, 4, 1))
align = 4;
- index = fieldFromInstruction32(Insn, 5, 3);
+ index = fieldFromInstruction(Insn, 5, 3);
break;
case 1:
- if (fieldFromInstruction32(Insn, 4, 1))
+ if (fieldFromInstruction(Insn, 4, 1))
align = 8;
- index = fieldFromInstruction32(Insn, 6, 2);
- if (fieldFromInstruction32(Insn, 5, 1))
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 5, 1))
inc = 2;
break;
case 2:
- if (fieldFromInstruction32(Insn, 4, 2))
- align = 4 << fieldFromInstruction32(Insn, 4, 2);
- index = fieldFromInstruction32(Insn, 7, 1);
- if (fieldFromInstruction32(Insn, 6, 1))
+ if (fieldFromInstruction(Insn, 4, 2))
+ align = 4 << fieldFromInstruction(Insn, 4, 2);
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 6, 1))
inc = 2;
break;
}
@@ -4196,11 +4203,11 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 5, 1);
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- Rm |= fieldFromInstruction32(Insn, 0, 4) << 1;
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 5, 1);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ Rm |= fieldFromInstruction(Insn, 0, 4) << 1;
if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F)
S = MCDisassembler::SoftFail;
@@ -4222,11 +4229,11 @@ static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 5, 1);
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- Rm |= fieldFromInstruction32(Insn, 0, 4) << 1;
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 5, 1);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ Rm |= fieldFromInstruction(Insn, 0, 4) << 1;
if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F)
S = MCDisassembler::SoftFail;
@@ -4248,8 +4255,8 @@ static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned pred = fieldFromInstruction16(Insn, 4, 4);
- unsigned mask = fieldFromInstruction16(Insn, 0, 4);
+ unsigned pred = fieldFromInstruction(Insn, 4, 4);
+ unsigned mask = fieldFromInstruction(Insn, 0, 4);
if (pred == 0xF) {
pred = 0xE;
@@ -4271,13 +4278,13 @@ DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned Rt2 = fieldFromInstruction32(Insn, 8, 4);
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned addr = fieldFromInstruction32(Insn, 0, 8);
- unsigned W = fieldFromInstruction32(Insn, 21, 1);
- unsigned U = fieldFromInstruction32(Insn, 23, 1);
- unsigned P = fieldFromInstruction32(Insn, 24, 1);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Insn, 8, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned addr = fieldFromInstruction(Insn, 0, 8);
+ unsigned W = fieldFromInstruction(Insn, 21, 1);
+ unsigned U = fieldFromInstruction(Insn, 23, 1);
+ unsigned P = fieldFromInstruction(Insn, 24, 1);
bool writeback = (W == 1) | (P == 0);
addr |= (U << 8) | (Rn << 9);
@@ -4308,13 +4315,13 @@ DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned Rt2 = fieldFromInstruction32(Insn, 8, 4);
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned addr = fieldFromInstruction32(Insn, 0, 8);
- unsigned W = fieldFromInstruction32(Insn, 21, 1);
- unsigned U = fieldFromInstruction32(Insn, 23, 1);
- unsigned P = fieldFromInstruction32(Insn, 24, 1);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Insn, 8, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned addr = fieldFromInstruction(Insn, 0, 8);
+ unsigned W = fieldFromInstruction(Insn, 21, 1);
+ unsigned U = fieldFromInstruction(Insn, 23, 1);
+ unsigned P = fieldFromInstruction(Insn, 24, 1);
bool writeback = (W == 1) | (P == 0);
addr |= (U << 8) | (Rn << 9);
@@ -4340,13 +4347,13 @@ DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn,
uint64_t Address, const void *Decoder) {
- unsigned sign1 = fieldFromInstruction32(Insn, 21, 1);
- unsigned sign2 = fieldFromInstruction32(Insn, 23, 1);
+ unsigned sign1 = fieldFromInstruction(Insn, 21, 1);
+ unsigned sign2 = fieldFromInstruction(Insn, 23, 1);
if (sign1 != sign2) return MCDisassembler::Fail;
- unsigned Val = fieldFromInstruction32(Insn, 0, 8);
- Val |= fieldFromInstruction32(Insn, 12, 3) << 8;
- Val |= fieldFromInstruction32(Insn, 26, 1) << 11;
+ unsigned Val = fieldFromInstruction(Insn, 0, 8);
+ Val |= fieldFromInstruction(Insn, 12, 3) << 8;
+ Val |= fieldFromInstruction(Insn, 26, 1) << 11;
Val |= sign1 << 12;
Inst.addOperand(MCOperand::CreateImm(SignExtend32<13>(Val)));
@@ -4366,10 +4373,10 @@ static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val,
static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned Rt2 = fieldFromInstruction32(Insn, 0, 4);
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
if (pred == 0xF)
return DecodeCPSInstruction(Inst, Insn, Address, Decoder);
@@ -4393,12 +4400,12 @@ static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
- unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0);
- Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4);
- unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0);
- Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4);
- unsigned imm = fieldFromInstruction32(Insn, 16, 6);
- unsigned cmode = fieldFromInstruction32(Insn, 8, 4);
+ unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
+ Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
+ unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
+ Vm |= (fieldFromInstruction(Insn, 5, 1) << 4);
+ unsigned imm = fieldFromInstruction(Insn, 16, 6);
+ unsigned cmode = fieldFromInstruction(Insn, 8, 4);
DecodeStatus S = MCDisassembler::Success;
@@ -4421,12 +4428,12 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
- unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0);
- Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4);
- unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0);
- Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4);
- unsigned imm = fieldFromInstruction32(Insn, 16, 6);
- unsigned cmode = fieldFromInstruction32(Insn, 8, 4);
+ unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
+ Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
+ unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
+ Vm |= (fieldFromInstruction(Insn, 5, 1) << 4);
+ unsigned imm = fieldFromInstruction(Insn, 16, 6);
+ unsigned cmode = fieldFromInstruction(Insn, 8, 4);
DecodeStatus S = MCDisassembler::Success;
@@ -4451,13 +4458,13 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Val, 16, 4);
- unsigned Rt = fieldFromInstruction32(Val, 12, 4);
- unsigned Rm = fieldFromInstruction32(Val, 0, 4);
- Rm |= (fieldFromInstruction32(Val, 23, 1) << 4);
- unsigned Cond = fieldFromInstruction32(Val, 28, 4);
+ unsigned Rn = fieldFromInstruction(Val, 16, 4);
+ unsigned Rt = fieldFromInstruction(Val, 12, 4);
+ unsigned Rm = fieldFromInstruction(Val, 0, 4);
+ Rm |= (fieldFromInstruction(Val, 23, 1) << 4);
+ unsigned Cond = fieldFromInstruction(Val, 28, 4);
- if (fieldFromInstruction32(Val, 8, 4) != 0 || Rn == Rt)
+ if (fieldFromInstruction(Val, 8, 4) != 0 || Rn == Rt)
S = MCDisassembler::SoftFail;
if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder)))
@@ -4479,11 +4486,11 @@ static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val,
DecodeStatus S = MCDisassembler::Success;
- unsigned CRm = fieldFromInstruction32(Val, 0, 4);
- unsigned opc1 = fieldFromInstruction32(Val, 4, 4);
- unsigned cop = fieldFromInstruction32(Val, 8, 4);
- unsigned Rt = fieldFromInstruction32(Val, 12, 4);
- unsigned Rt2 = fieldFromInstruction32(Val, 16, 4);
+ unsigned CRm = fieldFromInstruction(Val, 0, 4);
+ unsigned opc1 = fieldFromInstruction(Val, 4, 4);
+ unsigned cop = fieldFromInstruction(Val, 8, 4);
+ unsigned Rt = fieldFromInstruction(Val, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Val, 16, 4);
if ((cop & ~0x1) == 0xa)
return MCDisassembler::Fail;
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 2f6b1b0..8b9109e 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -792,6 +792,25 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
llvm_unreachable("Unhandled PC-relative pseudo-instruction!");
}
+void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ if (MO.isExpr()) {
+ O << *MO.getExpr();
+ return;
+ }
+
+ int32_t OffImm = (int32_t)MO.getImm();
+
+ if (OffImm == INT32_MIN)
+ O << "#-0";
+ else if (OffImm < 0)
+ O << "#-" << -OffImm;
+ else
+ O << "#" << OffImm;
+}
+
void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
O << "#" << MI->getOperand(OpNum).getImm() * 4;
@@ -953,12 +972,17 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
O << "[" << getRegisterName(MO1.getReg());
- int32_t OffImm = (int32_t)MO2.getImm() / 4;
+ int32_t OffImm = (int32_t)MO2.getImm();
+
+ assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
+
// Don't print +0.
- if (OffImm < 0)
- O << ", #-" << -OffImm * 4;
+ if (OffImm == INT32_MIN)
+ O << ", #-0";
+ else if (OffImm < 0)
+ O << ", #-" << -OffImm;
else if (OffImm > 0)
- O << ", #" << OffImm * 4;
+ O << ", #" << OffImm;
O << "]";
}
@@ -990,15 +1014,17 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- int32_t OffImm = (int32_t)MO1.getImm() / 4;
+ int32_t OffImm = (int32_t)MO1.getImm();
+
+ assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
+
// Don't print +0.
- if (OffImm != 0) {
- O << ", ";
- if (OffImm < 0)
- O << "#-" << -OffImm * 4;
- else if (OffImm > 0)
- O << "#" << OffImm * 4;
- }
+ if (OffImm == INT32_MIN)
+ O << ", #-0";
+ else if (OffImm < 0)
+ O << ", #-" << -OffImm;
+ else if (OffImm > 0)
+ O << ", #" << OffImm;
}
void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 8acb7ee..73d7bfd 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -73,6 +73,7 @@ public:
void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index ae11be8..de48a0e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -120,14 +120,22 @@ namespace ARM_MB {
// The Memory Barrier Option constants map directly to the 4-bit encoding of
// the option field for memory barrier operations.
enum MemBOpt {
- SY = 15,
- ST = 14,
- ISH = 11,
- ISHST = 10,
- NSH = 7,
- NSHST = 6,
+ RESERVED_0 = 0,
+ RESERVED_1 = 1,
+ OSHST = 2,
OSH = 3,
- OSHST = 2
+ RESERVED_4 = 4,
+ RESERVED_5 = 5,
+ NSHST = 6,
+ NSH = 7,
+ RESERVED_8 = 8,
+ RESERVED_9 = 9,
+ ISHST = 10,
+ ISH = 11,
+ RESERVED_12 = 12,
+ RESERVED_13 = 13,
+ ST = 14,
+ SY = 15
};
inline static const char *MemBOptToString(unsigned val) {
@@ -135,92 +143,24 @@ namespace ARM_MB {
default: llvm_unreachable("Unknown memory operation");
case SY: return "sy";
case ST: return "st";
+ case RESERVED_13: return "#0xd";
+ case RESERVED_12: return "#0xc";
case ISH: return "ish";
case ISHST: return "ishst";
+ case RESERVED_9: return "#0x9";
+ case RESERVED_8: return "#0x8";
case NSH: return "nsh";
case NSHST: return "nshst";
+ case RESERVED_5: return "#0x5";
+ case RESERVED_4: return "#0x4";
case OSH: return "osh";
case OSHST: return "oshst";
+ case RESERVED_1: return "#0x1";
+ case RESERVED_0: return "#0x0";
}
}
} // namespace ARM_MB
-/// getARMRegisterNumbering - Given the enum value for some register, e.g.
-/// ARM::LR, return the number that it corresponds to (e.g. 14).
-inline static unsigned getARMRegisterNumbering(unsigned Reg) {
- using namespace ARM;
- switch (Reg) {
- default:
- llvm_unreachable("Unknown ARM register!");
- case R0: case S0: case D0: case Q0: return 0;
- case R1: case S1: case D1: case Q1: return 1;
- case R2: case S2: case D2: case Q2: return 2;
- case R3: case S3: case D3: case Q3: return 3;
- case R4: case S4: case D4: case Q4: return 4;
- case R5: case S5: case D5: case Q5: return 5;
- case R6: case S6: case D6: case Q6: return 6;
- case R7: case S7: case D7: case Q7: return 7;
- case R8: case S8: case D8: case Q8: return 8;
- case R9: case S9: case D9: case Q9: return 9;
- case R10: case S10: case D10: case Q10: return 10;
- case R11: case S11: case D11: case Q11: return 11;
- case R12: case S12: case D12: case Q12: return 12;
- case SP: case S13: case D13: case Q13: return 13;
- case LR: case S14: case D14: case Q14: return 14;
- case PC: case S15: case D15: case Q15: return 15;
-
- case S16: case D16: return 16;
- case S17: case D17: return 17;
- case S18: case D18: return 18;
- case S19: case D19: return 19;
- case S20: case D20: return 20;
- case S21: case D21: return 21;
- case S22: case D22: return 22;
- case S23: case D23: return 23;
- case S24: case D24: return 24;
- case S25: case D25: return 25;
- case S26: case D26: return 26;
- case S27: case D27: return 27;
- case S28: case D28: return 28;
- case S29: case D29: return 29;
- case S30: case D30: return 30;
- case S31: case D31: return 31;
-
- // Composite registers use the regnum of the first register in the list.
- /* Q0 */ case D0_D2: return 0;
- case D1_D2: case D1_D3: return 1;
- /* Q1 */ case D2_D4: return 2;
- case D3_D4: case D3_D5: return 3;
- /* Q2 */ case D4_D6: return 4;
- case D5_D6: case D5_D7: return 5;
- /* Q3 */ case D6_D8: return 6;
- case D7_D8: case D7_D9: return 7;
- /* Q4 */ case D8_D10: return 8;
- case D9_D10: case D9_D11: return 9;
- /* Q5 */ case D10_D12: return 10;
- case D11_D12: case D11_D13: return 11;
- /* Q6 */ case D12_D14: return 12;
- case D13_D14: case D13_D15: return 13;
- /* Q7 */ case D14_D16: return 14;
- case D15_D16: case D15_D17: return 15;
- /* Q8 */ case D16_D18: return 16;
- case D17_D18: case D17_D19: return 17;
- /* Q9 */ case D18_D20: return 18;
- case D19_D20: case D19_D21: return 19;
- /* Q10 */ case D20_D22: return 20;
- case D21_D22: case D21_D23: return 21;
- /* Q11 */ case D22_D24: return 22;
- case D23_D24: case D23_D25: return 23;
- /* Q12 */ case D24_D26: return 24;
- case D25_D26: case D25_D27: return 25;
- /* Q13 */ case D26_D28: return 26;
- case D27_D28: case D27_D29: return 27;
- /* Q14 */ case D28_D30: return 28;
- case D29_D30: case D29_D31: return 29;
- /* Q15 */
- }
-}
-
/// isARMLowRegister - Returns true if the register is a low register (r0-r7).
///
static inline bool isARMLowRegister(unsigned Reg) {
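With the enum above covering all sixteen 4-bit encodings, every barrier option the disassembler can see maps to a printable string; the reserved values print as hex immediates instead of falling into the llvm_unreachable default. A small sketch of that round trip (dumpBarrierOptions is our name; the include paths assume a file inside the ARM target directory):

    #include "MCTargetDesc/ARMBaseInfo.h"     // ARM_MB::MemBOptToString
    #include "llvm/Support/raw_ostream.h"

    // Print the textual form of every possible 4-bit barrier option value.
    static void dumpBarrierOptions() {
      for (unsigned Val = 0; Val != 16; ++Val)
        llvm::outs() << Val << " -> " << ARM_MB::MemBOptToString(Val) << "\n";
    }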
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 1964bcd..94f1082 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -18,6 +18,7 @@
#include "MCTargetDesc/ARMMCExpr.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -38,11 +39,12 @@ class ARMMCCodeEmitter : public MCCodeEmitter {
void operator=(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
const MCInstrInfo &MCII;
const MCSubtargetInfo &STI;
+ const MCContext &CTX;
public:
ARMMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
MCContext &ctx)
- : MCII(mcii), STI(sti) {
+ : MCII(mcii), STI(sti), CTX(ctx) {
}
~ARMMCCodeEmitter() {}
@@ -405,7 +407,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups) const {
if (MO.isReg()) {
unsigned Reg = MO.getReg();
- unsigned RegNo = getARMRegisterNumbering(Reg);
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg);
// Q registers are encoded as 2x their register number.
switch (Reg) {
@@ -434,7 +436,7 @@ EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg,
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
- Reg = getARMRegisterNumbering(MO.getReg());
+ Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
int32_t SImm = MO1.getImm();
bool isAdd = true;
@@ -641,8 +643,8 @@ getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
return Val;
}
-/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
-/// target.
+/// getAdrLabelOpValue - Return encoding info for 12-bit shifted-immediate
+/// ADR label target.
uint32_t ARMMCCodeEmitter::
getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
@@ -652,15 +654,23 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
Fixups);
int32_t offset = MO.getImm();
uint32_t Val = 0x2000;
- if (offset < 0) {
+
+ if (offset == INT32_MIN) {
+ Val = 0x1000;
+ offset = 0;
+ } else if (offset < 0) {
Val = 0x1000;
offset *= -1;
}
- Val |= offset;
+
+ int SoImmVal = ARM_AM::getSOImmVal(offset);
+ assert(SoImmVal != -1 && "Not a valid so_imm value!");
+
+ Val |= SoImmVal;
return Val;
}
-/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
+/// getT2AdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
/// target.
uint32_t ARMMCCodeEmitter::
getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
@@ -670,14 +680,16 @@ getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12,
Fixups);
int32_t Val = MO.getImm();
- if (Val < 0) {
+ if (Val == INT32_MIN)
+ Val = 0x1000;
+ else if (Val < 0) {
Val *= -1;
Val |= 0x1000;
}
return Val;
}
-/// getAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label
+/// getThumbAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label
/// target.
uint32_t ARMMCCodeEmitter::
getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
@@ -699,8 +711,8 @@ getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
// {2-0} = Rn
const MCOperand &MO1 = MI.getOperand(OpIdx);
const MCOperand &MO2 = MI.getOperand(OpIdx + 1);
- unsigned Rn = getARMRegisterNumbering(MO1.getReg());
- unsigned Rm = getARMRegisterNumbering(MO2.getReg());
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
+ unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO2.getReg());
return (Rm << 3) | Rn;
}
@@ -716,7 +728,7 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
// If The first operand isn't a register, we have a label reference.
const MCOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg()) {
- Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
Imm12 = 0;
isAdd = false ; // 'U' bit is set as part of the fixup.
@@ -796,7 +808,7 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
// If The first operand isn't a register, we have a label reference.
const MCOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg()) {
- Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
Imm8 = 0;
isAdd = false ; // 'U' bit is set as part of the fixup.
@@ -832,7 +844,7 @@ getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx,
// {7-0} = imm8
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
- unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ unsigned Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
unsigned Imm8 = MO1.getImm();
return (Reg << 8) | Imm8;
}
@@ -915,8 +927,8 @@ getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx+1);
const MCOperand &MO2 = MI.getOperand(OpIdx+2);
- unsigned Rn = getARMRegisterNumbering(MO.getReg());
- unsigned Rm = getARMRegisterNumbering(MO1.getReg());
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm());
bool isAdd = ARM_AM::getAM2Op(MO2.getImm()) == ARM_AM::add;
ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm());
@@ -946,7 +958,7 @@ getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
// {12} isAdd
// {11-0} imm12/Rm
const MCOperand &MO = MI.getOperand(OpIdx);
- unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups);
Binary |= Rn << 14;
return Binary;
@@ -969,7 +981,7 @@ getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(Imm);
Binary <<= 7; // Shift amount is bits [11:7]
Binary |= getShiftOp(ShOp) << 5; // Shift type is bits [6:5]
- Binary |= getARMRegisterNumbering(MO.getReg()); // Rm is bits [3:0]
+ Binary |= CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Rm is bits [3:0]
}
return Binary | (isAdd << 12) | (isReg << 13);
}
@@ -982,7 +994,7 @@ getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx,
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx+1);
bool isAdd = MO1.getImm() != 0;
- return getARMRegisterNumbering(MO.getReg()) | (isAdd << 4);
+ return CTX.getRegisterInfo().getEncodingValue(MO.getReg()) | (isAdd << 4);
}
uint32_t ARMMCCodeEmitter::
@@ -1000,7 +1012,7 @@ getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
// if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
if (!isImm)
- Imm8 = getARMRegisterNumbering(MO.getReg());
+ Imm8 = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
return Imm8 | (isAdd << 8) | (isImm << 9);
}
@@ -1018,7 +1030,7 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
// If The first operand isn't a register, we have a label reference.
if (!MO.isReg()) {
- unsigned Rn = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
assert(MO.isExpr() && "Unexpected machine operand type!");
const MCExpr *Expr = MO.getExpr();
@@ -1028,14 +1040,14 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
++MCNumCPRelocations;
return (Rn << 9) | (1 << 13);
}
- unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
unsigned Imm = MO2.getImm();
bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
bool isImm = MO1.getReg() == 0;
uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
// if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
if (!isImm)
- Imm8 = getARMRegisterNumbering(MO1.getReg());
+ Imm8 = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
return (Rn << 9) | Imm8 | (isAdd << 8) | (isImm << 13);
}
@@ -1063,7 +1075,7 @@ getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
// {2-0} = Rn
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
- unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
unsigned Imm5 = MO1.getImm();
return ((Imm5 & 0x1f) << 3) | Rn;
}
@@ -1090,7 +1102,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
// If The first operand isn't a register, we have a label reference.
const MCOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg()) {
- Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
Imm8 = 0;
isAdd = false; // 'U' bit is handled as part of the fixup.
@@ -1136,7 +1148,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx,
ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
// Encode Rm.
- unsigned Binary = getARMRegisterNumbering(MO.getReg());
+ unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
// Encode the shift opcode.
unsigned SBits = 0;
@@ -1161,7 +1173,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx,
// Encode the shift operation Rs.
// Encode Rs bit[11:8].
assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
- return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+ return Binary | (CTX.getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift);
}
unsigned ARMMCCodeEmitter::
@@ -1180,7 +1192,7 @@ getSORegImmOpValue(const MCInst &MI, unsigned OpIdx,
ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
// Encode Rm.
- unsigned Binary = getARMRegisterNumbering(MO.getReg());
+ unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
// Encode the shift opcode.
unsigned SBits = 0;
@@ -1219,9 +1231,9 @@ getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
// Encoded as [Rn, Rm, imm].
// FIXME: Needs fixup support.
- unsigned Value = getARMRegisterNumbering(MO1.getReg());
+ unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
Value <<= 4;
- Value |= getARMRegisterNumbering(MO2.getReg());
+ Value |= CTX.getRegisterInfo().getEncodingValue(MO2.getReg());
Value <<= 2;
Value |= MO3.getImm();
@@ -1235,7 +1247,7 @@ getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
const MCOperand &MO2 = MI.getOperand(OpNum+1);
// FIXME: Needs fixup support.
- unsigned Value = getARMRegisterNumbering(MO1.getReg());
+ unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
// Even though the immediate is 8 bits long, we need 9 bits in order
// to represent the (inverse of the) sign bit.
@@ -1297,7 +1309,7 @@ getT2SORegOpValue(const MCInst &MI, unsigned OpIdx,
ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
// Encode Rm.
- unsigned Binary = getARMRegisterNumbering(MO.getReg());
+ unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
// Encode the shift opcode.
unsigned SBits = 0;
@@ -1353,7 +1365,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
if (SPRRegs || DPRRegs) {
// VLDM/VSTM
- unsigned RegNo = getARMRegisterNumbering(Reg);
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg);
unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff;
Binary |= (RegNo & 0x1f) << 8;
if (SPRRegs)
@@ -1362,7 +1374,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
Binary |= NumRegs * 2;
} else {
for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) {
- unsigned RegNo = getARMRegisterNumbering(MI.getOperand(I).getReg());
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(MI.getOperand(I).getReg());
Binary |= 1 << RegNo;
}
}
@@ -1378,7 +1390,7 @@ getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
const MCOperand &Reg = MI.getOperand(Op);
const MCOperand &Imm = MI.getOperand(Op + 1);
- unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg());
unsigned Align = 0;
switch (Imm.getImm()) {
@@ -1401,7 +1413,7 @@ getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op,
const MCOperand &Reg = MI.getOperand(Op);
const MCOperand &Imm = MI.getOperand(Op + 1);
- unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg());
unsigned Align = 0;
switch (Imm.getImm()) {
@@ -1427,7 +1439,7 @@ getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op,
const MCOperand &Reg = MI.getOperand(Op);
const MCOperand &Imm = MI.getOperand(Op + 1);
- unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg());
unsigned Align = 0;
switch (Imm.getImm()) {
@@ -1446,7 +1458,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(Op);
if (MO.getReg() == 0) return 0x0D;
- return getARMRegisterNumbering(MO.getReg());
+ return CTX.getRegisterInfo().getEncodingValue(MO.getReg());
}
unsigned ARMMCCodeEmitter::
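The assert added to getAdrLabelOpValue above requires the ADR offset to be representable as an ARM modified immediate (so_imm): an 8-bit value rotated right by an even amount. ARM_AM::getSOImmVal returns that encoding, or -1 when no rotation works. A stand-alone sketch of just the representability test (isARMModifiedImm is our name):

    #include <cstdint>

    // True if Imm fits the ARM "modified immediate" form: an 8-bit value
    // rotated right by an even amount in the range [0, 30].
    static bool isARMModifiedImm(uint32_t Imm) {
      for (unsigned Rot = 0; Rot < 32; Rot += 2) {
        // Rotating left by Rot undoes a right-rotation by Rot.
        uint32_t V = Rot ? ((Imm << Rot) | (Imm >> (32 - Rot))) : Imm;
        if ((V & ~0xFFu) == 0)
          return true;
      }
      return false;
    }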
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 78faf59..a51e0fa 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -408,15 +408,22 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// Even when it's not a scattered relocation, movw/movt always uses
// a PAIR relocation.
if (Type == macho::RIT_ARM_Half) {
- // The other-half value only gets populated for the movt relocation.
+ // The other-half value only gets populated for the movt and movw
+ // relocation entries.
uint32_t Value = 0;;
switch ((unsigned)Fixup.getKind()) {
default: break;
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ Value = (FixedValue >> 16) & 0xffff;
+ break;
case ARM::fixup_arm_movt_hi16:
case ARM::fixup_arm_movt_hi16_pcrel:
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movt_hi16_pcrel:
- Value = FixedValue;
+ Value = FixedValue & 0xffff;
break;
}
macho::RelocationEntry MREPair;
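The intent of the Mach-O change above: a half relocation's PAIR entry carries the 16 bits that the instruction itself does not encode, so a movw (lo16) fixup records the high half of the fixed-up value and a movt (hi16) fixup records the low half. A small illustration of the split (the names below are ours):

    #include <cstdint>

    // The half written into the instruction and the "other half" carried by the
    // paired relocation entry, for a 32-bit fixed-up value.
    struct HalfPair { uint16_t Encoded, OtherHalf; };

    static HalfPair splitForMovw(uint32_t FixedValue) {   // lo16 fixups
      return { uint16_t(FixedValue & 0xffff), uint16_t((FixedValue >> 16) & 0xffff) };
    }
    static HalfPair splitForMovt(uint32_t FixedValue) {   // hi16 fixups
      return { uint16_t((FixedValue >> 16) & 0xffff), uint16_t(FixedValue & 0xffff) };
    }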
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 2097bb9..e9e20dd 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -563,48 +563,6 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
return Offset == 0;
}
-/// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
-/// two-addrss instruction inserted by two-address pass.
-void
-Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI,
- MachineInstr *UseMI,
- const TargetRegisterInfo &TRI) const {
- if (SrcMI->getOpcode() != ARM::tMOVr || SrcMI->getOperand(1).isKill())
- return;
-
- unsigned PredReg = 0;
- ARMCC::CondCodes CC = getInstrPredicate(UseMI, PredReg);
- if (CC == ARMCC::AL || PredReg != ARM::CPSR)
- return;
-
- // Schedule the copy so it doesn't come between previous instructions
- // and UseMI which can form an IT block.
- unsigned SrcReg = SrcMI->getOperand(1).getReg();
- ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
- MachineBasicBlock *MBB = UseMI->getParent();
- MachineBasicBlock::iterator MBBI = SrcMI;
- unsigned NumInsts = 0;
- while (--MBBI != MBB->begin()) {
- if (MBBI->isDebugValue())
- continue;
-
- MachineInstr *NMI = &*MBBI;
- ARMCC::CondCodes NCC = getInstrPredicate(NMI, PredReg);
- if (!(NCC == CC || NCC == OCC) ||
- NMI->modifiesRegister(SrcReg, &TRI) ||
- NMI->modifiesRegister(ARM::CPSR, &TRI))
- break;
- if (++NumInsts == 4)
- // Too many in a row!
- return;
- }
-
- if (NumInsts) {
- MBB->remove(SrcMI);
- MBB->insert(++MBBI, SrcMI);
- }
-}
-
ARMCC::CondCodes
llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
unsigned Opc = MI->getOpcode();
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 0911f8a..2cdcd06 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -57,11 +57,6 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
- /// two-addrss instruction inserted by two-address pass.
- void scheduleTwoAddrSource(MachineInstr *SrcMI, MachineInstr *UseMI,
- const TargetRegisterInfo &TRI) const;
-
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index c8e757b..4ddcd38 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -285,14 +285,14 @@ void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) {
Out << "GlobalValue::LinkerPrivateLinkage"; break;
case GlobalValue::LinkerPrivateWeakLinkage:
Out << "GlobalValue::LinkerPrivateWeakLinkage"; break;
- case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
- Out << "GlobalValue::LinkerPrivateWeakDefAutoLinkage"; break;
case GlobalValue::AvailableExternallyLinkage:
Out << "GlobalValue::AvailableExternallyLinkage "; break;
case GlobalValue::LinkOnceAnyLinkage:
Out << "GlobalValue::LinkOnceAnyLinkage "; break;
case GlobalValue::LinkOnceODRLinkage:
Out << "GlobalValue::LinkOnceODRLinkage "; break;
+ case GlobalValue::LinkOnceODRAutoHideLinkage:
+ Out << "GlobalValue::LinkOnceODRAutoHideLinkage"; break;
case GlobalValue::WeakAnyLinkage:
Out << "GlobalValue::WeakAnyLinkage"; break;
case GlobalValue::WeakODRLinkage:
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 1357cc5..d756aec 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -328,7 +328,10 @@ CountValue *HexagonHardwareLoops::getTripCount(MachineLoop *L) const {
// can get a useful trip count. The trip count can
// be either a register or an immediate. The location
// of the value depends upon the type (reg or imm).
- while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) {
+ for (MachineRegisterInfo::reg_iterator
+ RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
+ RI != RE; ++RI) {
+ IV_Opnd = &RI.getOperand();
const MachineInstr *MI = IV_Opnd->getParent();
if (L->contains(MI) && isCompareEqualsImm(MI)) {
const MachineOperand &MO = MI->getOperand(2);
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
index c7be5ce..c0c0df6 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -2580,22 +2580,16 @@ let isCall = 1, neverHasSideEffects = 1,
}
// Tail Calls.
-let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
- Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
- R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in {
def TCRETURNtg : JInst<(outs), (ins calltarget:$dst),
"jump $dst // TAILCALL", []>;
}
-let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
- Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
- R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in {
def TCRETURNtext : JInst<(outs), (ins calltarget:$dst),
"jump $dst // TAILCALL", []>;
}
-let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
- Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
- R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in {
def TCRETURNR : JInst<(outs), (ins IntRegs:$dst),
"jumpr $dst // TAILCALL", []>;
}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 5d087db..4bacb8f 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -40,28 +40,27 @@ EnableIEEERndNear(
HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS):
HexagonGenSubtargetInfo(TT, CPU, FS),
- HexagonArchVersion(V2),
CPUString(CPU.str()) {
- ParseSubtargetFeatures(CPU, FS);
- switch(HexagonArchVersion) {
- case HexagonSubtarget::V2:
- break;
- case HexagonSubtarget::V3:
- EnableV3 = true;
- break;
- case HexagonSubtarget::V4:
- break;
- case HexagonSubtarget::V5:
- break;
- default:
- // If the programmer has not specified a Hexagon version, default
- // to -mv4.
+ // If the programmer has not specified a Hexagon version, default to -mv4.
+ if (CPUString.empty())
CPUString = "hexagonv4";
- HexagonArchVersion = HexagonSubtarget::V4;
- break;
+
+ if (CPUString == "hexagonv2") {
+ HexagonArchVersion = V2;
+ } else if (CPUString == "hexagonv3") {
+ EnableV3 = true;
+ HexagonArchVersion = V3;
+ } else if (CPUString == "hexagonv4") {
+ HexagonArchVersion = V4;
+ } else if (CPUString == "hexagonv5") {
+ HexagonArchVersion = V5;
+ } else {
+ llvm_unreachable("Unrecognized Hexagon processor version");
}
+ ParseSubtargetFeatures(CPUString, FS);
+
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index 786a0c5..05f6fa6 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -183,8 +183,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
ManglerPrefixTy PrefixTy = Mangler::Default;
if (GV->hasPrivateLinkage() || isImplicitlyPrivate)
PrefixTy = Mangler::Private;
- else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage() ||
- GV->hasLinkerPrivateWeakDefAutoLinkage())
+ else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage())
PrefixTy = Mangler::LinkerPrivate;
// If this global has a name, handle it simply.
diff --git a/lib/Target/Mips/AsmParser/CMakeLists.txt b/lib/Target/Mips/AsmParser/CMakeLists.txt
index 6c7343b..28f5219 100644
--- a/lib/Target/Mips/AsmParser/CMakeLists.txt
+++ b/lib/Target/Mips/AsmParser/CMakeLists.txt
@@ -1,3 +1,4 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMMipsAsmParser
MipsAsmParser.cpp
)
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 58b5590..43bd345 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -11,11 +11,20 @@
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/MathExtras.h"
using namespace llvm;
namespace {
class MipsAsmParser : public MCTargetAsmParser {
+
+#define GET_ASSEMBLER_HEADER
+#include "MipsGenAsmMatcher.inc"
+
bool MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out);
@@ -23,10 +32,11 @@ class MipsAsmParser : public MCTargetAsmParser {
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
bool ParseInstruction(StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
bool ParseDirective(AsmToken DirectiveID);
+ OperandMatchResultTy parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&);
public:
MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
: MCTargetAsmParser() {
@@ -35,6 +45,57 @@ public:
};
}
+namespace {
+
+/// MipsOperand - Instances of this class represent a parsed Mips machine
+/// instruction operand.
+class MipsOperand : public MCParsedAsmOperand {
+ enum KindTy {
+ k_CondCode,
+ k_CoprocNum,
+ k_Immediate,
+ k_Memory,
+ k_PostIndexRegister,
+ k_Register,
+ k_Token
+ } Kind;
+
+ MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+public:
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ llvm_unreachable("unimplemented!");
+ }
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ llvm_unreachable("unimplemented!");
+ }
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ llvm_unreachable("unimplemented!");
+ }
+ void addMemOperands(MCInst &Inst, unsigned N) const {
+ llvm_unreachable("unimplemented!");
+ }
+
+ bool isReg() const { return Kind == k_Register; }
+ bool isImm() const { return Kind == k_Immediate; }
+ bool isToken() const { return Kind == k_Token; }
+ bool isMem() const { return Kind == k_Memory; }
+
+ StringRef getToken() const {
+ assert(Kind == k_Token && "Invalid access!");
+ return "";
+ }
+
+ unsigned getReg() const {
+ assert((Kind == k_Register) && "Invalid access!");
+ return 0;
+ }
+
+ virtual void print(raw_ostream &OS) const {
+ llvm_unreachable("unimplemented!");
+ }
+};
+}
+
bool MipsAsmParser::
MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
@@ -58,6 +119,11 @@ ParseDirective(AsmToken DirectiveID) {
return true;
}
+MipsAsmParser::OperandMatchResultTy MipsAsmParser::
+ parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&) {
+ return MatchOperand_ParseFail;
+}
+
extern "C" void LLVMInitializeMipsAsmParser() {
RegisterMCAsmParser<MipsAsmParser> X(TheMipsTarget);
RegisterMCAsmParser<MipsAsmParser> Y(TheMipselTarget);
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index e9a228c..f535c50 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -10,13 +10,18 @@ tablegen(LLVM MipsGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM MipsGenCallingConv.inc -gen-callingconv)
tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM MipsGenEDInfo.inc -gen-enhanced-disassembly-info)
+tablegen(LLVM MipsGenAsmMatcher.inc -gen-asm-matcher)
add_public_tablegen_target(MipsCommonTableGen)
add_llvm_target(MipsCodeGen
+ Mips16FrameLowering.cpp
+ Mips16InstrInfo.cpp
+ Mips16RegisterInfo.cpp
MipsAnalyzeImmediate.cpp
MipsAsmPrinter.cpp
MipsCodeEmitter.cpp
MipsDelaySlotFiller.cpp
+ MipsELFWriterInfo.cpp
MipsJITInfo.cpp
MipsInstrInfo.cpp
MipsISelDAGToDAG.cpp
@@ -26,6 +31,9 @@ add_llvm_target(MipsCodeGen
MipsMCInstLower.cpp
MipsMachineFunction.cpp
MipsRegisterInfo.cpp
+ MipsSEFrameLowering.cpp
+ MipsSEInstrInfo.cpp
+ MipsSERegisterInfo.cpp
MipsSubtarget.cpp
MipsTargetMachine.cpp
MipsTargetObjectFile.cpp
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 042b456..aa57472 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -16,6 +16,7 @@
#include "MipsRegisterInfo.h"
#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -274,7 +275,8 @@ MipsDisassembler::getInstruction(MCInst &instr,
return MCDisassembler::Fail;
// Calling the auto-generated decoder function.
- Result = decodeMipsInstruction32(instr, Insn, Address, this, STI);
+ Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address,
+ this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
@@ -298,13 +300,15 @@ Mips64Disassembler::getInstruction(MCInst &instr,
return MCDisassembler::Fail;
// Calling the auto-generated decoder function.
- Result = decodeMips64Instruction32(instr, Insn, Address, this, STI);
+ Result = decodeInstruction(DecoderTableMips6432, instr, Insn, Address,
+ this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
}
// If we fail to decode in Mips64 decoder space we can try in Mips32
- Result = decodeMipsInstruction32(instr, Insn, Address, this, STI);
+ Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address,
+ this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
@@ -379,8 +383,8 @@ static DecodeStatus DecodeMem(MCInst &Inst,
uint64_t Address,
const void *Decoder) {
int Offset = SignExtend32<16>(Insn & 0xffff);
- unsigned Reg = fieldFromInstruction32(Insn, 16, 5);
- unsigned Base = fieldFromInstruction32(Insn, 21, 5);
+ unsigned Reg = fieldFromInstruction(Insn, 16, 5);
+ unsigned Base = fieldFromInstruction(Insn, 21, 5);
Reg = getReg(Decoder, Mips::CPURegsRegClassID, Reg);
Base = getReg(Decoder, Mips::CPURegsRegClassID, Base);
@@ -401,8 +405,8 @@ static DecodeStatus DecodeFMem(MCInst &Inst,
uint64_t Address,
const void *Decoder) {
int Offset = SignExtend32<16>(Insn & 0xffff);
- unsigned Reg = fieldFromInstruction32(Insn, 16, 5);
- unsigned Base = fieldFromInstruction32(Insn, 21, 5);
+ unsigned Reg = fieldFromInstruction(Insn, 16, 5);
+ unsigned Base = fieldFromInstruction(Insn, 21, 5);
Reg = getReg(Decoder, Mips::FGR64RegClassID, Reg);
Base = getReg(Decoder, Mips::CPURegsRegClassID, Base);
@@ -484,7 +488,7 @@ static DecodeStatus DecodeJumpTarget(MCInst &Inst,
uint64_t Address,
const void *Decoder) {
- unsigned JumpOffset = fieldFromInstruction32(Insn, 0, 26) << 2;
+ unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 2;
Inst.addOperand(MCOperand::CreateImm(JumpOffset));
return MCDisassembler::Success;
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 6fe0c11..18961fd 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -35,6 +35,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
return 0;
case FK_GPRel_4:
case FK_Data_4:
+ case FK_Data_8:
case Mips::fixup_Mips_LO16:
case Mips::fixup_Mips_GPOFF_HI:
case Mips::fixup_Mips_GPOFF_LO:
@@ -59,9 +60,17 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
break;
case Mips::fixup_Mips_HI16:
case Mips::fixup_Mips_GOT_Local:
- // Get the higher 16-bits. Also add 1 if bit 15 is 1.
+ // Get the 2nd 16-bits. Also add 1 if bit 15 is 1.
Value = ((Value + 0x8000) >> 16) & 0xffff;
break;
+ case Mips::fixup_Mips_HIGHER:
+ // Get the 3rd 16-bits.
+ Value = ((Value + 0x80008000LL) >> 32) & 0xffff;
+ break;
+ case Mips::fixup_Mips_HIGHEST:
+ // Get the 4th 16-bits.
+ Value = ((Value + 0x800080008000LL) >> 48) & 0xffff;
+ break;
}
return Value;
@@ -168,7 +177,9 @@ public:
{ "fixup_Mips_GPOFF_LO", 0, 16, 0 },
{ "fixup_Mips_GOT_PAGE", 0, 16, 0 },
{ "fixup_Mips_GOT_OFST", 0, 16, 0 },
- { "fixup_Mips_GOT_DISP", 0, 16, 0 }
+ { "fixup_Mips_GOT_DISP", 0, 16, 0 },
+ { "fixup_Mips_HIGHER", 0, 16, 0 },
+ { "fixup_Mips_HIGHEST", 0, 16, 0 }
};
if (Kind < FirstTargetFixupKind)
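The HIGHER/HIGHEST adjustments above extract the third and fourth 16-bit chunks of a 64-bit value, pre-adding the carry that the sign-extended lower chunks can produce, in the same way the existing HI16 case adds 0x8000 before shifting. A stand-alone illustration of the arithmetic (function names are ours, not LLVM's):

    #include <cstdint>
    #include <cstdio>

    // The four 16-bit chunks used to materialize a 64-bit value, with the
    // carries from the lower, sign-extended chunks folded in up front.
    static unsigned lo(uint64_t V)      { return V & 0xffff; }
    static unsigned hi(uint64_t V)      { return ((V + 0x8000) >> 16) & 0xffff; }
    static unsigned higher(uint64_t V)  { return ((V + 0x80008000ULL) >> 32) & 0xffff; }
    static unsigned highest(uint64_t V) { return ((V + 0x800080008000ULL) >> 48) & 0xffff; }

    int main() {
      uint64_t Addr = 0x0000123456789abcULL;
      std::printf("highest=%#x higher=%#x hi=%#x lo=%#x\n",
                  highest(Addr), higher(Addr), hi(Addr), lo(Addr));
      return 0;
    }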
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 77c1524..b8489ca 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -34,7 +34,8 @@ namespace {
class MipsELFObjectWriter : public MCELFObjectTargetWriter {
public:
- MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, bool _isN64);
+ MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
+ bool _isN64, bool IsLittleEndian);
virtual ~MipsELFObjectWriter();
@@ -53,7 +54,7 @@ namespace {
}
MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
- bool _isN64)
+ bool _isN64, bool IsLittleEndian)
: MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS,
/*HasRelocationAddend*/ false,
/*IsN64*/ _isN64) {}
@@ -103,6 +104,9 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
case FK_Data_4:
Type = ELF::R_MIPS_32;
break;
+ case FK_Data_8:
+ Type = ELF::R_MIPS_64;
+ break;
case FK_GPRel_4:
Type = ELF::R_MIPS_GPREL32;
break;
@@ -169,6 +173,12 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type);
Type = setRType3((unsigned)ELF::R_MIPS_LO16, Type);
break;
+ case Mips::fixup_Mips_HIGHER:
+ Type = ELF::R_MIPS_HIGHER;
+ break;
+ case Mips::fixup_Mips_HIGHEST:
+ Type = ELF::R_MIPS_HIGHEST;
+ break;
}
return Type;
}
@@ -265,6 +275,7 @@ MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS,
bool IsLittleEndian,
bool Is64Bit) {
MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI,
- (Is64Bit) ? true : false);
+ (Is64Bit) ? true : false,
+ IsLittleEndian);
return createELFObjectWriter(MOTW, OS, IsLittleEndian);
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index f5cbbd5..77faec5 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -110,6 +110,12 @@ namespace Mips {
// resulting in - R_MIPS_GOT_DISP
fixup_Mips_GOT_DISP,
+ // resulting in - R_MIPS_GOT_HIGHER
+ fixup_Mips_HIGHER,
+
+ // resulting in - R_MIPS_HIGHEST
+ fixup_Mips_HIGHEST,
+
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index ff3b3a7..8dab62d 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -255,6 +255,12 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
case MCSymbolRefExpr::VK_Mips_TPREL_LO:
FixupKind = Mips::fixup_Mips_TPREL_LO;
break;
+ case MCSymbolRefExpr::VK_Mips_HIGHER:
+ FixupKind = Mips::fixup_Mips_HIGHER;
+ break;
+ case MCSymbolRefExpr::VK_Mips_HIGHEST:
+ FixupKind = Mips::fixup_Mips_HIGHEST;
+ break;
} // switch
Fixups.push_back(MCFixup::Create(0, MO.getExpr(), MCFixupKind(FixupKind)));
diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile
index 596f071..93de517 100644
--- a/lib/Target/Mips/Makefile
+++ b/lib/Target/Mips/Makefile
@@ -16,7 +16,9 @@ BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \
MipsGenAsmWriter.inc MipsGenCodeEmitter.inc \
MipsGenDAGISel.inc MipsGenCallingConv.inc \
MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc \
- MipsGenEDInfo.inc MipsGenDisassemblerTables.inc
+ MipsGenEDInfo.inc MipsGenDisassemblerTables.inc \
+ MipsGenAsmMatcher.inc
+
DIRS = InstPrinter Disassembler AsmParser TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 8548ae0..7cec531 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -44,6 +44,8 @@ def FeatureN64 : SubtargetFeature<"n64", "MipsABI", "N64",
"Enable n64 ABI">;
def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI",
"Enable eabi ABI">;
+def FeatureAndroid : SubtargetFeature<"android", "IsAndroid", "true",
+ "Target is android">;
def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
"true", "Enable vector FPU instructions.">;
def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true",
@@ -93,9 +95,20 @@ def MipsAsmWriter : AsmWriter {
bit isMCAsmWriter = 1;
}
+def MipsAsmParser : AsmParser {
+ let ShouldEmitMatchRegisterName = 0;
+}
+
+def MipsAsmParserVariant : AsmParserVariant {
+ int Variant = 0;
+
+ // Recognize hard coded registers.
+ string RegisterPrefix = "$";
+}
+
def Mips : Target {
let InstructionSet = MipsInstrInfo;
-
+ let AssemblyParsers = [MipsAsmParser];
let AssemblyWriters = [MipsAsmWriter];
+ let AssemblyParserVariants = [MipsAsmParserVariant];
}
-
diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp
new file mode 100644
index 0000000..030042f
--- /dev/null
+++ b/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -0,0 +1,87 @@
+//===-- Mips16FrameLowering.cpp - Mips16 Frame Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips16 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips16FrameLowering.h"
+#include "MipsInstrInfo.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MipsInstrInfo &TII =
+ *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ uint64_t StackSize = MFI->getStackSize();
+
+ // No need to allocate space on the stack.
+ if (StackSize == 0 && !MFI->adjustsStack()) return;
+
+ // Adjust stack.
+ if (isInt<16>(-StackSize))
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::SaveRaF16)).addImm(StackSize);
+}
+
+void Mips16FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MipsInstrInfo &TII =
+ *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MBBI->getDebugLoc();
+ uint64_t StackSize = MFI->getStackSize();
+
+ if (!StackSize)
+ return;
+
+ // Adjust stack.
+ if (isInt<16>(StackSize))
+ // Assumes the stack size is a multiple of 8.
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::RestoreRaF16)).addImm(StackSize);
+}
+
+bool Mips16FrameLowering::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ // FIXME: implement.
+ return true;
+}
+
+bool
+Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ // FIXME: implement.
+ return true;
+}
+
+void Mips16FrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+}
+
+const MipsFrameLowering *
+llvm::createMips16FrameLowering(const MipsSubtarget &ST) {
+ return new Mips16FrameLowering(ST);
+}
diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h
new file mode 100644
index 0000000..25cc37b
--- /dev/null
+++ b/lib/Target/Mips/Mips16FrameLowering.h
@@ -0,0 +1,43 @@
+//===-- Mips16FrameLowering.h - Mips16 frame lowering ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16_FRAMEINFO_H
+#define MIPS16_FRAMEINFO_H
+
+#include "MipsFrameLowering.h"
+
+namespace llvm {
+class Mips16FrameLowering : public MipsFrameLowering {
+public:
+ explicit Mips16FrameLowering(const MipsSubtarget &STI)
+ : MipsFrameLowering(STI) {}
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
new file mode 100644
index 0000000..2bc286b
--- /dev/null
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -0,0 +1,132 @@
+//===-- Mips16InstrInfo.cpp - Mips16 Instruction Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips16 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips16InstrInfo.h"
+#include "MipsTargetMachine.h"
+#include "MipsMachineFunction.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+
+using namespace llvm;
+
+Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm)
+ : MipsInstrInfo(tm, /* FIXME: set mips16 unconditional br */ 0),
+ RI(*tm.getSubtargetImpl(), *this) {}
+
+const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned Mips16InstrInfo::
+isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned Mips16InstrInfo::
+isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ return 0;
+}
+
+void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc = 0, ZeroReg = 0;
+
+ if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg.
+ if (Mips::CPURegsRegClass.contains(SrcReg))
+ Opc = Mips::Mov32R16;
+ }
+
+ assert(Opc && "Cannot copy registers");
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc));
+
+ if (DestReg)
+ MIB.addReg(DestReg, RegState::Define);
+
+ if (ZeroReg)
+ MIB.addReg(ZeroReg);
+
+ if (SrcReg)
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+void Mips16InstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert(false && "Implement this function.");
+}
+
+void Mips16InstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert(false && "Implement this function.");
+}
+
+bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ MachineBasicBlock &MBB = *MI->getParent();
+
+ switch(MI->getDesc().getOpcode()) {
+ default:
+ return false;
+ case Mips::RetRA16:
+ ExpandRetRA16(MBB, MI, Mips::JrRa16);
+ break;
+ }
+
+ MBB.erase(MI);
+ return true;
+}
+
+/// GetOppositeBranchOpc - Return the inverse of the specified
+/// opcode, e.g. turning BEQ to BNE.
+unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const {
+ assert(false && "Implement this function.");
+ return 0;
+}
+
+unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const {
+ return 0;
+}
+
+void Mips16InstrInfo::ExpandRetRA16(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Opc) const {
+ BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
+}
+
+const MipsInstrInfo *llvm::createMips16InstrInfo(MipsTargetMachine &TM) {
+ return new Mips16InstrInfo(TM);
+}
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
new file mode 100644
index 0000000..260c5b6
--- /dev/null
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -0,0 +1,76 @@
+//===-- Mips16InstrInfo.h - Mips16 Instruction Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips16 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16INSTRUCTIONINFO_H
+#define MIPS16INSTRUCTIONINFO_H
+
+#include "MipsInstrInfo.h"
+#include "Mips16RegisterInfo.h"
+
+namespace llvm {
+
+class Mips16InstrInfo : public MipsInstrInfo {
+ const Mips16RegisterInfo RI;
+
+public:
+ explicit Mips16InstrInfo(MipsTargetMachine &TM);
+
+ virtual const MipsRegisterInfo &getRegisterInfo() const;
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
+ virtual unsigned GetOppositeBranchOpc(unsigned Opc) const;
+
+private:
+ virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const;
+
+ void ExpandRetRA16(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned Opc) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index c852042..94cf984 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -11,10 +11,6 @@
//
//===----------------------------------------------------------------------===//
-def uimm5 : Operand<i8> {
- let DecoderMethod= "DecodeSimm16";
-}
-
//
// RRR-type instruction format
//
@@ -46,9 +42,32 @@ class FEXT_RI16_ins<bits<5> _op, string asmstr,
class FEXT_RI16_PC_ins<bits<5> _op, string asmstr, InstrItinClass itin>:
FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $$pc, $imm", itin>;
+
+class FEXT_2RI16_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_, simm16:$imm),
+ !strconcat(asmstr, "\t$rx, $imm"), [], itin> {
+ let Constraints = "$rx_ = $rx";
+}
+
+
//
// RR-type instruction format
//
+
+class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
+ FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rx, $ry"), [], itin> {
+}
+
+class FRxRxRy16_ins<bits<5> f, string asmstr,
+ InstrItinClass itin> :
+ FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rz, $ry"),
+ [], itin> {
+ let Constraints = "$rx = $rz";
+}
+
let rx=0 in
class FRR16_JALRC_RA_only_ins<bits<1> nd_, bits<1> l_,
string asmstr, InstrItinClass itin>:
@@ -64,11 +83,16 @@ class FEXT_RRI16_mem_ins<bits<5> op, string asmstr, Operand MemOpnd,
FEXT_RRI16<op, (outs CPU16Regs:$ry), (ins MemOpnd:$addr),
!strconcat(asmstr, "\t$ry, $addr"), [], itin>;
+class FEXT_RRI16_mem2_ins<bits<5> op, string asmstr, Operand MemOpnd,
+ InstrItinClass itin>:
+ FEXT_RRI16<op, (outs ), (ins CPU16Regs:$ry, MemOpnd:$addr),
+ !strconcat(asmstr, "\t$ry, $addr"), [], itin>;
+
//
// EXT-SHIFT instruction format
//
class FEXT_SHIFT16_ins<bits<2> _f, string asmstr, InstrItinClass itin>:
- FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, uimm5:$sa),
+ FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, shamt:$sa),
!strconcat(asmstr, "\t$rx, $ry, $sa"), [], itin>;
//
@@ -80,20 +104,49 @@ def mem16 : Operand<i32> {
}
//
+// Some general instruction class info
+//
+//
+
+class ArithLogic16Defs<bit isCom=0> {
+ bits<5> shamt = 0;
+ bit isCommutable = isCom;
+ bit isReMaterializable = 1;
+ bit neverHasSideEffects = 1;
+}
+
+//
+
+// Format: ADDIU rx, immediate MIPS16e
+// Purpose: Add Immediate Unsigned Word (2-Operand, Extended)
+// To add a constant to a 32-bit integer.
+//
+def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIAlu>;
+
+def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIAlu>,
+ ArithLogic16Defs<0>;
+
+//
+
// Format: ADDIU rx, pc, immediate MIPS16e
// Purpose: Add Immediate Unsigned Word (3-Operand, PC-Relative, Extended)
// To add a constant to the program counter.
//
-class AddiuRxPcImmX16_base : FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>;
-def AddiuRxPcImmX16 : AddiuRxPcImmX16_base;
+def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>;
//
// Format: ADDU rz, rx, ry MIPS16e
// Purpose: Add Unsigned Word (3-Operand)
// To add 32-bit integers.
//
-class AdduRxRyRz16_base: FRRR16_ins<01, "addu", IIAlu>;
-def AdduRxRyRz16: AdduRxRyRz16_base;
+def AdduRxRyRz16: FRRR16_ins<01, "addu", IIAlu>, ArithLogic16Defs<1>;
+
+//
+// Format: AND rx, ry MIPS16e
+// Purpose: AND
+// To do a bitwise logical AND.
+
+def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>;
//
// Format: JR ra MIPS16e
@@ -105,6 +158,34 @@ def AdduRxRyRz16: AdduRxRyRz16_base;
def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu>;
//
+// Format: LB ry, offset(rx) MIPS16e
+// Purpose: Load Byte (Extended)
+// To load a byte from memory as a signed value.
+//
+def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IIAlu>;
+
+//
+// Format: LBU ry, offset(rx) MIPS16e
+// Purpose: Load Byte Unsigned (Extended)
+// To load a byte from memory as an unsigned value.
+//
+def LbuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IIAlu>;
+
+//
+// Format: LH ry, offset(rx) MIPS16e
+// Purpose: Load Halfword signed (Extended)
+// To load a halfword from memory as a signed value.
+//
+def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IIAlu>;
+
+//
+// Format: LHU ry, offset(rx) MIPS16e
+// Purpose: Load Halfword unsigned (Extended)
+// To load a halfword from memory as an unsigned value.
+//
+def LhuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IIAlu>;
+
+//
// Format: LI rx, immediate MIPS16e
// Purpose: Load Immediate (Extended)
// To load a constant into a GPR.
@@ -116,8 +197,7 @@ def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>;
// Purpose: Load Word (Extended)
// To load a word from memory as a signed value.
//
-class LwRxRyOffMemX16_base: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IIAlu>;
-def LwRxRyOffMemX16: LwRxRyOffMemX16_base;
+def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IIAlu>;
//
// Format: MOVE r32, rz MIPS16e
@@ -125,6 +205,28 @@ def LwRxRyOffMemX16: LwRxRyOffMemX16_base;
// To move the contents of a GPR to a GPR.
//
def Mov32R16: FI8_MOV32R16_ins<"move", IIAlu>;
+
+//
+// Format: NEG rx, ry MIPS16e
+// Purpose: Negate
+// To negate an integer value.
+//
+def NegRxRy16: FRR16_ins<0b11101, "neg", IIAlu>;
+
+//
+// Format: NOT rx, ry MIPS16e
+// Purpose: Not
+// To complement an integer value
+//
+def NotRxRy16: FRR16_ins<0b01111, "not", IIAlu>;
+
+//
+// Format: OR rx, ry MIPS16e
+// Purpose: Or
+// To do a bitwise logical OR.
+//
+def OrRxRxRy16: FRxRxRy16_ins<0b01101, "or", IIAlu>, ArithLogic16Defs<1>;
+
//
// Format: RESTORE {ra,}{s0/s1/s0-1,}{framesize}
// (All args are optional) MIPS16e
@@ -156,6 +258,20 @@ def SaveRaF16:
"save \t$$ra, $frame_size", [], IILoad >;
//
+// Format: SB ry, offset(rx) MIPS16e
+// Purpose: Store Byte (Extended)
+// To store a byte to memory.
+//
+def SbRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11000, "sb", mem16, IIAlu>;
+
+//
+// Format: SH ry, offset(rx) MIPS16e
+// Purpose: Store Halfword (Extended)
+// To store a halfword to memory.
+//
+def ShRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11001, "sh", mem16, IIAlu>;
+
+//
// Format: SLL rx, ry, sa MIPS16e
// Purpose: Shift Word Left Logical (Extended)
// To execute a left-shift of a word by a fixed number of bits—0 to 31 bits.
@@ -163,57 +279,127 @@ def SaveRaF16:
def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIAlu>;
//
+// Format: SLLV ry, rx MIPS16e
+// Purpose: Shift Word Left Logical Variable
+// To execute a left-shift of a word by a variable number of bits.
+//
+def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIAlu>;
+
+
+//
+// Format: SRAV ry, rx MIPS16e
+// Purpose: Shift Word Right Arithmetic Variable
+// To execute an arithmetic right-shift of a word by a variable
+// number of bits.
+//
+def SravRxRy16: FRxRxRy16_ins<0b00111, "srav", IIAlu>;
+
+
+//
+// Format: SRA rx, ry, sa MIPS16e
+// Purpose: Shift Word Right Arithmetic (Extended)
+// To execute an arithmetic right-shift of a word by a fixed
+// number of bits—1 to 8 bits.
+//
+def SraX16: FEXT_SHIFT16_ins<0b11, "sra", IIAlu>;
+
+
+//
+// Format: SRLV ry, rx MIPS16e
+// Purpose: Shift Word Right Logical Variable
+// To execute a logical right-shift of a word by a variable
+// number of bits.
+//
+def SrlvRxRy16: FRxRxRy16_ins<0b00110, "srlv", IIAlu>;
+
+
+//
+// Format: SRL rx, ry, sa MIPS16e
+// Purpose: Shift Word Right Logical (Extended)
+// To execute a logical right-shift of a word by a fixed
+// number of bits—1 to 31 bits.
+//
+def SrlX16: FEXT_SHIFT16_ins<0b10, "srl", IIAlu>;
+
+//
+// Format: SUBU rz, rx, ry MIPS16e
+// Purpose: Subtract Unsigned Word
+// To subtract 32-bit integers
+//
+def SubuRxRyRz16: FRRR16_ins<0b11, "subu", IIAlu>, ArithLogic16Defs<0>;
+
+//
// Format: SW ry, offset(rx) MIPS16e
// Purpose: Store Word (Extended)
// To store a word to memory.
//
-class SwRxRyOffMemX16_base: FEXT_RRI16_mem_ins<0b11011, "sw", mem16, IIAlu>;
-def SwRxRyOffMemX16: SwRxRyOffMemX16_base;
+def SwRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11011, "sw", mem16, IIAlu>;
+
+//
+// Format: XOR rx, ry MIPS16e
+// Purpose: Xor
+// To do a bitwise logical XOR.
+//
+def XorRxRxRy16: FRxRxRy16_ins<0b01110, "xor", IIAlu>, ArithLogic16Defs<1>;
class Mips16Pat<dag pattern, dag result> : Pat<pattern, result> {
let Predicates = [InMips16Mode];
}
-class ArithLogicR16Defs<SDNode OpNode, bit isComm = 0> {
- dag OutOperandList = (outs CPU16Regs:$rz);
- dag InOperandList = (ins CPU16Regs:$rx, CPU16Regs:$ry);
- list<dag> Pattern = [(set CPU16Regs:$rz,
- (OpNode CPU16Regs:$rx, CPU16Regs:$ry))];
-}
+// Unary Arith/Logic
+//
+class ArithLogicU_pat<PatFrag OpNode, Instruction I> :
+ Mips16Pat<(OpNode CPU16Regs:$r),
+ (I CPU16Regs:$r)>;
-multiclass ArithLogicR16_base {
- def _add: AdduRxRyRz16_base, ArithLogicR16Defs<add, 1>;
-}
+def: ArithLogicU_pat<not, NotRxRy16>;
+def: ArithLogicU_pat<ineg, NegRxRy16>;
-defm ArithLogicR16_patt : ArithLogicR16_base;
+class ArithLogic16_pat<SDNode OpNode, Instruction I> :
+ Mips16Pat<(OpNode CPU16Regs:$l, CPU16Regs:$r),
+ (I CPU16Regs:$l, CPU16Regs:$r)>;
-class LoadM16Defs<PatFrag OpNode, Operand _MemOpnd, bit Pseudo=0> {
- bit isPseudo = Pseudo;
- Operand MemOpnd = _MemOpnd;
- dag OutOperandList = (outs CPU16Regs:$ry);
- dag InOperandList = (ins MemOpnd:$addr);
- list<dag> Pattern = [(set CPU16Regs:$ry, (OpNode addr:$addr))];
-}
+def: ArithLogic16_pat<add, AdduRxRyRz16>;
+def: ArithLogic16_pat<and, AndRxRxRy16>;
+def: ArithLogic16_pat<or, OrRxRxRy16>;
+def: ArithLogic16_pat<sub, SubuRxRyRz16>;
+def: ArithLogic16_pat<xor, XorRxRxRy16>;
-multiclass LoadM16_base {
- def _LwRxRyOffMemX16: LwRxRyOffMemX16_base, LoadM16Defs<load_a, mem16>;
-}
+// Arithmetic and logical instructions with a register and an immediate operand.
-defm LoadM16: LoadM16_base;
+class ArithLogicI16_pat<SDNode OpNode, PatFrag imm_type, Instruction I> :
+ Mips16Pat<(OpNode CPU16Regs:$in, imm_type:$imm),
+ (I CPU16Regs:$in, imm_type:$imm)>;
-class StoreM16Defs<PatFrag OpNode, Operand _MemOpnd, bit Pseudo=0> {
- bit isPseudo = Pseudo;
- Operand MemOpnd = _MemOpnd;
- dag OutOperandList = (outs );
- dag InOperandList = (ins CPU16Regs:$ry, MemOpnd:$addr);
- list<dag> Pattern = [(OpNode CPU16Regs:$ry, addr:$addr)];
-}
+def: ArithLogicI16_pat<add, immSExt16, AddiuRxRxImmX16>;
+def: ArithLogicI16_pat<shl, immZExt5, SllX16>;
+def: ArithLogicI16_pat<srl, immZExt5, SrlX16>;
+def: ArithLogicI16_pat<sra, immZExt5, SraX16>;
-multiclass StoreM16_base {
- def _SwRxRyOffMemX16: SwRxRyOffMemX16_base, StoreM16Defs<store_a, mem16>;
-}
+class shift_rotate_reg16_pat<SDNode OpNode, Instruction I> :
+ Mips16Pat<(OpNode CPU16Regs:$r, CPU16Regs:$ra),
+ (I CPU16Regs:$r, CPU16Regs:$ra)>;
+
+def: shift_rotate_reg16_pat<shl, SllvRxRy16>;
+def: shift_rotate_reg16_pat<sra, SravRxRy16>;
+def: shift_rotate_reg16_pat<srl, SrlvRxRy16>;
+
+class LoadM16_pat<PatFrag OpNode, Instruction I> :
+ Mips16Pat<(OpNode addr:$addr), (I addr:$addr)>;
+
+def: LoadM16_pat<sextloadi8, LbRxRyOffMemX16>;
+def: LoadM16_pat<zextloadi8, LbuRxRyOffMemX16>;
+def: LoadM16_pat<sextloadi16_a, LhRxRyOffMemX16>;
+def: LoadM16_pat<zextloadi16_a, LhuRxRyOffMemX16>;
+def: LoadM16_pat<load_a, LwRxRyOffMemX16>;
+
+class StoreM16_pat<PatFrag OpNode, Instruction I> :
+ Mips16Pat<(OpNode CPU16Regs:$r, addr:$addr), (I CPU16Regs:$r, addr:$addr)>;
+
+def: StoreM16_pat<truncstorei8, SbRxRyOffMemX16>;
+def: StoreM16_pat<truncstorei16_a, ShRxRyOffMemX16>;
+def: StoreM16_pat<store_a, SwRxRyOffMemX16>;
-defm StoreM16: StoreM16_base;
// Jump and Link (Call)
let isCall=1, hasDelaySlot=1 in
@@ -226,18 +412,8 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1,
hasExtraSrcRegAllocReq = 1 in
def RetRA16 : MipsPseudo16<(outs), (ins), "", [(MipsRet)]>;
-// As stack alignment is always done with addiu, we need a 16-bit immediate
-// This is basically deprecated code but needs to be there for things
-// to work.
-let Defs = [SP], Uses = [SP] in {
-def ADJCALLSTACKDOWN16 : MipsPseudo16<(outs), (ins uimm16:$amt),
- ";",
- [(callseq_start timm:$amt)]>;
-def ADJCALLSTACKUP16 : MipsPseudo16<(outs), (ins uimm16:$amt1, uimm16:$amt2),
- ";",
- [(callseq_end timm:$amt1, timm:$amt2)]>;
-}
-
// Small immediates
-def : Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>;
-def : Mips16Pat<(MipsLo tglobaladdr:$in), (LiRxImmX16 tglobaladdr:$in)>;
+def: Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>;
+
+def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)),
+ (AddiuRxRxImmX16 CPU16Regs:$hi, tglobaladdr:$lo)>;
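For reference, a minimal sketch of what one of the new pattern-class instances above expands to (illustrative only, not part of the patch; it assumes the generic Pat class and the InMips16Mode predicate set by Mips16Pat):

    // "def: ArithLogic16_pat<add, AdduRxRyRz16>;" is roughly equivalent to:
    let Predicates = [InMips16Mode] in
    def : Pat<(add CPU16Regs:$l, CPU16Regs:$r),
              (AdduRxRyRz16 CPU16Regs:$l, CPU16Regs:$r)>;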
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp
new file mode 100644
index 0000000..c15d1bf
--- /dev/null
+++ b/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -0,0 +1,111 @@
+//===-- Mips16RegisterInfo.cpp - MIPS16 Register Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MIPS16 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips16RegisterInfo.h"
+#include "Mips.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsInstrInfo.h"
+#include "MipsSubtarget.h"
+#include "MipsMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;
+
+Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST,
+ const TargetInstrInfo &TII)
+ : MipsRegisterInfo(ST, TII) {}
+
+// This function eliminates ADJCALLSTACKDOWN and
+// ADJCALLSTACKUP pseudo instructions.
+void Mips16RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
+ unsigned OpNo, int FrameIndex,
+ uint64_t StackSize,
+ int64_t SPOffset) const {
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+
+ if (CSI.size()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ // The following stack frame objects are always
+ // referenced relative to $sp:
+ // 1. Outgoing arguments.
+ // 2. Pointer to dynamically allocated stack space.
+ // 3. Locations for callee-saved registers.
+ // Everything else is referenced relative to whatever register
+ // getFrameRegister() returns.
+ unsigned FrameReg;
+
+ if (MipsFI->isOutArgFI(FrameIndex) ||
+ (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
+ FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ else
+ FrameReg = getFrameRegister(MF);
+
+ // Calculate final offset.
+  // - There is no need to change the offset if the frame object is one of the
+  //   following: an outgoing argument, pointer to a dynamically allocated
+  //   stack space, or a $gp restore location.
+  // - If the frame object is any of the following, its offset must be adjusted
+  //   by adding the size of the stack: incoming argument, callee-saved
+  //   register location, or local variable.
+ int64_t Offset;
+
+ if (MipsFI->isOutArgFI(FrameIndex))
+ Offset = SPOffset;
+ else
+ Offset = SPOffset + (int64_t)StackSize;
+
+ Offset += MI.getOperand(OpNo + 1).getImm();
+
+ DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+ MI.getOperand(OpNo).ChangeToRegister(FrameReg, false);
+ MI.getOperand(OpNo + 1).ChangeToImmediate(Offset);
+}
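As a worked example of the offset computation in eliminateFI above (the numbers are illustrative, not taken from the patch): for a local-variable frame index with SPOffset = -24, StackSize = 32, and an existing immediate operand of 4, the operands are rewritten to FrameReg = getFrameRegister(MF) and Offset = -24 + 32 + 4 = 12; for an outgoing-argument frame index, the StackSize term is skipped and the base register becomes $sp.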
diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h
new file mode 100644
index 0000000..3f4b3a7
--- /dev/null
+++ b/lib/Target/Mips/Mips16RegisterInfo.h
@@ -0,0 +1,37 @@
+//===-- Mips16RegisterInfo.h - Mips16 Register Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips16 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16REGISTERINFO_H
+#define MIPS16REGISTERINFO_H
+
+#include "MipsRegisterInfo.h"
+
+namespace llvm {
+
+class Mips16RegisterInfo : public MipsRegisterInfo {
+public:
+ Mips16RegisterInfo(const MipsSubtarget &Subtarget,
+ const TargetInstrInfo &TII);
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+private:
+ virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
+ int FrameIndex, uint64_t StackSize,
+ int64_t SPOffset) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index cceee24..20fc178 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -208,26 +208,25 @@ def DCLO : CountLeading1<0x25, "dclo", CPU64Regs>;
def DSBH : SubwordSwap<0x24, 0x2, "dsbh", CPU64Regs>;
def DSHD : SubwordSwap<0x24, 0x5, "dshd", CPU64Regs>;
-def LEA_ADDiu64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>;
+def LEA_ADDiu64 : EffectiveAddress<0x19,"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>;
}
let Uses = [SP_64], DecoderNamespace = "Mips64" in
-def DynAlloc64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>,
- Requires<[IsN64, HasStandardEncoding]> {
- let isCodeGenOnly = 1;
-}
+def DynAlloc64 : EffectiveAddress<0x19,"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>,
+ Requires<[IsN64, HasStandardEncoding]>;
let DecoderNamespace = "Mips64" in {
def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>;
def DEXT : ExtBase<3, "dext", CPU64Regs>;
def DINS : InsBase<7, "dins", CPU64Regs>;
-def DSLL64_32 : FR<0x3c, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
- "dsll\t$rd, $rt, 32", [], IIAlu>;
-def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
- "sll\t$rd, $rt, 0", [], IIAlu>;
-let isCodeGenOnly = 1 in
-def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt),
- "sll\t$rd, $rt, 0", [], IIAlu>;
+let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
+ def DSLL64_32 : FR<0x00, 0x3c, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
+ "dsll\t$rd, $rt, 32", [], IIAlu>;
+ def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
+ "sll\t$rd, $rt, 0", [], IIAlu>;
+ def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt),
+ "sll\t$rd, $rt, 0", [], IIAlu>;
+}
}
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 8aadefd..19213fa 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -145,6 +145,17 @@ def RetCC_MipsEABI : CallingConv<[
]>;
//===----------------------------------------------------------------------===//
+// Mips Android Calling Convention
+//===----------------------------------------------------------------------===//
+
+def RetCC_MipsAndroid : CallingConv<[
+ // f32 are returned in registers F0, F2, F1, F3
+ CCIfType<[f32], CCAssignToReg<[F0, F2, F1, F3]>>,
+
+ CCDelegateTo<RetCC_MipsO32>
+]>;
+
+//===----------------------------------------------------------------------===//
// Mips FastCC Calling Convention
//===----------------------------------------------------------------------===//
def CC_MipsO32_FastCC : CallingConv<[
@@ -210,6 +221,7 @@ def RetCC_Mips : CallingConv<[
CCIfSubtarget<"isABI_EABI()", CCDelegateTo<RetCC_MipsEABI>>,
CCIfSubtarget<"isABI_N32()", CCDelegateTo<RetCC_MipsN>>,
CCIfSubtarget<"isABI_N64()", CCDelegateTo<RetCC_MipsN>>,
+ CCIfSubtarget<"isAndroid()", CCDelegateTo<RetCC_MipsAndroid>>,
CCDelegateTo<RetCC_MipsO32>
]>;
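As a short example of how the new return-value convention composes with the existing rules (a reading of the table above, not additional behavior): a first f32 return value is assigned to F0 and a second to F2, following the CCAssignToReg<[F0, F2, F1, F3]> order, while any non-f32 return value falls through the CCDelegateTo<RetCC_MipsO32> line and is handled exactly as before.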
diff --git a/lib/Target/Mips/MipsELFWriterInfo.cpp b/lib/Target/Mips/MipsELFWriterInfo.cpp
new file mode 100644
index 0000000..ac3a547
--- /dev/null
+++ b/lib/Target/Mips/MipsELFWriterInfo.cpp
@@ -0,0 +1,92 @@
+//===-- MipsELFWriterInfo.cpp - ELF Writer Info for the Mips backend ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the Mips backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsELFWriterInfo.h"
+#include "MipsRelocations.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ELF.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Implementation of the MipsELFWriterInfo class
+//===----------------------------------------------------------------------===//
+
+MipsELFWriterInfo::MipsELFWriterInfo(bool is64Bit_, bool isLittleEndian_)
+ : TargetELFWriterInfo(is64Bit_, isLittleEndian_) {
+ EMachine = EM_MIPS;
+}
+
+MipsELFWriterInfo::~MipsELFWriterInfo() {}
+
+unsigned MipsELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
+ switch(MachineRelTy) {
+ case Mips::reloc_mips_pc16:
+ return ELF::R_MIPS_GOT16;
+ case Mips::reloc_mips_hi:
+ return ELF::R_MIPS_HI16;
+ case Mips::reloc_mips_lo:
+ return ELF::R_MIPS_LO16;
+ case Mips::reloc_mips_26:
+ return ELF::R_MIPS_26;
+ default:
+ llvm_unreachable("unknown Mips machine relocation type");
+ }
+}
+
+long int MipsELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier) const {
+ switch(RelTy) {
+ case ELF::R_MIPS_26: return Modifier;
+ default:
+ llvm_unreachable("unknown Mips relocation type");
+ }
+}
+
+unsigned MipsELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
+ switch(RelTy) {
+ case ELF::R_MIPS_GOT16:
+ case ELF::R_MIPS_26:
+ return 32;
+ default:
+ llvm_unreachable("unknown Mips relocation type");
+ }
+}
+
+bool MipsELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
+ switch(RelTy) {
+ case ELF::R_MIPS_GOT16:
+ return true;
+ case ELF::R_MIPS_26:
+ return false;
+ default:
+ llvm_unreachable("unknown Mips relocation type");
+ }
+}
+
+unsigned MipsELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
+ return Mips::reloc_mips_26;
+}
+
+long int MipsELFWriterInfo::computeRelocation(unsigned SymOffset,
+ unsigned RelOffset,
+ unsigned RelTy) const {
+
+ if (RelTy == ELF::R_MIPS_GOT16)
+ return SymOffset - (RelOffset + 4);
+
+ llvm_unreachable("computeRelocation unknown for this relocation type");
+}
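A minimal usage sketch of the new interface (illustrative only; the generic ELF writer that would call these hooks is not part of this patch):

    #include "MipsELFWriterInfo.h"
    #include "MipsRelocations.h"
    #include "llvm/Support/ELF.h"
    using namespace llvm;

    static void queryMipsRelocInfo() {
      MipsELFWriterInfo Info(/*is64Bit=*/false, /*isLittleEndian=*/true);
      // Each call mirrors one of the switch statements above.
      unsigned ElfTy = Info.getRelocationType(Mips::reloc_mips_hi); // ELF::R_MIPS_HI16
      bool IsPCRel   = Info.isPCRelativeRel(ELF::R_MIPS_GOT16);     // true
      unsigned Bits  = Info.getRelocationTySize(ELF::R_MIPS_26);    // 32
      (void)ElfTy; (void)IsPCRel; (void)Bits;
    }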
diff --git a/lib/Target/Mips/MipsELFWriterInfo.h b/lib/Target/Mips/MipsELFWriterInfo.h
new file mode 100644
index 0000000..23f3f03
--- /dev/null
+++ b/lib/Target/Mips/MipsELFWriterInfo.h
@@ -0,0 +1,59 @@
+//===-- MipsELFWriterInfo.h - ELF Writer Info for Mips ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the Mips backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS_ELF_WRITER_INFO_H
+#define MIPS_ELF_WRITER_INFO_H
+
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm {
+
+ class MipsELFWriterInfo : public TargetELFWriterInfo {
+
+ public:
+ MipsELFWriterInfo(bool is64Bit_, bool isLittleEndian_);
+ virtual ~MipsELFWriterInfo();
+
+ /// getRelocationType - Returns the target specific ELF Relocation type.
+ /// 'MachineRelTy' contains the object code independent relocation type
+ virtual unsigned getRelocationType(unsigned MachineRelTy) const;
+
+ /// hasRelocationAddend - True if the target uses an addend in the
+ /// ELF relocation entry.
+ virtual bool hasRelocationAddend() const { return is64Bit ? true : false; }
+
+ /// getDefaultAddendForRelTy - Gets the default addend value for a
+ /// relocation entry based on the target ELF relocation type.
+ virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier = 0) const;
+
+    /// getRelocationTySize - Returns the size of the relocatable field in bits
+ virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+ /// isPCRelativeRel - True if the relocation type is pc relative
+ virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+    /// getAbsoluteLabelMachineRelTy - Returns the machine relocation type
+    /// used to reference an absolute label (e.g. a jumptable entry).
+ virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+    /// computeRelocation - Some relocatable fields can be relocated directly,
+    /// avoiding the emission of a relocation symbol; this computes the final
+    /// relocation value for such a symbol.
+ virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
+ unsigned RelTy) const;
+ };
+
+} // end llvm namespace
+
+#endif // MIPS_ELF_WRITER_INFO_H
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index 6338f3c..8c0474b 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -15,6 +15,7 @@
#include "MipsAnalyzeImmediate.h"
#include "MipsInstrInfo.h"
#include "MipsMachineFunction.h"
+#include "MipsTargetMachine.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -81,6 +82,14 @@ using namespace llvm;
//
//===----------------------------------------------------------------------===//
+const MipsFrameLowering *MipsFrameLowering::create(MipsTargetMachine &TM,
+ const MipsSubtarget &ST) {
+ if (TM.getSubtargetImpl()->inMips16Mode())
+ return llvm::createMips16FrameLowering(ST);
+
+ return llvm::createMipsSEFrameLowering(ST);
+}
+
// hasFP - Return true if the specified function should have a dedicated frame
// pointer register. This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.
@@ -89,218 +98,3 @@ bool MipsFrameLowering::hasFP(const MachineFunction &MF) const {
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
}
-
-bool MipsFrameLowering::targetHandlesStackFrameRounding() const {
- return true;
-}
-
-void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
- const MipsRegisterInfo *RegInfo =
- static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
- const MipsInstrInfo &TII =
- *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
- MachineBasicBlock::iterator MBBI = MBB.begin();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
- unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
- unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
- unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
- unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
- unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
-
- // First, compute final stack size.
- unsigned StackAlign = getStackAlignment();
- uint64_t StackSize = RoundUpToAlignment(MFI->getStackSize(), StackAlign);
-
- if (MipsFI->globalBaseRegSet())
- StackSize += MFI->getObjectOffset(MipsFI->getGlobalRegFI()) + StackAlign;
- else
- StackSize += RoundUpToAlignment(MipsFI->getMaxCallFrameSize(), StackAlign);
-
- // Update stack size
- MFI->setStackSize(StackSize);
-
- // No need to allocate space on the stack.
- if (StackSize == 0 && !MFI->adjustsStack()) return;
-
- MachineModuleInfo &MMI = MF.getMMI();
- std::vector<MachineMove> &Moves = MMI.getFrameMoves();
- MachineLocation DstML, SrcML;
-
- // Adjust stack.
- if (isInt<16>(-StackSize)) {// addi sp, sp, (-stacksize)
- if (STI.inMips16Mode())
- BuildMI(MBB, MBBI, dl,
- TII.get(Mips::SaveRaF16)).addImm(StackSize); // cleanup
- else
- BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(-StackSize);
- }
- else { // Expand immediate that doesn't fit in 16-bit.
- unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT;
-
- MF.getInfo<MipsFunctionInfo>()->setEmitNOAT();
- Mips::loadImmediate(-StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl, false,
- 0);
- BuildMI(MBB, MBBI, dl, TII.get(ADDu), SP).addReg(SP).addReg(ATReg);
- }
-
- // emit ".cfi_def_cfa_offset StackSize"
- MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl,
- TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel);
- DstML = MachineLocation(MachineLocation::VirtualFP);
- SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize);
- Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML));
-
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
-
- if (CSI.size()) {
- // Find the instruction past the last instruction that saves a callee-saved
- // register to the stack.
- for (unsigned i = 0; i < CSI.size(); ++i)
- ++MBBI;
-
- // Iterate over list of callee-saved registers and emit .cfi_offset
- // directives.
- MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl,
- TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel);
-
- for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
- E = CSI.end(); I != E; ++I) {
- int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
- unsigned Reg = I->getReg();
-
- // If Reg is a double precision register, emit two cfa_offsets,
- // one for each of the paired single precision registers.
- if (Mips::AFGR64RegClass.contains(Reg)) {
- MachineLocation DstML0(MachineLocation::VirtualFP, Offset);
- MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4);
- MachineLocation SrcML0(RegInfo->getSubReg(Reg, Mips::sub_fpeven));
- MachineLocation SrcML1(RegInfo->getSubReg(Reg, Mips::sub_fpodd));
-
- if (!STI.isLittle())
- std::swap(SrcML0, SrcML1);
-
- Moves.push_back(MachineMove(CSLabel, DstML0, SrcML0));
- Moves.push_back(MachineMove(CSLabel, DstML1, SrcML1));
- } else {
- // Reg is either in CPURegs or FGR32.
- DstML = MachineLocation(MachineLocation::VirtualFP, Offset);
- SrcML = MachineLocation(Reg);
- Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
- }
- }
- }
-
- // if framepointer enabled, set it to point to the stack pointer.
- if (hasFP(MF)) {
- // Insert instruction "move $fp, $sp" at this location.
- BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO);
-
- // emit ".cfi_def_cfa_register $fp"
- MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl,
- TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel);
- DstML = MachineLocation(FP);
- SrcML = MachineLocation(MachineLocation::VirtualFP);
- Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML));
- }
-}
-
-void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const MipsInstrInfo &TII =
- *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
- DebugLoc dl = MBBI->getDebugLoc();
- unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
- unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
- unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
- unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
- unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
-
- // if framepointer enabled, restore the stack pointer.
- if (hasFP(MF)) {
- // Find the first instruction that restores a callee-saved register.
- MachineBasicBlock::iterator I = MBBI;
-
- for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i)
- --I;
-
- // Insert instruction "move $sp, $fp" at this location.
- BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO);
- }
-
- // Get the number of bytes from FrameInfo
- uint64_t StackSize = MFI->getStackSize();
-
- if (!StackSize)
- return;
-
- // Adjust stack.
- if (isInt<16>(StackSize)) { // addi sp, sp, (-stacksize)
- if (STI.inMips16Mode())
- // assumes stacksize multiple of 8
- BuildMI(MBB, MBBI, dl,
- TII.get(Mips::RestoreRaF16)).addImm(StackSize);
- else
- BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(StackSize);
- }
- else { // Expand immediate that doesn't fit in 16-bit.
- unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT;
-
- MF.getInfo<MipsFunctionInfo>()->setEmitNOAT();
- Mips::loadImmediate(StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl, false,
- 0);
- BuildMI(MBB, MBBI, dl, TII.get(ADDu), SP).addReg(SP).addReg(ATReg);
- }
-}
-
-void MipsFrameLowering::
-processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
- MachineRegisterInfo &MRI = MF.getRegInfo();
- unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
-
- // FIXME: remove this code if register allocator can correctly mark
- // $fp and $ra used or unused.
-
- // Mark $fp and $ra as used or unused.
- if (hasFP(MF))
- MRI.setPhysRegUsed(FP);
-}
-
-bool MipsFrameLowering::
-spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- MachineFunction *MF = MBB.getParent();
- MachineBasicBlock *EntryBlock = MF->begin();
- const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
-
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- // Add the callee-saved register as live-in. Do not add if the register is
- // RA and return address is taken, because it has already been added in
- // method MipsTargetLowering::LowerRETURNADDR.
- // It's killed at the spill, unless the register is RA and return address
- // is taken.
- unsigned Reg = CSI[i].getReg();
- bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA || Reg == Mips::RA_64)
- && MF->getFrameInfo()->isReturnAddressTaken();
- if (!IsRAAndRetAddrIsTaken)
- EntryBlock->addLiveIn(Reg);
-
- // Insert the spill to the stack frame.
- bool IsKill = !IsRAAndRetAddrIsTaken;
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(*EntryBlock, MI, Reg, IsKill,
- CSI[i].getFrameIdx(), RC, TRI);
- }
-
- return true;
-}
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index e364ded..ed7b7fe 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -27,28 +27,19 @@ protected:
public:
explicit MipsFrameLowering(const MipsSubtarget &sti)
- : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0),
- STI(sti) {
- }
+ : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0,
+ sti.hasMips64() ? 16 : 8), STI(sti) {}
- bool targetHandlesStackFrameRounding() const;
-
- /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
- /// the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
- bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
+ static const MipsFrameLowering *create(MipsTargetMachine &TM,
+ const MipsSubtarget &ST);
bool hasFP(const MachineFunction &MF) const;
-
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const;
};
+/// Create MipsFrameLowering objects.
+const MipsFrameLowering *createMips16FrameLowering(const MipsSubtarget &ST);
+const MipsFrameLowering *createMipsSEFrameLowering(const MipsSubtarget &ST);
+
} // End llvm namespace
#endif
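A sketch of how the new factory is presumably wired up (an assumption; the corresponding MipsTargetMachine change is not shown in this section):

    // Hypothetical call site inside MipsTargetMachine's constructor:
    const MipsFrameLowering *FL = MipsFrameLowering::create(*this, Subtarget);

Per the create() definition above, this returns the object built by createMips16FrameLowering() when the subtarget is in MIPS16 mode and by createMipsSEFrameLowering() otherwise.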
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index ea33b74..5a97c17 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -117,28 +117,23 @@ private:
void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
- if (((MF.getTarget().getRelocationModel() == Reloc::Static) ||
- Subtarget.inMips16Mode()) && !MipsFI->globalBaseRegSet())
+ if (!MipsFI->globalBaseRegSet())
return;
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator I = MBB.begin();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- const MipsRegisterInfo *TargetRegInfo = TM.getRegisterInfo();
- const MipsInstrInfo *MII = TM.getInstrInfo();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
- int FI = 0;
+ const TargetRegisterClass *RC;
- FI= MipsFI->initGlobalRegFI();
-
- const TargetRegisterClass *RC = Subtarget.isABI_N64() ?
- (const TargetRegisterClass*)&Mips::CPU64RegsRegClass :
- (const TargetRegisterClass*)&Mips::CPURegsRegClass;
-
- if (Subtarget.inMips16Mode())
- RC=(const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
+ if (Subtarget.isABI_N64())
+ RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass;
+ else if (Subtarget.inMips16Mode())
+ RC = (const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
+ else
+ RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass;
V0 = RegInfo.createVirtualRegister(RC);
V1 = RegInfo.createVirtualRegister(RC);
@@ -158,23 +153,17 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
.addReg(Mips::T9_64);
BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
.addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
- MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC,
- TargetRegInfo);
return;
}
if (Subtarget.inMips16Mode()) {
BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0)
- .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16),
- V1)
- .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
- BuildMI(MBB, I, DL, TII.get(Mips::SllX16),
- V2 ).addReg(V0).addImm(16);
+ .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1)
+ .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
+ BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16);
BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg)
.addReg(V1).addReg(V2);
-
-
return;
}
@@ -203,19 +192,11 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
.addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
- MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC,
- TargetRegInfo);
return;
}
assert(Subtarget.isABI_O32());
-
- //if (Subtarget.inMips16Mode())
- // return; // no need to load GP. It can be calculated anywhere
-
-
-
// For O32 ABI, the following instruction sequence is emitted to initialize
// the global base register:
//
@@ -237,7 +218,6 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
MBB.addLiveIn(Mips::V0);
BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
.addReg(Mips::V0).addReg(Mips::T9);
- MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC, TargetRegInfo);
}
bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI,
@@ -262,13 +242,14 @@ bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI,
// Replace uses with ZeroReg.
for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
- E = MRI->use_end(); U != E; ++U) {
+ E = MRI->use_end(); U != E;) {
MachineOperand &MO = U.getOperand();
+ unsigned OpNo = U.getOperandNo();
MachineInstr *MI = MO.getParent();
+ ++U;
// Do not replace if it is a phi's operand or is tied to def operand.
- if (MI->isPHI() || MI->isRegTiedToDefOperand(U.getOperandNo()) ||
- MI->isPseudo())
+ if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo())
continue;
MO.setReg(ZeroReg);
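The hunk above advances the use iterator before the operand is rewritten. A minimal sketch of that idiom with the rationale spelled out (simplified: the PHI, tied-operand, and pseudo checks from the actual loop are omitted):

    #include "llvm/CodeGen/MachineRegisterInfo.h"
    using namespace llvm;

    static void replaceAllUses(MachineRegisterInfo &MRI, unsigned DstReg,
                               unsigned ZeroReg) {
      for (MachineRegisterInfo::use_iterator U = MRI.use_begin(DstReg),
             E = MRI.use_end(); U != E;) {
        MachineOperand &MO = U.getOperand();
        ++U; // Advance first: setReg() unlinks MO from DstReg's use list,
             // which would otherwise invalidate the iterator.
        MO.setReg(ZeroReg);
      }
    }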
@@ -309,21 +290,6 @@ bool MipsDAGToDAGISel::
SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) {
EVT ValTy = Addr.getValueType();
- // If Parent is an unaligned f32 load or store, select a (base + index)
- // floating point load/store instruction (luxc1 or suxc1).
- const LSBaseSDNode *LS = 0;
-
- if (Parent && (LS = dyn_cast<LSBaseSDNode>(Parent))) {
- EVT VT = LS->getMemoryVT();
-
- if (VT.getSizeInBits() / 8 > LS->getAlignment()) {
- assert(TLI.allowsUnalignedMemoryAccesses(VT) &&
- "Unaligned loads/stores not supported for this type.");
- if (VT == MVT::f32)
- return false;
- }
- }
-
// if Address is FI, get the TargetFrameIndex.
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
@@ -382,6 +348,8 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) {
}
// If an indexed floating point load/store can be emitted, return false.
+ const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
+
if (LS &&
(LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
Subtarget.hasMips32r2Or64())
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 7741f9f..c5207c6 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -157,7 +157,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::SETCC, MVT::f32, Custom);
setOperationAction(ISD::SETCC, MVT::f64, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
@@ -178,7 +177,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
setOperationAction(ISD::LOAD, MVT::i64, Custom);
setOperationAction(ISD::STORE, MVT::i64, Custom);
}
@@ -217,6 +215,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
if (!Subtarget->hasMips32r2())
setOperationAction(ISD::ROTR, MVT::i32, Expand);
@@ -314,8 +314,6 @@ bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
case MVT::i64:
case MVT::i32:
return true;
- case MVT::f32:
- return Subtarget->hasMips32r2Or64();
default:
return false;
}
@@ -794,7 +792,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
- case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
@@ -1504,42 +1501,6 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
// Misc Lower Operation implementation
//===----------------------------------------------------------------------===//
SDValue MipsTargetLowering::
-LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
-{
- MachineFunction &MF = DAG.getMachineFunction();
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
- unsigned SP = IsN64 ? Mips::SP_64 : Mips::SP;
-
- assert(getTargetMachine().getFrameLowering()->getStackAlignment() >=
- cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue() &&
- "Cannot lower if the alignment of the allocated space is larger than \
- that of the stack.");
-
- SDValue Chain = Op.getOperand(0);
- SDValue Size = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
-
- // Get a reference from Mips stack pointer
- SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, SP, getPointerTy());
-
- // Subtract the dynamic size from the actual stack size to
- // obtain the new stack size.
- SDValue Sub = DAG.getNode(ISD::SUB, dl, getPointerTy(), StackPointer, Size);
-
- // The Sub result contains the new stack start address, so it
- // must be placed in the stack pointer register.
- Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, SP, Sub, SDValue());
-
- // This node always has two return values: a new stack pointer
- // value and a chain
- SDVTList VTLs = DAG.getVTList(getPointerTy(), MVT::Other);
- SDValue Ptr = DAG.getFrameIndex(MipsFI->getDynAllocFI(), getPointerTy());
- SDValue Ops[] = { Chain, Ptr, Chain.getValue(1) };
-
- return DAG.getNode(MipsISD::DynAlloc, dl, VTLs, Ops, 3);
-}
-
-SDValue MipsTargetLowering::
LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
// The first operand is the chain, the second is the condition, the third is
@@ -2455,9 +2416,9 @@ static unsigned getNextIntArgReg(unsigned Reg) {
// Write ByVal Arg to arg registers and stack.
static void
-WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
+WriteByValArg(SDValue Chain, DebugLoc dl,
SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass,
- SmallVector<SDValue, 8> &MemOpChains, int &LastFI,
+ SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr,
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
const CCValAssign &VA, const ISD::ArgFlagsTy &Flags,
MVT PtrType, bool isLittle) {
@@ -2531,24 +2492,24 @@ WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
return;
}
- // Create a fixed object on stack at offset LocMemOffset and copy
- // remaining part of byval arg to it using memcpy.
+ // Copy remaining part of byval arg using memcpy.
SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
DAG.getConstant(Offset, MVT::i32));
- LastFI = MFI->CreateFixedObject(RemainingSize, LocMemOffset, true);
- SDValue Dst = DAG.getFrameIndex(LastFI, PtrType);
- ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src,
- DAG.getConstant(RemainingSize, MVT::i32),
- std::min(ByValAlign, (unsigned)4),
- /*isVolatile=*/false, /*AlwaysInline=*/false,
- MachinePointerInfo(0), MachinePointerInfo(0));
+ SDValue Dst = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr,
+ DAG.getIntPtrConstant(LocMemOffset));
+ Chain = DAG.getMemcpy(Chain, dl, Dst, Src,
+ DAG.getConstant(RemainingSize, MVT::i32),
+ std::min(ByValAlign, (unsigned)4),
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ MachinePointerInfo(0), MachinePointerInfo(0));
+ MemOpChains.push_back(Chain);
}
// Copy Mips64 byVal arg to registers and stack.
void static
-PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
+PassByValArg64(SDValue Chain, DebugLoc dl,
SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass,
- SmallVector<SDValue, 8> &MemOpChains, int &LastFI,
+ SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr,
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
const CCValAssign &VA, const ISD::ArgFlagsTy &Flags,
EVT PtrTy, bool isLittle) {
@@ -2620,16 +2581,16 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
assert(MemCpySize && "MemCpySize must not be zero.");
- // Create a fixed object on stack at offset LocMemOffset and copy
- // remainder of byval arg to it with memcpy.
+  // Copy the remainder of the byval arg to the stack with memcpy.
SDValue Src = DAG.getNode(ISD::ADD, dl, PtrTy, Arg,
DAG.getConstant(Offset, PtrTy));
- LastFI = MFI->CreateFixedObject(MemCpySize, LocMemOffset, true);
- SDValue Dst = DAG.getFrameIndex(LastFI, PtrTy);
- ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src,
- DAG.getConstant(MemCpySize, PtrTy), Alignment,
- /*isVolatile=*/false, /*AlwaysInline=*/false,
- MachinePointerInfo(0), MachinePointerInfo(0));
+ SDValue Dst = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr,
+ DAG.getIntPtrConstant(LocMemOffset));
+ Chain = DAG.getMemcpy(Chain, dl, Dst, Src,
+ DAG.getConstant(MemCpySize, PtrTy), Alignment,
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ MachinePointerInfo(0), MachinePointerInfo(0));
+ MemOpChains.push_back(Chain);
}
/// LowerCall - function arguments are copied from virtual regs to
@@ -2643,9 +2604,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
- SDValue InChain = CLI.Chain;
+ SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
- SDValue CalleeSave = CLI.Callee;
bool &isTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool isVarArg = CLI.IsVarArg;
@@ -2675,18 +2635,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NextStackOffset = CCInfo.getNextStackOffset();
-
- // Chain is the output chain of the last Load/Store or CopyToReg node.
- // ByValChain is the output chain of the last Memcpy node created for copying
- // byval arguments to the stack.
- SDValue Chain, CallSeqStart, ByValChain;
- SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true);
- Chain = CallSeqStart = DAG.getCALLSEQ_START(InChain, NextStackOffsetVal);
- ByValChain = InChain;
-
- // Get the frame index of the stack frame object that points to the location
- // of dynamically allocated area on the stack.
- int DynAllocFI = MipsFI->getDynAllocFI();
+ unsigned StackAlignment = TFL->getStackAlignment();
+ NextStackOffset = RoundUpToAlignment(NextStackOffset, StackAlignment);
// Update size of the maximum argument space.
// For O32, a minimum of four words (16 bytes) of argument space is
@@ -2694,27 +2644,23 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (IsO32 && (CallConv != CallingConv::Fast))
NextStackOffset = std::max(NextStackOffset, (unsigned)16);
- unsigned MaxCallFrameSize = MipsFI->getMaxCallFrameSize();
-
- if (MaxCallFrameSize < NextStackOffset) {
- MipsFI->setMaxCallFrameSize(NextStackOffset);
+ // Chain is the output chain of the last Load/Store or CopyToReg node.
+ SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true);
+ Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal);
- // Set the offsets relative to $sp of the $gp restore slot and dynamically
- // allocated stack space. These offsets must be aligned to a boundary
- // determined by the stack alignment of the ABI.
- unsigned StackAlignment = TFL->getStackAlignment();
- NextStackOffset = (NextStackOffset + StackAlignment - 1) /
- StackAlignment * StackAlignment;
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl,
+ IsN64 ? Mips::SP_64 : Mips::SP,
+ getPointerTy());
- MFI->setObjectOffset(DynAllocFI, NextStackOffset);
- }
+ if (MipsFI->getMaxCallFrameSize() < NextStackOffset)
+ MipsFI->setMaxCallFrameSize(NextStackOffset);
// With EABI is it possible to have 16 args on registers.
SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
- int FirstFI = -MFI->getNumFixedObjects() - 1, LastFI = 0;
-
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
SDValue Arg = OutVals[i];
@@ -2727,11 +2673,11 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert(Flags.getByValSize() &&
"ByVal args of size 0 should have been ignored by front-end.");
if (IsO32)
- WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI,
+ WriteByValArg(Chain, dl, RegsToPass, MemOpChains, StackPtr,
MFI, DAG, Arg, VA, Flags, getPointerTy(),
Subtarget->isLittle());
else
- PassByValArg64(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI,
+ PassByValArg64(Chain, dl, RegsToPass, MemOpChains, StackPtr,
MFI, DAG, Arg, VA, Flags, getPointerTy(),
Subtarget->isLittle());
continue;
@@ -2781,29 +2727,14 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Register can't get to this point...
assert(VA.isMemLoc());
- // Create the frame index object for this incoming parameter
- LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
- VA.getLocMemOffset(), true);
- SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy());
-
// emit ISD::STORE which stores the
// parameter value to a stack location
+ SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
+ DAG.getIntPtrConstant(VA.getLocMemOffset()));
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(), false, false, 0));
}
- // Extend range of indices of frame objects for outgoing arguments that were
- // created during this function call. Skip this step if no such objects were
- // created.
- if (LastFI)
- MipsFI->extendOutArgFIRange(FirstFI, LastFI);
-
- // If a memcpy has been created to copy a byval arg to a stack, replace the
- // chain input of CallSeqStart with ByValChain.
- if (InChain != ByValChain)
- DAG.UpdateNodeOperands(CallSeqStart.getNode(), ByValChain,
- NextStackOffsetVal);
-
// Transform all store nodes into one single node because all store
// nodes are independent of each other.
if (!MemOpChains.empty())
@@ -2867,6 +2798,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
+ // T9 register operand.
+ SDValue T9;
+
// T9 should contain the address of the callee function if
// -relocation-model=pic or it is an indirect call.
if (IsPICCall || !GlobalOrExternal) {
@@ -2874,7 +2808,11 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0));
InFlag = Chain.getValue(1);
- Callee = DAG.getRegister(T9Reg, getPointerTy());
+
+ if (Subtarget->inMips16Mode())
+ T9 = DAG.getRegister(T9Reg, getPointerTy());
+ else
+ Callee = DAG.getRegister(T9Reg, getPointerTy());
}
// Insert node "GP copy globalreg" before call to function.
@@ -2902,7 +2840,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
- Ops.push_back(Subtarget->inMips16Mode()? CalleeSave: Callee);
+ Ops.push_back(Callee);
// Add argument registers to the end of the list so that they are
// known live into the call.
@@ -2910,8 +2848,10 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
- if (Subtarget->inMips16Mode())
- Ops.push_back(Callee);
+ // Add T9 register operand.
+ if (T9.getNode())
+ Ops.push_back(T9);
+
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
@@ -2925,8 +2865,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
- Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getIntPtrConstant(NextStackOffset, true),
+ Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal,
DAG.getIntPtrConstant(0, true), InFlag);
InFlag = Chain.getValue(1);
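As a worked example of the call-frame size computation introduced above (the numbers are illustrative): on an O32 target with an 8-byte stack alignment, CCInfo reporting NextStackOffset = 20 bytes of outgoing arguments gives RoundUpToAlignment(20, 8) = 24, and std::max(24, 16) leaves 24; that rounded value is what CALLSEQ_START and CALLSEQ_END now both receive, while byval and stack-passed arguments are stored at StackPtr + LocMemOffset instead of through fixed frame objects.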
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index edab03c..95ea8fa 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -132,7 +132,6 @@ namespace llvm {
// Lower Operand specifics
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 9654b86..df45df4 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -101,18 +101,18 @@ class FPStore<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>:
}
// FP indexed load.
class FPIdxLoad<bits<6> funct, string opstr, RegisterClass DRC,
- RegisterClass PRC, PatFrag FOp>:
+ RegisterClass PRC, SDPatternOperator FOp = null_frag>:
FFMemIdx<funct, (outs DRC:$fd), (ins PRC:$base, PRC:$index),
- !strconcat(opstr, "\t$fd, $index($base)"),
+ !strconcat(opstr, "\t$fd, ${index}(${base})"),
[(set DRC:$fd, (FOp (add PRC:$base, PRC:$index)))]> {
let fs = 0;
}
// FP indexed store.
class FPIdxStore<bits<6> funct, string opstr, RegisterClass DRC,
- RegisterClass PRC, PatFrag FOp>:
+ RegisterClass PRC, SDPatternOperator FOp= null_frag>:
FFMemIdx<funct, (outs), (ins DRC:$fs, PRC:$base, PRC:$index),
- !strconcat(opstr, "\t$fs, $index($base)"),
+ !strconcat(opstr, "\t$fs, ${index}(${base})"),
[(FOp DRC:$fs, (add PRC:$base, PRC:$index))]> {
let fd = 0;
}
@@ -270,7 +270,7 @@ let Predicates = [NotN64, HasStandardEncoding] in {
}
let Predicates = [NotN64, HasMips64, HasStandardEncoding],
- DecoderNamespace = "Mips64" in {
+ DecoderNamespace = "Mips64" in {
def LDC164 : FPLoad<0x35, "ldc1", FGR64, mem>;
def SDC164 : FPStore<0x3d, "sdc1", FGR64, mem>;
}
@@ -283,9 +283,7 @@ let Predicates = [NotN64, NotMips64, HasStandardEncoding] in {
// Indexed loads and stores.
let Predicates = [HasMips32r2Or64, HasStandardEncoding] in {
def LWXC1 : FPIdxLoad<0x0, "lwxc1", FGR32, CPURegs, load_a>;
- def LUXC1 : FPIdxLoad<0x5, "luxc1", FGR32, CPURegs, load_u>;
def SWXC1 : FPIdxStore<0x8, "swxc1", FGR32, CPURegs, store_a>;
- def SUXC1 : FPIdxStore<0xd, "suxc1", FGR32, CPURegs, store_u>;
}
let Predicates = [HasMips32r2, NotMips64, HasStandardEncoding] in {
@@ -301,13 +299,23 @@ let Predicates = [HasMips64, NotN64, HasStandardEncoding], DecoderNamespace="Mip
// n64
let Predicates = [IsN64, HasStandardEncoding], isCodeGenOnly=1 in {
def LWXC1_P8 : FPIdxLoad<0x0, "lwxc1", FGR32, CPU64Regs, load_a>;
- def LUXC1_P8 : FPIdxLoad<0x5, "luxc1", FGR32, CPU64Regs, load_u>;
def LDXC164_P8 : FPIdxLoad<0x1, "ldxc1", FGR64, CPU64Regs, load_a>;
def SWXC1_P8 : FPIdxStore<0x8, "swxc1", FGR32, CPU64Regs, store_a>;
- def SUXC1_P8 : FPIdxStore<0xd, "suxc1", FGR32, CPU64Regs, store_u>;
def SDXC164_P8 : FPIdxStore<0x9, "sdxc1", FGR64, CPU64Regs, store_a>;
}
+// Load/store doubleword indexed unaligned.
+let Predicates = [NotMips64, HasStandardEncoding] in {
+ def LUXC1 : FPIdxLoad<0x5, "luxc1", AFGR64, CPURegs>;
+ def SUXC1 : FPIdxStore<0xd, "suxc1", AFGR64, CPURegs>;
+}
+
+let Predicates = [HasMips64, HasStandardEncoding],
+ DecoderNamespace="Mips64" in {
+ def LUXC164 : FPIdxLoad<0x5, "luxc1", FGR64, CPURegs>;
+ def SUXC164 : FPIdxStore<0xd, "suxc1", FGR64, CPURegs>;
+}
+
/// Floating-point Arithmetic
defm FADD : FFR2P_M<0x00, "add", fadd, 1>;
defm FDIV : FFR2P_M<0x03, "div", fdiv>;
@@ -408,25 +416,23 @@ let Defs=[FCR31] in {
//===----------------------------------------------------------------------===//
// Floating Point Pseudo-Instructions
//===----------------------------------------------------------------------===//
-def MOVCCRToCCR : MipsPseudo<(outs CCR:$dst), (ins CCR:$src),
- "# MOVCCRToCCR", []>;
+def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCR:$src),
+ "# MOVCCRToCCR", []>;
// This pseudo instr gets expanded into 2 mtc1 instrs after register
// allocation.
def BuildPairF64 :
- MipsPseudo<(outs AFGR64:$dst),
- (ins CPURegs:$lo, CPURegs:$hi), "",
- [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>;
+ PseudoSE<(outs AFGR64:$dst),
+ (ins CPURegs:$lo, CPURegs:$hi), "",
+ [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>;
// This pseudo instr gets expanded into 2 mfc1 instrs after register
// allocation.
// if n is 0, lower part of src is extracted.
// if n is 1, higher part of src is extracted.
def ExtractElementF64 :
- MipsPseudo<(outs CPURegs:$dst),
- (ins AFGR64:$src, i32imm:$n), "",
- [(set CPURegs:$dst,
- (MipsExtractElementF64 AFGR64:$src, imm:$n))]>;
+ PseudoSE<(outs CPURegs:$dst), (ins AFGR64:$src, i32imm:$n), "",
+ [(set CPURegs:$dst, (MipsExtractElementF64 AFGR64:$src, imm:$n))]>;
//===----------------------------------------------------------------------===//
// Floating Point Patterns
@@ -466,17 +472,3 @@ let Predicates = [IsFP64bit, HasStandardEncoding] in {
def : MipsPat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>;
def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>;
}
-
-// Patterns for unaligned floating point loads and stores.
-let Predicates = [HasMips32r2Or64, NotN64, HasStandardEncoding] in {
- def : MipsPat<(f32 (load_u CPURegs:$addr)), (LUXC1 CPURegs:$addr, ZERO)>;
- def : MipsPat<(store_u FGR32:$src, CPURegs:$addr),
- (SUXC1 FGR32:$src, CPURegs:$addr, ZERO)>;
-}
-
-let Predicates = [IsN64, HasStandardEncoding] in {
- def : MipsPat<(f32 (load_u CPU64Regs:$addr)),
- (LUXC1_P8 CPU64Regs:$addr, ZERO_64)>;
- def : MipsPat<(store_u FGR32:$src, CPU64Regs:$addr),
- (SUXC1_P8 FGR32:$src, CPU64Regs:$addr, ZERO_64)>;
-}
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index 15a77fb..8feb853 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -70,25 +70,35 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
let DecoderNamespace = "Mips";
field bits<32> SoftFail = 0;
+}
+// Mips32/64 Instruction Format
+class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin, Format f>:
+ MipsInst<outs, ins, asmstr, pattern, itin, f> {
let Predicates = [HasStandardEncoding];
-
}
// Mips Pseudo Instructions Format
class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
- MipsInst<outs, ins, asmstr, pattern, IIPseudo, Pseudo> {
+ MipsInst<outs, ins, asmstr, pattern, IIPseudo, Pseudo> {
let isCodeGenOnly = 1;
let isPseudo = 1;
}
+// Mips32/64 Pseudo Instruction Format
+class PseudoSE<dag outs, dag ins, string asmstr, list<dag> pattern>:
+ MipsPseudo<outs, ins, asmstr, pattern> {
+ let Predicates = [HasStandardEncoding];
+}
+
//===----------------------------------------------------------------------===//
// Format R instruction class in Mips : <|opcode|rs|rt|rd|shamt|funct|>
//===----------------------------------------------------------------------===//
class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst<outs, ins, asmstr, pattern, itin, FrmR>
+ InstSE<outs, ins, asmstr, pattern, itin, FrmR>
{
bits<5> rd;
bits<5> rs;
@@ -111,7 +121,7 @@ class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr,
//===----------------------------------------------------------------------===//
class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin, FrmI>
+ InstrItinClass itin>: InstSE<outs, ins, asmstr, pattern, itin, FrmI>
{
bits<5> rt;
bits<5> rs;
@@ -126,7 +136,7 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
class BranchBase<bits<6> op, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst<outs, ins, asmstr, pattern, itin, FrmI>
+ InstSE<outs, ins, asmstr, pattern, itin, FrmI>
{
bits<5> rs;
bits<5> rt;
@@ -144,7 +154,7 @@ class BranchBase<bits<6> op, dag outs, dag ins, string asmstr,
//===----------------------------------------------------------------------===//
class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin, FrmJ>
+ InstrItinClass itin>: InstSE<outs, ins, asmstr, pattern, itin, FrmJ>
{
bits<26> addr;
@@ -172,7 +182,7 @@ class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins,
string asmstr, list<dag> pattern> :
- MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmFR>
+ InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmFR>
{
bits<5> fd;
bits<5> fs;
@@ -196,7 +206,7 @@ class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins,
//===----------------------------------------------------------------------===//
class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
- MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmFI>
+ InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmFI>
{
bits<5> ft;
bits<5> base;
@@ -214,7 +224,7 @@ class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
//===----------------------------------------------------------------------===//
class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> :
- MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
+ InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
{
bits<5> fs;
bits<5> ft;
@@ -235,7 +245,7 @@ class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> :
class FCMOV<bits<1> _tf, dag outs, dag ins, string asmstr,
list<dag> pattern> :
- MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
+ InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
{
bits<5> rd;
bits<5> rs;
@@ -256,7 +266,7 @@ class FCMOV<bits<1> _tf, dag outs, dag ins, string asmstr,
class FFCMOV<bits<5> _fmt, bits<1> _tf, dag outs, dag ins, string asmstr,
list<dag> pattern> :
- MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
+ InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
{
bits<5> fd;
bits<5> fs;
@@ -303,7 +313,7 @@ class FFR2P<bits<6> funct, bits<5> fmt, string opstr,
// Floating point madd/msub/nmadd/nmsub.
class FFMADDSUB<bits<3> funct, bits<3> fmt, dag outs, dag ins, string asmstr,
list<dag> pattern>
- : MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther> {
+ : InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> {
bits<5> fd;
bits<5> fr;
bits<5> fs;
@@ -321,7 +331,7 @@ class FFMADDSUB<bits<3> funct, bits<3> fmt, dag outs, dag ins, string asmstr,
// FP indexed load/store instructions.
class FFMemIdx<bits<6> funct, dag outs, dag ins, string asmstr,
list<dag> pattern> :
- MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
+ InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
{
bits<5> base;
bits<5> index;
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 458e4f7..50e3eb5 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -27,68 +27,19 @@
using namespace llvm;
-MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm)
+MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm, unsigned UncondBr)
: MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
- TM(tm), IsN64(TM.getSubtarget<MipsSubtarget>().isABI_N64()),
- InMips16Mode(TM.getSubtarget<MipsSubtarget>().inMips16Mode()),
- RI(*TM.getSubtargetImpl(), *this),
- UncondBrOpc(TM.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J) {}
+ TM(tm), UncondBrOpc(UncondBr) {}
-const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const {
- return RI;
-}
+const MipsInstrInfo *MipsInstrInfo::create(MipsTargetMachine &TM) {
+ if (TM.getSubtargetImpl()->inMips16Mode())
+ return llvm::createMips16InstrInfo(TM);
-static bool isZeroImm(const MachineOperand &op) {
- return op.isImm() && op.getImm() == 0;
+ return llvm::createMipsSEInstrInfo(TM);
}
-/// isLoadFromStackSlot - If the specified machine instruction is a direct
-/// load from a stack slot, return the virtual or physical register number of
-/// the destination along with the FrameIndex of the loaded stack slot. If
-/// not, return 0. This predicate must return 0 if the instruction has
-/// any side effects other than loading from the stack slot.
-unsigned MipsInstrInfo::
-isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
-{
- unsigned Opc = MI->getOpcode();
-
- if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) ||
- (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) ||
- (Opc == Mips::LDC1) || (Opc == Mips::LDC164) ||
- (Opc == Mips::LDC164_P8)) {
- if ((MI->getOperand(1).isFI()) && // is a stack slot
- (MI->getOperand(2).isImm()) && // the imm is zero
- (isZeroImm(MI->getOperand(2)))) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- }
-
- return 0;
-}
-
-/// isStoreToStackSlot - If the specified machine instruction is a direct
-/// store to a stack slot, return the virtual or physical register number of
-/// the source reg along with the FrameIndex of the loaded stack slot. If
-/// not, return 0. This predicate must return 0 if the instruction has
-/// any side effects other than storing to the stack slot.
-unsigned MipsInstrInfo::
-isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
-{
- unsigned Opc = MI->getOpcode();
-
- if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) ||
- (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) ||
- (Opc == Mips::SDC1) || (Opc == Mips::SDC164) ||
- (Opc == Mips::SDC164_P8)) {
- if ((MI->getOperand(1).isFI()) && // is a stack slot
- (MI->getOperand(2).isImm()) && // the imm is zero
- (isZeroImm(MI->getOperand(2)))) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- }
- return 0;
+bool MipsInstrInfo::isZeroImm(const MachineOperand &op) const {
+ return op.isImm() && op.getImm() == 0;
}
/// insertNoop - If data hazard condition is found insert the target nop
@@ -100,83 +51,8 @@ insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const
BuildMI(MBB, MI, DL, get(Mips::NOP));
}
-void MipsInstrInfo::
-copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
- unsigned Opc = 0, ZeroReg = 0;
-
- if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg.
- if (Mips::CPURegsRegClass.contains(SrcReg)) {
- if (InMips16Mode)
- Opc=Mips::Mov32R16;
- else {
- Opc = Mips::ADDu, ZeroReg = Mips::ZERO;
- }
- }
- else if (Mips::CCRRegClass.contains(SrcReg))
- Opc = Mips::CFC1;
- else if (Mips::FGR32RegClass.contains(SrcReg))
- Opc = Mips::MFC1;
- else if (SrcReg == Mips::HI)
- Opc = Mips::MFHI, SrcReg = 0;
- else if (SrcReg == Mips::LO)
- Opc = Mips::MFLO, SrcReg = 0;
- }
- else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg.
- if (Mips::CCRRegClass.contains(DestReg))
- Opc = Mips::CTC1;
- else if (Mips::FGR32RegClass.contains(DestReg))
- Opc = Mips::MTC1;
- else if (DestReg == Mips::HI)
- Opc = Mips::MTHI, DestReg = 0;
- else if (DestReg == Mips::LO)
- Opc = Mips::MTLO, DestReg = 0;
- }
- else if (Mips::FGR32RegClass.contains(DestReg, SrcReg))
- Opc = Mips::FMOV_S;
- else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg))
- Opc = Mips::FMOV_D32;
- else if (Mips::FGR64RegClass.contains(DestReg, SrcReg))
- Opc = Mips::FMOV_D64;
- else if (Mips::CCRRegClass.contains(DestReg, SrcReg))
- Opc = Mips::MOVCCRToCCR;
- else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg.
- if (Mips::CPU64RegsRegClass.contains(SrcReg))
- Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64;
- else if (SrcReg == Mips::HI64)
- Opc = Mips::MFHI64, SrcReg = 0;
- else if (SrcReg == Mips::LO64)
- Opc = Mips::MFLO64, SrcReg = 0;
- else if (Mips::FGR64RegClass.contains(SrcReg))
- Opc = Mips::DMFC1;
- }
- else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg.
- if (DestReg == Mips::HI64)
- Opc = Mips::MTHI64, DestReg = 0;
- else if (DestReg == Mips::LO64)
- Opc = Mips::MTLO64, DestReg = 0;
- else if (Mips::FGR64RegClass.contains(DestReg))
- Opc = Mips::DMTC1;
- }
-
- assert(Opc && "Cannot copy registers");
-
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc));
-
- if (DestReg)
- MIB.addReg(DestReg, RegState::Define);
-
- if (ZeroReg)
- MIB.addReg(ZeroReg);
-
- if (SrcReg)
- MIB.addReg(SrcReg, getKillRegState(KillSrc));
-}
-
-static MachineMemOperand* GetMemOperand(MachineBasicBlock &MBB, int FI,
- unsigned Flag) {
+MachineMemOperand *MipsInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI,
+ unsigned Flag) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
@@ -185,130 +61,6 @@ static MachineMemOperand* GetMemOperand(MachineBasicBlock &MBB, int FI,
MFI.getObjectSize(FI), Align);
}
-void MipsInstrInfo::
-storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
- MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
-
- unsigned Opc = 0;
-
- if (Mips::CPURegsRegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::SW_P8 : Mips::SW;
- else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::SD_P8 : Mips::SD;
- else if (Mips::FGR32RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1;
- else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
- Opc = Mips::SDC1;
- else if (Mips::FGR64RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164;
-
- assert(Opc && "Register class not handled!");
- BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
-}
-
-void MipsInstrInfo::
-loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const
-{
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
- MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
- unsigned Opc = 0;
-
- if (Mips::CPURegsRegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LW_P8 : Mips::LW;
- else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LD_P8 : Mips::LD;
- else if (Mips::FGR32RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1;
- else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
- Opc = Mips::LDC1;
- else if (Mips::FGR64RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164;
-
- assert(Opc && "Register class not handled!");
- BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0)
- .addMemOperand(MMO);
-}
-
-void MipsInstrInfo::ExpandRetRA(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned Opc) const {
- BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(Opc))
- .addReg(Mips::RA);
-}
-
-void MipsInstrInfo::ExpandRetRA16(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned Opc) const {
- BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(Opc));
-}
-
-void MipsInstrInfo::ExpandExtractElementF64(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetInstrInfo *TII = TM.getInstrInfo();
- unsigned DstReg = I->getOperand(0).getReg();
- unsigned SrcReg = I->getOperand(1).getReg();
- unsigned N = I->getOperand(2).getImm();
- const MCInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1);
- DebugLoc dl = I->getDebugLoc();
-
- assert(N < 2 && "Invalid immediate");
- unsigned SubIdx = N ? Mips::sub_fpodd : Mips::sub_fpeven;
- unsigned SubReg = TM.getRegisterInfo()->getSubReg(SrcReg, SubIdx);
-
- BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(SubReg);
-}
-
-void MipsInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetInstrInfo *TII = TM.getInstrInfo();
- unsigned DstReg = I->getOperand(0).getReg();
- unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg();
- const MCInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1);
- DebugLoc dl = I->getDebugLoc();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-
- // mtc1 Lo, $fp
- // mtc1 Hi, $fp + 1
- BuildMI(MBB, I, dl, Mtc1Tdd, TRI->getSubReg(DstReg, Mips::sub_fpeven))
- .addReg(LoReg);
- BuildMI(MBB, I, dl, Mtc1Tdd, TRI->getSubReg(DstReg, Mips::sub_fpodd))
- .addReg(HiReg);
-}
-
-bool MipsInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
- MachineBasicBlock &MBB = *MI->getParent();
-
- switch(MI->getDesc().getOpcode()) {
- default:
- return false;
- case Mips::RetRA:
- ExpandRetRA(MBB, MI, Mips::RET);
- break;
- case Mips::RetRA16:
- ExpandRetRA16(MBB, MI, Mips::JrRa16);
- break;
- case Mips::BuildPairF64:
- ExpandBuildPairF64(MBB, MI);
- break;
- case Mips::ExtractElementF64:
- ExpandExtractElementF64(MBB, MI);
- break;
- }
-
- MBB.erase(MI);
- return true;
-}
-
MachineInstr*
MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
uint64_t Offset, const MDNode *MDPtr,
@@ -322,42 +74,9 @@ MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
// Branch Analysis
//===----------------------------------------------------------------------===//
-static unsigned GetAnalyzableBrOpc(unsigned Opc) {
- return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ ||
- Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
- Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 ||
- Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 ||
- Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B ||
- Opc == Mips::J) ?
- Opc : 0;
-}
-
-/// GetOppositeBranchOpc - Return the inverse of the specified
-/// opcode, e.g. turning BEQ to BNE.
-unsigned Mips::GetOppositeBranchOpc(unsigned Opc)
-{
- switch (Opc) {
- default: llvm_unreachable("Illegal opcode!");
- case Mips::BEQ: return Mips::BNE;
- case Mips::BNE: return Mips::BEQ;
- case Mips::BGTZ: return Mips::BLEZ;
- case Mips::BGEZ: return Mips::BLTZ;
- case Mips::BLTZ: return Mips::BGEZ;
- case Mips::BLEZ: return Mips::BGTZ;
- case Mips::BEQ64: return Mips::BNE64;
- case Mips::BNE64: return Mips::BEQ64;
- case Mips::BGTZ64: return Mips::BLEZ64;
- case Mips::BGEZ64: return Mips::BLTZ64;
- case Mips::BLTZ64: return Mips::BGEZ64;
- case Mips::BLEZ64: return Mips::BGTZ64;
- case Mips::BC1T: return Mips::BC1F;
- case Mips::BC1F: return Mips::BC1T;
- }
-}
-
-static void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc,
- MachineBasicBlock *&BB,
- SmallVectorImpl<MachineOperand> &Cond) {
+void MipsInstrInfo::AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc,
+ MachineBasicBlock *&BB,
+ SmallVectorImpl<MachineOperand> &Cond) const {
assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch");
int NumOp = Inst->getNumExplicitOperands();
@@ -527,7 +246,7 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
{
assert( (Cond.size() && Cond.size() <= 3) &&
"Invalid Mips branch condition!");
- Cond[0].setImm(Mips::GetOppositeBranchOpc(Cond[0].getImm()));
+ Cond[0].setImm(GetOppositeBranchOpc(Cond[0].getImm()));
return false;
}
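
The refactoring above turns MipsInstrInfo into a thin base class: create()
picks the concrete implementation once, and hooks such as
GetOppositeBranchOpc/GetAnalyzableBrOpc are resolved through virtual dispatch.
A minimal standalone sketch of that pattern (illustrative names only, not the
actual LLVM classes):

    #include <cstdio>
    #include <memory>

    struct InstrInfoBase {
      virtual ~InstrInfoBase() = default;
      // Pure virtual hook, analogous to GetOppositeBranchOpc in the patch.
      virtual unsigned oppositeBranch(unsigned Opc) const = 0;
    };

    struct SEInstrInfo : InstrInfoBase {
      unsigned oppositeBranch(unsigned Opc) const override { return Opc ^ 1; }
    };

    struct Mips16InstrInfo : InstrInfoBase {
      unsigned oppositeBranch(unsigned Opc) const override { return Opc ^ 2; }
    };

    // Analogous to MipsInstrInfo::create(): choose the subclass up front.
    static std::unique_ptr<InstrInfoBase> create(bool InMips16Mode) {
      if (InMips16Mode)
        return std::make_unique<Mips16InstrInfo>();
      return std::make_unique<SEInstrInfo>();
    }

    int main() {
      auto TII = create(/*InMips16Mode=*/false);
      std::printf("opposite of 4 -> %u\n", TII->oppositeBranch(4));
    }

The opcode arithmetic here is a placeholder; the real mapping is the switch in
MipsSEInstrInfo::GetOppositeBranchOpc further down in this patch.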
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 358f817..7d56259 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -26,99 +26,69 @@
namespace llvm {
class MipsInstrInfo : public MipsGenInstrInfo {
+protected:
MipsTargetMachine &TM;
- bool IsN64; bool InMips16Mode;
- const MipsRegisterInfo RI;
unsigned UncondBrOpc;
+
public:
- explicit MipsInstrInfo(MipsTargetMachine &TM);
+ explicit MipsInstrInfo(MipsTargetMachine &TM, unsigned UncondBrOpc);
- /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
- /// such, whenever a client has an instance of instruction info, it should
- /// always be able to get register info as well (through this method).
- ///
- virtual const MipsRegisterInfo &getRegisterInfo() const;
-
- /// isLoadFromStackSlot - If the specified machine instruction is a direct
- /// load from a stack slot, return the virtual or physical register number of
- /// the destination along with the FrameIndex of the loaded stack slot. If
- /// not, return 0. This predicate must return 0 if the instruction has
- /// any side effects other than loading from the stack slot.
- virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
-
- /// isStoreToStackSlot - If the specified machine instruction is a direct
- /// store to a stack slot, return the virtual or physical register number of
- /// the source reg along with the FrameIndex of the loaded stack slot. If
- /// not, return 0. This predicate must return 0 if the instruction has
- /// any side effects other than storing to the stack slot.
- virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
+ static const MipsInstrInfo *create(MipsTargetMachine &TM);
/// Branch Analysis
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const;
- virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-
-private:
- void ExpandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned Opc) const;
- void ExpandRetRA16(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned Opc) const;
- void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL,
- const SmallVectorImpl<MachineOperand>& Cond) const;
- void ExpandExtractElementF64(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
- void ExpandBuildPairF64(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-public:
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const;
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
virtual MachineInstr* emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx, uint64_t Offset,
const MDNode *MDPtr,
DebugLoc DL) const;
- virtual
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-
/// Insert nop instruction when hazard condition is found
virtual void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const MipsRegisterInfo &getRegisterInfo() const = 0;
+
+ virtual unsigned GetOppositeBranchOpc(unsigned Opc) const = 0;
+
/// Return the number of bytes of code the specified instruction may be.
unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+
+protected:
+ bool isZeroImm(const MachineOperand &op) const;
+
+ MachineMemOperand *GetMemOperand(MachineBasicBlock &MBB, int FI,
+ unsigned Flag) const;
+
+private:
+ virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const = 0;
+
+ void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc,
+ MachineBasicBlock *&BB,
+ SmallVectorImpl<MachineOperand> &Cond) const;
+
+ void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL,
+ const SmallVectorImpl<MachineOperand>& Cond) const;
};
namespace Mips {
- /// GetOppositeBranchOpc - Return the inverse of the specified
- /// opcode, e.g. turning BEQ to BNE.
- unsigned GetOppositeBranchOpc(unsigned Opc);
-
/// Emit a series of instructions to load an immediate. All instructions
/// except for the last one are emitted. The function returns the number of
/// MachineInstrs generated. The opcode-immediate pair of the last
@@ -130,6 +100,10 @@ namespace Mips {
MipsAnalyzeImmediate::Inst *LastInst);
}
+/// Create MipsInstrInfo objects.
+const MipsInstrInfo *createMips16InstrInfo(MipsTargetMachine &TM);
+const MipsInstrInfo *createMipsSEInstrInfo(MipsTargetMachine &TM);
+
}
#endif
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index f1aada4..fd952ef 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -208,17 +208,24 @@ def uimm16 : Operand<i32> {
let PrintMethod = "printUnsignedImm";
}
+def MipsMemAsmOperand : AsmOperandClass {
+ let Name = "Mem";
+ let ParserMethod = "parseMemOperand";
+}
+
// Address operand
def mem : Operand<i32> {
let PrintMethod = "printMemOperand";
let MIOperandInfo = (ops CPURegs, simm16);
let EncoderMethod = "getMemEncoding";
+ let ParserMatchClass = MipsMemAsmOperand;
}
def mem64 : Operand<i64> {
let PrintMethod = "printMemOperand";
let MIOperandInfo = (ops CPU64Regs, simm16_64);
let EncoderMethod = "getMemEncoding";
+ let ParserMatchClass = MipsMemAsmOperand;
}
def mem_ea : Operand<i32> {
@@ -722,9 +729,11 @@ class MoveToLOHI<bits<6> func, string instr_asm, RegisterClass RC,
let neverHasSideEffects = 1;
}
-class EffectiveAddress<string instr_asm, RegisterClass RC, Operand Mem> :
- FMem<0x09, (outs RC:$rt), (ins Mem:$addr),
- instr_asm, [(set RC:$rt, addr:$addr)], IIAlu>;
+class EffectiveAddress<bits<6> opc, string instr_asm, RegisterClass RC, Operand Mem> :
+ FMem<opc, (outs RC:$rt), (ins Mem:$addr),
+ instr_asm, [(set RC:$rt, addr:$addr)], IIAlu> {
+ let isCodeGenOnly = 1;
+}
// Count Leading Ones/Zeros in Word
class CountLeading0<bits<6> func, string instr_asm, RegisterClass RC>:
@@ -803,9 +812,9 @@ class InsBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
// Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*).
class Atomic2Ops<PatFrag Op, string Opstr, RegisterClass DRC,
RegisterClass PRC> :
- MipsPseudo<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr),
- !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"),
- [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>;
+ PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr),
+ !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"),
+ [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>;
multiclass Atomic2Ops32<PatFrag Op, string Opstr> {
def #NAME# : Atomic2Ops<Op, Opstr, CPURegs, CPURegs>,
@@ -819,9 +828,9 @@ multiclass Atomic2Ops32<PatFrag Op, string Opstr> {
// Atomic Compare & Swap.
class AtomicCmpSwap<PatFrag Op, string Width, RegisterClass DRC,
RegisterClass PRC> :
- MipsPseudo<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap),
- !strconcat("atomic_cmp_swap_", Width, "\t$dst, $ptr, $cmp, $swap"),
- [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>;
+ PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap),
+ !strconcat("atomic_cmp_swap_", Width, "\t$dst, $ptr, $cmp, $swap"),
+ [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>;
multiclass AtomicCmpSwap32<PatFrag Op, string Width> {
def #NAME# : AtomicCmpSwap<Op, Width, CPURegs, CPURegs>,
@@ -851,14 +860,13 @@ class SCBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> :
// Return RA.
let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1 in
-def RetRA : MipsPseudo<(outs), (ins), "", [(MipsRet)]>;
+def RetRA : PseudoSE<(outs), (ins), "", [(MipsRet)]>;
-// As stack alignment is always done with addiu, we need a 16-bit immediate
-let Defs = [SP], Uses = [SP] in {
-def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins uimm16:$amt),
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
+def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins i32imm:$amt),
"!ADJCALLSTACKDOWN $amt",
[(callseq_start timm:$amt)]>;
-def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2),
+def ADJCALLSTACKUP : MipsPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
"!ADJCALLSTACKUP $amt1",
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
@@ -868,8 +876,8 @@ def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2),
// are used, we have the same behavior, but also get a bunch of warnings
// from the assembler.
let neverHasSideEffects = 1 in
-def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc, CPURegs:$gp),
- ".cprestore\t$loc", []>;
+def CPRESTORE : PseudoSE<(outs), (ins i32imm:$loc, CPURegs:$gp),
+ ".cprestore\t$loc", []>;
let usesCustomInserter = 1 in {
defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32<atomic_load_add_8, "load_add_8">;
@@ -969,8 +977,8 @@ defm SWL : StoreLeftRightM32<0x2a, "swl", MipsSWL>;
defm SWR : StoreLeftRightM32<0x2e, "swr", MipsSWR>;
let hasSideEffects = 1 in
-def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype",
- [(MipsSync imm:$stype)], NoItinerary, FrmOther>
+def SYNC : InstSE<(outs), (ins i32imm:$stype), "sync $stype",
+ [(MipsSync imm:$stype)], NoItinerary, FrmOther>
{
bits<5> stype;
let Opcode = 0;
@@ -1046,17 +1054,13 @@ let addr=0 in
// instructions. The same does not happen for stack address copies, so an
// add op with mem ComplexPattern is used and the stack address copy
// can be matched. It's similar to Sparc LEA_ADDRi
-def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea> {
- let isCodeGenOnly = 1;
-}
+def LEA_ADDiu : EffectiveAddress<0x09,"addiu\t$rt, $addr", CPURegs, mem_ea>;
// DynAlloc node points to dynamically allocated stack space.
// $sp is added to the list of implicitly used registers to prevent dead code
// elimination from removing instructions that modify $sp.
let Uses = [SP] in
-def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea> {
- let isCodeGenOnly = 1;
-}
+def DynAlloc : EffectiveAddress<0x09,"addiu\t$rt, $addr", CPURegs, mem_ea>;
// MADD*/MSUB*
def MADD : MArithR<0, "madd", MipsMAdd, 1>;
diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp
index 150bdbb..052046a 100644
--- a/lib/Target/Mips/MipsJITInfo.cpp
+++ b/lib/Target/Mips/MipsJITInfo.cpp
@@ -27,7 +27,52 @@ using namespace llvm;
void MipsJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
- report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction");
+ unsigned NewAddr = (intptr_t)New;
+ unsigned OldAddr = (intptr_t)Old;
+ const unsigned NopInstr = 0x0;
+
+ // If the functions are in the same memory segment, insert PC-region branch.
+ if ((NewAddr & 0xF0000000) == ((OldAddr + 4) & 0xF0000000)) {
+ unsigned *OldInstruction = (unsigned *)Old;
+ *OldInstruction = 0x08000000;
+ unsigned JTargetAddr = NewAddr & 0x0FFFFFFC;
+
+ JTargetAddr >>= 2;
+ *OldInstruction |= JTargetAddr;
+
+ // Insert a NOP.
+ OldInstruction++;
+ *OldInstruction = NopInstr;
+
+ sys::Memory::InvalidateInstructionCache(Old, 2 * 4);
+ } else {
+ // We need to clear hint bits from the instruction, in case it is 'jr ra'.
+ const unsigned HintMask = 0xFFFFF83F, ReturnSequence = 0x03e00008;
+ unsigned* CurrentInstr = (unsigned*)Old;
+ unsigned CurrInstrHintClear = (*CurrentInstr) & HintMask;
+ unsigned* NextInstr = CurrentInstr + 1;
+ unsigned NextInstrHintClear = (*NextInstr) & HintMask;
+
+ // Do an absolute jump if there are 2 or more instructions before the return
+ // from the old function.
+ if ((CurrInstrHintClear != ReturnSequence) &&
+ (NextInstrHintClear != ReturnSequence)) {
+ const unsigned LuiT0Instr = 0x3c080000, AddiuT0Instr = 0x25080000;
+ const unsigned JrT0Instr = 0x01000008;
+ // lui t0, high 16 bit of the NewAddr
+ (*(CurrentInstr++)) = LuiT0Instr | ((NewAddr & 0xffff0000) >> 16);
+ // addiu t0, t0, low 16 bit of the NewAddr
+ (*(CurrentInstr++)) = AddiuT0Instr | (NewAddr & 0x0000ffff);
+ // jr t0
+ (*(CurrentInstr++)) = JrT0Instr;
+ (*CurrentInstr) = NopInstr;
+
+ sys::Memory::InvalidateInstructionCache(Old, 4 * 4);
+ } else {
+ // Unsupported case
+ report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction");
+ }
+ }
}
/// JITCompilerFunction - This contains the address of the JIT function used to
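
The word values written above follow directly from the MIPS32 encodings: j is
opcode 0x02 in the top six bits with a 26-bit word-index target, and the far
path materializes the destination into $t0 with lui/addiu before jr. A small
standalone sketch of that arithmetic, using the same constants as the patch:

    #include <cstdint>
    #include <cstdio>

    // A PC-region branch only works when the target shares the upper four
    // address bits with the instruction following the patched one.
    static bool sameRegion(uint32_t OldAddr, uint32_t NewAddr) {
      return (NewAddr & 0xF0000000) == ((OldAddr + 4) & 0xF0000000);
    }

    static uint32_t encodeJ(uint32_t NewAddr) {        // j NewAddr
      return 0x08000000 | ((NewAddr & 0x0FFFFFFC) >> 2);
    }

    static uint32_t encodeLuiT0(uint32_t NewAddr) {    // lui $t0, hi16
      return 0x3c080000 | ((NewAddr & 0xffff0000) >> 16);
    }

    static uint32_t encodeAddiuT0(uint32_t NewAddr) {  // addiu $t0, $t0, lo16
      return 0x25080000 | (NewAddr & 0x0000ffff);
    }

    int main() {
      uint32_t Old = 0x00400000, New = 0x00401230;
      if (sameRegion(Old, New))
        std::printf("j: 0x%08x\n", (unsigned)encodeJ(New));
      std::printf("lui: 0x%08x addiu: 0x%08x jr $t0: 0x%08x\n",
                  (unsigned)encodeLuiT0(New), (unsigned)encodeAddiuT0(New),
                  0x01000008u);
    }

Note the low half is used as written, as in the patch; addresses whose low 16
bits have bit 15 set would need the usual hi/lo carry adjustment, which this
path does not attempt.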
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index 70ecbc1..f78203f 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -207,7 +207,7 @@ int64_t MipsLongBranch::computeOffset(const MachineInstr *Br) {
// MachineBasicBlock operand MBBOpnd.
void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br,
DebugLoc DL, MachineBasicBlock *MBBOpnd) {
- unsigned NewOpc = Mips::GetOppositeBranchOpc(Br->getOpcode());
+ unsigned NewOpc = TII->GetOppositeBranchOpc(Br->getOpcode());
const MCInstrDesc &NewDesc = TII->get(NewOpc);
MachineInstrBuilder MIB = BuildMI(MBB, Br, DL, NewDesc);
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index b2232c6..df3c4c0 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -48,8 +48,6 @@ class MipsFunctionInfo : public MachineFunctionInfo {
// OutArgFIRange: Range of indices of all frame objects created during call to
// LowerCall except for the frame object for restoring $gp.
std::pair<int, int> InArgFIRange, OutArgFIRange;
- int GlobalRegFI;
- mutable int DynAllocFI; // Frame index of dynamically allocated stack area.
unsigned MaxCallFrameSize;
bool EmitNOAT;
@@ -58,8 +56,7 @@ public:
MipsFunctionInfo(MachineFunction& MF)
: MF(MF), SRetReturnReg(0), GlobalBaseReg(0),
VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)),
- OutArgFIRange(std::make_pair(-1, 0)), GlobalRegFI(0), DynAllocFI(0),
- MaxCallFrameSize(0), EmitNOAT(false)
+ OutArgFIRange(std::make_pair(-1, 0)), MaxCallFrameSize(0), EmitNOAT(false)
{}
bool isInArgFI(int FI) const {
@@ -77,34 +74,6 @@ public:
OutArgFIRange.second = LastFI;
}
- bool isGlobalRegFI(int FI) const {
- return GlobalRegFI && (FI == GlobalRegFI);
- }
-
- int getGlobalRegFI() const {
- return GlobalRegFI;
- }
-
- int initGlobalRegFI() {
- const TargetMachine &TM = MF.getTarget();
- unsigned RegSize = TM.getSubtarget<MipsSubtarget>().isABI_N64() ? 8 : 4;
- int64_t StackAlignment = TM.getFrameLowering()->getStackAlignment();
- uint64_t Offset = RoundUpToAlignment(MaxCallFrameSize, StackAlignment);
-
- GlobalRegFI = MF.getFrameInfo()->CreateFixedObject(RegSize, Offset, true);
- return GlobalRegFI;
- }
-
- // The first call to this function creates a frame object for dynamically
- // allocated stack area.
- int getDynAllocFI() const {
- if (!DynAllocFI)
- DynAllocFI = MF.getFrameInfo()->CreateFixedObject(4, 0, true);
-
- return DynAllocFI;
- }
- bool isDynAllocFI(int FI) const { return DynAllocFI && DynAllocFI == FI; }
-
unsigned getSRetReturnReg() const { return SRetReturnReg; }
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index a3ce236..ae6ae3a 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -144,15 +144,6 @@ MipsRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
return true;
}
-// This function eliminate ADJCALLSTACKDOWN,
-// ADJCALLSTACKUP pseudo instructions
-void MipsRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
- MBB.erase(I);
-}
-
// A FrameIndex represents an object inside an abstract stack.
// We must replace the FrameIndex with a direct stack/frame pointer
// reference.
@@ -161,8 +152,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
unsigned i = 0;
while (!MI.getOperand(i).isFI()) {
@@ -182,68 +171,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
<< "spOffset : " << spOffset << "\n"
<< "stackSize : " << stackSize << "\n");
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- int MinCSFI = 0;
- int MaxCSFI = -1;
-
- if (CSI.size()) {
- MinCSFI = CSI[0].getFrameIdx();
- MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
- }
-
- // The following stack frame objects are always referenced relative to $sp:
- // 1. Outgoing arguments.
- // 2. Pointer to dynamically allocated stack space.
- // 3. Locations for callee-saved registers.
- // Everything else is referenced relative to whatever register
- // getFrameRegister() returns.
- unsigned FrameReg;
-
- if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) ||
- (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
- FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
- else
- FrameReg = getFrameRegister(MF);
-
- // Calculate final offset.
- // - There is no need to change the offset if the frame object is one of the
- // following: an outgoing argument, pointer to a dynamically allocated
- // stack space or a $gp restore location,
- // - If the frame object is any of the following, its offset must be adjusted
- // by adding the size of the stack:
- // incoming argument, callee-saved register location or local variable.
- int64_t Offset;
-
- if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) ||
- MipsFI->isGlobalRegFI(FrameIndex))
- Offset = spOffset;
- else
- Offset = spOffset + (int64_t)stackSize;
-
- Offset += MI.getOperand(i+1).getImm();
-
- DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
-
- // If MI is not a debug value, make sure Offset fits in the 16-bit immediate
- // field.
- if (!MI.isDebugValue() && !isInt<16>(Offset)) {
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = II->getDebugLoc();
- unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
- unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT;
- MipsAnalyzeImmediate::Inst LastInst(0, 0);
-
- MipsFI->setEmitNOAT();
- Mips::loadImmediate(Offset, Subtarget.isABI_N64(), TII, MBB, II, DL, true,
- &LastInst);
- BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg);
-
- FrameReg = ATReg;
- Offset = SignExtend64<16>(LastInst.ImmOpnd);
- }
-
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ eliminateFI(MI, i, FrameIndex, stackSize, spOffset);
}
unsigned MipsRegisterInfo::
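
The body that moved into the subtarget-specific eliminateFI hook keeps the
arithmetic the removed block documents: outgoing-argument and
dynamic-allocation objects keep their raw $sp-relative offset, everything else
is adjusted by the allocated frame size, and the result must still fit a
signed 16-bit immediate or be built into $at. A rough sketch of that offset
computation under those assumptions (plain C++, illustrative only):

    #include <cstdint>
    #include <cstdio>

    // Offsets that fit the load/store immediate field are used directly;
    // larger ones take the $at-based expansion shown in the removed code.
    static bool fitsInt16(int64_t V) { return V >= -32768 && V <= 32767; }

    // InOutgoingArgArea stands in for the isOutArgFI/isDynAllocFI checks.
    static int64_t finalOffset(int64_t SPOffset, int64_t StackSize,
                               int64_t ImmOperand, bool InOutgoingArgArea) {
      int64_t Offset = InOutgoingArgArea ? SPOffset : SPOffset + StackSize;
      return Offset + ImmOperand;
    }

    int main() {
      int64_t Off = finalOffset(/*SPOffset=*/-8, /*StackSize=*/40,
                                /*ImmOperand=*/0, /*InOutgoingArgArea=*/false);
      std::printf("offset=%lld fits16=%d\n", (long long)Off, fitsInt16(Off));
    }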
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index f320bae..9a05e94 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -25,10 +25,12 @@ class MipsSubtarget;
class TargetInstrInfo;
class Type;
-struct MipsRegisterInfo : public MipsGenRegisterInfo {
+class MipsRegisterInfo : public MipsGenRegisterInfo {
+protected:
const MipsSubtarget &Subtarget;
const TargetInstrInfo &TII;
+public:
MipsRegisterInfo(const MipsSubtarget &Subtarget, const TargetInstrInfo &tii);
/// getRegisterNumbering - Given the enum value for some register, e.g.
@@ -51,10 +53,6 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
/// Stack Frame Processing Methods
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS = NULL) const;
@@ -67,6 +65,11 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
/// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
+
+private:
+ virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
+ int FrameIndex, uint64_t StackSize,
+ int64_t SPOffset) const = 0;
};
} // end namespace llvm
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index b255e42..4015add 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -239,6 +239,9 @@ let Namespace = "Mips" in {
// fcc0 register
def FCC0 : Register<"fcc0">;
+ // PC register
+ def PC : Register<"pc">;
+
// Hardware register $29
def HWR29 : Register<"29">;
def HWR29_64 : Register<"29">;
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
new file mode 100644
index 0000000..1c59847
--- /dev/null
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -0,0 +1,210 @@
+//===-- MipsSEFrameLowering.cpp - Mips32/64 Frame Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips32/64 implementation of the TargetFrameLowering
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsSEFrameLowering.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsSEInstrInfo.h"
+#include "MipsMachineFunction.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MipsRegisterInfo *RegInfo =
+ static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const MipsSEInstrInfo &TII =
+ *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
+ unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+
+ // First, compute final stack size.
+ uint64_t StackSize = MFI->getStackSize();
+
+ // No need to allocate space on the stack.
+ if (StackSize == 0 && !MFI->adjustsStack()) return;
+
+ MachineModuleInfo &MMI = MF.getMMI();
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ MachineLocation DstML, SrcML;
+
+ // Adjust stack.
+ TII.adjustStackPtr(SP, -StackSize, MBB, MBBI);
+
+ // emit ".cfi_def_cfa_offset StackSize"
+ MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel);
+ DstML = MachineLocation(MachineLocation::VirtualFP);
+ SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize);
+ Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML));
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ if (CSI.size()) {
+ // Find the instruction past the last instruction that saves a callee-saved
+ // register to the stack.
+ for (unsigned i = 0; i < CSI.size(); ++i)
+ ++MBBI;
+
+ // Iterate over list of callee-saved registers and emit .cfi_offset
+ // directives.
+ MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel);
+
+ for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
+ E = CSI.end(); I != E; ++I) {
+ int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
+ unsigned Reg = I->getReg();
+
+ // If Reg is a double precision register, emit two cfa_offsets,
+ // one for each of the paired single precision registers.
+ if (Mips::AFGR64RegClass.contains(Reg)) {
+ MachineLocation DstML0(MachineLocation::VirtualFP, Offset);
+ MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4);
+ MachineLocation SrcML0(RegInfo->getSubReg(Reg, Mips::sub_fpeven));
+ MachineLocation SrcML1(RegInfo->getSubReg(Reg, Mips::sub_fpodd));
+
+ if (!STI.isLittle())
+ std::swap(SrcML0, SrcML1);
+
+ Moves.push_back(MachineMove(CSLabel, DstML0, SrcML0));
+ Moves.push_back(MachineMove(CSLabel, DstML1, SrcML1));
+ } else {
+ // Reg is either in CPURegs or FGR32.
+ DstML = MachineLocation(MachineLocation::VirtualFP, Offset);
+ SrcML = MachineLocation(Reg);
+ Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
+ }
+ }
+ }
+
+ // If the frame pointer is enabled, set it to point to the stack pointer.
+ if (hasFP(MF)) {
+ // Insert instruction "move $fp, $sp" at this location.
+ BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO);
+
+ // emit ".cfi_def_cfa_register $fp"
+ MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel);
+ DstML = MachineLocation(FP);
+ SrcML = MachineLocation(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML));
+ }
+}
+
+void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MipsSEInstrInfo &TII =
+ *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MBBI->getDebugLoc();
+ unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
+ unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+
+ // If the frame pointer is enabled, restore the stack pointer from it.
+ if (hasFP(MF)) {
+ // Find the first instruction that restores a callee-saved register.
+ MachineBasicBlock::iterator I = MBBI;
+
+ for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i)
+ --I;
+
+ // Insert instruction "move $sp, $fp" at this location.
+ BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO);
+ }
+
+ // Get the number of bytes from FrameInfo
+ uint64_t StackSize = MFI->getStackSize();
+
+ if (!StackSize)
+ return;
+
+ // Adjust stack.
+ TII.adjustStackPtr(SP, StackSize, MBB, MBBI);
+}
+
+bool MipsSEFrameLowering::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction *MF = MBB.getParent();
+ MachineBasicBlock *EntryBlock = MF->begin();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ // Add the callee-saved register as live-in. Do not add if the register is
+ // RA and return address is taken, because it has already been added in
+ // method MipsTargetLowering::LowerRETURNADDR.
+ // It's killed at the spill, unless the register is RA and return address
+ // is taken.
+ unsigned Reg = CSI[i].getReg();
+ bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA || Reg == Mips::RA_64)
+ && MF->getFrameInfo()->isReturnAddressTaken();
+ if (!IsRAAndRetAddrIsTaken)
+ EntryBlock->addLiveIn(Reg);
+
+ // Insert the spill to the stack frame.
+ bool IsKill = !IsRAAndRetAddrIsTaken;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*EntryBlock, MI, Reg, IsKill,
+ CSI[i].getFrameIdx(), RC, TRI);
+ }
+
+ return true;
+}
+
+bool
+MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Reserve the call frame if the maximum call frame size fits into the 16-bit
+ // immediate field and there are no variable-sized objects on the stack.
+ return isInt<16>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects();
+}
+
+void MipsSEFrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
+
+ // Mark $fp as used if function has dedicated frame pointer.
+ if (hasFP(MF))
+ MRI.setPhysRegUsed(FP);
+}
+
+const MipsFrameLowering *
+llvm::createMipsSEFrameLowering(const MipsSubtarget &ST) {
+ return new MipsSEFrameLowering(ST);
+}
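
For an AFGR64 spill the prologue above records two 4-byte .cfi_offset entries,
one per paired single-precision register, and swaps the pairing on big-endian
targets so each half is described at the address it actually occupies. A tiny
standalone sketch of that pairing (names illustrative):

    #include <cstdio>
    #include <utility>

    // Given the frame offset of a spilled 64-bit FP pair, return where the
    // even (low) and odd (high) single-precision halves are described.
    static std::pair<long, long> pairedOffsets(long Offset, bool IsLittle) {
      long Even = Offset, Odd = Offset + 4;
      if (!IsLittle)
        std::swap(Even, Odd);
      return std::make_pair(Even, Odd);
    }

    int main() {
      std::pair<long, long> LE = pairedOffsets(-16, true);
      std::pair<long, long> BE = pairedOffsets(-16, false);
      std::printf("LE: even@%ld odd@%ld  BE: even@%ld odd@%ld\n",
                  LE.first, LE.second, BE.first, BE.second);
    }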
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
new file mode 100644
index 0000000..6481a0a
--- /dev/null
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -0,0 +1,44 @@
+//===-- MipsSEFrameLowering.h - Mips32/64 frame lowering --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips32/64 declaration of the TargetFrameLowering
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSE_FRAMEINFO_H
+#define MIPSSE_FRAMEINFO_H
+
+#include "MipsFrameLowering.h"
+
+namespace llvm {
+
+class MipsSEFrameLowering : public MipsFrameLowering {
+public:
+ explicit MipsSEFrameLowering(const MipsSubtarget &STI)
+ : MipsFrameLowering(STI) {}
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
new file mode 100644
index 0000000..eeb1de3
--- /dev/null
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -0,0 +1,320 @@
+//===-- MipsSEInstrInfo.cpp - Mips32/64 Instruction Information -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips32/64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsSEInstrInfo.h"
+#include "MipsTargetMachine.h"
+#include "MipsMachineFunction.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;
+
+MipsSEInstrInfo::MipsSEInstrInfo(MipsTargetMachine &tm)
+ : MipsInstrInfo(tm,
+ tm.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J),
+ RI(*tm.getSubtargetImpl(), *this),
+ IsN64(tm.getSubtarget<MipsSubtarget>().isABI_N64()) {}
+
+const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned MipsSEInstrInfo::
+isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ unsigned Opc = MI->getOpcode();
+
+ if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) ||
+ (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) ||
+ (Opc == Mips::LDC1) || (Opc == Mips::LDC164) ||
+ (Opc == Mips::LDC164_P8)) {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned MipsSEInstrInfo::
+isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ unsigned Opc = MI->getOpcode();
+
+ if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) ||
+ (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) ||
+ (Opc == Mips::SDC1) || (Opc == Mips::SDC164) ||
+ (Opc == Mips::SDC164_P8)) {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc = 0, ZeroReg = 0;
+
+ if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg.
+ if (Mips::CPURegsRegClass.contains(SrcReg))
+ Opc = Mips::ADDu, ZeroReg = Mips::ZERO;
+ else if (Mips::CCRRegClass.contains(SrcReg))
+ Opc = Mips::CFC1;
+ else if (Mips::FGR32RegClass.contains(SrcReg))
+ Opc = Mips::MFC1;
+ else if (SrcReg == Mips::HI)
+ Opc = Mips::MFHI, SrcReg = 0;
+ else if (SrcReg == Mips::LO)
+ Opc = Mips::MFLO, SrcReg = 0;
+ }
+ else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg.
+ if (Mips::CCRRegClass.contains(DestReg))
+ Opc = Mips::CTC1;
+ else if (Mips::FGR32RegClass.contains(DestReg))
+ Opc = Mips::MTC1;
+ else if (DestReg == Mips::HI)
+ Opc = Mips::MTHI, DestReg = 0;
+ else if (DestReg == Mips::LO)
+ Opc = Mips::MTLO, DestReg = 0;
+ }
+ else if (Mips::FGR32RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::FMOV_S;
+ else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::FMOV_D32;
+ else if (Mips::FGR64RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::FMOV_D64;
+ else if (Mips::CCRRegClass.contains(DestReg, SrcReg))
+ Opc = Mips::MOVCCRToCCR;
+ else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg.
+ if (Mips::CPU64RegsRegClass.contains(SrcReg))
+ Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64;
+ else if (SrcReg == Mips::HI64)
+ Opc = Mips::MFHI64, SrcReg = 0;
+ else if (SrcReg == Mips::LO64)
+ Opc = Mips::MFLO64, SrcReg = 0;
+ else if (Mips::FGR64RegClass.contains(SrcReg))
+ Opc = Mips::DMFC1;
+ }
+ else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg.
+ if (DestReg == Mips::HI64)
+ Opc = Mips::MTHI64, DestReg = 0;
+ else if (DestReg == Mips::LO64)
+ Opc = Mips::MTLO64, DestReg = 0;
+ else if (Mips::FGR64RegClass.contains(DestReg))
+ Opc = Mips::DMTC1;
+ }
+
+ assert(Opc && "Cannot copy registers");
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc));
+
+ if (DestReg)
+ MIB.addReg(DestReg, RegState::Define);
+
+ if (ZeroReg)
+ MIB.addReg(ZeroReg);
+
+ if (SrcReg)
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+void MipsSEInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
+
+ unsigned Opc = 0;
+
+ if (Mips::CPURegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::SW_P8 : Mips::SW;
+ else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::SD_P8 : Mips::SD;
+ else if (Mips::FGR32RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1;
+ else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
+ Opc = Mips::SDC1;
+ else if (Mips::FGR64RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164;
+
+ assert(Opc && "Register class not handled!");
+ BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+}
+
+void MipsSEInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
+{
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
+ unsigned Opc = 0;
+
+ if (Mips::CPURegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LW_P8 : Mips::LW;
+ else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LD_P8 : Mips::LD;
+ else if (Mips::FGR32RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1;
+ else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
+ Opc = Mips::LDC1;
+ else if (Mips::FGR64RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164;
+
+ assert(Opc && "Register class not handled!");
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0)
+ .addMemOperand(MMO);
+}
+
+bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ MachineBasicBlock &MBB = *MI->getParent();
+
+ switch(MI->getDesc().getOpcode()) {
+ default:
+ return false;
+ case Mips::RetRA:
+ ExpandRetRA(MBB, MI, Mips::RET);
+ break;
+ case Mips::BuildPairF64:
+ ExpandBuildPairF64(MBB, MI);
+ break;
+ case Mips::ExtractElementF64:
+ ExpandExtractElementF64(MBB, MI);
+ break;
+ }
+
+ MBB.erase(MI);
+ return true;
+}
+
+/// GetOppositeBranchOpc - Return the inverse of the specified
+/// opcode, e.g. turning BEQ to BNE.
+unsigned MipsSEInstrInfo::GetOppositeBranchOpc(unsigned Opc) const {
+ switch (Opc) {
+ default: llvm_unreachable("Illegal opcode!");
+ case Mips::BEQ: return Mips::BNE;
+ case Mips::BNE: return Mips::BEQ;
+ case Mips::BGTZ: return Mips::BLEZ;
+ case Mips::BGEZ: return Mips::BLTZ;
+ case Mips::BLTZ: return Mips::BGEZ;
+ case Mips::BLEZ: return Mips::BGTZ;
+ case Mips::BEQ64: return Mips::BNE64;
+ case Mips::BNE64: return Mips::BEQ64;
+ case Mips::BGTZ64: return Mips::BLEZ64;
+ case Mips::BGEZ64: return Mips::BLTZ64;
+ case Mips::BLTZ64: return Mips::BGEZ64;
+ case Mips::BLEZ64: return Mips::BGTZ64;
+ case Mips::BC1T: return Mips::BC1F;
+ case Mips::BC1F: return Mips::BC1T;
+ }
+}
+
+/// Adjust SP by Amount bytes.
+void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
+
+ if (isInt<16>(Amount)) // addi sp, sp, amount
+ BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount);
+ else { // Expand an immediate that doesn't fit in 16 bits.
+ unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT;
+
+ MBB.getParent()->getInfo<MipsFunctionInfo>()->setEmitNOAT();
+ Mips::loadImmediate(Amount, STI.isABI_N64(), *this, MBB, I, DL, false, 0);
+ BuildMI(MBB, I, DL, get(ADDu), SP).addReg(SP).addReg(ATReg);
+ }
+}
+
+unsigned MipsSEInstrInfo::GetAnalyzableBrOpc(unsigned Opc) const {
+ return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ ||
+ Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
+ Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 ||
+ Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 ||
+ Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B ||
+ Opc == Mips::J) ?
+ Opc : 0;
+}
+
+void MipsSEInstrInfo::ExpandRetRA(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Opc) const {
+ BuildMI(MBB, I, I->getDebugLoc(), get(Opc)).addReg(Mips::RA);
+}
+
+void MipsSEInstrInfo::ExpandExtractElementF64(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ unsigned DstReg = I->getOperand(0).getReg();
+ unsigned SrcReg = I->getOperand(1).getReg();
+ unsigned N = I->getOperand(2).getImm();
+ const MCInstrDesc& Mfc1Tdd = get(Mips::MFC1);
+ DebugLoc dl = I->getDebugLoc();
+
+ assert(N < 2 && "Invalid immediate");
+ unsigned SubIdx = N ? Mips::sub_fpodd : Mips::sub_fpeven;
+ unsigned SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx);
+
+ BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(SubReg);
+}
+
+void MipsSEInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ unsigned DstReg = I->getOperand(0).getReg();
+ unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg();
+ const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1);
+ DebugLoc dl = I->getDebugLoc();
+ const TargetRegisterInfo &TRI = getRegisterInfo();
+
+ // mtc1 Lo, $fp
+ // mtc1 Hi, $fp + 1
+ BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_fpeven))
+ .addReg(LoReg);
+ BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_fpodd))
+ .addReg(HiReg);
+}
+
+const MipsInstrInfo *llvm::createMipsSEInstrInfo(MipsTargetMachine &TM) {
+ return new MipsSEInstrInfo(TM);
+}
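A minimal usage sketch of adjustStackPtr() — not part of the patch; the helper name and call site are illustrative, and only adjustStackPtr(), Mips::SP/SP_64 and the SE instruction-info class come from the code above:

    #include "MipsSEInstrInfo.h"
    #include "llvm/CodeGen/MachineFunction.h"

    // Sketch: allocate StackSize bytes in a function prologue. Amounts that
    // fit in 16 bits become a single (d)addiu; larger ones take the $at
    // expansion path inside adjustStackPtr().
    static void allocateFrameSketch(llvm::MachineFunction &MF,
                                    const llvm::MipsSEInstrInfo &TII,
                                    uint64_t StackSize, bool IsN64) {
      llvm::MachineBasicBlock &MBB = MF.front();
      llvm::MachineBasicBlock::iterator MBBI = MBB.begin();
      unsigned SP = IsN64 ? llvm::Mips::SP_64 : llvm::Mips::SP;
      TII.adjustStackPtr(SP, -static_cast<int64_t>(StackSize), MBB, MBBI);
    }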
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
new file mode 100644
index 0000000..346e74d
--- /dev/null
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -0,0 +1,86 @@
+//===-- MipsSEInstrInfo.h - Mips32/64 Instruction Information ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips32/64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSEINSTRUCTIONINFO_H
+#define MIPSSEINSTRUCTIONINFO_H
+
+#include "MipsInstrInfo.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsSERegisterInfo.h"
+
+namespace llvm {
+
+class MipsSEInstrInfo : public MipsInstrInfo {
+ const MipsSERegisterInfo RI;
+ bool IsN64;
+
+public:
+ explicit MipsSEInstrInfo(MipsTargetMachine &TM);
+
+ virtual const MipsRegisterInfo &getRegisterInfo() const;
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source register along with the FrameIndex of the stack slot being
+ /// stored to. If not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
+ virtual unsigned GetOppositeBranchOpc(unsigned Opc) const;
+
+ /// Adjust SP by Amount bytes.
+ void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+private:
+ virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const;
+
+ void ExpandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned Opc) const;
+ void ExpandExtractElementF64(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+ void ExpandBuildPairF64(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
new file mode 100644
index 0000000..043a1ef
--- /dev/null
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -0,0 +1,138 @@
+//===-- MipsSERegisterInfo.cpp - MIPS32/64 Register Information ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MIPS32/64 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsSERegisterInfo.h"
+#include "Mips.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsSEInstrInfo.h"
+#include "MipsSubtarget.h"
+#include "MipsMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;
+
+MipsSERegisterInfo::MipsSERegisterInfo(const MipsSubtarget &ST,
+ const TargetInstrInfo &TII)
+ : MipsRegisterInfo(ST, TII) {}
+
+// This function eliminates ADJCALLSTACKDOWN and ADJCALLSTACKUP pseudo
+// instructions.
+void MipsSERegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
+ int64_t Amount = I->getOperand(0).getImm();
+
+ if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
+ Amount = -Amount;
+
+ const MipsSEInstrInfo *II = static_cast<const MipsSEInstrInfo*>(&TII);
+ unsigned SP = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
+
+ II->adjustStackPtr(SP, Amount, MBB, I);
+ }
+
+ MBB.erase(I);
+}
+
+void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
+ unsigned OpNo, int FrameIndex,
+ uint64_t StackSize,
+ int64_t SPOffset) const {
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+
+ if (CSI.size()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ // The following stack frame objects are always referenced relative to $sp:
+ // 1. Outgoing arguments.
+ // 2. Pointer to dynamically allocated stack space.
+ // 3. Locations for callee-saved registers.
+ // Everything else is referenced relative to whatever register
+ // getFrameRegister() returns.
+ unsigned FrameReg;
+
+ if (MipsFI->isOutArgFI(FrameIndex) ||
+ (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
+ FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ else
+ FrameReg = getFrameRegister(MF);
+
+ // Calculate final offset.
+ // - There is no need to change the offset if the frame object is one of the
+ // following: an outgoing argument, a pointer to dynamically allocated
+ // stack space, or a $gp restore location.
+ // - If the frame object is any of the following, its offset must be adjusted
+ // by adding the size of the stack: an incoming argument, a callee-saved
+ // register location, or a local variable.
+ int64_t Offset;
+
+ if (MipsFI->isOutArgFI(FrameIndex))
+ Offset = SPOffset;
+ else
+ Offset = SPOffset + (int64_t)StackSize;
+
+ Offset += MI.getOperand(OpNo + 1).getImm();
+
+ DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+ // If MI is not a debug value, make sure Offset fits in the 16-bit immediate
+ // field.
+ if (!MI.isDebugValue() && !isInt<16>(Offset)) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = II->getDebugLoc();
+ unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT;
+ MipsAnalyzeImmediate::Inst LastInst(0, 0);
+
+ MipsFI->setEmitNOAT();
+ Mips::loadImmediate(Offset, Subtarget.isABI_N64(), TII, MBB, II, DL, true,
+ &LastInst);
+ BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg);
+
+ FrameReg = ATReg;
+ Offset = SignExtend64<16>(LastInst.ImmOpnd);
+ }
+
+ MI.getOperand(OpNo).ChangeToRegister(FrameReg, false);
+ MI.getOperand(OpNo + 1).ChangeToImmediate(Offset);
+}
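An illustrative walk-through of eliminateFI()'s offset computation above; the numbers are made up for the example:

    // Local variable, SPOffset = -8, StackSize = 32, existing immediate = 0:
    //   Offset = SPOffset + StackSize + Imm = -8 + 32 + 0 = 24
    // 24 fits in 16 bits, so the operand pair simply becomes (FrameReg, 24).
    // With StackSize = 0x20000 the sum no longer fits; the function then
    // materializes it into $at via Mips::loadImmediate, emits
    // (d)addu $at, FrameReg, $at, and rewrites the operands as
    // ($at, SignExtend64<16>(LastInst.ImmOpnd)).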
diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h
new file mode 100644
index 0000000..4b17b33
--- /dev/null
+++ b/lib/Target/Mips/MipsSERegisterInfo.h
@@ -0,0 +1,39 @@
+//===-- MipsSERegisterInfo.h - Mips32/64 Register Information ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips32/64 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSEREGISTERINFO_H
+#define MIPSSEREGISTERINFO_H
+
+#include "MipsRegisterInfo.h"
+
+namespace llvm {
+
+class MipsSERegisterInfo : public MipsRegisterInfo {
+public:
+ MipsSERegisterInfo(const MipsSubtarget &Subtarget,
+ const TargetInstrInfo &TII);
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+private:
+ virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
+ int FrameIndex, uint64_t StackSize,
+ int64_t SPOffset) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 3215c44..ba15362 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -89,6 +89,9 @@ protected:
// InMips16 -- can process Mips16 instructions
bool InMips16Mode;
+ // IsAndroid -- target is Android
+ bool IsAndroid;
+
InstrItineraryData InstrItins;
public:
@@ -128,6 +131,7 @@ public:
bool isNotSingleFloat() const { return !IsSingleFloat; }
bool hasVFPU() const { return HasVFPU; }
bool inMips16Mode() const { return InMips16Mode; }
+ bool isAndroid() const { return IsAndroid; }
bool isLinux() const { return IsLinux; }
bool hasStandardEncoding() const { return !inMips16Mode(); }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index dd5d35f..2928a73 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -13,6 +13,8 @@
#include "MipsTargetMachine.h"
#include "Mips.h"
+#include "MipsFrameLowering.h"
+#include "MipsInstrInfo.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"
@@ -22,8 +24,8 @@ extern "C" void LLVMInitializeMipsTarget() {
// Register the target.
RegisterTargetMachine<MipsebTargetMachine> X(TheMipsTarget);
RegisterTargetMachine<MipselTargetMachine> Y(TheMipselTarget);
- RegisterTargetMachine<Mips64ebTargetMachine> A(TheMips64Target);
- RegisterTargetMachine<Mips64elTargetMachine> B(TheMips64elTarget);
+ RegisterTargetMachine<MipsebTargetMachine> A(TheMips64Target);
+ RegisterTargetMachine<MipselTargetMachine> B(TheMips64elTarget);
}
// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment
@@ -48,9 +50,10 @@ MipsTargetMachine(const Target &T, StringRef TT,
(Subtarget.isABI_N64() ?
"E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
"E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
- InstrInfo(*this),
- FrameLowering(Subtarget),
- TLInfo(*this), TSInfo(*this), JITInfo() {
+ InstrInfo(MipsInstrInfo::create(*this)),
+ FrameLowering(MipsFrameLowering::create(*this, Subtarget)),
+ TLInfo(*this), TSInfo(*this), JITInfo(),
+ ELFWriterInfo(false, isLittle) {
}
void MipsebTargetMachine::anchor() { }
@@ -71,24 +74,6 @@ MipselTargetMachine(const Target &T, StringRef TT,
CodeGenOpt::Level OL)
: MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
-void Mips64ebTargetMachine::anchor() { }
-
-Mips64ebTargetMachine::
-Mips64ebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
-
-void Mips64elTargetMachine::anchor() { }
-
-Mips64elTargetMachine::
-Mips64elTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
-
namespace {
/// Mips Code Generator Pass Configuration Options.
class MipsPassConfig : public TargetPassConfig {
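The constructor change above routes instruction-info and frame-lowering construction through MipsInstrInfo::create() and MipsFrameLowering::create(), neither of which appears in this hunk. A minimal sketch of what such a factory might look like, assuming a Mips16 counterpart to createMipsSEInstrInfo() exists (createMips16InstrInfo is an assumption; createMipsSEInstrInfo is defined earlier in this patch):

    const MipsInstrInfo *MipsInstrInfo::create(MipsTargetMachine &TM) {
      // Dispatch on the subtarget mode; only the SE variant is shown in the
      // patch, the Mips16 factory here is hypothetical.
      if (TM.getSubtargetImpl()->inMips16Mode())
        return createMips16InstrInfo(TM);   // assumed counterpart
      return createMipsSEInstrInfo(TM);     // see MipsSEInstrInfo.cpp above
    }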
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 5cbf057..a542ef6 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -20,59 +20,67 @@
#include "MipsJITInfo.h"
#include "MipsSelectionDAGInfo.h"
#include "MipsSubtarget.h"
+#include "MipsELFWriterInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
- class formatted_raw_ostream;
-
- class MipsTargetMachine : public LLVMTargetMachine {
- MipsSubtarget Subtarget;
- const TargetData DataLayout; // Calculates type size & alignment
- MipsInstrInfo InstrInfo;
- MipsFrameLowering FrameLowering;
- MipsTargetLowering TLInfo;
- MipsSelectionDAGInfo TSInfo;
- MipsJITInfo JITInfo;
-
- public:
- MipsTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool isLittle);
-
- virtual const MipsInstrInfo *getInstrInfo() const
- { return &InstrInfo; }
- virtual const TargetFrameLowering *getFrameLowering() const
- { return &FrameLowering; }
- virtual const MipsSubtarget *getSubtargetImpl() const
- { return &Subtarget; }
- virtual const TargetData *getTargetData() const
- { return &DataLayout;}
- virtual MipsJITInfo *getJITInfo()
- { return &JITInfo; }
-
-
- virtual const MipsRegisterInfo *getRegisterInfo() const {
- return &InstrInfo.getRegisterInfo();
- }
-
- virtual const MipsTargetLowering *getTargetLowering() const {
- return &TLInfo;
- }
-
- virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const {
- return &TSInfo;
- }
-
- // Pass Pipeline Configuration
- virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
- virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE);
- };
-
-/// MipsebTargetMachine - Mips32 big endian target machine.
+class formatted_raw_ostream;
+class MipsRegisterInfo;
+
+class MipsTargetMachine : public LLVMTargetMachine {
+ MipsSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ const MipsInstrInfo *InstrInfo;
+ const MipsFrameLowering *FrameLowering;
+ MipsTargetLowering TLInfo;
+ MipsSelectionDAGInfo TSInfo;
+ MipsJITInfo JITInfo;
+ MipsELFWriterInfo ELFWriterInfo;
+
+public:
+ MipsTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
+ bool isLittle);
+
+ virtual ~MipsTargetMachine() { delete InstrInfo; }
+
+ virtual const MipsInstrInfo *getInstrInfo() const
+ { return InstrInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const
+ { return FrameLowering; }
+ virtual const MipsSubtarget *getSubtargetImpl() const
+ { return &Subtarget; }
+ virtual const TargetData *getTargetData() const
+ { return &DataLayout;}
+ virtual MipsJITInfo *getJITInfo()
+ { return &JITInfo; }
+
+ virtual const MipsRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo->getRegisterInfo();
+ }
+
+ virtual const MipsTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual const MipsELFWriterInfo *getELFWriterInfo() const {
+ return &ELFWriterInfo;
+ }
+
+ // Pass Pipeline Configuration
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+ virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE);
+};
+
+/// MipsebTargetMachine - Mips32/64 big endian target machine.
///
class MipsebTargetMachine : public MipsTargetMachine {
virtual void anchor();
@@ -83,7 +91,7 @@ public:
CodeGenOpt::Level OL);
};
-/// MipselTargetMachine - Mips32 little endian target machine.
+/// MipselTargetMachine - Mips32/64 little endian target machine.
///
class MipselTargetMachine : public MipsTargetMachine {
virtual void anchor();
@@ -94,29 +102,6 @@ public:
CodeGenOpt::Level OL);
};
-/// Mips64ebTargetMachine - Mips64 big endian target machine.
-///
-class Mips64ebTargetMachine : public MipsTargetMachine {
- virtual void anchor();
-public:
- Mips64ebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
-};
-
-/// Mips64elTargetMachine - Mips64 little endian target machine.
-///
-class Mips64elTargetMachine : public MipsTargetMachine {
- virtual void anchor();
-public:
- Mips64elTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
-};
} // End llvm namespace
#endif
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index f50f9b5..2a2abb1 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -337,7 +337,10 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
// can get a useful trip count. The trip count can
// be either a register or an immediate. The location
// of the value depends upon the type (reg or imm).
- while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) {
+ for (MachineRegisterInfo::reg_iterator
+ RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
+ RI != RE; ++RI) {
+ IV_Opnd = &RI.getOperand();
bool SignedCmp;
MachineInstr *MI = IV_Opnd->getParent();
if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) &&
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 13250b3..61d44c5 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -106,7 +106,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// from FP_ROUND: that rounds to nearest, this rounds to zero.
setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
- // We do not currently implment this libm ops for PowerPC.
+ // We do not currently implement these libm ops for PowerPC.
setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
@@ -394,8 +394,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
}
- if (Subtarget->has64BitSupport())
+ if (Subtarget->has64BitSupport()) {
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
+ }
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 91c5366..39778a5 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -265,6 +265,15 @@ def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS),
PPC970_DGroup_First, PPC970_Unit_FXU;
}
+let Pattern = [(set G8RC:$rT, readcyclecounter)] in
+def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins),
+ "mfspr $rT, 268", SprMFTB>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+// Note that encoding mftb using mfspr is now the preferred form,
+// and has been since at least ISA v2.03. The mftb instruction has
+// now been phased out. Using mfspr, however, is known not to work on
+// the POWER3.
+
let Defs = [X1], Uses = [X1] in
def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"",
[(set G8RC:$result,
diff --git a/lib/Target/PowerPC/TargetInfo/Makefile b/lib/Target/PowerPC/TargetInfo/Makefile
index a101aa4..2d0560d 100644
--- a/lib/Target/PowerPC/TargetInfo/Makefile
+++ b/lib/Target/PowerPC/TargetInfo/Makefile
@@ -10,6 +10,6 @@ LEVEL = ../../../..
LIBRARYNAME = LLVMPowerPCInfo
# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+override CPPFLAGS += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index cbfa4cf..9c27f27 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -2367,8 +2367,3 @@ unsigned foo(unsigned x, unsigned y) { return x > y && x != 0; }
should fold to x > y.
//===---------------------------------------------------------------------===//
-
-int f(double x) { return __builtin_fabs(x) < 0.0; }
-should fold to false.
-
-//===---------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 6357468..ff8d3c5 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -109,9 +109,6 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
}
-void SparcRegisterInfo::
-processFunctionBeforeFrameFinalized(MachineFunction &MF) const {}
-
unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return SP::I6;
}
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index ec95ad4..8e215a7 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -24,64 +24,72 @@ void TargetLibraryInfo::anchor() { }
const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
{
+ "__cxa_atexit",
+ "__cxa_guard_abort",
+ "__cxa_guard_acquire",
+ "__cxa_guard_release",
+ "__memcpy_chk",
"acos",
- "acosl",
"acosf",
+ "acosl",
"asin",
- "asinl",
"asinf",
+ "asinl",
"atan",
- "atanl",
- "atanf",
"atan2",
- "atan2l",
"atan2f",
+ "atan2l",
+ "atanf",
+ "atanl",
"ceil",
- "ceill",
"ceilf",
+ "ceill",
"copysign",
"copysignf",
"copysignl",
"cos",
- "cosl",
"cosf",
"cosh",
- "coshl",
"coshf",
+ "coshl",
+ "cosl",
"exp",
- "expl",
- "expf",
"exp2",
- "exp2l",
"exp2f",
+ "exp2l",
+ "expf",
+ "expl",
"expm1",
- "expm1l",
"expm1f",
+ "expm1l",
"fabs",
- "fabsl",
"fabsf",
+ "fabsl",
+ "fiprintf",
"floor",
- "floorl",
"floorf",
- "fiprintf",
+ "floorl",
"fmod",
- "fmodl",
"fmodf",
+ "fmodl",
+ "fputc",
"fputs",
"fwrite",
"iprintf",
"log",
- "logl",
- "logf",
- "log2",
- "log2l",
- "log2f",
"log10",
- "log10l",
"log10f",
+ "log10l",
"log1p",
- "log1pl",
"log1pf",
+ "log1pl",
+ "log2",
+ "log2f",
+ "log2l",
+ "logf",
+ "logl",
+ "memchr",
+ "memcmp",
"memcpy",
"memmove",
"memset",
@@ -92,6 +100,8 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"pow",
"powf",
"powl",
+ "putchar",
+ "puts",
"rint",
"rintf",
"rintl",
@@ -99,36 +109,48 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"roundf",
"roundl",
"sin",
- "sinl",
"sinf",
"sinh",
- "sinhl",
"sinhf",
+ "sinhl",
+ "sinl",
"siprintf",
"sqrt",
- "sqrtl",
"sqrtf",
+ "sqrtl",
+ "strcat",
+ "strchr",
+ "strcpy",
+ "strlen",
+ "strncat",
+ "strncmp",
+ "strncpy",
+ "strnlen",
"tan",
- "tanl",
"tanf",
"tanh",
- "tanhl",
"tanhf",
+ "tanhl",
+ "tanl",
"trunc",
"truncf",
- "truncl",
- "__cxa_atexit",
- "__cxa_guard_abort",
- "__cxa_guard_acquire",
- "__cxa_guard_release"
+ "truncl"
};
/// initialize - Initialize the set of available library functions based on the
/// specified target triple. This should be carefully written so that a missing
/// target triple gets a sane set of defaults.
-static void initialize(TargetLibraryInfo &TLI, const Triple &T) {
+static void initialize(TargetLibraryInfo &TLI, const Triple &T,
+ const char **StandardNames) {
initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry());
+#ifndef NDEBUG
+ // Verify that the StandardNames array is in alphabetical order.
+ for (unsigned F = 1; F < LibFunc::NumLibFuncs; ++F) {
+ if (strcmp(StandardNames[F-1], StandardNames[F]) >= 0)
+ llvm_unreachable("TargetLibraryInfo function names must be sorted");
+ }
+#endif // !NDEBUG
// memset_pattern16 is only available on iOS 3.0 and Mac OS/X 10.5 and later.
if (T.isMacOSX()) {
@@ -240,14 +262,14 @@ TargetLibraryInfo::TargetLibraryInfo() : ImmutablePass(ID) {
// Default to everything being available.
memset(AvailableArray, -1, sizeof(AvailableArray));
- initialize(*this, Triple());
+ initialize(*this, Triple(), StandardNames);
}
TargetLibraryInfo::TargetLibraryInfo(const Triple &T) : ImmutablePass(ID) {
// Default to everything being available.
memset(AvailableArray, -1, sizeof(AvailableArray));
- initialize(*this, T);
+ initialize(*this, T, StandardNames);
}
TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI)
@@ -256,6 +278,17 @@ TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI)
CustomNames = TLI.CustomNames;
}
+bool TargetLibraryInfo::getLibFunc(StringRef funcName,
+ LibFunc::Func &F) const {
+ const char **Start = &StandardNames[0];
+ const char **End = &StandardNames[LibFunc::NumLibFuncs];
+ const char **I = std::lower_bound(Start, End, funcName);
+ if (I != End && *I == funcName) {
+ F = (LibFunc::Func)(I - Start);
+ return true;
+ }
+ return false;
+}
/// disableAllFunctions - This disables all builtins, which is used for options
/// like -fno-builtin.
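A small usage sketch of the new getLibFunc() lookup — not from the patch; the helper name is illustrative:

    #include "llvm/Target/TargetLibraryInfo.h"
    using namespace llvm;

    // Map a callee name to the LibFunc enum and check that the function is
    // actually available on the current target before treating it as sqrt.
    static bool isAvailableSqrt(const TargetLibraryInfo &TLI, StringRef Name) {
      LibFunc::Func F;
      // getLibFunc() binary-searches StandardNames, which is why the table
      // above had to be re-sorted alphabetically.
      return TLI.getLibFunc(Name, F) && F == LibFunc::sqrt && TLI.has(F);
    }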
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 95e83ec..73a0095 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -39,7 +39,9 @@ private:
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
bool Error(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
+ bool matchingInlineAsm = false) {
+ if (matchingInlineAsm) return true;
return Parser.Error(L, Msg, Ranges);
}
@@ -65,6 +67,12 @@ private:
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out);
+ bool MatchInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ SmallVectorImpl<MCInst> &MCInsts,
+ unsigned &OrigErrorInfo,
+ bool matchingInlineAsm = false);
+
/// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi)
/// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode.
bool isSrcOp(X86Operand &Op);
@@ -1508,9 +1516,24 @@ bool X86AsmParser::
MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out) {
+ SmallVector<MCInst, 2> Insts;
+ unsigned ErrorInfo;
+ bool Error = MatchInstruction(IDLoc, Operands, Insts, ErrorInfo);
+ if (!Error)
+ for (unsigned i = 0, e = Insts.size(); i != e; ++i)
+ Out.EmitInstruction(Insts[i]);
+ return Error;
+}
+
+bool X86AsmParser::
+MatchInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ SmallVectorImpl<MCInst> &MCInsts, unsigned &OrigErrorInfo,
+ bool matchingInlineAsm) {
assert(!Operands.empty() && "Unexpect empty operand list!");
X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+ ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>();
// First, handle aliases that expand to multiple instructions.
// FIXME: This should be replaced with a real .td file alias mechanism.
@@ -1523,7 +1546,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
MCInst Inst;
Inst.setOpcode(X86::WAIT);
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst);
+ MCInsts.push_back(Inst);
const char *Repl =
StringSwitch<const char*>(Op->getToken())
@@ -1542,7 +1565,6 @@ MatchAndEmitInstruction(SMLoc IDLoc,
}
bool WasOriginallyInvalidOperand = false;
- unsigned OrigErrorInfo;
MCInst Inst;
// First, try a direct match.
@@ -1557,13 +1579,15 @@ MatchAndEmitInstruction(SMLoc IDLoc,
;
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst);
+ MCInsts.push_back(Inst);
return false;
case Match_MissingFeature:
- Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled",
+ EmptyRanges, matchingInlineAsm);
return true;
case Match_ConversionFail:
- return Error(IDLoc, "unable to convert operands to instruction");
+ return Error(IDLoc, "unable to convert operands to instruction",
+ EmptyRanges, matchingInlineAsm);
case Match_InvalidOperand:
WasOriginallyInvalidOperand = true;
break;
@@ -1615,7 +1639,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
(Match3 == Match_Success) + (Match4 == Match_Success);
if (NumSuccessfulMatches == 1) {
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst);
+ MCInsts.push_back(Inst);
return false;
}
@@ -1642,7 +1666,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
OS << "'" << Base << MatchChars[i] << "'";
}
OS << ")";
- Error(IDLoc, OS.str());
+ Error(IDLoc, OS.str(), EmptyRanges, matchingInlineAsm);
return true;
}
@@ -1654,30 +1678,33 @@ MatchAndEmitInstruction(SMLoc IDLoc,
(Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
if (!WasOriginallyInvalidOperand) {
return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
- Op->getLocRange());
+ Op->getLocRange(), matchingInlineAsm);
}
// Recover location info for the operand if we know which was the problem.
if (OrigErrorInfo != ~0U) {
if (OrigErrorInfo >= Operands.size())
- return Error(IDLoc, "too few operands for instruction");
+ return Error(IDLoc, "too few operands for instruction",
+ EmptyRanges, matchingInlineAsm);
X86Operand *Operand = (X86Operand*)Operands[OrigErrorInfo];
if (Operand->getStartLoc().isValid()) {
SMRange OperandRange = Operand->getLocRange();
return Error(Operand->getStartLoc(), "invalid operand for instruction",
- OperandRange);
+ OperandRange, matchingInlineAsm);
}
}
- return Error(IDLoc, "invalid operand for instruction");
+ return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
+ matchingInlineAsm);
}
// If one instruction matched with a missing feature, report this as a
// missing feature.
if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
(Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
- Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled",
+ EmptyRanges, matchingInlineAsm);
return true;
}
@@ -1685,12 +1712,14 @@ MatchAndEmitInstruction(SMLoc IDLoc,
// operand failure.
if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
(Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
- Error(IDLoc, "invalid operand for instruction");
+ Error(IDLoc, "invalid operand for instruction", EmptyRanges,
+ matchingInlineAsm);
return true;
}
// If all of these were an outright failure, report it in a useless way.
- Error(IDLoc, "unknown use of instruction mnemonic without a size suffix");
+ Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
+ EmptyRanges, matchingInlineAsm);
return true;
}
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 4bbfe95..5039887 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -327,7 +327,7 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
if (type == TYPE_RELv) {
isBranch = true;
pcrel = insn.startLocation +
- insn.displacementOffset + insn.displacementSize;
+ insn.immediateOffset + insn.immediateSize;
switch (insn.displacementSize) {
default:
break;
@@ -762,8 +762,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
translateRegister(mcInst, insn.vvvv);
return false;
case ENCODING_DUP:
- return translateOperand(mcInst,
- insn.spec->operands[operand.type - TYPE_DUP0],
+ return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0],
insn, Dis);
}
}
@@ -789,8 +788,8 @@ static bool translateInstruction(MCInst &mcInst,
insn.numImmediatesTranslated = 0;
for (index = 0; index < X86_MAX_OPERANDS; ++index) {
- if (insn.spec->operands[index].encoding != ENCODING_NONE) {
- if (translateOperand(mcInst, insn.spec->operands[index], insn, Dis)) {
+ if (insn.operands[index].encoding != ENCODING_NONE) {
+ if (translateOperand(mcInst, insn.operands[index], insn, Dis)) {
return true;
}
}
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h
index c11f51c..0dbfa26 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.h
+++ b/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -20,7 +20,7 @@
// 2. Read the opcode, and determine what kind of opcode it is. The
// disassembler distinguishes four kinds of opcodes, which are enumerated in
// OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
-// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
+// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
// (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
//
// 3. Depending on the opcode type, look in one of four ClassDecision structures
@@ -74,8 +74,8 @@
#ifndef X86DISASSEMBLER_H
#define X86DISASSEMBLER_H
-#define INSTRUCTION_SPECIFIER_FIELDS \
- const char* name;
+#define INSTRUCTION_SPECIFIER_FIELDS \
+ uint16_t operands;
#define INSTRUCTION_IDS \
unsigned instructionIDs;
@@ -88,7 +88,7 @@
#include "llvm/MC/MCDisassembler.h"
namespace llvm {
-
+
class MCInst;
class MCInstrInfo;
class MCSubtargetInfo;
@@ -96,7 +96,7 @@ class MemoryObject;
class raw_ostream;
struct EDInstInfo;
-
+
namespace X86Disassembler {
/// X86GenericDisassembler - Generic disassembler for all X86 platforms.
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index 6020877..0c92912 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -1495,14 +1495,14 @@ static int readOperands(struct InternalInstruction* insn) {
needVVVV = hasVVVV && (insn->vvvv != 0);
for (index = 0; index < X86_MAX_OPERANDS; ++index) {
- switch (insn->spec->operands[index].encoding) {
+ switch (x86OperandSets[insn->spec->operands][index].encoding) {
case ENCODING_NONE:
break;
case ENCODING_REG:
case ENCODING_RM:
if (readModRM(insn))
return -1;
- if (fixupReg(insn, &insn->spec->operands[index]))
+ if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
return -1;
break;
case ENCODING_CB:
@@ -1524,14 +1524,14 @@ static int readOperands(struct InternalInstruction* insn) {
}
if (readImmediate(insn, 1))
return -1;
- if (insn->spec->operands[index].type == TYPE_IMM3 &&
+ if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 &&
insn->immediates[insn->numImmediatesConsumed - 1] > 7)
return -1;
- if (insn->spec->operands[index].type == TYPE_IMM5 &&
+ if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 &&
insn->immediates[insn->numImmediatesConsumed - 1] > 31)
return -1;
- if (insn->spec->operands[index].type == TYPE_XMM128 ||
- insn->spec->operands[index].type == TYPE_XMM256)
+ if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 ||
+ x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256)
sawRegImm = 1;
break;
case ENCODING_IW:
@@ -1582,7 +1582,7 @@ static int readOperands(struct InternalInstruction* insn) {
needVVVV = 0; /* Mark that we have found a VVVV operand. */
if (!hasVVVV)
return -1;
- if (fixupReg(insn, &insn->spec->operands[index]))
+ if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
return -1;
break;
case ENCODING_DUP:
@@ -1644,6 +1644,8 @@ int decodeInstruction(struct InternalInstruction* insn,
insn->instructionID == 0 ||
readOperands(insn))
return -1;
+
+ insn->operands = &x86OperandSets[insn->spec->operands][0];
insn->length = insn->readerCursor - insn->startLocation;
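In short, the decoder hunks above replace each InstructionSpecifier's inline operand array with a 16-bit index into a shared x86OperandSets table; an illustrative access pattern restating what the code now does (no new API is introduced here):

    // Old: insn->spec->operands[index].encoding
    // New: one extra indirection through the shared operand-set table.
    const struct OperandSpecifier *Ops =
        &x86OperandSets[insn->spec->operands][0];
    OperandEncoding Enc = Ops[index].encoding;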
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index e2caf6a..797703f 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -19,17 +19,18 @@
#ifdef __cplusplus
extern "C" {
#endif
-
-#define INSTRUCTION_SPECIFIER_FIELDS
+
+#define INSTRUCTION_SPECIFIER_FIELDS \
+ uint16_t operands;
#define INSTRUCTION_IDS \
unsigned instructionIDs;
#include "X86DisassemblerDecoderCommon.h"
-
+
#undef INSTRUCTION_SPECIFIER_FIELDS
#undef INSTRUCTION_IDS
-
+
/*
* Accessor functions for various fields of an Intel instruction
*/
@@ -43,7 +44,7 @@ extern "C" {
#define rFromREX(rex) (((rex) & 0x4) >> 2)
#define xFromREX(rex) (((rex) & 0x2) >> 1)
#define bFromREX(rex) ((rex) & 0x1)
-
+
#define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7)
#define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6)
#define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5)
@@ -237,7 +238,7 @@ extern "C" {
ENTRY(YMM13) \
ENTRY(YMM14) \
ENTRY(YMM15)
-
+
#define REGS_SEGMENT \
ENTRY(ES) \
ENTRY(CS) \
@@ -245,7 +246,7 @@ extern "C" {
ENTRY(DS) \
ENTRY(FS) \
ENTRY(GS)
-
+
#define REGS_DEBUG \
ENTRY(DR0) \
ENTRY(DR1) \
@@ -266,12 +267,12 @@ extern "C" {
ENTRY(CR6) \
ENTRY(CR7) \
ENTRY(CR8)
-
+
#define ALL_EA_BASES \
EA_BASES_16BIT \
EA_BASES_32BIT \
EA_BASES_64BIT
-
+
#define ALL_SIB_BASES \
REGS_32BIT \
REGS_64BIT
@@ -290,7 +291,7 @@ extern "C" {
ENTRY(RIP)
/*
- * EABase - All possible values of the base field for effective-address
+ * EABase - All possible values of the base field for effective-address
* computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We
* distinguish between bases (EA_BASE_*) and registers that just happen to be
* referred to when Mod == 0b11 (EA_REG_*).
@@ -305,8 +306,8 @@ typedef enum {
#undef ENTRY
EA_max
} EABase;
-
-/*
+
+/*
* SIBIndex - All possible values of the SIB index field.
* Borrows entries from ALL_EA_BASES with the special case that
* sib is synonymous with NONE.
@@ -321,7 +322,7 @@ typedef enum {
#undef ENTRY
SIB_INDEX_max
} SIBIndex;
-
+
/*
* SIBBase - All possible values of the SIB base field.
*/
@@ -353,7 +354,7 @@ typedef enum {
#undef ENTRY
MODRM_REG_max
} Reg;
-
+
/*
* SegmentOverride - All possible segment overrides.
*/
@@ -367,7 +368,7 @@ typedef enum {
SEG_OVERRIDE_GS,
SEG_OVERRIDE_max
} SegmentOverride;
-
+
/*
* VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field
*/
@@ -431,16 +432,16 @@ struct InternalInstruction {
void* dlogArg;
/* General instruction information */
-
+
/* The mode to disassemble for (64-bit, protected, real) */
DisassemblerMode mode;
/* The start of the instruction, usable with the reader */
uint64_t startLocation;
/* The length of the instruction, in bytes */
size_t length;
-
+
/* Prefix state */
-
+
/* 1 if the prefix byte corresponding to the entry is present; 0 if not */
uint8_t prefixPresent[0x100];
/* contains the location (for use with the reader) of the prefix byte */
@@ -456,7 +457,7 @@ struct InternalInstruction {
uint64_t necessaryPrefixLocation;
/* The segment override type */
SegmentOverride segmentOverride;
-
+
/* Sizes of various critical pieces of data, in bytes */
uint8_t registerSize;
uint8_t addressSize;
@@ -467,9 +468,9 @@ struct InternalInstruction {
needed to find relocation entries for adding symbolic operands */
uint8_t displacementOffset;
uint8_t immediateOffset;
-
+
/* opcode state */
-
+
/* The value of the two-byte escape prefix (usually 0x0f) */
uint8_t twoByteEscape;
/* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
@@ -478,16 +479,16 @@ struct InternalInstruction {
uint8_t opcode;
/* The ModR/M byte of the instruction, if it is an opcode extension */
uint8_t modRMExtension;
-
+
/* decode state */
-
+
/* The type of opcode, used for indexing into the array of decode tables */
OpcodeType opcodeType;
/* The instruction ID, extracted from the decode table */
uint16_t instructionID;
/* The specifier for the instruction, from the instruction info table */
const struct InstructionSpecifier *spec;
-
+
/* state for additional bytes, consumed during operand decode. Pattern:
consumed___ indicates that the byte was already consumed and does not
need to be consumed again */
@@ -495,12 +496,12 @@ struct InternalInstruction {
/* The VEX.vvvv field, which contains a third register operand for some AVX
instructions */
Reg vvvv;
-
+
/* The ModR/M byte, which contains most register operands and some portion of
all memory operands */
BOOL consumedModRM;
uint8_t modRM;
-
+
/* The SIB byte, used for more complex 32- or 64-bit memory operands */
BOOL consumedSIB;
uint8_t sib;
@@ -508,19 +509,19 @@ struct InternalInstruction {
/* The displacement, used for memory operands */
BOOL consumedDisplacement;
int32_t displacement;
-
+
/* Immediates. There can be two in some cases */
uint8_t numImmediatesConsumed;
uint8_t numImmediatesTranslated;
uint64_t immediates[2];
-
+
/* A register or immediate operand encoded into the opcode */
BOOL consumedOpcodeModifier;
uint8_t opcodeModifier;
Reg opcodeRegister;
-
+
/* Portions of the ModR/M byte */
-
+
/* These fields determine the allowable values for the ModR/M fields, which
depend on operand and address widths */
EABase eaBaseBase;
@@ -533,11 +534,13 @@ struct InternalInstruction {
EADisplacement eaDisplacement;
/* The reg field always encodes a register */
Reg reg;
-
+
/* SIB state */
SIBIndex sibIndex;
uint8_t sibScale;
SIBBase sibBase;
+
+ const struct OperandSpecifier *operands;
};
/* decodeInstruction - Decode one instruction and store the decoding results in
@@ -571,15 +574,15 @@ int decodeInstruction(struct InternalInstruction* insn,
* @param line - The line number that printed the debug message.
* @param s - The message to print.
*/
-
+
void x86DisassemblerDebug(const char *file,
unsigned line,
const char *s);
const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii);
-#ifdef __cplusplus
+#ifdef __cplusplus
}
#endif
-
+
#endif
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 13e1136..b0a0e1e 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -119,7 +119,7 @@ enum attributeBits {
ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and OpSize")
-#define ENUM_ENTRY(n, r, d) n,
+#define ENUM_ENTRY(n, r, d) n,
typedef enum {
INSTRUCTION_CONTEXTS
IC_max
@@ -148,11 +148,11 @@ typedef enum {
* If a ModR/M byte is not required, "required" is left unset, and the values
* for each instructionID are identical.
*/
-
+
typedef uint16_t InstrUID;
/*
- * ModRMDecisionType - describes the type of ModR/M decision, allowing the
+ * ModRMDecisionType - describes the type of ModR/M decision, allowing the
* consumer to determine the number of entries in it.
*
* MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded
@@ -172,7 +172,7 @@ typedef uint16_t InstrUID;
ENUM_ENTRY(MODRM_SPLITREG) \
ENUM_ENTRY(MODRM_FULL)
-#define ENUM_ENTRY(n) n,
+#define ENUM_ENTRY(n) n,
typedef enum {
MODRMTYPES
MODRM_max
@@ -180,13 +180,13 @@ typedef enum {
#undef ENUM_ENTRY
/*
- * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which
+ * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which
* instruction each possible value of the ModR/M byte corresponds to. Once
* this information is known, we have narrowed down to a single instruction.
*/
struct ModRMDecision {
uint8_t modrm_type;
-
+
/* The macro below must be defined wherever this file is included. */
INSTRUCTION_IDS
};
@@ -210,7 +210,7 @@ struct ContextDecision {
struct OpcodeDecision opcodeDecisions[IC_max];
};
-/*
+/*
* Physical encodings of instruction operands.
*/
@@ -244,14 +244,14 @@ struct ContextDecision {
ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \
"in type")
-#define ENUM_ENTRY(n, d) n,
+#define ENUM_ENTRY(n, d) n,
typedef enum {
ENCODINGS
ENCODING_max
} OperandEncoding;
#undef ENUM_ENTRY
-/*
+/*
* Semantic interpretations of instruction operands.
*/
@@ -332,14 +332,14 @@ struct ContextDecision {
ENUM_ENTRY(TYPE_DUP4, "operand 4") \
ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state")
-#define ENUM_ENTRY(n, d) n,
+#define ENUM_ENTRY(n, d) n,
typedef enum {
TYPES
TYPE_max
} OperandType;
#undef ENUM_ENTRY
-/*
+/*
* OperandSpecifier - The specification for how to extract and interpret one
* operand.
*/
@@ -374,8 +374,7 @@ typedef enum {
struct InstructionSpecifier {
uint8_t modifierType;
uint8_t modifierBase;
- struct OperandSpecifier operands[X86_MAX_OPERANDS];
-
+
/* The macro below must be defined wherever this file is included. */
INSTRUCTION_SPECIFIER_FIELDS
};
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 49c07f3..b0acd7d 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -91,9 +91,10 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
- // OpenBSD has buggy support for .quad in 32-bit mode, just split into two
- // .words.
- if (T.getOS() == Triple::OpenBSD && T.getArch() == Triple::x86)
+ // OpenBSD and Bitrig have buggy support for .quad in 32-bit mode, just split
+ // into two .words.
+ if ((T.getOS() == Triple::OpenBSD || T.getOS() == Triple::Bitrig) &&
+ T.getArch() == Triple::x86)
Data64bitsDirective = 0;
}
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index bf05ccf..dce5b4d 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -26,7 +26,7 @@ class FunctionPass;
class JITCodeEmitter;
class X86TargetMachine;
-/// createX86ISelDag - This pass converts a legalized DAG into a
+/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *createX86ISelDag(X86TargetMachine &TM,
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 6c1a816..18e6b7c 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -17,14 +17,14 @@
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
-// X86 Subtarget state.
+// X86 Subtarget state
//
def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
"64-bit mode (x86_64)">;
//===----------------------------------------------------------------------===//
-// X86 Subtarget features.
+// X86 Subtarget features
//===----------------------------------------------------------------------===//
def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
@@ -97,7 +97,7 @@ def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
[FeatureAVX, FeatureSSE4A]>;
def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
"Enable XOP instructions",
- [FeatureAVX, FeatureSSE4A]>;
+ [FeatureFMA4]>;
def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
"HasVectorUAMem", "true",
"Allow unaligned memory operands on vector/SIMD instructions">;
@@ -226,7 +226,7 @@ def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePCLMUL,
FeatureF16C, FeatureLZCNT,
- FeaturePOPCNT, FeatureBMI]>;
+ FeaturePOPCNT, FeatureBMI, FeatureFMA]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
def : Proc<"winchip2", [Feature3DNow]>;
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index a6ed9ba..35386cd 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -37,15 +37,15 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
virtual const char *getPassName() const {
return "X86 AT&T-Style Assembly Printer";
}
-
+
const X86Subtarget &getSubtarget() const { return *Subtarget; }
virtual void EmitStartOfAsmFile(Module &M);
virtual void EmitEndOfAsmFile(Module &M);
-
+
virtual void EmitInstruction(const MachineInstr *MI);
-
+
void printSymbolOperand(const MachineOperand &MO, raw_ostream &O);
// These methods are used by the tablegen'erated instruction printer.
@@ -71,7 +71,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
void printPICLabel(const MachineInstr *MI, unsigned Op, raw_ostream &O);
bool runOnMachineFunction(MachineFunction &F);
-
+
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
index e01ff41..6a6125b 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.cpp
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
@@ -17,4 +17,3 @@ using namespace llvm;
X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() {
}
-
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index 0cec95a..471eb31 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -1,4 +1,4 @@
-//===-- X86COFFMachineModuleInfo.h - X86 COFF MMI Impl ----------*- C++ -*-===//
+//===-- X86coffmachinemoduleinfo.h - X86 COFF MMI Impl ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -33,7 +33,7 @@ public:
void addExternalFunction(MCSymbol* Symbol) {
Externals.insert(Symbol);
}
-
+
typedef DenseSet<MCSymbol const *>::const_iterator externals_iterator;
externals_iterator externals_begin() const { return Externals.begin(); }
externals_iterator externals_end() const { return Externals.end(); }
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 585b7a5..e5952aa 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -57,7 +57,9 @@ class X86FastISel : public FastISel {
bool X86ScalarSSEf32;
public:
- explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
+ explicit X86FastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo)
+ : FastISel(funcInfo, libInfo) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
X86ScalarSSEf64 = Subtarget->hasSSE2();
@@ -155,9 +157,9 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
// For now, require SSE/SSE2 for performing floating-point operations,
// since x87 requires additional work.
if (VT == MVT::f64 && !X86ScalarSSEf64)
- return false;
+ return false;
if (VT == MVT::f32 && !X86ScalarSSEf32)
- return false;
+ return false;
// Similarly, no f80 support yet.
if (VT == MVT::f80)
return false;
@@ -1516,6 +1518,22 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
return DoSelectCall(I, 0);
}
+static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget,
+ const ImmutableCallSite &CS) {
+ if (Subtarget.is64Bit())
+ return 0;
+ if (Subtarget.isTargetWindows())
+ return 0;
+ CallingConv::ID CC = CS.getCallingConv();
+ if (CC == CallingConv::Fast || CC == CallingConv::GHC)
+ return 0;
+ if (!CS.paramHasAttr(1, Attribute::StructRet))
+ return 0;
+ if (CS.paramHasAttr(1, Attribute::InReg))
+ return 0;
+ return 4;
+}
+
// Select either a call, or an llvm.memcpy/memmove/memset intrinsic
bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
const CallInst *CI = cast<CallInst>(I);
@@ -1862,12 +1880,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// Issue CALLSEQ_END
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
- unsigned NumBytesCallee = 0;
- if (!Subtarget->is64Bit() && !Subtarget->isTargetWindows() &&
- !(CS.getCallingConv() == CallingConv::Fast ||
- CS.getCallingConv() == CallingConv::GHC) &&
- CS.paramHasAttr(1, Attribute::StructRet))
- NumBytesCallee = 4;
+ const unsigned NumBytesCallee = computeBytesPoppedByCallee(*Subtarget, CS);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
.addImm(NumBytes).addImm(NumBytesCallee);
@@ -2129,28 +2142,28 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
switch (VT.SimpleTy) {
- default: return false;
- case MVT::f32:
- if (X86ScalarSSEf32) {
- Opc = X86::FsFLD0SS;
- RC = &X86::FR32RegClass;
- } else {
- Opc = X86::LD_Fp032;
- RC = &X86::RFP32RegClass;
- }
- break;
- case MVT::f64:
- if (X86ScalarSSEf64) {
- Opc = X86::FsFLD0SD;
- RC = &X86::FR64RegClass;
- } else {
- Opc = X86::LD_Fp064;
- RC = &X86::RFP64RegClass;
- }
- break;
- case MVT::f80:
- // No f80 support yet.
- return false;
+ default: return false;
+ case MVT::f32:
+ if (X86ScalarSSEf32) {
+ Opc = X86::FsFLD0SS;
+ RC = &X86::FR32RegClass;
+ } else {
+ Opc = X86::LD_Fp032;
+ RC = &X86::RFP32RegClass;
+ }
+ break;
+ case MVT::f64:
+ if (X86ScalarSSEf64) {
+ Opc = X86::FsFLD0SD;
+ RC = &X86::FR64RegClass;
+ } else {
+ Opc = X86::LD_Fp064;
+ RC = &X86::RFP64RegClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return false;
}
unsigned ResultReg = createResultReg(RC);
@@ -2169,7 +2182,7 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
if (!X86SelectAddress(LI->getOperand(0), AM))
return false;
- X86InstrInfo &XII = (X86InstrInfo&)TII;
+ const X86InstrInfo &XII = (const X86InstrInfo&)TII;
unsigned Size = TD.getTypeAllocSize(LI->getType());
unsigned Alignment = LI->getAlignment();
@@ -2188,7 +2201,8 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
namespace llvm {
- FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
- return new X86FastISel(funcInfo);
+ FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) {
+ return new X86FastISel(funcInfo, libInfo);
}
}
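A worked example of the computeBytesPoppedByCallee() helper introduced above (illustrative only):

    // 32-bit, non-Windows cdecl call whose first argument carries 'sret' and
    // is not passed 'inreg': the helper returns 4, because the callee pops
    // the hidden struct-return pointer with "ret $4", so CALLSEQ_END gets
    // NumBytesCallee = 4. Every other case (64-bit, Windows, fastcc/GHC,
    // no sret, or sret passed in a register) returns 0.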
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 711ee41..955c75a 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -971,7 +971,7 @@ void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
// Change from the pseudo instruction to the concrete instruction.
MI->RemoveOperand(0); // Remove the explicit ST(0) operand
MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
-
+
// Result gets pushed on the stack.
pushReg(DestReg);
}
@@ -1015,7 +1015,7 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
} else {
moveToTop(Reg, I); // Move to the top of the stack...
}
-
+
// Convert from the pseudo instruction to the concrete instruction.
MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand
MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
@@ -1297,7 +1297,7 @@ void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
MI->RemoveOperand(1);
MI->getOperand(0).setReg(getSTReg(Op1));
MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
-
+
// If we kill the second operand, make sure to pop it from the stack.
if (Op0 != Op1 && KillsOp1) {
// Get this value off of the register stack.
@@ -1714,38 +1714,38 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
// Assert that the top of stack contains the right FP register.
assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
"Top of stack not the right register for RET!");
-
+
// Ok, everything is good, mark the value as not being on the stack
// anymore so that our assertion about the stack being empty at end of
// block doesn't fire.
StackTop = 0;
return;
}
-
+
// Otherwise, we are returning two values:
// 2) If returning the same value for both, we only have one thing in the FP
// stack. Consider: RET FP1, FP1
if (StackTop == 1) {
assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
"Stack misconfiguration for RET!");
-
+
// Duplicate the TOS so that we return it twice. Just pick some other FPx
// register to hold it.
unsigned NewReg = getScratchReg();
duplicateToTop(FirstFPRegOp, NewReg, MI);
FirstFPRegOp = NewReg;
}
-
+
/// Okay we know we have two different FPx operands now:
assert(StackTop == 2 && "Must have two values live!");
-
+
/// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
/// in ST(1). In this case, emit an fxch.
if (getStackEntry(0) == SecondFPRegOp) {
assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
moveToTop(FirstFPRegOp, MI);
}
-
+
/// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
/// ST(1). Just remove both from our understanding of the stack and return.
assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 5186482..27195b4 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -60,7 +60,7 @@ namespace {
int Base_FrameIndex;
unsigned Scale;
- SDValue IndexReg;
+ SDValue IndexReg;
int32_t Disp;
SDValue Segment;
const GlobalValue *GV;
@@ -80,11 +80,11 @@ namespace {
bool hasSymbolicDisplacement() const {
return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0;
}
-
+
bool hasBaseOrIndexReg() const {
return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0;
}
-
+
/// isRIPRelative - Return true if this addressing mode is already RIP
/// relative.
bool isRIPRelative() const {
@@ -94,7 +94,7 @@ namespace {
return RegNode->getReg() == X86::RIP;
return false;
}
-
+
void setBaseReg(SDValue Reg) {
BaseType = RegBase;
Base_Reg = Reg;
@@ -104,7 +104,7 @@ namespace {
dbgs() << "X86ISelAddressMode " << this << '\n';
dbgs() << "Base_Reg ";
if (Base_Reg.getNode() != 0)
- Base_Reg.getNode()->dump();
+ Base_Reg.getNode()->dump();
else
dbgs() << "nul";
dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
@@ -113,7 +113,7 @@ namespace {
if (IndexReg.getNode() != 0)
IndexReg.getNode()->dump();
else
- dbgs() << "nul";
+ dbgs() << "nul";
dbgs() << " Disp " << Disp << '\n'
<< "GV ";
if (GV)
@@ -213,21 +213,21 @@ namespace {
SDValue &Index, SDValue &Disp,
SDValue &Segment,
SDValue &NodeWithChain);
-
+
bool TryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment);
-
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
char ConstraintCode,
std::vector<SDValue> &OutOps);
-
+
void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
- inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
+ inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
@@ -426,7 +426,7 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
void X86DAGToDAGISel::PreprocessISelDAG() {
// OptForSize is used in pattern predicates that isel is matching.
OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize);
-
+
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
@@ -462,7 +462,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
++NumLoadMoved;
continue;
}
-
+
// Lower fpround and fpextend nodes that target the FP stack to be store and
// load to the stack. This is a gross hack. We would like to simply mark
// these as being illegal, but when we do that, legalize produces these when
@@ -473,7 +473,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
// FIXME: This should only happen when not compiled with -O0.
if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
continue;
-
+
EVT SrcVT = N->getOperand(0).getValueType();
EVT DstVT = N->getValueType(0);
@@ -496,7 +496,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
if (N->getConstantOperandVal(1))
continue;
}
-
+
// Here we could have an FP stack truncation or an FPStack <-> SSE convert.
// FPStack has extload and truncstore. SSE can fold direct loads into other
// operations. Based on this, decide what we want to do.
@@ -505,10 +505,10 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
else
MemVT = SrcIsSSE ? SrcVT : DstVT;
-
+
SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
DebugLoc dl = N->getDebugLoc();
-
+
// FIXME: optimize the case where the src/dest is a load or store?
SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
N->getOperand(0),
@@ -524,12 +524,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
// To avoid invalidating 'I', back it up to the convert node.
--I;
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
-
+
// Now that we did that, the node is dead. Increment the iterator to the
// next node to process, then delete N.
++I;
CurDAG->DeleteNode(N);
- }
+ }
}
@@ -584,7 +584,7 @@ bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
SDValue Address = N->getOperand(1);
-
+
// load gs:0 -> GS segment register.
// load fs:0 -> FS segment register.
//
@@ -593,7 +593,7 @@ bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
// For more information see http://people.redhat.com/drepper/tls.pdf
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 &&
- Subtarget->isTargetELF())
+ Subtarget->isTargetLinux())
switch (N->getPointerInfo().getAddrSpace()) {
case 256:
AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
@@ -602,7 +602,7 @@ bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
return false;
}
-
+
return true;
}
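Narrowing the check from isTargetELF() to isTargetLinux() restricts the gs:0/fs:0 folding to the target where that idiom really is the thread-pointer load described in the TLS document cited above. A small illustrative mapping of the reserved address spaces (not the LLVM API):

    // Address spaces 256 and 257 are reserved by the x86 backend for GS- and
    // FS-relative memory; a zero pointer in one of them is the thread-pointer
    // load that the segment-override addressing mode can absorb on Linux.
    enum class Segment { None, GS, FS };

    static Segment segmentForAddrSpace(unsigned AddrSpace) {
      switch (AddrSpace) {
      case 256: return Segment::GS;
      case 257: return Segment::FS;
      default:  return Segment::None;
      }
    }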
@@ -992,7 +992,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
case ISD::SHL:
if (AM.IndexReg.getNode() != 0 || AM.Scale != 1)
break;
-
+
if (ConstantSDNode
*CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
unsigned Val = CN->getZExtValue();
@@ -1167,7 +1167,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
return false;
AM = Backup;
-
+
// Try again after commuting the operands.
if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
!MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
@@ -1203,7 +1203,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
AM = Backup;
}
break;
-
+
case ISD::AND: {
// Perform some heroic transforms on an and of a constant-count shift
// with a constant to enable use of the scaled offset field.
@@ -1275,7 +1275,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
X86ISelAddressMode AM;
-
+
if (Parent &&
// This list of opcodes are all the nodes that have an "addr:$ptr" operand
// that are not a MemSDNode, and thus don't have proper addrspace info.
@@ -1290,7 +1290,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
if (AddrSpace == 257)
AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
}
-
+
if (MatchAddress(N, AM))
return false;
@@ -1336,7 +1336,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
// elements. This is a vector shuffle from the zero vector.
if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
// Check to see if the top elements are all zeros (or bitcast of zeros).
- N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
N.getOperand(0).getNode()->hasOneUse() &&
ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
N.getOperand(0).getOperand(0).hasOneUse() &&
@@ -1411,7 +1411,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
// If it isn't worth using an LEA, reject it.
if (Complexity <= 2)
return false;
-
+
getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
return true;
}
@@ -1422,7 +1422,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
SDValue &Disp, SDValue &Segment) {
assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
-
+
X86ISelAddressMode AM;
AM.GV = GA->getGlobal();
AM.Disp += GA->getOffset();
@@ -1435,7 +1435,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
} else {
AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
}
-
+
getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
return true;
}
@@ -1449,7 +1449,7 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
!IsProfitableToFold(N, P, P) ||
!IsLegalToFold(N, P, P, OptLevel))
return false;
-
+
return SelectAddr(N.getNode(),
N.getOperand(1), Base, Scale, Index, Disp, Segment);
}
@@ -1700,7 +1700,7 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
if (Node->hasAnyUseOfValue(0))
return 0;
-
+
// Optimize common patterns for __sync_or_and_fetch and similar arith
// operations where the result is not used. This allows us to use the "lock"
// version of the arithmetic instruction.
@@ -1727,14 +1727,14 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
default:
return 0;
}
-
+
bool isCN = false;
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
if (CN && (int32_t)CN->getSExtValue() == CN->getSExtValue()) {
isCN = true;
Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT);
}
-
+
unsigned Opc = 0;
switch (NVT.getSimpleVT().SimpleTy) {
default: return 0;
@@ -1772,7 +1772,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
}
break;
}
-
+
assert(Opc != 0 && "Invalid arith lock transform!");
DebugLoc dl = Node->getDebugLoc();
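This path only fires when value 0 of the atomic node is dead, i.e. the fetched value is never read, which is what makes a single lock-prefixed read-modify-write instruction sufficient. Roughly the source-level shape it targets:

    #include <atomic>

    // When the old value is discarded, __sync_or_and_fetch-style code can be
    // selected as a single `lock or` on memory instead of a loop that also
    // materializes the previous value.
    void set_flag(std::atomic<unsigned> &flags, unsigned bit) {
      flags.fetch_or(1u << bit, std::memory_order_seq_cst);  // result unused
    }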
@@ -1852,7 +1852,7 @@ static bool HasNoSignedComparisonUses(SDNode *N) {
/// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode
/// is suitable for doing the {load; increment or decrement; store} to modify
/// transformation.
-static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
+static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
SDValue StoredVal, SelectionDAG *CurDAG,
LoadSDNode* &LoadNode, SDValue &InputChain) {
@@ -1876,15 +1876,15 @@ static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
// Return LoadNode by reference.
LoadNode = cast<LoadSDNode>(Load);
// is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8)
- EVT LdVT = LoadNode->getMemoryVT();
- if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 &&
+ EVT LdVT = LoadNode->getMemoryVT();
+ if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 &&
LdVT != MVT::i8)
return false;
// Is store the only read of the loaded value?
if (!Load.hasOneUse())
return false;
-
+
// Is the address of the store the same as the load?
if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
LoadNode->getOffset() != StoreNode->getOffset())
@@ -1990,7 +1990,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
unsigned Opc, MOpc;
unsigned Opcode = Node->getOpcode();
DebugLoc dl = Node->getDebugLoc();
-
+
DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');
if (Node->isMachineOpcode()) {
@@ -2062,7 +2062,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case X86ISD::ATOMSWAP64_DAG: {
unsigned Opc;
switch (Opcode) {
- default: llvm_unreachable("Impossible intrinsic");
+ default: llvm_unreachable("Impossible opcode");
case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break;
case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break;
case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break;
@@ -2119,7 +2119,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val)
break;
- unsigned ShlOp, Op = 0;
+ unsigned ShlOp, Op;
EVT CstVT = NVT;
// Check the minimum bitwidth for the new constant.
@@ -2142,6 +2142,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
ShlOp = X86::SHL32ri;
switch (Opcode) {
+ default: llvm_unreachable("Impossible opcode");
case ISD::AND: Op = X86::AND32ri8; break;
case ISD::OR: Op = X86::OR32ri8; break;
case ISD::XOR: Op = X86::XOR32ri8; break;
@@ -2152,6 +2153,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
ShlOp = X86::SHL64ri;
switch (Opcode) {
+ default: llvm_unreachable("Impossible opcode");
case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
@@ -2168,7 +2170,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case X86ISD::UMUL: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
-
+
unsigned LoReg;
switch (NVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
@@ -2177,20 +2179,20 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
}
-
+
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
N0, SDValue()).getValue(1);
-
+
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
SDValue Ops[] = {N1, InFlag};
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2);
-
+
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
return NULL;
}
-
+
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI: {
SDValue N0 = Node->getOperand(0);
@@ -2287,7 +2289,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
ReplaceUses(SDValue(Node, 1), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
-
+
return NULL;
}
@@ -2438,7 +2440,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return NULL;
}
- case X86ISD::CMP: {
+ case X86ISD::CMP:
+ case X86ISD::SUB: {
+ // Sometimes a SUB is used to perform a comparison.
+ if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0))
+ // This node is not a CMP.
+ break;
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
@@ -2555,7 +2562,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// a simple increment or decrement through memory of that value, if the
// uses of the modified value and its address are suitable.
// The DEC64m tablegen pattern is currently not able to match the case where
- // the EFLAGS on the original DEC are used. (This also applies to
+ // the EFLAGS on the original DEC are used. (This also applies to
// {INC,DEC}X{64,32,16,8}.)
// We'll need to improve tablegen to allow flags to be transferred from a
// node in the pattern to the result node. probably with a new keyword
@@ -2587,7 +2594,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
MemOp[0] = StoreNode->getMemOperand();
MemOp[1] = LoadNode->getMemOperand();
const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain };
- EVT LdVT = LoadNode->getMemoryVT();
+ EVT LdVT = LoadNode->getMemoryVT();
unsigned newOpc = getFusedLdStOpcode(LdVT, Opc);
MachineSDNode *Result = CurDAG->getMachineNode(newOpc,
Node->getDebugLoc(),
@@ -2600,6 +2607,85 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return Result;
}
+
+ // FIXME: Custom handling because TableGen doesn't support multiple implicit
+ // defs in an instruction pattern
+ case X86ISD::PCMPESTRI: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ SDValue N2 = Node->getOperand(2);
+ SDValue N3 = Node->getOperand(3);
+ SDValue N4 = Node->getOperand(4);
+
+ // Make sure last argument is a constant
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N4);
+ if (!Cst)
+ break;
+
+ uint64_t Imm = Cst->getZExtValue();
+
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
+ X86::EAX, N1, SDValue()).getValue(1);
+ InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
+ N3, InFlag).getValue(1);
+
+ SDValue Ops[] = { N0, N2, getI8Imm(Imm), InFlag };
+ unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr :
+ X86::PCMPESTRIrr;
+ InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops,
+ array_lengthof(Ops)), 0);
+
+ if (!SDValue(Node, 0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::ECX, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 0), Result);
+ }
+ if (!SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::EFLAGS, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 1), Result);
+ }
+
+ return NULL;
+ }
+
+ // FIXME: Custom handling because TableGen doesn't support multiple implicit
+ // defs in an instruction pattern
+ case X86ISD::PCMPISTRI: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ SDValue N2 = Node->getOperand(2);
+
+ // Make sure last argument is a constant
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N2);
+ if (!Cst)
+ break;
+
+ uint64_t Imm = Cst->getZExtValue();
+
+ SDValue Ops[] = { N0, N1, getI8Imm(Imm) };
+ unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr :
+ X86::PCMPISTRIrr;
+ SDValue InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops,
+ array_lengthof(Ops)), 0);
+
+ if (!SDValue(Node, 0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::ECX, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 0), Result);
+ }
+ if (!SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::EFLAGS, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 1), Result);
+ }
+
+ return NULL;
+ }
}
SDNode *ResNode = SelectCode(Node);
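Both string-compare nodes produce two live results, the index in ECX and the flags in EFLAGS, which is why the results are copied out of the physical registers by hand here rather than through a TableGen pattern. At the source level the same instruction feeds two intrinsics, for example (requires SSE4.2):

    #include <nmmintrin.h>

    // One PCMPISTRI produces both an index (ECX) and condition flags
    // (EFLAGS); the intrinsics below expose the two results separately.
    int first_equal_byte(__m128i a, __m128i b) {
      return _mm_cmpistri(a, b, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH);
    }

    int any_equal_byte(__m128i a, __m128i b) {
      return _mm_cmpistrc(a, b, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH);
    }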
@@ -2627,7 +2713,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
return true;
break;
}
-
+
OutOps.push_back(Op0);
OutOps.push_back(Op1);
OutOps.push_back(Op2);
@@ -2636,7 +2722,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
return false;
}
-/// createX86ISelDag - This pass converts a legalized DAG into a
+/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b88f2fa..7954170 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -66,7 +66,7 @@ static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, DebugLoc dl) {
EVT VT = Vec.getValueType();
- assert(VT.getSizeInBits() == 256 && "Unexpected vector size!");
+ assert(VT.is256BitVector() && "Unexpected vector size!");
EVT ElVT = VT.getVectorElementType();
unsigned Factor = VT.getSizeInBits()/128;
EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
@@ -105,7 +105,7 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec,
return Result;
EVT VT = Vec.getValueType();
- assert(VT.getSizeInBits() == 128 && "Unexpected vector size!");
+ assert(VT.is128BitVector() && "Unexpected vector size!");
EVT ElVT = VT.getVectorElementType();
EVT ResultVT = Result.getValueType();
@@ -174,7 +174,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// For 64-bit since we have so many registers use the ILP scheduler, for
// 32-bit code use the register pressure specific scheduling.
// For Atom, always use ILP scheduling.
- if (Subtarget->isAtom())
+ if (Subtarget->isAtom())
setSchedulingPreference(Sched::ILP);
else if (Subtarget->is64Bit())
setSchedulingPreference(Sched::ILP);
@@ -731,6 +731,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FMA, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
@@ -828,7 +829,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
- setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
}
if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) {
@@ -869,27 +869,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
-
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
- EVT VT = (MVT::SimpleValueType)i;
+ MVT VT = (MVT::SimpleValueType)i;
// Do not attempt to custom lower non-power-of-2 vectors
if (!isPowerOf2_32(VT.getVectorNumElements()))
continue;
// Do not attempt to custom lower non-128-bit vectors
if (!VT.is128BitVector())
continue;
- setOperationAction(ISD::BUILD_VECTOR,
- VT.getSimpleVT().SimpleTy, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE,
- VT.getSimpleVT().SimpleTy, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT,
- VT.getSimpleVT().SimpleTy, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
@@ -906,23 +897,22 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
- MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
- EVT VT = SVT;
+ MVT VT = (MVT::SimpleValueType)i;
// Do not attempt to promote non-128-bit vectors
if (!VT.is128BitVector())
continue;
- setOperationAction(ISD::AND, SVT, Promote);
- AddPromotedToType (ISD::AND, SVT, MVT::v2i64);
- setOperationAction(ISD::OR, SVT, Promote);
- AddPromotedToType (ISD::OR, SVT, MVT::v2i64);
- setOperationAction(ISD::XOR, SVT, Promote);
- AddPromotedToType (ISD::XOR, SVT, MVT::v2i64);
- setOperationAction(ISD::LOAD, SVT, Promote);
- AddPromotedToType (ISD::LOAD, SVT, MVT::v2i64);
- setOperationAction(ISD::SELECT, SVT, Promote);
- AddPromotedToType (ISD::SELECT, SVT, MVT::v2i64);
+ setOperationAction(ISD::AND, VT, Promote);
+ AddPromotedToType (ISD::AND, VT, MVT::v2i64);
+ setOperationAction(ISD::OR, VT, Promote);
+ AddPromotedToType (ISD::OR, VT, MVT::v2i64);
+ setOperationAction(ISD::XOR, VT, Promote);
+ AddPromotedToType (ISD::XOR, VT, MVT::v2i64);
+ setOperationAction(ISD::LOAD, VT, Promote);
+ AddPromotedToType (ISD::LOAD, VT, MVT::v2i64);
+ setOperationAction(ISD::SELECT, VT, Promote);
+ AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
}
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
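These loops only ever visit simple machine types, so carrying an MVT directly removes the EVT wrapper and the getSimpleVT().SimpleTy round trip at every table update. A contrived standalone sketch of the same cleanup, using stand-in types:

    enum SimpleTy { v16i8, v8i16, v4i32, v2i64, NumSimpleTypes };

    struct ExtTy {                           // can also describe extended types
      SimpleTy Simple;
      SimpleTy getSimpleTy() const { return Simple; }
    };

    static void markCustom(bool (&Custom)[NumSimpleTypes], SimpleTy VT) {
      Custom[VT] = true;
    }

    static void oldStyle(bool (&Custom)[NumSimpleTypes]) {
      for (int i = v16i8; i != v2i64; ++i) {
        ExtTy VT = { SimpleTy(i) };            // wrap...
        markCustom(Custom, VT.getSimpleTy());  // ...only to unwrap at each use
      }
    }

    static void newStyle(bool (&Custom)[NumSimpleTypes]) {
      for (int i = v16i8; i != v2i64; ++i)
        markCustom(Custom, SimpleTy(i));       // use the simple type directly
    }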
@@ -1009,9 +999,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
}
- if (Subtarget->hasSSE42())
- setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
-
if (!TM.Options.UseSoftFloat && Subtarget->hasAVX()) {
addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
@@ -1042,13 +1029,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f64, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i64, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i8, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i16, Custom);
-
setOperationAction(ISD::SRL, MVT::v16i16, Custom);
setOperationAction(ISD::SRL, MVT::v32i8, Custom);
@@ -1072,6 +1052,15 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
+ if (Subtarget->hasFMA()) {
+ setOperationAction(ISD::FMA, MVT::v8f32, Custom);
+ setOperationAction(ISD::FMA, MVT::v4f64, Custom);
+ setOperationAction(ISD::FMA, MVT::v4f32, Custom);
+ setOperationAction(ISD::FMA, MVT::v2f64, Custom);
+ setOperationAction(ISD::FMA, MVT::f32, Custom);
+ setOperationAction(ISD::FMA, MVT::f64, Custom);
+ }
+
if (Subtarget->hasAVX2()) {
setOperationAction(ISD::ADD, MVT::v4i64, Legal);
setOperationAction(ISD::ADD, MVT::v8i32, Legal);
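Two related FMA changes: every vector type now expands ISD::FMA by default (separate multiply and add when no fused instruction exists), and when the subtarget reports FMA support the common scalar and vector types are marked Custom, with ISD::FMA also added to the target DAG-combine list further down. What fusion means numerically:

    #include <cmath>

    // A fused multiply-add rounds once; the expanded form rounds after the
    // multiply and again after the add.
    double fused(double a, double b, double c)    { return std::fma(a, b, c); }
    double separate(double a, double b, double c) { return a * b + c; }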
@@ -1125,45 +1114,44 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Custom lower several nodes for 256-bit types.
for (int i = MVT::FIRST_VECTOR_VALUETYPE;
i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
- EVT VT = SVT;
+ MVT VT = (MVT::SimpleValueType)i;
// Extract subvector is special because the value type
// (result) is 128-bit but the source is 256-bit wide.
if (VT.is128BitVector())
- setOperationAction(ISD::EXTRACT_SUBVECTOR, SVT, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
// Do not attempt to custom lower other non-256-bit vectors
if (!VT.is256BitVector())
continue;
- setOperationAction(ISD::BUILD_VECTOR, SVT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, SVT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, SVT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, SVT, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, SVT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, SVT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
}
// Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
for (int i = MVT::v32i8; i != MVT::v4i64; ++i) {
- MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
- EVT VT = SVT;
+ MVT VT = (MVT::SimpleValueType)i;
// Do not attempt to promote non-256-bit vectors
if (!VT.is256BitVector())
continue;
- setOperationAction(ISD::AND, SVT, Promote);
- AddPromotedToType (ISD::AND, SVT, MVT::v4i64);
- setOperationAction(ISD::OR, SVT, Promote);
- AddPromotedToType (ISD::OR, SVT, MVT::v4i64);
- setOperationAction(ISD::XOR, SVT, Promote);
- AddPromotedToType (ISD::XOR, SVT, MVT::v4i64);
- setOperationAction(ISD::LOAD, SVT, Promote);
- AddPromotedToType (ISD::LOAD, SVT, MVT::v4i64);
- setOperationAction(ISD::SELECT, SVT, Promote);
- AddPromotedToType (ISD::SELECT, SVT, MVT::v4i64);
+ setOperationAction(ISD::AND, VT, Promote);
+ AddPromotedToType (ISD::AND, VT, MVT::v4i64);
+ setOperationAction(ISD::OR, VT, Promote);
+ AddPromotedToType (ISD::OR, VT, MVT::v4i64);
+ setOperationAction(ISD::XOR, VT, Promote);
+ AddPromotedToType (ISD::XOR, VT, MVT::v4i64);
+ setOperationAction(ISD::LOAD, VT, Promote);
+ AddPromotedToType (ISD::LOAD, VT, MVT::v4i64);
+ setOperationAction(ISD::SELECT, VT, Promote);
+ AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
}
}
@@ -1221,6 +1209,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
+ setTargetDAGCombine(ISD::FMA);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
@@ -1718,21 +1707,37 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
/// CallIsStructReturn - Determines whether a call uses struct return
/// semantics.
-static bool CallIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
+enum StructReturnType {
+ NotStructReturn,
+ RegStructReturn,
+ StackStructReturn
+};
+static StructReturnType
+callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
if (Outs.empty())
- return false;
+ return NotStructReturn;
- return Outs[0].Flags.isSRet();
+ const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
+ if (!Flags.isSRet())
+ return NotStructReturn;
+ if (Flags.isInReg())
+ return RegStructReturn;
+ return StackStructReturn;
}
/// ArgsAreStructReturn - Determines whether a function uses struct
/// return semantics.
-static bool
-ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
+static StructReturnType
+argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
if (Ins.empty())
- return false;
+ return NotStructReturn;
- return Ins[0].Flags.isSRet();
+ const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
+ if (!Flags.isSRet())
+ return NotStructReturn;
+ if (Flags.isInReg())
+ return RegStructReturn;
+ return StackStructReturn;
}
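Replacing the boolean with a three-way classification matters because an sret pointer marked inreg is passed in a register, so the callee has nothing to pop; only the stack-passed case should trigger the 4-byte callee pop checked at the call and return sites below. A standalone mirror of the classification:

    // Mirror of the three-way struct-return classification: no sret, sret
    // passed in a register (inreg), or sret passed on the stack. Only the
    // last case makes the callee pop the hidden 4-byte pointer.
    enum StructReturnKind { NoStructRet, RegStructRet, StackStructRet };

    struct ArgFlags {
      bool SRet;
      bool InReg;
    };

    static StructReturnKind classifyStructReturn(bool HasArgs, ArgFlags F) {
      if (!HasArgs || !F.SRet)
        return NoStructRet;
      return F.InReg ? RegStructRet : StackStructRet;
    }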
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
@@ -1876,9 +1881,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
RC = &X86::FR32RegClass;
else if (RegVT == MVT::f64)
RC = &X86::FR64RegClass;
- else if (RegVT.isVector() && RegVT.getSizeInBits() == 256)
+ else if (RegVT.is256BitVector())
RC = &X86::VR256RegClass;
- else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
+ else if (RegVT.is128BitVector())
RC = &X86::VR128RegClass;
else if (RegVT == MVT::x86mmx)
RC = &X86::VR64RegClass;
@@ -2073,7 +2078,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows &&
- ArgsAreStructReturn(Ins))
+ argsAreStructReturn(Ins) == StackStructReturn)
FuncInfo->setBytesToPopOnReturn(4);
}
@@ -2163,7 +2168,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isTargetWin64();
bool IsWindows = Subtarget->isTargetWindows();
- bool IsStructRet = CallIsStructReturn(Outs);
+ StructReturnType SR = callIsStructReturn(Outs);
bool IsSibcall = false;
if (MF.getTarget().Options.DisableTailCalls)
@@ -2172,8 +2177,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
- isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
- Outs, OutVals, Ins, DAG);
+ isVarArg, SR != NotStructReturn,
+ MF.getFunction()->hasStructRetAttr(),
+ Outs, OutVals, Ins, DAG);
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
@@ -2255,7 +2261,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::AExt:
- if (RegVT.isVector() && RegVT.getSizeInBits() == 128) {
+ if (RegVT.is128BitVector()) {
// Special case: passing MMX values in XMM registers.
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
@@ -2549,7 +2555,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
getTargetMachine().Options.GuaranteedTailCallOpt))
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
else if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows &&
- IsStructRet)
+ SR == StackStructReturn)
// If this is a call to a struct-return function, the callee
// pops the hidden struct pointer, so we have to push it back.
// This is common for Darwin/X86, Linux & Mingw32 targets.
@@ -2870,8 +2876,9 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
}
FastISel *
-X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
- return X86::createFastISel(funcInfo);
+X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const {
+ return X86::createFastISel(funcInfo, libInfo);
}
@@ -3397,11 +3404,11 @@ static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX,
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) {
- unsigned NumElems = VT.getVectorNumElements();
-
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
+ unsigned NumElems = VT.getVectorNumElements();
+
if (NumElems != 4)
return false;
@@ -3416,11 +3423,11 @@ static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) {
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) {
- unsigned NumElems = VT.getVectorNumElements();
-
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
+ unsigned NumElems = VT.getVectorNumElements();
+
if (NumElems != 4)
return false;
@@ -3433,7 +3440,7 @@ static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) {
/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) {
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
unsigned NumElems = VT.getVectorNumElements();
@@ -3455,10 +3462,12 @@ static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) {
/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) {
+ if (!VT.is128BitVector())
+ return false;
+
unsigned NumElems = VT.getVectorNumElements();
- if ((NumElems != 2 && NumElems != 4)
- || VT.getSizeInBits() > 128)
+ if (NumElems != 2 && NumElems != 4)
return false;
for (unsigned i = 0, e = NumElems/2; i != e; ++i)
@@ -3675,7 +3684,7 @@ static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) {
if (VT.getVectorElementType().getSizeInBits() < 32)
return false;
- if (VT.getSizeInBits() == 256)
+ if (!VT.is128BitVector())
return false;
unsigned NumElts = VT.getVectorNumElements();
@@ -3697,7 +3706,7 @@ static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) {
/// The first half comes from the second half of V1 and the second half from the
/// the second half of V2.
static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
- if (!HasAVX || VT.getSizeInBits() != 256)
+ if (!HasAVX || !VT.is256BitVector())
return false;
// The shuffle result is divided into half A and half B. In total the two
@@ -3789,9 +3798,10 @@ static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
/// element of vector 2 and the other elements to come from vector 1 in order.
static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT,
bool V2IsSplat = false, bool V2IsUndef = false) {
- unsigned NumOps = VT.getVectorNumElements();
- if (VT.getSizeInBits() == 256)
+ if (!VT.is128BitVector())
return false;
+
+ unsigned NumOps = VT.getVectorNumElements();
if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
return false;
@@ -3857,9 +3867,11 @@ static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT,
/// specifies a shuffle of elements that is suitable for input to 256-bit
/// version of MOVDDUP.
static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
- unsigned NumElts = VT.getVectorNumElements();
+ if (!HasAVX || !VT.is256BitVector())
+ return false;
- if (!HasAVX || VT.getSizeInBits() != 256 || NumElts != 4)
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts != 4)
return false;
for (unsigned i = 0; i != NumElts/2; ++i)
@@ -3875,7 +3887,7 @@ static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
/// specifies a shuffle of elements that is suitable for input to 128-bit
/// version of MOVDDUP.
static bool isMOVDDUPMask(ArrayRef<int> Mask, EVT VT) {
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
unsigned e = VT.getVectorNumElements() / 2;
@@ -4120,7 +4132,7 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, EVT VT) {
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
if (VT.getVectorNumElements() != 4)
return false;
@@ -4177,7 +4189,7 @@ static bool WillBeConstantPoolLoad(SDNode *N) {
/// MOVLP, it must be either a vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
ArrayRef<int> Mask, EVT VT) {
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
@@ -4719,7 +4731,7 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
// Although the logic below support any bitwidth size, there are no
// shift instructions which handle more than 128-bit vectors.
- if (SVOp->getValueType(0).getSizeInBits() > 128)
+ if (!SVOp->getValueType(0).is128BitVector())
return false;
if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
@@ -4814,7 +4826,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
unsigned NumBits, SelectionDAG &DAG,
const TargetLowering &TLI, DebugLoc dl) {
- assert(VT.getSizeInBits() == 128 && "Unknown type for VShift");
+ assert(VT.is128BitVector() && "Unknown type for VShift");
EVT ShVT = MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
@@ -5047,7 +5059,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
}
}
- bool Is256 = VT.getSizeInBits() == 256;
+ bool Is256 = VT.is256BitVector();
// Handle the broadcasting a single constant scalar from the constant pool
// into a vector. On Sandybridge it is still better to load a constant vector
@@ -5102,6 +5114,86 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
return SDValue();
}
+// LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to v2f64
+// and convert it into X86ISD::VFPEXT because the current ISD::FP_EXTEND has the
+// constraint of matching input/output vector elements.
+SDValue
+X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ SDNode *N = Op.getNode();
+ EVT VT = Op.getValueType();
+ unsigned NumElts = Op.getNumOperands();
+
+ // Check supported types and sub-targets.
+ //
+ // Only v2f32 -> v2f64 needs special handling.
+ if (VT != MVT::v2f64 || !Subtarget->hasSSE2())
+ return SDValue();
+
+ SDValue VecIn;
+ EVT VecInVT;
+ SmallVector<int, 8> Mask;
+ EVT SrcVT = MVT::Other;
+
+ // Check the patterns could be translated into X86vfpext.
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue In = N->getOperand(i);
+ unsigned Opcode = In.getOpcode();
+
+ // Skip if the element is undefined.
+ if (Opcode == ISD::UNDEF) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+ // Quit if one of the elements is not defined from 'fpext'.
+ if (Opcode != ISD::FP_EXTEND)
+ return SDValue();
+
+ // Check how the source of 'fpext' is defined.
+ SDValue L2In = In.getOperand(0);
+ EVT L2InVT = L2In.getValueType();
+
+ // Check the original type
+ if (SrcVT == MVT::Other)
+ SrcVT = L2InVT;
+ else if (SrcVT != L2InVT) // Quit if non-homogeneous types.
+ return SDValue();
+
+ // Check whether the value being 'fpext'ed is extracted from the same
+ // source.
+ Opcode = L2In.getOpcode();
+
+ // Quit if it's not extracted with a constant index.
+ if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(L2In.getOperand(1)))
+ return SDValue();
+
+ SDValue ExtractedFromVec = L2In.getOperand(0);
+
+ if (VecIn.getNode() == 0) {
+ VecIn = ExtractedFromVec;
+ VecInVT = ExtractedFromVec.getValueType();
+ } else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec.
+ return SDValue();
+
+ Mask.push_back(cast<ConstantSDNode>(L2In.getOperand(1))->getZExtValue());
+ }
+
+ // Quit if all operands of BUILD_VECTOR are undefined.
+ if (!VecIn.getNode())
+ return SDValue();
+
+ // Fill the remaining mask as undef.
+ for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i)
+ Mask.push_back(-1);
+
+ return DAG.getNode(X86ISD::VFPEXT, DL, VT,
+ DAG.getVectorShuffle(VecInVT, DL,
+ VecIn, DAG.getUNDEF(VecInVT),
+ &Mask[0]));
+}
+
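The match only fires for a v2f64 BUILD_VECTOR whose defined elements are fpext of extract_element from one common source vector, i.e. a fully scalarized v2f32-to-v2f64 extension; reassembling it as a shuffle feeding X86ISD::VFPEXT lets it select to a single cvtps2pd. Roughly the kind of source this comes from:

    #include <cstddef>

    // A widening copy like this tends to reach the DAG as per-element fpext
    // of extracted <2 x float> lanes; the new hook stitches it back into one
    // vector conversion. Illustrative only.
    void widen2(const float in[2], double out[2]) {
      for (std::size_t i = 0; i != 2; ++i)
        out[i] = static_cast<double>(in[i]);
    }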
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
@@ -5134,6 +5226,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (Broadcast.getNode())
return Broadcast;
+ SDValue FpExt = LowerVectorFpExtend(Op, DAG);
+ if (FpExt.getNode())
+ return FpExt;
+
unsigned EVTBits = ExtVT.getSizeInBits();
unsigned NumZero = 0;
@@ -5209,12 +5305,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
(ExtVT == MVT::i64 && Subtarget->is64Bit())) {
- if (VT.getSizeInBits() == 256) {
+ if (VT.is256BitVector()) {
SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
Item, DAG.getIntPtrConstant(0));
}
- assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
+ assert(VT.is128BitVector() && "Expected an SSE value type!");
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
@@ -5223,11 +5319,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
- if (VT.getSizeInBits() == 256) {
+ if (VT.is256BitVector()) {
SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl);
} else {
- assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
+ assert(VT.is128BitVector() && "Expected an SSE value type!");
Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
return DAG.getNode(ISD::BITCAST, dl, VT, Item);
@@ -5287,7 +5383,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// For AVX-length vectors, build the individual 128-bit pieces and use
// shuffles to put them in place.
- if (VT.getSizeInBits() == 256) {
+ if (VT.is256BitVector()) {
SmallVector<SDValue, 32> V;
for (unsigned i = 0; i != NumElems; ++i)
V.push_back(Op.getOperand(i));
@@ -5368,7 +5464,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
}
- if (Values.size() > 1 && VT.getSizeInBits() == 128) {
+ if (Values.size() > 1 && VT.is128BitVector()) {
// Check for a build vector of consecutive loads.
for (unsigned i = 0; i < NumElems; ++i)
V[i] = Op.getOperand(i);
@@ -5429,39 +5525,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-// LowerMMXCONCAT_VECTORS - We support concatenate two MMX registers and place
-// them in a MMX register. This is better than doing a stack convert.
-static SDValue LowerMMXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
- DebugLoc dl = Op.getDebugLoc();
- EVT ResVT = Op.getValueType();
-
- assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 ||
- ResVT == MVT::v8i16 || ResVT == MVT::v16i8);
- int Mask[2];
- SDValue InVec = DAG.getNode(ISD::BITCAST,dl, MVT::v1i64, Op.getOperand(0));
- SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
- InVec = Op.getOperand(1);
- if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- unsigned NumElts = ResVT.getVectorNumElements();
- VecOp = DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
- VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp,
- InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1));
- } else {
- InVec = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, InVec);
- SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
- Mask[0] = 0; Mask[1] = 2;
- VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask);
- }
- return DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
-}
-
// LowerAVXCONCAT_VECTORS - 256-bit AVX can use the vinsertf128 instruction
// to create 256-bit vectors from two other 128-bit ones.
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
EVT ResVT = Op.getValueType();
- assert(ResVT.getSizeInBits() == 256 && "Value type must be 256-bit wide");
+ assert(ResVT.is256BitVector() && "Value type must be 256-bit wide");
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
@@ -5472,16 +5542,7 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
SDValue
X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
- EVT ResVT = Op.getValueType();
-
assert(Op.getNumOperands() == 2);
- assert((ResVT.getSizeInBits() == 128 || ResVT.getSizeInBits() == 256) &&
- "Unsupported CONCAT_VECTORS for value type");
-
- // We support concatenate two MMX registers and place them in a MMX register.
- // This is better than doing a stack convert.
- if (ResVT.is128BitVector())
- return LowerMMXCONCAT_VECTORS(Op, DAG);
// 256-bit AVX can use the vinsertf128 instruction to create 256-bit vectors
// from two other 128-bit ones.
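With the MMX concat path removed, CONCAT_VECTORS is custom-lowered only for 256-bit results (the Custom action now comes from the 256-bit type loop earlier in the constructor), and the lowering is the vinsertf128-based combination of two 128-bit halves. Roughly what that amounts to at intrinsic level (requires AVX):

    #include <immintrin.h>

    // Concatenate two 128-bit halves into a 256-bit value: the low half is a
    // plain cast, the high half is placed with vinsertf128.
    __m256 concat(__m128 lo, __m128 hi) {
      return _mm256_insertf128_ps(_mm256_castps128_ps256(lo), hi, 1);
    }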
@@ -6131,7 +6192,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
DebugLoc dl = SVOp->getDebugLoc();
EVT VT = SVOp->getValueType(0);
- assert(VT.getSizeInBits() == 128 && "Unsupported vector size");
+ assert(VT.is128BitVector() && "Unsupported vector size");
std::pair<int, int> Locs[4];
int Mask1[] = { -1, -1, -1, -1 };
@@ -6759,7 +6820,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// Handle all 128-bit wide vectors with 4 elements, and match them with
// several different shuffle types.
- if (NumElems == 4 && VT.getSizeInBits() == 128)
+ if (NumElems == 4 && VT.is128BitVector())
return LowerVECTOR_SHUFFLE_128v4(SVOp, DAG);
// Handle general 256-bit shuffles
@@ -6775,7 +6836,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
- if (Op.getOperand(0).getValueType().getSizeInBits() != 128)
+ if (!Op.getOperand(0).getValueType().is128BitVector())
return SDValue();
if (VT.getSizeInBits() == 8) {
@@ -6845,7 +6906,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// If this is a 256-bit vector result, first extract the 128-bit vector and
// then extract the element from the 128-bit vector.
- if (VecVT.getSizeInBits() == 256) {
+ if (VecVT.is256BitVector()) {
DebugLoc dl = Op.getNode()->getDebugLoc();
unsigned NumElems = VecVT.getVectorNumElements();
SDValue Idx = Op.getOperand(1);
@@ -6860,7 +6921,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
DAG.getConstant(IdxVal, MVT::i32));
}
- assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length");
+ assert(VecVT.is128BitVector() && "Unexpected vector length");
if (Subtarget->hasSSE41()) {
SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
@@ -6936,7 +6997,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
- if (VT.getSizeInBits() == 256)
+ if (!VT.is128BitVector())
return SDValue();
if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
@@ -6992,7 +7053,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
// If this is a 256-bit vector result, first extract the 128-bit vector,
// insert the element into the extracted half and then place it back.
- if (VT.getSizeInBits() == 256) {
+ if (VT.is256BitVector()) {
if (!isa<ConstantSDNode>(N2))
return SDValue();
@@ -7036,7 +7097,7 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// If this is a 256-bit vector result, first insert into a 128-bit
// vector and then insert into the 256-bit vector.
- if (OpVT.getSizeInBits() > 128) {
+ if (!OpVT.is128BitVector()) {
// Insert into a 128-bit vector.
EVT VT128 = EVT::getVectorVT(*Context,
OpVT.getVectorElementType(),
@@ -7053,7 +7114,7 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
- assert(OpVT.getSizeInBits() == 128 && "Expected an SSE type!");
+ assert(OpVT.is128BitVector() && "Expected an SSE type!");
return DAG.getNode(ISD::BITCAST, dl, OpVT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt));
}
@@ -7068,8 +7129,8 @@ X86TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
SDValue Vec = Op.getNode()->getOperand(0);
SDValue Idx = Op.getNode()->getOperand(1);
- if (Op.getNode()->getValueType(0).getSizeInBits() == 128 &&
- Vec.getNode()->getValueType(0).getSizeInBits() == 256 &&
+ if (Op.getNode()->getValueType(0).is128BitVector() &&
+ Vec.getNode()->getValueType(0).is256BitVector() &&
isa<ConstantSDNode>(Idx)) {
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
return Extract128BitVector(Vec, IdxVal, DAG, dl);
@@ -7089,8 +7150,8 @@ X86TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
SDValue SubVec = Op.getNode()->getOperand(1);
SDValue Idx = Op.getNode()->getOperand(2);
- if (Op.getNode()->getValueType(0).getSizeInBits() == 256 &&
- SubVec.getNode()->getValueType(0).getSizeInBits() == 128 &&
+ if (Op.getNode()->getValueType(0).is256BitVector() &&
+ SubVec.getNode()->getValueType(0).is128BitVector() &&
isa<ConstantSDNode>(Idx)) {
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
return Insert128BitVector(Vec, SubVec, IdxVal, DAG, dl);
@@ -7735,9 +7796,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
punpckldq (c0), %xmm0 // c0: (uint4){ 0x43300000U, 0x45300000U, 0U, 0U }
subpd (c1), %xmm0 // c1: (double2){ 0x1.0p52, 0x1.0p52 * 0x1.0p32 }
#ifdef __SSE3__
- haddpd %xmm0, %xmm0
+ haddpd %xmm0, %xmm0
#else
- pshufd $0x4e, %xmm0, %xmm1
+ pshufd $0x4e, %xmm0, %xmm1
addpd %xmm1, %xmm0
#endif
*/
@@ -8064,7 +8125,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op,
EltVT = VT.getVectorElementType();
Constant *C;
if (EltVT == MVT::f64) {
- C = ConstantVector::getSplat(2,
+ C = ConstantVector::getSplat(2,
ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))));
} else {
C = ConstantVector::getSplat(4,
@@ -8098,7 +8159,7 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
if (VT.isVector()) {
- MVT XORVT = VT.getSizeInBits() == 128 ? MVT::v2i64 : MVT::v4i64;
+ MVT XORVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(ISD::XOR, dl, XORVT,
DAG.getNode(ISD::BITCAST, dl, XORVT,
@@ -8226,7 +8287,33 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
unsigned Opcode = 0;
unsigned NumOperands = 0;
- switch (Op.getNode()->getOpcode()) {
+
+ // Truncate operations may prevent the merge of the SETCC instruction
+ // and the arithmetic instruction before it. Attempt to truncate the operands
+ // of the arithmetic instruction and use a reduced bit-width instruction.
+ bool NeedTruncation = false;
+ SDValue ArithOp = Op;
+ if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) {
+ SDValue Arith = Op->getOperand(0);
+ // Both the trunc and the arithmetic op need to have one user each.
+ if (Arith->hasOneUse())
+ switch (Arith.getOpcode()) {
+ default: break;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ NeedTruncation = true;
+ ArithOp = Arith;
+ }
+ }
+ }
+
+ // NOTICE: In the code below we use ArithOp to hold the arithmetic operation
+ // which may be the result of a CAST. We use the variable 'Op', which is the
+ // non-casted variable when we check for possible users.
+ switch (ArithOp.getOpcode()) {
case ISD::ADD:
// Due to an isel shortcoming, be conservative if this add is likely to be
// selected as part of a load-modify-store instruction. When the root node
@@ -8246,7 +8333,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
goto default_case;
if (ConstantSDNode *C =
- dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
+ dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) {
// An add of one will be selected as an INC.
if (C->getAPIntValue() == 1) {
Opcode = X86ISD::INC;
@@ -8282,7 +8369,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
if (User->getOpcode() != ISD::BRCOND &&
User->getOpcode() != ISD::SETCC &&
- (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
+ !(User->getOpcode() == ISD::SELECT && UOpNo == 0)) {
NonFlagUse = true;
break;
}
@@ -8303,15 +8390,9 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
goto default_case;
// Otherwise use a regular EFLAGS-setting instruction.
- switch (Op.getNode()->getOpcode()) {
+ switch (ArithOp.getOpcode()) {
default: llvm_unreachable("unexpected operator!");
- case ISD::SUB:
- // If the only use of SUB is EFLAGS, use CMP instead.
- if (Op.hasOneUse())
- Opcode = X86ISD::CMP;
- else
- Opcode = X86ISD::SUB;
- break;
+ case ISD::SUB: Opcode = X86ISD::SUB; break;
case ISD::OR: Opcode = X86ISD::OR; break;
case ISD::XOR: Opcode = X86ISD::XOR; break;
case ISD::AND: Opcode = X86ISD::AND; break;
@@ -8332,19 +8413,40 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
break;
}
+ // If we found that truncation is beneficial, perform the truncation and
+ // update 'Op'.
+ if (NeedTruncation) {
+ EVT VT = Op.getValueType();
+ SDValue WideVal = Op->getOperand(0);
+ EVT WideVT = WideVal.getValueType();
+ unsigned ConvertedOp = 0;
+ // Use a target machine opcode to prevent further DAGCombine
+ // optimizations that may separate the arithmetic operations
+ // from the setcc node.
+ switch (WideVal.getOpcode()) {
+ default: break;
+ case ISD::ADD: ConvertedOp = X86ISD::ADD; break;
+ case ISD::SUB: ConvertedOp = X86ISD::SUB; break;
+ case ISD::AND: ConvertedOp = X86ISD::AND; break;
+ case ISD::OR: ConvertedOp = X86ISD::OR; break;
+ case ISD::XOR: ConvertedOp = X86ISD::XOR; break;
+ }
+
+ if (ConvertedOp) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) {
+ SDValue V0 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(0));
+ SDValue V1 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(1));
+ Op = DAG.getNode(ConvertedOp, dl, VT, V0, V1);
+ }
+ }
+ }
+
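The truncation handling has two halves: the scan above notices a single-use TRUNCATE of a single-use ADD/SUB/AND/OR/XOR, and this block rebuilds the operation at the narrow width as a target node (so later combines cannot pull it apart), letting the arithmetic itself set EFLAGS instead of a wide op followed by a separate test on the truncated value. Roughly the source-level shape this helps:

    #include <cstdint>

    // 64-bit arithmetic whose truncated result is only tested against zero:
    // narrowing the AND lets one 32-bit flag-setting instruction do both the
    // arithmetic and the implicit compare. Illustrative only.
    bool low_bits_nonzero(uint64_t a, uint64_t b) {
      uint32_t t = static_cast<uint32_t>(a & b);
      return t != 0;
    }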
if (Opcode == 0)
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, Op.getValueType()));
- if (Opcode == X86ISD::CMP) {
- SDValue New = DAG.getNode(Opcode, dl, MVT::i32, Op.getOperand(0),
- Op.getOperand(1));
- // We can't replace usage of SUB with CMP.
- // The SUB node will be removed later because there is no use of it.
- return SDValue(New.getNode(), 0);
- }
-
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
SmallVector<SDValue, 4> Ops;
for (unsigned i = 0; i != NumOperands; ++i)
@@ -8364,6 +8466,14 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
return EmitTest(Op0, X86CC, DAG);
DebugLoc dl = Op0.getDebugLoc();
+ if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
+ Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
+ // Use SUB instead of CMP to enable CSE between SUB and CMP.
+ SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32);
+ SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs,
+ Op0, Op1);
+ return SDValue(Sub.getNode(), 1);
+ }
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
}
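Read together, the hunks above share one goal: let the flag-producing node have the same width and operands as a value-producing subtraction so the two can CSE. A rough sketch of the rewrite, assuming an i32 add feeding an i8 test (illustrative, not taken from the patch):

    //   (i8 (trunc (add i32 %x, %y)))                   -- flag users only
    //     ==> (X86ISD::ADD i8, (trunc %x), (trunc %y))  -- EFLAGS read here
    //   while EmitCmp now emits (X86ISD::SUB %a, %b) and returns result #1
    //   (EFLAGS), so an existing SUB of the same operands can reuse the node.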
@@ -8522,7 +8632,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
- assert(VT.getSizeInBits() == 256 && Op.getOpcode() == ISD::SETCC &&
+ assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC &&
"Unsupported value type for operation");
unsigned NumElems = VT.getVectorNumElements();
@@ -8559,10 +8669,12 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
if (isFP) {
- unsigned SSECC = 8;
+#ifndef NDEBUG
EVT EltVT = Op0.getValueType().getVectorElementType();
- assert(EltVT == MVT::f32 || EltVT == MVT::f64); (void)EltVT;
+ assert(EltVT == MVT::f32 || EltVT == MVT::f64);
+#endif
+ unsigned SSECC;
bool Swap = false;
// SSE Condition code mapping:
@@ -8575,7 +8687,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
// 6 - NLE
// 7 - ORD
switch (SetCCOpcode) {
- default: break;
+ default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETOEQ:
case ISD::SETEQ: SSECC = 0; break;
case ISD::SETOGT:
@@ -8589,34 +8701,33 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
case ISD::SETUO: SSECC = 3; break;
case ISD::SETUNE:
case ISD::SETNE: SSECC = 4; break;
- case ISD::SETULE: Swap = true;
+ case ISD::SETULE: Swap = true; // Fallthrough
case ISD::SETUGE: SSECC = 5; break;
- case ISD::SETULT: Swap = true;
+ case ISD::SETULT: Swap = true; // Fallthrough
case ISD::SETUGT: SSECC = 6; break;
case ISD::SETO: SSECC = 7; break;
+ case ISD::SETUEQ:
+ case ISD::SETONE: SSECC = 8; break;
}
if (Swap)
std::swap(Op0, Op1);
// In the two special cases we can't handle, emit two comparisons.
if (SSECC == 8) {
+ unsigned CC0, CC1;
+ unsigned CombineOpc;
if (SetCCOpcode == ISD::SETUEQ) {
- SDValue UNORD, EQ;
- UNORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
- DAG.getConstant(3, MVT::i8));
- EQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
- DAG.getConstant(0, MVT::i8));
- return DAG.getNode(ISD::OR, dl, VT, UNORD, EQ);
- }
- if (SetCCOpcode == ISD::SETONE) {
- SDValue ORD, NEQ;
- ORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
- DAG.getConstant(7, MVT::i8));
- NEQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
- DAG.getConstant(4, MVT::i8));
- return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ);
+ CC0 = 3; CC1 = 0; CombineOpc = ISD::OR;
+ } else {
+ assert(SetCCOpcode == ISD::SETONE);
+ CC0 = 7; CC1 = 4; CombineOpc = ISD::AND;
}
- llvm_unreachable("Illegal FP comparison");
+
+ SDValue Cmp0 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(CC0, MVT::i8));
+ SDValue Cmp1 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(CC1, MVT::i8));
+ return DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
}
// Handle all other FP comparisons here.
return DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
@@ -8624,17 +8735,17 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
}
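For reference, the CC0/CC1/CombineOpc values above expand the two predicates that lack a single CMPP immediate as follows (a sketch of the emitted nodes):

    //   setueq(a,b) -> (or  (CMPP a, b, 3 /*UNORD*/), (CMPP a, b, 0 /*EQ*/))
    //   setone(a,b) -> (and (CMPP a, b, 7 /*ORD*/),   (CMPP a, b, 4 /*NEQ*/))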
// Break 256-bit integer vector compare into smaller ones.
- if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2())
+ if (VT.is256BitVector() && !Subtarget->hasAVX2())
return Lower256IntVSETCC(Op, DAG);
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integer, swapping operands and multiple
// operations may be required for some comparisons.
- unsigned Opc = 0;
+ unsigned Opc;
bool Swap = false, Invert = false, FlipSigns = false;
switch (SetCCOpcode) {
- default: break;
+ default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETNE: Invert = true;
case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break;
case ISD::SETLT: Swap = true;
@@ -8651,10 +8762,12 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
// Check that the operation in question is available (most are plain SSE2,
// but PCMPGTQ and PCMPEQQ have different requirements).
- if (Opc == X86ISD::PCMPGT && VT == MVT::v2i64 && !Subtarget->hasSSE42())
- return SDValue();
- if (Opc == X86ISD::PCMPEQ && VT == MVT::v2i64 && !Subtarget->hasSSE41())
- return SDValue();
+ if (VT == MVT::v2i64) {
+ if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42())
+ return SDValue();
+ if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41())
+ return SDValue();
+ }
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations.
@@ -8714,6 +8827,16 @@ static bool isAllOnes(SDValue V) {
return C && C->isAllOnesValue();
}
+static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
+ if (V.getOpcode() != ISD::TRUNCATE)
+ return false;
+
+ SDValue VOp0 = V.getOperand(0);
+ unsigned InBits = VOp0.getValueSizeInBits();
+ unsigned Bits = V.getValueSizeInBits();
+ return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits));
+}
+
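A concrete reading of the new helper, assuming an i32-to-i8 truncate (illustrative):

    //   isTruncWithZeroHighBitsInput((i8 (trunc i32 %x)), DAG)
    //     == DAG.MaskedValueIsZero(%x, APInt::getHighBitsSet(32, 32 - 8))
    //   i.e. the top 24 bits of %x must be known zero before the truncate
    //   may be looked through when forming the TEST below.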
SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
bool addTest = true;
SDValue Cond = Op.getOperand(0);
@@ -8728,46 +8851,6 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
Cond = NewCond;
}
- // Handle the following cases related to max and min:
- // (a > b) ? (a-b) : 0
- // (a >= b) ? (a-b) : 0
- // (b < a) ? (a-b) : 0
- // (b <= a) ? (a-b) : 0
- // Comparison is removed to use EFLAGS from SUB.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2))
- if (Cond.getOpcode() == X86ISD::SETCC &&
- Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
- (Op1.getOpcode() == ISD::SUB || Op1.getOpcode() == X86ISD::SUB) &&
- C->getAPIntValue() == 0) {
- SDValue Cmp = Cond.getOperand(1);
- unsigned CC = cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
- if ((DAG.isEqualTo(Op1.getOperand(0), Cmp.getOperand(0)) &&
- DAG.isEqualTo(Op1.getOperand(1), Cmp.getOperand(1)) &&
- (CC == X86::COND_G || CC == X86::COND_GE ||
- CC == X86::COND_A || CC == X86::COND_AE)) ||
- (DAG.isEqualTo(Op1.getOperand(0), Cmp.getOperand(1)) &&
- DAG.isEqualTo(Op1.getOperand(1), Cmp.getOperand(0)) &&
- (CC == X86::COND_L || CC == X86::COND_LE ||
- CC == X86::COND_B || CC == X86::COND_BE))) {
-
- if (Op1.getOpcode() == ISD::SUB) {
- SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i32);
- SDValue New = DAG.getNode(X86ISD::SUB, DL, VTs,
- Op1.getOperand(0), Op1.getOperand(1));
- DAG.ReplaceAllUsesWith(Op1, New);
- Op1 = New;
- }
-
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
- unsigned NewCC = (CC == X86::COND_G || CC == X86::COND_GE ||
- CC == X86::COND_L ||
- CC == X86::COND_LE) ? X86::COND_GE : X86::COND_AE;
- SDValue Ops[] = { Op2, Op1, DAG.getConstant(NewCC, MVT::i8),
- SDValue(Op1.getNode(), 1) };
- return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
- }
- }
-
// (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
// (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
// (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y
@@ -8788,11 +8871,11 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// (select (x != 0), -1, 0) -> neg & sbb
// (select (x == 0), 0, -1) -> neg & sbb
if (ConstantSDNode *YC = dyn_cast<ConstantSDNode>(Y))
- if (YC->isNullValue() &&
+ if (YC->isNullValue() &&
(isAllOnes(Op1) == (CondCode == X86::COND_NE))) {
SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
- SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs,
- DAG.getConstant(0, CmpOp0.getValueType()),
+ SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs,
+ DAG.getConstant(0, CmpOp0.getValueType()),
CmpOp0);
SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
DAG.getConstant(X86::COND_B, MVT::i8),
@@ -8883,9 +8966,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
if (addTest) {
- // Look pass the truncate.
- if (Cond.getOpcode() == ISD::TRUNCATE)
- Cond = Cond.getOperand(0);
+    // Look past the truncate if the high bits are known zero.
+ if (isTruncWithZeroHighBitsInput(Cond, DAG))
+ Cond = Cond.getOperand(0);
// We know the result of AND is compared against zero. Try to match
// it to BT.
@@ -8908,7 +8991,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// a < b ? 0 : -1 -> RES = setcc_carry
// a >= b ? -1 : 0 -> RES = setcc_carry
// a >= b ? 0 : -1 -> RES = ~setcc_carry
- if (Cond.getOpcode() == X86ISD::CMP) {
+ if (Cond.getOpcode() == X86ISD::SUB) {
Cond = ConvertCmpIfNecessary(Cond, DAG);
unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
@@ -9192,9 +9275,9 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
}
if (addTest) {
- // Look pass the truncate.
- if (Cond.getOpcode() == ISD::TRUNCATE)
- Cond = Cond.getOperand(0);
+    // Look past the truncate if the high bits are known zero.
+ if (isTruncWithZeroHighBitsInput(Cond, DAG))
+ Cond = Cond.getOperand(0);
// We know the result of AND is compared against zero. Try to match
// it to BT.
@@ -9459,8 +9542,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT,
SDValue ShOps[4];
ShOps[0] = ShAmt;
ShOps[1] = DAG.getConstant(0, MVT::i32);
- ShOps[2] = DAG.getUNDEF(MVT::i32);
- ShOps[3] = DAG.getUNDEF(MVT::i32);
+ ShOps[2] = ShOps[3] = DAG.getUNDEF(MVT::i32);
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4);
// The return type has to be a 128-bit type with the same element
@@ -9503,8 +9585,8 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_sse2_ucomigt_sd:
case Intrinsic::x86_sse2_ucomige_sd:
case Intrinsic::x86_sse2_ucomineq_sd: {
- unsigned Opc = 0;
- ISD::CondCode CC = ISD::SETCC_INVALID;
+ unsigned Opc;
+ ISD::CondCode CC;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse_comieq_ss:
@@ -9578,55 +9660,102 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
DAG.getConstant(X86CC, MVT::i8), Cond);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
+
// Arithmetic intrinsics.
case Intrinsic::x86_sse2_pmulu_dq:
case Intrinsic::x86_avx2_pmulu_dq:
return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+
+ // SSE3/AVX horizontal add/sub intrinsics
case Intrinsic::x86_sse3_hadd_ps:
case Intrinsic::x86_sse3_hadd_pd:
case Intrinsic::x86_avx_hadd_ps_256:
case Intrinsic::x86_avx_hadd_pd_256:
- return DAG.getNode(X86ISD::FHADD, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse3_hsub_ps:
case Intrinsic::x86_sse3_hsub_pd:
case Intrinsic::x86_avx_hsub_ps_256:
case Intrinsic::x86_avx_hsub_pd_256:
- return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_ssse3_phadd_w_128:
case Intrinsic::x86_ssse3_phadd_d_128:
case Intrinsic::x86_avx2_phadd_w:
case Intrinsic::x86_avx2_phadd_d:
- return DAG.getNode(X86ISD::HADD, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_ssse3_phsub_w_128:
case Intrinsic::x86_ssse3_phsub_d_128:
case Intrinsic::x86_avx2_phsub_w:
- case Intrinsic::x86_avx2_phsub_d:
- return DAG.getNode(X86ISD::HSUB, dl, Op.getValueType(),
+ case Intrinsic::x86_avx2_phsub_d: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse3_hadd_ps:
+ case Intrinsic::x86_sse3_hadd_pd:
+ case Intrinsic::x86_avx_hadd_ps_256:
+ case Intrinsic::x86_avx_hadd_pd_256:
+ Opcode = X86ISD::FHADD;
+ break;
+ case Intrinsic::x86_sse3_hsub_ps:
+ case Intrinsic::x86_sse3_hsub_pd:
+ case Intrinsic::x86_avx_hsub_ps_256:
+ case Intrinsic::x86_avx_hsub_pd_256:
+ Opcode = X86ISD::FHSUB;
+ break;
+ case Intrinsic::x86_ssse3_phadd_w_128:
+ case Intrinsic::x86_ssse3_phadd_d_128:
+ case Intrinsic::x86_avx2_phadd_w:
+ case Intrinsic::x86_avx2_phadd_d:
+ Opcode = X86ISD::HADD;
+ break;
+ case Intrinsic::x86_ssse3_phsub_w_128:
+ case Intrinsic::x86_ssse3_phsub_d_128:
+ case Intrinsic::x86_avx2_phsub_w:
+ case Intrinsic::x86_avx2_phsub_d:
+ Opcode = X86ISD::HSUB;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+ }
+
+ // AVX2 variable shift intrinsics
case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_q:
case Intrinsic::x86_avx2_psllv_d_256:
case Intrinsic::x86_avx2_psllv_q_256:
- return DAG.getNode(ISD::SHL, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_avx2_psrlv_d:
case Intrinsic::x86_avx2_psrlv_q:
case Intrinsic::x86_avx2_psrlv_d_256:
case Intrinsic::x86_avx2_psrlv_q_256:
- return DAG.getNode(ISD::SRL, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_avx2_psrav_d:
- case Intrinsic::x86_avx2_psrav_d_256:
- return DAG.getNode(ISD::SRA, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_avx2_psrav_d_256: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_avx2_psllv_d:
+ case Intrinsic::x86_avx2_psllv_q:
+ case Intrinsic::x86_avx2_psllv_d_256:
+ case Intrinsic::x86_avx2_psllv_q_256:
+ Opcode = ISD::SHL;
+ break;
+ case Intrinsic::x86_avx2_psrlv_d:
+ case Intrinsic::x86_avx2_psrlv_q:
+ case Intrinsic::x86_avx2_psrlv_d_256:
+ case Intrinsic::x86_avx2_psrlv_q_256:
+ Opcode = ISD::SRL;
+ break;
+ case Intrinsic::x86_avx2_psrav_d:
+ case Intrinsic::x86_avx2_psrav_d_256:
+ Opcode = ISD::SRA;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+
case Intrinsic::x86_ssse3_pshuf_b_128:
case Intrinsic::x86_avx2_pshuf_b:
return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+
case Intrinsic::x86_ssse3_psign_b_128:
case Intrinsic::x86_ssse3_psign_w_128:
case Intrinsic::x86_ssse3_psign_d_128:
@@ -9635,15 +9764,18 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_avx2_psign_d:
return DAG.getNode(X86ISD::PSIGN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+
case Intrinsic::x86_sse41_insertps:
return DAG.getNode(X86ISD::INSERTPS, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
case Intrinsic::x86_avx_vperm2f128_ps_256:
case Intrinsic::x86_avx_vperm2f128_pd_256:
case Intrinsic::x86_avx_vperm2f128_si_256:
case Intrinsic::x86_avx2_vperm2i128:
return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
// Operands intentionally swapped. Mask is last operand to intrinsic,
@@ -9673,7 +9805,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_avx_vtestc_pd_256:
case Intrinsic::x86_avx_vtestnzc_pd_256: {
bool IsTestPacked = false;
- unsigned X86CC = 0;
+ unsigned X86CC;
switch (IntNo) {
default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
case Intrinsic::x86_avx_vtestz_ps:
@@ -9724,44 +9856,93 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_avx2_psll_w:
case Intrinsic::x86_avx2_psll_d:
case Intrinsic::x86_avx2_psll_q:
- return DAG.getNode(X86ISD::VSHL, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse2_psrl_w:
case Intrinsic::x86_sse2_psrl_d:
case Intrinsic::x86_sse2_psrl_q:
case Intrinsic::x86_avx2_psrl_w:
case Intrinsic::x86_avx2_psrl_d:
case Intrinsic::x86_avx2_psrl_q:
- return DAG.getNode(X86ISD::VSRL, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse2_psra_w:
case Intrinsic::x86_sse2_psra_d:
case Intrinsic::x86_avx2_psra_w:
- case Intrinsic::x86_avx2_psra_d:
- return DAG.getNode(X86ISD::VSRA, dl, Op.getValueType(),
+ case Intrinsic::x86_avx2_psra_d: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_avx2_psll_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ Opcode = X86ISD::VSHL;
+ break;
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ Opcode = X86ISD::VSRL;
+ break;
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
+ case Intrinsic::x86_avx2_psra_d:
+ Opcode = X86ISD::VSRA;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+ }
+
+ // SSE/AVX immediate shift intrinsics
case Intrinsic::x86_sse2_pslli_w:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
case Intrinsic::x86_avx2_pslli_w:
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
- return getTargetVShiftNode(X86ISD::VSHLI, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2), DAG);
case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_sse2_psrli_d:
case Intrinsic::x86_sse2_psrli_q:
case Intrinsic::x86_avx2_psrli_w:
case Intrinsic::x86_avx2_psrli_d:
case Intrinsic::x86_avx2_psrli_q:
- return getTargetVShiftNode(X86ISD::VSRLI, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2), DAG);
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_sse2_psrai_d:
case Intrinsic::x86_avx2_psrai_w:
- case Intrinsic::x86_avx2_psrai_d:
- return getTargetVShiftNode(X86ISD::VSRAI, dl, Op.getValueType(),
+ case Intrinsic::x86_avx2_psrai_d: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_avx2_pslli_w:
+ case Intrinsic::x86_avx2_pslli_d:
+ case Intrinsic::x86_avx2_pslli_q:
+ Opcode = X86ISD::VSHLI;
+ break;
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ Opcode = X86ISD::VSRLI;
+ break;
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ Opcode = X86ISD::VSRAI;
+ break;
+ }
+ return getTargetVShiftNode(Opcode, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), DAG);
+ }
+
// Fix vector shift instructions where the last operand is a non-immediate
// i32 value.
case Intrinsic::x86_mmx_pslli_w:
@@ -9776,8 +9957,9 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
if (isa<ConstantSDNode>(ShAmt))
return SDValue();
- unsigned NewIntNo = 0;
+ unsigned NewIntNo;
switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_mmx_pslli_w:
NewIntNo = Intrinsic::x86_mmx_psll_w;
break;
@@ -9802,7 +9984,6 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_mmx_psrai_d:
NewIntNo = Intrinsic::x86_mmx_psra_d;
break;
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
}
// The vector shift intrinsics with scalars uses 32b shift amounts but
@@ -9818,6 +9999,84 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
DAG.getConstant(NewIntNo, MVT::i32),
Op.getOperand(1), ShAmt);
}
+ case Intrinsic::x86_sse42_pcmpistria128:
+ case Intrinsic::x86_sse42_pcmpestria128:
+ case Intrinsic::x86_sse42_pcmpistric128:
+ case Intrinsic::x86_sse42_pcmpestric128:
+ case Intrinsic::x86_sse42_pcmpistrio128:
+ case Intrinsic::x86_sse42_pcmpestrio128:
+ case Intrinsic::x86_sse42_pcmpistris128:
+ case Intrinsic::x86_sse42_pcmpestris128:
+ case Intrinsic::x86_sse42_pcmpistriz128:
+ case Intrinsic::x86_sse42_pcmpestriz128: {
+ unsigned Opcode;
+ unsigned X86CC;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse42_pcmpistria128:
+ Opcode = X86ISD::PCMPISTRI;
+ X86CC = X86::COND_A;
+ break;
+ case Intrinsic::x86_sse42_pcmpestria128:
+ Opcode = X86ISD::PCMPESTRI;
+ X86CC = X86::COND_A;
+ break;
+ case Intrinsic::x86_sse42_pcmpistric128:
+ Opcode = X86ISD::PCMPISTRI;
+ X86CC = X86::COND_B;
+ break;
+ case Intrinsic::x86_sse42_pcmpestric128:
+ Opcode = X86ISD::PCMPESTRI;
+ X86CC = X86::COND_B;
+ break;
+ case Intrinsic::x86_sse42_pcmpistrio128:
+ Opcode = X86ISD::PCMPISTRI;
+ X86CC = X86::COND_O;
+ break;
+ case Intrinsic::x86_sse42_pcmpestrio128:
+ Opcode = X86ISD::PCMPESTRI;
+ X86CC = X86::COND_O;
+ break;
+ case Intrinsic::x86_sse42_pcmpistris128:
+ Opcode = X86ISD::PCMPISTRI;
+ X86CC = X86::COND_S;
+ break;
+ case Intrinsic::x86_sse42_pcmpestris128:
+ Opcode = X86ISD::PCMPESTRI;
+ X86CC = X86::COND_S;
+ break;
+ case Intrinsic::x86_sse42_pcmpistriz128:
+ Opcode = X86ISD::PCMPISTRI;
+ X86CC = X86::COND_E;
+ break;
+ case Intrinsic::x86_sse42_pcmpestriz128:
+ Opcode = X86ISD::PCMPESTRI;
+ X86CC = X86::COND_E;
+ break;
+ }
+ SmallVector<SDValue, 5> NewOps;
+ NewOps.append(Op->op_begin()+1, Op->op_end());
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size());
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8),
+ SDValue(PCMP.getNode(), 1));
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
+
+ case Intrinsic::x86_sse42_pcmpistri128:
+ case Intrinsic::x86_sse42_pcmpestri128: {
+ unsigned Opcode;
+ if (IntNo == Intrinsic::x86_sse42_pcmpistri128)
+ Opcode = X86ISD::PCMPISTRI;
+ else
+ Opcode = X86ISD::PCMPESTRI;
+
+ SmallVector<SDValue, 5> NewOps;
+ NewOps.append(Op->op_begin()+1, Op->op_end());
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ return DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size());
+ }
}
}
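The flag-only string-compare intrinsics are thus lowered to the new PCMPISTRI/PCMPESTRI nodes plus a SETCC on their second (EFLAGS) result; a sketch for one of them (assuming the usual intrinsic spelling):

    //   @llvm.x86.sse42.pcmpistria128(%a, %b, imm)
    //     ==> (zext i32 (X86ISD::SETCC COND_A,
    //                    (X86ISD::PCMPISTRI %a, %b, imm).getValue(1)))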
@@ -10231,7 +10490,7 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
- assert(VT.getSizeInBits() == 256 && VT.isInteger() &&
+ assert(VT.is256BitVector() && VT.isInteger() &&
"Unsupported value type for operation");
unsigned NumElems = VT.getVectorNumElements();
@@ -10256,14 +10515,14 @@ static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
}
SDValue X86TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const {
- assert(Op.getValueType().getSizeInBits() == 256 &&
+ assert(Op.getValueType().is256BitVector() &&
Op.getValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
return Lower256IntArith(Op, DAG);
}
SDValue X86TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const {
- assert(Op.getValueType().getSizeInBits() == 256 &&
+ assert(Op.getValueType().is256BitVector() &&
Op.getValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
return Lower256IntArith(Op, DAG);
@@ -10273,7 +10532,7 @@ SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
// Decompose 256-bit ops into smaller 128-bit ops.
- if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2())
+ if (VT.is256BitVector() && !Subtarget->hasAVX2())
return Lower256IntArith(Op, DAG);
assert((VT == MVT::v2i64 || VT == MVT::v4i64) &&
@@ -10503,7 +10762,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
}
// Decompose 256-bit shifts into smaller 128-bit shifts.
- if (VT.getSizeInBits() == 256) {
+ if (VT.is256BitVector()) {
unsigned NumElems = VT.getVectorNumElements();
MVT EltVT = VT.getVectorElementType().getSimpleVT();
EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
@@ -10992,9 +11251,9 @@ static void ReplaceATOMIC_LOAD(SDNode *Node,
Results.push_back(Swap.getValue(1));
}
-void X86TargetLowering::
+static void
ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG, unsigned NewOp) const {
+ SelectionDAG &DAG, unsigned NewOp) {
DebugLoc dl = Node->getDebugLoc();
assert (Node->getValueType(0) == MVT::i64 &&
"Only know how to expand i64 atomics");
@@ -11092,7 +11351,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Regs64bit ? X86::RBX : X86::EBX,
swapInL, cpInH.getValue(1));
swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl,
- Regs64bit ? X86::RCX : X86::ECX,
+ Regs64bit ? X86::RCX : X86::ECX,
swapInH, swapInL.getValue(1));
SDValue Ops[] = { swapInH.getValue(0),
N->getOperand(1),
@@ -11115,26 +11374,40 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::ATOMIC_LOAD_ADD:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMADD64_DAG);
- return;
case ISD::ATOMIC_LOAD_AND:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMAND64_DAG);
- return;
case ISD::ATOMIC_LOAD_NAND:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMNAND64_DAG);
- return;
case ISD::ATOMIC_LOAD_OR:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMOR64_DAG);
- return;
case ISD::ATOMIC_LOAD_SUB:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSUB64_DAG);
- return;
case ISD::ATOMIC_LOAD_XOR:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMXOR64_DAG);
- return;
- case ISD::ATOMIC_SWAP:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG);
+ case ISD::ATOMIC_SWAP: {
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode");
+ case ISD::ATOMIC_LOAD_ADD:
+ Opc = X86ISD::ATOMADD64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ Opc = X86ISD::ATOMAND64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_NAND:
+ Opc = X86ISD::ATOMNAND64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_OR:
+ Opc = X86ISD::ATOMOR64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_SUB:
+ Opc = X86ISD::ATOMSUB64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ Opc = X86ISD::ATOMXOR64_DAG;
+ break;
+ case ISD::ATOMIC_SWAP:
+ Opc = X86ISD::ATOMSWAP64_DAG;
+ break;
+ }
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, Opc);
return;
+ }
case ISD::ATOMIC_LOAD:
ReplaceATOMIC_LOAD(N, Results, DAG);
}
@@ -11194,6 +11467,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FHSUB: return "X86ISD::FHSUB";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
+ case X86ISD::FMAXC: return "X86ISD::FMAXC";
+ case X86ISD::FMINC: return "X86ISD::FMINC";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRCP: return "X86ISD::FRCP";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
@@ -11212,7 +11487,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG";
case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
+ case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
+ case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
@@ -11273,6 +11550,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL";
case X86ISD::SAHF: return "X86ISD::SAHF";
case X86ISD::RDRAND: return "X86ISD::RDRAND";
+ case X86ISD::FMADD: return "X86ISD::FMADD";
+ case X86ISD::FMSUB: return "X86ISD::FMSUB";
+ case X86ISD::FNMADD: return "X86ISD::FNMADD";
+ case X86ISD::FNMSUB: return "X86ISD::FNMSUB";
+ case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB";
+ case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD";
}
}
@@ -11408,7 +11691,7 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
// FIXME: This collection of masks seems suspect.
if (NumElts == 2)
return true;
- if (NumElts == 4 && VT.getSizeInBits() == 128) {
+ if (NumElts == 4 && VT.is128BitVector()) {
return (isMOVLMask(Mask, VT) ||
isCommutedMOVLMask(Mask, VT, true) ||
isSHUFPMask(Mask, VT, Subtarget->hasAVX()) ||
@@ -11834,8 +12117,7 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
MIB.addOperand(Op);
}
BuildMI(*BB, MI, dl,
- TII->get(Subtarget->hasAVX() ? X86::VMOVAPSrr : X86::MOVAPSrr),
- MI->getOperand(0).getReg())
+ TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
.addReg(X86::XMM0);
MI->eraseFromParent();
@@ -11868,24 +12150,6 @@ X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const {
}
MachineBasicBlock *
-X86TargetLowering::EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const {
- DebugLoc dl = MI->getDebugLoc();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
- // First arg in ECX, the second in EAX.
- BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
- .addReg(MI->getOperand(0).getReg());
- BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX)
- .addReg(MI->getOperand(1).getReg());
-
- // The instruction doesn't actually take any operands though.
- BuildMI(*BB, MI, dl, TII->get(X86::MWAITrr));
-
- MI->eraseFromParent(); // The pseudo is gone now.
- return BB;
-}
-
-MachineBasicBlock *
X86TargetLowering::EmitVAARG64WithCustomInserter(
MachineInstr *MI,
MachineBasicBlock *MBB) const {
@@ -12675,185 +12939,208 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// String/text processing lowering.
case X86::PCMPISTRM128REG:
case X86::VPCMPISTRM128REG:
- return EmitPCMP(MI, BB, 3, false /* in-mem */);
case X86::PCMPISTRM128MEM:
case X86::VPCMPISTRM128MEM:
- return EmitPCMP(MI, BB, 3, true /* in-mem */);
case X86::PCMPESTRM128REG:
case X86::VPCMPESTRM128REG:
- return EmitPCMP(MI, BB, 5, false /* in mem */);
case X86::PCMPESTRM128MEM:
- case X86::VPCMPESTRM128MEM:
- return EmitPCMP(MI, BB, 5, true /* in mem */);
+ case X86::VPCMPESTRM128MEM: {
+ unsigned NumArgs;
+ bool MemArg;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::PCMPISTRM128REG:
+ case X86::VPCMPISTRM128REG:
+ NumArgs = 3; MemArg = false; break;
+ case X86::PCMPISTRM128MEM:
+ case X86::VPCMPISTRM128MEM:
+ NumArgs = 3; MemArg = true; break;
+ case X86::PCMPESTRM128REG:
+ case X86::VPCMPESTRM128REG:
+ NumArgs = 5; MemArg = false; break;
+ case X86::PCMPESTRM128MEM:
+ case X86::VPCMPESTRM128MEM:
+ NumArgs = 5; MemArg = true; break;
+ }
+ return EmitPCMP(MI, BB, NumArgs, MemArg);
+ }
// Thread synchronization.
case X86::MONITOR:
return EmitMonitor(MI, BB);
- case X86::MWAIT:
- return EmitMwait(MI, BB);
// Atomic Lowering.
- case X86::ATOMAND32:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
- X86::AND32ri, X86::MOV32rm,
- X86::LCMPXCHG32,
- X86::NOT32r, X86::EAX,
- &X86::GR32RegClass);
- case X86::ATOMOR32:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
- X86::OR32ri, X86::MOV32rm,
- X86::LCMPXCHG32,
- X86::NOT32r, X86::EAX,
- &X86::GR32RegClass);
- case X86::ATOMXOR32:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
- X86::XOR32ri, X86::MOV32rm,
- X86::LCMPXCHG32,
- X86::NOT32r, X86::EAX,
- &X86::GR32RegClass);
- case X86::ATOMNAND32:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
- X86::AND32ri, X86::MOV32rm,
- X86::LCMPXCHG32,
- X86::NOT32r, X86::EAX,
- &X86::GR32RegClass, true);
case X86::ATOMMIN32:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);
case X86::ATOMMAX32:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr);
case X86::ATOMUMIN32:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr);
case X86::ATOMUMAX32:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr);
+ case X86::ATOMMIN16:
+ case X86::ATOMMAX16:
+ case X86::ATOMUMIN16:
+ case X86::ATOMUMAX16:
+ case X86::ATOMMIN64:
+ case X86::ATOMMAX64:
+ case X86::ATOMUMIN64:
+ case X86::ATOMUMAX64: {
+ unsigned Opc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::ATOMMIN32: Opc = X86::CMOVL32rr; break;
+ case X86::ATOMMAX32: Opc = X86::CMOVG32rr; break;
+ case X86::ATOMUMIN32: Opc = X86::CMOVB32rr; break;
+ case X86::ATOMUMAX32: Opc = X86::CMOVA32rr; break;
+ case X86::ATOMMIN16: Opc = X86::CMOVL16rr; break;
+ case X86::ATOMMAX16: Opc = X86::CMOVG16rr; break;
+ case X86::ATOMUMIN16: Opc = X86::CMOVB16rr; break;
+ case X86::ATOMUMAX16: Opc = X86::CMOVA16rr; break;
+ case X86::ATOMMIN64: Opc = X86::CMOVL64rr; break;
+ case X86::ATOMMAX64: Opc = X86::CMOVG64rr; break;
+ case X86::ATOMUMIN64: Opc = X86::CMOVB64rr; break;
+ case X86::ATOMUMAX64: Opc = X86::CMOVA64rr; break;
+ // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
+ }
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, Opc);
+ }
+
+ case X86::ATOMAND32:
+ case X86::ATOMOR32:
+ case X86::ATOMXOR32:
+ case X86::ATOMNAND32: {
+ bool Invert = false;
+ unsigned RegOpc, ImmOpc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::ATOMAND32:
+ RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; break;
+ case X86::ATOMOR32:
+ RegOpc = X86::OR32rr; ImmOpc = X86::OR32ri; break;
+ case X86::ATOMXOR32:
+ RegOpc = X86::XOR32rr; ImmOpc = X86::XOR32ri; break;
+ case X86::ATOMNAND32:
+ RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; Invert = true; break;
+ }
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
+ X86::MOV32rm, X86::LCMPXCHG32,
+ X86::NOT32r, X86::EAX,
+ &X86::GR32RegClass, Invert);
+ }
case X86::ATOMAND16:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
- X86::AND16ri, X86::MOV16rm,
- X86::LCMPXCHG16,
- X86::NOT16r, X86::AX,
- &X86::GR16RegClass);
case X86::ATOMOR16:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr,
- X86::OR16ri, X86::MOV16rm,
- X86::LCMPXCHG16,
- X86::NOT16r, X86::AX,
- &X86::GR16RegClass);
case X86::ATOMXOR16:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr,
- X86::XOR16ri, X86::MOV16rm,
- X86::LCMPXCHG16,
- X86::NOT16r, X86::AX,
- &X86::GR16RegClass);
- case X86::ATOMNAND16:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
- X86::AND16ri, X86::MOV16rm,
- X86::LCMPXCHG16,
+ case X86::ATOMNAND16: {
+ bool Invert = false;
+ unsigned RegOpc, ImmOpc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::ATOMAND16:
+ RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; break;
+ case X86::ATOMOR16:
+ RegOpc = X86::OR16rr; ImmOpc = X86::OR16ri; break;
+ case X86::ATOMXOR16:
+ RegOpc = X86::XOR16rr; ImmOpc = X86::XOR16ri; break;
+ case X86::ATOMNAND16:
+ RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; Invert = true; break;
+ }
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
+ X86::MOV16rm, X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
- &X86::GR16RegClass, true);
- case X86::ATOMMIN16:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr);
- case X86::ATOMMAX16:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG16rr);
- case X86::ATOMUMIN16:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB16rr);
- case X86::ATOMUMAX16:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA16rr);
+ &X86::GR16RegClass, Invert);
+ }
case X86::ATOMAND8:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
- X86::AND8ri, X86::MOV8rm,
- X86::LCMPXCHG8,
- X86::NOT8r, X86::AL,
- &X86::GR8RegClass);
case X86::ATOMOR8:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr,
- X86::OR8ri, X86::MOV8rm,
- X86::LCMPXCHG8,
- X86::NOT8r, X86::AL,
- &X86::GR8RegClass);
case X86::ATOMXOR8:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr,
- X86::XOR8ri, X86::MOV8rm,
- X86::LCMPXCHG8,
- X86::NOT8r, X86::AL,
- &X86::GR8RegClass);
- case X86::ATOMNAND8:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
- X86::AND8ri, X86::MOV8rm,
- X86::LCMPXCHG8,
+ case X86::ATOMNAND8: {
+ bool Invert = false;
+ unsigned RegOpc, ImmOpc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::ATOMAND8:
+ RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; break;
+ case X86::ATOMOR8:
+ RegOpc = X86::OR8rr; ImmOpc = X86::OR8ri; break;
+ case X86::ATOMXOR8:
+ RegOpc = X86::XOR8rr; ImmOpc = X86::XOR8ri; break;
+ case X86::ATOMNAND8:
+ RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; Invert = true; break;
+ }
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
+ X86::MOV8rm, X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
- &X86::GR8RegClass, true);
- // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
+ &X86::GR8RegClass, Invert);
+ }
+
// This group is for 64-bit host.
case X86::ATOMAND64:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
- X86::AND64ri32, X86::MOV64rm,
- X86::LCMPXCHG64,
- X86::NOT64r, X86::RAX,
- &X86::GR64RegClass);
case X86::ATOMOR64:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr,
- X86::OR64ri32, X86::MOV64rm,
- X86::LCMPXCHG64,
- X86::NOT64r, X86::RAX,
- &X86::GR64RegClass);
case X86::ATOMXOR64:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr,
- X86::XOR64ri32, X86::MOV64rm,
- X86::LCMPXCHG64,
- X86::NOT64r, X86::RAX,
- &X86::GR64RegClass);
- case X86::ATOMNAND64:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
- X86::AND64ri32, X86::MOV64rm,
- X86::LCMPXCHG64,
+ case X86::ATOMNAND64: {
+ bool Invert = false;
+ unsigned RegOpc, ImmOpc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::ATOMAND64:
+ RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; break;
+ case X86::ATOMOR64:
+ RegOpc = X86::OR64rr; ImmOpc = X86::OR64ri32; break;
+ case X86::ATOMXOR64:
+ RegOpc = X86::XOR64rr; ImmOpc = X86::XOR64ri32; break;
+ case X86::ATOMNAND64:
+ RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; Invert = true; break;
+ }
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
+ X86::MOV64rm, X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
- &X86::GR64RegClass, true);
- case X86::ATOMMIN64:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr);
- case X86::ATOMMAX64:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG64rr);
- case X86::ATOMUMIN64:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr);
- case X86::ATOMUMAX64:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr);
+ &X86::GR64RegClass, Invert);
+ }
// This group does 64-bit operations on a 32-bit host.
case X86::ATOMAND6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::AND32rr, X86::AND32rr,
- X86::AND32ri, X86::AND32ri,
- false);
case X86::ATOMOR6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::OR32rr, X86::OR32rr,
- X86::OR32ri, X86::OR32ri,
- false);
case X86::ATOMXOR6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::XOR32rr, X86::XOR32rr,
- X86::XOR32ri, X86::XOR32ri,
- false);
case X86::ATOMNAND6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::AND32rr, X86::AND32rr,
- X86::AND32ri, X86::AND32ri,
- true);
case X86::ATOMADD6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::ADD32rr, X86::ADC32rr,
- X86::ADD32ri, X86::ADC32ri,
- false);
case X86::ATOMSUB6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::SUB32rr, X86::SBB32rr,
- X86::SUB32ri, X86::SBB32ri,
- false);
- case X86::ATOMSWAP6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::MOV32rr, X86::MOV32rr,
- X86::MOV32ri, X86::MOV32ri,
- false);
+ case X86::ATOMSWAP6432: {
+ bool Invert = false;
+ unsigned RegOpcL, RegOpcH, ImmOpcL, ImmOpcH;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::ATOMAND6432:
+ RegOpcL = RegOpcH = X86::AND32rr;
+ ImmOpcL = ImmOpcH = X86::AND32ri;
+ break;
+ case X86::ATOMOR6432:
+ RegOpcL = RegOpcH = X86::OR32rr;
+ ImmOpcL = ImmOpcH = X86::OR32ri;
+ break;
+ case X86::ATOMXOR6432:
+ RegOpcL = RegOpcH = X86::XOR32rr;
+ ImmOpcL = ImmOpcH = X86::XOR32ri;
+ break;
+ case X86::ATOMNAND6432:
+ RegOpcL = RegOpcH = X86::AND32rr;
+ ImmOpcL = ImmOpcH = X86::AND32ri;
+ Invert = true;
+ break;
+ case X86::ATOMADD6432:
+ RegOpcL = X86::ADD32rr; RegOpcH = X86::ADC32rr;
+ ImmOpcL = X86::ADD32ri; ImmOpcH = X86::ADC32ri;
+ break;
+ case X86::ATOMSUB6432:
+ RegOpcL = X86::SUB32rr; RegOpcH = X86::SBB32rr;
+ ImmOpcL = X86::SUB32ri; ImmOpcH = X86::SBB32ri;
+ break;
+ case X86::ATOMSWAP6432:
+ RegOpcL = RegOpcH = X86::MOV32rr;
+ ImmOpcL = ImmOpcH = X86::MOV32ri;
+ break;
+ }
+ return EmitAtomicBit6432WithCustomInserter(MI, BB, RegOpcL, RegOpcH,
+ ImmOpcL, ImmOpcH, Invert);
+ }
+
case X86::VASTART_SAVE_XMM_REGS:
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
@@ -13043,7 +13330,7 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
false/*WriteMem*/);
return DAG.getNode(ISD::BITCAST, dl, VT, ResNode);
}
- }
+ }
// Emit a zeroed vector and insert the desired subvector on its
// first half.
@@ -13086,12 +13373,12 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
// Combine 256-bit vector shuffles. This is only profitable when in AVX mode
- if (Subtarget->hasAVX() && VT.getSizeInBits() == 256 &&
+ if (Subtarget->hasAVX() && VT.is256BitVector() &&
N->getOpcode() == ISD::VECTOR_SHUFFLE)
return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
// Only handle 128 wide vector from here on.
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return SDValue();
// Combine a vector_shuffle that is equal to build_vector load1, load2, load3,
@@ -13109,7 +13396,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
/// a sequence of vector shuffle operations.
/// It is possible when we truncate 256-bit vector to 128-bit vector
-SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
+SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
DAGCombinerInfo &DCI) const {
if (!DCI.isBeforeLegalizeOps())
return SDValue();
@@ -13151,8 +13438,9 @@ SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
// PSHUFD
static const int ShufMask1[] = {0, 2, 0, 0};
- OpLo = DAG.getVectorShuffle(VT, dl, OpLo, DAG.getUNDEF(VT), ShufMask1);
- OpHi = DAG.getVectorShuffle(VT, dl, OpHi, DAG.getUNDEF(VT), ShufMask1);
+ SDValue Undef = DAG.getUNDEF(VT);
+ OpLo = DAG.getVectorShuffle(VT, dl, OpLo, Undef, ShufMask1);
+ OpHi = DAG.getVectorShuffle(VT, dl, OpHi, Undef, ShufMask1);
// MOVLHPS
static const int ShufMask2[] = {0, 1, 4, 5};
@@ -13210,10 +13498,9 @@ SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
-1, -1, -1, -1, -1, -1, -1, -1};
- OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo, DAG.getUNDEF(MVT::v16i8),
- ShufMask1);
- OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi, DAG.getUNDEF(MVT::v16i8),
- ShufMask1);
+ SDValue Undef = DAG.getUNDEF(MVT::v16i8);
+ OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo, Undef, ShufMask1);
+ OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi, Undef, ShufMask1);
OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo);
OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi);
@@ -13718,6 +14005,88 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Check whether a boolean test is testing a boolean value generated by
+// X86ISD::SETCC. If so, return the operand of that SETCC and proper condition
+// code.
+//
+// Simplify the following patterns:
+// (Op (CMP (SETCC Cond EFLAGS) 1) EQ) or
+// (Op (CMP (SETCC Cond EFLAGS) 0) NEQ)
+// to (Op EFLAGS Cond)
+//
+// (Op (CMP (SETCC Cond EFLAGS) 0) EQ) or
+// (Op (CMP (SETCC Cond EFLAGS) 1) NEQ)
+// to (Op EFLAGS !Cond)
+//
+// where Op could be BRCOND or CMOV.
+//
+static SDValue BoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
+  // Quit unless this is a CMP, or a SUB whose value result is unused.
+ if (Cmp.getOpcode() != X86ISD::CMP &&
+ (Cmp.getOpcode() != X86ISD::SUB || Cmp.getNode()->hasAnyUseOfValue(0)))
+ return SDValue();
+
+ // Quit if not used as a boolean value.
+ if (CC != X86::COND_E && CC != X86::COND_NE)
+ return SDValue();
+
+ // Check CMP operands. One of them should be 0 or 1 and the other should be
+ // an SetCC or extended from it.
+ SDValue Op1 = Cmp.getOperand(0);
+ SDValue Op2 = Cmp.getOperand(1);
+
+ SDValue SetCC;
+ const ConstantSDNode* C = 0;
+ bool needOppositeCond = (CC == X86::COND_E);
+
+ if ((C = dyn_cast<ConstantSDNode>(Op1)))
+ SetCC = Op2;
+ else if ((C = dyn_cast<ConstantSDNode>(Op2)))
+ SetCC = Op1;
+  else // Quit if neither operand is a constant.
+ return SDValue();
+
+ if (C->getZExtValue() == 1)
+ needOppositeCond = !needOppositeCond;
+ else if (C->getZExtValue() != 0)
+    // Quit if the constant is neither 0 nor 1.
+ return SDValue();
+
+ // Skip 'zext' node.
+ if (SetCC.getOpcode() == ISD::ZERO_EXTEND)
+ SetCC = SetCC.getOperand(0);
+
+ // Quit if not SETCC.
+  // FIXME: So far we only handle boolean values generated from SETCC. If
+  // there are other ways to generate boolean values, we need to handle them
+  // here as well.
+ if (SetCC.getOpcode() != X86ISD::SETCC)
+ return SDValue();
+
+ // Set the condition code or opposite one if necessary.
+ CC = X86::CondCode(SetCC.getConstantOperandVal(0));
+ if (needOppositeCond)
+ CC = X86::GetOppositeBranchCondition(CC);
+
+ return SetCC.getOperand(1);
+}
+
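A worked instance of the pattern this helper rewrites (operand order as in the comment above; the chain operand is omitted for brevity):

    //   (X86ISD::BRCOND dest, COND_NE,
    //        (X86ISD::CMP (zext (X86ISD::SETCC COND_L, flags)), 0))
    //     ==> (X86ISD::BRCOND dest, COND_L, flags)
    //   with the constant 1, or with COND_E, the returned condition is the
    //   opposite one (via GetOppositeBranchCondition).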
+static bool IsValidFCMOVCondition(X86::CondCode CC) {
+ switch (CC) {
+ default:
+ return false;
+ case X86::COND_B:
+ case X86::COND_BE:
+ case X86::COND_E:
+ case X86::COND_P:
+ case X86::COND_AE:
+ case X86::COND_A:
+ case X86::COND_NE:
+ case X86::COND_NP:
+ return true;
+ }
+}
+
/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
@@ -13731,6 +14100,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
SDValue TrueOp = N->getOperand(1);
X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
SDValue Cond = N->getOperand(3);
+
if (CC == X86::COND_E || CC == X86::COND_NE) {
switch (Cond.getOpcode()) {
default: break;
@@ -13742,6 +14112,18 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
}
}
+ SDValue Flags;
+
+ Flags = BoolTestSetCCCombine(Cond, CC);
+ if (Flags.getNode() &&
+ // Extra check as FCMOV only supports a subset of X86 cond.
+ (FalseOp.getValueType() != MVT::f80 || IsValidFCMOVCondition(CC))) {
+ SDValue Ops[] = { FalseOp, TrueOp,
+ DAG.getConstant(CC, MVT::i8), Flags };
+ return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(),
+ Ops, array_lengthof(Ops));
+ }
+
// If this is a select between two integer constants, try to do some
// optimizations. Note that the operands are ordered the opposite of SELECT
// operands.
@@ -14164,7 +14546,7 @@ static bool CanFoldXORWithAllOnes(const SDNode *N) {
// Sometimes the operand may come from a insert_subvector building a 256-bit
// allones vector
- if (VT.getSizeInBits() == 256 &&
+ if (VT.is256BitVector() &&
N->getOpcode() == ISD::INSERT_SUBVECTOR) {
SDValue V1 = N->getOperand(0);
SDValue V2 = N->getOperand(1);
@@ -14609,7 +14991,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
// On Sandy Bridge, 256-bit memory operations are executed by two
// 128-bit ports. However, on Haswell it is better to issue a single 256-bit
// memory operation.
- if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2() &&
+ if (VT.is256BitVector() && !Subtarget->hasAVX2() &&
StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS &&
StoredVal.getNumOperands() == 2) {
SDValue Value0 = StoredVal.getOperand(0);
@@ -14992,6 +15374,29 @@ static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+/// PerformFMinFMaxCombine - Do target-specific dag combines on X86ISD::FMIN and
+/// X86ISD::FMAX nodes.
+static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX);
+
+ // Only perform optimizations if UnsafeMath is used.
+ if (!DAG.getTarget().Options.UnsafeFPMath)
+ return SDValue();
+
+ // If we run in unsafe-math mode, then convert the FMAX and FMIN nodes
+  // into FMAXC and FMINC, which are commutative operations.
+ unsigned NewOp = 0;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unknown opcode");
+ case X86ISD::FMIN: NewOp = X86ISD::FMINC; break;
+ case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break;
+ }
+
+ return DAG.getNode(NewOp, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(0), N->getOperand(1));
+}
+
+
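Background, not part of the patch: SSE MINPS/MAXPS are not commutative, because a NaN in either operand, or two zero operands, makes them return the second source operand:

    //   minps(x, NaN) -> NaN     but     minps(NaN, x) -> x
    //   FMIN/FMAX model that ordering; FMINC/FMAXC give it up, which is only
    //   acceptable when unsafe FP math is enabled.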
/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
// FAND(0.0, x) -> 0.0
@@ -15067,19 +15472,19 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
// concat the vectors to original VT
unsigned NumElems = OpVT.getVectorNumElements();
+ SDValue Undef = DAG.getUNDEF(OpVT);
+
SmallVector<int,8> ShufMask1(NumElems, -1);
for (unsigned i = 0; i != NumElems/2; ++i)
ShufMask1[i] = i;
- SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT),
- &ShufMask1[0]);
+ SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask1[0]);
SmallVector<int,8> ShufMask2(NumElems, -1);
for (unsigned i = 0; i != NumElems/2; ++i)
ShufMask2[i] = i + NumElems/2;
- SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT),
- &ShufMask2[0]);
+ SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask2[0]);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
VT.getVectorNumElements()/2);
@@ -15092,6 +15497,40 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget* Subtarget) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ EVT ScalarVT = VT.getScalarType();
+ if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget->hasFMA())
+ return SDValue();
+
+ SDValue A = N->getOperand(0);
+ SDValue B = N->getOperand(1);
+ SDValue C = N->getOperand(2);
+
+ bool NegA = (A.getOpcode() == ISD::FNEG);
+ bool NegB = (B.getOpcode() == ISD::FNEG);
+ bool NegC = (C.getOpcode() == ISD::FNEG);
+
+  // The product is negated when exactly one of A, B is negated (NegA ^ NegB).
+ bool NegMul = (NegA != NegB);
+ if (NegA)
+ A = A.getOperand(0);
+ if (NegB)
+ B = B.getOperand(0);
+ if (NegC)
+ C = C.getOperand(0);
+
+ unsigned Opcode;
+ if (!NegMul)
+ Opcode = (!NegC)? X86ISD::FMADD : X86ISD::FMSUB;
+ else
+ Opcode = (!NegC)? X86ISD::FNMADD : X86ISD::FNMSUB;
+ return DAG.getNode(Opcode, dl, VT, A, B, C);
+}
+
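The opcode choice above collapses to a small table over which inputs carry an FNEG (assuming the usual X86ISD semantics, FMADD = a*b+c and FNMADD = -(a*b)+c):

    //   fma( a,  b,  c) -> FMADD      fma(-a,  b,  c) -> FNMADD
    //   fma( a,  b, -c) -> FMSUB      fma(-a,  b, -c) -> FNMSUB
    //   fma(-a, -b,  c) -> FMADD      (both negations cancel: only
    //                                  NegA xor NegB matters)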
static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
@@ -15164,7 +15603,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(1);
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB)
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0)))
@@ -15187,19 +15626,50 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) {
// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
- unsigned X86CC = N->getConstantOperandVal(0);
- SDValue EFLAG = N->getOperand(1);
DebugLoc DL = N->getDebugLoc();
+ X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0));
+ SDValue EFLAGS = N->getOperand(1);
// Materialize "setb reg" as "sbb reg,reg", since it can be extended without
// a zext and produces an all-ones bit which is more useful than 0/1 in some
// cases.
- if (X86CC == X86::COND_B)
+ if (CC == X86::COND_B)
return DAG.getNode(ISD::AND, DL, MVT::i8,
DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
- DAG.getConstant(X86CC, MVT::i8), EFLAG),
+ DAG.getConstant(CC, MVT::i8), EFLAGS),
DAG.getConstant(1, MVT::i8));
+ SDValue Flags;
+
+ Flags = BoolTestSetCCCombine(EFLAGS, CC);
+ if (Flags.getNode()) {
+ SDValue Cond = DAG.getConstant(CC, MVT::i8);
+ return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
+ }
+
+ return SDValue();
+}
+
+// Optimize branch condition evaluation.
+//
+static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ DebugLoc DL = N->getDebugLoc();
+ SDValue Chain = N->getOperand(0);
+ SDValue Dest = N->getOperand(1);
+ SDValue EFLAGS = N->getOperand(3);
+ X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2));
+
+ SDValue Flags;
+
+ Flags = BoolTestSetCCCombine(EFLAGS, CC);
+ if (Flags.getNode()) {
+ SDValue Cond = DAG.getConstant(CC, MVT::i8);
+ return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
+ Flags);
+ }
+
return SDValue();
}
@@ -15408,6 +15878,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG);
+ case X86ISD::FMIN:
+ case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
@@ -15417,6 +15889,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI);
case ISD::SETCC: return PerformISDSETCCCombine(N, DAG);
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG);
+ case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::PALIGN:
case X86ISD::UNPCKH:
@@ -15431,6 +15904,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VPERMILP:
case X86ISD::VPERM2X128:
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
+ case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget);
}
return SDValue();
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 78e4d75..74f5167 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -137,10 +137,6 @@ namespace llvm {
/// relative displacements.
WrapperRIP,
- /// MOVQ2DQ - Copies a 64-bit value from an MMX vector to the low word
- /// of an XMM vector, with the high word zero filled.
- MOVQ2DQ,
-
/// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector
/// to an MMX vector. If you think this is too close to the previous
/// mnemonic, so do I; blame Intel.
@@ -199,6 +195,9 @@ namespace llvm {
///
FMAX, FMIN,
+ /// FMAXC, FMINC - Commutative FMIN and FMAX.
+ FMAXC, FMINC,
+
/// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal
/// approximation. Note that these typically require refinement
/// in order to obtain suitable precision.
@@ -231,6 +230,9 @@ namespace llvm {
// VSEXT_MOVL - Vector move low and sign extend.
VSEXT_MOVL,
+ // VFPEXT - Vector FP extend.
+ VFPEXT,
+
// VSHL, VSRL - 128-bit vector logical left / right shift
VSHLDQ, VSRLDQ,
@@ -294,6 +296,14 @@ namespace llvm {
// PMULUDQ - Vector multiply packed unsigned doubleword integers
PMULUDQ,
+ // FMA nodes
+ FMADD,
+ FNMADD,
+ FMSUB,
+ FNMSUB,
+ FMADDSUB,
+ FMSUBADD,
+
// VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
// according to %al. An operator is needed so that this can be expanded
// with control flow.
@@ -325,6 +335,10 @@ namespace llvm {
// RDRAND - Get a random integer and indicate whether it is valid in CF.
RDRAND,
+ // PCMP*STRI
+ PCMPISTRI,
+ PCMPESTRI,
+
// ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
// ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
// Atomic 64-bit binary operations.
@@ -597,6 +611,12 @@ namespace llvm {
virtual bool isZExtFree(Type *Ty1, Type *Ty2) const;
virtual bool isZExtFree(EVT VT1, EVT VT2) const;
+ /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
+ /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
+ /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
+ /// is expanded to mul + add.
+ virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; }
+
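With the hook above returning true, an llvm.fmuladd intrinsic produced by the front end is expected to lower to a fused multiply-add whenever FMA is legal for the type. A minimal sketch of the kind of expression this affects; the build flags named in the comment are an assumption for illustration, not part of this change.

    // Built with something like: clang++ -O2 -mfma -ffp-contract=fast
    // The expression below is a candidate for llvm.fmuladd and, given the hook
    // above, for a single vfmadd* instruction instead of separate mul + add.
    #include <cstdio>

    double fused(double a, double b, double c) {
      return a * b + c;  // may contract to fma(a, b, c)
    }

    int main() {
      std::printf("%f\n", fused(2.0, 3.0, 4.0));
      return 0;
    }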
/// isNarrowingProfitable - Return true if it's profitable to narrow
/// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
/// from i32 to i8 but not from i32 to i16.
@@ -656,7 +676,8 @@ namespace llvm {
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
- virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
+ virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const;
/// getStackCookieLocation - Return true if the target stores stack
/// protector cookies at a fixed offset in some non-standard address
@@ -813,6 +834,8 @@ namespace llvm {
SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const;
SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const;
+
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -844,9 +867,6 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const;
- void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG, unsigned NewOp) const;
-
/// Utility function to emit string processing sse4.2 instructions
/// that return in xmm0.
/// This takes the instruction to expand, the associated machine basic
@@ -933,7 +953,8 @@ namespace llvm {
};
namespace X86 {
- FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo);
}
}
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index b6ba68f..f790611 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -1132,8 +1132,10 @@ defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m,
X86xor_flag, xor, 1, 0>;
defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m,
X86add_flag, add, 1, 1>;
+let isCompare = 1 in {
defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
X86sub_flag, sub, 0, 0>;
+}
// Arithmetic.
let Uses = [EFLAGS] in {
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
index 0d5490a..2eb454d 100644
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -39,12 +39,15 @@ let neverHasSideEffects = 1 in {
// Sign/Zero extenders
+let neverHasSideEffects = 1 in {
def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>,
TB, OpSize;
+let mayLoad = 1 in
def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>,
TB, OpSize;
+} // neverHasSideEffects = 1
def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB;
@@ -59,12 +62,15 @@ def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
[(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>,
TB;
+let neverHasSideEffects = 1 in {
def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>,
TB, OpSize;
+let mayLoad = 1 in
def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>,
TB, OpSize;
+} // neverHasSideEffects = 1
def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB;
@@ -82,6 +88,7 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
// except that they use GR32_NOREX for the output operand register class
// instead of GR32. This allows them to operate on h registers on x86-64.
+let neverHasSideEffects = 1, isCodeGenOnly = 1 in {
def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
@@ -91,6 +98,7 @@ def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[], IIC_MOVZX>, TB;
+}
// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
// operand, which makes it a rare instruction with an 8-bit register
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index 8802a2e..95ee7e5 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -16,159 +16,307 @@
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
-multiclass fma3p_rm<bits<8> opc, string OpcodeStr> {
-let neverHasSideEffects = 1 in {
- def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- []>;
- let mayLoad = 1 in
- def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, f128mem:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- []>;
- def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- []>;
- let mayLoad = 1 in
- def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, f256mem:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- []>;
-} // neverHasSideEffects = 1
-}
+multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
+ PatFrag MemFrag128, PatFrag MemFrag256,
+ ValueType OpVT128, ValueType OpVT256,
+ SDPatternOperator Op = null_frag, bit MayLoad = 1> {
+ def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst, (OpVT128 (Op VR128:$src2,
+ VR128:$src1, VR128:$src3)))]>;
-// Intrinsic for 132 pattern
-multiclass fma3p_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag MemFrag128, PatFrag MemFrag256,
- Intrinsic Int128, Intrinsic Int256> {
- def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src3, VR128:$src2))]>;
- def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, f128mem:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (Int128 VR128:$src1, (MemFrag128 addr:$src3), VR128:$src2))]>;
- def rY_Int : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src3, VR256:$src2))]>;
- def mY_Int : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, f256mem:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR256:$dst,
- (Int256 VR256:$src1, (MemFrag256 addr:$src3), VR256:$src2))]>;
+ let mayLoad = MayLoad in
+ def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1,
+ (MemFrag128 addr:$src3))))]>;
+
+ def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR256:$dst, (OpVT256 (Op VR256:$src2, VR256:$src1,
+ VR256:$src3)))]>;
+
+ let mayLoad = MayLoad in
+ def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR256:$dst,
+ (OpVT256 (Op VR256:$src2, VR256:$src1,
+ (MemFrag256 addr:$src3))))]>;
}
} // Constraints = "$src1 = $dst"
multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpcodeStr, string PackTy,
PatFrag MemFrag128, PatFrag MemFrag256,
- Intrinsic Int128, Intrinsic Int256> {
- defm r132 : fma3p_rm_int <opc132, !strconcat(OpcodeStr,
- !strconcat("132", PackTy)), MemFrag128, MemFrag256,
- Int128, Int256>;
- defm r132 : fma3p_rm <opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy))>;
- defm r213 : fma3p_rm <opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy))>;
- defm r231 : fma3p_rm <opc231, !strconcat(OpcodeStr, !strconcat("231", PackTy))>;
+ SDNode Op, ValueType OpTy128, ValueType OpTy256> {
+ defm r213 : fma3p_rm<opc213,
+ !strconcat(OpcodeStr, !strconcat("213", PackTy)),
+ MemFrag128, MemFrag256, OpTy128, OpTy256, Op, 0>;
+let neverHasSideEffects = 1 in {
+ defm r132 : fma3p_rm<opc132,
+ !strconcat(OpcodeStr, !strconcat("132", PackTy)),
+ MemFrag128, MemFrag256, OpTy128, OpTy256>;
+ defm r231 : fma3p_rm<opc231,
+ !strconcat(OpcodeStr, !strconcat("231", PackTy)),
+ MemFrag128, MemFrag256, OpTy128, OpTy256>;
+} // neverHasSideEffects = 1
}
// Fused Multiply-Add
let ExeDomain = SSEPackedSingle in {
defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", memopv4f32,
- memopv8f32, int_x86_fma_vfmadd_ps, int_x86_fma_vfmadd_ps_256>;
- defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", memopv4f32,
- memopv8f32, int_x86_fma_vfmsub_ps, int_x86_fma_vfmsub_ps_256>;
+ memopv8f32, X86Fmadd, v4f32, v8f32>;
+ defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", memopv4f32,
+ memopv8f32, X86Fmsub, v4f32, v8f32>;
defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps",
- memopv4f32, memopv8f32, int_x86_fma_vfmaddsub_ps,
- int_x86_fma_vfmaddsub_ps_256>;
+ memopv4f32, memopv8f32, X86Fmaddsub,
+ v4f32, v8f32>;
defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps",
- memopv4f32, memopv8f32, int_x86_fma_vfmsubadd_ps,
- int_x86_fma_vfmaddsub_ps_256>;
+ memopv4f32, memopv8f32, X86Fmsubadd,
+ v4f32, v8f32>;
}
let ExeDomain = SSEPackedDouble in {
defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", memopv2f64,
- memopv4f64, int_x86_fma_vfmadd_pd, int_x86_fma_vfmadd_pd_256>, VEX_W;
+ memopv4f64, X86Fmadd, v2f64, v4f64>, VEX_W;
defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", memopv2f64,
- memopv4f64, int_x86_fma_vfmsub_pd, int_x86_fma_vfmsub_pd_256>, VEX_W;
- defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", memopv2f64,
- memopv4f64, int_x86_fma_vfmaddsub_pd, int_x86_fma_vfmaddsub_pd_256>, VEX_W;
- defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd", memopv2f64,
- memopv4f64, int_x86_fma_vfmsubadd_pd, int_x86_fma_vfmsubadd_pd_256>, VEX_W;
+ memopv4f64, X86Fmsub, v2f64, v4f64>, VEX_W;
+ defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd",
+ memopv2f64, memopv4f64, X86Fmaddsub,
+ v2f64, v4f64>, VEX_W;
+ defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd",
+ memopv2f64, memopv4f64, X86Fmsubadd,
+ v2f64, v4f64>, VEX_W;
}
// Fused Negative Multiply-Add
let ExeDomain = SSEPackedSingle in {
defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", memopv4f32,
- memopv8f32, int_x86_fma_vfnmadd_ps, int_x86_fma_vfnmadd_ps_256>;
+ memopv8f32, X86Fnmadd, v4f32, v8f32>;
defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", memopv4f32,
- memopv8f32, int_x86_fma_vfnmsub_ps, int_x86_fma_vfnmsub_ps_256>;
+ memopv8f32, X86Fnmsub, v4f32, v8f32>;
}
let ExeDomain = SSEPackedDouble in {
defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", memopv2f64,
- memopv4f64, int_x86_fma_vfnmadd_pd, int_x86_fma_vfnmadd_pd_256>, VEX_W;
- defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", memopv2f64,
- memopv4f64, int_x86_fma_vfnmsub_pd, int_x86_fma_vfnmsub_pd_256>, VEX_W;
+ memopv4f64, X86Fnmadd, v2f64, v4f64>, VEX_W;
+ defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd",
+ memopv2f64, memopv4f64, X86Fnmsub, v2f64,
+ v4f64>, VEX_W;
}
+let Predicates = [HasFMA] in {
+ def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1,
+ (memopv4f32 addr:$src3)),
+ (VFMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1,
+ (memopv4f32 addr:$src3)),
+ (VFMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMADDSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1,
+ (memopv4f32 addr:$src3)),
+ (VFMADDSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMSUBADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1,
+ (memopv4f32 addr:$src3)),
+ (VFMSUBADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+
+ def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1,
+ (memopv8f32 addr:$src3)),
+ (VFMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1,
+ (memopv8f32 addr:$src3)),
+ (VFMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMADDSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1,
+ (memopv8f32 addr:$src3)),
+ (VFMADDSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMSUBADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1,
+ (memopv8f32 addr:$src3)),
+ (VFMSUBADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+
+ def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1,
+ (memopv2f64 addr:$src3)),
+ (VFMADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1,
+ (memopv2f64 addr:$src3)),
+ (VFMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMADDSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1,
+ (memopv2f64 addr:$src3)),
+ (VFMADDSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMSUBADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1,
+ (memopv2f64 addr:$src3)),
+ (VFMSUBADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+
+ def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1,
+ (memopv4f64 addr:$src3)),
+ (VFMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1,
+ (memopv4f64 addr:$src3)),
+ (VFMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMADDSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1,
+ (memopv4f64 addr:$src3)),
+ (VFMADDSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMSUBADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1,
+ (memopv4f64 addr:$src3)),
+ (VFMSUBADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+
+ def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFNMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1,
+ (memopv4f32 addr:$src3)),
+ (VFNMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFNMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1,
+ (memopv4f32 addr:$src3)),
+ (VFNMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+
+ def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFNMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1,
+ (memopv8f32 addr:$src3)),
+ (VFNMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFNMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1,
+ (memopv8f32 addr:$src3)),
+ (VFNMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+
+ def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFNMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1,
+ (memopv2f64 addr:$src3)),
+ (VFNMADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFNMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1,
+ (memopv2f64 addr:$src3)),
+ (VFNMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+
+ def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFNMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1,
+ (memopv4f64 addr:$src3)),
+ (VFNMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFNMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1,
+ (memopv4f64 addr:$src3)),
+ (VFNMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+
+} // Predicates = [HasFMA]
let Constraints = "$src1 = $dst" in {
multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop,
- RegisterClass RC> {
-let neverHasSideEffects = 1 in {
- def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, RC:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- []>;
- let mayLoad = 1 in
- def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, x86memop:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- []>;
-} // neverHasSideEffects = 1
+ RegisterClass RC, ValueType OpVT, PatFrag mem_frag,
+ SDPatternOperator OpNode = null_frag, bit MayLoad = 1> {
+ def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
+ let mayLoad = MayLoad in
+ def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, x86memop:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src2, RC:$src1,
+ (mem_frag addr:$src3))))]>;
}
multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr, Operand memop,
- ComplexPattern mem_cpat, Intrinsic IntId> {
+ ComplexPattern mem_cpat, Intrinsic IntId,
+ RegisterClass RC> {
def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst, (IntId VR128:$src1, VR128:$src3, VR128:$src2))]>;
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst, (IntId VR128:$src2, VR128:$src1,
+ VR128:$src3))]>;
def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, memop:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (IntId VR128:$src1, mem_cpat:$src3, VR128:$src2))]>;
+ (ins VR128:$src1, VR128:$src2, memop:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (IntId VR128:$src2, VR128:$src1, mem_cpat:$src3))]>;
}
} // Constraints = "$src1 = $dst"
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
- string OpStr, Intrinsic IntF32, Intrinsic IntF64> {
- defm SSr132 : fma3s_rm<opc132, !strconcat(OpStr, "132ss"), f32mem, FR32>;
- defm SSr213 : fma3s_rm<opc213, !strconcat(OpStr, "213ss"), f32mem, FR32>;
- defm SSr231 : fma3s_rm<opc231, !strconcat(OpStr, "231ss"), f32mem, FR32>;
- defm SDr132 : fma3s_rm<opc132, !strconcat(OpStr, "132sd"), f64mem, FR64>, VEX_W;
- defm SDr213 : fma3s_rm<opc213, !strconcat(OpStr, "213sd"), f64mem, FR64>, VEX_W;
- defm SDr231 : fma3s_rm<opc231, !strconcat(OpStr, "231sd"), f64mem, FR64>, VEX_W;
- defm SSr132 : fma3s_rm_int <opc132, !strconcat(OpStr, "132ss"), ssmem,
- sse_load_f32, IntF32>;
- defm SDr132 : fma3s_rm_int <opc132, !strconcat(OpStr, "132sd"), sdmem,
- sse_load_f64, IntF64>;
+ string OpStr, string PackTy, Intrinsic Int,
+ SDNode OpNode, RegisterClass RC, ValueType OpVT,
+ X86MemOperand x86memop, Operand memop, PatFrag mem_frag,
+ ComplexPattern mem_cpat> {
+let neverHasSideEffects = 1 in {
+ defm r132 : fma3s_rm<opc132, !strconcat(OpStr, !strconcat("132", PackTy)),
+ x86memop, RC, OpVT, mem_frag>;
+ defm r231 : fma3s_rm<opc231, !strconcat(OpStr, !strconcat("231", PackTy)),
+ x86memop, RC, OpVT, mem_frag>;
+}
+
+defm r213 : fma3s_rm<opc213, !strconcat(OpStr, !strconcat("213", PackTy)),
+ x86memop, RC, OpVT, mem_frag, OpNode, 0>,
+ fma3s_rm_int<opc213, !strconcat(OpStr, !strconcat("213", PackTy)),
+ memop, mem_cpat, Int, RC>;
+}
+
+multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
+ string OpStr, Intrinsic IntF32, Intrinsic IntF64,
+ SDNode OpNode> {
+ defm SS : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", IntF32, OpNode,
+ FR32, f32, f32mem, ssmem, loadf32, sse_load_f32>;
+ defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", IntF64, OpNode,
+ FR64, f64, f64mem, sdmem, loadf64, sse_load_f64>, VEX_W;
}
-defm VFMADD : fma3s_forms<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss,
- int_x86_fma_vfmadd_sd>, VEX_LIG;
-defm VFMSUB : fma3s_forms<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss,
- int_x86_fma_vfmsub_sd>, VEX_LIG;
+defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss,
+ int_x86_fma_vfmadd_sd, X86Fmadd>, VEX_LIG;
+defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss,
+ int_x86_fma_vfmsub_sd, X86Fmsub>, VEX_LIG;
-defm VFNMADD : fma3s_forms<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss,
- int_x86_fma_vfnmadd_sd>, VEX_LIG;
-defm VFNMSUB : fma3s_forms<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss,
- int_x86_fma_vfnmsub_sd>, VEX_LIG;
+defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss,
+ int_x86_fma_vfnmadd_sd, X86Fnmadd>, VEX_LIG;
+defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss,
+ int_x86_fma_vfnmsub_sd, X86Fnmsub>, VEX_LIG;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index a115ab4..81b4f81 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -366,7 +366,7 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
//
// SDI - SSE2 instructions with XD prefix.
// SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix.
-// SSDI - SSE2 instructions with XS prefix.
+// S2SI - SSE2 instructions with XS prefix.
-// SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
+// S2SIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
// PDI - SSE2 instructions with TB and OpSize prefixes.
// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
@@ -379,10 +379,10 @@ class SDI<bits<8> o, Format F, dag outs, dag ins, string asm,
class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>;
-class SSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE2]>;
-class SSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
class PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
@@ -397,6 +397,10 @@ class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD,
Requires<[HasAVX]>;
+class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS,
+ Requires<[HasAVX]>;
class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>, TB,
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index ec030dd..ee2d3c4 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -29,6 +29,13 @@ def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
+
+// Commutative and Associative FMIN and FMAX.
+def X86fminc : SDNode<"X86ISD::FMINC", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86fmaxc : SDNode<"X86ISD::FMAXC", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+
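The plain FMIN/FMAX nodes model the asymmetric minss/maxss semantics (the second operand is returned for unordered inputs and for signed zeros), so their operands cannot be swapped. FMINC/FMAXC are for contexts where that asymmetry does not matter. A small sketch of such a context; the fast-math assumption is illustrative only.

    // Assuming no NaNs and no dependence on the sign of zero (fast-math style),
    // either operand order of (V)MINSS computes the same value, so a commutative
    // min node is a valid way to select this.
    static inline float fastMin(float a, float b) {
      return a < b ? a : b;
    }

    int main() {
      return fastMin(1.0f, 2.0f) < 1.5f ? 0 : 1;
    }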
def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
@@ -73,14 +80,20 @@ def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
def X86vzmovly : SDNode<"X86ISD::VZEXT_MOVL",
- SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisOpSmallerThanOp<1, 0> ]>>;
def X86vsmovl : SDNode<"X86ISD::VSEXT_MOVL",
- SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>;
+ SDTypeProfile<1, 1,
+ [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>;
def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+def X86vfpext : SDNode<"X86ISD::VFPEXT",
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisFP<0>, SDTCisFP<1>]>>;
+
def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>;
def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>;
def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>;
@@ -125,7 +138,10 @@ def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
-SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
+ SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
+
+def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
+ SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
@@ -160,9 +176,26 @@ def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
-def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>;
-def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>;
-def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>;
+def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>;
+def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>;
+def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>;
+def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
+def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
+def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>;
+def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFma>;
+def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFma>;
+def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>;
+
+def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
+ SDTCisVT<4, i8>]>;
+def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, v16i8>, SDTCisVT<3, i32>,
+ SDTCisVT<4, v16i8>, SDTCisVT<5, i32>,
+ SDTCisVT<6, i8>]>;
+
+def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>;
+def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>;
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 69493bc..459f01a 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -414,12 +414,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
{ X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 },
{ X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 },
- { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
- { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
- { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
- { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
- { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
- { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
{ X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
{ X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
{ X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
@@ -680,6 +674,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::IMUL64rr, X86::IMUL64rm, 0 },
{ X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
{ X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
+ { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
+ { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
+ { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
+ { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
+ { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
+ { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
{ X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 },
{ X86::MAXPDrr_Int, X86::MAXPDrm_Int, TB_ALIGN_16 },
{ X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 },
@@ -1130,8 +1130,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFMADDSDr132r, X86::VFMADDSDr132m, 0 },
{ X86::VFMADDSSr213r, X86::VFMADDSSr213m, 0 },
{ X86::VFMADDSDr213r, X86::VFMADDSDr213m, 0 },
- { X86::VFMADDSSr132r_Int, X86::VFMADDSSr132m_Int, 0 },
- { X86::VFMADDSDr132r_Int, X86::VFMADDSDr132m_Int, 0 },
+ { X86::VFMADDSSr213r_Int, X86::VFMADDSSr213m_Int, 0 },
+ { X86::VFMADDSDr213r_Int, X86::VFMADDSDr213m_Int, 0 },
{ X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_16 },
{ X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_16 },
@@ -1145,10 +1145,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFMADDPDr132rY, X86::VFMADDPDr132mY, TB_ALIGN_32 },
{ X86::VFMADDPSr213rY, X86::VFMADDPSr213mY, TB_ALIGN_32 },
{ X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_32 },
- { X86::VFMADDPSr132r_Int, X86::VFMADDPSr132m_Int, TB_ALIGN_16 },
- { X86::VFMADDPDr132r_Int, X86::VFMADDPDr132m_Int, TB_ALIGN_16 },
- { X86::VFMADDPSr132rY_Int, X86::VFMADDPSr132mY_Int, TB_ALIGN_32 },
- { X86::VFMADDPDr132rY_Int, X86::VFMADDPDr132mY_Int, TB_ALIGN_32 },
{ X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, 0 },
{ X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, 0 },
@@ -1156,8 +1152,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, 0 },
{ X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, 0 },
{ X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, 0 },
- { X86::VFNMADDSSr132r_Int, X86::VFNMADDSSr132m_Int, 0 },
- { X86::VFNMADDSDr132r_Int, X86::VFNMADDSDr132m_Int, 0 },
+ { X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr213m_Int, 0 },
+ { X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr213m_Int, 0 },
{ X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_16 },
{ X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_16 },
@@ -1171,10 +1167,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFNMADDPDr132rY, X86::VFNMADDPDr132mY, TB_ALIGN_32 },
{ X86::VFNMADDPSr213rY, X86::VFNMADDPSr213mY, TB_ALIGN_32 },
{ X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_32 },
- { X86::VFNMADDPSr132r_Int, X86::VFNMADDPSr132m_Int, TB_ALIGN_16 },
- { X86::VFNMADDPDr132r_Int, X86::VFNMADDPDr132m_Int, TB_ALIGN_16 },
- { X86::VFNMADDPSr132rY_Int, X86::VFNMADDPSr132mY_Int, TB_ALIGN_32 },
- { X86::VFNMADDPDr132rY_Int, X86::VFNMADDPDr132mY_Int, TB_ALIGN_32 },
{ X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, 0 },
{ X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, 0 },
@@ -1182,8 +1174,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, 0 },
{ X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, 0 },
{ X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, 0 },
- { X86::VFMSUBSSr132r_Int, X86::VFMSUBSSr132m_Int, 0 },
- { X86::VFMSUBSDr132r_Int, X86::VFMSUBSDr132m_Int, 0 },
+ { X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr213m_Int, 0 },
+ { X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr213m_Int, 0 },
{ X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_16 },
{ X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_16 },
@@ -1197,10 +1189,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFMSUBPDr132rY, X86::VFMSUBPDr132mY, TB_ALIGN_32 },
{ X86::VFMSUBPSr213rY, X86::VFMSUBPSr213mY, TB_ALIGN_32 },
{ X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_32 },
- { X86::VFMSUBPSr132r_Int, X86::VFMSUBPSr132m_Int, TB_ALIGN_16 },
- { X86::VFMSUBPDr132r_Int, X86::VFMSUBPDr132m_Int, TB_ALIGN_16 },
- { X86::VFMSUBPSr132rY_Int, X86::VFMSUBPSr132mY_Int, TB_ALIGN_32 },
- { X86::VFMSUBPDr132rY_Int, X86::VFMSUBPDr132mY_Int, TB_ALIGN_32 },
{ X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, 0 },
{ X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, 0 },
@@ -1208,8 +1196,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, 0 },
{ X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, 0 },
{ X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, 0 },
- { X86::VFNMSUBSSr132r_Int, X86::VFNMSUBSSr132m_Int, 0 },
- { X86::VFNMSUBSDr132r_Int, X86::VFNMSUBSDr132m_Int, 0 },
+ { X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr213m_Int, 0 },
+ { X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr213m_Int, 0 },
{ X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_16 },
{ X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_16 },
@@ -1223,10 +1211,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr132mY, TB_ALIGN_32 },
{ X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr213mY, TB_ALIGN_32 },
{ X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr213mY, TB_ALIGN_32 },
- { X86::VFNMSUBPSr132r_Int, X86::VFNMSUBPSr132m_Int, TB_ALIGN_16 },
- { X86::VFNMSUBPDr132r_Int, X86::VFNMSUBPDr132m_Int, TB_ALIGN_16 },
- { X86::VFNMSUBPSr132rY_Int, X86::VFNMSUBPSr132mY_Int, TB_ALIGN_32 },
- { X86::VFNMSUBPDr132rY_Int, X86::VFNMSUBPDr132mY_Int, TB_ALIGN_32 },
{ X86::VFMADDSUBPSr231r, X86::VFMADDSUBPSr231m, TB_ALIGN_16 },
{ X86::VFMADDSUBPDr231r, X86::VFMADDSUBPDr231m, TB_ALIGN_16 },
@@ -1240,10 +1224,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr132mY, TB_ALIGN_32 },
{ X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr213mY, TB_ALIGN_32 },
{ X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr213mY, TB_ALIGN_32 },
- { X86::VFMADDSUBPSr132r_Int, X86::VFMADDSUBPSr132m_Int, TB_ALIGN_16 },
- { X86::VFMADDSUBPDr132r_Int, X86::VFMADDSUBPDr132m_Int, TB_ALIGN_16 },
- { X86::VFMADDSUBPSr132rY_Int, X86::VFMADDSUBPSr132mY_Int, TB_ALIGN_32 },
- { X86::VFMADDSUBPDr132rY_Int, X86::VFMADDSUBPDr132mY_Int, TB_ALIGN_32 },
{ X86::VFMSUBADDPSr231r, X86::VFMSUBADDPSr231m, TB_ALIGN_16 },
{ X86::VFMSUBADDPDr231r, X86::VFMSUBADDPDr231m, TB_ALIGN_16 },
@@ -1257,10 +1237,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_32 },
{ X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_32 },
{ X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 },
- { X86::VFMSUBADDPSr132r_Int, X86::VFMSUBADDPSr132m_Int, TB_ALIGN_16 },
- { X86::VFMSUBADDPDr132r_Int, X86::VFMSUBADDPDr132m_Int, TB_ALIGN_16 },
- { X86::VFMSUBADDPSr132rY_Int, X86::VFMSUBADDPSr132mY_Int, TB_ALIGN_32 },
- { X86::VFMSUBADDPDr132rY_Int, X86::VFMSUBADDPDr132mY_Int, TB_ALIGN_32 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
@@ -1318,8 +1294,7 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
SrcReg = MI.getOperand(1).getReg();
DstReg = MI.getOperand(0).getReg();
switch (MI.getOpcode()) {
- default:
- llvm_unreachable(0);
+ default: llvm_unreachable("Unreachable!");
case X86::MOVSX16rr8:
case X86::MOVZX16rr8:
case X86::MOVSX32rr8:
@@ -1463,6 +1438,9 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
/// regIsPICBase - Return true if register is a PIC base (i.e. defined by
/// X86::MOVPC32r).
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
+ // Don't waste compile time scanning use-def chains of physregs.
+ if (!TargetRegisterInfo::isVirtualRegister(BaseReg))
+ return false;
bool isPICBase = false;
for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
E = MRI.def_end(); I != E; ++I) {
@@ -1480,78 +1458,69 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
AliasAnalysis *AA) const {
switch (MI->getOpcode()) {
default: break;
- case X86::MOV8rm:
- case X86::MOV16rm:
- case X86::MOV32rm:
- case X86::MOV64rm:
- case X86::LD_Fp64m:
- case X86::MOVSSrm:
- case X86::MOVSDrm:
- case X86::MOVAPSrm:
- case X86::MOVUPSrm:
- case X86::MOVAPDrm:
- case X86::MOVDQArm:
- case X86::VMOVSSrm:
- case X86::VMOVSDrm:
- case X86::VMOVAPSrm:
- case X86::VMOVUPSrm:
- case X86::VMOVAPDrm:
- case X86::VMOVDQArm:
- case X86::VMOVAPSYrm:
- case X86::VMOVUPSYrm:
- case X86::VMOVAPDYrm:
- case X86::VMOVDQAYrm:
- case X86::MMX_MOVD64rm:
- case X86::MMX_MOVQ64rm:
- case X86::FsVMOVAPSrm:
- case X86::FsVMOVAPDrm:
- case X86::FsMOVAPSrm:
- case X86::FsMOVAPDrm: {
- // Loads from constant pools are trivially rematerializable.
- if (MI->getOperand(1).isReg() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
- MI->isInvariantLoad(AA)) {
- unsigned BaseReg = MI->getOperand(1).getReg();
- if (BaseReg == 0 || BaseReg == X86::RIP)
- return true;
- // Allow re-materialization of PIC load.
- if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
- return false;
- const MachineFunction &MF = *MI->getParent()->getParent();
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- bool isPICBase = false;
- for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
- E = MRI.def_end(); I != E; ++I) {
- MachineInstr *DefMI = I.getOperand().getParent();
- if (DefMI->getOpcode() != X86::MOVPC32r)
- return false;
- assert(!isPICBase && "More than one PIC base?");
- isPICBase = true;
- }
- return isPICBase;
- }
- return false;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp64m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVUPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::VMOVSSrm:
+ case X86::VMOVSDrm:
+ case X86::VMOVAPSrm:
+ case X86::VMOVUPSrm:
+ case X86::VMOVAPDrm:
+ case X86::VMOVDQArm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ case X86::FsVMOVAPSrm:
+ case X86::FsVMOVAPDrm:
+ case X86::FsMOVAPSrm:
+ case X86::FsMOVAPDrm: {
+ // Loads from constant pools are trivially rematerializable.
+ if (MI->getOperand(1).isReg() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+ MI->isInvariantLoad(AA)) {
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == 0 || BaseReg == X86::RIP)
+ return true;
+ // Allow re-materialization of PIC load.
+ if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
+ return false;
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ return regIsPICBase(BaseReg, MRI);
}
+ return false;
+ }
- case X86::LEA32r:
- case X86::LEA64r: {
- if (MI->getOperand(2).isImm() &&
- MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
- !MI->getOperand(4).isReg()) {
- // lea fi#, lea GV, etc. are all rematerializable.
- if (!MI->getOperand(1).isReg())
- return true;
- unsigned BaseReg = MI->getOperand(1).getReg();
- if (BaseReg == 0)
- return true;
- // Allow re-materialization of lea PICBase + x.
- const MachineFunction &MF = *MI->getParent()->getParent();
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- return regIsPICBase(BaseReg, MRI);
- }
- return false;
- }
+ case X86::LEA32r:
+ case X86::LEA64r: {
+ if (MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+ !MI->getOperand(4).isReg()) {
+ // lea fi#, lea GV, etc. are all rematerializable.
+ if (!MI->getOperand(1).isReg())
+ return true;
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == 0)
+ return true;
+ // Allow re-materialization of lea PICBase + x.
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ return regIsPICBase(BaseReg, MRI);
+ }
+ return false;
+ }
}
// All other instructions marked M_REMATERIALIZABLE are always trivially
@@ -1660,7 +1629,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
case X86::MOV64r0: {
if (!isSafeToClobberEFLAGS(MBB, I)) {
switch (Opc) {
- default: break;
+ default: llvm_unreachable("Unreachable!");
case X86::MOV8r0: Opc = X86::MOV8ri; break;
case X86::MOV16r0: Opc = X86::MOV16ri; break;
case X86::MOV32r0: Opc = X86::MOV32ri; break;
@@ -1733,8 +1702,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
get(Opc), leaOutReg);
switch (MIOpc) {
- default:
- llvm_unreachable(0);
+ default: llvm_unreachable("Unreachable!");
case X86::SHL16ri: {
unsigned ShAmt = MI->getOperand(2).getImm();
MIB.addReg(0).addImm(1 << ShAmt)
@@ -2126,57 +2094,25 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
MI->getOperand(3).setImm(Size-Amt);
return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
}
- case X86::CMOVB16rr:
- case X86::CMOVB32rr:
- case X86::CMOVB64rr:
- case X86::CMOVAE16rr:
- case X86::CMOVAE32rr:
- case X86::CMOVAE64rr:
- case X86::CMOVE16rr:
- case X86::CMOVE32rr:
- case X86::CMOVE64rr:
- case X86::CMOVNE16rr:
- case X86::CMOVNE32rr:
- case X86::CMOVNE64rr:
- case X86::CMOVBE16rr:
- case X86::CMOVBE32rr:
- case X86::CMOVBE64rr:
- case X86::CMOVA16rr:
- case X86::CMOVA32rr:
- case X86::CMOVA64rr:
- case X86::CMOVL16rr:
- case X86::CMOVL32rr:
- case X86::CMOVL64rr:
- case X86::CMOVGE16rr:
- case X86::CMOVGE32rr:
- case X86::CMOVGE64rr:
- case X86::CMOVLE16rr:
- case X86::CMOVLE32rr:
- case X86::CMOVLE64rr:
- case X86::CMOVG16rr:
- case X86::CMOVG32rr:
- case X86::CMOVG64rr:
- case X86::CMOVS16rr:
- case X86::CMOVS32rr:
- case X86::CMOVS64rr:
- case X86::CMOVNS16rr:
- case X86::CMOVNS32rr:
- case X86::CMOVNS64rr:
- case X86::CMOVP16rr:
- case X86::CMOVP32rr:
- case X86::CMOVP64rr:
- case X86::CMOVNP16rr:
- case X86::CMOVNP32rr:
- case X86::CMOVNP64rr:
- case X86::CMOVO16rr:
- case X86::CMOVO32rr:
- case X86::CMOVO64rr:
- case X86::CMOVNO16rr:
- case X86::CMOVNO32rr:
- case X86::CMOVNO64rr: {
- unsigned Opc = 0;
+ case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
+ case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
+ case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
+ case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
+ case X86::CMOVBE16rr: case X86::CMOVBE32rr: case X86::CMOVBE64rr:
+ case X86::CMOVA16rr: case X86::CMOVA32rr: case X86::CMOVA64rr:
+ case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr:
+ case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
+ case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
+ case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr:
+ case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr:
+ case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
+ case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr:
+ case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
+ case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr:
+ case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr: {
+ unsigned Opc;
switch (MI->getOpcode()) {
- default: break;
+ default: llvm_unreachable("Unreachable!");
case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break;
case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break;
case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break;
@@ -2408,7 +2344,7 @@ static X86::CondCode getSwappedCondition(X86::CondCode CC) {
/// whether it has memory operand.
static unsigned getSETFromCond(X86::CondCode CC,
bool HasMemoryOperand) {
- static const unsigned Opc[16][2] = {
+ static const uint16_t Opc[16][2] = {
{ X86::SETAr, X86::SETAm },
{ X86::SETAEr, X86::SETAEm },
{ X86::SETBr, X86::SETBm },
@@ -2435,7 +2371,7 @@ static unsigned getSETFromCond(X86::CondCode CC,
/// register size in bytes, and operand type.
static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes,
bool HasMemoryOperand) {
- static const unsigned Opc[32][3] = {
+ static const uint16_t Opc[32][3] = {
{ X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr },
{ X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },
{ X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr },
@@ -2768,19 +2704,18 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
// SrcReg(GR64) -> DestReg(VR64)
if (X86::GR64RegClass.contains(DestReg)) {
- if (X86::VR128RegClass.contains(SrcReg)) {
+ if (X86::VR128RegClass.contains(SrcReg))
// Copy from a VR128 register to a GR64 register.
return HasAVX ? X86::VMOVPQIto64rr : X86::MOVPQIto64rr;
- } else if (X86::VR64RegClass.contains(SrcReg)) {
+ if (X86::VR64RegClass.contains(SrcReg))
// Copy from a VR64 register to a GR64 register.
return X86::MOVSDto64rr;
- }
} else if (X86::GR64RegClass.contains(SrcReg)) {
// Copy from a GR64 register to a VR128 register.
if (X86::VR128RegClass.contains(DestReg))
return HasAVX ? X86::VMOV64toPQIrr : X86::MOV64toPQIrr;
// Copy from a GR64 register to a VR64 register.
- else if (X86::VR64RegClass.contains(DestReg))
+ if (X86::VR64RegClass.contains(DestReg))
return X86::MOV64toSDrr;
}
@@ -2788,12 +2723,12 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
// SrcReg(GR32) -> DestReg(FR32)
if (X86::GR32RegClass.contains(DestReg) && X86::FR32RegClass.contains(SrcReg))
- // Copy from a FR32 register to a GR32 register.
- return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr;
+ // Copy from a FR32 register to a GR32 register.
+ return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr;
if (X86::FR32RegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg))
- // Copy from a GR32 register to a FR32 register.
- return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr;
+ // Copy from a GR32 register to a FR32 register.
+ return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr;
return 0;
}
@@ -2804,7 +2739,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool KillSrc) const {
// First deal with the normal symmetric copies.
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
- unsigned Opc = 0;
+ unsigned Opc;
if (X86::GR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MOV64rr;
else if (X86::GR32RegClass.contains(DestReg, SrcReg))
@@ -2843,7 +2778,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, MI, DL, get(X86::PUSHF64));
BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
return;
- } else if (X86::GR32RegClass.contains(DestReg)) {
+ }
+ if (X86::GR32RegClass.contains(DestReg)) {
BuildMI(MBB, MI, DL, get(X86::PUSHF32));
BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
return;
@@ -2855,7 +2791,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc));
BuildMI(MBB, MI, DL, get(X86::POPF64));
return;
- } else if (X86::GR32RegClass.contains(SrcReg)) {
+ }
+ if (X86::GR32RegClass.contains(SrcReg)) {
BuildMI(MBB, MI, DL, get(X86::PUSH32r))
.addReg(SrcReg, getKillRegState(KillSrc));
BuildMI(MBB, MI, DL, get(X86::POPF32));
@@ -3037,6 +2974,37 @@ analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
CmpMask = ~0;
CmpValue = MI->getOperand(1).getImm();
return true;
+ // A SUB can be used to perform a comparison.
+ case X86::SUB64rm:
+ case X86::SUB32rm:
+ case X86::SUB16rm:
+ case X86::SUB8rm:
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = 0;
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
+ case X86::SUB64rr:
+ case X86::SUB32rr:
+ case X86::SUB16rr:
+ case X86::SUB8rr:
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = MI->getOperand(2).getReg();
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
+ case X86::SUB64ri32:
+ case X86::SUB64ri8:
+ case X86::SUB32ri:
+ case X86::SUB32ri8:
+ case X86::SUB16ri:
+ case X86::SUB16ri8:
+ case X86::SUB8ri:
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = 0;
+ CmpMask = ~0;
+ CmpValue = MI->getOperand(2).getImm();
+ return true;
case X86::CMP64rr:
case X86::CMP32rr:
case X86::CMP16rr:
@@ -3145,6 +3113,55 @@ bool X86InstrInfo::
optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const {
+ // Check whether we can replace SUB with CMP.
+ unsigned NewOpcode = 0;
+ switch (CmpInstr->getOpcode()) {
+ default: break;
+ case X86::SUB64ri32:
+ case X86::SUB64ri8:
+ case X86::SUB32ri:
+ case X86::SUB32ri8:
+ case X86::SUB16ri:
+ case X86::SUB16ri8:
+ case X86::SUB8ri:
+ case X86::SUB64rm:
+ case X86::SUB32rm:
+ case X86::SUB16rm:
+ case X86::SUB8rm:
+ case X86::SUB64rr:
+ case X86::SUB32rr:
+ case X86::SUB16rr:
+ case X86::SUB8rr: {
+ if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg()))
+ return false;
+ // The destination register has no remaining uses, so we can replace SUB with CMP.
+ switch (CmpInstr->getOpcode()) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::SUB64rm: NewOpcode = X86::CMP64rm; break;
+ case X86::SUB32rm: NewOpcode = X86::CMP32rm; break;
+ case X86::SUB16rm: NewOpcode = X86::CMP16rm; break;
+ case X86::SUB8rm: NewOpcode = X86::CMP8rm; break;
+ case X86::SUB64rr: NewOpcode = X86::CMP64rr; break;
+ case X86::SUB32rr: NewOpcode = X86::CMP32rr; break;
+ case X86::SUB16rr: NewOpcode = X86::CMP16rr; break;
+ case X86::SUB8rr: NewOpcode = X86::CMP8rr; break;
+ case X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break;
+ case X86::SUB64ri8: NewOpcode = X86::CMP64ri8; break;
+ case X86::SUB32ri: NewOpcode = X86::CMP32ri; break;
+ case X86::SUB32ri8: NewOpcode = X86::CMP32ri8; break;
+ case X86::SUB16ri: NewOpcode = X86::CMP16ri; break;
+ case X86::SUB16ri8: NewOpcode = X86::CMP16ri8; break;
+ case X86::SUB8ri: NewOpcode = X86::CMP8ri; break;
+ }
+ CmpInstr->setDesc(get(NewOpcode));
+ CmpInstr->RemoveOperand(0);
+ // Fall through to optimize Cmp if Cmp is CMPrr or CMPri.
+ if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm ||
+ NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm)
+ return false;
+ }
+ }
+
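The block above turns a SUB whose destination has no remaining non-debug uses into the corresponding CMP, which sets the same flags without writing a register. A sketch of source that typically produces this shape; the function name is hypothetical and the exact instructions are illustrative.

    #include <cstdio>

    // Only the flags of the subtraction matter here: the difference is never
    // stored, so the SUB's destination register ends up dead and the peephole
    // can rewrite the SUB (e.g. subl) as the matching CMP (e.g. cmpl).
    void branchOnDifference(int x, int y) {
      if (x - y == 0)
        std::puts("equal");
    }

    int main() {
      branchOnDifference(5, 5);
      return 0;
    }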
// Get the unique definition of SrcReg.
MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
if (!MI) return false;
@@ -3221,12 +3238,15 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
MachineBasicBlock::iterator E = CmpInstr->getParent()->end();
for (++I; I != E; ++I) {
const MachineInstr &Instr = *I;
- if (Instr.modifiesRegister(X86::EFLAGS, TRI)) {
+ bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI);
+ bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI);
+ // If this instruction both uses and updates EFLAGS, the use still has to be checked below.
+ if (!UseEFLAGS && ModifyEFLAGS) {
// It is safe to remove CmpInstr if EFLAGS is updated again.
IsSafe = true;
break;
}
- if (!Instr.readsRegister(X86::EFLAGS, TRI))
+ if (!UseEFLAGS && !ModifyEFLAGS)
continue;
// EFLAGS is used by this instruction.
@@ -3281,7 +3301,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// instructions will be modified.
OpsToUpdate.push_back(std::make_pair(&*I, NewOpc));
}
- if (Instr.killsRegister(X86::EFLAGS, TRI)) {
+ if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
+ // It is safe to remove CmpInstr if EFLAGS is updated again or killed.
IsSafe = true;
break;
}
@@ -3319,6 +3340,81 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
return true;
}
+/// optimizeLoadInstr - Try to remove the load by folding it to a register
+/// operand at the use. We fold the load if it defines a virtual register,
+/// the virtual register is used once in the same BB, and the instructions
+/// in between do not load, store, or have other side effects.
+MachineInstr* X86InstrInfo::
+optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
+ unsigned &FoldAsLoadDefReg,
+ MachineInstr *&DefMI) const {
+ if (FoldAsLoadDefReg == 0)
+ return 0;
+ // To be conservative, clear the load candidate if MI itself may load.
+ if (MI->mayLoad()) {
+ FoldAsLoadDefReg = 0;
+ return 0;
+ }
+
+ // Check whether we can move DefMI here.
+ DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
+ assert(DefMI);
+ bool SawStore = false;
+ if (!DefMI->isSafeToMove(this, 0, SawStore))
+ return 0;
+
+ // We try to commute MI if possible.
+ unsigned IdxEnd = (MI->isCommutable()) ? 2 : 1;
+ for (unsigned Idx = 0; Idx < IdxEnd; Idx++) {
+ // Collect information about virtual register operands of MI.
+ unsigned SrcOperandId = 0;
+ bool FoundSrcOperand = false;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg != FoldAsLoadDefReg)
+ continue;
+ // Do not fold if we have a subreg use or a def or multiple uses.
+ if (MO.getSubReg() || MO.isDef() || FoundSrcOperand)
+ return 0;
+
+ SrcOperandId = i;
+ FoundSrcOperand = true;
+ }
+ if (!FoundSrcOperand) return 0;
+
+ // Check whether we can fold the def into SrcOperandId.
+ SmallVector<unsigned, 8> Ops;
+ Ops.push_back(SrcOperandId);
+ MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
+ if (FoldMI) {
+ FoldAsLoadDefReg = 0;
+ return FoldMI;
+ }
+
+ if (Idx == 1) {
+ // MI was commuted on the previous iteration but folding still failed; commute it back.
+ commuteInstruction(MI, false);
+ return 0;
+ }
+
+ // Check whether we can commute MI and enable folding.
+ if (MI->isCommutable()) {
+ MachineInstr *NewMI = commuteInstruction(MI, false);
+ // Unable to commute.
+ if (!NewMI) return 0;
+ if (NewMI != MI) {
+ // New instruction. It doesn't need to be kept.
+ NewMI->eraseFromParent();
+ return 0;
+ }
+ }
+ }
+ return 0;
+}
+
/// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr
/// instruction with two undef reads of the register being defined. This is
/// used for mapping:
@@ -3477,6 +3573,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
OpcodeTablePtr = &RegOp2MemOpTable1;
} else if (i == 2) {
OpcodeTablePtr = &RegOp2MemOpTable2;
+ } else if (i == 3) {
+ OpcodeTablePtr = &RegOp2MemOpTable3;
}
// If table selected...
@@ -3947,7 +4045,6 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
getUndefRegState(MO.isUndef()));
}
// Change CMP32ri r, 0 back to TEST32rr r, r, etc.
- unsigned NewOpc = 0;
switch (DataMI->getOpcode()) {
default: break;
case X86::CMP64ri32:
@@ -3960,8 +4057,9 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
MachineOperand &MO0 = DataMI->getOperand(0);
MachineOperand &MO1 = DataMI->getOperand(1);
if (MO1.getImm() == 0) {
+ unsigned NewOpc;
switch (DataMI->getOpcode()) {
- default: break;
+ default: llvm_unreachable("Unreachable!");
case X86::CMP64ri8:
case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
case X86::CMP32ri8:
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index ec9b2e6..b6f69af 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -387,6 +387,18 @@ public:
unsigned SrcReg2, int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const;
+ /// optimizeLoadInstr - Try to remove the load by folding it into the use as
+ /// a register operand. We only fold the load if its def and its use are in
+ /// the same BB, and we only track a single load candidate and check whether
+ /// it can be folded into MI. FoldAsLoadDefReg is the virtual register
+ /// defined by the load we are trying to fold. DefMI returns the machine
+ /// instruction that defines FoldAsLoadDefReg, and the function returns
+ /// the machine instruction generated by the folding.
+ virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI,
+ const MachineRegisterInfo *MRI,
+ unsigned &FoldAsLoadDefReg,
+ MachineInstr *&DefMI) const;
+
private:
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index e4edd36..c8f40bb 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -251,7 +251,7 @@ def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(iPTR 0))))))],
IIC_MMX_MOVQ_RR>;
-def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
+def MMX_MOVQ2DQrr : S2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
(ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector
@@ -259,7 +259,7 @@ def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
IIC_MMX_MOVQ_RR>;
let neverHasSideEffects = 1 in
-def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
+def MMX_MOVQ2FR64rr: S2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
(ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [],
IIC_MMX_MOVQ_RR>;
@@ -554,20 +554,6 @@ def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
(int_x86_mmx_pmovmskb VR64:$src))]>;
-// MMX to XMM for vector types
-def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1,
- [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>;
-
-def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)),
- (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-
-def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))),
- (v2i64 (MOVQI2PQIrm addr:$src))>;
-
-def : Pat<(v2i64 (MMX_X86movq2dq
- (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
- (v2i64 (MOVDI2PDIrm addr:$src))>;
-
// Low word of XMM to MMX.
def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
[SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index c2d169a..220c06d 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -245,9 +245,9 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
// A vector extract of the first f32/f64 position is a subregister copy
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
- (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+ (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;
// A 128-bit subvector extract from the first 256-bit vector position
// is a subregister copy that needs no instruction.
@@ -283,14 +283,14 @@ def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (i32 0)),
// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+ (COPY_TO_REGCLASS FR32:$src, VR128)>;
def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
- (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+ (COPY_TO_REGCLASS FR32:$src, VR128)>;
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
+ (COPY_TO_REGCLASS FR64:$src, VR128)>;
def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
- (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
+ (COPY_TO_REGCLASS FR64:$src, VR128)>;
// Bitcasts between 128-bit vector types. Return the original type since
// no instruction is needed for the conversion
@@ -562,59 +562,57 @@ let Predicates = [HasAVX] in {
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
(VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (VMOVSSrr (v4f32 (V_SET0)),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
+ (VMOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (VMOVSSrr (v4i32 (V_SET0)),
- (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
+ (VMOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
(VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
// Move low f32 and clear high bits.
def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSSrr (v4f32 (V_SET0)),
- (EXTRACT_SUBREG (v8f32 VR256:$src), sub_ss)), sub_xmm)>;
+ (VMOVSSrr (v4f32 (V_SET0)),
+ (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSSrr (v4i32 (V_SET0)),
- (EXTRACT_SUBREG (v8i32 VR256:$src), sub_ss)), sub_xmm)>;
+ (VMOVSSrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)), sub_xmm)>;
}
let AddedComplexity = 20 in {
// MOVSSrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
- (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
// MOVSDrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
- (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
// Represent the same patterns above but in the form they appear for
// 256-bit types
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
(v4i32 (scalar_to_vector (loadi32 addr:$src))), (i32 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
(v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_sd)>;
+ (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
}
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector FR32:$src)), (i32 0)))),
@@ -628,70 +626,68 @@ let Predicates = [HasAVX] in {
sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
(v2i64 (scalar_to_vector (loadi64 addr:$src))), (i32 0)))),
- (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>;
// Move low f64 and clear high bits.
def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSDrr (v2f64 (V_SET0)),
- (EXTRACT_SUBREG (v4f64 VR256:$src), sub_sd)), sub_xmm)>;
+ (VMOVSDrr (v2f64 (V_SET0)),
+ (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSDrr (v2i64 (V_SET0)),
- (EXTRACT_SUBREG (v4i64 VR256:$src), sub_sd)), sub_xmm)>;
+ (VMOVSDrr (v2i64 (V_SET0)),
+ (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)), sub_xmm)>;
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
- (VMOVSSmr addr:$dst,
- (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ (VMOVSSmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32))>;
def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
addr:$dst),
- (VMOVSDmr addr:$dst,
- (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+ (VMOVSDmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64))>;
// Shuffle with VMOVSS
def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
(VMOVSSrr (v4i32 VR128:$src1),
- (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+ (COPY_TO_REGCLASS (v4i32 VR128:$src2), FR32))>;
def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
(VMOVSSrr (v4f32 VR128:$src1),
- (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+ (COPY_TO_REGCLASS (v4f32 VR128:$src2), FR32))>;
// 256-bit variants
def : Pat<(v8i32 (X86Movss VR256:$src1, VR256:$src2)),
(SUBREG_TO_REG (i32 0),
- (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_ss),
- (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_ss)), sub_xmm)>;
+ (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_xmm)),
+ sub_xmm)>;
def : Pat<(v8f32 (X86Movss VR256:$src1, VR256:$src2)),
(SUBREG_TO_REG (i32 0),
- (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_ss),
- (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_ss)), sub_xmm)>;
+ (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_xmm)),
+ sub_xmm)>;
// Shuffle with VMOVSD
def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
- (VMOVSDrr (v2i64 VR128:$src1),
- (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
- (VMOVSDrr (v2f64 VR128:$src1),
- (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),
- sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
- sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
// 256-bit variants
def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)),
(SUBREG_TO_REG (i32 0),
- (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_sd),
- (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_sd)), sub_xmm)>;
+ (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_xmm)),
+ sub_xmm)>;
def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)),
(SUBREG_TO_REG (i32 0),
- (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_sd),
- (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_sd)), sub_xmm)>;
+ (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_xmm)),
+ sub_xmm)>;
// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
@@ -699,17 +695,13 @@ let Predicates = [HasAVX] in {
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),
- sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),
- sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),
- sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
- sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
}
let Predicates = [HasSSE1] in {
@@ -719,37 +711,31 @@ let Predicates = [HasSSE1] in {
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
(MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (MOVSSrr (v4f32 (V_SET0)),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
+ (MOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (MOVSSrr (v4i32 (V_SET0)),
- (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
+ (MOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
}
let AddedComplexity = 20 in {
- // MOVSSrm zeros the high parts of the register; represent this
- // with SUBREG_TO_REG.
+ // MOVSSrm already zeros the high parts of the register.
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
}
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
- (MOVSSmr addr:$dst,
- (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ (MOVSSmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR32))>;
// Shuffle with MOVSS
def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
- (MOVSSrr (v4i32 VR128:$src1),
- (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+ (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>;
def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
- (MOVSSrr (v4f32 VR128:$src1),
- (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+ (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>;
}
let Predicates = [HasSSE2] in {
@@ -761,50 +747,46 @@ let Predicates = [HasSSE2] in {
}
let AddedComplexity = 20 in {
- // MOVSDrm zeros the high parts of the register; represent this
- // with SUBREG_TO_REG.
+ // MOVSDrm already zeros the high parts of the register.
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
}
// Extract and store.
def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
addr:$dst),
- (MOVSDmr addr:$dst,
- (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+ (MOVSDmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR64))>;
// Shuffle with MOVSD
def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2i64 VR128:$src1),
- (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2f64 VR128:$src1),
- (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
// is during lowering, where it's not possible to recognize the fold cause
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
}
//===----------------------------------------------------------------------===//
@@ -1416,14 +1398,15 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
}
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
- string asm, Domain d, OpndItins itins> {
+ X86MemOperand x86memop, string asm, Domain d,
+ OpndItins itins> {
+let neverHasSideEffects = 1 in {
def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [(set DstRC:$dst, (OpNode SrcRC:$src))],
- itins.rr, d>;
+ [], itins.rr, d>;
+ let mayLoad = 1 in
def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
- itins.rm, d>;
+ [], itins.rm, d>;
+}
}
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
@@ -1443,7 +1426,7 @@ defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
SSE_CVT_SS2SI_32>,
XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
- "cvttss2si\t{$src, $dst|$dst, $src}",
+ "cvttss2si{q}\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_64>,
XS, VEX, VEX_W, VEX_LIG;
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
@@ -1451,7 +1434,7 @@ defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
SSE_CVT_SD2SI>,
XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
- "cvttsd2si\t{$src, $dst|$dst, $src}",
+ "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
SSE_CVT_SD2SI>,
XD, VEX, VEX_W, VEX_LIG;
@@ -1465,11 +1448,14 @@ defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">,
XS, VEX_4V, VEX_W, VEX_LIG;
defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">,
XD, VEX_4V, VEX_LIG;
-defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">,
- XD, VEX_4V, VEX_LIG;
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">,
XD, VEX_4V, VEX_W, VEX_LIG;
+def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTSI2SDrr FR64:$dst, FR64:$src1, GR32:$src)>;
+def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>;
+
let Predicates = [HasAVX], AddedComplexity = 1 in {
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
@@ -1519,14 +1505,14 @@ defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
// and/or XMM operand(s).
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+ Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
string asm, OpndItins itins> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>;
- def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm>;
+ [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>;
}
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
@@ -1548,30 +1534,31 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
itins.rm>;
}
-defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- f128mem, load, "cvtsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
+defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
+ int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si{l}",
+ SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
- int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si",
- SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG;
+ int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si{q}",
+ SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG;
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- f128mem, load, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD;
+ sdmem, sse_load_f64, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
- f128mem, load, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W;
+ sdmem, sse_load_f64, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W;
defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss",
SSE_CVT_Scalar, 0>, XS, VEX_4V;
defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss",
+ int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
SSE_CVT_Scalar, 0>, XS, VEX_4V,
VEX_W;
defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd",
SSE_CVT_Scalar, 0>, XD, VEX_4V;
defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd",
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
SSE_CVT_Scalar, 0>, XD,
VEX_4V, VEX_W;
@@ -1587,96 +1574,71 @@ let Constraints = "$src1 = $dst" in {
"cvtsi2sd", SSE_CVT_Scalar>, XD;
defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse2_cvtsi642sd, i64mem, loadi64,
- "cvtsi2sd", SSE_CVT_Scalar>, XD, REX_W;
+ "cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W;
}
/// SSE 1 Only
// Aliases for intrinsics
defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
- f32mem, load, "cvttss2si",
+ ssmem, sse_load_f32, "cvttss2si",
SSE_CVT_SS2SI_32>, XS, VEX;
defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse_cvttss2si64, f32mem, load,
- "cvttss2si", SSE_CVT_SS2SI_64>,
- XS, VEX, VEX_W;
+ int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
+ "cvttss2si{q}", SSE_CVT_SS2SI_64>,
+ XS, VEX, VEX_W;
defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
- f128mem, load, "cvttsd2si", SSE_CVT_SD2SI>,
- XD, VEX;
+ sdmem, sse_load_f64, "cvttsd2si",
+ SSE_CVT_SD2SI>, XD, VEX;
defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse2_cvttsd2si64, f128mem, load,
- "cvttsd2si", SSE_CVT_SD2SI>,
- XD, VEX, VEX_W;
+ int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
+ "cvttsd2si{q}", SSE_CVT_SD2SI>,
+ XD, VEX, VEX_W;
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
- f32mem, load, "cvttss2si",
+ ssmem, sse_load_f32, "cvttss2si",
SSE_CVT_SS2SI_32>, XS;
defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse_cvttss2si64, f32mem, load,
- "cvttss2si{q}", SSE_CVT_SS2SI_64>,
- XS, REX_W;
+ int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
+ "cvttss2si{q}", SSE_CVT_SS2SI_64>, XS, REX_W;
defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
- f128mem, load, "cvttsd2si", SSE_CVT_SD2SI>,
- XD;
+ sdmem, sse_load_f64, "cvttsd2si",
+ SSE_CVT_SD2SI>, XD;
defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse2_cvttsd2si64, f128mem, load,
- "cvttsd2si{q}", SSE_CVT_SD2SI>,
- XD, REX_W;
-
-let Pattern = []<dag>, neverHasSideEffects = 1 in {
-defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
- "cvtss2si{l}\t{$src, $dst|$dst, $src}",
- SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
-defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
- "cvtss2si\t{$src, $dst|$dst, $src}",
- SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
-defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
+ int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
+ "cvttsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W;
+
+defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+ ssmem, sse_load_f32, "cvtss2si{l}",
+ SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
+defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
+ ssmem, sse_load_f32, "cvtss2si{q}",
+ SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
+
+defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+ ssmem, sse_load_f32, "cvtss2si{l}",
+ SSE_CVT_SS2SI_32>, XS;
+defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
+ ssmem, sse_load_f32, "cvtss2si{q}",
+ SSE_CVT_SS2SI_64>, XS, REX_W;
+
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle, SSE_CVT_PS>, TB, VEX,
- Requires<[HasAVX]>;
-defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load,
+ SSEPackedSingle, SSE_CVT_PS>,
+ TB, VEX, Requires<[HasAVX]>;
+defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, i256mem,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle, SSE_CVT_PS>, TB, VEX,
- Requires<[HasAVX]>;
-}
-
-let Pattern = []<dag>, neverHasSideEffects = 1 in {
-defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
- "cvtss2si{l}\t{$src, $dst|$dst, $src}",
- SSE_CVT_SS2SI_32>, XS;
-defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/,
- "cvtss2si{q}\t{$src, $dst|$dst, $src}",
- SSE_CVT_SS2SI_64>, XS, REX_W;
-defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
- "cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle, SSE_CVT_PS>, TB,
- Requires<[HasSSE2]>;
-}
+ SSEPackedSingle, SSE_CVT_PS>,
+ TB, VEX, Requires<[HasAVX]>;
-let Predicates = [HasAVX] in {
- def : Pat<(int_x86_sse_cvtss2si VR128:$src),
- (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
- def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
- (VCVTSS2SIrm addr:$src)>;
- def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
- (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
- def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
- (VCVTSS2SI64rm addr:$src)>;
-}
-
-let Predicates = [HasSSE1] in {
- def : Pat<(int_x86_sse_cvtss2si VR128:$src),
- (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
- def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
- (CVTSS2SIrm addr:$src)>;
- def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
- (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
- def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
- (CVTSS2SI64rm addr:$src)>;
-}
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle, SSE_CVT_PS>,
+ TB, Requires<[HasSSE2]>;
/// SSE 2 Only
// Convert scalar double to scalar single
+let neverHasSideEffects = 1 in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR64:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
@@ -1687,6 +1649,7 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG;
+}
def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
Requires<[HasAVX]>;
@@ -1702,17 +1665,37 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
XD,
Requires<[HasSSE2, OptForSize]>;
-defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
- int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss",
- SSE_CVT_Scalar, 0>,
- XS, VEX_4V;
-let Constraints = "$src1 = $dst" in
-defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
- int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss",
- SSE_CVT_Scalar>, XS;
+def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
+ IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>;
+def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
+ "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
+ VR128:$src1, sse_load_f64:$src2))],
+ IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>;
+
+let Constraints = "$src1 = $dst" in {
+def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
+ IIC_SSE_CVT_Scalar_RR>, XD, Requires<[HasSSE2]>;
+def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
+ "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
+ VR128:$src1, sse_load_f64:$src2))],
+ IIC_SSE_CVT_Scalar_RM>, XD, Requires<[HasSSE2]>;
+}
// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
+let neverHasSideEffects = 1 in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR32:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -1724,19 +1707,21 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;
+}
-let Predicates = [HasAVX] in {
+let AddedComplexity = 1 in { // give AVX priority
def : Pat<(f64 (fextend FR32:$src)),
- (VCVTSS2SDrr FR32:$src, FR32:$src)>;
+ (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>;
def : Pat<(fextend (loadf32 addr:$src)),
- (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>;
- def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>;
-}
+ (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (MOVSSrm addr:$src))>,
- Requires<[HasAVX, OptForSpeed]>;
+ def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+ def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
+ Requires<[HasAVX, OptForSpeed]>;
+} // AddedComplexity = 1
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
@@ -1762,67 +1747,60 @@ def : Pat<(extloadf32 addr:$src),
def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V,
- Requires<[HasAVX]>;
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
+ IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>;
def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- (load addr:$src2)))],
- IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V,
- Requires<[HasAVX]>;
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
+ IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS,
- Requires<[HasSSE2]>;
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
+ IIC_SSE_CVT_Scalar_RR>, XS, Requires<[HasSSE2]>;
def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- (load addr:$src2)))],
- IIC_SSE_CVT_Scalar_RM>, XS,
- Requires<[HasSSE2]>;
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
+ IIC_SSE_CVT_Scalar_RM>, XS, Requires<[HasSSE2]>;
}
// Convert packed single/double fp to doubleword
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
IIC_SSE_CVT_PS_RR>, VEX;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, VEX;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvt_ps2dq_256 VR256:$src))],
IIC_SSE_CVT_PS_RR>, VEX;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, VEX;
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
IIC_SSE_CVT_PS_RR>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>;
-let Predicates = [HasAVX] in {
- def : Pat<(int_x86_sse2_cvtps2dq VR128:$src),
- (VCVTPS2DQrr VR128:$src)>;
- def : Pat<(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)),
- (VCVTPS2DQrm addr:$src)>;
-}
-
-let Predicates = [HasSSE2] in {
- def : Pat<(int_x86_sse2_cvtps2dq VR128:$src),
- (CVTPS2DQrr VR128:$src)>;
- def : Pat<(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)),
- (CVTPS2DQrm addr:$src)>;
-}
// Convert Packed Double FP to Packed DW Integers
let Predicates = [HasAVX] in {
@@ -1830,77 +1808,74 @@ let Predicates = [HasAVX] in {
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ VEX;
// XMM only
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
(VCVTPD2DQrr VR128:$dst, VR128:$src)>;
def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtpd2dqx\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX;
// YMM only
def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX;
def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
- "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
+ "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>,
+ VEX, VEX_L;
def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}",
(VCVTPD2DQYrr VR128:$dst, VR256:$src)>;
}
def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>;
def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
IIC_SSE_CVT_PD_RR>;
-let Predicates = [HasAVX] in {
- def : Pat<(int_x86_sse2_cvtpd2dq VR128:$src),
- (VCVTPD2DQrr VR128:$src)>;
- def : Pat<(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)),
- (VCVTPD2DQXrm addr:$src)>;
-}
-
-let Predicates = [HasSSE2] in {
- def : Pat<(int_x86_sse2_cvtpd2dq VR128:$src),
- (CVTPD2DQrr VR128:$src)>;
- def : Pat<(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)),
- (CVTPD2DQrm addr:$src)>;
-}
-
// Convert with truncation packed single/double fp to doubleword
// SSE2 packed instructions with XS prefix
-def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (int_x86_sse2_cvttps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>, VEX;
-def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttps2dq
- (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX;
-def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst,
- (int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
- IIC_SSE_CVT_PS_RR>, VEX;
-def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq VR128:$src))],
+ IIC_SSE_CVT_PS_RR>, VEX;
+def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
- (memopv8f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX;
-
-def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (int_x86_sse2_cvttps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>;
-def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>;
+ [(set VR128:$dst, (int_x86_sse2_cvttps2dq
+ (memopv4f32 addr:$src)))],
+ IIC_SSE_CVT_PS_RM>, VEX;
+def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
+ IIC_SSE_CVT_PS_RR>, VEX;
+def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
+ (memopv8f32 addr:$src)))],
+ IIC_SSE_CVT_PS_RM>, VEX;
+
+def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))],
+ IIC_SSE_CVT_PS_RR>;
+def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
+ IIC_SSE_CVT_PS_RM>;
let Predicates = [HasAVX] in {
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
@@ -1952,16 +1927,6 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
(int_x86_sse2_cvttpd2dq VR128:$src))],
IIC_SSE_CVT_PD_RR>, VEX;
-def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
-def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
-
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
@@ -1977,10 +1942,14 @@ def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [],
+ "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
IIC_SSE_CVT_PD_RR>, VEX;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
- "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [],
+ "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQYrr VR128:$dst, VR256:$src)>;
@@ -1992,82 +1961,82 @@ let Predicates = [HasAVX] in {
(VCVTTPD2DQYrm addr:$src)>;
} // Predicates = [HasAVX]
+def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
+ IIC_SSE_CVT_PD_RR>;
+def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memopv2f64 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>;
+
// Convert packed single to packed double
let Predicates = [HasAVX] in {
// SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
IIC_SSE_CVT_PD_RR>, TB, VEX;
+let neverHasSideEffects = 1, mayLoad = 1 in
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RM>, TB, VEX;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvt_ps2_pd_256 VR128:$src))],
IIC_SSE_CVT_PD_RR>, TB, VEX;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))],
IIC_SSE_CVT_PD_RM>, TB, VEX;
}
let Predicates = [HasSSE2] in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}", [],
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
IIC_SSE_CVT_PD_RR>, TB;
+let neverHasSideEffects = 1, mayLoad = 1 in
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RM>, TB;
}
-let Predicates = [HasAVX] in {
- def : Pat<(int_x86_sse2_cvtps2pd VR128:$src),
- (VCVTPS2PDrr VR128:$src)>;
-}
-
-let Predicates = [HasSSE2] in {
- def : Pat<(int_x86_sse2_cvtps2pd VR128:$src),
- (CVTPS2PDrr VR128:$src)>;
-}
-
// Convert Packed DW Integers to Packed Double FP
let Predicates = [HasAVX] in {
-def VCVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDYrm : SSDI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDYrr : SSDI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+let neverHasSideEffects = 1, mayLoad = 1 in
+def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ []>, VEX;
+def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX;
+def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvtdq2_pd_256
+ (bitconvert (memopv2i64 addr:$src))))]>, VEX;
+def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX;
}
-def CVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+let neverHasSideEffects = 1, mayLoad = 1 in
+def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RR>;
-def CVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
+def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
IIC_SSE_CVT_PD_RM>;
-// 128 bit register conversion intrinsics
-let Predicates = [HasAVX] in
-def : Pat<(int_x86_sse2_cvtdq2pd VR128:$src),
- (VCVTDQ2PDrr VR128:$src)>;
-
-let Predicates = [HasSSE2] in
-def : Pat<(int_x86_sse2_cvtdq2pd VR128:$src),
- (CVTDQ2PDrr VR128:$src)>;
-
// AVX 256-bit register conversion intrinsics
let Predicates = [HasAVX] in {
- def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
- (VCVTDQ2PDYrr VR128:$src)>;
- def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))),
- (VCVTDQ2PDYrm addr:$src)>;
-
- def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
- (VCVTPD2DQYrr VR256:$src)>;
- def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
- (VCVTPD2DQYrm addr:$src)>;
-
def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
(VCVTDQ2PDYrr VR128:$src)>;
def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
@@ -2079,48 +2048,44 @@ let Predicates = [HasAVX] in {
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
IIC_SSE_CVT_PD_RR>, VEX;
// XMM only
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
(VCVTPD2PSrr VR128:$dst, VR128:$src)>;
def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2psx\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2psx\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX;
// YMM only
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvt_pd2_ps_256 VR256:$src))],
IIC_SSE_CVT_PD_RR>, VEX;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
- "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
(VCVTPD2PSYrr VR128:$dst, VR256:$src)>;
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
IIC_SSE_CVT_PD_RR>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>;
-let Predicates = [HasAVX] in {
- def : Pat<(int_x86_sse2_cvtpd2ps VR128:$src),
- (VCVTPD2PSrr VR128:$src)>;
- def : Pat<(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)),
- (VCVTPD2PSXrm addr:$src)>;
-}
-
-let Predicates = [HasSSE2] in {
- def : Pat<(int_x86_sse2_cvtpd2ps VR128:$src),
- (CVTPD2PSrr VR128:$src)>;
- def : Pat<(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)),
- (CVTPD2PSrm addr:$src)>;
-}
-
// AVX 256-bit register conversion intrinsics
// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
// whenever possible to avoid declaring two versions of each one.
@@ -2130,38 +2095,26 @@ let Predicates = [HasAVX] in {
def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))),
(VCVTDQ2PSYrm addr:$src)>;
- def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
- (VCVTPD2PSYrr VR256:$src)>;
- def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)),
- (VCVTPD2PSYrm addr:$src)>;
-
- def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src),
- (VCVTPS2DQYrr VR256:$src)>;
- def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)),
- (VCVTPS2DQYrm addr:$src)>;
-
- def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src),
- (VCVTPS2PDYrr VR128:$src)>;
- def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)),
- (VCVTPS2PDYrm addr:$src)>;
-
- def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src),
- (VCVTTPD2DQYrr VR256:$src)>;
- def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)),
- (VCVTTPD2DQYrm addr:$src)>;
-
// Match fround and fextend for 128/256-bit conversions
def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
(VCVTPD2PSYrr VR256:$src)>;
def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
(VCVTPD2PSYrm addr:$src)>;
+ def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
+ (VCVTPS2PDrr VR128:$src)>;
def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
(VCVTPS2PDYrr VR128:$src)>;
def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
(VCVTPS2PDYrm addr:$src)>;
}
+let Predicates = [HasSSE2] in {
+ // Match fextend for 128 conversions
+ def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
+ (CVTPS2PDrr VR128:$src)>;
+}
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
//===----------------------------------------------------------------------===//
@@ -2593,17 +2546,13 @@ let Predicates = [HasAVX] in {
OpSize, VEX;
def : Pat<(i32 (X86fgetsign FR32:$src)),
- (VMOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
- sub_ss))>;
+ (VMOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>;
def : Pat<(i64 (X86fgetsign FR32:$src)),
- (VMOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
- sub_ss))>;
+ (VMOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>;
def : Pat<(i32 (X86fgetsign FR64:$src)),
- (VMOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
- sub_sd))>;
+ (VMOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>;
def : Pat<(i64 (X86fgetsign FR64:$src)),
- (VMOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
- sub_sd))>;
+ (VMOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>;
// Assembler Only
def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
@@ -2628,17 +2577,17 @@ defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
SSEPackedDouble>, TB, OpSize;
def : Pat<(i32 (X86fgetsign FR32:$src)),
- (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
- sub_ss))>, Requires<[HasSSE1]>;
+ (MOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>,
+ Requires<[HasSSE1]>;
def : Pat<(i64 (X86fgetsign FR32:$src)),
- (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
- sub_ss))>, Requires<[HasSSE1]>;
+ (MOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>,
+ Requires<[HasSSE1]>;
def : Pat<(i32 (X86fgetsign FR64:$src)),
- (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
- sub_sd))>, Requires<[HasSSE2]>;
+ (MOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>,
+ Requires<[HasSSE2]>;
def : Pat<(i64 (X86fgetsign FR64:$src)),
- (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
- sub_sd))>, Requires<[HasSSE2]>;
+ (MOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>,
+ Requires<[HasSSE2]>;
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
@@ -2923,7 +2872,8 @@ let isCommutable = 0 in {
basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, VEX_4V;
+ basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
+ VEX_4V;
defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>,
VEX_4V, VEX_LIG;
@@ -2974,6 +2924,23 @@ let Constraints = "$src1 = $dst" in {
}
}
+let isCommutable = 1, isCodeGenOnly = 1 in {
+ defm VMAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+ defm VMAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_y<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>, VEX_4V;
+ defm VMINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+ defm VMINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_y<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>, VEX_4V;
+ let Constraints = "$src1 = $dst" in {
+ defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>;
+ defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>;
+ }
+}
+
/// Unop Arithmetic
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the
@@ -3236,34 +3203,30 @@ def : Pat<(f32 (X86frcp (load addr:$src))),
let Predicates = [HasAVX], AddedComplexity = 1 in {
def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
- (VSQRTSSr (f32 (IMPLICIT_DEF)),
- (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
- sub_ss)>;
+ (COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128:$src, FR32)),
+ VR128)>;
def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
(VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
def : Pat<(int_x86_sse2_sqrt_sd VR128:$src),
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
- (VSQRTSDr (f64 (IMPLICIT_DEF)),
- (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd)),
- sub_sd)>;
+ (COPY_TO_REGCLASS (VSQRTSDr (f64 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128:$src, FR64)),
+ VR128)>;
def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
(VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
- (VRSQRTSSr (f32 (IMPLICIT_DEF)),
- (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
- sub_ss)>;
+ (COPY_TO_REGCLASS (VRSQRTSSr (f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128:$src, FR32)),
+ VR128)>;
def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src),
(VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
def : Pat<(int_x86_sse_rcp_ss VR128:$src),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
- (VRCPSSr (f32 (IMPLICIT_DEF)),
- (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
- sub_ss)>;
+ (COPY_TO_REGCLASS (VRCPSSr (f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128:$src, FR32)),
+ VR128)>;
def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src),
(VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
}
@@ -4609,7 +4572,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
// Bitcast FR64 <-> GR64
//
let Predicates = [HasAVX] in
-def VMOV64toSDrm : SSDI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
VEX;
@@ -4622,7 +4585,7 @@ def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
[(store (i64 (bitconvert FR64:$src)), addr:$dst)],
IIC_SSE_MOVDQ>, VEX;
-def MOV64toSDrm : SSDI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
IIC_SSE_MOVDQ>;
@@ -5505,16 +5468,14 @@ let usesCustomInserter = 1 in {
def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
[(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
Requires<[HasSSE3]>;
-def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2),
- [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>,
- Requires<[HasSSE3]>;
}
let Uses = [EAX, ECX, EDX] in
def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", [], IIC_SSE_MONITOR>,
TB, Requires<[HasSSE3]>;
let Uses = [ECX, EAX] in
-def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", [], IIC_SSE_MWAIT>,
+def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait",
+ [(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>,
TB, Requires<[HasSSE3]>;
def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
@@ -6906,81 +6867,42 @@ let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
}
// Packed Compare Implicit Length Strings, Return Index
-let Defs = [ECX, EFLAGS] in {
- multiclass SS42AI_pcmpistri<Intrinsic IntId128, string asm = "pcmpistri"> {
+let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in {
+ multiclass SS42AI_pcmpistri<string asm> {
def rr : SS42AI<0x63, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
- [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
- (implicit EFLAGS)]>, OpSize;
+ []>, OpSize;
+ let mayLoad = 1 in
def rm : SS42AI<0x63, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
- [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)),
- (implicit EFLAGS)]>, OpSize;
+ []>, OpSize;
}
}
-let Predicates = [HasAVX] in {
-defm VPCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128, "vpcmpistri">,
- VEX;
-defm VPCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128, "vpcmpistri">,
- VEX;
-defm VPCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128, "vpcmpistri">,
- VEX;
-defm VPCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128, "vpcmpistri">,
- VEX;
-defm VPCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128, "vpcmpistri">,
- VEX;
-defm VPCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128, "vpcmpistri">,
- VEX;
-}
-
-defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>;
-defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>;
-defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>;
-defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>;
-defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>;
-defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>;
+let Predicates = [HasAVX] in
+defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
+defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">;
// Packed Compare Explicit Length Strings, Return Index
-let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in {
- multiclass SS42AI_pcmpestri<Intrinsic IntId128, string asm = "pcmpestri"> {
+let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
+ multiclass SS42AI_pcmpestri<string asm> {
def rr : SS42AI<0x61, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
- [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)),
- (implicit EFLAGS)]>, OpSize;
+ []>, OpSize;
+ let mayLoad = 1 in
def rm : SS42AI<0x61, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
- [(set ECX,
- (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)),
- (implicit EFLAGS)]>, OpSize;
+ []>, OpSize;
}
}
-let Predicates = [HasAVX] in {
-defm VPCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128, "vpcmpestri">,
- VEX;
-defm VPCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128, "vpcmpestri">,
- VEX;
-defm VPCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128, "vpcmpestri">,
- VEX;
-defm VPCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128, "vpcmpestri">,
- VEX;
-defm VPCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128, "vpcmpestri">,
- VEX;
-defm VPCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128, "vpcmpestri">,
- VEX;
-}
-
-defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>;
-defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>;
-defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;
-defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
-defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
-defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
+let Predicates = [HasAVX] in
+defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
+defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">;
//===----------------------------------------------------------------------===//
// SSE4.2 - CRC Instructions
@@ -7727,24 +7649,18 @@ let Predicates = [HasAVX2] in {
// is used by additional users, which prevents the pattern selection.
let AddedComplexity = 20 in {
def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
- (VBROADCASTSSrr
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
+ (VBROADCASTSSrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
- (VBROADCASTSSYrr
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
+ (VBROADCASTSSYrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
- (VBROADCASTSDYrr
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd))>;
+ (VBROADCASTSDYrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
- (VBROADCASTSSrr
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>;
+ (VBROADCASTSSrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
- (VBROADCASTSSYrr
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>;
+ (VBROADCASTSSYrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
- (VBROADCASTSDYrr
- (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd))>;
+ (VBROADCASTSDYrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
}
}
@@ -7768,46 +7684,26 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
let AddedComplexity = 20 in {
// 128bit broadcasts:
def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
- (VPSHUFDri
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), 0)>;
+ (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
(VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
- (VPSHUFDri
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), 0),
- sub_xmm),
- (VPSHUFDri
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss),
- 0), 1)>;
+ (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm),
+ (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>;
def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
(VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
- (VPSHUFDri
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd),
- 0x44),
- sub_xmm),
- (VPSHUFDri
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd),
- 0x44), 1)>;
+ (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm),
+ (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>;
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
- (VPSHUFDri
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss), 0)>;
+ (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>;
def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
(VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
- (VPSHUFDri
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss), 0),
- sub_xmm),
- (VPSHUFDri
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss),
- 0), 1)>;
+ (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), sub_xmm),
+ (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), 1)>;
def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
(VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
- (VPSHUFDri
- (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd),
- 0x44),
- sub_xmm),
- (VPSHUFDri
- (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd),
- 0x44), 1)>;
+ (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), sub_xmm),
+ (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>;
}
}
@@ -8052,7 +7948,7 @@ multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256,
[]>, VEX_4VOp3, VEX_L;
}
-let Constraints = "$src1 = $dst, $mask = $mask_wb" in {
+let mayLoad = 1, Constraints = "$src1 = $dst, $mask = $mask_wb" in {
defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, vx64mem, vx64mem>, VEX_W;
defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W;
defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>;
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 0168d12..7ac4cec 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -532,6 +532,15 @@ uintptr_t X86JITInfo::getPICJumpTableEntry(uintptr_t BB, uintptr_t Entry) {
#endif
}
+template<typename T> void addUnaligned(void *Pos, T Delta) {
+ T Value;
+ std::memcpy(reinterpret_cast<char*>(&Value), reinterpret_cast<char*>(Pos),
+ sizeof(T));
+ Value += Delta;
+ std::memcpy(reinterpret_cast<char*>(Pos), reinterpret_cast<char*>(&Value),
+ sizeof(T));
+}
+
/// relocate - Before the JIT can run a block of code that has been emitted,
/// it must rewrite the code to contain the actual addresses of any
/// referenced global symbols.
@@ -545,24 +554,24 @@ void X86JITInfo::relocate(void *Function, MachineRelocation *MR,
// PC relative relocation, add the relocated value to the value already in
// memory, after we adjust it for where the PC is.
ResultPtr = ResultPtr -(intptr_t)RelocPos - 4 - MR->getConstantVal();
- *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ addUnaligned<unsigned>(RelocPos, ResultPtr);
break;
}
case X86::reloc_picrel_word: {
// PIC base relative relocation, add the relocated value to the value
// already in memory, after we adjust it for where the PIC base is.
ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal());
- *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ addUnaligned<unsigned>(RelocPos, ResultPtr);
break;
}
case X86::reloc_absolute_word:
case X86::reloc_absolute_word_sext:
// Absolute relocation, just add the relocated value to the value already
// in memory.
- *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ addUnaligned<unsigned>(RelocPos, ResultPtr);
break;
case X86::reloc_absolute_dword:
- *((intptr_t*)RelocPos) += ResultPtr;
+ addUnaligned<intptr_t>(RelocPos, ResultPtr);
break;
}
}
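The helper added above exists because RelocPos may land at any byte offset inside the emitted code, so a direct *(unsigned*)RelocPos dereference is an unaligned access. A minimal standalone sketch of the same memcpy-based read-modify-write idea (the helper and buffer names here are invented for illustration):

#include <cstddef>
#include <cstdint>
#include <cstring>

// Hypothetical restatement of the addUnaligned idea: apply a 32-bit delta at
// an arbitrary byte offset of a code buffer without assuming the location is
// aligned for uint32_t.
static void applyFixup32(unsigned char *CodeBuffer, std::size_t Offset,
                         uint32_t Delta) {
  uint32_t Value;
  std::memcpy(&Value, CodeBuffer + Offset, sizeof(Value)); // unaligned-safe read
  Value += Delta;
  std::memcpy(CodeBuffer + Offset, &Value, sizeof(Value)); // unaligned-safe write
}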
diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h
index c76d3cc..d7c08df 100644
--- a/lib/Target/X86/X86JITInfo.h
+++ b/lib/Target/X86/X86JITInfo.h
@@ -65,7 +65,7 @@ namespace llvm {
/// referenced global symbols.
virtual void relocate(void *Function, MachineRelocation *MR,
unsigned NumRelocs, unsigned char* GOTBase);
-
+
/// allocateThreadLocalMemory - Each target has its own way of
/// handling thread local variables. This method returns a value only
/// meaningful to the target.
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index df7507c..9c0ce4e 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -46,12 +46,12 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
assert((MO.isGlobal() || MO.isSymbol()) && "Isn't a symbol reference");
SmallString<128> Name;
-
+
if (!MO.isGlobal()) {
assert(MO.isSymbol());
Name += MAI.getGlobalPrefix();
Name += MO.getSymbolName();
- } else {
+ } else {
const GlobalValue *GV = MO.getGlobal();
bool isImplicitlyPrivate = false;
if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB ||
@@ -59,7 +59,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE ||
MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
isImplicitlyPrivate = true;
-
+
Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
}
@@ -110,7 +110,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
getMachOMMI().getFnStubEntry(Sym);
if (StubSym.getPointer())
return Sym;
-
+
if (MO.isGlobal()) {
StubSym =
MachineModuleInfoImpl::
@@ -135,7 +135,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
// lot of extra uniquing.
const MCExpr *Expr = 0;
MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
-
+
switch (MO.getTargetFlags()) {
default: llvm_unreachable("Unknown target flag on GV operand");
case X86II::MO_NO_FLAG: // No flag.
@@ -144,7 +144,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_DLLIMPORT:
case X86II::MO_DARWIN_STUB:
break;
-
+
case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break;
case X86II::MO_TLVP_PIC_BASE:
Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
@@ -173,7 +173,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
Expr = MCSymbolRefExpr::Create(Sym, Ctx);
// Subtract the pic base.
- Expr = MCBinaryExpr::CreateSub(Expr,
+ Expr = MCBinaryExpr::CreateSub(Expr,
MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx),
Ctx);
if (MO.isJTI() && MAI.hasSetDirective()) {
@@ -187,10 +187,10 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
}
break;
}
-
+
if (Expr == 0)
Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx);
-
+
if (!MO.isJTI() && MO.getOffset())
Expr = MCBinaryExpr::CreateAdd(Expr,
MCConstantExpr::Create(MO.getOffset(), Ctx),
@@ -211,10 +211,10 @@ static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) {
// Convert registers in the addr mode according to subreg64.
for (unsigned i = 0; i != 4; ++i) {
if (!MI->getOperand(OpNo+i).isReg()) continue;
-
+
unsigned Reg = MI->getOperand(OpNo+i).getReg();
if (Reg == 0) continue;
-
+
MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64));
}
}
@@ -280,7 +280,7 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
return;
// Check whether this is an absolute address.
- // FIXME: We know TLVP symbol refs aren't, but there should be a better way
+ // FIXME: We know TLVP symbol refs aren't, but there should be a better way
// to do this here.
bool Absolute = true;
if (Inst.getOperand(AddrOp).isExpr()) {
@@ -289,7 +289,7 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
Absolute = false;
}
-
+
if (Absolute &&
(Inst.getOperand(AddrBase + 0).getReg() != 0 ||
Inst.getOperand(AddrBase + 2).getReg() != 0 ||
@@ -306,10 +306,10 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
-
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
-
+
MCOperand MCOp;
switch (MO.getType()) {
default:
@@ -345,10 +345,10 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
// Ignore call clobbers.
continue;
}
-
+
OutMI.addOperand(MCOp);
}
-
+
// Handle a few special cases to eliminate operand modifiers.
ReSimplify:
switch (OutMI.getOpcode()) {
@@ -425,7 +425,7 @@ ReSimplify:
case X86::TAILJMPd:
case X86::TAILJMPd64: Opcode = X86::JMP_1; break;
}
-
+
MCOperand Saved = OutMI.getOperand(0);
OutMI = MCInst();
OutMI.setOpcode(Opcode);
@@ -445,7 +445,7 @@ ReSimplify:
case X86::ADD16ri8_DB: OutMI.setOpcode(X86::OR16ri8); goto ReSimplify;
case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify;
case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify;
-
+
// The assembler backend wants to see branches in their small form and relax
// them to their large form. The JIT can only handle the large form because
// it does not do relaxation. For now, translate the large form to the
@@ -688,7 +688,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// call "L1$pb"
// "L1$pb":
// popl %esi
-
+
// Emit the call.
MCSymbol *PICBase = MF->getPICBaseSymbol();
TmpInst.setOpcode(X86::CALLpcrel32);
@@ -697,43 +697,43 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(PICBase,
OutContext)));
OutStreamer.EmitInstruction(TmpInst);
-
+
// Emit the label.
OutStreamer.EmitLabel(PICBase);
-
+
// popl $reg
TmpInst.setOpcode(X86::POP32r);
TmpInst.getOperand(0) = MCOperand::CreateReg(MI->getOperand(0).getReg());
OutStreamer.EmitInstruction(TmpInst);
return;
}
-
+
case X86::ADD32ri: {
// Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
break;
-
+
// Okay, we have something like:
// EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
-
+
// For this, we want to print something like:
// MYGLOBAL + (. - PICBASE)
// However, we can't generate a ".", so just emit a new label here and refer
// to it.
MCSymbol *DotSym = OutContext.CreateTempSymbol();
OutStreamer.EmitLabel(DotSym);
-
+
// Now that we have emitted the label, lower the complex operand expression.
MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
-
+
const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
const MCExpr *PICBase =
MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), OutContext);
DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext);
-
- DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext),
+
+ DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext),
DotExpr, OutContext);
-
+
MCInst TmpInst;
TmpInst.setOpcode(X86::ADD32ri);
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
@@ -743,7 +743,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
}
-
+
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
OutStreamer.EmitInstruction(TmpInst);
diff --git a/lib/Target/X86/X86MCInstLower.h b/lib/Target/X86/X86MCInstLower.h
index 40df3db..b4d4cfd 100644
--- a/lib/Target/X86/X86MCInstLower.h
+++ b/lib/Target/X86/X86MCInstLower.h
@@ -25,7 +25,7 @@ namespace llvm {
class Mangler;
class TargetMachine;
class X86AsmPrinter;
-
+
/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
class LLVM_LIBRARY_VISIBILITY X86MCInstLower {
MCContext &Ctx;
@@ -37,12 +37,12 @@ class LLVM_LIBRARY_VISIBILITY X86MCInstLower {
public:
X86MCInstLower(Mangler *mang, const MachineFunction &MF,
X86AsmPrinter &asmprinter);
-
+
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
-
+
private:
MachineModuleInfoMachO &getMachOMMI() const;
};
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index f83a525..78d20ce 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -24,7 +24,7 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
virtual void anchor();
/// ForceFramePointer - True if the function is required to use of frame
- /// pointer for reasons other than it containing dynamic allocation or
+ /// pointer for reasons other than it containing dynamic allocation or
/// that FP eliminatation is turned off. For example, Cygwin main function
/// contains stack pointer re-alignment code which requires FP.
bool ForceFramePointer;
@@ -83,7 +83,7 @@ public:
VarArgsFPOffset(0),
ArgumentStackSize(0),
NumLocalDynamics(0) {}
-
+
explicit X86MachineFunctionInfo(MachineFunction &MF)
: ForceFramePointer(false),
CalleeSavedFrameSize(0),
@@ -99,7 +99,7 @@ public:
ArgumentStackSize(0),
NumLocalDynamics(0) {}
- bool getForceFramePointer() const { return ForceFramePointer;}
+ bool getForceFramePointer() const { return ForceFramePointer;}
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index acf53f8..877b8f6 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -72,13 +72,15 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
SlotSize = 8;
StackPtr = X86::RSP;
FramePtr = X86::RBP;
- BasePtr = X86::RBX;
} else {
SlotSize = 4;
StackPtr = X86::ESP;
FramePtr = X86::EBP;
- BasePtr = X86::EBX;
}
+ // Use a callee-saved register as the base pointer. These registers must
+ // not conflict with any ABI requirements. For example, in 32-bit PIC mode the
+ // GOT pointer must live in EBX for calls through the PLT, so EBX is unusable here.
+ BasePtr = Is64Bit ? X86::RBX : X86::ESI;
}
/// getCompactUnwindRegNum - This function maps the register to the number for
@@ -366,7 +368,7 @@ bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
if (!EnableBasePointer)
return false;
- // When we need stack realignment and there are dynamic allocas, we can't
+ // When we need stack realignment and there are dynamic allocas, we can't
// reference off of the stack pointer, so we reserve a base pointer.
if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
return true;
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index ae2d4d0..edc7184 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -23,9 +23,6 @@ let Namespace = "X86" in {
def sub_8bit_hi : SubRegIndex;
def sub_16bit : SubRegIndex;
def sub_32bit : SubRegIndex;
-
- def sub_ss : SubRegIndex;
- def sub_sd : SubRegIndex;
def sub_xmm : SubRegIndex;
@@ -163,8 +160,6 @@ let Namespace = "X86" in {
def FP6 : Register<"fp6">;
// XMM Registers, used by the various SSE instruction set extensions.
- // The sub_ss and sub_sd subregs are the same registers with another regclass.
- let CompositeIndices = [(sub_ss), (sub_sd)] in {
def XMM0: Register<"xmm0">, DwarfRegNum<[17, 21, 21]>;
def XMM1: Register<"xmm1">, DwarfRegNum<[18, 22, 22]>;
def XMM2: Register<"xmm2">, DwarfRegNum<[19, 23, 23]>;
@@ -184,7 +179,7 @@ let Namespace = "X86" in {
def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>;
def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>;
def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
- }}
+ } // CostPerUse
// YMM Registers, used by AVX instructions
let SubRegIndices = [sub_xmm] in {
diff --git a/lib/Target/X86/X86Relocations.h b/lib/Target/X86/X86Relocations.h
index 857becf..0333056 100644
--- a/lib/Target/X86/X86Relocations.h
+++ b/lib/Target/X86/X86Relocations.h
@@ -21,7 +21,7 @@ namespace llvm {
/// RelocationType - An enum for the x86 relocation codes. Note that
/// the terminology here doesn't follow x86 convention - word means
/// 32-bit and dword means 64-bit. The relocations will be treated
- /// by JIT or ObjectCode emitters, this is transparent to the x86 code
+ /// by JIT or ObjectCode emitters, this is transparent to the x86 code
/// emitter but JIT and ObjectCode will treat them differently
enum RelocationType {
/// reloc_pcrel_word - PC relative relocation, add the relocated value to
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 7c6788f..00edcbc 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -38,7 +38,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
// If to a segment-relative address space, use the default lowering.
if (DstPtrInfo.getAddrSpace() >= 256)
return SDValue();
-
+
// If not DWORD aligned or size is more than the threshold, call the library.
// The libc version is likely to be faster for these cases. It can use the
// address value and run time information about the CPU.
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index e6e9c56..9087852 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -39,10 +39,10 @@ unsigned char X86Subtarget::
ClassifyBlockAddressReference() const {
if (isPICStyleGOT()) // 32-bit ELF targets.
return X86II::MO_GOTOFF;
-
+
if (isPICStyleStubPIC()) // Darwin/32 in PIC mode.
return X86II::MO_PIC_BASE_OFFSET;
-
+
// Direct static reference to label.
return X86II::MO_NO_FLAG;
}
@@ -69,7 +69,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
// Large model never uses stubs.
if (TM.getCodeModel() == CodeModel::Large)
return X86II::MO_NO_FLAG;
-
+
if (isTargetDarwin()) {
// If symbol visibility is hidden, the extra load is not needed if
// target is x86-64 or the symbol is definitely defined in the current
@@ -87,18 +87,18 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
return X86II::MO_NO_FLAG;
}
-
+
if (isPICStyleGOT()) { // 32-bit ELF targets.
// Extra load is needed for all externally visible.
if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
return X86II::MO_GOTOFF;
return X86II::MO_GOT;
}
-
+
if (isPICStyleStubPIC()) { // Darwin/32 in PIC mode.
// Determine whether we have a stub reference and/or whether the reference
// is relative to the PIC base or not.
-
+
// If this is a strong reference to a definition, it is definitely not
// through a stub.
if (!isDecl && !GV->isWeakForLinker())
@@ -108,26 +108,26 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
// normal $non_lazy_ptr stub because this symbol might be resolved late.
if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
-
+
// If symbol visibility is hidden, we have a stub for common symbol
// references and external declarations.
if (isDecl || GV->hasCommonLinkage()) {
// Hidden $non_lazy_ptr reference.
return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE;
}
-
+
// Otherwise, no stub.
return X86II::MO_PIC_BASE_OFFSET;
}
-
+
if (isPICStyleStubNoDynamic()) { // Darwin/32 in -mdynamic-no-pic mode.
// Determine whether we have a stub reference.
-
+
// If this is a strong reference to a definition, it is definitely not
// through a stub.
if (!isDecl && !GV->isWeakForLinker())
return X86II::MO_NO_FLAG;
-
+
// Unless we have a symbol with hidden visibility, we have to go through a
// normal $non_lazy_ptr stub because this symbol might be resolved late.
if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
@@ -136,7 +136,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
// Otherwise, no stub.
return X86II::MO_NO_FLAG;
}
-
+
// Direct static reference to global.
return X86II::MO_NO_FLAG;
}
@@ -246,8 +246,11 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
}
// If it's Nehalem, unaligned memory access is fast.
- // FIXME: Nehalem is family 6. Also include Westmere and later processors?
- if (Family == 15 && Model == 26) {
+ // Include Westmere and Sandy Bridge as well.
+ // FIXME: add later processors.
+ if (IsIntel && ((Family == 6 && Model == 26) ||
+ (Family == 6 && Model == 44) ||
+ (Family == 6 && Model == 42))) {
IsUAMemFast = true;
ToggleFeature(X86::FeatureFastUAMem);
}
@@ -315,7 +318,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
}
X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS,
+ const std::string &FS,
unsigned StackAlignOverride, bool is64Bit)
: X86GenSubtargetInfo(TT, CPU, FS)
, X86ProcFamily(Others)
@@ -397,10 +400,10 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
}
}
- if (X86ProcFamily == IntelAtom) {
+ if (X86ProcFamily == IntelAtom)
PostRAScheduler = true;
- InstrItins = getInstrItineraryForCPU(CPUName);
- }
+
+ InstrItins = getInstrItineraryForCPU(CPUName);
// It's important to keep the MCSubtargetInfo feature bits in sync with
// target data structure which is shared with MC code emitter, etc.
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 1af585f..6841c5b 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -55,7 +55,7 @@ protected:
/// X86ProcFamily - X86 processor family: Intel Atom, and others
X86ProcFamilyEnum X86ProcFamily;
-
+
/// PICStyle - Which PIC style to use
///
PICStyles::Style PICStyle;
@@ -149,7 +149,7 @@ protected:
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
-
+
/// Instruction itineraries for scheduling
InstrItineraryData InstrItins;
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index e4f567f..80b75dc 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -222,7 +222,7 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
DebugLoc dl = I->getDebugLoc();
bool isControlFlow = MI->isCall() || MI->isReturn();
- // Shortcut: don't need to check regular instructions in dirty state.
+ // Shortcut: don't need to check regular instructions in dirty state.
if (!isControlFlow && CurState == ST_DIRTY)
continue;
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index 3dbc3b9..a4e5647 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -371,8 +371,3 @@ XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
false));
}
}
-
-void XCoreFrameLowering::
-processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
-
-}
diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h
index afa2773..db1bbb6 100644
--- a/lib/Target/XCore/XCoreFrameLowering.h
+++ b/lib/Target/XCore/XCoreFrameLowering.h
@@ -44,8 +44,6 @@ namespace llvm {
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
-
//! Stack slot size (4 bytes)
static int stackSlotSize() {
return 4;
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 60ce958..6d950d2 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -352,7 +352,8 @@ static bool IsSafeComputationToRemove(Value *V) {
return true;
if (!V->hasOneUse())
return false;
- if (isa<LoadInst>(V) || isa<Argument>(V) || isa<GlobalValue>(V))
+ if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
+ isa<GlobalValue>(V))
return false;
if (isAllocationFn(V))
return true;
@@ -442,12 +443,14 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV) {
Dead[i].second->eraseFromParent();
Instruction *I = Dead[i].first;
do {
+ if (isAllocationFn(I))
+ break;
Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
if (!J)
break;
I->eraseFromParent();
I = J;
- } while (!isAllocationFn(I));
+ } while (1);
I->eraseFromParent();
}
}
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index d8e8cf7..80bfc1c 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -27,6 +27,7 @@
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
+#include "llvm/TypeFinder.h"
#include "llvm/ValueSymbolTable.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
@@ -175,8 +176,8 @@ static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) {
// Strip any named types of their names.
static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
- std::vector<StructType*> StructTypes;
- M.findUsedStructTypes(StructTypes);
+ TypeFinder StructTypes;
+ StructTypes.run(M, false);
for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
StructType *STy = StructTypes[i];
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c1d9d01..cbe1ca4 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -51,8 +51,8 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
// if the size is something we can handle with a single primitive load/store.
// A single load+store correctly handles overlapping memory in the memmove
// case.
- unsigned Size = MemOpLength->getZExtValue();
- if (Size == 0) return MI; // Delete this mem transfer.
+ uint64_t Size = MemOpLength->getLimitedValue();
+ assert(Size && "0-sized memory transferring should be removed already.");
if (Size > 8 || (Size&(Size-1)))
return 0; // If not 1/2/4/8 bytes, exit.
@@ -133,11 +133,9 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
return 0;
- uint64_t Len = LenC->getZExtValue();
+ uint64_t Len = LenC->getLimitedValue();
Alignment = MI->getAlignment();
-
- // If the length is zero, this is a no-op
- if (Len == 0) return MI; // memset(d,c,0,a) -> noop
+ assert(Len && "0-sized memset should be removed already.");
// memset(s,c,n) -> store s, c (for n=1,2,4,8)
if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
@@ -795,7 +793,7 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
if (CI->getCalledFunction() == 0) return 0;
InstCombineFortifiedLibCalls Simplifier(this);
- Simplifier.fold(CI, TD);
+ Simplifier.fold(CI, TD, TLI);
return Simplifier.NewInstruction;
}
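For reference, the rewrite into a single load/store pair above only fires when the constant length survives the size filter; restated as a standalone helper (name invented here):

#include <cstdint>

// A single integer load+store can replace the memcpy/memmove only for lengths
// of 1, 2, 4 or 8 bytes; (Size & (Size - 1)) == 0 is the usual power-of-two
// test, matching the "Size > 8 || (Size & (Size - 1))" rejection above.
static bool fitsSinglePrimitiveTransfer(uint64_t Size) {
  return Size != 0 && Size <= 8 && (Size & (Size - 1)) == 0;
}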
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 7076d88..c3fc18c 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
@@ -2824,7 +2825,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
case ICmpInst::ICMP_UGE:
// (float)int >= -4.4 --> true
// (float)int >= 4.4 --> int > 4
- if (!RHS.isNegative())
+ if (RHS.isNegative())
return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
Pred = ICmpInst::ICMP_UGT;
break;
@@ -2985,6 +2986,44 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
return Res;
}
break;
+ case Instruction::Call: {
+ CallInst *CI = cast<CallInst>(LHSI);
+ LibFunc::Func Func;
+ // Various optimization for fabs compared with zero.
+ if (RHSC->isNullValue() && CI->getCalledFunction() &&
+ TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
+ TLI->has(Func)) {
+ if (Func == LibFunc::fabs || Func == LibFunc::fabsf ||
+ Func == LibFunc::fabsl) {
+ switch (I.getPredicate()) {
+ default: break;
+ // fabs(x) < 0 --> false
+ case FCmpInst::FCMP_OLT:
+ return ReplaceInstUsesWith(I, Builder->getFalse());
+ // fabs(x) > 0 --> x != 0
+ case FCmpInst::FCMP_OGT:
+ return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0),
+ RHSC);
+ // fabs(x) <= 0 --> x == 0
+ case FCmpInst::FCMP_OLE:
+ return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0),
+ RHSC);
+ // fabs(x) >= 0 --> !isnan(x)
+ case FCmpInst::FCMP_OGE:
+ return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0),
+ RHSC);
+ // fabs(x) == 0 --> x == 0
+ // fabs(x) != 0 --> x != 0
+ case FCmpInst::FCMP_OEQ:
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_ONE:
+ case FCmpInst::FCMP_UNE:
+ return new FCmpInst(I.getPredicate(), CI->getArgOperand(0),
+ RHSC);
+ }
+ }
+ }
+ }
}
}
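The new Call case encodes a few scalar identities about fabs; a small C++ restatement of the ones being folded (function names invented, NaN behaviour noted in the comments):

#include <cmath>

// For any non-NaN x:
//   std::fabs(x) >  0.0   has the same value as   x != 0.0
//   std::fabs(x) <= 0.0   has the same value as   x == 0.0
//   std::fabs(x) <  0.0   is always false
// and for every x, NaN included:
//   std::fabs(x) >= 0.0   has the same value as   !std::isnan(x)
bool fabsIsPositive(double x) { return std::fabs(x) > 0.0; }
bool isNonZero(double x)      { return x != 0.0; } // agrees with fabsIsPositive for non-NaN x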
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index c485844..6ecb4c5 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -20,7 +20,154 @@
#include "llvm/ADT/Statistic.h"
using namespace llvm;
-STATISTIC(NumDeadStore, "Number of dead stores eliminated");
+STATISTIC(NumDeadStore, "Number of dead stores eliminated");
+STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
+
+/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
+/// some part of a constant global variable. This intentionally only accepts
+/// constant expressions because we can't rewrite arbitrary instructions.
+static bool pointsToConstantGlobal(Value *V) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return GV->isConstant();
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr)
+ return pointsToConstantGlobal(CE->getOperand(0));
+ return false;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
+/// pointer to an alloca. Ignore any reads of the pointer, return false if we
+/// see any stores or other unknown uses. If we see pointer arithmetic, keep
+/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
+/// the uses. If we see a memcpy/memmove that targets an unoffset pointer to
+/// the alloca, and if the source pointer is a pointer to a constant global, we
+/// can optimize this.
+static bool
+isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
+ SmallVectorImpl<Instruction *> &ToDelete,
+ bool IsOffset = false) {
+ // We track lifetime intrinsics as we encounter them. If we decide to go
+ // ahead and replace the value with the global, this lets the caller quickly
+ // eliminate the markers.
+
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
+ User *U = cast<Instruction>(*UI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ // Simple (non-volatile, non-atomic) loads are always ok; anything else fails.
+ if (!LI->isSimple()) return false;
+ continue;
+ }
+
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ // If uses of the bitcast are ok, we are ok.
+ if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset))
+ return false;
+ continue;
+ }
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ // If the GEP has all zero indices, it doesn't offset the pointer;
+ // otherwise it does.
+ if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete,
+ IsOffset || !GEP->hasAllZeroIndices()))
+ return false;
+ continue;
+ }
+
+ if (CallSite CS = U) {
+ // If this is the function being called then we treat it like a load and
+ // ignore it.
+ if (CS.isCallee(UI))
+ continue;
+
+ // If this is a readonly/readnone call site, then we know it is just a
+ // load (but one that potentially returns the value itself), so we can
+ // ignore it if we know that the value isn't captured.
+ unsigned ArgNo = CS.getArgumentNo(UI);
+ if (CS.onlyReadsMemory() &&
+ (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
+ continue;
+
+ // If this is being passed as a byval argument, the caller is making a
+ // copy, so it is only a read of the alloca.
+ if (CS.isByValArgument(ArgNo))
+ continue;
+ }
+
+ // Lifetime intrinsics can be handled by the caller.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ assert(II->use_empty() && "Lifetime markers have no result to use!");
+ ToDelete.push_back(II);
+ continue;
+ }
+ }
+
+ // If this isn't our memcpy/memmove, reject it as something we can't
+ // handle.
+ MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
+ if (MI == 0)
+ return false;
+
+ // If the transfer is using the alloca as a source of the transfer, then
+ // ignore it since it is a load (unless the transfer is volatile).
+ if (UI.getOperandNo() == 1) {
+ if (MI->isVolatile()) return false;
+ continue;
+ }
+
+ // If we already have seen a copy, reject the second one.
+ if (TheCopy) return false;
+
+ // If the pointer has been offset from the start of the alloca, we can't
+ // safely handle this.
+ if (IsOffset) return false;
+
+ // If the memintrinsic isn't using the alloca as the dest, reject it.
+ if (UI.getOperandNo() != 0) return false;
+
+ // If the source of the memcpy/move is not a constant global, reject it.
+ if (!pointsToConstantGlobal(MI->getSource()))
+ return false;
+
+ // Otherwise, the transform is safe. Remember the copy instruction.
+ TheCopy = MI;
+ }
+ return true;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Return the copying memcpy/memmove if the
+/// specified alloca is only modified by a copy from a constant global (null
+/// otherwise); if so, uses of the alloca can be replaced with the global.
+static MemTransferInst *
+isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
+ SmallVectorImpl<Instruction *> &ToDelete) {
+ MemTransferInst *TheCopy = 0;
+ if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
+ return TheCopy;
+ return 0;
+}
+
+/// getPointeeAlignment - Compute the minimum alignment of the value pointed
+/// to by the given pointer.
+static unsigned getPointeeAlignment(Value *V, const TargetData &TD) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::BitCast ||
+ (CE->getOpcode() == Instruction::GetElementPtr &&
+ cast<GEPOperator>(CE)->hasAllZeroIndices()))
+ return getPointeeAlignment(CE->getOperand(0), TD);
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ if (!GV->isDeclaration())
+ return TD.getPreferredAlignment(GV);
+
+ if (PointerType *PT = dyn_cast<PointerType>(V->getType()))
+ return TD.getABITypeAlignment(PT->getElementType());
+
+ return 0;
+}
Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Ensure that the alloca array size argument has type intptr_t, so that
@@ -113,6 +260,29 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
}
}
+ // Check to see if this allocation is only modified by a memcpy/memmove from
+ // a constant global whose alignment is equal to or exceeds that of the
+ // allocation. If this is the case, we can change all users to use
+ // the constant global instead. This is commonly produced by the CFE by
+ // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
+ // is only subsequently read.
+ SmallVector<Instruction *, 4> ToDelete;
+ if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
+ if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) {
+ DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
+ DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
+ for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
+ EraseInstFromFunction(*ToDelete[i]);
+ Constant *TheSrc = cast<Constant>(Copy->getSource());
+ Instruction *NewI
+ = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc,
+ AI.getType()));
+ EraseInstFromFunction(*Copy);
+ ++NumGlobalCopies;
+ return NewI;
+ }
+ }
+
// At last, use the generic allocation site handler to aggressively remove
// unused allocas.
return visitAllocSite(AI);
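The alloca-to-global forwarding above targets the usual frontend lowering of a braced local array initializer; a minimal C++ reproduction of the case described in the comment (names invented):

// Clang typically lowers the initializer to a private constant global plus a
// memcpy into Local's alloca; because Local is only read afterwards, the new
// rule forwards the accesses to the constant global and deletes the copy.
int sumTableEntry(unsigned i) {
  int Local[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  return Local[i & 7];
}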
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index eb9945b..291e800 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -881,12 +881,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) {
if (TrueSI->getCondition() == CondVal) {
+ if (SI.getTrueValue() == TrueSI->getTrueValue())
+ return 0;
SI.setOperand(1, TrueSI->getTrueValue());
return &SI;
}
}
if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) {
if (FalseSI->getCondition() == CondVal) {
+ if (SI.getFalseValue() == FalseSI->getFalseValue())
+ return 0;
SI.setOperand(2, FalseSI->getFalseValue());
return &SI;
}
@@ -899,5 +903,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return &SI;
}
+ if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) {
+ unsigned VWidth = VecTy->getNumElements();
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (Value *V = SimplifyDemandedVectorElts(&SI, AllOnesEltMask, UndefElts)) {
+ if (V != &SI)
+ return ReplaceInstUsesWith(SI, V);
+ return &SI;
+ }
+ }
+
return 0;
}
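The guarded select-of-select fold can be read off a small scalar example (function name invented); the new early returns skip the rewrite when it would leave the operand unchanged, presumably so the combiner does not keep reporting a no-op change:

// When two selects share a condition, the inner select's untaken arm is dead:
//   cond ? (cond ? a : b) : d   always evaluates to   cond ? a : d
int pickValue(bool cond, int a, int b, int d) {
  int inner = cond ? a : b;
  return cond ? inner : d; // InstCombine folds this to: cond ? a : d
}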
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 125c74a..54be8ed 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -989,6 +989,29 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
break;
}
+ case Instruction::Select: {
+ APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
+ if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) {
+ for (unsigned i = 0; i < VWidth; i++) {
+ if (CV->getAggregateElement(i)->isNullValue())
+ LeftDemanded.clearBit(i);
+ else
+ RightDemanded.clearBit(i);
+ }
+ }
+
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded,
+ UndefElts, Depth+1);
+ if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
+ UndefElts2, Depth+1);
+ if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
+
+ // Output elements are undefined if both are undefined.
+ UndefElts &= UndefElts2;
+ break;
+ }
case Instruction::BitCast: {
// Vector->vector casts only.
VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
@@ -1074,6 +1097,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// like undef&0. The result is known zero, not undef.
UndefElts &= UndefElts2;
break;
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
+ UndefElts, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ break;
case Instruction::Call: {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 3368026..06f4d2f 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -61,6 +61,8 @@ static const int kAsanCtorAndCtorPriority = 1;
static const char *kAsanReportErrorTemplate = "__asan_report_";
static const char *kAsanRegisterGlobalsName = "__asan_register_globals";
static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals";
+static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
+static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
static const char *kAsanInitName = "__asan_init";
static const char *kAsanHandleNoReturnName = "__asan_handle_no_return";
static const char *kAsanMappingOffsetName = "__asan_mapping_offset";
@@ -86,8 +88,8 @@ static cl::opt<bool> ClInstrumentWrites("asan-instrument-writes",
static cl::opt<bool> ClInstrumentAtomics("asan-instrument-atomics",
cl::desc("instrument atomic instructions (rmw, cmpxchg)"),
cl::Hidden, cl::init(true));
-static cl::opt<bool> ClMergeCallbacks("asan-merge-callbacks",
- cl::desc("merge __asan_report_ callbacks to create fewer BBs"),
+static cl::opt<bool> ClAlwaysSlowPath("asan-always-slow-path",
+ cl::desc("use instrumentation with slow path for all accesses"),
cl::Hidden, cl::init(false));
// This flag limits the number of instructions to be instrumented
// in any given BB. Normally, this should be set to unlimited (INT_MAX),
@@ -106,6 +108,8 @@ static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
// This flag may need to be replaced with -f[no]asan-globals.
static cl::opt<bool> ClGlobals("asan-globals",
cl::desc("Handle global objects"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClInitializers("asan-initialization-order",
+ cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false));
static cl::opt<bool> ClMemIntrin("asan-memintrin",
cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true));
// This flag may need to be replaced with -fasan-blacklist.
@@ -145,24 +149,11 @@ static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"),
namespace {
-/// When the crash callbacks are merged, they receive some amount of arguments
-/// that are merged in a PHI node. This struct represents arguments from one
-/// call site.
-struct CrashArg {
- Value *Arg1;
- Value *Arg2;
-};
-
/// An object of this type is created while instrumenting every function.
struct AsanFunctionContext {
- AsanFunctionContext(Function &Function) : F(Function), CrashBlock() { }
+ AsanFunctionContext(Function &Function) : F(Function) { }
Function &F;
- // These are initially zero. If we require at least one call to
- // __asan_report_{read,write}{1,2,4,8,16}, an appropriate BB is created.
- BasicBlock *CrashBlock[2][kNumberOfAccessSizes];
- typedef SmallVector<CrashArg, 8> CrashArgsVec;
- CrashArgsVec CrashArgs[2][kNumberOfAccessSizes];
};
/// AddressSanitizer: instrument the code in module to find memory bugs.
@@ -175,7 +166,7 @@ struct AddressSanitizer : public ModulePass {
Value *Addr, uint32_t TypeSize, bool IsWrite);
Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
Value *ShadowValue, uint32_t TypeSize);
- Instruction *generateCrashCode(BasicBlock *BB, Value *Addr, Value *PC,
+ Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr,
bool IsWrite, size_t AccessSizeIndex);
bool instrumentMemIntrinsic(AsanFunctionContext &AFC, MemIntrinsic *MI);
void instrumentMemIntrinsicParam(AsanFunctionContext &AFC,
@@ -184,6 +175,8 @@ struct AddressSanitizer : public ModulePass {
Instruction *InsertBefore, bool IsWrite);
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
bool handleFunction(Module &M, Function &F);
+ void createInitializerPoisonCalls(Module &M,
+ Value *FirstAddr, Value *LastAddr);
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
bool poisonStackInFunction(Module &M, Function &F);
virtual bool runOnModule(Module &M);
@@ -191,7 +184,6 @@ struct AddressSanitizer : public ModulePass {
static char ID; // Pass identification, replacement for typeid
private:
-
uint64_t getAllocaSizeInBytes(AllocaInst *AI) {
Type *Ty = AI->getAllocatedType();
uint64_t SizeInBytes = TD->getTypeAllocSize(Ty);
@@ -207,9 +199,12 @@ struct AddressSanitizer : public ModulePass {
}
Function *checkInterfaceFunction(Constant *FuncOrBitcast);
+ bool ShouldInstrumentGlobal(GlobalVariable *G);
void PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB,
Value *ShadowBase, bool DoPoison);
bool LooksLikeCodeInBug11395(Instruction *I);
+ void FindDynamicInitializers(Module &M);
+ bool HasDynamicInitializer(GlobalVariable *G);
LLVMContext *C;
TargetData *TD;
@@ -226,6 +221,7 @@ struct AddressSanitizer : public ModulePass {
// This array is indexed by AccessIsWrite and log2(AccessSize).
Function *AsanErrorCallback[2][kNumberOfAccessSizes];
InlineAsm *EmptyAsm;
+ SmallSet<GlobalValue*, 32> DynamicallyInitializedGlobals;
};
} // namespace
@@ -267,24 +263,24 @@ static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) {
// ThenBlock
// Tail
//
-// If ThenBlock is zero, a new block is created and its terminator is returned.
-// Otherwize 0 is returned.
-static BranchInst *splitBlockAndInsertIfThen(Value *Cmp,
- BasicBlock *ThenBlock = 0) {
+// A new ThenBlock is created and its terminator is returned.
+// If Unreachable, ThenBlock is terminated with UnreachableInst, otherwise
+// it is terminated with BranchInst to Tail.
+static TerminatorInst *splitBlockAndInsertIfThen(Value *Cmp, bool Unreachable) {
Instruction *SplitBefore = cast<Instruction>(Cmp)->getNextNode();
BasicBlock *Head = SplitBefore->getParent();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
TerminatorInst *HeadOldTerm = Head->getTerminator();
- BranchInst *CheckTerm = 0;
- if (!ThenBlock) {
- LLVMContext &C = Head->getParent()->getParent()->getContext();
- ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+ LLVMContext &C = Head->getParent()->getParent()->getContext();
+ BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+ TerminatorInst *CheckTerm;
+ if (Unreachable)
+ CheckTerm = new UnreachableInst(C, ThenBlock);
+ else
CheckTerm = BranchInst::Create(Tail, ThenBlock);
- }
BranchInst *HeadNewTerm =
BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp);
ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
-
return CheckTerm;
}
@@ -336,7 +332,7 @@ bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC,
Value *Cmp = IRB.CreateICmpNE(Length,
Constant::getNullValue(Length->getType()));
- InsertBefore = splitBlockAndInsertIfThen(Cmp);
+ InsertBefore = splitBlockAndInsertIfThen(Cmp, false);
}
instrumentMemIntrinsicParam(AFC, MI, Dst, Length, InsertBefore, true);
@@ -371,14 +367,50 @@ static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) {
return NULL;
}
+void AddressSanitizer::FindDynamicInitializers(Module& M) {
+ // Clang generates metadata identifying all dynamically initialized globals.
+ NamedMDNode *DynamicGlobals =
+ M.getNamedMetadata("llvm.asan.dynamically_initialized_globals");
+ if (!DynamicGlobals)
+ return;
+ for (int i = 0, n = DynamicGlobals->getNumOperands(); i < n; ++i) {
+ MDNode *MDN = DynamicGlobals->getOperand(i);
+ assert(MDN->getNumOperands() == 1);
+ Value *VG = MDN->getOperand(0);
+ // The optimizer may optimize away a global entirely, in which case we
+ // cannot instrument access to it.
+ if (!VG)
+ continue;
+
+ GlobalVariable *G = cast<GlobalVariable>(VG);
+ DynamicallyInitializedGlobals.insert(G);
+ }
+}
+// Returns true if a global variable is initialized dynamically in this TU.
+bool AddressSanitizer::HasDynamicInitializer(GlobalVariable *G) {
+ return DynamicallyInitializedGlobals.count(G);
+}
+
void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) {
bool IsWrite;
Value *Addr = isInterestingMemoryAccess(I, &IsWrite);
assert(Addr);
- if (ClOpt && ClOptGlobals && isa<GlobalVariable>(Addr)) {
- // We are accessing a global scalar variable. Nothing to catch here.
- return;
+ if (ClOpt && ClOptGlobals) {
+ if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) {
+ // If initialization order checking is disabled, a simple access to a
+ // dynamically initialized global is always valid.
+ if (!ClInitializers)
+ return;
+ // If a global variable does not have dynamic initialization we don't
+ // have to instrument it. However, if a global has external linkage, we
+ // assume it has dynamic initialization, as it may have an initializer
+ // in a different TU.
+ if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
+ !HasDynamicInitializer(G))
+ return;
+ }
}
+
Type *OrigPtrTy = Addr->getType();
Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
@@ -407,15 +439,11 @@ Function *AddressSanitizer::checkInterfaceFunction(Constant *FuncOrBitcast) {
}
Instruction *AddressSanitizer::generateCrashCode(
- BasicBlock *BB, Value *Addr, Value *PC,
+ Instruction *InsertBefore, Value *Addr,
bool IsWrite, size_t AccessSizeIndex) {
- IRBuilder<> IRB(BB->getFirstNonPHI());
- CallInst *Call;
- if (PC)
- Call = IRB.CreateCall2(AsanErrorCallback[IsWrite][AccessSizeIndex],
- Addr, PC);
- else
- Call = IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], Addr);
+ IRBuilder<> IRB(InsertBefore);
+ CallInst *Call = IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex],
+ Addr);
// We don't do Call->setDoesNotReturn() because the BB already has
// UnreachableInst at the end.
// This EmptyAsm is required to avoid callback merge.
@@ -436,7 +464,7 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1));
// (uint8_t) ((Addr & (Granularity-1)) + size - 1)
LastAccessedByte = IRB.CreateIntCast(
- LastAccessedByte, IRB.getInt8Ty(), false);
+ LastAccessedByte, ShadowValue->getType(), false);
// ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
}
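
Illustrative sketch, not part of the patch: createSlowPathCmp builds the byte-granular half of the ASan check shown above. The whole check, written as plain C++; the flat shadow mapping and the Scale default of 3 are assumptions for the sketch, not something this diff establishes.

#include <cstdint>

// A non-zero shadow byte encodes how many leading bytes of the granule are
// addressable, so an access is bad only if its last byte reaches that value.
bool accessIsPoisoned(uintptr_t Addr, unsigned AccessSize,
                      const int8_t *ShadowBase, unsigned Scale = 3) {
  uintptr_t Granularity = 1u << Scale;
  int8_t ShadowValue = ShadowBase[Addr >> Scale];   // hypothetical flat shadow
  if (ShadowValue == 0)
    return false;              // whole granule addressable: fast path passes
  // Slow path: compare against the last byte the access touches, i.e.
  // (Addr & (Granularity - 1)) + AccessSize - 1, as in createSlowPathCmp.
  int8_t LastAccessedByte = (Addr & (Granularity - 1)) + AccessSize - 1;
  return LastAccessedByte >= ShadowValue;
}
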
@@ -456,112 +484,129 @@ void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC,
IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
-
- BasicBlock *CrashBlock = 0;
- if (ClMergeCallbacks) {
- size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
- BasicBlock **Cached = &AFC.CrashBlock[IsWrite][AccessSizeIndex];
- if (!*Cached) {
- std::string BBName("crash_bb-");
- BBName += (IsWrite ? "w-" : "r-") + itostr(1 << AccessSizeIndex);
- BasicBlock *BB = BasicBlock::Create(*C, BBName, &AFC.F);
- new UnreachableInst(*C, BB);
- *Cached = BB;
- }
- CrashBlock = *Cached;
- // We need to pass the PC as the second parameter to __asan_report_*.
- // There are few problems:
- // - Some architectures (e.g. x86_32) don't have a cheap way to get the PC.
- // - LLVM doesn't have the appropriate intrinsic.
- // For now, put a random number into the PC, just to allow experiments.
- Value *PC = ConstantInt::get(IntptrTy, rand());
- CrashArg Arg = {AddrLong, PC};
- AFC.CrashArgs[IsWrite][AccessSizeIndex].push_back(Arg);
- } else {
- CrashBlock = BasicBlock::Create(*C, "crash_bb", &AFC.F);
- new UnreachableInst(*C, CrashBlock);
- size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
- Instruction *Crash =
- generateCrashCode(CrashBlock, AddrLong, 0, IsWrite, AccessSizeIndex);
- Crash->setDebugLoc(OrigIns->getDebugLoc());
- }
-
+ size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
size_t Granularity = 1 << MappingScale;
- if (TypeSize < 8 * Granularity) {
- BranchInst *CheckTerm = splitBlockAndInsertIfThen(Cmp);
- assert(CheckTerm->isUnconditional());
+ TerminatorInst *CrashTerm = 0;
+
+ if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
+ TerminatorInst *CheckTerm = splitBlockAndInsertIfThen(Cmp, false);
+ assert(cast<BranchInst>(CheckTerm)->isUnconditional());
BasicBlock *NextBB = CheckTerm->getSuccessor(0);
IRB.SetInsertPoint(CheckTerm);
Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize);
+ BasicBlock *CrashBlock = BasicBlock::Create(*C, "", &AFC.F, NextBB);
+ CrashTerm = new UnreachableInst(*C, CrashBlock);
BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2);
ReplaceInstWithInst(CheckTerm, NewTerm);
} else {
- splitBlockAndInsertIfThen(Cmp, CrashBlock);
+ CrashTerm = splitBlockAndInsertIfThen(Cmp, true);
+ }
+
+ Instruction *Crash =
+ generateCrashCode(CrashTerm, AddrLong, IsWrite, AccessSizeIndex);
+ Crash->setDebugLoc(OrigIns->getDebugLoc());
+}
+
+void AddressSanitizer::createInitializerPoisonCalls(Module &M,
+ Value *FirstAddr,
+ Value *LastAddr) {
+ // We do all of our poisoning and unpoisoning within _GLOBAL__I_a.
+ Function *GlobalInit = M.getFunction("_GLOBAL__I_a");
+ // If that function is not present, this TU contains no globals, or they have
+ // all been optimized away.
+ if (!GlobalInit)
+ return;
+
+ // Set up the arguments to our poison/unpoison functions.
+ IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt());
+
+ // Declare our poisoning and unpoisoning functions.
+ Function *AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ AsanPoisonGlobals->setLinkage(Function::ExternalLinkage);
+ Function *AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanUnpoisonGlobalsName, IRB.getVoidTy(), NULL));
+ AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage);
+
+ // Add a call to poison all external globals before the given function starts.
+ IRB.CreateCall2(AsanPoisonGlobals, FirstAddr, LastAddr);
+
+ // Add calls to unpoison all globals before each return instruction.
+ for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end();
+ I != E; ++I) {
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) {
+ CallInst::Create(AsanUnpoisonGlobals, "", RI);
+ }
}
}
+bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) {
+ Type *Ty = cast<PointerType>(G->getType())->getElementType();
+ DEBUG(dbgs() << "GLOBAL: " << *G);
+
+ if (!Ty->isSized()) return false;
+ if (!G->hasInitializer()) return false;
+ // Touch only those globals that will not be defined in other modules.
+ // Don't handle ODR type linkages since other modules may be built w/o asan.
+ if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
+ G->getLinkage() != GlobalVariable::PrivateLinkage &&
+ G->getLinkage() != GlobalVariable::InternalLinkage)
+ return false;
+ // Two problems with thread-locals:
+ // - The address of the main thread's copy can't be computed at link-time.
+ // - Need to poison all copies, not just the main thread's one.
+ if (G->isThreadLocal())
+ return false;
+ // For now, just ignore this global if the alignment is large.
+ if (G->getAlignment() > RedzoneSize) return false;
+
+ // Ignore all the globals with the names starting with "\01L_OBJC_".
+ // Many of those are put into the .cstring section. The linker compresses
+ // that section by removing the spare \0s after the string terminator, so
+ // our redzones get broken.
+ if ((G->getName().find("\01L_OBJC_") == 0) ||
+ (G->getName().find("\01l_OBJC_") == 0)) {
+ DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G);
+ return false;
+ }
+
+ if (G->hasSection()) {
+ StringRef Section(G->getSection());
+ // Ignore the globals from the __OBJC section. The ObjC runtime assumes
+ // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
+ // them.
+ if ((Section.find("__OBJC,") == 0) ||
+ (Section.find("__DATA, __objc_") == 0)) {
+ DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G);
+ return false;
+ }
+ // See http://code.google.com/p/address-sanitizer/issues/detail?id=32
+ // Constant CFString instances are compiled in the following way:
+ // -- the string buffer is emitted into
+ // __TEXT,__cstring,cstring_literals
+ // -- the constant NSConstantString structure referencing that buffer
+ // is placed into __DATA,__cfstring
+ // Therefore there's no point in placing redzones into __DATA,__cfstring.
+ // Moreover, it causes the linker to crash on OS X 10.7
+ if (Section.find("__DATA,__cfstring") == 0) {
+ DEBUG(dbgs() << "Ignoring CFString: " << *G);
+ return false;
+ }
+ }
+
+ return true;
+}
+
// This function replaces all global variables with new variables that have
// trailing redzones. It also creates a function that poisons
// redzones and inserts this function into llvm.global_ctors.
bool AddressSanitizer::insertGlobalRedzones(Module &M) {
SmallVector<GlobalVariable *, 16> GlobalsToChange;
- for (Module::GlobalListType::iterator G = M.getGlobalList().begin(),
- E = M.getGlobalList().end(); G != E; ++G) {
- Type *Ty = cast<PointerType>(G->getType())->getElementType();
- DEBUG(dbgs() << "GLOBAL: " << *G);
-
- if (!Ty->isSized()) continue;
- if (!G->hasInitializer()) continue;
- // Touch only those globals that will not be defined in other modules.
- // Don't handle ODR type linkages since other modules may be built w/o asan.
- if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
- G->getLinkage() != GlobalVariable::PrivateLinkage &&
- G->getLinkage() != GlobalVariable::InternalLinkage)
- continue;
- // Two problems with thread-locals:
- // - The address of the main thread's copy can't be computed at link-time.
- // - Need to poison all copies, not just the main thread's one.
- if (G->isThreadLocal())
- continue;
- // For now, just ignore this Alloca if the alignment is large.
- if (G->getAlignment() > RedzoneSize) continue;
-
- // Ignore all the globals with the names starting with "\01L_OBJC_".
- // Many of those are put into the .cstring section. The linker compresses
- // that section by removing the spare \0s after the string terminator, so
- // our redzones get broken.
- if ((G->getName().find("\01L_OBJC_") == 0) ||
- (G->getName().find("\01l_OBJC_") == 0)) {
- DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G);
- continue;
- }
-
- if (G->hasSection()) {
- StringRef Section(G->getSection());
- // Ignore the globals from the __OBJC section. The ObjC runtime assumes
- // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
- // them.
- if ((Section.find("__OBJC,") == 0) ||
- (Section.find("__DATA, __objc_") == 0)) {
- DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G);
- continue;
- }
- // See http://code.google.com/p/address-sanitizer/issues/detail?id=32
- // Constant CFString instances are compiled in the following way:
- // -- the string buffer is emitted into
- // __TEXT,__cstring,cstring_literals
- // -- the constant NSConstantString structure referencing that buffer
- // is placed into __DATA,__cfstring
- // Therefore there's no point in placing redzones into __DATA,__cfstring.
- // Moreover, it causes the linker to crash on OS X 10.7
- if (Section.find("__DATA,__cfstring") == 0) {
- DEBUG(dbgs() << "Ignoring CFString: " << *G);
- continue;
- }
- }
-
- GlobalsToChange.push_back(G);
+ for (Module::GlobalListType::iterator G = M.global_begin(),
+ E = M.global_end(); G != E; ++G) {
+ if (ShouldInstrumentGlobal(G))
+ GlobalsToChange.push_back(G);
}
size_t n = GlobalsToChange.size();
@@ -572,13 +617,22 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
// size_t size;
// size_t size_with_redzone;
// const char *name;
+ // size_t has_dynamic_init;
// We initialize an array of such structures and pass it to a run-time call.
StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
- IntptrTy, IntptrTy, NULL);
- SmallVector<Constant *, 16> Initializers(n);
+ IntptrTy, IntptrTy,
+ IntptrTy, NULL);
+ SmallVector<Constant *, 16> Initializers(n), DynamicInit;
IRBuilder<> IRB(CtorInsertBefore);
+ if (ClInitializers)
+ FindDynamicInitializers(M);
+
+ // The addresses of the first and last dynamically initialized globals in
+ // this TU. Used in initialization order checking.
+ Value *FirstDynamic = 0, *LastDynamic = 0;
+
for (size_t i = 0; i < n; i++) {
GlobalVariable *G = GlobalsToChange[i];
PointerType *PtrTy = cast<PointerType>(G->getType());
@@ -587,6 +641,8 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
uint64_t RightRedzoneSize = RedzoneSize +
(RedzoneSize - (SizeInBytes % RedzoneSize));
Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
+ // Determine whether this global should be poisoned in initialization.
+ bool GlobalHasDynamicInitializer = HasDynamicInitializer(G);
StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL);
Constant *NewInitializer = ConstantStruct::get(
@@ -621,7 +677,16 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
ConstantInt::get(IntptrTy, SizeInBytes),
ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
ConstantExpr::getPointerCast(Name, IntptrTy),
+ ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer),
NULL);
+
+ // Record the first and last dynamically initialized globals in this TU.
+ if (ClInitializers && GlobalHasDynamicInitializer) {
+ LastDynamic = ConstantExpr::getPointerCast(NewGlobal, IntptrTy);
+ if (FirstDynamic == 0)
+ FirstDynamic = LastDynamic;
+ }
+
DEBUG(dbgs() << "NEW GLOBAL:\n" << *NewGlobal);
}
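
Illustrative sketch, not part of the patch: the right-redzone size computed a few lines above pads each global out to a RedzoneSize boundary and then appends one more full redzone. The same arithmetic as a standalone helper; the example values are illustrative, not ASan defaults.

#include <cassert>
#include <cstdint>

uint64_t rightRedzoneSize(uint64_t SizeInBytes, uint64_t RedzoneSize) {
  uint64_t Pad = RedzoneSize - (SizeInBytes % RedzoneSize);
  uint64_t RZ = RedzoneSize + Pad;
  // The padded global is always a multiple of RedzoneSize, and the redzone
  // itself is always strictly larger than RedzoneSize.
  assert((SizeInBytes + RZ) % RedzoneSize == 0 && RZ > RedzoneSize);
  return RZ;
}
// E.g. with RedzoneSize = 32: a 7-byte global gets a 57-byte right redzone
// (7 + 57 == 64), a 32-byte global gets a full 64-byte redzone.
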
@@ -630,8 +695,13 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
M, ArrayOfGlobalStructTy, false, GlobalVariable::PrivateLinkage,
ConstantArray::get(ArrayOfGlobalStructTy, Initializers), "");
+ // Create calls for poisoning before initializers run and unpoisoning after.
+ if (ClInitializers && FirstDynamic && LastDynamic)
+ createInitializerPoisonCalls(M, FirstDynamic, LastDynamic);
+
Function *AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ kAsanRegisterGlobalsName, IRB.getVoidTy(),
+ IntptrTy, IntptrTy, NULL));
AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);
IRB.CreateCall2(AsanRegisterGlobals,
@@ -694,12 +764,7 @@ bool AddressSanitizer::runOnModule(Module &M) {
std::string FunctionName = std::string(kAsanReportErrorTemplate) +
(AccessIsWrite ? "store" : "load") + itostr(1 << AccessSizeIndex);
// If we are merging crash callbacks, they have two parameters.
- if (ClMergeCallbacks)
- AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = cast<Function>(
- M.getOrInsertFunction(FunctionName, IRB.getVoidTy(), IntptrTy,
- IntptrTy, NULL));
- else
- AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = cast<Function>(
+ AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = cast<Function>(
M.getOrInsertFunction(FunctionName, IRB.getVoidTy(), IntptrTy, NULL));
}
}
@@ -845,33 +910,6 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) {
NumInstrumented++;
}
- // Create PHI nodes and crash callbacks if we are merging crash callbacks.
- if (NumInstrumented) {
- for (size_t IsWrite = 0; IsWrite <= 1; IsWrite++) {
- for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
- AccessSizeIndex++) {
- BasicBlock *BB = AFC.CrashBlock[IsWrite][AccessSizeIndex];
- if (!BB) continue;
- assert(ClMergeCallbacks);
- AsanFunctionContext::CrashArgsVec &Args =
- AFC.CrashArgs[IsWrite][AccessSizeIndex];
- IRBuilder<> IRB(BB->getFirstNonPHI());
- size_t n = Args.size();
- PHINode *PN1 = IRB.CreatePHI(IntptrTy, n);
- PHINode *PN2 = IRB.CreatePHI(IntptrTy, n);
- // We need to match crash parameters and the predecessors.
- for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
- PI != PE; ++PI) {
- n--;
- PN1->addIncoming(Args[n].Arg1, *PI);
- PN2->addIncoming(Args[n].Arg2, *PI);
- }
- assert(n == 0);
- generateCrashCode(BB, PN1, PN2, IsWrite, AccessSizeIndex);
- }
- }
- }
-
DEBUG(dbgs() << F);
bool ChangedStack = poisonStackInFunction(M, F);
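
Illustrative sketch, not part of the patch: each rewritten global is described to __asan_register_globals by five pointer-sized fields, the last of which is new in this change. A hedged picture of how one array element might look on the runtime side; the struct and field names are illustrative, not the real runtime interface.

#include <cstdint>

// Mirrors the five IntptrTy fields of GlobalStructTy built above.
struct AsanGlobalDescriptor {
  uintptr_t beg;               // address of the global
  uintptr_t size;              // size of the original global
  uintptr_t size_with_redzone; // size including the right redzone
  uintptr_t name;              // pointer to the global's name string
  uintptr_t has_dynamic_init;  // non-zero if dynamically initialized (new)
};
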
diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
index f76c77e..a4bb5a6 100644
--- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h
+++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -26,30 +26,6 @@ namespace llvm {
/// The type parameter T determines the type of the nodes of the graph.
template <typename T>
class MaximumSpanningTree {
-
- // A comparing class for comparing weighted edges.
- template <typename CT>
- struct EdgeWeightCompare {
- bool operator()(typename MaximumSpanningTree<CT>::EdgeWeight X,
- typename MaximumSpanningTree<CT>::EdgeWeight Y) const {
- if (X.second > Y.second) return true;
- if (X.second < Y.second) return false;
- if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.first)) {
- if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.first)) {
- if (BBX->size() > BBY->size()) return true;
- if (BBX->size() < BBY->size()) return false;
- }
- }
- if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.second)) {
- if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.second)) {
- if (BBX->size() > BBY->size()) return true;
- if (BBX->size() < BBY->size()) return false;
- }
- }
- return false;
- }
- };
-
public:
typedef std::pair<const T*, const T*> Edge;
typedef std::pair<Edge, double> EdgeWeight;
@@ -59,6 +35,33 @@ namespace llvm {
MaxSpanTree MST;
+ private:
+ // A comparing class for comparing weighted edges.
+ struct EdgeWeightCompare {
+ static size_t getBlockSize(const T *X) {
+ const BasicBlock *BB = dyn_cast_or_null<BasicBlock>(X);
+ return BB ? BB->size() : 0;
+ }
+
+ bool operator()(EdgeWeight X, EdgeWeight Y) const {
+ if (X.second > Y.second) return true;
+ if (X.second < Y.second) return false;
+
+ // Equal edge weights: break ties by comparing block sizes.
+ size_t XSizeA = getBlockSize(X.first.first);
+ size_t YSizeA = getBlockSize(Y.first.first);
+ if (XSizeA > YSizeA) return true;
+ if (XSizeA < YSizeA) return false;
+
+ size_t XSizeB = getBlockSize(X.first.second);
+ size_t YSizeB = getBlockSize(Y.first.second);
+ if (XSizeB > YSizeB) return true;
+ if (XSizeB < YSizeB) return false;
+
+ return false;
+ }
+ };
+
public:
static char ID; // Class identification, replacement for typeinfo
@@ -66,7 +69,7 @@ namespace llvm {
/// spanning tree.
MaximumSpanningTree(EdgeWeights &EdgeVector) {
- std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare<T>());
+ std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare());
// Create spanning tree, Forest contains a special data structure
// that makes checking if two nodes are already in a common (sub-)tree
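
Illustrative sketch, not part of the patch: the rewritten comparator orders edges by decreasing weight and breaks ties by the sizes of the blocks at either end, keeping the stable_sort deterministic. A minimal standalone analogue where each endpoint is reduced to its block size.

#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

using ToyEdge = std::pair<std::size_t, std::size_t>;   // (size of A, size of B)
using ToyEdgeWeight = std::pair<ToyEdge, double>;

struct ToyEdgeWeightCompare {
  bool operator()(const ToyEdgeWeight &X, const ToyEdgeWeight &Y) const {
    if (X.second > Y.second) return true;       // heavier edges first
    if (X.second < Y.second) return false;
    if (X.first.first != Y.first.first)         // then larger first endpoint
      return X.first.first > Y.first.first;
    return X.first.second > Y.first.second;     // then larger second endpoint
  }
};

void sortEdges(std::vector<ToyEdgeWeight> &Edges) {
  // stable_sort keeps the original order of fully tied edges, matching the
  // MaximumSpanningTree constructor above.
  std::stable_sort(Edges.begin(), Edges.end(), ToyEdgeWeightCompare());
}
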
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 277c4d5..a8deda8 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -66,11 +66,6 @@ static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
cl::desc("Disable branch optimizations in CodeGenPrepare"));
-// FIXME: Remove this abomination once all of the tests pass without it!
-static cl::opt<bool> DisableDeleteDeadBlocks(
- "disable-cgp-delete-dead-blocks", cl::Hidden, cl::init(false),
- cl::desc("Disable deleting dead blocks in CodeGenPrepare"));
-
static cl::opt<bool> DisableSelectToBranch(
"disable-cgp-select2branch", cl::Hidden, cl::init(false),
cl::desc("Disable select to branch conversion."));
@@ -116,6 +111,7 @@ namespace {
}
private:
+ bool EliminateFallThrough(Function &F);
bool EliminateMostlyEmptyBlocks(Function &F);
bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
void EliminateMostlyEmptyBlock(BasicBlock *BB);
@@ -187,10 +183,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
WorkList.insert(*II);
}
- if (!DisableDeleteDeadBlocks)
- for (SmallPtrSet<BasicBlock*, 8>::iterator
- I = WorkList.begin(), E = WorkList.end(); I != E; ++I)
- DeleteDeadBlock(*I);
+ for (SmallPtrSet<BasicBlock*, 8>::iterator
+ I = WorkList.begin(), E = WorkList.end(); I != E; ++I)
+ DeleteDeadBlock(*I);
+
+ // Merge pairs of basic blocks with unconditional branches, connected by
+ // a single edge.
+ if (EverMadeChange || MadeChange)
+ MadeChange |= EliminateFallThrough(F);
if (MadeChange)
ModifiedDT = true;
@@ -203,6 +203,39 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
return EverMadeChange;
}
+/// EliminateFallThrough - Merge basic blocks which are connected
+/// by a single edge, where one of the basic blocks has a single successor
+/// pointing to the other basic block, which has a single predecessor.
+bool CodeGenPrepare::EliminateFallThrough(Function &F) {
+ bool Changed = false;
+ // Scan all of the blocks in the function, except for the entry block.
+ for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *BB = I++;
+ // If the destination block has a single pred, then this is a trivial
+ // edge, just collapse it.
+ BasicBlock *SinglePred = BB->getSinglePredecessor();
+
+ if (!SinglePred || SinglePred == BB) continue;
+
+ BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
+ if (Term && !Term->isConditional()) {
+ Changed = true;
+ DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
+ // Remember if SinglePred was the entry block of the function.
+ // If so, we will need to move BB back to the entry position.
+ bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ MergeBasicBlockIntoOnlyPred(BB, this);
+
+ if (isEntry && BB != &BB->getParent()->getEntryBlock())
+ BB->moveBefore(&BB->getParent()->getEntryBlock());
+
+ // We have erased a block. Update the iterator.
+ I = BB;
+ }
+ }
+ return Changed;
+}
+
/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
/// debug info directives, and an unconditional branch. Passes before isel
/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
@@ -610,7 +643,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
// that have the default "don't know" as the objectsize. Anything else
// should be left alone.
CodeGenPrepareFortifiedLibCalls Simplifier;
- return Simplifier.fold(CI, TD);
+ return Simplifier.fold(CI, TD, TLInfo);
}
/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
@@ -645,10 +678,18 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
if (!TLI)
return false;
+ PHINode *PN = 0;
+ BitCastInst *BCI = 0;
Value *V = RI->getReturnValue();
- PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL;
- if (V && !PN)
- return false;
+ if (V) {
+ BCI = dyn_cast<BitCastInst>(V);
+ if (BCI)
+ V = BCI->getOperand(0);
+
+ PN = dyn_cast<PHINode>(V);
+ if (!PN)
+ return false;
+ }
BasicBlock *BB = RI->getParent();
if (PN && PN->getParent() != BB)
@@ -666,6 +707,9 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
if (PN) {
BasicBlock::iterator BI = BB->begin();
do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
+ if (&*BI == BCI)
+ // Also skip over the bitcast.
+ ++BI;
if (&*BI != RI)
return false;
} else {
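
Illustrative sketch, not part of the patch: EliminateFallThrough (added above) folds a block into its lone unconditional-branch predecessor. A toy model of the eligibility test and the splice; the pass itself delegates the bookkeeping to MergeBasicBlockIntoOnlyPred, which keeps BB and deletes the predecessor.

#include <vector>

struct ToyBlock {
  std::vector<ToyBlock *> Preds;
  ToyBlock *UncondSucc = nullptr;   // non-null iff the block ends in an
                                    // unconditional branch
  std::vector<int> Insts;           // stand-in for the instruction list
};

bool mergeFallThrough(ToyBlock &BB) {
  // Mergeable when BB has exactly one predecessor, that predecessor is a
  // different block, and it branches unconditionally to BB.
  if (BB.Preds.size() != 1)
    return false;
  ToyBlock *Pred = BB.Preds.front();
  if (Pred == &BB || Pred->UncondSucc != &BB)
    return false;
  // Splice the instructions together; successor/predecessor list updates for
  // the rest of the CFG are omitted for brevity.
  Pred->Insts.insert(Pred->Insts.end(), BB.Insts.begin(), BB.Insts.end());
  Pred->UncondSucc = BB.UncondSucc;
  BB.Insts.clear();
  BB.Preds.clear();
  return true;
}
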
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 5eff0e5..8b1283f 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -378,7 +378,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
//
// We have to be careful here as *Off is signed while *.Size is unsigned.
if (EarlierOff >= LaterOff &&
- Later.Size > Earlier.Size &&
+ Later.Size >= Earlier.Size &&
uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
return OverwriteComplete;
@@ -740,12 +740,19 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
continue;
}
- if (isa<AllocaInst>(BBI) || isAllocLikeFn(BBI)) {
+ if (isa<AllocaInst>(BBI)) {
+ // Remove allocas from the list of dead stack objects; there can't be
+ // any references before the definition.
DeadStackObjects.remove(BBI);
continue;
}
if (CallSite CS = cast<Value>(BBI)) {
+ // Remove allocation function calls from the list of dead stack objects;
+ // there can't be any references before the definition.
+ if (isAllocLikeFn(BBI))
+ DeadStackObjects.remove(BBI);
+
// If this call does not access memory, it can't be loading any of our
// pointers.
if (AA->doesNotAccessMemory(CS))
@@ -771,7 +778,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// If all of the allocas were clobbered by the call then we're not going
// to find anything else to process.
if (DeadStackObjects.empty())
- return MadeChange;
+ break;
continue;
}
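
Illustrative sketch, not part of the patch: the '>' to '>=' change above lets a later store of exactly the same size (and matching extent) kill the earlier one. The containment test written as a standalone predicate.

#include <cstdint>

// The earlier store is dead if the later store covers every byte it wrote.
bool laterCompletelyOverwritesEarlier(int64_t EarlierOff, uint64_t EarlierSize,
                                      int64_t LaterOff, uint64_t LaterSize) {
  return EarlierOff >= LaterOff &&
         LaterSize >= EarlierSize &&
         uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize;
}
// Example: a 4-byte store at offset 0 followed by another 4-byte store at
// offset 0 now counts as a complete overwrite, so the first store is dead.
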
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 140864d..4822fd0 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -512,7 +512,7 @@ namespace {
/// have that value number. Use findLeader to query it.
struct LeaderTableEntry {
Value *Val;
- BasicBlock *BB;
+ const BasicBlock *BB;
LeaderTableEntry *Next;
};
DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
@@ -542,7 +542,7 @@ namespace {
private:
/// addToLeaderTable - Push a new Value to the LeaderTable onto the list for
/// its value number.
- void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
+ void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) {
LeaderTableEntry &Curr = LeaderTable[N];
if (!Curr.Val) {
Curr.Val = V;
@@ -608,13 +608,13 @@ namespace {
void dump(DenseMap<uint32_t, Value*> &d);
bool iterateOnFunction(Function &F);
bool performPRE(Function &F);
- Value *findLeader(BasicBlock *BB, uint32_t num);
+ Value *findLeader(const BasicBlock *BB, uint32_t num);
void cleanupGlobalSets();
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
- BasicBlock *Root);
- bool propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root);
+ const BasicBlockEdge &Root);
+ bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root);
};
char GVN::ID = 0;
@@ -1977,7 +1977,7 @@ bool GVN::processLoad(LoadInst *L) {
// and then scan the list to find one whose block dominates the block in
// question. This is fast because dominator tree queries consist of only
// a few comparisons of DFS numbers.
-Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
+Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) {
LeaderTableEntry Vals = LeaderTable[num];
if (!Vals.Val) return 0;
@@ -2004,22 +2004,13 @@ Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
/// use is dominated by the given basic block. Returns the number of uses that
/// were replaced.
unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
- BasicBlock *Root) {
+ const BasicBlockEdge &Root) {
unsigned Count = 0;
for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
UI != UE; ) {
Use &U = (UI++).getUse();
- // If From occurs as a phi node operand then the use implicitly lives in the
- // corresponding incoming block. Otherwise it is the block containing the
- // user that must be dominated by Root.
- BasicBlock *UsingBlock;
- if (PHINode *PN = dyn_cast<PHINode>(U.getUser()))
- UsingBlock = PN->getIncomingBlock(U);
- else
- UsingBlock = cast<Instruction>(U.getUser())->getParent();
-
- if (DT->dominates(Root, UsingBlock)) {
+ if (DT->dominates(Root, U)) {
U.set(To);
++Count;
}
@@ -2027,13 +2018,34 @@ unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
return Count;
}
+/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return
+/// true if every path from the entry block to 'Dst' passes via this edge. In
+/// particular 'Dst' must not be reachable via another edge from 'Src'.
+static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
+ DominatorTree *DT) {
+ // While in theory it is interesting to consider the case in which Dst has
+ // more than one predecessor, because Dst might be part of a loop which is
+ // only reachable from Src, in practice it is pointless since at the time
+ // GVN runs all such loops have preheaders, which means that Dst will have
+ // been changed to have only one predecessor, namely Src.
+ const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
+ const BasicBlock *Src = E.getStart();
+ assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
+ (void)Src;
+ return Pred != 0;
+}
+
/// propagateEquality - The given values are known to be equal in every block
/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
/// 'RHS' everywhere in the scope. Returns whether a change was made.
-bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
+bool GVN::propagateEquality(Value *LHS, Value *RHS,
+ const BasicBlockEdge &Root) {
SmallVector<std::pair<Value*, Value*>, 4> Worklist;
Worklist.push_back(std::make_pair(LHS, RHS));
bool Changed = false;
+ // For speed, compute a conservative fast approximation to
+ // DT->dominates(Root, Root.getEnd());
+ bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT);
while (!Worklist.empty()) {
std::pair<Value*, Value*> Item = Worklist.pop_back_val();
@@ -2065,9 +2077,6 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
LVN = RVN;
}
}
- assert((!isa<Instruction>(RHS) ||
- DT->properlyDominates(cast<Instruction>(RHS)->getParent(), Root)) &&
- "Instruction doesn't dominate scope!");
// If value numbering later sees that an instruction in the scope is equal
// to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve
@@ -2076,8 +2085,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
// if RHS is an instruction (if an instruction in the scope is morphed into
// LHS then it will be turned into RHS by the next GVN iteration anyway, so
// using the leader table is about compiling faster, not optimizing better).
- if (!isa<Instruction>(RHS))
- addToLeaderTable(LVN, RHS, Root);
+ // The leader table only tracks basic blocks, not edges. Only add to it if
+ // we have the simple case where the edge dominates the end.
+ if (RootDominatesEnd && !isa<Instruction>(RHS))
+ addToLeaderTable(LVN, RHS, Root.getEnd());
// Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
// LHS always has at least one use that is not dominated by Root, this will
@@ -2136,7 +2147,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
// If the number we were assigned was brand new then there is no point in
// looking for an instruction realizing it: there cannot be one!
if (Num < NextNum) {
- Value *NotCmp = findLeader(Root, Num);
+ Value *NotCmp = findLeader(Root.getEnd(), Num);
if (NotCmp && isa<Instruction>(NotCmp)) {
unsigned NumReplacements =
replaceAllDominatedUsesWith(NotCmp, NotVal, Root);
@@ -2146,7 +2157,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
}
// Ensure that any instruction in scope that gets the "A < B" value number
// is replaced with false.
- addToLeaderTable(Num, NotVal, Root);
+ // The leader table only tracks basic blocks, not edges. Only add to it if
+ // we have the simple case where the edge dominates the end.
+ if (RootDominatesEnd)
+ addToLeaderTable(Num, NotVal, Root.getEnd());
continue;
}
@@ -2155,22 +2169,6 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
return Changed;
}
-/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return
-/// true if every path from the entry block to 'Dst' passes via this edge. In
-/// particular 'Dst' must not be reachable via another edge from 'Src'.
-static bool isOnlyReachableViaThisEdge(BasicBlock *Src, BasicBlock *Dst,
- DominatorTree *DT) {
- // While in theory it is interesting to consider the case in which Dst has
- // more than one predecessor, because Dst might be part of a loop which is
- // only reachable from Src, in practice it is pointless since at the time
- // GVN runs all such loops have preheaders, which means that Dst will have
- // been changed to have only one predecessor, namely Src.
- BasicBlock *Pred = Dst->getSinglePredecessor();
- assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
- (void)Src;
- return Pred != 0;
-}
-
/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
bool GVN::processInstruction(Instruction *I) {
@@ -2210,18 +2208,20 @@ bool GVN::processInstruction(Instruction *I) {
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
+ // Avoid multiple edges early.
+ if (TrueSucc == FalseSucc)
+ return false;
+
BasicBlock *Parent = BI->getParent();
bool Changed = false;
- if (isOnlyReachableViaThisEdge(Parent, TrueSucc, DT))
- Changed |= propagateEquality(BranchCond,
- ConstantInt::getTrue(TrueSucc->getContext()),
- TrueSucc);
+ Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext());
+ BasicBlockEdge TrueE(Parent, TrueSucc);
+ Changed |= propagateEquality(BranchCond, TrueVal, TrueE);
- if (isOnlyReachableViaThisEdge(Parent, FalseSucc, DT))
- Changed |= propagateEquality(BranchCond,
- ConstantInt::getFalse(FalseSucc->getContext()),
- FalseSucc);
+ Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext());
+ BasicBlockEdge FalseE(Parent, FalseSucc);
+ Changed |= propagateEquality(BranchCond, FalseVal, FalseE);
return Changed;
}
@@ -2234,8 +2234,9 @@ bool GVN::processInstruction(Instruction *I) {
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
i != e; ++i) {
BasicBlock *Dst = i.getCaseSuccessor();
- if (isOnlyReachableViaThisEdge(Parent, Dst, DT))
- Changed |= propagateEquality(SwitchCond, i.getCaseValue(), Dst);
+ BasicBlockEdge E(Parent, Dst);
+ if (E.isSingleEdge())
+ Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E);
}
return Changed;
}
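
Illustrative sketch, not part of the patch: propagateEquality now takes a BasicBlockEdge rather than a destination block, so the equality is valid on the edge even when the destination has other predecessors. A source-level picture of what the propagation achieves once the edge dominates the use; the rewritten result is illustrative.

// Before GVN: 'x' is still used inside the guarded region.
int before(int x) {
  if (x == 42)
    return x * 2;     // may become 42 * 2 and then fold to 84
  return 0;
}

// After propagateEquality along the true edge plus constant folding.
int after(int x) {
  if (x == 42)
    return 84;
  return 0;
}
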
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 582948e..0192e92 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -175,7 +175,9 @@ namespace {
bool canSinkOrHoistInst(Instruction &I);
bool isNotUsedInLoop(Instruction &I);
- void PromoteAliasSet(AliasSet &AS);
+ void PromoteAliasSet(AliasSet &AS,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks,
+ SmallVectorImpl<Instruction*> &InsertPts);
};
}
@@ -256,10 +258,13 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ SmallVector<Instruction *, 8> InsertPts;
+
// Loop over all of the alias sets in the tracker object.
for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
I != E; ++I)
- PromoteAliasSet(*I);
+ PromoteAliasSet(*I, ExitBlocks, InsertPts);
}
// Clear out loops state information for the next iteration
@@ -631,6 +636,7 @@ namespace {
Value *SomePtr; // Designated pointer to store to.
SmallPtrSet<Value*, 4> &PointerMustAliases;
SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
+ SmallVectorImpl<Instruction*> &LoopInsertPts;
AliasSetTracker &AST;
DebugLoc DL;
int Alignment;
@@ -638,11 +644,12 @@ namespace {
LoopPromoter(Value *SP,
const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
SmallPtrSet<Value*, 4> &PMA,
- SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast,
- DebugLoc dl, int alignment)
+ SmallVectorImpl<BasicBlock*> &LEB,
+ SmallVectorImpl<Instruction*> &LIP,
+ AliasSetTracker &ast, DebugLoc dl, int alignment)
: LoadAndStorePromoter(Insts, S), SomePtr(SP),
- PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl),
- Alignment(alignment) {}
+ PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP),
+ AST(ast), DL(dl), Alignment(alignment) {}
virtual bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction*> &) const {
@@ -662,7 +669,7 @@ namespace {
for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = LoopExitBlocks[i];
Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
- Instruction *InsertPos = ExitBlock->getFirstInsertionPt();
+ Instruction *InsertPos = LoopInsertPts[i];
StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos);
NewSI->setAlignment(Alignment);
NewSI->setDebugLoc(DL);
@@ -684,7 +691,9 @@ namespace {
/// looping over the stores in the loop, looking for stores to Must pointers
/// which are loop invariant.
///
-void LICM::PromoteAliasSet(AliasSet &AS) {
+void LICM::PromoteAliasSet(AliasSet &AS,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks,
+ SmallVectorImpl<Instruction*> &InsertPts) {
// We can promote this alias set if it has a store, if it is a "Must" alias
// set, if the pointer is loop invariant, and if we are not eliminating any
// volatile loads or stores.
@@ -794,14 +803,20 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
// location is better than none.
DebugLoc DL = LoopUses[0]->getDebugLoc();
- SmallVector<BasicBlock*, 8> ExitBlocks;
- CurLoop->getUniqueExitBlocks(ExitBlocks);
+ // Figure out the loop exits and their insertion points, if this is the
+ // first promotion.
+ if (ExitBlocks.empty()) {
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+ InsertPts.resize(ExitBlocks.size());
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ InsertPts[i] = ExitBlocks[i]->getFirstInsertionPt();
+ }
// We use the SSAUpdater interface to insert phi nodes as required.
SmallVector<PHINode*, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
- *CurAST, DL, Alignment);
+ InsertPts, *CurAST, DL, Alignment);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
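
Illustrative sketch, not part of the patch: the LICM change above computes the loop's exit blocks and their insertion points once and reuses them for every alias set promoted in that loop. The caching shape as a stripped-down sketch; the Toy* type and the literal values are stand-ins for getUniqueExitBlocks / getFirstInsertionPt.

#include <cstddef>
#include <vector>

struct ToyExitBlock { int FirstInsertionPt; };

// The caller owns the two caches; they start empty, are filled by the first
// promotion in the loop, and are reused by every later one.
void promoteOneAliasSet(std::vector<ToyExitBlock> &ExitBlocks,
                        std::vector<int> &InsertPts) {
  if (ExitBlocks.empty()) {
    ExitBlocks = {{10}, {20}};                  // stand-in exit blocks
    InsertPts.resize(ExitBlocks.size());
    for (std::size_t i = 0; i != ExitBlocks.size(); ++i)
      InsertPts[i] = ExitBlocks[i].FirstInsertionPt;
  }
  // ...place the sunk stores at InsertPts[i] in ExitBlocks[i]...
}
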
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index b14a713..0ae7a51 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -738,7 +738,8 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
bool Changed = false;
while (!DeadInsts.empty()) {
- Instruction *I = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val());
+ Value *V = DeadInsts.pop_back_val();
+ Instruction *I = dyn_cast_or_null<Instruction>(V);
if (I == 0 || !isInstructionTriviallyDead(I))
continue;
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index ffcf97c..09687d8 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -543,6 +543,7 @@ static bool LinearizeExprTree(BinaryOperator *I,
// Update the number of paths to the leaf.
IncorporateWeight(It->second, Weight, Opcode);
+#if 0 // TODO: Re-enable once PR13021 is fixed.
// The leaf already has one use from inside the expression. As we want
// exactly one such use, drop this new use of the leaf.
assert(!Op->hasOneUse() && "Only one use, but we got here twice!");
@@ -559,6 +560,7 @@ static bool LinearizeExprTree(BinaryOperator *I,
Leaves.erase(It);
continue;
}
+#endif
// If we still have uses that are not accounted for by the expression
// then it is not safe to modify the value.
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index ec835b1..8090fdf 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -56,7 +56,6 @@ STATISTIC(NumReplaced, "Number of allocas broken up");
STATISTIC(NumPromoted, "Number of allocas promoted");
STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
-STATISTIC(NumGlobals, "Number of allocas copied from constant global");
namespace {
struct SROA : public FunctionPass {
@@ -183,9 +182,6 @@ namespace {
void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
bool ShouldAttemptScalarRepl(AllocaInst *AI);
-
- static MemTransferInst *isOnlyCopiedFromConstantGlobal(
- AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete);
};
// SROA_DT - SROA that uses DominatorTree.
@@ -612,11 +608,16 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
- if (!GEP->hasAllConstantIndices())
- NonConstantIdx = Indices.pop_back_val();
+ Value* GEPNonConstantIdx = 0;
+ if (!GEP->hasAllConstantIndices()) {
+ assert(!NonConstantIdx &&
+ "Dynamic GEP reading from dynamic GEP unsupported");
+ GEPNonConstantIdx = Indices.pop_back_val();
+ } else
+ GEPNonConstantIdx = NonConstantIdx;
uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
Indices);
- ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, NonConstantIdx);
+ ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, GEPNonConstantIdx);
GEP->eraseFromParent();
continue;
}
@@ -1460,26 +1461,6 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
return false;
}
-/// getPointeeAlignment - Compute the minimum alignment of the value pointed
-/// to by the given pointer.
-static unsigned getPointeeAlignment(Value *V, const TargetData &TD) {
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::BitCast ||
- (CE->getOpcode() == Instruction::GetElementPtr &&
- cast<GEPOperator>(CE)->hasAllZeroIndices()))
- return getPointeeAlignment(CE->getOperand(0), TD);
-
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- if (!GV->isDeclaration())
- return TD.getPreferredAlignment(GV);
-
- if (PointerType *PT = dyn_cast<PointerType>(V->getType()))
- return TD.getABITypeAlignment(PT->getElementType());
-
- return 0;
-}
-
-
// performScalarRepl - This algorithm is a simple worklist driven algorithm,
// which runs on all of the alloca instructions in the function, removing them
// if they are only used by getelementptr instructions.
@@ -1511,29 +1492,6 @@ bool SROA::performScalarRepl(Function &F) {
if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
continue;
- // Check to see if this allocation is only modified by a memcpy/memmove from
- // a constant global whose alignment is equal to or exceeds that of the
- // allocation. If this is the case, we can change all users to use
- // the constant global instead. This is commonly produced by the CFE by
- // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
- // is only subsequently read.
- SmallVector<Instruction *, 4> ToDelete;
- if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) {
- if (AI->getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) {
- DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
- DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
- for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
- ToDelete[i]->eraseFromParent();
- Constant *TheSrc = cast<Constant>(Copy->getSource());
- AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
- Copy->eraseFromParent(); // Don't mutate the global.
- AI->eraseFromParent();
- ++NumGlobals;
- Changed = true;
- continue;
- }
- }
-
// Check to see if we can perform the core SROA transformation. We cannot
// transform the allocation instruction if it is an array allocation
// (allocations OF arrays are ok though), and an allocation of a scalar
@@ -2651,134 +2609,3 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
return true;
}
-
-
-
-/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
-/// some part of a constant global variable. This intentionally only accepts
-/// constant expressions because we don't can't rewrite arbitrary instructions.
-static bool PointsToConstantGlobal(Value *V) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- return GV->isConstant();
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::BitCast ||
- CE->getOpcode() == Instruction::GetElementPtr)
- return PointsToConstantGlobal(CE->getOperand(0));
- return false;
-}
-
-/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
-/// pointer to an alloca. Ignore any reads of the pointer, return false if we
-/// see any stores or other unknown uses. If we see pointer arithmetic, keep
-/// track of whether it moves the pointer (with isOffset) but otherwise traverse
-/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
-/// the alloca, and if the source pointer is a pointer to a constant global, we
-/// can optimize this.
-static bool
-isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
- bool isOffset,
- SmallVector<Instruction *, 4> &LifetimeMarkers) {
- // We track lifetime intrinsics as we encounter them. If we decide to go
- // ahead and replace the value with the global, this lets the caller quickly
- // eliminate the markers.
-
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
- User *U = cast<Instruction>(*UI);
-
- if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- // Ignore non-volatile loads, they are always ok.
- if (!LI->isSimple()) return false;
- continue;
- }
-
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
- // If uses of the bitcast are ok, we are ok.
- if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset,
- LifetimeMarkers))
- return false;
- continue;
- }
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
- // If the GEP has all zero indices, it doesn't offset the pointer. If it
- // doesn't, it does.
- if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
- isOffset || !GEP->hasAllZeroIndices(),
- LifetimeMarkers))
- return false;
- continue;
- }
-
- if (CallSite CS = U) {
- // If this is the function being called then we treat it like a load and
- // ignore it.
- if (CS.isCallee(UI))
- continue;
-
- // If this is a readonly/readnone call site, then we know it is just a
- // load (but one that potentially returns the value itself), so we can
- // ignore it if we know that the value isn't captured.
- unsigned ArgNo = CS.getArgumentNo(UI);
- if (CS.onlyReadsMemory() &&
- (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
- continue;
-
- // If this is being passed as a byval argument, the caller is making a
- // copy, so it is only a read of the alloca.
- if (CS.isByValArgument(ArgNo))
- continue;
- }
-
- // Lifetime intrinsics can be handled by the caller.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end) {
- assert(II->use_empty() && "Lifetime markers have no result to use!");
- LifetimeMarkers.push_back(II);
- continue;
- }
- }
-
- // If this is isn't our memcpy/memmove, reject it as something we can't
- // handle.
- MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
- if (MI == 0)
- return false;
-
- // If the transfer is using the alloca as a source of the transfer, then
- // ignore it since it is a load (unless the transfer is volatile).
- if (UI.getOperandNo() == 1) {
- if (MI->isVolatile()) return false;
- continue;
- }
-
- // If we already have seen a copy, reject the second one.
- if (TheCopy) return false;
-
- // If the pointer has been offset from the start of the alloca, we can't
- // safely handle this.
- if (isOffset) return false;
-
- // If the memintrinsic isn't using the alloca as the dest, reject it.
- if (UI.getOperandNo() != 0) return false;
-
- // If the source of the memcpy/move is not a constant global, reject it.
- if (!PointsToConstantGlobal(MI->getSource()))
- return false;
-
- // Otherwise, the transform is safe. Remember the copy instruction.
- TheCopy = MI;
- }
- return true;
-}
-
-/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
-/// modified by a copy from a constant global. If we can prove this, we can
-/// replace any uses of the alloca with uses of the global directly.
-MemTransferInst *
-SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
- SmallVector<Instruction*, 4> &ToDelete) {
- MemTransferInst *TheCopy = 0;
- if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false, ToDelete))
- return TheCopy;
- return 0;
-}
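
Illustrative sketch, not part of the patch: the code deleted above recognized allocas whose only write is a memcpy from a constant global, as described in the removed comments. A standalone example of that pattern.

#include <cstring>

// 'A' is filled by a single memcpy from a constant global and only read
// afterwards, so uses of the alloca can be redirected to the global and the
// copy deleted.
int readOnlyCopy(unsigned Idx) {
  static const int Table[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  int A[8];
  std::memcpy(A, Table, sizeof(A));
  return A[Idx & 7];
}
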
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index a1a8a41..3904419 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -157,14 +157,15 @@ struct StrCatOpt : public LibCallOptimization {
// These optimizations require TargetData.
if (!TD) return 0;
- EmitStrLenMemCpy(Src, Dst, Len, B);
- return Dst;
+ return EmitStrLenMemCpy(Src, Dst, Len, B);
}
- void EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) {
+ Value *EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) {
// We need to find the end of the destination string. That's where the
// memory is to be moved to. We just generate a call to strlen.
- Value *DstLen = EmitStrLen(Dst, B, TD);
+ Value *DstLen = EmitStrLen(Dst, B, TD, TLI);
+ if (!DstLen)
+ return 0;
// Now that we have the destination's length, we must index into the
// destination's pointer to get the actual memcpy destination (end of
@@ -175,6 +176,7 @@ struct StrCatOpt : public LibCallOptimization {
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
B.CreateMemCpy(CpyDst, Src,
ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
+ return Dst;
}
};
@@ -221,8 +223,7 @@ struct StrNCatOpt : public StrCatOpt {
// strncat(x, s, c) -> strcat(x, s)
// s is constant so the strcat can be optimized further
- EmitStrLenMemCpy(Src, Dst, SrcLen, B);
- return Dst;
+ return EmitStrLenMemCpy(Src, Dst, SrcLen, B);
}
};
@@ -254,7 +255,7 @@ struct StrChrOpt : public LibCallOptimization {
return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
ConstantInt::get(TD->getIntPtrType(*Context), Len),
- B, TD);
+ B, TD, TLI);
}
// Otherwise, the character is a constant, see if the first argument is
@@ -299,7 +300,7 @@ struct StrRChrOpt : public LibCallOptimization {
if (!getConstantStringInfo(SrcStr, Str)) {
// strrchr(s, 0) -> strchr(s, 0)
if (TD && CharC->isZero())
- return EmitStrChr(SrcStr, '\0', B, TD);
+ return EmitStrChr(SrcStr, '\0', B, TD, TLI);
return 0;
}
@@ -355,7 +356,7 @@ struct StrCmpOpt : public LibCallOptimization {
return EmitMemCmp(Str1P, Str2P,
ConstantInt::get(TD->getIntPtrType(*Context),
- std::min(Len1, Len2)), B, TD);
+ std::min(Len1, Len2)), B, TD, TLI);
}
return 0;
@@ -391,7 +392,7 @@ struct StrNCmpOpt : public LibCallOptimization {
return ConstantInt::get(CI->getType(), 0);
if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
- return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD);
+ return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI);
StringRef Str1, Str2;
bool HasStr1 = getConstantStringInfo(Str1P, Str1);
@@ -447,11 +448,10 @@ struct StrCpyOpt : public LibCallOptimization {
// We have enough information to now generate the memcpy call to do the
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
- if (OptChkCall)
- EmitMemCpyChk(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len),
- CI->getArgOperand(2), B, TD);
- else
+ if (!OptChkCall ||
+ !EmitMemCpyChk(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len),
+ CI->getArgOperand(2), B, TD, TLI))
B.CreateMemCpy(Dst, Src,
ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
return Dst;
@@ -480,8 +480,10 @@ struct StpCpyOpt: public LibCallOptimization {
if (!TD) return 0;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
- if (Dst == Src) // stpcpy(x,x) -> x+strlen(x)
- return B.CreateInBoundsGEP(Dst, EmitStrLen(Src, B, TD));
+ if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
+ Value *StrLen = EmitStrLen(Src, B, TD, TLI);
+ return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0;
+ }
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
@@ -494,9 +496,8 @@ struct StpCpyOpt: public LibCallOptimization {
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
- if (OptChkCall)
- EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, TD);
- else
+ if (!OptChkCall || !EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B,
+ TD, TLI))
B.CreateMemCpy(Dst, Src, LenV, 1);
return DstEnd;
}
@@ -609,7 +610,7 @@ struct StrPBrkOpt : public LibCallOptimization {
// strpbrk(s, "a") -> strchr(s, 'a')
if (TD && HasS2 && S2.size() == 1)
- return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD);
+ return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI);
return 0;
}
@@ -698,7 +699,7 @@ struct StrCSpnOpt : public LibCallOptimization {
// strcspn(s, "") -> strlen(s)
if (TD && HasS2 && S2.empty())
- return EmitStrLen(CI->getArgOperand(0), B, TD);
+ return EmitStrLen(CI->getArgOperand(0), B, TD, TLI);
return 0;
}
@@ -722,9 +723,13 @@ struct StrStrOpt : public LibCallOptimization {
// fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
- Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD);
+ Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI);
+ if (!StrLen)
+ return 0;
Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
- StrLen, B, TD);
+ StrLen, B, TD, TLI);
+ if (!StrNCmp)
+ return 0;
for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end();
UI != UE; ) {
ICmpInst *Old = cast<ICmpInst>(*UI++);
@@ -760,9 +765,10 @@ struct StrStrOpt : public LibCallOptimization {
}
// fold strstr(x, "y") -> strchr(x, 'y').
- if (HasStr2 && ToFindStr.size() == 1)
- return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0),
- ToFindStr[0], B, TD), CI->getType());
+ if (HasStr2 && ToFindStr.size() == 1) {
+ Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI);
+ return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0;
+ }
return 0;
}
};
@@ -1179,8 +1185,8 @@ struct PrintFOpt : public LibCallOptimization {
// printf("x") -> putchar('x'), even for '%'.
if (FormatStr.size() == 1) {
- Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD);
- if (CI->use_empty()) return CI;
+ Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI);
+ if (CI->use_empty() || !Res) return Res;
return B.CreateIntCast(Res, CI->getType(), true);
}
@@ -1191,26 +1197,26 @@ struct PrintFOpt : public LibCallOptimization {
// pass to be run after this pass, to merge duplicate strings.
FormatStr = FormatStr.drop_back();
Value *GV = B.CreateGlobalString(FormatStr, "str");
- EmitPutS(GV, B, TD);
- return CI->use_empty() ? (Value*)CI :
- ConstantInt::get(CI->getType(), FormatStr.size()+1);
+ Value *NewCI = EmitPutS(GV, B, TD, TLI);
+ return (CI->use_empty() || !NewCI) ?
+ NewCI :
+ ConstantInt::get(CI->getType(), FormatStr.size()+1);
}
// Optimize specific format strings.
// printf("%c", chr) --> putchar(chr)
if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
CI->getArgOperand(1)->getType()->isIntegerTy()) {
- Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD);
+ Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI);
- if (CI->use_empty()) return CI;
+ if (CI->use_empty() || !Res) return Res;
return B.CreateIntCast(Res, CI->getType(), true);
}
// printf("%s\n", str) --> puts(str)
if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
CI->getArgOperand(1)->getType()->isPointerTy()) {
- EmitPutS(CI->getArgOperand(1), B, TD);
- return CI;
+ return EmitPutS(CI->getArgOperand(1), B, TD, TLI);
}
return 0;
}
@@ -1297,7 +1303,9 @@ struct SPrintFOpt : public LibCallOptimization {
// sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0;
- Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD);
+ Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI);
+ if (!Len)
+ return 0;
Value *IncLen = B.CreateAdd(Len,
ConstantInt::get(Len->getType(), 1),
"leninc");
@@ -1364,8 +1372,8 @@ struct FWriteOpt : public LibCallOptimization {
// This optimisation is only valid, if the return value is unused.
if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
- EmitFPutC(Char, CI->getArgOperand(3), B, TD);
- return ConstantInt::get(CI->getType(), 1);
+ Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI);
+ return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
}
return 0;
@@ -1390,10 +1398,10 @@ struct FPutsOpt : public LibCallOptimization {
// fputs(s,F) --> fwrite(s,1,strlen(s),F)
uint64_t Len = GetStringLength(CI->getArgOperand(0));
if (!Len) return 0;
- EmitFWrite(CI->getArgOperand(0),
- ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
- CI->getArgOperand(1), B, TD, TLI);
- return CI; // Known to have no uses (see above).
+ // Known to have no uses (see above).
+ return EmitFWrite(CI->getArgOperand(0),
+ ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
+ CI->getArgOperand(1), B, TD, TLI);
}
};
@@ -1417,11 +1425,11 @@ struct FPrintFOpt : public LibCallOptimization {
// These optimizations require TargetData.
if (!TD) return 0;
- EmitFWrite(CI->getArgOperand(1),
- ConstantInt::get(TD->getIntPtrType(*Context),
- FormatStr.size()),
- CI->getArgOperand(0), B, TD, TLI);
- return ConstantInt::get(CI->getType(), FormatStr.size());
+ Value *NewCI = EmitFWrite(CI->getArgOperand(1),
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ FormatStr.size()),
+ CI->getArgOperand(0), B, TD, TLI);
+ return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0;
}
// The remaining optimizations require the format string to be "%s" or "%c"
@@ -1434,16 +1442,16 @@ struct FPrintFOpt : public LibCallOptimization {
if (FormatStr[1] == 'c') {
// fprintf(F, "%c", chr) --> fputc(chr, F)
if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
- EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD);
- return ConstantInt::get(CI->getType(), 1);
+ Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B,
+ TD, TLI);
+ return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
}
if (FormatStr[1] == 's') {
// fprintf(F, "%s", str) --> fputs(str, F)
if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
return 0;
- EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
- return CI;
+ return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
}
return 0;
}
@@ -1494,8 +1502,8 @@ struct PutsOpt : public LibCallOptimization {
if (Str.empty() && CI->use_empty()) {
// puts("") -> putchar('\n')
- Value *Res = EmitPutChar(B.getInt32('\n'), B, TD);
- if (CI->use_empty()) return CI;
+ Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI);
+ if (CI->use_empty() || !Res) return Res;
return B.CreateIntCast(Res, CI->getType(), true);
}
@@ -1633,6 +1641,8 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["llvm.exp2.f64"] = &Exp2;
Optimizations["llvm.exp2.f32"] = &Exp2;
+ if (TLI->has(LibFunc::fabs) && TLI->has(LibFunc::fabsf))
+ Optimizations["fabs"] = &UnaryDoubleFP;
if (TLI->has(LibFunc::floor) && TLI->has(LibFunc::floorf))
Optimizations["floor"] = &UnaryDoubleFP;
if (TLI->has(LibFunc::ceil) && TLI->has(LibFunc::ceilf))
@@ -1643,6 +1653,8 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["rint"] = &UnaryDoubleFP;
if (TLI->has(LibFunc::nearbyint) && TLI->has(LibFunc::nearbyintf))
Optimizations["nearbyint"] = &UnaryDoubleFP;
+ if (TLI->has(LibFunc::trunc) && TLI->has(LibFunc::truncf))
+ Optimizations["trunc"] = &UnaryDoubleFP;
// Integer Optimizations
Optimizations["ffs"] = &FFS;
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 5576432..2679b93 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -659,10 +659,26 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
// If the return instruction returns a value, and if the value was a
// PHI node in "BB", propagate the right value into the return.
for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
- i != e; ++i)
- if (PHINode *PN = dyn_cast<PHINode>(*i))
- if (PN->getParent() == BB)
- *i = PN->getIncomingValueForBlock(Pred);
+ i != e; ++i) {
+ Value *V = *i;
+ Instruction *NewBC = 0;
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) {
+ // Return value might be bitcasted. Clone and insert it before the
+ // return instruction.
+ V = BCI->getOperand(0);
+ NewBC = BCI->clone();
+ Pred->getInstList().insert(NewRet, NewBC);
+ *i = NewBC;
+ }
+ if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (PN->getParent() == BB) {
+ if (NewBC)
+ NewBC->setOperand(0, PN->getIncomingValueForBlock(Pred));
+ else
+ *i = PN->getIncomingValueForBlock(Pred);
+ }
+ }
+ }
// Update any PHI nodes in the returning block to realize that we no
// longer branch to them.
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 27f7724..e13fd71 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -34,7 +34,11 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {
/// EmitStrLen - Emit a call to the strlen function to the builder, for the
/// specified pointer. This always returns an integer value of size intptr_t.
-Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) {
+Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::strlen))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -53,11 +57,41 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) {
return CI;
}
+/// EmitStrNLen - Emit a call to the strnlen function to the builder, for the
+/// specified pointer. Ptr is required to be some pointer type, MaxLen must
+/// be of size_t type, and the return value has 'intptr_t' type.
+Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
+ const TargetData *TD, const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::strnlen))
+ return 0;
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
+ Attribute::NoUnwind);
+
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Constant *StrNLen = M->getOrInsertFunction("strnlen", AttrListPtr::get(AWI),
+ TD->getIntPtrType(Context),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context),
+ NULL);
+ CallInst *CI = B.CreateCall2(StrNLen, CastToCStr(Ptr, B), MaxLen, "strnlen");
+ if (const Function *F = dyn_cast<Function>(StrNLen->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
/// EmitStrChr - Emit a call to the strchr function to the builder, for the
/// specified pointer and character. Ptr is required to be some pointer type,
/// and the return value has 'i8*' type.
Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
- const TargetData *TD) {
+ const TargetData *TD, const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::strchr))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI =
AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
@@ -75,7 +109,11 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
/// EmitStrNCmp - Emit a call to the strncmp function to the builder.
Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
- IRBuilder<> &B, const TargetData *TD) {
+ IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::strncmp))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -101,7 +139,11 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
/// specified pointer arguments.
Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
- const TargetData *TD, StringRef Name) {
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ StringRef Name) {
+ if (!TLI->has(LibFunc::strcpy))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
@@ -119,7 +161,11 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
/// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the
/// specified pointer arguments.
Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
- IRBuilder<> &B, const TargetData *TD, StringRef Name) {
+ IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI, StringRef Name) {
+ if (!TLI->has(LibFunc::strncpy))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
@@ -139,7 +185,11 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
/// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src
/// are pointers.
Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
- IRBuilder<> &B, const TargetData *TD) {
+ IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::memcpy_chk))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI;
AWI = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
@@ -162,7 +212,11 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
- Value *Len, IRBuilder<> &B, const TargetData *TD) {
+ Value *Len, IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::memchr))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI;
AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
@@ -183,7 +237,11 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
/// EmitMemCmp - Emit a call to the memcmp function.
Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
- Value *Len, IRBuilder<> &B, const TargetData *TD) {
+ Value *Len, IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::memcmp))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -236,7 +294,11 @@ Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
-Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) {
+Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::putchar))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(),
B.getInt32Ty(), NULL);
@@ -254,7 +316,11 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) {
/// EmitPutS - Emit a call to the puts function. This assumes that Str is
/// some pointer.
-void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) {
+Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::puts))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -267,13 +333,16 @@ void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) {
CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts");
if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
-
+ return CI;
}
/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
/// an integer and File is a pointer to FILE.
-void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
- const TargetData *TD) {
+Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
+ const TargetData *TD, const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::fputc))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
@@ -295,12 +364,16 @@ void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
+ return CI;
}
/// EmitFPutS - Emit a call to the fputs function. Str is required to be a
/// pointer and File is a pointer to FILE.
-void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
- const TargetData *TD, const TargetLibraryInfo *TLI) {
+Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
+ const TargetData *TD, const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::fputs))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -321,13 +394,17 @@ void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
+ return CI;
}
/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
-void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
- IRBuilder<> &B, const TargetData *TD,
- const TargetLibraryInfo *TLI) {
+Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
+ IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::fwrite))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -354,11 +431,13 @@ void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
+ return CI;
}
SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { }
-bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
+bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// We really need TargetData for later.
if (!TD) return false;
@@ -446,7 +525,9 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
// string lengths for varying.
if (isFoldable(2, 1, true)) {
Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD,
- Name.substr(2, 6));
+ TLI, Name.substr(2, 6));
+ if (!Ret)
+ return false;
replaceCall(Ret);
return true;
}
@@ -464,7 +545,10 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
if (isFoldable(3, 2, false)) {
Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, TD, Name.substr(2, 7));
+ CI->getArgOperand(2), B, TD, TLI,
+ Name.substr(2, 7));
+ if (!Ret)
+ return false;
replaceCall(Ret);
return true;
}
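The net effect of threading TargetLibraryInfo through BuildLibCalls is that every Emit* helper may now decline to emit a call and return null when the corresponding library function is unavailable, so callers have to check the result instead of assuming a call was created. A hedged sketch of the caller-side pattern, mirroring the SPrintFOpt change above (names are illustrative):

    Value *Len = EmitStrLen(Src, B, TD, TLI);
    if (!Len)
      return 0;  // strlen is not available on this target; give up on the fold
    // ... otherwise keep building the simplified sequence using Len ...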
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index b3f5289..72d4199 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -39,7 +39,7 @@ SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
: AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {}
SSAUpdater::~SSAUpdater() {
- delete &getAvailableVals(AV);
+ delete static_cast<AvailableValsTy*>(AV);
}
/// Initialize - Reset this object to get ready for a new set of SSA
@@ -214,6 +214,11 @@ void SSAUpdater::RewriteUse(Use &U) {
else
V = GetValueInMiddleOfBlock(User->getParent());
+ // Notify the users of the existing value that it is being replaced.
+ Value *OldVal = U.get();
+ if (OldVal != V && OldVal->hasValueHandle())
+ ValueHandleBase::ValueIsRAUWd(OldVal, V);
+
U.set(V);
}
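With this change, value handles registered on the value currently occupying the use are notified just as they would be for a full replaceAllUsesWith. A minimal sketch of the observable effect, assuming a WeakVH is tracking the old value:

    WeakVH Tracker(U.get());   // weak handle on the value currently in the use
    Updater.RewriteUse(U);     // may substitute a rewritten SSA value
    // If a different value was substituted, ValueIsRAUWd has re-pointed
    // Tracker at the replacement instead of leaving it on the old value.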
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index aedb86b..c09c69b 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -26,6 +26,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Operator.h"
#include "llvm/Module.h"
+#include "llvm/TypeFinder.h"
#include "llvm/ValueSymbolTable.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
@@ -145,7 +146,7 @@ class TypePrinting {
public:
/// NamedTypes - The named types that are used by the current module.
- std::vector<StructType*> NamedTypes;
+ TypeFinder NamedTypes;
/// NumberedTypes - The numbered types, along with their value.
DenseMap<StructType*, unsigned> NumberedTypes;
@@ -164,7 +165,7 @@ public:
void TypePrinting::incorporateTypes(const Module &M) {
- M.findUsedStructTypes(NamedTypes);
+ NamedTypes.run(M, false);
// The list of struct types we got back includes all the struct types, split
// the unnamed ones out to a numbering and remove the anonymous structs.
@@ -1352,12 +1353,12 @@ static void PrintLinkage(GlobalValue::LinkageTypes LT,
case GlobalValue::LinkerPrivateWeakLinkage:
Out << "linker_private_weak ";
break;
- case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
- Out << "linker_private_weak_def_auto ";
- break;
case GlobalValue::InternalLinkage: Out << "internal "; break;
case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break;
case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break;
+ case GlobalValue::LinkOnceODRAutoHideLinkage:
+ Out << "linkonce_odr_auto_hide ";
+ break;
case GlobalValue::WeakAnyLinkage: Out << "weak "; break;
case GlobalValue::WeakODRLinkage: Out << "weak_odr "; break;
case GlobalValue::CommonLinkage: Out << "common "; break;
diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp
index d466ac6..c8219eb 100644
--- a/lib/VMCore/Attributes.cpp
+++ b/lib/VMCore/Attributes.cpp
@@ -88,6 +88,9 @@ std::string Attribute::getAsString(Attributes Attrs) {
Result += utostr(Attribute::getAlignmentFromAttrs(Attrs));
Result += " ";
}
+ if (Attrs & Attribute::IANSDialect)
+ Result += "ia_nsdialect ";
+
// Trim the trailing space.
assert(!Result.empty() && "Unknown attribute!");
Result.erase(Result.end()-1);
diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt
index 648ccbd..6a20be6 100644
--- a/lib/VMCore/CMakeLists.txt
+++ b/lib/VMCore/CMakeLists.txt
@@ -31,6 +31,7 @@ add_llvm_library(LLVMCore
PassRegistry.cpp
PrintModulePass.cpp
Type.cpp
+ TypeFinder.cpp
Use.cpp
User.cpp
Value.cpp
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index 972db3c..a56f1b2 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -1084,6 +1084,8 @@ LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) {
return LLVMLinkOnceAnyLinkage;
case GlobalValue::LinkOnceODRLinkage:
return LLVMLinkOnceODRLinkage;
+ case GlobalValue::LinkOnceODRAutoHideLinkage:
+ return LLVMLinkOnceODRAutoHideLinkage;
case GlobalValue::WeakAnyLinkage:
return LLVMWeakAnyLinkage;
case GlobalValue::WeakODRLinkage:
@@ -1098,8 +1100,6 @@ LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) {
return LLVMLinkerPrivateLinkage;
case GlobalValue::LinkerPrivateWeakLinkage:
return LLVMLinkerPrivateWeakLinkage;
- case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
- return LLVMLinkerPrivateWeakDefAutoLinkage;
case GlobalValue::DLLImportLinkage:
return LLVMDLLImportLinkage;
case GlobalValue::DLLExportLinkage:
@@ -1129,6 +1129,9 @@ void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) {
case LLVMLinkOnceODRLinkage:
GV->setLinkage(GlobalValue::LinkOnceODRLinkage);
break;
+ case LLVMLinkOnceODRAutoHideLinkage:
+ GV->setLinkage(GlobalValue::LinkOnceODRAutoHideLinkage);
+ break;
case LLVMWeakAnyLinkage:
GV->setLinkage(GlobalValue::WeakAnyLinkage);
break;
@@ -1150,9 +1153,6 @@ void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) {
case LLVMLinkerPrivateWeakLinkage:
GV->setLinkage(GlobalValue::LinkerPrivateWeakLinkage);
break;
- case LLVMLinkerPrivateWeakDefAutoLinkage:
- GV->setLinkage(GlobalValue::LinkerPrivateWeakDefAutoLinkage);
- break;
case LLVMDLLImportLinkage:
GV->setLinkage(GlobalValue::DLLImportLinkage);
break;
diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp
index 219e631..77b2403 100644
--- a/lib/VMCore/Dominators.cpp
+++ b/lib/VMCore/Dominators.cpp
@@ -39,6 +39,19 @@ static cl::opt<bool,true>
VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo),
cl::desc("Verify dominator info (time consuming)"));
+bool BasicBlockEdge::isSingleEdge() const {
+ const TerminatorInst *TI = Start->getTerminator();
+ unsigned NumEdgesToEnd = 0;
+ for (unsigned int i = 0, n = TI->getNumSuccessors(); i < n; ++i) {
+ if (TI->getSuccessor(i) == End)
+ ++NumEdgesToEnd;
+ if (NumEdgesToEnd >= 2)
+ return false;
+ }
+ assert(NumEdgesToEnd == 1);
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// DominatorTree Implementation
//===----------------------------------------------------------------------===//
@@ -142,12 +155,27 @@ bool DominatorTree::dominates(const Instruction *Def,
// Invoke results are only usable in the normal destination, not in the
// exceptional destination.
BasicBlock *NormalDest = II->getNormalDest();
- if (!dominates(NormalDest, UseBB))
+ BasicBlockEdge E(DefBB, NormalDest);
+ return dominates(E, UseBB);
+}
+
+bool DominatorTree::dominates(const BasicBlockEdge &BBE,
+ const BasicBlock *UseBB) const {
+ // Assert that we have a single edge. We could handle multiple edges by
+ // simply returning false, but since isSingleEdge is linear in the number
+ // of edges, callers can normally handle them more efficiently.
+ assert(BBE.isSingleEdge());
+
+ // If the BB the edge ends in doesn't dominate the use BB, then the
+ // edge also doesn't.
+ const BasicBlock *Start = BBE.getStart();
+ const BasicBlock *End = BBE.getEnd();
+ if (!dominates(End, UseBB))
return false;
- // Simple case: if the normal destination has a single predecessor, the
- // fact that it dominates the use block implies that we also do.
- if (NormalDest->getSinglePredecessor())
+ // Simple case: if the end BB has a single predecessor, the fact that it
+ // dominates the use block implies that the edge also does.
+ if (End->getSinglePredecessor())
return true;
// The normal edge from the invoke is critical. Conceptually, what we would
@@ -170,29 +198,45 @@ bool DominatorTree::dominates(const Instruction *Def,
// trivially dominates itself, so we only have to find if it dominates the
// other predecessors. Since the only way out of X is via NormalDest, X can
// only properly dominate a node if NormalDest dominates that node too.
- for (pred_iterator PI = pred_begin(NormalDest),
- E = pred_end(NormalDest); PI != E; ++PI) {
+ for (const_pred_iterator PI = pred_begin(End), E = pred_end(End);
+ PI != E; ++PI) {
const BasicBlock *BB = *PI;
- if (BB == DefBB)
+ if (BB == Start)
continue;
- if (!DT->isReachableFromEntry(BB))
- continue;
-
- if (!dominates(NormalDest, BB))
+ if (!dominates(End, BB))
return false;
}
return true;
}
-bool DominatorTree::dominates(const Instruction *Def,
+bool DominatorTree::dominates(const BasicBlockEdge &BBE,
const Use &U) const {
- Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
+ // Assert that we have a single edge. We could handle multiple edges by
+ // simply returning false, but since isSingleEdge is linear in the number
+ // of edges, callers can normally handle them more efficiently.
+ assert(BBE.isSingleEdge());
+
+ Instruction *UserInst = cast<Instruction>(U.getUser());
+ // A PHI in the edge's end block is dominated by the edge.
+ PHINode *PN = dyn_cast<PHINode>(UserInst);
+ if (PN && PN->getParent() == BBE.getEnd() &&
+ PN->getIncomingBlock(U) == BBE.getStart())
+ return true;
- // Instructions do not dominate non-instructions.
- if (!UserInst)
- return false;
+ // Otherwise use the edge-dominates-block query, which
+ // handles the crazy critical edge cases properly.
+ const BasicBlock *UseBB;
+ if (PN)
+ UseBB = PN->getIncomingBlock(U);
+ else
+ UseBB = UserInst->getParent();
+ return dominates(BBE, UseBB);
+}
+bool DominatorTree::dominates(const Instruction *Def,
+ const Use &U) const {
+ Instruction *UserInst = cast<Instruction>(U.getUser());
const BasicBlock *DefBB = Def->getParent();
// Determine the block in which the use happens. PHI nodes use
@@ -218,17 +262,9 @@ bool DominatorTree::dominates(const Instruction *Def,
// their own block, except possibly a phi, so we don't need to
// walk the block in any case.
if (const InvokeInst *II = dyn_cast<InvokeInst>(Def)) {
- // A PHI in the normal successor using the invoke's return value is
- // dominated by the invoke's return value.
- if (isa<PHINode>(UserInst) &&
- UserInst->getParent() == II->getNormalDest() &&
- cast<PHINode>(UserInst)->getIncomingBlock(U) == DefBB)
- return true;
-
- // Otherwise use the instruction-dominates-block query, which
- // handles the crazy case of an invoke with a critical edge
- // properly.
- return dominates(Def, UseBB);
+ BasicBlock *NormalDest = II->getNormalDest();
+ BasicBlockEdge E(DefBB, NormalDest);
+ return dominates(E, U);
}
// If the def and use are in different blocks, do a simple CFG dominator
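The invoke-specific reasoning is now expressed through the new BasicBlockEdge queries, which other clients can use directly. A minimal sketch of the edge-based query (the BasicBlockEdge constructor signature is assumed from the accompanying header change; helper name is hypothetical):

    static bool normalEdgeDominatesUse(const DominatorTree &DT,
                                       const InvokeInst *II, const Use &U) {
      BasicBlockEdge Edge(II->getParent(), II->getNormalDest());
      // Holds only if every path to the use leaves the defining block through
      // the invoke's normal edge, even when that edge is critical.
      return DT.dominates(Edge, U);
    }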
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index ede4626..95e5a8b 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -200,7 +200,7 @@ const Function *MDNode::getFunction() const {
// destroy - Delete this node. Only when there are no uses.
void MDNode::destroy() {
setValueSubclassData(getSubclassDataFromValue() | DestroyFlag);
- // Placement delete, the free the memory.
+ // Placement delete, then free the memory.
this->~MDNode();
free(this);
}
diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp
index 8ea3665..5b5176b 100644
--- a/lib/VMCore/Module.cpp
+++ b/lib/VMCore/Module.cpp
@@ -467,143 +467,3 @@ void Module::removeLibrary(StringRef Lib) {
return;
}
}
-
-//===----------------------------------------------------------------------===//
-// Type finding functionality.
-//===----------------------------------------------------------------------===//
-
-namespace {
- /// TypeFinder - Walk over a module, identifying all of the types that are
- /// used by the module.
- class TypeFinder {
- // To avoid walking constant expressions multiple times and other IR
- // objects, we keep several helper maps.
- DenseSet<const Value*> VisitedConstants;
- DenseSet<Type*> VisitedTypes;
-
- std::vector<StructType*> &StructTypes;
- bool OnlyNamed;
- public:
- TypeFinder(std::vector<StructType*> &structTypes, bool onlyNamed)
- : StructTypes(structTypes), OnlyNamed(onlyNamed) {}
-
- void run(const Module &M) {
- // Get types from global variables.
- for (Module::const_global_iterator I = M.global_begin(),
- E = M.global_end(); I != E; ++I) {
- incorporateType(I->getType());
- if (I->hasInitializer())
- incorporateValue(I->getInitializer());
- }
-
- // Get types from aliases.
- for (Module::const_alias_iterator I = M.alias_begin(),
- E = M.alias_end(); I != E; ++I) {
- incorporateType(I->getType());
- if (const Value *Aliasee = I->getAliasee())
- incorporateValue(Aliasee);
- }
-
- // Get types from functions.
- SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst;
- for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) {
- incorporateType(FI->getType());
-
- // First incorporate the arguments.
- for (Function::const_arg_iterator AI = FI->arg_begin(),
- AE = FI->arg_end(); AI != AE; ++AI)
- incorporateValue(AI);
-
- for (Function::const_iterator BB = FI->begin(), E = FI->end();
- BB != E;++BB)
- for (BasicBlock::const_iterator II = BB->begin(),
- E = BB->end(); II != E; ++II) {
- const Instruction &I = *II;
- // Incorporate the type of the instruction.
- incorporateType(I.getType());
-
- // Incorporate non-instruction operand types. (We are incorporating
- // all instructions with this loop.)
- for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
- OI != OE; ++OI)
- if (!isa<Instruction>(OI))
- incorporateValue(*OI);
-
- // Incorporate types hiding in metadata.
- I.getAllMetadataOtherThanDebugLoc(MDForInst);
- for (unsigned i = 0, e = MDForInst.size(); i != e; ++i)
- incorporateMDNode(MDForInst[i].second);
- MDForInst.clear();
- }
- }
-
- for (Module::const_named_metadata_iterator I = M.named_metadata_begin(),
- E = M.named_metadata_end(); I != E; ++I) {
- const NamedMDNode *NMD = I;
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
- incorporateMDNode(NMD->getOperand(i));
- }
- }
-
- private:
- void incorporateType(Type *Ty) {
- // Check to see if we're already visited this type.
- if (!VisitedTypes.insert(Ty).second)
- return;
-
- // If this is a structure or opaque type, add a name for the type.
- if (StructType *STy = dyn_cast<StructType>(Ty))
- if (!OnlyNamed || STy->hasName())
- StructTypes.push_back(STy);
-
- // Recursively walk all contained types.
- for (Type::subtype_iterator I = Ty->subtype_begin(),
- E = Ty->subtype_end(); I != E; ++I)
- incorporateType(*I);
- }
-
- /// incorporateValue - This method is used to walk operand lists finding
- /// types hiding in constant expressions and other operands that won't be
- /// walked in other ways. GlobalValues, basic blocks, instructions, and
- /// inst operands are all explicitly enumerated.
- void incorporateValue(const Value *V) {
- if (const MDNode *M = dyn_cast<MDNode>(V))
- return incorporateMDNode(M);
- if (!isa<Constant>(V) || isa<GlobalValue>(V)) return;
-
- // Already visited?
- if (!VisitedConstants.insert(V).second)
- return;
-
- // Check this type.
- incorporateType(V->getType());
-
- // If this is an instruction, we incorporate it separately.
- if (isa<Instruction>(V))
- return;
-
- // Look in operands for types.
- const User *U = cast<User>(V);
- for (Constant::const_op_iterator I = U->op_begin(),
- E = U->op_end(); I != E;++I)
- incorporateValue(*I);
- }
-
- void incorporateMDNode(const MDNode *V) {
-
- // Already visited?
- if (!VisitedConstants.insert(V).second)
- return;
-
- // Look in operands for types.
- for (unsigned i = 0, e = V->getNumOperands(); i != e; ++i)
- if (Value *Op = V->getOperand(i))
- incorporateValue(Op);
- }
- };
-} // end anonymous namespace
-
-void Module::findUsedStructTypes(std::vector<StructType*> &StructTypes,
- bool OnlyNamed) const {
- TypeFinder(StructTypes, OnlyNamed).run(*this);
-}
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index c6f3558..5e9a00f 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -464,19 +464,26 @@ void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) {
void StructType::setName(StringRef Name) {
if (Name == getName()) return;
- // If this struct already had a name, remove its symbol table entry.
- if (SymbolTableEntry) {
- getContext().pImpl->NamedStructTypes.erase(getName());
- SymbolTableEntry = 0;
- }
-
+ StringMap<StructType *> &SymbolTable = getContext().pImpl->NamedStructTypes;
+ typedef StringMap<StructType *>::MapEntryTy EntryTy;
+
+ // If this struct already had a name, remove its symbol table entry. Don't
+ // delete the data yet because it may be part of the new name.
+ if (SymbolTableEntry)
+ SymbolTable.remove((EntryTy *)SymbolTableEntry);
+
// If this is just removing the name, we're done.
- if (Name.empty())
+ if (Name.empty()) {
+ if (SymbolTableEntry) {
+ // Delete the old string data.
+ ((EntryTy *)SymbolTableEntry)->Destroy(SymbolTable.getAllocator());
+ SymbolTableEntry = 0;
+ }
return;
+ }
// Look up the entry for the name.
- StringMapEntry<StructType*> *Entry =
- &getContext().pImpl->NamedStructTypes.GetOrCreateValue(Name);
+ EntryTy *Entry = &getContext().pImpl->NamedStructTypes.GetOrCreateValue(Name);
// While we have a name collision, try a random rename.
if (Entry->getValue()) {
@@ -497,7 +504,10 @@ void StructType::setName(StringRef Name) {
// Okay, we found an entry that isn't used. It's us!
Entry->setValue(this);
-
+
+ // Delete the old string data.
+ if (SymbolTableEntry)
+ ((EntryTy *)SymbolTableEntry)->Destroy(SymbolTable.getAllocator());
SymbolTableEntry = Entry;
}
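The old StringMap entry is only destroyed after the rename completes because the incoming StringRef may alias the old name's storage. A hedged sketch of the hazard this guards against (hypothetical struct name):

    LLVMContext Ctx;
    StructType *ST = StructType::create(Ctx, "foo.bar");
    // The substring still points into the old symbol table entry's storage,
    // so that storage must stay alive until the new entry is installed.
    ST->setName(ST->getName().substr(0, 3));   // renames to "foo"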
diff --git a/lib/VMCore/TypeFinder.cpp b/lib/VMCore/TypeFinder.cpp
new file mode 100644
index 0000000..4de649f
--- /dev/null
+++ b/lib/VMCore/TypeFinder.cpp
@@ -0,0 +1,148 @@
+//===-- TypeFinder.cpp - Implement the TypeFinder class -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TypeFinder class for the VMCore library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TypeFinder.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Metadata.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+void TypeFinder::run(const Module &M, bool onlyNamed) {
+ OnlyNamed = onlyNamed;
+
+ // Get types from global variables.
+ for (Module::const_global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ incorporateType(I->getType());
+ if (I->hasInitializer())
+ incorporateValue(I->getInitializer());
+ }
+
+ // Get types from aliases.
+ for (Module::const_alias_iterator I = M.alias_begin(),
+ E = M.alias_end(); I != E; ++I) {
+ incorporateType(I->getType());
+ if (const Value *Aliasee = I->getAliasee())
+ incorporateValue(Aliasee);
+ }
+
+ // Get types from functions.
+ SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst;
+ for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) {
+ incorporateType(FI->getType());
+
+ // First incorporate the arguments.
+ for (Function::const_arg_iterator AI = FI->arg_begin(),
+ AE = FI->arg_end(); AI != AE; ++AI)
+ incorporateValue(AI);
+
+ for (Function::const_iterator BB = FI->begin(), E = FI->end();
+ BB != E;++BB)
+ for (BasicBlock::const_iterator II = BB->begin(),
+ E = BB->end(); II != E; ++II) {
+ const Instruction &I = *II;
+
+ // Incorporate the type of the instruction.
+ incorporateType(I.getType());
+
+ // Incorporate non-instruction operand types. (We are incorporating all
+ // instructions with this loop.)
+ for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
+ OI != OE; ++OI)
+ if (!isa<Instruction>(OI))
+ incorporateValue(*OI);
+
+ // Incorporate types hiding in metadata.
+ I.getAllMetadataOtherThanDebugLoc(MDForInst);
+ for (unsigned i = 0, e = MDForInst.size(); i != e; ++i)
+ incorporateMDNode(MDForInst[i].second);
+
+ MDForInst.clear();
+ }
+ }
+
+ for (Module::const_named_metadata_iterator I = M.named_metadata_begin(),
+ E = M.named_metadata_end(); I != E; ++I) {
+ const NamedMDNode *NMD = I;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ incorporateMDNode(NMD->getOperand(i));
+ }
+}
+
+void TypeFinder::clear() {
+ VisitedConstants.clear();
+ VisitedTypes.clear();
+ StructTypes.clear();
+}
+
+/// incorporateType - This method adds the type to the list of used structures
+/// if it's not in there already.
+void TypeFinder::incorporateType(Type *Ty) {
+ // Check to see if we've already visited this type.
+ if (!VisitedTypes.insert(Ty).second)
+ return;
+
+ // If this is a structure or opaque type, add a name for the type.
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ if (!OnlyNamed || STy->hasName())
+ StructTypes.push_back(STy);
+
+ // Recursively walk all contained types.
+ for (Type::subtype_iterator I = Ty->subtype_begin(),
+ E = Ty->subtype_end(); I != E; ++I)
+ incorporateType(*I);
+}
+
+/// incorporateValue - This method is used to walk operand lists finding types
+/// hiding in constant expressions and other operands that won't be walked in
+/// other ways. GlobalValues, basic blocks, instructions, and inst operands are
+/// all explicitly enumerated.
+void TypeFinder::incorporateValue(const Value *V) {
+ if (const MDNode *M = dyn_cast<MDNode>(V))
+ return incorporateMDNode(M);
+
+ if (!isa<Constant>(V) || isa<GlobalValue>(V)) return;
+
+ // Already visited?
+ if (!VisitedConstants.insert(V).second)
+ return;
+
+ // Check this type.
+ incorporateType(V->getType());
+
+ // If this is an instruction, we incorporate it separately.
+ if (isa<Instruction>(V))
+ return;
+
+ // Look in operands for types.
+ const User *U = cast<User>(V);
+ for (Constant::const_op_iterator I = U->op_begin(),
+ E = U->op_end(); I != E;++I)
+ incorporateValue(*I);
+}
+
+/// incorporateMDNode - This method is used to walk the operands of an MDNode to
+/// find types hiding within.
+void TypeFinder::incorporateMDNode(const MDNode *V) {
+ // Already visited?
+ if (!VisitedConstants.insert(V).second)
+ return;
+
+ // Look in operands for types.
+ for (unsigned i = 0, e = V->getNumOperands(); i != e; ++i)
+ if (Value *Op = V->getOperand(i))
+ incorporateValue(Op);
+}
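A minimal usage sketch of the new class, mirroring how the AsmWriter change above now populates its named-type list (the size() and operator[] accessors are assumed to be part of the new llvm/TypeFinder.h interface):

    TypeFinder StructTypes;
    StructTypes.run(M, /*onlyNamed=*/false);
    for (unsigned i = 0, e = StructTypes.size(); i != e; ++i)
      StructTypes[i]->dump();  // print each struct type used by module M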
diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp
index 9a8e185..d1ca953 100644
--- a/lib/VMCore/ValueTypes.cpp
+++ b/lib/VMCore/ValueTypes.cpp
@@ -71,6 +71,10 @@ bool EVT::isExtended512BitVector() const {
return isExtendedVector() && getSizeInBits() == 512;
}
+bool EVT::isExtended1024BitVector() const {
+ return isExtendedVector() && getSizeInBits() == 1024;
+}
+
EVT EVT::getExtendedVectorElementType() const {
assert(isExtended() && "Type is not extended!");
return EVT::getEVT(cast<VectorType>(LLVMTy)->getElementType());
@@ -128,10 +132,12 @@ std::string EVT::getEVTString() const {
case MVT::v2i32: return "v2i32";
case MVT::v4i32: return "v4i32";
case MVT::v8i32: return "v8i32";
+ case MVT::v16i32: return "v16i32";
case MVT::v1i64: return "v1i64";
case MVT::v2i64: return "v2i64";
case MVT::v4i64: return "v4i64";
case MVT::v8i64: return "v8i64";
+ case MVT::v16i64: return "v16i64";
case MVT::v2f32: return "v2f32";
case MVT::v2f16: return "v2f16";
case MVT::v4f32: return "v4f32";
@@ -177,10 +183,12 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v2i32: return VectorType::get(Type::getInt32Ty(Context), 2);
case MVT::v4i32: return VectorType::get(Type::getInt32Ty(Context), 4);
case MVT::v8i32: return VectorType::get(Type::getInt32Ty(Context), 8);
+ case MVT::v16i32: return VectorType::get(Type::getInt32Ty(Context), 16);
case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1);
case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2);
case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4);
case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8);
+ case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16);
case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2);
case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4);
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 5d51f41..c932d9e 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -400,8 +400,8 @@ void Verifier::visitGlobalValue(GlobalValue &GV) {
"Only global arrays can have appending linkage!", GVar);
}
- Assert1(!GV.hasLinkerPrivateWeakDefAutoLinkage() || GV.hasDefaultVisibility(),
- "linker_private_weak_def_auto can only have default visibility!",
+ Assert1(!GV.hasLinkOnceODRAutoHideLinkage() || GV.hasDefaultVisibility(),
+ "linkonce_odr_auto_hide can only have default visibility!",
&GV);
}
@@ -1093,7 +1093,7 @@ void Verifier::visitBitCastInst(BitCastInst &I) {
// BitCast implies a no-op cast of type only. No bits change.
// However, you can't cast pointers to anything but pointers.
- Assert1(DestTy->isPointerTy() == DestTy->isPointerTy(),
+ Assert1(SrcTy->isPointerTy() == DestTy->isPointerTy(),
"Bitcast requires both operands to be pointer or neither", &I);
Assert1(SrcBitSize == DestBitSize, "Bitcast requires types of same width",&I);
@@ -1378,6 +1378,15 @@ void Verifier::visitLoadInst(LoadInst &LI) {
"Load cannot have Release ordering", &LI);
Assert1(LI.getAlignment() != 0,
"Atomic load must specify explicit alignment", &LI);
+ if (!ElTy->isPointerTy()) {
+ Assert2(ElTy->isIntegerTy(),
+ "atomic store operand must have integer type!",
+ &LI, ElTy);
+ unsigned Size = ElTy->getPrimitiveSizeInBits();
+ Assert2(Size >= 8 && !(Size & (Size - 1)),
+ "atomic store operand must be power-of-two byte-sized integer",
+ &LI, ElTy);
+ }
} else {
Assert1(LI.getSynchScope() == CrossThread,
"Non-atomic load cannot have SynchronizationScope specified", &LI);
@@ -1444,6 +1453,15 @@ void Verifier::visitStoreInst(StoreInst &SI) {
"Store cannot have Acquire ordering", &SI);
Assert1(SI.getAlignment() != 0,
"Atomic store must specify explicit alignment", &SI);
+ if (!ElTy->isPointerTy()) {
+ Assert2(ElTy->isIntegerTy(),
+ "atomic store operand must have integer type!",
+ &SI, ElTy);
+ unsigned Size = ElTy->getPrimitiveSizeInBits();
+ Assert2(Size >= 8 && !(Size & (Size - 1)),
+ "atomic store operand must be power-of-two byte-sized integer",
+ &SI, ElTy);
+ }
} else {
Assert1(SI.getSynchScope() == CrossThread,
"Non-atomic store cannot have SynchronizationScope specified", &SI);
@@ -1471,6 +1489,13 @@ void Verifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI) {
PointerType *PTy = dyn_cast<PointerType>(CXI.getOperand(0)->getType());
Assert1(PTy, "First cmpxchg operand must be a pointer.", &CXI);
Type *ElTy = PTy->getElementType();
+ Assert2(ElTy->isIntegerTy(),
+ "cmpxchg operand must have integer type!",
+ &CXI, ElTy);
+ unsigned Size = ElTy->getPrimitiveSizeInBits();
+ Assert2(Size >= 8 && !(Size & (Size - 1)),
+ "cmpxchg operand must be power-of-two byte-sized integer",
+ &CXI, ElTy);
Assert2(ElTy == CXI.getOperand(1)->getType(),
"Expected value type does not match pointer operand type!",
&CXI, ElTy);
@@ -1488,6 +1513,13 @@ void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
PointerType *PTy = dyn_cast<PointerType>(RMWI.getOperand(0)->getType());
Assert1(PTy, "First atomicrmw operand must be a pointer.", &RMWI);
Type *ElTy = PTy->getElementType();
+ Assert2(ElTy->isIntegerTy(),
+ "atomicrmw operand must have integer type!",
+ &RMWI, ElTy);
+ unsigned Size = ElTy->getPrimitiveSizeInBits();
+ Assert2(Size >= 8 && !(Size & (Size - 1)),
+ "atomicrmw operand must be power-of-two byte-sized integer",
+ &RMWI, ElTy);
Assert2(ElTy == RMWI.getOperand(1)->getType(),
"Argument value type does not match pointer operand type!",
&RMWI, ElTy);
@@ -1536,7 +1568,7 @@ void Verifier::visitLandingPadInst(LandingPadInst &LPI) {
// landing pad block may be branched to only by the unwind edge of an invoke.
for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator());
- Assert1(II && II->getUnwindDest() == BB,
+ Assert1(II && II->getUnwindDest() == BB && II->getNormalDest() != BB,
"Block containing LandingPadInst must be jumped to "
"only by the unwind edge of an invoke.", &LPI);
}
@@ -1575,6 +1607,13 @@ void Verifier::visitLandingPadInst(LandingPadInst &LPI) {
void Verifier::verifyDominatesUse(Instruction &I, unsigned i) {
Instruction *Op = cast<Instruction>(I.getOperand(i));
+ // If we have an invalid invoke, don't try to compute the dominance.
+ // We already reject it in the invoke specific checks and the dominance
+ // computation doesn't handle multiple edges.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Op)) {
+ if (II->getNormalDest() == II->getUnwindDest())
+ return;
+ }
const Use &U = I.getOperandUse(i);
Assert2(InstsInThisBlock.count(Op) || DT->dominates(Op, U),