Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/Analysis.cpp | 1
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp | 2
-rw-r--r--  lib/Analysis/CMakeLists.txt | 1
-rw-r--r--  lib/Analysis/CaptureTracking.cpp | 5
-rw-r--r--  lib/Analysis/CostModel.cpp | 11
-rw-r--r--  lib/Analysis/DbgInfoPrinter.cpp | 224
-rw-r--r--  lib/Analysis/IPA/InlineCost.cpp | 8
-rw-r--r--  lib/Analysis/MemoryBuiltins.cpp | 99
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp | 18
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp | 4
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 10
-rw-r--r--  lib/CodeGen/Analysis.cpp | 1
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.cpp | 11
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.h | 10
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 58
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 5
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 202
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 19
-rw-r--r--  lib/CodeGen/BasicTargetTransformInfo.cpp | 79
-rw-r--r--  lib/CodeGen/EarlyIfConversion.cpp | 2
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 60
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 48
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 1
-rw-r--r--  lib/CodeGen/MachineScheduler.cpp | 11
-rw-r--r--  lib/CodeGen/MachineTraceMetrics.cpp | 4
-rw-r--r--  lib/CodeGen/Passes.cpp | 1
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 3
-rw-r--r--  lib/CodeGen/RegAllocBasic.cpp | 1
-rw-r--r--  lib/CodeGen/RegAllocGreedy.cpp | 1
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 1
-rw-r--r--  lib/CodeGen/ScheduleDAG.cpp | 6
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 9
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 13
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 79
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 23
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 20
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 9
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 8
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 7
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 26
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp | 13
-rw-r--r--  lib/CodeGen/StackProtector.cpp | 1
-rw-r--r--  lib/CodeGen/TargetLoweringBase.cpp | 18
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 8
-rw-r--r--  lib/CodeGen/TargetSchedule.cpp | 5
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 1
-rw-r--r--  lib/IR/AsmWriter.cpp | 2
-rw-r--r--  lib/IR/AttributeImpl.h | 2
-rw-r--r--  lib/IR/Attributes.cpp | 9
-rw-r--r--  lib/IR/ConstantFold.cpp | 35
-rw-r--r--  lib/IR/Constants.cpp | 54
-rw-r--r--  lib/IR/DIBuilder.cpp | 51
-rw-r--r--  lib/IR/DataLayout.cpp | 6
-rw-r--r--  lib/IR/DebugInfo.cpp | 230
-rw-r--r--  lib/IR/Function.cpp | 24
-rw-r--r--  lib/IR/Instructions.cpp | 13
-rw-r--r--  lib/IR/LLVMContext.cpp | 5
-rw-r--r--  lib/IR/LLVMContextImpl.h | 7
-rw-r--r--  lib/IR/Metadata.cpp | 1
-rw-r--r--  lib/IR/Value.cpp | 9
-rw-r--r--  lib/Linker/LinkModules.cpp | 2
-rw-r--r--  lib/MC/ELFObjectWriter.cpp | 6
-rw-r--r--  lib/MC/MCAsmStreamer.cpp | 13
-rw-r--r--  lib/MC/MCContext.cpp | 11
-rw-r--r--  lib/MC/MCDisassembler/Disassembler.cpp | 24
-rw-r--r--  lib/MC/MCDwarf.cpp | 12
-rw-r--r--  lib/MC/MCELFStreamer.cpp | 2
-rw-r--r--  lib/MC/MCNullStreamer.cpp | 2
-rw-r--r--  lib/MC/MCObjectFileInfo.cpp | 12
-rw-r--r--  lib/MC/MCParser/AsmParser.cpp | 4
-rw-r--r--  lib/MC/MCPureStreamer.cpp | 2
-rw-r--r--  lib/MC/MCStreamer.cpp | 14
-rw-r--r--  lib/Object/MachOObjectFile.cpp | 7
-rw-r--r--  lib/Support/CommandLine.cpp | 8
-rw-r--r--  lib/Support/FileOutputBuffer.cpp | 4
-rw-r--r--  lib/Support/MemoryBuffer.cpp | 81
-rw-r--r--  lib/Support/Statistic.cpp | 18
-rw-r--r--  lib/Support/Unix/Memory.inc | 9
-rw-r--r--  lib/Support/Unix/PathV2.inc | 25
-rw-r--r--  lib/Support/Windows/PathV2.inc | 61
-rw-r--r--  lib/Support/raw_ostream.cpp | 7
-rw-r--r--  lib/Target/AArch64/AArch64FrameLowering.cpp | 55
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp | 53
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.td | 43
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 83
-rw-r--r--  lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 16
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 11
-rw-r--r--  lib/Target/ARM/A15SDOptimizer.cpp | 704
-rw-r--r--  lib/Target/ARM/ARM.h | 1
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 57
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 14
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 102
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 14
-rw-r--r--  lib/Target/ARM/ARMTargetTransformInfo.cpp | 86
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 216
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 1
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 17
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.cpp | 49
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.h | 2
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.cpp | 671
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.h | 14
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.td | 87
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfoV4.td | 17
-rw-r--r--  lib/Target/Hexagon/HexagonMachineScheduler.cpp | 6
-rw-r--r--  lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 67
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h | 22
-rw-r--r--  lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp | 50
-rw-r--r--  lib/Target/MBlaze/MBlazeISelLowering.cpp | 3
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.h | 3
-rw-r--r--  lib/Target/MSP430/MSP430FrameLowering.cpp | 4
-rw-r--r--  lib/Target/MSP430/MSP430FrameLowering.h | 3
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.h | 2
-rw-r--r--  lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 41
-rw-r--r--  lib/Target/Mips/CMakeLists.txt | 5
-rw-r--r--  lib/Target/Mips/Mips.h | 1
-rw-r--r--  lib/Target/Mips/Mips.td | 2
-rw-r--r--  lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 308
-rw-r--r--  lib/Target/Mips/Mips16ISelDAGToDAG.h | 51
-rw-r--r--  lib/Target/Mips/Mips16ISelLowering.cpp | 689
-rw-r--r--  lib/Target/Mips/Mips16ISelLowering.h | 80
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.td | 2
-rw-r--r--  lib/Target/Mips/Mips64InstrInfo.td | 2
-rw-r--r--  lib/Target/Mips/MipsCallingConv.td | 18
-rw-r--r--  lib/Target/Mips/MipsCondMov.td | 32
-rw-r--r--  lib/Target/Mips/MipsConstantIslandPass.cpp | 85
-rw-r--r--  lib/Target/Mips/MipsDelaySlotFiller.cpp | 567
-rw-r--r--  lib/Target/Mips/MipsFrameLowering.h | 2
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 670
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.h | 93
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 1860
-rw-r--r--  lib/Target/Mips/MipsISelLowering.h | 165
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 162
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.h | 15
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 10
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.h | 3
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.td | 21
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 460
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.h | 57
-rw-r--r--  lib/Target/Mips/MipsSEISelLowering.cpp | 197
-rw-r--r--  lib/Target/Mips/MipsSEISelLowering.h | 46
-rw-r--r--  lib/Target/Mips/MipsSubtarget.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsSubtarget.h | 4
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 4
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h | 4
-rw-r--r--  lib/Target/NVPTX/NVPTXISelLowering.cpp | 42
-rw-r--r--  lib/Target/NVPTX/NVPTXISelLowering.h | 2
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h | 2
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 4
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h | 4
-rw-r--r--  lib/Target/PowerPC/PPCCTRLoops.cpp | 77
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.cpp | 95
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.h | 11
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 52
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 6
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td | 26
-rw-r--r--  lib/Target/PowerPC/PPCInstrAltivec.td | 6
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 154
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 6
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 32
-rw-r--r--  lib/Target/PowerPC/PPCMachineFunctionInfo.h | 15
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 156
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.h | 19
-rw-r--r--  lib/Target/R600/AMDGPU.h | 2
-rw-r--r--  lib/Target/R600/AMDGPU.td | 1
-rw-r--r--  lib/Target/R600/AMDGPUAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/R600/AMDGPUCallingConv.td | 42
-rw-r--r--  lib/Target/R600/AMDGPUISelLowering.cpp | 20
-rw-r--r--  lib/Target/R600/AMDGPUISelLowering.h | 9
-rw-r--r--  lib/Target/R600/AMDGPUIndirectAddressing.cpp | 1
-rw-r--r--  lib/Target/R600/AMDGPUInstructions.td | 11
-rw-r--r--  lib/Target/R600/AMDGPUIntrinsics.td | 2
-rw-r--r--  lib/Target/R600/AMDGPUStructurizeCFG.cpp | 1
-rw-r--r--  lib/Target/R600/AMDGPUTargetMachine.cpp | 23
-rw-r--r--  lib/Target/R600/AMDIL.h | 35
-rw-r--r--  lib/Target/R600/AMDILCFGStructurizer.cpp | 8
-rw-r--r--  lib/Target/R600/AMDILDevice.cpp | 20
-rw-r--r--  lib/Target/R600/AMDILISelDAGToDAG.cpp | 62
-rw-r--r--  lib/Target/R600/AMDILISelLowering.cpp | 5
-rw-r--r--  lib/Target/R600/AMDILSIDevice.cpp | 15
-rw-r--r--  lib/Target/R600/CMakeLists.txt | 3
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h | 9
-rw-r--r--  lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp | 32
-rw-r--r--  lib/Target/R600/R600ISelLowering.cpp | 212
-rw-r--r--  lib/Target/R600/R600ISelLowering.h | 3
-rw-r--r--  lib/Target/R600/R600InstrInfo.cpp | 122
-rw-r--r--  lib/Target/R600/R600InstrInfo.h | 3
-rw-r--r--  lib/Target/R600/R600Instructions.td | 63
-rw-r--r--  lib/Target/R600/R600LowerConstCopy.cpp | 222
-rw-r--r--  lib/Target/R600/R600MachineFunctionInfo.cpp | 1
-rw-r--r--  lib/Target/R600/R600MachineFunctionInfo.h | 1
-rw-r--r--  lib/Target/R600/R600MachineScheduler.cpp | 427
-rw-r--r--  lib/Target/R600/R600MachineScheduler.h | 120
-rw-r--r--  lib/Target/R600/SIAssignInterpRegs.cpp | 152
-rw-r--r--  lib/Target/R600/SIISelLowering.cpp | 265
-rw-r--r--  lib/Target/R600/SIISelLowering.h | 13
-rw-r--r--  lib/Target/R600/SIInsertWaits.cpp | 9
-rw-r--r--  lib/Target/R600/SIInstrFormats.td | 20
-rw-r--r--  lib/Target/R600/SIInstrInfo.cpp | 104
-rw-r--r--  lib/Target/R600/SIInstrInfo.td | 25
-rw-r--r--  lib/Target/R600/SIInstructions.td | 300
-rw-r--r--  lib/Target/R600/SIIntrinsics.td | 21
-rw-r--r--  lib/Target/R600/SILowerControlFlow.cpp | 117
-rw-r--r--  lib/Target/R600/SIMachineFunctionInfo.cpp | 20
-rw-r--r--  lib/Target/R600/SIMachineFunctionInfo.h | 4
-rw-r--r--  lib/Target/R600/SIRegisterInfo.td | 69
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.cpp | 9
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.h | 3
-rw-r--r--  lib/Target/TargetLibraryInfo.cpp | 248
-rw-r--r--  lib/Target/TargetMachine.cpp | 26
-rw-r--r--  lib/Target/TargetMachineC.cpp | 2
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 343
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 24
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 27
-rw-r--r--  lib/Target/X86/X86CodeEmitter.cpp | 24
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 3
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp | 32
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 340
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 2
-rw-r--r--  lib/Target/X86/X86InstrArithmetic.td | 172
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 468
-rw-r--r--  lib/Target/X86/X86InstrTSX.td | 2
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp | 51
-rw-r--r--  lib/Target/X86/X86Schedule.td | 72
-rw-r--r--  lib/Target/X86/X86TargetTransformInfo.cpp | 34
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.cpp | 2
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.h | 2
-rw-r--r--  lib/Transforms/IPO/ExtractGV.cpp | 6
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp | 6
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAddSub.cpp | 96
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 4
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp | 6
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp | 7
-rw-r--r--  lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 6
-rw-r--r--  lib/Transforms/Instrumentation/AddressSanitizer.cpp | 25
-rw-r--r--  lib/Transforms/Instrumentation/GCOVProfiling.cpp | 260
-rw-r--r--  lib/Transforms/Instrumentation/MemorySanitizer.cpp | 57
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 10
-rw-r--r--  lib/Transforms/Scalar/GlobalMerge.cpp | 83
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp | 431
-rw-r--r--  lib/Transforms/Utils/BypassSlowDivision.cpp | 4
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp | 9
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 26
-rw-r--r--  lib/Transforms/Utils/SimplifyLibCalls.cpp | 406
-rw-r--r--  lib/Transforms/Vectorize/BBVectorize.cpp | 4
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp | 231
251 files changed, 10316 insertions, 6618 deletions
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 131a593..66e416c 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -31,7 +31,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeCFGPrinterPass(Registry);
initializeCFGOnlyViewerPass(Registry);
initializeCFGOnlyPrinterPass(Registry);
- initializePrintDbgInfoPass(Registry);
initializeDependenceAnalysisPass(Registry);
initializeDominanceFrontierPass(Registry);
initializeDomViewerPass(Registry);
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index ca668b2..4139336 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -88,7 +88,7 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &TD,
const TargetLibraryInfo &TLI,
bool RoundToAlign = false) {
uint64_t Size;
- if (getObjectSize(V, Size, &TD, &TLI, RoundToAlign))
+ if (getUnderlyingObjectSize(V, Size, &TD, &TLI, RoundToAlign))
return Size;
return AliasAnalysis::UnknownSize;
}
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 4c64c4a..597c767 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -13,7 +13,6 @@ add_llvm_library(LLVMAnalysis
CostModel.cpp
CodeMetrics.cpp
ConstantFolding.cpp
- DbgInfoPrinter.cpp
DependenceAnalysis.cpp
DomPrinter.cpp
DominanceFrontier.cpp
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index d9c0299..a729270 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -18,7 +18,12 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CallSite.h"
+
using namespace llvm;
CaptureTracker::~CaptureTracker() {}
diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
index 44684a9..df70d15 100644
--- a/lib/Analysis/CostModel.cpp
+++ b/lib/Analysis/CostModel.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@@ -189,6 +190,16 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
0);
return -1;
}
+ case Instruction::Call:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ SmallVector<Type*, 4> Tys;
+ for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J)
+ Tys.push_back(II->getArgOperand(J)->getType());
+
+ return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
+ Tys);
+ }
+ return -1;
default:
// We don't have any information on this instruction.
return -1;
diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp
deleted file mode 100644
index f674e0c..0000000
--- a/lib/Analysis/DbgInfoPrinter.cpp
+++ /dev/null
@@ -1,224 +0,0 @@
-//===- DbgInfoPrinter.cpp - Print debug info in a human readable form ------==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a pass that prints instructions, and associated debug
-// info:
-//
-// - source/line/col information
-// - original variable name
-// - original type name
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-static cl::opt<bool>
-PrintDirectory("print-fullpath",
- cl::desc("Print fullpath when printing debug info"),
- cl::Hidden);
-
-namespace {
- class PrintDbgInfo : public FunctionPass {
- raw_ostream &Out;
- void printVariableDeclaration(const Value *V);
- public:
- static char ID; // Pass identification
- PrintDbgInfo() : FunctionPass(ID), Out(errs()) {
- initializePrintDbgInfoPass(*PassRegistry::getPassRegistry());
- }
-
- virtual bool runOnFunction(Function &F);
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
- };
- char PrintDbgInfo::ID = 0;
-}
-
-INITIALIZE_PASS(PrintDbgInfo, "print-dbginfo",
- "Print debug info in human readable form", false, false)
-
-FunctionPass *llvm::createDbgInfoPrinterPass() { return new PrintDbgInfo(); }
-
-/// Find the debug info descriptor corresponding to this global variable.
-static Value *findDbgGlobalDeclare(GlobalVariable *V) {
- const Module *M = V->getParent();
- NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv");
- if (!NMD)
- return 0;
-
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i)));
- if (!DIG.isGlobalVariable())
- continue;
- if (DIGlobalVariable(DIG).getGlobal() == V)
- return DIG;
- }
- return 0;
-}
-
-/// Find the debug info descriptor corresponding to this function.
-static Value *findDbgSubprogramDeclare(Function *V) {
- const Module *M = V->getParent();
- NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp");
- if (!NMD)
- return 0;
-
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i)));
- if (!DIG.isSubprogram())
- continue;
- if (DISubprogram(DIG).getFunction() == V)
- return DIG;
- }
- return 0;
-}
-
-/// Finds the llvm.dbg.declare intrinsic corresponding to this value if any.
-/// It looks through pointer casts too.
-static const DbgDeclareInst *findDbgDeclare(const Value *V) {
- V = V->stripPointerCasts();
-
- if (!isa<Instruction>(V) && !isa<Argument>(V))
- return 0;
-
- const Function *F = NULL;
- if (const Instruction *I = dyn_cast<Instruction>(V))
- F = I->getParent()->getParent();
- else if (const Argument *A = dyn_cast<Argument>(V))
- F = A->getParent();
-
- for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI)
- for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end();
- BI != BE; ++BI)
- if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
- if (DDI->getAddress() == V)
- return DDI;
-
- return 0;
-}
-
-static bool getLocationInfo(const Value *V, std::string &DisplayName,
- std::string &Type, unsigned &LineNo,
- std::string &File, std::string &Dir) {
- DICompileUnit Unit;
- DIType TypeD;
-
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(const_cast<Value*>(V))) {
- Value *DIGV = findDbgGlobalDeclare(GV);
- if (!DIGV) return false;
- DIGlobalVariable Var(cast<MDNode>(DIGV));
-
- StringRef D = Var.getDisplayName();
- if (!D.empty())
- DisplayName = D;
- LineNo = Var.getLineNumber();
- Unit = Var.getCompileUnit();
- TypeD = Var.getType();
- } else if (Function *F = dyn_cast<Function>(const_cast<Value*>(V))){
- Value *DIF = findDbgSubprogramDeclare(F);
- if (!DIF) return false;
- DISubprogram Var(cast<MDNode>(DIF));
-
- StringRef D = Var.getDisplayName();
- if (!D.empty())
- DisplayName = D;
- LineNo = Var.getLineNumber();
- Unit = Var.getCompileUnit();
- TypeD = Var.getType();
- } else {
- const DbgDeclareInst *DDI = findDbgDeclare(V);
- if (!DDI) return false;
- DIVariable Var(cast<MDNode>(DDI->getVariable()));
-
- StringRef D = Var.getName();
- if (!D.empty())
- DisplayName = D;
- LineNo = Var.getLineNumber();
- Unit = Var.getCompileUnit();
- TypeD = Var.getType();
- }
-
- StringRef T = TypeD.getName();
- if (!T.empty())
- Type = T;
- StringRef F = Unit.getFilename();
- if (!F.empty())
- File = F;
- StringRef D = Unit.getDirectory();
- if (!D.empty())
- Dir = D;
- return true;
-}
-
-void PrintDbgInfo::printVariableDeclaration(const Value *V) {
- std::string DisplayName, File, Directory, Type;
- unsigned LineNo = 0;
-
- if (!getLocationInfo(V, DisplayName, Type, LineNo, File, Directory))
- return;
-
- Out << "; ";
- WriteAsOperand(Out, V, false, 0);
- if (isa<Function>(V))
- Out << " is function " << DisplayName
- << " of type " << Type << " declared at ";
- else
- Out << " is variable " << DisplayName
- << " of type " << Type << " declared at ";
-
- if (PrintDirectory)
- Out << Directory << "/";
-
- Out << File << ":" << LineNo << "\n";
-}
-
-bool PrintDbgInfo::runOnFunction(Function &F) {
- if (F.isDeclaration())
- return false;
-
- Out << "function " << F.getName() << "\n\n";
-
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- BasicBlock *BB = I;
-
- if (I != F.begin() && (pred_begin(BB) == pred_end(BB)))
- // Skip dead blocks.
- continue;
-
- Out << BB->getName();
- Out << ":";
-
- Out << "\n";
-
- for (BasicBlock::const_iterator i = BB->begin(), e = BB->end();
- i != e; ++i) {
-
- printVariableDeclaration(i);
-
- if (const User *U = dyn_cast<User>(i)) {
- for(unsigned i=0;i<U->getNumOperands();i++)
- printVariableDeclaration(U->getOperand(i));
- }
- }
- }
- return false;
-}
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp
index 3292e00..35c45e6 100644
--- a/lib/Analysis/IPA/InlineCost.cpp
+++ b/lib/Analysis/IPA/InlineCost.cpp
@@ -474,10 +474,12 @@ bool CallAnalyzer::visitCastInst(CastInst &I) {
bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
Value *Operand = I.getOperand(0);
- Constant *Ops[1] = { dyn_cast<Constant>(Operand) };
- if (Ops[0] || (Ops[0] = SimplifiedValues.lookup(Operand)))
+ Constant *COp = dyn_cast<Constant>(Operand);
+ if (!COp)
+ COp = SimplifiedValues.lookup(Operand);
+ if (COp)
if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
- Ops, TD)) {
+ COp, TD)) {
SimplifiedValues[&I] = C;
return true;
}
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 0fc0550..d490d54 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// This family of functions identifies calls to builtin functions that allocate
-// or free memory.
+// or free memory.
//
//===----------------------------------------------------------------------===//
@@ -88,6 +88,10 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) {
static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy,
const TargetLibraryInfo *TLI,
bool LookThroughBitCast = false) {
+ // Skip intrinsics
+ if (isa<IntrinsicInst>(V))
+ return 0;
+
Function *Callee = getCalledFunction(V, LookThroughBitCast);
if (!Callee)
return 0;
@@ -194,12 +198,12 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD,
const TargetLibraryInfo *TLI,
bool LookThroughSExt = false) {
if (!CI)
- return NULL;
+ return 0;
// The size of the malloc's result type must be known to determine array size.
Type *T = getMallocAllocatedType(CI, TLI);
if (!T || !T->isSized() || !TD)
- return NULL;
+ return 0;
unsigned ElementSize = TD->getTypeAllocSize(T);
if (StructType *ST = dyn_cast<StructType>(T))
@@ -208,15 +212,15 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD,
// If malloc call's arg can be determined to be a multiple of ElementSize,
// return the multiple. Otherwise, return NULL.
Value *MallocArg = CI->getArgOperand(0);
- Value *Multiple = NULL;
+ Value *Multiple = 0;
if (ComputeMultiple(MallocArg, ElementSize, Multiple,
LookThroughSExt))
return Multiple;
- return NULL;
+ return 0;
}
-/// isArrayMalloc - Returns the corresponding CallInst if the instruction
+/// isArrayMalloc - Returns the corresponding CallInst if the instruction
/// is a call to malloc whose array size can be determined and the array size
/// is not constant 1. Otherwise, return NULL.
const CallInst *llvm::isArrayMalloc(const Value *I,
@@ -225,12 +229,12 @@ const CallInst *llvm::isArrayMalloc(const Value *I,
const CallInst *CI = extractMallocCall(I, TLI);
Value *ArraySize = computeArraySize(CI, TD, TLI);
- if (ArraySize &&
- ArraySize != ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
- return CI;
+ if (ConstantInt *ConstSize = dyn_cast_or_null<ConstantInt>(ArraySize))
+ if (ConstSize->isOne())
+ return CI;
// CI is a non-array malloc or we can't figure out that it is an array malloc.
- return NULL;
+ return 0;
}
/// getMallocType - Returns the PointerType resulting from the malloc call.
@@ -241,8 +245,8 @@ const CallInst *llvm::isArrayMalloc(const Value *I,
PointerType *llvm::getMallocType(const CallInst *CI,
const TargetLibraryInfo *TLI) {
assert(isMallocLikeFn(CI, TLI) && "getMallocType and not malloc call");
-
- PointerType *MallocType = NULL;
+
+ PointerType *MallocType = 0;
unsigned NumOfBitCastUses = 0;
// Determine if CallInst has a bitcast use.
@@ -262,7 +266,7 @@ PointerType *llvm::getMallocType(const CallInst *CI,
return cast<PointerType>(CI->getType());
// Type could not be determined.
- return NULL;
+ return 0;
}
/// getMallocAllocatedType - Returns the Type allocated by malloc call.
@@ -273,10 +277,10 @@ PointerType *llvm::getMallocType(const CallInst *CI,
Type *llvm::getMallocAllocatedType(const CallInst *CI,
const TargetLibraryInfo *TLI) {
PointerType *PT = getMallocType(CI, TLI);
- return PT ? PT->getElementType() : NULL;
+ return PT ? PT->getElementType() : 0;
}
-/// getMallocArraySize - Returns the array size of a malloc call. If the
+/// getMallocArraySize - Returns the array size of a malloc call. If the
/// argument passed to malloc is a multiple of the size of the malloced type,
/// then return that multiple. For non-array mallocs, the multiple is
/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be
@@ -300,7 +304,7 @@ const CallInst *llvm::extractCallocCall(const Value *I,
/// isFreeCall - Returns non-null if the value is a call to the builtin free()
const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
const CallInst *CI = dyn_cast<CallInst>(I);
- if (!CI)
+ if (!CI || isa<IntrinsicInst>(CI))
return 0;
Function *Callee = CI->getCalledFunction();
if (Callee == 0 || !Callee->isDeclaration())
@@ -317,7 +321,7 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
return 0;
// Check free prototype.
- // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
+ // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
// attribute will exist.
FunctionType *FTy = Callee->getFunctionType();
if (!FTy->getReturnType()->isVoidTy())
@@ -360,6 +364,26 @@ bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD,
return true;
}
+/// \brief Compute the size of the underlying object pointed by Ptr. Returns
+/// true and the object size in Size if successful, and false otherwise.
+/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas,
+/// byval arguments, and global variables.
+bool llvm::getUnderlyingObjectSize(const Value *Ptr, uint64_t &Size,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ bool RoundToAlign) {
+ if (!TD)
+ return false;
+
+ ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign);
+ SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr));
+ if (!Visitor.knownSize(Data))
+ return false;
+
+ Size = Data.first.getZExtValue();
+ return true;
+}
+
STATISTIC(ObjectVisitorArgument,
"Number of arguments with unsolved size and offset");
@@ -385,16 +409,23 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD,
SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
V = V->stripPointerCasts();
- if (Instruction *I = dyn_cast<Instruction>(V)) {
- // If we have already seen this instruction, bail out. Cycles can happen in
- // unreachable code after constant propagation.
- if (!SeenInsts.insert(I))
- return unknown();
+ if (isa<Instruction>(V) || isa<GEPOperator>(V)) {
+ // Return cached value or insert unknown in cache if size of V was not
+ // computed yet in order to avoid recursions in PHis.
+ std::pair<CacheMapTy::iterator, bool> CacheVal =
+ CacheMap.insert(std::make_pair(V, unknown()));
+ if (!CacheVal.second)
+ return CacheVal.first->second;
+
+ SizeOffsetType Result;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
- return visitGEPOperator(*GEP);
- return visit(*I);
+ Result = visitGEPOperator(*GEP);
+ else
+ Result = visit(cast<Instruction>(*V));
+ return CacheMap[V] = Result;
}
+
if (Argument *A = dyn_cast<Argument>(V))
return visitArgument(*A);
if (ConstantPointerNull *P = dyn_cast<ConstantPointerNull>(V))
@@ -408,8 +439,6 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
if (CE->getOpcode() == Instruction::IntToPtr)
return unknown(); // clueless
- if (CE->getOpcode() == Instruction::GetElementPtr)
- return visitGEPOperator(cast<GEPOperator>(*CE));
}
DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V
@@ -543,9 +572,21 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) {
return unknown();
}
-SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) {
- // too complex to analyze statically.
- return unknown();
+SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode &PHI) {
+ if (PHI.getNumIncomingValues() == 0)
+ return unknown();
+
+ SizeOffsetType Ret = compute(PHI.getIncomingValue(0));
+ if (!bothKnown(Ret))
+ return unknown();
+
+ // Verify that all PHI incoming pointers have the same size and offset.
+ for (unsigned i = 1, e = PHI.getNumIncomingValues(); i != e; ++i) {
+ SizeOffsetType EdgeData = compute(PHI.getIncomingValue(i));
+ if (!bothKnown(EdgeData) || EdgeData != Ret)
+ return unknown();
+ }
+ return Ret;
}
SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) {
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 38bf5dd..1faa046 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -351,15 +351,23 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
/// getPointerDependencyFrom - Return the instruction on which a memory
/// location depends. If isLoad is true, this routine ignores may-aliases with
/// read-only operations. If isLoad is false, this routine ignores may-aliases
-/// with reads from read-only locations.
+/// with reads from read-only locations. If possible, pass the query
+/// instruction as well; this function may take advantage of the metadata
+/// annotated to the query instruction to refine the result.
MemDepResult MemoryDependenceAnalysis::
getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
- BasicBlock::iterator ScanIt, BasicBlock *BB) {
+ BasicBlock::iterator ScanIt, BasicBlock *BB,
+ Instruction *QueryInst) {
const Value *MemLocBase = 0;
int64_t MemLocOffset = 0;
-
unsigned Limit = BlockScanLimit;
+ bool isInvariantLoad = false;
+ if (isLoad && QueryInst) {
+ LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
+ if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != 0)
+ isInvariantLoad = true;
+ }
// Walk backwards through the basic block, looking for dependencies.
while (ScanIt != BB->begin()) {
@@ -474,6 +482,8 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
continue;
if (R == AliasAnalysis::MustAlias)
return MemDepResult::getDef(Inst);
+ if (isInvariantLoad)
+ continue;
return MemDepResult::getClobber(Inst);
}
@@ -571,7 +581,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos,
- QueryParent);
+ QueryParent, QueryInst);
} else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
CallSite QueryCS(QueryInst);
bool isReadOnly = AA->onlyReadsMemory(QueryCS);
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 72421a0..976cd87 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -263,8 +263,8 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
case Instruction::PtrToInt:
// A ptrtoint cast is free so long as the result is large enough to store
// the pointer, and a legal integer type.
- if (DL && DL->isLegalInteger(OpTy->getScalarSizeInBits()) &&
- OpTy->getScalarSizeInBits() >= DL->getPointerSizeInBits())
+ if (DL && DL->isLegalInteger(Ty->getScalarSizeInBits()) &&
+ Ty->getScalarSizeInBits() >= DL->getPointerSizeInBits())
return TCC_Free;
// Otherwise it's not a no-op.
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 8e3994e..45dcc5e 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -953,6 +953,8 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
// Check for pointer simplifications.
if (V->getType()->isPointerTy()) {
+ if (isKnownNonNull(V))
+ return true;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
if (isGEPKnownNonNull(GEP, TD, Depth))
return true;
@@ -1396,10 +1398,10 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
return true;
// (add x, 0.0) is guaranteed to return +0.0, not -0.0.
- if (I->getOpcode() == Instruction::FAdd &&
- isa<ConstantFP>(I->getOperand(1)) &&
- cast<ConstantFP>(I->getOperand(1))->isNullValue())
- return true;
+ if (I->getOpcode() == Instruction::FAdd)
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(1)))
+ if (CFP->isNullValue())
+ return true;
// sitofp and uitofp turn into +0.0 for zero.
if (isa<SIToFPInst>(I) || isa<UIToFPInst>(I))
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index c7abf7a..dd7282c 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -24,7 +24,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
using namespace llvm;
/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 4ded281..bbb0432 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -112,6 +112,17 @@ DIE::~DIE() {
delete Children[i];
}
+/// Climb up the parent chain to get the compile unit DIE this DIE belongs to.
+DIE *DIE::getCompileUnit() const{
+ DIE *p = getParent();
+ while (p) {
+ if (p->getTag() == dwarf::DW_TAG_compile_unit)
+ return p;
+ p = p->getParent();
+ }
+ llvm_unreachable("We should not have orphaned DIEs.");
+}
+
#ifndef NDEBUG
void DIE::print(raw_ostream &O, unsigned IncIndent) {
IndentCount += IncIndent;
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index 35d7959..d087c54 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -152,6 +152,9 @@ namespace llvm {
const std::vector<DIE *> &getChildren() const { return Children; }
const SmallVector<DIEValue*, 32> &getValues() const { return Values; }
DIE *getParent() const { return Parent; }
+ /// Climb up the parent chain to get the compile unit DIE this DIE belongs
+ /// to.
+ DIE *getCompileUnit() const;
void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
void setOffset(unsigned O) { Offset = O; }
void setSize(unsigned S) { Size = S; }
@@ -232,9 +235,10 @@ namespace llvm {
///
static unsigned BestForm(bool IsSigned, uint64_t Int) {
if (IsSigned) {
- if ((char)Int == (signed)Int) return dwarf::DW_FORM_data1;
- if ((short)Int == (signed)Int) return dwarf::DW_FORM_data2;
- if ((int)Int == (signed)Int) return dwarf::DW_FORM_data4;
+ const int64_t SignedInt = Int;
+ if ((char)Int == SignedInt) return dwarf::DW_FORM_data1;
+ if ((short)Int == SignedInt) return dwarf::DW_FORM_data2;
+ if ((int)Int == SignedInt) return dwarf::DW_FORM_data4;
} else {
if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1;
if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2;
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 93b00fb..1c743c2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -35,7 +35,7 @@ using namespace llvm;
CompileUnit::CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A,
DwarfDebug *DW, DwarfUnits *DWU)
: UniqueID(UID), Language(L), CUDie(D), Asm(A), DD(DW), DU(DWU),
- IndexTyDie(0) {
+ IndexTyDie(0), DebugInfoOffset(0) {
DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
}
@@ -241,7 +241,8 @@ void CompileUnit::addSourceLine(DIE *Die, DIVariable V) {
if (Line == 0)
return;
unsigned FileID = DD->getOrCreateSourceID(V.getContext().getFilename(),
- V.getContext().getDirectory());
+ V.getContext().getDirectory(),
+ getUniqueID());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -257,7 +258,8 @@ void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {
unsigned Line = G.getLineNumber();
if (Line == 0)
return;
- unsigned FileID = DD->getOrCreateSourceID(G.getFilename(), G.getDirectory());
+ unsigned FileID = DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(),
+ getUniqueID());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -276,7 +278,7 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) {
return;
unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(),
- SP.getDirectory());
+ SP.getDirectory(), getUniqueID());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -293,7 +295,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) {
if (Line == 0)
return;
unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(),
- Ty.getDirectory());
+ Ty.getDirectory(), getUniqueID());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -311,7 +313,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) {
return;
DIFile File = Ty.getFile();
unsigned FileID = DD->getOrCreateSourceID(File.getFilename(),
- File.getDirectory());
+ File.getDirectory(), getUniqueID());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -329,7 +331,8 @@ void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) {
return;
StringRef FN = NS.getFilename();
- unsigned FileID = DD->getOrCreateSourceID(FN, NS.getDirectory());
+ unsigned FileID = DD->getOrCreateSourceID(FN, NS.getDirectory(),
+ getUniqueID());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -1348,9 +1351,19 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
}
// Add linkage name.
StringRef LinkageName = GV.getLinkageName();
- if (!LinkageName.empty() && isGlobalVariable)
- addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
+ if (!LinkageName.empty()) {
+ // From DWARF4: DIEs to which DW_AT_linkage_name may apply include:
+ // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and
+ // TAG_variable.
+ addString(IsStaticMember && VariableSpecDIE ?
+ VariableSpecDIE : VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
getRealLinkageName(LinkageName));
+ // In compatibility mode with older gdbs we put the linkage name on both
+ // the TAG_variable DIE and on the TAG_member DIE.
+ if (IsStaticMember && VariableSpecDIE && DD->useDarwinGDBCompat())
+ addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
+ getRealLinkageName(LinkageName));
+ }
} else if (const ConstantInt *CI =
dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
// AT_const_value was added when the static memeber was created. To avoid
@@ -1659,33 +1672,6 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
if (DT.isArtificial())
addFlag(MemberDie, dwarf::DW_AT_artificial);
- // This is only for backward compatibility.
- StringRef PropertyName = DT.getObjCPropertyName();
- if (!PropertyName.empty()) {
- addString(MemberDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
- StringRef GetterName = DT.getObjCPropertyGetterName();
- if (!GetterName.empty())
- addString(MemberDie, dwarf::DW_AT_APPLE_property_getter, GetterName);
- StringRef SetterName = DT.getObjCPropertySetterName();
- if (!SetterName.empty())
- addString(MemberDie, dwarf::DW_AT_APPLE_property_setter, SetterName);
- unsigned PropertyAttributes = 0;
- if (DT.isReadOnlyObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly;
- if (DT.isReadWriteObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite;
- if (DT.isAssignObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign;
- if (DT.isRetainObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain;
- if (DT.isCopyObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy;
- if (DT.isNonAtomicObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
- if (PropertyAttributes)
- addUInt(MemberDie, dwarf::DW_AT_APPLE_property_attribute, 0,
- PropertyAttributes);
- }
return MemberDie;
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 77bf6a9..2b180c6 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -87,6 +87,9 @@ class CompileUnit {
/// corresponds to the MDNode mapped with the subprogram DIE.
DenseMap<DIE *, const MDNode *> ContainingTypeMap;
+ /// Offset of the CUDie from beginning of debug info section.
+ unsigned DebugInfoOffset;
+
/// getLowerBoundDefault - Return the default lower bound for an array. If the
/// DWARF version doesn't handle the language, return -1.
int64_t getDefaultLowerBound() const;
@@ -103,6 +106,7 @@ public:
unsigned getUniqueID() const { return UniqueID; }
unsigned getLanguage() const { return Language; }
DIE* getCUDie() const { return CUDie.get(); }
+ unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
const StringMap<DIE*> &getGlobalNames() const { return GlobalNames; }
const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
@@ -120,6 +124,7 @@ public:
return AccelTypes;
}
+ void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
/// hasContent - Return true if this compile unit has something to write out.
///
bool hasContent() const { return !CUDie->getChildren().empty(); }
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 87659ef..b169602 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -352,11 +352,16 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
// If we're updating an abstract DIE, then we will be adding the children and
// object pointer later on. But what we don't want to do is process the
// concrete DIE twice.
- if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) {
+ DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode);
+ if (AbsSPDIE) {
+ bool InSameCU = (AbsSPDIE->getCompileUnit() == SPCU->getCUDie());
// Pick up abstract subprogram DIE.
SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ // If AbsSPDIE belongs to a different CU, use DW_FORM_ref_addr instead of
+ // DW_FORM_ref4.
SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
- dwarf::DW_FORM_ref4, AbsSPDIE);
+ InSameCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
+ AbsSPDIE);
SPCU->addDie(SPDie);
} else {
DISubprogram SPDecl = SP.getFunctionDeclaration();
@@ -528,7 +533,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
DILocation DL(Scope->getInlinedAt());
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0,
- getOrCreateSourceID(DL.getFilename(), DL.getDirectory()));
+ getOrCreateSourceID(DL.getFilename(), DL.getDirectory(),
+ TheCU->getUniqueID()));
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
// Add name to the name table, we do this here because we're guaranteed
@@ -617,19 +623,28 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
// SourceIds map. This can update DirectoryNames and SourceFileNames maps
// as well.
unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName,
- StringRef DirName) {
+ StringRef DirName, unsigned CUID) {
+ // If we use .loc in assembly, we can't separate .file entries according to
+ // compile units. Thus all files will belong to the default compile unit.
+ if (Asm->TM.hasMCUseLoc() &&
+ Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer)
+ CUID = 0;
+
// If FE did not provide a file name, then assume stdin.
if (FileName.empty())
- return getOrCreateSourceID("<stdin>", StringRef());
+ return getOrCreateSourceID("<stdin>", StringRef(), CUID);
// TODO: this might not belong here. See if we can factor this better.
if (DirName == CompilationDir)
DirName = "";
- unsigned SrcId = SourceIdMap.size()+1;
+ // FileIDCUMap stores the current ID for the given compile unit.
+ unsigned SrcId = FileIDCUMap[CUID] + 1;
- // We look up the file/dir pair by concatenating them with a zero byte.
+ // We look up the CUID/file/dir by concatenating them with a zero byte.
SmallString<128> NamePair;
+ NamePair += CUID;
+ NamePair += '\0';
NamePair += DirName;
NamePair += '\0'; // Zero bytes are not allowed in paths.
NamePair += FileName;
@@ -638,8 +653,9 @@ unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName,
if (Ent.getValue() != SrcId)
return Ent.getValue();
+ FileIDCUMap[CUID] = SrcId;
// Print out a .file directive to specify files for .loc directives.
- Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName);
+ Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName, CUID);
return SrcId;
}
@@ -650,14 +666,17 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
DICompileUnit DIUnit(N);
StringRef FN = DIUnit.getFilename();
CompilationDir = DIUnit.getDirectory();
- // Call this to emit a .file directive if it wasn't emitted for the source
- // file this CU comes from yet.
- getOrCreateSourceID(FN, CompilationDir);
DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++,
DIUnit.getLanguage(), Die, Asm,
this, &InfoHolder);
+
+ FileIDCUMap[NewCU->getUniqueID()] = 0;
+ // Call this to emit a .file directive if it wasn't emitted for the source
+ // file this CU comes from yet.
+ getOrCreateSourceID(FN, CompilationDir, NewCU->getUniqueID());
+
NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer());
NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
DIUnit.getLanguage());
@@ -742,82 +761,6 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
TheCU->addGlobalName(SP.getName(), SubprogramDie);
}
-// Collect debug info from named mdnodes such as llvm.dbg.enum and llvm.dbg.ty.
-void DwarfDebug::collectInfoFromNamedMDNodes(const Module *M) {
- if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp"))
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- const MDNode *N = NMD->getOperand(i);
- if (CompileUnit *CU = CUMap.lookup(DISubprogram(N).getCompileUnit()))
- constructSubprogramDIE(CU, N);
- }
-
- if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv"))
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- const MDNode *N = NMD->getOperand(i);
- if (CompileUnit *CU = CUMap.lookup(DIGlobalVariable(N).getCompileUnit()))
- CU->createGlobalVariableDIE(N);
- }
-
- if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum"))
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIType Ty(NMD->getOperand(i));
- if (CompileUnit *CU = CUMap.lookup(Ty.getCompileUnit()))
- CU->getOrCreateTypeDIE(Ty);
- }
-
- if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty"))
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIType Ty(NMD->getOperand(i));
- if (CompileUnit *CU = CUMap.lookup(Ty.getCompileUnit()))
- CU->getOrCreateTypeDIE(Ty);
- }
-}
-
-// Collect debug info using DebugInfoFinder.
-// FIXME - Remove this when dragonegg switches to DIBuilder.
-bool DwarfDebug::collectLegacyDebugInfo(const Module *M) {
- DebugInfoFinder DbgFinder;
- DbgFinder.processModule(*M);
-
- bool HasDebugInfo = false;
- // Scan all the compile-units to see if there are any marked as the main
- // unit. If not, we do not generate debug info.
- for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
- E = DbgFinder.compile_unit_end(); I != E; ++I) {
- if (DICompileUnit(*I).isMain()) {
- HasDebugInfo = true;
- break;
- }
- }
- if (!HasDebugInfo) return false;
-
- // Emit initial sections so we can refer to them later.
- emitSectionLabels();
-
- // Create all the compile unit DIEs.
- for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
- E = DbgFinder.compile_unit_end(); I != E; ++I)
- constructCompileUnit(*I);
-
- // Create DIEs for each global variable.
- for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
- E = DbgFinder.global_variable_end(); I != E; ++I) {
- const MDNode *N = *I;
- if (CompileUnit *CU = CUMap.lookup(DIGlobalVariable(N).getCompileUnit()))
- CU->createGlobalVariableDIE(N);
- }
-
- // Create DIEs for each subprogram.
- for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
- E = DbgFinder.subprogram_end(); I != E; ++I) {
- const MDNode *N = *I;
- if (CompileUnit *CU = CUMap.lookup(DISubprogram(N).getCompileUnit()))
- constructSubprogramDIE(CU, N);
- }
-
- return HasDebugInfo;
-}
-
// Emit all Dwarf sections that should come prior to the content. Create
// global DIEs and emit initial debug info sections. This is invoked by
// the target AsmPrinter.
@@ -830,30 +773,28 @@ void DwarfDebug::beginModule() {
// If module has named metadata anchors then use them, otherwise scan the
// module using debug info finder to collect debug info.
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
- if (CU_Nodes) {
- // Emit initial sections so we can reference labels later.
- emitSectionLabels();
-
- for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
- DICompileUnit CUNode(CU_Nodes->getOperand(i));
- CompileUnit *CU = constructCompileUnit(CUNode);
- DIArray GVs = CUNode.getGlobalVariables();
- for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
- CU->createGlobalVariableDIE(GVs.getElement(i));
- DIArray SPs = CUNode.getSubprograms();
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
- constructSubprogramDIE(CU, SPs.getElement(i));
- DIArray EnumTypes = CUNode.getEnumTypes();
- for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
- CU->getOrCreateTypeDIE(EnumTypes.getElement(i));
- DIArray RetainedTypes = CUNode.getRetainedTypes();
- for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
- CU->getOrCreateTypeDIE(RetainedTypes.getElement(i));
- }
- } else if (!collectLegacyDebugInfo(M))
+ if (!CU_Nodes)
return;
- collectInfoFromNamedMDNodes(M);
+ // Emit initial sections so we can reference labels later.
+ emitSectionLabels();
+
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ DICompileUnit CUNode(CU_Nodes->getOperand(i));
+ CompileUnit *CU = constructCompileUnit(CUNode);
+ DIArray GVs = CUNode.getGlobalVariables();
+ for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
+ CU->createGlobalVariableDIE(GVs.getElement(i));
+ DIArray SPs = CUNode.getSubprograms();
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
+ constructSubprogramDIE(CU, SPs.getElement(i));
+ DIArray EnumTypes = CUNode.getEnumTypes();
+ for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
+ CU->getOrCreateTypeDIE(EnumTypes.getElement(i));
+ DIArray RetainedTypes = CUNode.getRetainedTypes();
+ for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
+ CU->getOrCreateTypeDIE(RetainedTypes.getElement(i));
+ }
// Tell MMI that we have debug info.
MMI->setDebugInfoAvailability(true);
@@ -1197,16 +1138,10 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
DISubprogram(DV.getContext()).describes(MF->getFunction()))
Scope = LScopes.getCurrentFunctionScope();
- else {
- if (DV.getVersion() <= LLVMDebugVersion9)
- Scope = LScopes.findLexicalScope(MInsn->getDebugLoc());
- else {
- if (MDNode *IA = DV.getInlinedAt())
- Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA));
- else
- Scope = LScopes.findLexicalScope(cast<MDNode>(DV->getOperand(1)));
- }
- }
+ else if (MDNode *IA = DV.getInlinedAt())
+ Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA));
+ else
+ Scope = LScopes.findLexicalScope(cast<MDNode>(DV->getOperand(1)));
// If variable scope is not found then skip this variable.
if (!Scope)
continue;
@@ -1707,7 +1642,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
} else
llvm_unreachable("Unexpected scope info");
- Src = getOrCreateSourceID(Fn, Dir);
+ Src = getOrCreateSourceID(Fn, Dir,
+ Asm->OutStreamer.getContext().getDwarfCompileUnitID());
}
Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, 0, Fn);
}
@@ -1761,15 +1697,19 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {
// Compute the size and offset of all the DIEs.
void DwarfUnits::computeSizeAndOffsets() {
+ // Offset from the beginning of debug info section.
+ unsigned AccuOffset = 0;
for (SmallVector<CompileUnit *, 1>::iterator I = CUs.begin(),
E = CUs.end(); I != E; ++I) {
+ (*I)->setDebugInfoOffset(AccuOffset);
unsigned Offset =
sizeof(int32_t) + // Length of Compilation Unit Info
sizeof(int16_t) + // DWARF version number
sizeof(int32_t) + // Offset Into Abbrev. Section
sizeof(int8_t); // Pointer Size (in bytes)
- computeSizeAndOffset((*I)->getCUDie(), Offset);
+ unsigned EndOffset = computeSizeAndOffset((*I)->getCUDie(), Offset);
+ AccuOffset += EndOffset;
}
}
@@ -1843,6 +1783,13 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) {
DIEEntry *E = cast<DIEEntry>(Values[i]);
DIE *Origin = E->getEntry();
unsigned Addr = Origin->getOffset();
+ if (Form == dwarf::DW_FORM_ref_addr) {
+ // For DW_FORM_ref_addr, output the offset from beginning of debug info
+ // section. Origin->getOffset() returns the offset from start of the
+ // compile unit.
+ DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ Addr += Holder.getCUOffset(Origin->getCompileUnit());
+ }
Asm->EmitInt32(Addr);
break;
}
@@ -1940,6 +1887,19 @@ void DwarfUnits::emitUnits(DwarfDebug *DD,
}
}
+/// For a given compile unit DIE, returns offset from beginning of debug info.
+unsigned DwarfUnits::getCUOffset(DIE *Die) {
+ assert(Die->getTag() == dwarf::DW_TAG_compile_unit &&
+ "Input DIE should be compile unit in getCUOffset.");
+ for (SmallVector<CompileUnit *, 1>::iterator I = CUs.begin(),
+ E = CUs.end(); I != E; ++I) {
+ CompileUnit *TheCU = *I;
+ if (TheCU->getCUDie() == Die)
+ return TheCU->getDebugInfoOffset();
+ }
+ llvm_unreachable("The compile unit DIE should belong to CUs in DwarfUnits.");
+}
+
// Emit the debug info section.
void DwarfDebug::emitDebugInfo() {
DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 7b56815..81e345e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -274,6 +274,10 @@ public:
/// \brief Returns the address pool.
AddrPool *getAddrPool() { return &AddressPool; }
+
+ /// \brief for a given compile unit DIE, returns offset from beginning of
+ /// debug info.
+ unsigned getCUOffset(DIE *Die);
};
/// \brief Collects and handles dwarf debug information.
@@ -305,7 +309,9 @@ class DwarfDebug {
// A list of all the unique abbreviations in use.
std::vector<DIEAbbrev *> Abbreviations;
- // Source id map, i.e. pair of source filename and directory,
+ // Stores the current file ID for a given compile unit.
+ DenseMap <unsigned, unsigned> FileIDCUMap;
+ // Source id map, i.e. CUID, source filename and directory,
// separated by a zero byte, mapped to a unique id.
StringMap<unsigned, BumpPtrAllocator&> SourceIdMap;
@@ -596,14 +602,6 @@ public:
DwarfDebug(AsmPrinter *A, Module *M);
~DwarfDebug();
- /// \brief Collect debug info from named mdnodes such as llvm.dbg.enum
- /// and llvm.dbg.ty
- void collectInfoFromNamedMDNodes(const Module *M);
-
- /// \brief Collect debug info using DebugInfoFinder.
- /// FIXME - Remove this when DragonEgg switches to DIBuilder.
- bool collectLegacyDebugInfo(const Module *M);
-
/// \brief Emit all Dwarf sections that should come prior to the
/// content.
void beginModule();
@@ -626,7 +624,8 @@ public:
/// \brief Look up the source id with the given directory and source file
/// names. If none currently exists, create a new id and insert it in the
/// SourceIds map.
- unsigned getOrCreateSourceID(StringRef DirName, StringRef FullName);
+ unsigned getOrCreateSourceID(StringRef DirName, StringRef FullName,
+ unsigned CUID);
/// \brief Recursively Emits a debug information entry.
void emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs);
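Since getOrCreateSourceID now also takes a CUID, the source-id lookup key effectively becomes the triple (CUID, directory, filename), with per-CU file numbering tracked in FileIDCUMap. A rough sketch of that keying scheme with plain std::map containers; the zero-byte separator mirrors the comment above, while the map types and helper are illustrative rather than LLVM's implementation:

#include <iostream>
#include <map>
#include <string>

// Illustrative only: compose CUID, directory and filename into one key and
// number files per compile unit.
static unsigned getOrCreateSourceID(std::map<std::string, unsigned> &SourceIdMap,
                                    std::map<unsigned, unsigned> &FileIDCUMap,
                                    const std::string &Dir,
                                    const std::string &Fn, unsigned CUID) {
  std::string Key = std::to_string(CUID) + '\0' + Dir + '\0' + Fn;
  auto It = SourceIdMap.find(Key);
  if (It != SourceIdMap.end())
    return It->second;
  unsigned ID = ++FileIDCUMap[CUID];   // file IDs restart at 1 for each CU
  SourceIdMap[Key] = ID;
  return ID;
}

int main() {
  std::map<std::string, unsigned> Ids;
  std::map<unsigned, unsigned> PerCU;
  std::cout << getOrCreateSourceID(Ids, PerCU, "/src", "a.c", 0) << "\n"; // 1
  std::cout << getOrCreateSourceID(Ids, PerCU, "/src", "b.c", 0) << "\n"; // 2
  std::cout << getOrCreateSourceID(Ids, PerCU, "/src", "a.c", 1) << "\n"; // 1, new CU
  std::cout << getOrCreateSourceID(Ids, PerCU, "/src", "a.c", 0) << "\n"; // 1, cached
}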
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index e8b5b4f..4cd1b80 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -379,22 +379,77 @@ unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src,
return LT.first;
}
-unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
+unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> Tys) const {
- // assume that we need to scalarize this intrinsic.
- unsigned ScalarizationCost = 0;
- unsigned ScalarCalls = 1;
- if (RetTy->isVectorTy()) {
- ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
- ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
- }
- for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
- if (Tys[i]->isVectorTy()) {
- ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
+ unsigned ISD = 0;
+ switch (IID) {
+ default: {
+ // Assume that we need to scalarize this intrinsic.
+ unsigned ScalarizationCost = 0;
+ unsigned ScalarCalls = 1;
+ if (RetTy->isVectorTy()) {
+ ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
}
+ for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
+ if (Tys[i]->isVectorTy()) {
+ ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
+ ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
+ }
+ }
+
+ return ScalarCalls + ScalarizationCost;
+ }
+ // Look for intrinsics that can be lowered directly or turned into a scalar
+ // intrinsic call.
+ case Intrinsic::sqrt: ISD = ISD::FSQRT; break;
+ case Intrinsic::sin: ISD = ISD::FSIN; break;
+ case Intrinsic::cos: ISD = ISD::FCOS; break;
+ case Intrinsic::exp: ISD = ISD::FEXP; break;
+ case Intrinsic::exp2: ISD = ISD::FEXP2; break;
+ case Intrinsic::log: ISD = ISD::FLOG; break;
+ case Intrinsic::log10: ISD = ISD::FLOG10; break;
+ case Intrinsic::log2: ISD = ISD::FLOG2; break;
+ case Intrinsic::fabs: ISD = ISD::FABS; break;
+ case Intrinsic::floor: ISD = ISD::FFLOOR; break;
+ case Intrinsic::ceil: ISD = ISD::FCEIL; break;
+ case Intrinsic::trunc: ISD = ISD::FTRUNC; break;
+ case Intrinsic::rint: ISD = ISD::FRINT; break;
+ case Intrinsic::pow: ISD = ISD::FPOW; break;
+ case Intrinsic::fma: ISD = ISD::FMA; break;
+ case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add?
+ }
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy);
+
+ if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
+ // The operation is legal. Assume it costs 1.
+ // If the type is split to multiple registers, assume that there is some
+ // overhead to this.
+ // TODO: Once we have extract/insert subvector cost we need to use them.
+ if (LT.first > 1)
+ return LT.first * 2;
+ return LT.first * 1;
}
- return ScalarCalls + ScalarizationCost;
+
+ if (!TLI->isOperationExpand(ISD, LT.second)) {
+ // If the operation is custom lowered then assume
+ // that the code is twice as expensive.
+ return LT.first * 2;
+ }
+
+ // Else, assume that we need to scalarize this intrinsic. For math builtins
+ // this will emit a costly libcall, adding call overhead and spills. Make it
+ // very expensive.
+ if (RetTy->isVectorTy()) {
+ unsigned Num = RetTy->getVectorNumElements();
+ unsigned Cost = TopTTI->getIntrinsicInstrCost(IID, RetTy->getScalarType(),
+ Tys);
+ return 10 * Cost * Num;
+ }
+
+ // This is going to be turned into a library call, make it expensive.
+ return 10;
}
unsigned BasicTTI::getNumberOfParts(Type *Tp) const {
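The rewritten getIntrinsicInstrCost follows a three-tier scheme: a legal operation costs roughly one per legalized register part (with a small penalty when the type is split), a custom or promoted lowering is assumed to be twice as expensive, and anything that must be expanded is priced as a heavily weighted scalarized libcall. A toy version of that decision ladder, with an invented Action enum standing in for the TargetLowering legality queries and the per-element weighting simplified:

#include <iostream>

// Invented stand-ins for the legalization verdict and for LT.first (the
// number of registers the type is split into after legalization).
enum class Action { Legal, Custom, Expand };

static unsigned intrinsicCost(Action A, unsigned NumParts, unsigned VectorWidth) {
  if (A == Action::Legal)                  // legal: ~1 per part, small split penalty
    return NumParts > 1 ? NumParts * 2 : NumParts;
  if (A == Action::Custom)                 // custom lowering: assume twice as costly
    return NumParts * 2;
  // Expand: scalarized libcall, weighted heavily per element (the real code
  // also multiplies by the scalar intrinsic cost; simplified here).
  return VectorWidth > 1 ? 10 * VectorWidth : 10;
}

int main() {
  std::cout << intrinsicCost(Action::Legal, 1, 4) << "\n";   // 1
  std::cout << intrinsicCost(Action::Legal, 2, 4) << "\n";   // 4
  std::cout << intrinsicCost(Action::Custom, 1, 4) << "\n";  // 2
  std::cout << intrinsicCost(Action::Expand, 1, 4) << "\n";  // 40
}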
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index fac207e..5447df0 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -459,7 +459,6 @@ void SSAIfConv::replacePHIInstrs() {
for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
PHIInfo &PI = PHIs[i];
DEBUG(dbgs() << "If-converting " << *PI.PHI);
- assert(PI.PHI->getNumOperands() == 5 && "Unexpected PHI operands.");
unsigned DstReg = PI.PHI->getOperand(0).getReg();
TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm));
@@ -593,6 +592,7 @@ public:
EarlyIfConverter() : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const;
bool runOnMachineFunction(MachineFunction &MF);
+ const char *getPassName() const { return "Early If-Conversion"; }
private:
bool tryConvertIf(MachineBasicBlock*);
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 22b35d5..f1b8394 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -972,9 +972,9 @@ private:
// Return the last use of reg between NewIdx and OldIdx.
SlotIndex findLastUseBefore(unsigned Reg) {
- SlotIndex LastUse = NewIdx;
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ SlotIndex LastUse = NewIdx;
for (MachineRegisterInfo::use_nodbg_iterator
UI = MRI.use_nodbg_begin(Reg),
UE = MRI.use_nodbg_end();
@@ -984,30 +984,42 @@ private:
if (InstSlot > LastUse && InstSlot < OldIdx)
LastUse = InstSlot;
}
- } else {
- MachineInstr* MI = LIS.getSlotIndexes()->getInstructionFromIndex(NewIdx);
- MachineBasicBlock::iterator MII(MI);
- ++MII;
- MachineBasicBlock* MBB = MI->getParent();
- for (; MII != MBB->end(); ++MII){
- if (MII->isDebugValue())
- continue;
- if (LIS.getInstructionIndex(MII) < OldIdx)
- break;
- for (MachineInstr::mop_iterator MOI = MII->operands_begin(),
- MOE = MII->operands_end();
- MOI != MOE; ++MOI) {
- const MachineOperand& mop = *MOI;
- if (!mop.isReg() || mop.getReg() == 0 ||
- TargetRegisterInfo::isVirtualRegister(mop.getReg()))
- continue;
-
- if (TRI.hasRegUnit(mop.getReg(), Reg))
- LastUse = LIS.getInstructionIndex(MII);
- }
- }
+ return LastUse;
+ }
+
+ // This is a regunit interval, so scanning the use list could be very
+ // expensive. Scan upwards from OldIdx instead.
+ assert(NewIdx < OldIdx && "Expected upwards move");
+ SlotIndexes *Indexes = LIS.getSlotIndexes();
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(NewIdx);
+
+ // OldIdx may not correspond to an instruction any longer, so set MII to
+ // point to the next instruction after OldIdx, or MBB->end().
+ MachineBasicBlock::iterator MII = MBB->end();
+ if (MachineInstr *MI = Indexes->getInstructionFromIndex(
+ Indexes->getNextNonNullIndex(OldIdx)))
+ if (MI->getParent() == MBB)
+ MII = MI;
+
+ MachineBasicBlock::iterator Begin = MBB->begin();
+ while (MII != Begin) {
+ if ((--MII)->isDebugValue())
+ continue;
+ SlotIndex Idx = Indexes->getInstructionIndex(MII);
+
+ // Stop searching when NewIdx is reached.
+ if (!SlotIndex::isEarlierInstr(NewIdx, Idx))
+ return NewIdx;
+
+ // Check if MII uses Reg.
+ for (MIBundleOperands MO(MII); MO.isValid(); ++MO)
+ if (MO->isReg() &&
+ TargetRegisterInfo::isPhysicalRegister(MO->getReg()) &&
+ TRI.hasRegUnit(MO->getReg(), Reg))
+ return Idx;
}
- return LastUse;
+ // Didn't reach NewIdx. It must be the first instruction in the block.
+ return NewIdx;
}
};
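The new regunit path avoids walking a potentially huge use list by scanning instructions backwards from OldIdx until it either finds a use of the register or walks past NewIdx. The same shape in a self-contained sketch over a plain instruction list, with slot indexes reduced to integers and the operand check reduced to a flag (all invented):

#include <iostream>
#include <vector>

struct Inst {
  int SlotIndex;        // stand-in for a SlotIndex
  bool UsesTargetReg;   // stand-in for "some operand maps to the regunit"
};

// Scan upwards from OldIdx towards NewIdx; return the index of the last use
// strictly above NewIdx, or NewIdx itself if no such use exists.
static int findLastUseBefore(const std::vector<Inst> &Block, int NewIdx, int OldIdx) {
  int i = static_cast<int>(Block.size());
  while (i > 0 && Block[i - 1].SlotIndex > OldIdx)   // start at/below OldIdx
    --i;
  while (i > 0) {
    const Inst &I = Block[--i];
    if (I.SlotIndex <= NewIdx)
      return NewIdx;         // reached NewIdx without finding a use
    if (I.UsesTargetReg)
      return I.SlotIndex;    // last use between NewIdx and OldIdx
  }
  return NewIdx;             // fell off the top of the block
}

int main() {
  std::vector<Inst> MBB = {{4, false}, {8, true}, {12, false}, {16, true}, {20, false}};
  std::cout << findLastUseBefore(MBB, 4, 20) << "\n";  // 16
  std::cout << findLastUseBefore(MBB, 4, 14) << "\n";  // 8
}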
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 5e04f2d..04321f3 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -574,6 +574,54 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
return BV;
}
+unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ unsigned MaxAlign = getMaxAlignment();
+ int Offset = 0;
+
+ // This code is very, very similar to PEI::calculateFrameObjectOffsets().
+ // It really should be refactored to share code. Until then, changes
+ // should keep in mind that there's tight coupling between the two.
+
+ for (int i = getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -getObjectOffset(i);
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) {
+ if (isDeadObjectIndex(i))
+ continue;
+ Offset += getObjectSize(i);
+ unsigned Align = getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ if (adjustsStack() && TFI->hasReservedCallFrame(MF))
+ Offset += getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (adjustsStack() || hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0))
+ StackAlign = TFI->getStackAlignment();
+ else
+ StackAlign = TFI->getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+
+ return (unsigned)Offset;
+}
void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
if (Objects.empty()) return;
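estimateStackSize boils down to: accumulate object sizes, rounding each up to its alignment, then round the total up to the chosen stack alignment so SP-relative addressing stays aligned. A compact sketch of that arithmetic with plain data; fixed objects, call-frame size and the target hooks are omitted, and all values are invented:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

struct StackObject { uint64_t Size; uint64_t Align; };

static uint64_t alignTo(uint64_t Offset, uint64_t Align) {
  return (Offset + Align - 1) / Align * Align;
}

static uint64_t estimateStackSize(const std::vector<StackObject> &Objects,
                                  uint64_t StackAlign) {
  uint64_t Offset = 0, MaxAlign = StackAlign;
  for (const StackObject &O : Objects) {
    Offset += O.Size;                    // place the object...
    Offset = alignTo(Offset, O.Align);   // ...and pad to its alignment boundary
    MaxAlign = std::max(MaxAlign, O.Align);
  }
  // Round the whole frame up so SP-relative offsets stay aligned.
  return alignTo(Offset, MaxAlign);
}

int main() {
  // 4-byte int, 1-byte char, 16-byte vector: offsets 4 -> 5 -> 21, rounded to 32.
  std::cout << estimateStackSize({{4, 4}, {1, 1}, {16, 16}}, 8) << "\n"; // 32
}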
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index a777f52..1af00e8 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -37,6 +37,7 @@ MachineRegisterInfo::~MachineRegisterInfo() {
///
void
MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
+ assert(RC && RC->isAllocatable() && "Invalid RC for virtual register");
VRegInfo[Reg].first = RC;
}
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index a93d070..c872355 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -19,6 +19,8 @@
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDFS.h"
@@ -57,6 +59,9 @@ static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
cl::desc("Enable scheduling for macro fusion."), cl::init(true));
+static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
+ cl::desc("Verify machine instrs before and after machine scheduling"));
+
// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;
@@ -197,6 +202,10 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
LIS = &getAnalysis<LiveIntervals>();
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ if (VerifyScheduling) {
+ DEBUG(LIS->print(dbgs()));
+ MF->verify(this, "Before machine scheduling.");
+ }
RegClassInfo->runOnMachineFunction(*MF);
// Select the scheduler, or set the default.
@@ -285,6 +294,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
}
Scheduler->finalizeSchedule();
DEBUG(LIS->print(dbgs()));
+ if (VerifyScheduling)
+ MF->verify(this, "After machine scheduling.");
return true;
}
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index f77a7b1..5bf0176 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -677,7 +677,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
// Ignore dependencies outside the current trace.
const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
- if (!DefTBI.isEarlierInSameTrace(TBI))
+ if (!DefTBI.isUsefulDominator(TBI))
continue;
unsigned Len = LIR.Height + Cycles[DefMI].Depth;
MaxLen = std::max(MaxLen, Len);
@@ -740,7 +740,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
const TraceBlockInfo&DepTBI =
BlockInfo[Dep.DefMI->getParent()->getNumber()];
// Ignore dependencies from outside the current trace.
- if (!DepTBI.isEarlierInSameTrace(TBI))
+ if (!DepTBI.isUsefulDominator(TBI))
continue;
assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index b79f9f9..6e1cad3 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -25,7 +25,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index b18d52d..855a8c5 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -38,7 +38,6 @@
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <climits>
@@ -102,7 +101,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// Allow the target machine to make final modifications to the function
// before the frame layout is finalized.
- TFI->processFunctionBeforeFrameFinalized(Fn);
+ TFI->processFunctionBeforeFrameFinalized(Fn, RS);
// Calculate actual frame offsets for all abstract stack objects...
calculateFrameObjectOffsets(Fn);
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index 3053119..0b6dc68 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -34,7 +34,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <cstdlib>
#include <queue>
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 6344a73..6d84176 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -41,7 +41,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOptions.h"
#include <queue>
using namespace llvm;
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index e2488ad..d85646d 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -37,7 +37,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 45b4f68..07e5b47 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -329,8 +329,8 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
dbgs() << " # weak succs left : " << WeakSuccsLeft << "\n";
dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n";
dbgs() << " Latency : " << Latency << "\n";
- dbgs() << " Depth : " << Depth << "\n";
- dbgs() << " Height : " << Height << "\n";
+ dbgs() << " Depth : " << getDepth() << "\n";
+ dbgs() << " Height : " << getHeight() << "\n";
if (Preds.size() != 0) {
dbgs() << " Predecessors:\n";
@@ -367,6 +367,8 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
if (I->isArtificial())
dbgs() << " *";
dbgs() << ": Latency=" << I->getLatency();
+ if (I->isAssignedRegDep())
+ dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI);
dbgs() << "\n";
}
}
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ec52d7e..61603e1 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4496,8 +4496,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
NegOne, DAG.getConstant(0, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode()) return SCC;
- if (!LegalOperations ||
- TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))
+ if (!VT.isVector() && (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT))))
return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
DAG.getSetCC(N->getDebugLoc(),
TLI.getSetCCResultType(VT),
@@ -6709,7 +6709,8 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
// fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
// on the target.
if (N1.getOpcode() == ISD::SETCC &&
- TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) {
+ TLI.isOperationLegalOrCustom(ISD::BR_CC,
+ N1.getOperand(0).getValueType())) {
return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
Chain, N1.getOperand(2),
N1.getOperand(0), N1.getOperand(1), N2);
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index ff9b2ba..10e2dc6 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -696,6 +696,13 @@ bool FastISel::SelectCall(const User *I) {
UpdateValueMap(Call, ResultReg);
return true;
}
+ case Intrinsic::expect: {
+ unsigned ResultReg = getRegForValue(Call->getArgOperand(0));
+ if (ResultReg == 0)
+ return false;
+ UpdateValueMap(Call, ResultReg);
+ return true;
+ }
}
// Usually, it does not make sense to initialize a value,
@@ -822,7 +829,7 @@ FastISel::SelectInstruction(const Instruction *I) {
}
// First, try doing target-independent selection.
- if (!SkipTargetIndependentFastISel() && SelectOperator(I, I->getOpcode())) {
+ if (SelectOperator(I, I->getOpcode())) {
++NumFastIselSuccessIndependent;
DL = DebugLoc();
return true;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f085e44..51cc254 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3632,8 +3632,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Otherwise, SETCC for the given comparison type must be completely
// illegal; expand it into a SELECT_CC.
EVT VT = Node->getValueType(0);
+ int TrueValue;
+ switch (TLI.getBooleanContents(VT.isVector())) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ case TargetLowering::UndefinedBooleanContent:
+ TrueValue = 1;
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ TrueValue = -1;
+ break;
+ }
Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2,
- DAG.getConstant(1, VT), DAG.getConstant(0, VT), Tmp3);
+ DAG.getConstant(TrueValue, VT), DAG.getConstant(0, VT),
+ Tmp3);
Results.push_back(Tmp1);
break;
}
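The fix above makes the SELECT_CC expansion respect the target's boolean convention: some targets produce 0/1 for setcc, others produce 0/-1 (all ones), which matters in particular for vector compares. A tiny sketch of that mapping; the enum constants mirror the names in the hunk, everything else is illustrative:

#include <iostream>

// Mirrors TargetLowering's boolean-content enum in spirit; the rest is invented.
enum BooleanContent {
  ZeroOrOneBooleanContent,
  UndefinedBooleanContent,
  ZeroOrNegativeOneBooleanContent
};

// The constant a legalizer should materialize for "true" when turning a SETCC
// into a SELECT_CC on a target with the given convention.
static int trueValueFor(BooleanContent BC) {
  switch (BC) {
  case ZeroOrOneBooleanContent:
  case UndefinedBooleanContent:
    return 1;
  case ZeroOrNegativeOneBooleanContent:
    return -1;   // all ones, the usual convention for vector compares
  }
  return 1;      // unreachable with a valid enum value
}

int main() {
  std::cout << trueValueFor(ZeroOrOneBooleanContent) << "\n";         // 1
  std::cout << trueValueFor(ZeroOrNegativeOneBooleanContent) << "\n"; // -1
}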
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 1ee2192..6a05cf8 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -29,11 +29,13 @@ static RTLIB::Libcall GetFPLibCall(EVT VT,
RTLIB::Libcall Call_F32,
RTLIB::Libcall Call_F64,
RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
RTLIB::Libcall Call_PPCF128) {
return
VT == MVT::f32 ? Call_F32 :
VT == MVT::f64 ? Call_F64 :
VT == MVT::f80 ? Call_F80 :
+ VT == MVT::f128 ? Call_F128 :
VT == MVT::ppcf128 ? Call_PPCF128 :
RTLIB::UNKNOWN_LIBCALL;
}
@@ -156,6 +158,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
RTLIB::ADD_F32,
RTLIB::ADD_F64,
RTLIB::ADD_F80,
+ RTLIB::ADD_F128,
RTLIB::ADD_PPCF128),
NVT, Ops, 2, false, N->getDebugLoc());
}
@@ -167,6 +170,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
RTLIB::CEIL_F32,
RTLIB::CEIL_F64,
RTLIB::CEIL_F80,
+ RTLIB::CEIL_F128,
RTLIB::CEIL_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -220,6 +224,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
RTLIB::COS_F32,
RTLIB::COS_F64,
RTLIB::COS_F80,
+ RTLIB::COS_F128,
RTLIB::COS_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -232,6 +237,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
RTLIB::DIV_F32,
RTLIB::DIV_F64,
RTLIB::DIV_F80,
+ RTLIB::DIV_F128,
RTLIB::DIV_PPCF128),
NVT, Ops, 2, false, N->getDebugLoc());
}
@@ -243,6 +249,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
RTLIB::EXP_F32,
RTLIB::EXP_F64,
RTLIB::EXP_F80,
+ RTLIB::EXP_F128,
RTLIB::EXP_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -254,6 +261,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
RTLIB::EXP2_F32,
RTLIB::EXP2_F64,
RTLIB::EXP2_F80,
+ RTLIB::EXP2_F128,
RTLIB::EXP2_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -265,6 +273,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
RTLIB::FLOOR_F32,
RTLIB::FLOOR_F64,
RTLIB::FLOOR_F80,
+ RTLIB::FLOOR_F128,
RTLIB::FLOOR_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -276,6 +285,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
RTLIB::LOG_F32,
RTLIB::LOG_F64,
RTLIB::LOG_F80,
+ RTLIB::LOG_F128,
RTLIB::LOG_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -287,6 +297,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
RTLIB::LOG2_F32,
RTLIB::LOG2_F64,
RTLIB::LOG2_F80,
+ RTLIB::LOG2_F128,
RTLIB::LOG2_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -298,6 +309,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
RTLIB::LOG10_F32,
RTLIB::LOG10_F64,
RTLIB::LOG10_F80,
+ RTLIB::LOG10_F128,
RTLIB::LOG10_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -311,6 +323,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
RTLIB::FMA_F32,
RTLIB::FMA_F64,
RTLIB::FMA_F80,
+ RTLIB::FMA_F128,
RTLIB::FMA_PPCF128),
NVT, Ops, 3, false, N->getDebugLoc());
}
@@ -323,6 +336,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
RTLIB::MUL_F32,
RTLIB::MUL_F64,
RTLIB::MUL_F80,
+ RTLIB::MUL_F128,
RTLIB::MUL_PPCF128),
NVT, Ops, 2, false, N->getDebugLoc());
}
@@ -334,6 +348,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
RTLIB::NEARBYINT_F32,
RTLIB::NEARBYINT_F64,
RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
RTLIB::NEARBYINT_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -347,6 +362,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
RTLIB::SUB_F32,
RTLIB::SUB_F64,
RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
NVT, Ops, 2, false, N->getDebugLoc());
}
@@ -384,6 +400,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
RTLIB::POW_F32,
RTLIB::POW_F64,
RTLIB::POW_F80,
+ RTLIB::POW_F128,
RTLIB::POW_PPCF128),
NVT, Ops, 2, false, N->getDebugLoc());
}
@@ -397,6 +414,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
RTLIB::POWI_F32,
RTLIB::POWI_F64,
RTLIB::POWI_F80,
+ RTLIB::POWI_F128,
RTLIB::POWI_PPCF128),
NVT, Ops, 2, false, N->getDebugLoc());
}
@@ -409,6 +427,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
RTLIB::REM_F32,
RTLIB::REM_F64,
RTLIB::REM_F80,
+ RTLIB::REM_F128,
RTLIB::REM_PPCF128),
NVT, Ops, 2, false, N->getDebugLoc());
}
@@ -420,6 +439,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
RTLIB::RINT_F32,
RTLIB::RINT_F64,
RTLIB::RINT_F80,
+ RTLIB::RINT_F128,
RTLIB::RINT_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -431,6 +451,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
RTLIB::SIN_F32,
RTLIB::SIN_F64,
RTLIB::SIN_F80,
+ RTLIB::SIN_F128,
RTLIB::SIN_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -442,6 +463,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
RTLIB::SQRT_F32,
RTLIB::SQRT_F64,
RTLIB::SQRT_F80,
+ RTLIB::SQRT_F128,
RTLIB::SQRT_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -454,6 +476,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
RTLIB::SUB_F32,
RTLIB::SUB_F64,
RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
NVT, Ops, 2, false, N->getDebugLoc());
}
@@ -465,6 +488,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
RTLIB::TRUNC_F32,
RTLIB::TRUNC_F64,
RTLIB::TRUNC_F80,
+ RTLIB::TRUNC_F128,
RTLIB::TRUNC_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -839,7 +863,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::ADD_F32, RTLIB::ADD_F64,
- RTLIB::ADD_F80, RTLIB::ADD_PPCF128),
+ RTLIB::ADD_F80, RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -848,7 +873,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::CEIL_F32, RTLIB::CEIL_F64,
- RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128),
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -859,6 +885,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N,
RTLIB::COPYSIGN_F32,
RTLIB::COPYSIGN_F64,
RTLIB::COPYSIGN_F80,
+ RTLIB::COPYSIGN_F128,
RTLIB::COPYSIGN_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
@@ -868,7 +895,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::COS_F32, RTLIB::COS_F64,
- RTLIB::COS_F80, RTLIB::COS_PPCF128),
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -880,6 +908,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
RTLIB::DIV_F32,
RTLIB::DIV_F64,
RTLIB::DIV_F80,
+ RTLIB::DIV_F128,
RTLIB::DIV_PPCF128),
N->getValueType(0), Ops, 2, false,
N->getDebugLoc());
@@ -890,7 +919,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::EXP_F32, RTLIB::EXP_F64,
- RTLIB::EXP_F80, RTLIB::EXP_PPCF128),
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -899,7 +929,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::EXP2_F32, RTLIB::EXP2_F64,
- RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128),
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -907,8 +938,9 @@ void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::FLOOR_F32,RTLIB::FLOOR_F64,
- RTLIB::FLOOR_F80,RTLIB::FLOOR_PPCF128),
+ RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -917,7 +949,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::LOG_F32, RTLIB::LOG_F64,
- RTLIB::LOG_F80, RTLIB::LOG_PPCF128),
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -926,7 +959,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::LOG2_F32, RTLIB::LOG2_F64,
- RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128),
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -934,8 +968,9 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::LOG10_F32,RTLIB::LOG10_F64,
- RTLIB::LOG10_F80,RTLIB::LOG10_PPCF128),
+ RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -947,6 +982,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
RTLIB::FMA_F32,
RTLIB::FMA_F64,
RTLIB::FMA_F80,
+ RTLIB::FMA_F128,
RTLIB::FMA_PPCF128),
N->getValueType(0), Ops, 3, false,
N->getDebugLoc());
@@ -960,6 +996,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
RTLIB::MUL_F32,
RTLIB::MUL_F64,
RTLIB::MUL_F80,
+ RTLIB::MUL_F128,
RTLIB::MUL_PPCF128),
N->getValueType(0), Ops, 2, false,
N->getDebugLoc());
@@ -972,6 +1009,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N,
RTLIB::NEARBYINT_F32,
RTLIB::NEARBYINT_F64,
RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
RTLIB::NEARBYINT_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
@@ -997,7 +1035,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::POW_F32, RTLIB::POW_F64,
- RTLIB::POW_F80, RTLIB::POW_PPCF128),
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -1006,7 +1045,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::POWI_F32, RTLIB::POWI_F64,
- RTLIB::POWI_F80, RTLIB::POWI_PPCF128),
+ RTLIB::POWI_F80, RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -1015,7 +1055,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::RINT_F32, RTLIB::RINT_F64,
- RTLIB::RINT_F80, RTLIB::RINT_PPCF128),
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -1024,7 +1065,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::SIN_F32, RTLIB::SIN_F64,
- RTLIB::SIN_F80, RTLIB::SIN_PPCF128),
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -1033,7 +1075,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::SQRT_F32, RTLIB::SQRT_F64,
- RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128),
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -1045,6 +1088,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
RTLIB::SUB_F32,
RTLIB::SUB_F64,
RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
N->getValueType(0), Ops, 2, false,
N->getDebugLoc());
@@ -1055,7 +1099,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
- RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128),
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
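Every change in this file threads an extra f128 slot through GetFPLibCall, which simply selects a runtime-library call from the floating-point type. The selection chain reduces to something like the following sketch, with a toy type enum and string suffixes in place of RTLIB entries (the suffixes are only for illustration):

#include <iostream>
#include <string>

enum class FPType { F32, F64, F80, F128, PPCF128, Other };

// Pick a soft-float libcall name by type; f128 now has its own slot instead of
// falling through to "unknown".
static std::string getFPLibCall(FPType VT, const std::string &Base) {
  switch (VT) {
  case FPType::F32:     return Base + "sf3";      // e.g. __addsf3
  case FPType::F64:     return Base + "df3";      // e.g. __adddf3
  case FPType::F80:     return Base + "xf3";
  case FPType::F128:    return Base + "tf3";      // e.g. __addtf3 for IEEE quad
  case FPType::PPCF128: return Base + "tf3_ppc";  // placeholder, not a real name
  case FPType::Other:   break;
  }
  return "UNKNOWN_LIBCALL";
}

int main() {
  std::cout << getFPLibCall(FPType::F128, "__add") << "\n";  // __addtf3
  std::cout << getFPLibCall(FPType::Other, "__add") << "\n"; // UNKNOWN_LIBCALL
}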
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 182b7f3..beeb6b3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -531,9 +531,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
- return DAG.getNode(ISD::SHL, N->getDebugLoc(),
- TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
- GetPromotedInteger(N->getOperand(0)), N->getOperand(1));
+ SDValue Res = GetPromotedInteger(N->getOperand(0));
+ SDValue Amt = N->getOperand(1);
+ Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt;
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), Res.getValueType(), Res, Amt);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
@@ -555,16 +556,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
// The input value must be properly sign extended.
SDValue Res = SExtPromotedInteger(N->getOperand(0));
- return DAG.getNode(ISD::SRA, N->getDebugLoc(),
- Res.getValueType(), Res, N->getOperand(1));
+ SDValue Amt = N->getOperand(1);
+ Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt;
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), Res.getValueType(), Res, Amt);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
// The input value must be properly zero extended.
- EVT VT = N->getValueType(0);
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Res = ZExtPromotedInteger(N->getOperand(0));
- return DAG.getNode(ISD::SRL, N->getDebugLoc(), NVT, Res, N->getOperand(1));
+ SDValue Amt = N->getOperand(1);
+ Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt;
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), Res.getValueType(), Res, Amt);
}
SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
@@ -2101,8 +2103,9 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
// have an illegal type. Fix that first by casting the operand, otherwise
// the new SHL_PARTS operation would need further legalization.
SDValue ShiftOp = N->getOperand(1);
- MVT ShiftTy = TLI.getShiftAmountTy(VT);
- assert(ShiftTy.getSizeInBits() >= Log2_32_Ceil(VT.getSizeInBits()) &&
+ EVT ShiftTy = TLI.getShiftAmountTy(VT);
+ assert(ShiftTy.getScalarType().getSizeInBits() >=
+ Log2_32_Ceil(VT.getScalarType().getSizeInBits()) &&
"ShiftAmountTy is too small to cover the range of this type!");
if (ShiftOp.getValueType() != ShiftTy)
ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy);
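The PromoteIntRes_SHL/SRA/SRL changes above enforce one rule: when a vector value is promoted, a vector shift amount must be promoted (zero-extended) along with it so both operands end up with the same type, while scalar amounts are left alone. A contrived sketch of that rule using bit widths instead of EVTs (all names invented):

#include <cassert>
#include <iostream>

struct Ty { unsigned Bits; bool IsVector; };

// After the shifted value has been promoted, a vector shift amount must be
// widened to the promoted value type; a scalar amount is left untouched.
static Ty promoteShiftAmountTy(Ty PromotedValue, Ty Amount) {
  if (Amount.IsVector)
    return PromotedValue;   // zero-extend the amount alongside the value
  return Amount;
}

int main() {
  Ty PromotedVal{32, true};               // e.g. <4 x i16> promoted to <4 x i32>
  Ty VecAmt{16, true}, ScalAmt{8, false};
  assert(promoteShiftAmountTy(PromotedVal, VecAmt).Bits == 32);
  assert(promoteShiftAmountTy(PromotedVal, ScalAmt).Bits == 8);
  std::cout << "amount types match the promoted operands\n";
}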
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 7de42ea..27b3cf2 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -530,6 +530,7 @@ private:
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_BITCAST(SDNode *N);
+ SDValue ScalarizeVecOp_EXTEND(SDNode *N);
SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 09a50d9..5ec8535 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -365,6 +365,11 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::BITCAST:
Res = ScalarizeVecOp_BITCAST(N);
break;
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ Res = ScalarizeVecOp_EXTEND(N);
+ break;
case ISD::CONCAT_VECTORS:
Res = ScalarizeVecOp_CONCAT_VECTORS(N);
break;
@@ -400,6 +405,21 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
N->getValueType(0), Elt);
}
+/// ScalarizeVecOp_EXTEND - If the value to extend is a vector that needs
+/// to be scalarized, it must be <1 x ty>. Extend the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTEND(SDNode *N) {
+ assert(N->getValueType(0).getVectorNumElements() == 1 &&
+ "Unexected vector type!");
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ SmallVector<SDValue, 1> Ops(1);
+ Ops[0] = DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ N->getValueType(0).getScalarType(), Elt);
+ // Revectorize the result so the types line up with what the uses of this
+ // expression expect.
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0),
+ &Ops[0], 1);
+}
+
/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one -
/// use a BUILD_VECTOR instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index addfccb..c009cfc 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -904,9 +904,6 @@ void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
SUnit *OldSU = Sequence.back();
while (true) {
Sequence.pop_back();
- if (SU->isSucc(OldSU))
- // Don't try to remove SU from AvailableQueue.
- SU->isAvailable = false;
// FIXME: use ready cycle instead of height
CurCycle = OldSU->getHeight();
UnscheduleNodeBottomUp(OldSU);
@@ -1363,8 +1360,10 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
SmallVector<unsigned, 4> LRegs;
if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
break;
- DEBUG(dbgs() << " Interfering reg " << TRI->getName(LRegs[0])
- << " SU #" << CurSU->NodeNum << '\n');
+ DEBUG(dbgs() << " Interfering reg " <<
+ (LRegs[0] == TRI->getNumRegs() ? "CallResource"
+ : TRI->getName(LRegs[0]))
+ << " SU #" << CurSU->NodeNum << '\n');
std::pair<LRegsMapT::iterator, bool> LRegsPair =
LRegsMap.insert(std::make_pair(CurSU, LRegs));
if (LRegsPair.second) {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index db8ae6e..35707e8 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1518,7 +1518,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) {
/// the target's desired shift amount type.
SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
EVT OpTy = Op.getValueType();
- MVT ShTy = TLI.getShiftAmountTy(LHSTy);
+ EVT ShTy = TLI.getShiftAmountTy(LHSTy);
if (OpTy == ShTy || OpTy.isVector()) return Op;
ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
@@ -2912,6 +2912,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1,
"Shift operators return type must be the same as their first arg");
assert(VT.isInteger() && N2.getValueType().isInteger() &&
"Shifts only work on integers");
+ assert((!VT.isVector() || VT == N2.getValueType()) &&
+ "Vector shift amounts must be in the same as their first arg");
// Verify that the shift amount VT is big enough to hold valid shift
// amounts. This catches things like trying to shift an i1024 value by an
// i8, which is easy to fall into in generic code that uses
@@ -4702,7 +4704,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
}
SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
- const std::vector<EVT> &ResultTys,
+ ArrayRef<EVT> ResultTys,
const SDValue *Ops, unsigned NumOps) {
return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()),
Ops, NumOps);
@@ -5354,7 +5356,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
- const std::vector<EVT> &ResultTys,
+ ArrayRef<EVT> ResultTys,
const SDValue *Ops, unsigned NumOps) {
SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size());
return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index b8ab2a9..33d100e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2654,7 +2654,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
- MVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType());
+ EVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType());
// Coerce the shift amount to the right type if we can.
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
@@ -6161,6 +6161,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:"
" don't know how to handle tied "
"indirect register inputs");
+ report_fatal_error("Cannot handle indirect register inputs!");
}
RegsForValue MatchedRegs;
@@ -6589,9 +6590,7 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
return true;
}
-void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
- // If this is the entry block, emit arguments.
- const Function &F = *LLVMBB->getParent();
+void SelectionDAGISel::LowerArguments(const Function &F) {
SelectionDAG &DAG = SDB->DAG;
DebugLoc dl = SDB->getCurDebugLoc();
const DataLayout *TD = TLI.getDataLayout();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index acae58c..c3b6276 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -362,6 +362,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TargetSubtargetInfo &ST =
const_cast<TargetSubtargetInfo&>(TM.getSubtarget<TargetSubtargetInfo>());
ST.resetSubtargetFeatures(MF);
+ TM.resetTargetOptions(MF);
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
@@ -1031,13 +1032,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FuncInfo->VisitedBBs.insert(LLVMBB);
}
- FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
- FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
-
BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI();
BasicBlock::const_iterator const End = LLVMBB->end();
BasicBlock::const_iterator BI = End;
+ FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
// Setup an EH landing-pad block.
@@ -1053,15 +1052,12 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (LLVMBB == &Fn.getEntryBlock()) {
// Lower any arguments needed in this block if this is the entry block.
if (!FastIS->LowerArguments()) {
-
+ // Fast isel failed to lower these arguments
if (EnableFastISelAbortArgs)
- // The "fast" selector couldn't lower these arguments. For the
- // purpose of debugging, just abort.
llvm_unreachable("FastISel didn't lower all arguments");
- // Call target indepedent SDISel argument lowering code if the target
- // specific routine is not successful.
- LowerArguments(LLVMBB);
+ // Use SelectionDAG argument lowering
+ LowerArguments(Fn);
CurDAG->setRoot(SDB->getControlRoot());
SDB->clear();
CodeGenAndEmitDAG();
@@ -1179,7 +1175,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
} else {
// Lower any arguments needed in this block if this is the entry block.
if (LLVMBB == &Fn.getEntryBlock())
- LowerArguments(LLVMBB);
+ LowerArguments(Fn);
}
if (Begin != BI)
@@ -1674,9 +1670,7 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
std::vector<SDValue> Ops(N->op_begin(), N->op_end());
SelectInlineAsmMemoryOperands(Ops);
- std::vector<EVT> VTs;
- VTs.push_back(MVT::Other);
- VTs.push_back(MVT::Glue);
+ EVT VTs[] = { MVT::Other, MVT::Glue };
SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
VTs, &Ops[0], Ops.size());
New->setNodeId(-1);
@@ -2610,11 +2604,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
SDValue Imm = RecordedNodes[RecNo].first;
if (Imm->getOpcode() == ISD::Constant) {
- int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue();
- Imm = CurDAG->getTargetConstant(Val, Imm.getValueType());
+ const ConstantInt *Val=cast<ConstantSDNode>(Imm)->getConstantIntValue();
+ Imm = CurDAG->getConstant(*Val, Imm.getValueType(), true);
} else if (Imm->getOpcode() == ISD::ConstantFP) {
const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();
- Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType());
+ Imm = CurDAG->getConstantFP(*Val, Imm.getValueType(), true);
}
RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second));
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index b58bb85..3903743 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -379,13 +379,22 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
/// the function context and marking the call sites with the appropriate
/// values. These values are used by the DWARF EH emitter.
bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
- SmallVector<ReturnInst*, 16> Returns;
- SmallVector<InvokeInst*, 16> Invokes;
+ SmallVector<ReturnInst*, 16> Returns;
+ SmallVector<InvokeInst*, 16> Invokes;
SmallSetVector<LandingPadInst*, 16> LPads;
// Look through the terminators of the basic blocks to find invokes.
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ if (Function *Callee = II->getCalledFunction())
+ if (Callee->isIntrinsic() &&
+ Callee->getIntrinsicID() == Intrinsic::donothing) {
+ // Remove the NOP invoke.
+ BranchInst::Create(II->getNormalDest(), II);
+ II->eraseFromParent();
+ continue;
+ }
+
Invokes.push_back(II);
LPads.insert(II->getUnwindDest()->getLandingPadInst());
} else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index f3be37c..fbef347 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -31,7 +31,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
using namespace llvm;
STATISTIC(NumFunProtected, "Number of functions protected");
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 2a02f6a..3c34676 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -744,10 +744,17 @@ TargetLoweringBase::~TargetLoweringBase() {
delete &TLOF;
}
-MVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const {
+MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
return MVT::getIntegerVT(8*TD->getPointerSize(0));
}
+EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const {
+ assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
+ if (LHSTy.isVector())
+ return LHSTy;
+ return getScalarShiftAmountTy(LHSTy);
+}
+
/// canOpTrap - Returns true if the operation can trap for the value type.
/// VT must be a legal type.
bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
@@ -905,6 +912,15 @@ void TargetLoweringBase::computeRegisterProperties() {
ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
}
+ // Decide how to handle f128. If the target does not have native f128 support,
+ // expand it to i128 and we will be generating soft float library calls.
+ if (!isTypeLegal(MVT::f128)) {
+ NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128];
+ RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128];
+ TransformToType[MVT::f128] = MVT::i128;
+ ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
+ }
+
// Decide how to handle f64. If the target does not have native f64 support,
// expand it to i64 and we will be generating soft float library calls.
if (!isTypeLegal(MVT::f64)) {
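Two legalization tweaks land in this file: getShiftAmountTy now returns the vector type itself for vector shifts, reserving the target's scalar shift-amount type for scalars, and f128 is softened to i128 when the target has no native support. A sketch of the first rule with a plain struct; the pointer-size default mirrors the hunk, everything else is simplified:

#include <cassert>
#include <iostream>

struct SimpleVT { unsigned Bits; bool IsVector; bool IsInteger; };

// Scalar shifts default to a pointer-sized amount type; vector shifts keep the
// vector type so each lane can carry its own amount.
static SimpleVT getShiftAmountTy(SimpleVT LHSTy, unsigned PointerBits) {
  assert(LHSTy.IsInteger && "Shift amount is not an integer type!");
  if (LHSTy.IsVector)
    return LHSTy;
  return SimpleVT{PointerBits, false, true};
}

int main() {
  SimpleVT V4i32{128, true, true}, I16{16, false, true};
  assert(getShiftAmountTy(V4i32, 64).IsVector);   // vector shifts keep their type
  assert(getShiftAmountTy(I16, 64).Bits == 64);   // scalars use pointer width
  std::cout << "ok\n";
}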
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 1170bf2..3bdca4c 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -36,7 +36,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
using namespace llvm;
using namespace dwarf;
@@ -743,8 +742,11 @@ static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
return ".text$";
if (Kind.isBSS ())
return ".bss$";
- if (Kind.isThreadLocal())
- return ".tls$";
+ if (Kind.isThreadLocal()) {
+ // 'LLVM' is just an arbitary string to ensure that the section name gets
+ // sorted in between '.tls$AAA' and '.tls$ZZZ' by the linker.
+ return ".tls$LLVM";
+ }
if (Kind.isWriteable())
return ".data$";
return ".rdata$";
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index f31f67d..783bfa1 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -240,7 +240,10 @@ unsigned TargetSchedModel::computeOperandLatency(
report_fatal_error(ss.str());
}
#endif
- return DefMI->isTransient() ? 0 : 1;
+ // FIXME: Automatically giving all implicit defs defaultDefLatency is
+ // undesirable. We should only do it for defs that are known to the MC
+ // desc like flags. Truly implicit defs should get 1 cycle latency.
+ return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI);
}
unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 26c5fe4..e6dfe10 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -47,7 +47,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index 9954a29..fb591a8 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -2094,7 +2094,7 @@ static void WriteMDNodeComment(const MDNode *Node,
return;
DIDescriptor Desc(Node);
- if (Desc.getVersion() < LLVMDebugVersion11)
+ if (!Desc.Verify())
return;
unsigned Tag = Desc.getTag();
diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index cb2c55c..ad2670d 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -146,7 +146,7 @@ public:
}
static void Profile(FoldingSetNodeID &ID, StringRef Kind, StringRef Values) {
ID.AddString(Kind);
- ID.AddString(Values);
+ if (!Values.empty()) ID.AddString(Values);
}
// FIXME: Remove this!
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index 6eb51f0..2d82891 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -645,9 +645,17 @@ AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef<AttributeSet> Attrs) {
AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx,
Attribute::AttrKind Attr) const {
+ if (hasAttribute(Idx, Attr)) return *this;
return addAttributes(C, Idx, AttributeSet::get(C, Idx, Attr));
}
+AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx,
+ StringRef Kind) const {
+ llvm::AttrBuilder B;
+ B.addAttribute(Kind);
+ return addAttributes(C, Idx, AttributeSet::get(C, Idx, B));
+}
+
AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx,
AttributeSet Attrs) const {
if (!pImpl) return Attrs;
@@ -699,6 +707,7 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx,
AttributeSet AttributeSet::removeAttribute(LLVMContext &C, unsigned Idx,
Attribute::AttrKind Attr) const {
+ if (!hasAttribute(Idx, Attr)) return *this;
return removeAttributes(C, Idx, AttributeSet::get(C, Idx, Attr));
}
diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp
index a5a9d9f..bf93d4f 100644
--- a/lib/IR/ConstantFold.cpp
+++ b/lib/IR/ConstantFold.cpp
@@ -1971,21 +1971,30 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
}
}
- // Implement folding of:
- // i32* getelementptr ([2 x i32]* bitcast ([3 x i32]* %X to [2 x i32]*),
- // i64 0, i64 0)
- // To: i32* getelementptr ([3 x i32]* %X, i64 0, i64 0)
+ // Attempt to fold casts to the same type away. For example, folding:
//
+ // i32* getelementptr ([2 x i32]* bitcast ([3 x i32]* %X to [2 x i32]*),
+ // i64 0, i64 0)
+ // into:
+ //
+ // i32* getelementptr ([3 x i32]* %X, i64 0, i64 0)
+ //
+ // Don't fold if the cast is changing address spaces.
if (CE->isCast() && Idxs.size() > 1 && Idx0->isNullValue()) {
- if (PointerType *SPT =
- dyn_cast<PointerType>(CE->getOperand(0)->getType()))
- if (ArrayType *SAT = dyn_cast<ArrayType>(SPT->getElementType()))
- if (ArrayType *CAT =
- dyn_cast<ArrayType>(cast<PointerType>(C->getType())->getElementType()))
- if (CAT->getElementType() == SAT->getElementType())
- return
- ConstantExpr::getGetElementPtr((Constant*)CE->getOperand(0),
- Idxs, inBounds);
+ PointerType *SrcPtrTy =
+ dyn_cast<PointerType>(CE->getOperand(0)->getType());
+ PointerType *DstPtrTy = dyn_cast<PointerType>(CE->getType());
+ if (SrcPtrTy && DstPtrTy) {
+ ArrayType *SrcArrayTy =
+ dyn_cast<ArrayType>(SrcPtrTy->getElementType());
+ ArrayType *DstArrayTy =
+ dyn_cast<ArrayType>(DstPtrTy->getElementType());
+ if (SrcArrayTy && DstArrayTy
+ && SrcArrayTy->getElementType() == DstArrayTy->getElementType()
+ && SrcPtrTy->getAddressSpace() == DstPtrTy->getAddressSpace())
+ return ConstantExpr::getGetElementPtr((Constant*)CE->getOperand(0),
+ Idxs, inBounds);
+ }
}
}
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index 8093a09..70f7e01 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -47,6 +47,19 @@ bool Constant::isNegativeZeroValue() const {
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
return CFP->isZero() && CFP->isNegative();
+ // Equivalent for a vector of -0.0's.
+ if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
+ if (ConstantFP *SplatCFP = dyn_cast_or_null<ConstantFP>(CV->getSplatValue()))
+ if (SplatCFP && SplatCFP->isZero() && SplatCFP->isNegative())
+ return true;
+
+ // However, vectors of zeroes which are floating point represent +0.0's.
+ if (const ConstantAggregateZero *CAZ = dyn_cast<ConstantAggregateZero>(this))
+ if (const VectorType *VT = dyn_cast<VectorType>(CAZ->getType()))
+ if (VT->getElementType()->isFloatingPointTy())
+ // As it's a CAZ, we know it's the zero bit-pattern (i.e., +0.0) in each element.
+ return false;
+
// Otherwise, just use +0.0.
return isNullValue();
}
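The vector handling added above distinguishes a splat of -0.0, which is negative zero, from an all-zero-bits floating-point vector, which is +0.0 in every lane. The distinction is just the sign bit per lane, as in this sketch over plain doubles with std::signbit:

#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

// A vector constant counts as negative zero only when every lane is -0.0;
// a vector of zero bits is +0.0 in every lane and therefore does not.
static bool isNegativeZeroSplat(const std::vector<double> &Lanes) {
  return !Lanes.empty() &&
         std::all_of(Lanes.begin(), Lanes.end(),
                     [](double D) { return D == 0.0 && std::signbit(D); });
}

int main() {
  std::cout << isNegativeZeroSplat({-0.0, -0.0}) << "\n"; // 1
  std::cout << isNegativeZeroSplat({0.0, 0.0}) << "\n";   // 0, +0.0 lanes
  std::cout << isNegativeZeroSplat({-0.0, 0.0}) << "\n";  // 0, not a splat
}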
@@ -1416,9 +1429,8 @@ static inline Constant *getFoldedCast(
LLVMContextImpl *pImpl = Ty->getContext().pImpl;
- // Look up the constant in the table first to ensure uniqueness
- std::vector<Constant*> argVec(1, C);
- ExprMapKeyType Key(opc, argVec);
+ // Look up the constant in the table first to ensure uniqueness.
+ ExprMapKeyType Key(opc, C);
return pImpl->ExprConstants.getOrCreate(Ty, Key);
}
@@ -1715,9 +1727,8 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
if (Constant *FC = ConstantFoldBinaryInstruction(Opcode, C1, C2))
return FC; // Fold a few common cases.
- std::vector<Constant*> argVec(1, C1);
- argVec.push_back(C2);
- ExprMapKeyType Key(Opcode, argVec, 0, Flags);
+ Constant *ArgVec[] = { C1, C2 };
+ ExprMapKeyType Key(Opcode, ArgVec, 0, Flags);
LLVMContextImpl *pImpl = C1->getContext().pImpl;
return pImpl->ExprConstants.getOrCreate(C1->getType(), Key);
@@ -1793,10 +1804,8 @@ Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2) {
if (Constant *SC = ConstantFoldSelectInstruction(C, V1, V2))
return SC; // Fold common cases
- std::vector<Constant*> argVec(3, C);
- argVec[1] = V1;
- argVec[2] = V2;
- ExprMapKeyType Key(Instruction::Select, argVec);
+ Constant *ArgVec[] = { C, V1, V2 };
+ ExprMapKeyType Key(Instruction::Select, ArgVec);
LLVMContextImpl *pImpl = C->getContext().pImpl;
return pImpl->ExprConstants.getOrCreate(V1->getType(), Key);
@@ -1848,9 +1857,7 @@ ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *RHS) {
return FC; // Fold a few common cases...
// Look up the constant in the table first to ensure uniqueness
- std::vector<Constant*> ArgVec;
- ArgVec.push_back(LHS);
- ArgVec.push_back(RHS);
+ Constant *ArgVec[] = { LHS, RHS };
// Get the key type with both the opcode and predicate
const ExprMapKeyType Key(Instruction::ICmp, ArgVec, pred);
@@ -1871,9 +1878,7 @@ ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS) {
return FC; // Fold a few common cases...
// Look up the constant in the table first to ensure uniqueness
- std::vector<Constant*> ArgVec;
- ArgVec.push_back(LHS);
- ArgVec.push_back(RHS);
+ Constant *ArgVec[] = { LHS, RHS };
// Get the key type with both the opcode and predicate
const ExprMapKeyType Key(Instruction::FCmp, ArgVec, pred);
@@ -1895,9 +1900,8 @@ Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) {
return FC; // Fold a few common cases.
// Look up the constant in the table first to ensure uniqueness
- std::vector<Constant*> ArgVec(1, Val);
- ArgVec.push_back(Idx);
- const ExprMapKeyType Key(Instruction::ExtractElement,ArgVec);
+ Constant *ArgVec[] = { Val, Idx };
+ const ExprMapKeyType Key(Instruction::ExtractElement, ArgVec);
LLVMContextImpl *pImpl = Val->getContext().pImpl;
Type *ReqTy = Val->getType()->getVectorElementType();
@@ -1916,10 +1920,8 @@ Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt,
if (Constant *FC = ConstantFoldInsertElementInstruction(Val, Elt, Idx))
return FC; // Fold a few common cases.
// Look up the constant in the table first to ensure uniqueness
- std::vector<Constant*> ArgVec(1, Val);
- ArgVec.push_back(Elt);
- ArgVec.push_back(Idx);
- const ExprMapKeyType Key(Instruction::InsertElement,ArgVec);
+ Constant *ArgVec[] = { Val, Elt, Idx };
+ const ExprMapKeyType Key(Instruction::InsertElement, ArgVec);
LLVMContextImpl *pImpl = Val->getContext().pImpl;
return pImpl->ExprConstants.getOrCreate(Val->getType(), Key);
@@ -1938,10 +1940,8 @@ Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2,
Type *ShufTy = VectorType::get(EltTy, NElts);
// Look up the constant in the table first to ensure uniqueness
- std::vector<Constant*> ArgVec(1, V1);
- ArgVec.push_back(V2);
- ArgVec.push_back(Mask);
- const ExprMapKeyType Key(Instruction::ShuffleVector,ArgVec);
+ Constant *ArgVec[] = { V1, V2, Mask };
+ const ExprMapKeyType Key(Instruction::ShuffleVector, ArgVec);
LLVMContextImpl *pImpl = ShufTy->getContext().pImpl;
return pImpl->ExprConstants.getOrCreate(ShufTy, Key);
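The ArgVec changes above all follow one pattern: replace a heap-allocating std::vector with a small stack array, assuming ExprMapKeyType now accepts an ArrayRef-style operand span. A hedged standalone sketch of why a plain C array suffices (Span and Key are illustrative stand-ins, not the real types):

#include <cstddef>

// Minimal ArrayRef-like view; it borrows the array, it does not own or copy it.
template <typename T> struct Span {
  const T *Data;
  size_t Length;
  template <size_t N>
  Span(const T (&Arr)[N]) : Data(Arr), Length(N) {}
};

struct Key {
  unsigned Opcode;
  Span<int> Ops;
  Key(unsigned Opc, Span<int> Ops) : Opcode(Opc), Ops(Ops) {}
};

int main() {
  // Stack array, no heap allocation; converts implicitly to the span.
  int ArgVec[] = { 1, 2, 3 };
  Key K(/*Opcode=*/13, ArgVec);
  return K.Ops.Length == 3 ? 0 : 1;
}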
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
index f31e531..34921af 100644
--- a/lib/IR/DIBuilder.cpp
+++ b/lib/IR/DIBuilder.cpp
@@ -95,11 +95,8 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit),
Constant::getNullValue(Type::getInt32Ty(VMContext)),
ConstantInt::get(Type::getInt32Ty(VMContext), Lang),
- MDString::get(VMContext, Filename),
- MDString::get(VMContext, Directory),
+ createFile(Filename, Directory),
MDString::get(VMContext, Producer),
- // isMain field can be removed when we remove the legacy debug info.
- ConstantInt::get(Type::getInt1Ty(VMContext), true), // isMain
ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
MDString::get(VMContext, Flags),
ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer),
@@ -119,13 +116,14 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
/// createFile - Create a file descriptor to hold debugging information
/// for a file.
DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) {
- assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit");
assert(!Filename.empty() && "Unable to create file without name");
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_file_type),
+ Value *Pair[] = {
MDString::get(VMContext, Filename),
MDString::get(VMContext, Directory),
- NULL // TheCU
+ };
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_file_type),
+ MDNode::get(VMContext, Pair)
};
return DIFile(MDNode::get(VMContext, Elts));
}
@@ -487,7 +485,9 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
DIType DerivedFrom, DIArray Elements,
MDNode *VTableHolder,
MDNode *TemplateParams) {
- // TAG_class_type is encoded in DICompositeType format.
+ assert((!Context || Context.Verify()) &&
+ "createClassType should be called with a valid Context");
+ // TAG_class_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_class_type),
getNonCompileUnitScope(Context),
@@ -504,7 +504,9 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
VTableHolder,
TemplateParams
};
- return DIType(MDNode::get(VMContext, Elts));
+ DIType R(MDNode::get(VMContext, Elts));
+ assert(R.Verify() && "createClassType should return a verifiable DIType");
+ return R;
}
/// createStructType - Create debugging information entry for a struct.
@@ -534,7 +536,9 @@ DICompositeType DIBuilder::createStructType(DIDescriptor Context,
VTableHolder,
NULL,
};
- return DICompositeType(MDNode::get(VMContext, Elts));
+ DICompositeType R(MDNode::get(VMContext, Elts));
+ assert(R.Verify() && "createStructType should return a verifiable DIType");
+ return R;
}
/// createUnionType - Create debugging information entry for a union.
@@ -766,6 +770,8 @@ DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name,
ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang)
};
MDNode *Node = MDNode::getTemporary(VMContext, Elts);
+ assert(DIType(Node).Verify() &&
+ "createForwardDecl result should be verifiable");
return DIType(Node);
}
@@ -846,6 +852,11 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
unsigned LineNo, DIType Ty,
bool AlwaysPreserve, unsigned Flags,
unsigned ArgNo) {
+ DIDescriptor Context(getNonCompileUnitScope(Scope));
+ assert((!Context || Context.Verify()) &&
+ "createLocalVariable should be called with a valid Context");
+ assert(Ty.Verify() &&
+ "createLocalVariable should be called with a valid type");
Value *Elts[] = {
GetTagConstant(VMContext, Tag),
getNonCompileUnitScope(Scope),
@@ -865,6 +876,8 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, Fn);
FnLocals->addOperand(Node);
}
+ assert(DIVariable(Node).Verify() &&
+ "createLocalVariable should return a verifiable DIVariable");
return DIVariable(Node);
}
@@ -990,7 +1003,10 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
};
- return DINameSpace(MDNode::get(VMContext, Elts));
+ DINameSpace R(MDNode::get(VMContext, Elts));
+ assert(R.Verify() &&
+ "createNameSpace should return a verifiable DINameSpace");
+ return R;
}
/// createLexicalBlockFile - This creates a new MDNode that encapsulates
@@ -1002,7 +1018,11 @@ DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope,
Scope,
File
};
- return DILexicalBlockFile(MDNode::get(VMContext, Elts));
+ DILexicalBlockFile R(MDNode::get(VMContext, Elts));
+ assert(
+ R.Verify() &&
+ "createLexicalBlockFile should return a verifiable DILexicalBlockFile");
+ return R;
}
DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
@@ -1017,7 +1037,10 @@ DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
File,
ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++)
};
- return DILexicalBlock(MDNode::get(VMContext, Elts));
+ DILexicalBlock R(MDNode::get(VMContext, Elts));
+ assert(R.Verify() &&
+ "createLexicalBlock should return a verifiable DILexicalBlock");
+ return R;
}
/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp
index f09de3a..4100c4f 100644
--- a/lib/IR/DataLayout.cpp
+++ b/lib/IR/DataLayout.cpp
@@ -438,6 +438,12 @@ DataLayout::~DataLayout() {
delete static_cast<StructLayoutMap*>(LayoutMap);
}
+bool DataLayout::doFinalization(Module &M) {
+ delete static_cast<StructLayoutMap*>(LayoutMap);
+ LayoutMap = 0;
+ return false;
+}
+
const StructLayout *DataLayout::getStructLayout(StructType *Ty) const {
if (!LayoutMap)
LayoutMap = new StructLayoutMap();
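The new doFinalization hook gives the lazily built StructLayoutMap a definite end of life when a module is finished, instead of waiting for the pass object to be destroyed. A rough sketch of the pattern with hypothetical names (std::map standing in for the real StructType*-keyed map):

#include <map>
#include <string>

struct LayoutCache {
  std::map<std::string, unsigned> *Cache;
  LayoutCache() : Cache(0) {}

  unsigned &get(const std::string &Key) {
    if (!Cache)
      Cache = new std::map<std::string, unsigned>(); // built lazily on demand
    return (*Cache)[Key];
  }

  // Mirrors DataLayout::doFinalization: drop per-module state so a reused
  // pass object starts the next module with an empty cache.
  bool doFinalization() {
    delete Cache;
    Cache = 0;
    return false; // module not modified
  }

  ~LayoutCache() { doFinalization(); }
};

int main() {
  LayoutCache C;
  C.get("S1") = 8;
  C.doFinalization();            // cache released
  return C.get("S1") == 0 ? 0 : 1; // rebuilt empty on next use
}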
diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp
index a59fdcd..1a5454e 100644
--- a/lib/IR/DebugInfo.cpp
+++ b/lib/IR/DebugInfo.cpp
@@ -51,18 +51,43 @@ DIDescriptor::DIDescriptor(const DIVariable F) : DbgNode(F.DbgNode) {
DIDescriptor::DIDescriptor(const DIType F) : DbgNode(F.DbgNode) {
}
-StringRef
-DIDescriptor::getStringField(unsigned Elt) const {
- if (DbgNode == 0)
- return StringRef();
+bool DIDescriptor::Verify() const {
+ return DbgNode &&
+ (DIDerivedType(DbgNode).Verify() ||
+ DICompositeType(DbgNode).Verify() || DIBasicType(DbgNode).Verify() ||
+ DIVariable(DbgNode).Verify() || DISubprogram(DbgNode).Verify() ||
+ DIGlobalVariable(DbgNode).Verify() || DIFile(DbgNode).Verify() ||
+ DICompileUnit(DbgNode).Verify() || DINameSpace(DbgNode).Verify() ||
+ DILexicalBlock(DbgNode).Verify() ||
+ DILexicalBlockFile(DbgNode).Verify() ||
+ DISubrange(DbgNode).Verify() || DIEnumerator(DbgNode).Verify() ||
+ DIObjCProperty(DbgNode).Verify() ||
+ DITemplateTypeParameter(DbgNode).Verify() ||
+ DITemplateValueParameter(DbgNode).Verify());
+}
+
+static Value *getField(const MDNode *DbgNode, unsigned Elt) {
+ if (DbgNode == 0 || Elt >= DbgNode->getNumOperands())
+ return 0;
+ return DbgNode->getOperand(Elt);
+}
- if (Elt < DbgNode->getNumOperands())
- if (MDString *MDS = dyn_cast_or_null<MDString>(DbgNode->getOperand(Elt)))
- return MDS->getString();
+static const MDNode *getNodeField(const MDNode *DbgNode, unsigned Elt) {
+ if (const MDNode *R = dyn_cast_or_null<MDNode>(getField(DbgNode, Elt)))
+ return R;
+ return 0;
+}
+static StringRef getStringField(const MDNode *DbgNode, unsigned Elt) {
+ if (MDString *MDS = dyn_cast_or_null<MDString>(getField(DbgNode, Elt)))
+ return MDS->getString();
return StringRef();
}
+StringRef DIDescriptor::getStringField(unsigned Elt) const {
+ return ::getStringField(DbgNode, Elt);
+}
+
uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
if (DbgNode == 0)
return 0;
@@ -135,17 +160,11 @@ void DIDescriptor::replaceFunctionField(unsigned Elt, Function *F) {
}
unsigned DIVariable::getNumAddrElements() const {
- if (getVersion() <= LLVMDebugVersion8)
- return DbgNode->getNumOperands()-6;
- if (getVersion() == LLVMDebugVersion9)
- return DbgNode->getNumOperands()-7;
return DbgNode->getNumOperands()-8;
}
/// getInlinedAt - If this variable is inlined then return inline location.
MDNode *DIVariable::getInlinedAt() const {
- if (getVersion() <= LLVMDebugVersion9)
- return NULL;
return dyn_cast_or_null<MDNode>(DbgNode->getOperand(7));
}
@@ -392,31 +411,30 @@ bool DIType::isUnsignedDIType() {
/// Verify - Verify that a compile unit is well formed.
bool DICompileUnit::Verify() const {
- if (!DbgNode)
+ if (!isCompileUnit())
return false;
StringRef N = getFilename();
if (N.empty())
return false;
  // It is possible that the directory and producer strings are empty.
- return true;
+ return DbgNode->getNumOperands() == 13;
}
/// Verify - Verify that an ObjC property is well formed.
bool DIObjCProperty::Verify() const {
- if (!DbgNode)
+ if (!isObjCProperty())
return false;
- unsigned Tag = getTag();
- if (Tag != dwarf::DW_TAG_APPLE_property) return false;
+
DIType Ty = getType();
if (!Ty.Verify()) return false;
// Don't worry about the rest of the strings for now.
- return true;
+ return DbgNode->getNumOperands() == 8;
}
/// Verify - Verify that a type descriptor is well formed.
bool DIType::Verify() const {
- if (!DbgNode)
+ if (!isType())
return false;
if (getContext() && !getContext().Verify())
return false;
@@ -437,27 +455,28 @@ bool DIType::Verify() const {
/// Verify - Verify that a basic type descriptor is well formed.
bool DIBasicType::Verify() const {
- return isBasicType();
+ return isBasicType() && DbgNode->getNumOperands() == 10;
}
/// Verify - Verify that a derived type descriptor is well formed.
bool DIDerivedType::Verify() const {
- return isDerivedType();
+ return isDerivedType() && DbgNode->getNumOperands() >= 10 &&
+ DbgNode->getNumOperands() <= 14;
}
/// Verify - Verify that a composite type descriptor is well formed.
bool DICompositeType::Verify() const {
- if (!DbgNode)
+ if (!isCompositeType())
return false;
if (getContext() && !getContext().Verify())
return false;
- return true;
+ return DbgNode->getNumOperands() >= 10 && DbgNode->getNumOperands() <= 14;
}
/// Verify - Verify that a subprogram descriptor is well formed.
bool DISubprogram::Verify() const {
- if (!DbgNode)
+ if (!isSubprogram())
return false;
if (getContext() && !getContext().Verify())
@@ -466,12 +485,12 @@ bool DISubprogram::Verify() const {
DICompositeType Ty = getType();
if (!Ty.Verify())
return false;
- return true;
+ return DbgNode->getNumOperands() == 21;
}
/// Verify - Verify that a global variable descriptor is well formed.
bool DIGlobalVariable::Verify() const {
- if (!DbgNode)
+ if (!isGlobalVariable())
return false;
if (getDisplayName().empty())
@@ -487,12 +506,12 @@ bool DIGlobalVariable::Verify() const {
if (!getGlobal() && !getConstant())
return false;
- return true;
+ return DbgNode->getNumOperands() == 13;
}
/// Verify - Verify that a variable descriptor is well formed.
bool DIVariable::Verify() const {
- if (!DbgNode)
+ if (!isVariable())
return false;
if (getContext() && !getContext().Verify())
@@ -502,7 +521,7 @@ bool DIVariable::Verify() const {
if (!Ty.Verify())
return false;
- return true;
+ return DbgNode->getNumOperands() >= 8;
}
/// Verify - Verify that a location descriptor is well formed.
@@ -515,11 +534,44 @@ bool DILocation::Verify() const {
/// Verify - Verify that a namespace descriptor is well formed.
bool DINameSpace::Verify() const {
- if (!DbgNode)
- return false;
- if (getName().empty())
+ if (!isNameSpace())
return false;
- return true;
+ return DbgNode->getNumOperands() == 5;
+}
+
+/// \brief Verify that the file descriptor is well formed.
+bool DIFile::Verify() const {
+ return isFile() && DbgNode->getNumOperands() == 2;
+}
+
+/// \brief Verify that the enumerator descriptor is well formed.
+bool DIEnumerator::Verify() const {
+ return isEnumerator() && DbgNode->getNumOperands() == 3;
+}
+
+/// \brief Verify that the subrange descriptor is well formed.
+bool DISubrange::Verify() const {
+ return isSubrange() && DbgNode->getNumOperands() == 3;
+}
+
+/// \brief Verify that the lexical block descriptor is well formed.
+bool DILexicalBlock::Verify() const {
+ return isLexicalBlock() && DbgNode->getNumOperands() == 6;
+}
+
+/// \brief Verify that the file-scoped lexical block descriptor is well formed.
+bool DILexicalBlockFile::Verify() const {
+ return isLexicalBlockFile() && DbgNode->getNumOperands() == 3;
+}
+
+/// \brief Verify that the template type parameter descriptor is well formed.
+bool DITemplateTypeParameter::Verify() const {
+ return isTemplateTypeParameter() && DbgNode->getNumOperands() == 7;
+}
+
+/// \brief Verify that the template value parameter descriptor is well formed.
+bool DITemplateValueParameter::Verify() const {
+ return isTemplateValueParameter() && DbgNode->getNumOperands() == 8;
}
/// getOriginalTypeSize - If this type is derived from a base type then
@@ -553,7 +605,7 @@ uint64_t DIDerivedType::getOriginalTypeSize() const {
/// getObjCProperty - Return property node, if this ivar is associated with one.
MDNode *DIDerivedType::getObjCProperty() const {
- if (getVersion() <= LLVMDebugVersion11 || DbgNode->getNumOperands() <= 10)
+ if (DbgNode->getNumOperands() <= 10)
return NULL;
return dyn_cast_or_null<MDNode>(DbgNode->getOperand(10));
}
@@ -619,9 +671,7 @@ StringRef DIScope::getFilename() const {
return DINameSpace(DbgNode).getFilename();
if (isType())
return DIType(DbgNode).getFilename();
- if (isFile())
- return DIFile(DbgNode).getFilename();
- llvm_unreachable("Invalid DIScope!");
+ return ::getStringField(getNodeField(DbgNode, 1), 0);
}
StringRef DIScope::getDirectory() const {
@@ -639,44 +689,42 @@ StringRef DIScope::getDirectory() const {
return DINameSpace(DbgNode).getDirectory();
if (isType())
return DIType(DbgNode).getDirectory();
- if (isFile())
- return DIFile(DbgNode).getDirectory();
- llvm_unreachable("Invalid DIScope!");
+ return ::getStringField(getNodeField(DbgNode, 1), 1);
}
DIArray DICompileUnit::getEnumTypes() const {
- if (!DbgNode || DbgNode->getNumOperands() < 14)
+ if (!DbgNode || DbgNode->getNumOperands() < 13)
return DIArray();
- if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10)))
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(8)))
return DIArray(N);
return DIArray();
}
DIArray DICompileUnit::getRetainedTypes() const {
- if (!DbgNode || DbgNode->getNumOperands() < 14)
+ if (!DbgNode || DbgNode->getNumOperands() < 13)
return DIArray();
- if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(11)))
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(9)))
return DIArray(N);
return DIArray();
}
DIArray DICompileUnit::getSubprograms() const {
- if (!DbgNode || DbgNode->getNumOperands() < 14)
+ if (!DbgNode || DbgNode->getNumOperands() < 13)
return DIArray();
- if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(12)))
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10)))
return DIArray(N);
return DIArray();
}
DIArray DICompileUnit::getGlobalVariables() const {
- if (!DbgNode || DbgNode->getNumOperands() < 14)
+ if (!DbgNode || DbgNode->getNumOperands() < 13)
return DIArray();
- if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(13)))
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(11)))
return DIArray(N);
return DIArray();
}
@@ -805,71 +853,25 @@ void DebugInfoFinder::processModule(const Module &M) {
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
DICompileUnit CU(CU_Nodes->getOperand(i));
addCompileUnit(CU);
- if (CU.getVersion() > LLVMDebugVersion10) {
- DIArray GVs = CU.getGlobalVariables();
- for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) {
- DIGlobalVariable DIG(GVs.getElement(i));
- if (addGlobalVariable(DIG))
- processType(DIG.getType());
- }
- DIArray SPs = CU.getSubprograms();
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
- processSubprogram(DISubprogram(SPs.getElement(i)));
- DIArray EnumTypes = CU.getEnumTypes();
- for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
- processType(DIType(EnumTypes.getElement(i)));
- DIArray RetainedTypes = CU.getRetainedTypes();
- for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
- processType(DIType(RetainedTypes.getElement(i)));
- return;
+ DIArray GVs = CU.getGlobalVariables();
+ for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) {
+ DIGlobalVariable DIG(GVs.getElement(i));
+ if (addGlobalVariable(DIG))
+ processType(DIG.getType());
}
+ DIArray SPs = CU.getSubprograms();
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
+ processSubprogram(DISubprogram(SPs.getElement(i)));
+ DIArray EnumTypes = CU.getEnumTypes();
+ for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
+ processType(DIType(EnumTypes.getElement(i)));
+ DIArray RetainedTypes = CU.getRetainedTypes();
+ for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
+ processType(DIType(RetainedTypes.getElement(i)));
+ // FIXME: We really shouldn't be bailing out after visiting just one CU
+ return;
}
}
-
- for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
- for (Function::const_iterator FI = (*I).begin(), FE = (*I).end();
- FI != FE; ++FI)
- for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end();
- BI != BE; ++BI) {
- if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
- processDeclare(DDI);
-
- DebugLoc Loc = BI->getDebugLoc();
- if (Loc.isUnknown())
- continue;
-
- LLVMContext &Ctx = BI->getContext();
- DIDescriptor Scope(Loc.getScope(Ctx));
-
- if (Scope.isCompileUnit())
- addCompileUnit(DICompileUnit(Scope));
- else if (Scope.isSubprogram())
- processSubprogram(DISubprogram(Scope));
- else if (Scope.isLexicalBlockFile()) {
- DILexicalBlockFile DBF = DILexicalBlockFile(Scope);
- processLexicalBlock(DILexicalBlock(DBF.getScope()));
- }
- else if (Scope.isLexicalBlock())
- processLexicalBlock(DILexicalBlock(Scope));
-
- if (MDNode *IA = Loc.getInlinedAt(Ctx))
- processLocation(DILocation(IA));
- }
-
- if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) {
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIGlobalVariable DIG(cast<MDNode>(NMD->getOperand(i)));
- if (addGlobalVariable(DIG)) {
- if (DIG.getVersion() <= LLVMDebugVersion10)
- addCompileUnit(DIG.getCompileUnit());
- processType(DIG.getType());
- }
- }
- }
-
- if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp"))
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
- processSubprogram(DISubprogram(NMD->getOperand(i)));
}
/// processLocation - Process DILocation.
@@ -893,8 +895,6 @@ void DebugInfoFinder::processLocation(DILocation Loc) {
void DebugInfoFinder::processType(DIType DT) {
if (!addType(DT))
return;
- if (DT.getVersion() <= LLVMDebugVersion10)
- addCompileUnit(DT.getCompileUnit());
if (DT.isCompositeType()) {
DICompositeType DCT(DT);
processType(DCT.getTypeDerivedFrom());
@@ -929,8 +929,6 @@ void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) {
void DebugInfoFinder::processSubprogram(DISubprogram SP) {
if (!addSubprogram(SP))
return;
- if (SP.getVersion() <= LLVMDebugVersion10)
- addCompileUnit(SP.getCompileUnit());
processType(SP.getType());
}
@@ -945,8 +943,6 @@ void DebugInfoFinder::processDeclare(const DbgDeclareInst *DDI) {
if (!NodesSeen.insert(DV))
return;
- if (DIVariable(N).getVersion() <= LLVMDebugVersion10)
- addCompileUnit(DIVariable(N).getCompileUnit());
processType(DIVariable(N).getType());
}
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index 5c444d2..5559a6c 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/Function.h"
+#include "LLVMContextImpl.h"
#include "SymbolTableListTraitsImpl.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
@@ -208,6 +209,10 @@ Function::~Function() {
// Remove the function from the on-the-side GC table.
clearGC();
+
+  // Remove the intrinsic ID from the cache.
+  if (getValueName() && isIntrinsic())
+ getContext().pImpl->IntrinsicIDCache.erase(this);
}
void Function::BuildLazyArguments() const {
@@ -337,18 +342,35 @@ void Function::copyAttributesFrom(const GlobalValue *Src) {
/// intrinsic, or if the pointer is null. This value is always defined to be
/// zero to allow easy checking for whether a function is intrinsic or not. The
/// particular intrinsic functions which correspond to this value are defined in
-/// llvm/Intrinsics.h.
+/// llvm/Intrinsics.h. Results are cached in the LLVM context; subsequent
+/// lookups for the same function are answered directly from the cache.
///
unsigned Function::getIntrinsicID() const {
const ValueName *ValName = this->getValueName();
if (!ValName || !isIntrinsic())
return 0;
+
+ LLVMContextImpl::IntrinsicIDCacheTy &IntrinsicIDCache =
+ getContext().pImpl->IntrinsicIDCache;
+  if (!IntrinsicIDCache.count(this)) {
+    unsigned Id = lookupIntrinsicID();
+    IntrinsicIDCache[this] = Id;
+ return Id;
+ }
+ return IntrinsicIDCache[this];
+}
+
+/// This private method does the actual lookup of an intrinsic ID when the query
+/// could not be answered from the cache.
+unsigned Function::lookupIntrinsicID() const {
+ const ValueName *ValName = this->getValueName();
unsigned Len = ValName->getKeyLength();
const char *Name = ValName->getKeyData();
#define GET_FUNCTION_RECOGNIZER
#include "llvm/IR/Intrinsics.gen"
#undef GET_FUNCTION_RECOGNIZER
+
return 0;
}
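The caching added to getIntrinsicID() is a straightforward memoization keyed on the Function pointer, invalidated on rename and destruction (the erase() calls in ~Function() above and in Value::setName() later in this patch). A small standalone sketch of the same pattern, with std::map in place of DenseMap and a dummy slow lookup standing in for the generated recognizer:

#include <map>
#include <string>

// Stand-in for the expensive recognizer generated from Intrinsics.gen.
static unsigned slowLookup(const std::string &Name) {
  return Name.compare(0, 5, "llvm.") == 0 ? 1 : 0;
}

struct Context {
  // Keyed by object identity, like LLVMContextImpl::IntrinsicIDCache.
  std::map<const std::string *, unsigned> IDCache;

  unsigned getID(const std::string *Fn) {
    std::map<const std::string *, unsigned>::iterator It = IDCache.find(Fn);
    if (It != IDCache.end())
      return It->second;           // hit: no string matching needed
    unsigned ID = slowLookup(*Fn); // miss: do the real work once
    IDCache[Fn] = ID;
    return ID;
  }

  // Must be called when the key dies or is renamed, otherwise stale IDs are
  // returned; this is what the erase() calls in the patch take care of.
  void forget(const std::string *Fn) { IDCache.erase(Fn); }
};

int main() {
  Context C;
  std::string F = "llvm.memcpy.p0i8.p0i8.i32";
  unsigned First = C.getID(&F);  // computed
  unsigned Second = C.getID(&F); // served from the cache
  return (First == 1 && Second == 1) ? 0 : 1;
}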
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index 8a0a465..2e3a525 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -331,12 +331,9 @@ CallInst::CallInst(const CallInst &CI)
SubclassOptionalData = CI.SubclassOptionalData;
}
-void CallInst::addAttribute(unsigned i, Attribute attr) {
+void CallInst::addAttribute(unsigned i, Attribute::AttrKind attr) {
AttributeSet PAL = getAttributes();
- AttrBuilder B(attr);
- LLVMContext &Context = getContext();
- PAL = PAL.addAttributes(Context, i,
- AttributeSet::get(Context, i, B));
+ PAL = PAL.addAttribute(getContext(), i, attr);
setAttributes(PAL);
}
@@ -593,11 +590,9 @@ bool InvokeInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const {
return false;
}
-void InvokeInst::addAttribute(unsigned i, Attribute attr) {
+void InvokeInst::addAttribute(unsigned i, Attribute::AttrKind attr) {
AttributeSet PAL = getAttributes();
- AttrBuilder B(attr);
- PAL = PAL.addAttributes(getContext(), i,
- AttributeSet::get(getContext(), i, B));
+ PAL = PAL.addAttribute(getContext(), i, attr);
setAttributes(PAL);
}
diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp
index b73cd03..883bb98 100644
--- a/lib/IR/LLVMContext.cpp
+++ b/lib/IR/LLVMContext.cpp
@@ -58,6 +58,11 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
unsigned TBAAStructID = getMDKindID("tbaa.struct");
assert(TBAAStructID == MD_tbaa_struct && "tbaa.struct kind id drifted");
(void)TBAAStructID;
+
+ // Create the 'invariant.load' metadata kind.
+ unsigned InvariantLdId = getMDKindID("invariant.load");
+ assert(InvariantLdId == MD_invariant_load && "invariant.load kind id drifted");
+ (void)InvariantLdId;
}
LLVMContext::~LLVMContext() { delete pImpl; }
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index 7353dc0..0c659b8 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -318,7 +318,7 @@ public:
/// ValueHandles - This map keeps track of all of the value handles that are
/// watching a Value*. The Value::HasValueHandle bit is used to know
- // whether or not a value has an entry in this map.
+ /// whether or not a value has an entry in this map.
typedef DenseMap<Value*, ValueHandleBase*> ValueHandlesTy;
ValueHandlesTy ValueHandles;
@@ -350,6 +350,11 @@ public:
/// to date.
std::vector<std::pair<DebugRecVH, DebugRecVH> > ScopeInlinedAtRecords;
+  /// IntrinsicIDCache - Cache of Function* to intrinsic numeric ID mappings
+  /// looked up in this context.
+ typedef DenseMap<const Function*, unsigned> IntrinsicIDCacheTy;
+ IntrinsicIDCacheTy IntrinsicIDCache;
+
int getOrAddScopeRecordIdxEntry(MDNode *N, int ExistingIdx);
int getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,int ExistingIdx);
diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp
index d751064..0228aeb 100644
--- a/lib/IR/Metadata.cpp
+++ b/lib/IR/Metadata.cpp
@@ -303,6 +303,7 @@ void MDNode::deleteTemporary(MDNode *N) {
/// getOperand - Return specified operand.
Value *MDNode::getOperand(unsigned i) const {
+ assert(i < getNumOperands() && "Invalid operand number");
return *getOperandPtr(const_cast<MDNode*>(this), i);
}
diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp
index 5bdce2b..adc702e 100644
--- a/lib/IR/Value.cpp
+++ b/lib/IR/Value.cpp
@@ -195,6 +195,9 @@ void Value::setName(const Twine &NewName) {
if (getSymTab(this, ST))
return; // Cannot set a name on this value (e.g. constant).
+ if (Function *F = dyn_cast<Function>(this))
+ getContext().pImpl->IntrinsicIDCache.erase(F);
+
if (!ST) { // No symbol table to update? Just do the change.
if (NameRef.empty()) {
// Free the name for this value.
@@ -307,7 +310,7 @@ void Value::replaceAllUsesWith(Value *New) {
// Notify all ValueHandles (if present) that this value is going away.
if (HasValueHandle)
ValueHandleBase::ValueIsRAUWd(this, New);
-
+
while (!use_empty()) {
Use &U = *UseList;
// Must handle Constants specially, we cannot call replaceUsesOfWith on a
@@ -318,10 +321,10 @@ void Value::replaceAllUsesWith(Value *New) {
continue;
}
}
-
+
U.set(New);
}
-
+
if (BasicBlock *BB = dyn_cast<BasicBlock>(this))
BB->replaceSuccessorsPhiUsesWith(cast<BasicBlock>(New));
}
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index c358a0a..0acbcfa 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -1292,7 +1292,7 @@ bool ModuleLinker::run() {
//===----------------------------------------------------------------------===//
/// LinkModules - This function links two modules together, with the resulting
-/// left module modified to be the composite of the two input modules. If an
+/// Dest module modified to be the composite of the two input modules. If an
/// error occurs, true is returned and ErrorMsg (if not null) is set to indicate
/// the problem. Upon failure, the Dest module could be in a modified state,
/// and shouldn't be relied on to be consistent.
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index 0b97f27..3d99548 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -1,4 +1,4 @@
-//===- lib/MC/ELFObjectWriter.cpp - ELF File Writer -------------------===//
+//===- lib/MC/ELFObjectWriter.cpp - ELF File Writer -----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -155,7 +155,7 @@ class ELFObjectWriter : public MCObjectWriter {
raw_ostream &_OS, bool IsLittleEndian)
: MCObjectWriter(_OS, IsLittleEndian),
TargetObjectWriter(MOTW),
- NeedsGOT(false), NeedsSymtabShndx(false){
+ NeedsGOT(false), NeedsSymtabShndx(false) {
}
virtual ~ELFObjectWriter();
@@ -978,7 +978,7 @@ void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm,
for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
UndefinedSymbolData[i].SymbolData->setIndex(Index++);
- if (NumRegularSections > ELF::SHN_LORESERVE)
+ if (Index >= ELF::SHN_LORESERVE)
NeedsSymtabShndx = true;
}
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 7eb7202..35613b4 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -215,7 +215,7 @@ public:
virtual void EmitFileDirective(StringRef Filename);
virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
- StringRef Filename);
+ StringRef Filename, unsigned CUID = 0);
virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
unsigned Column, unsigned Flags,
unsigned Isa, unsigned Discriminator,
@@ -828,14 +828,14 @@ void MCAsmStreamer::EmitFileDirective(StringRef Filename) {
}
bool MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
- StringRef Filename) {
+ StringRef Filename, unsigned CUID) {
if (!UseDwarfDirectory && !Directory.empty()) {
if (sys::path::is_absolute(Filename))
- return EmitDwarfFileDirective(FileNo, "", Filename);
+ return EmitDwarfFileDirective(FileNo, "", Filename, CUID);
SmallString<128> FullPathName = Directory;
sys::path::append(FullPathName, Filename);
- return EmitDwarfFileDirective(FileNo, "", FullPathName);
+ return EmitDwarfFileDirective(FileNo, "", FullPathName, CUID);
}
if (UseLoc) {
@@ -846,8 +846,11 @@ bool MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
}
PrintQuotedString(Filename, OS);
EmitEOL();
+    // All .file directives will belong to a single CUID.
+ CUID = 0;
}
- return this->MCStreamer::EmitDwarfFileDirective(FileNo, Directory, Filename);
+ return this->MCStreamer::EmitDwarfFileDirective(FileNo, Directory, Filename,
+ CUID);
}
void MCAsmStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 1a7df60..9adcc02 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -77,8 +77,8 @@ void MCContext::reset() {
Symbols.clear();
Allocator.Reset();
Instances.clear();
- MCDwarfFiles.clear();
- MCDwarfDirs.clear();
+ MCDwarfFilesCUMap.clear();
+ MCDwarfDirsCUMap.clear();
MCGenDwarfLabelEntries.clear();
DwarfDebugFlags = StringRef();
MCLineSections.clear();
@@ -299,11 +299,13 @@ const MCSection *MCContext::getCOFFSection(StringRef Section,
/// error and zero is returned and the client reports the error, else the
/// allocated file number is returned. The file numbers may be in any order.
unsigned MCContext::GetDwarfFile(StringRef Directory, StringRef FileName,
- unsigned FileNumber) {
+ unsigned FileNumber, unsigned CUID) {
// TODO: a FileNumber of zero says to use the next available file number.
// Note: in GenericAsmParser::ParseDirectiveFile() FileNumber was checked
// to not be less than one. This needs to be change to be not less than zero.
+  SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles = MCDwarfFilesCUMap[CUID];
+  SmallVectorImpl<StringRef> &MCDwarfDirs = MCDwarfDirsCUMap[CUID];
// Make space for this FileNumber in the MCDwarfFiles vector if needed.
if (FileNumber >= MCDwarfFiles.size()) {
MCDwarfFiles.resize(FileNumber + 1);
@@ -363,7 +365,8 @@ unsigned MCContext::GetDwarfFile(StringRef Directory, StringRef FileName,
/// isValidDwarfFileNumber - takes a dwarf file number and returns true if it
/// currently is assigned and false otherwise.
-bool MCContext::isValidDwarfFileNumber(unsigned FileNumber) {
+bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) {
+  SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles = MCDwarfFilesCUMap[CUID];
if(FileNumber == 0 || FileNumber >= MCDwarfFiles.size())
return false;
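The MCContext changes replace the single global file and directory tables with per-compile-unit tables keyed by CUID, so each CU's line table gets its own numbering. A hedged sketch of that bookkeeping, simplified to std::map and std::vector and skipping the real directory-index handling:

#include <map>
#include <string>
#include <vector>

struct DwarfFileTables {
  // One file table and one directory table per compile unit ID.
  std::map<unsigned, std::vector<std::string> > FilesByCU;
  std::map<unsigned, std::vector<std::string> > DirsByCU;

  unsigned addFile(unsigned CUID, const std::string &Dir,
                   const std::string &Name) {
    std::vector<std::string> &Files = FilesByCU[CUID]; // created on first use
    DirsByCU[CUID].push_back(Dir);
    Files.push_back(Name);
    return static_cast<unsigned>(Files.size()); // 1-based file number per CU
  }
};

int main() {
  DwarfFileTables T;
  unsigned A = T.addFile(/*CUID=*/0, "/src", "a.c"); // file 1 of CU 0
  unsigned B = T.addFile(/*CUID=*/1, "/src", "b.c"); // file 1 of CU 1, not 2
  return (A == 1 && B == 1) ? 0 : 1;
}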
diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp
index d3fa906..4766b37 100644
--- a/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/lib/MC/MCDisassembler/Disassembler.cpp
@@ -44,41 +44,49 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU,
// Get the assembler info needed to setup the MCContext.
const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(Triple);
- assert(MAI && "Unable to create target asm info!");
+ if (!MAI)
+ return 0;
const MCInstrInfo *MII = TheTarget->createMCInstrInfo();
- assert(MII && "Unable to create target instruction info!");
+ if (!MII)
+ return 0;
const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple);
- assert(MRI && "Unable to create target register info!");
+ if (!MRI)
+ return 0;
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
const MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(Triple, CPU,
FeaturesStr);
- assert(STI && "Unable to create subtarget info!");
+ if (!STI)
+ return 0;
// Set up the MCContext for creating symbols and MCExpr's.
MCContext *Ctx = new MCContext(*MAI, *MRI, 0);
- assert(Ctx && "Unable to create MCContext!");
+ if (!Ctx)
+ return 0;
// Set up disassembler.
MCDisassembler *DisAsm = TheTarget->createMCDisassembler(*STI);
- assert(DisAsm && "Unable to create disassembler!");
+ if (!DisAsm)
+ return 0;
DisAsm->setupForSymbolicDisassembly(GetOpInfo, SymbolLookUp, DisInfo, Ctx);
// Set up the instruction printer.
int AsmPrinterVariant = MAI->getAssemblerDialect();
MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant,
*MAI, *MII, *MRI, *STI);
- assert(IP && "Unable to create instruction printer!");
+ if (!IP)
+ return 0;
LLVMDisasmContext *DC = new LLVMDisasmContext(Triple, DisInfo, TagType,
GetOpInfo, SymbolLookUp,
TheTarget, MAI, MRI,
STI, MII, Ctx, DisAsm, IP);
- assert(DC && "Allocation failure!");
+ if (!DC)
+ return 0;
return DC;
}
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index fea057a..0f8f074 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -298,8 +298,8 @@ const MCSymbol *MCDwarfFileTable::EmitCU(MCStreamer *MCOS, unsigned CUID) {
// Put out the directory and file tables.
// First the directory table.
- const std::vector<StringRef> &MCDwarfDirs =
- context.getMCDwarfDirs();
+ const SmallVectorImpl<StringRef> &MCDwarfDirs =
+ context.getMCDwarfDirs(CUID);
for (unsigned i = 0; i < MCDwarfDirs.size(); i++) {
MCOS->EmitBytes(MCDwarfDirs[i]); // the DirectoryName
MCOS->EmitBytes(StringRef("\0", 1)); // the null term. of the string
@@ -307,8 +307,8 @@ const MCSymbol *MCDwarfFileTable::EmitCU(MCStreamer *MCOS, unsigned CUID) {
MCOS->EmitIntValue(0, 1); // Terminate the directory list
// Second the file table.
- const std::vector<MCDwarfFile *> &MCDwarfFiles =
- MCOS->getContext().getMCDwarfFiles();
+ const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
+ MCOS->getContext().getMCDwarfFiles(CUID);
for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
MCOS->EmitBytes(MCDwarfFiles[i]->getName()); // FileName
MCOS->EmitBytes(StringRef("\0", 1)); // the null term. of the string
@@ -643,13 +643,13 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
// AT_name, the name of the source file. Reconstruct from the first directory
// and file table entries.
- const std::vector<StringRef> &MCDwarfDirs =
+ const SmallVectorImpl<StringRef> &MCDwarfDirs =
context.getMCDwarfDirs();
if (MCDwarfDirs.size() > 0) {
MCOS->EmitBytes(MCDwarfDirs[0]);
MCOS->EmitBytes("/");
}
- const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
MCOS->getContext().getMCDwarfFiles();
MCOS->EmitBytes(MCDwarfFiles[1]->getName());
MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index c1428d8..7f5f1b6 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -1,4 +1,4 @@
-//===- lib/MC/MCELFStreamer.cpp - ELF Object Output ------------===//
+//===- lib/MC/MCELFStreamer.cpp - ELF Object Output -----------------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index 89f74c1..c872b22 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -89,7 +89,7 @@ namespace {
virtual void EmitFileDirective(StringRef Filename) {}
virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
- StringRef Filename) {
+ StringRef Filename, unsigned CUID = 0) {
return false;
}
virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 2e1a045..bafa002 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -223,6 +223,11 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
}
void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
+ // FIXME: Check this. Mips64el is using the base values, which is most likely
+ // incorrect.
+ if (T.getArch() != Triple::mips64el)
+ FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+
if (T.getArch() == Triple::x86) {
PersonalityEncoding = (RelocM == Reloc::PIC_)
? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
@@ -230,15 +235,13 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
LSDAEncoding = (RelocM == Reloc::PIC_)
? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
: dwarf::DW_EH_PE_absptr;
- FDEEncoding = FDECFIEncoding = (RelocM == Reloc::PIC_)
+ FDEEncoding = (RelocM == Reloc::PIC_)
? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
: dwarf::DW_EH_PE_absptr;
TTypeEncoding = (RelocM == Reloc::PIC_)
? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
: dwarf::DW_EH_PE_absptr;
} else if (T.getArch() == Triple::x86_64) {
- FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
-
if (RelocM == Reloc::PIC_) {
PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
((CMModel == CodeModel::Small || CMModel == CodeModel::Medium)
@@ -261,8 +264,6 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
}
} else if (T.getArch() == Triple::aarch64) {
- FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
-
// The small model guarantees static code/data size < 4GB, but not where it
// will be in memory. Most of these could end up >2GB away so even a signed
// pc-relative 32-bit address is insufficient, theoretically.
@@ -282,7 +283,6 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
} else if (T.getArch() == Triple::ppc64) {
PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
dwarf::DW_EH_PE_udata8;
- FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8;
FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8;
TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 6ab49ec..9d52377 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -626,7 +626,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
return TokError("unmatched .ifs or .elses");
// Check to see there are no empty DwarfFile slots.
- const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
getContext().getMCDwarfFiles();
for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
if (!MCDwarfFiles[i])
@@ -1495,7 +1495,7 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
// If we previously parsed a cpp hash file line comment then make sure the
// current Dwarf File is for the CppHashFilename if not then emit the
// Dwarf File table for it and adjust the line number for the .loc.
- const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
getContext().getMCDwarfFiles();
if (CppHashFilename.size() != 0) {
if (MCDwarfFiles[getContext().getGenDwarfFileNumber()]->getName() !=
diff --git a/lib/MC/MCPureStreamer.cpp b/lib/MC/MCPureStreamer.cpp
index 573308a..0e04c55 100644
--- a/lib/MC/MCPureStreamer.cpp
+++ b/lib/MC/MCPureStreamer.cpp
@@ -95,7 +95,7 @@ public:
report_fatal_error("unsupported directive in pure streamer");
}
virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
- StringRef Filename) {
+ StringRef Filename, unsigned CUID = 0) {
report_fatal_error("unsupported directive in pure streamer");
}
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 9857f7b..d02e553 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
MCStreamer::MCStreamer(StreamerKind Kind, MCContext &Ctx)
: Kind(Kind), Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false),
CurrentW64UnwindInfo(0), LastSymbol(0), AutoInitSections(false) {
- const MCSection *section = NULL;
+ const MCSection *section = 0;
SectionStack.push_back(std::make_pair(section, section));
}
@@ -40,7 +40,7 @@ void MCStreamer::reset() {
EmitDebugFrame = false;
CurrentW64UnwindInfo = 0;
LastSymbol = 0;
- const MCSection *section = NULL;
+ const MCSection *section = 0;
SectionStack.clear();
SectionStack.push_back(std::make_pair(section, section));
}
@@ -157,8 +157,8 @@ void MCStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
bool MCStreamer::EmitDwarfFileDirective(unsigned FileNo,
StringRef Directory,
- StringRef Filename) {
- return getContext().GetDwarfFile(Directory, Filename, FileNo) == 0;
+ StringRef Filename, unsigned CUID) {
+ return getContext().GetDwarfFile(Directory, Filename, FileNo, CUID) == 0;
}
void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
@@ -172,7 +172,7 @@ void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
MCDwarfFrameInfo *MCStreamer::getCurrentFrameInfo() {
if (FrameInfos.empty())
- return NULL;
+ return 0;
return &FrameInfos.back();
}
@@ -473,7 +473,7 @@ void MCStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) {
report_fatal_error("Frame register and offset already specified!");
if (Offset & 0x0F)
report_fatal_error("Misaligned frame pointer offset!");
- MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, NULL, Register, Offset);
+ MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, 0, Register, Offset);
CurFrame->LastFrameInst = CurFrame->Instructions.size();
CurFrame->Instructions.push_back(Inst);
}
@@ -623,5 +623,5 @@ void MCStreamer::Finish() {
MCSymbolData &MCStreamer::getOrCreateSymbolData(MCSymbol *Symbol) {
report_fatal_error("Not supported!");
- return *(static_cast<MCSymbolData*> (NULL));
+ return *(static_cast<MCSymbolData*>(0));
}
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index eb1690e..6501df9 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -1304,14 +1304,17 @@ StringRef MachOObjectFile::getFileFormatName() const {
}
}
+ // Make sure the cpu type has the correct mask.
+ assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64)
+ == llvm::MachO::CPUArchABI64 &&
+ "32-bit object file when we're 64-bit?");
+
switch (MachOObj->getHeader().CPUType) {
case llvm::MachO::CPUTypeX86_64:
return "Mach-O 64-bit x86-64";
case llvm::MachO::CPUTypePowerPC64:
return "Mach-O 64-bit ppc64";
default:
- assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64) == 1 &&
- "32-bit object file when we're 64-bit?");
return "Mach-O 64-bit unknown";
}
}
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 53fcf06..560d7eb 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -1222,14 +1222,10 @@ sortOpts(StringMap<Option*> &OptMap,
namespace {
class HelpPrinter {
- size_t MaxArgLen;
- const Option *EmptyArg;
const bool ShowHidden;
public:
- explicit HelpPrinter(bool showHidden) : ShowHidden(showHidden) {
- EmptyArg = 0;
- }
+ explicit HelpPrinter(bool showHidden) : ShowHidden(showHidden) {}
void operator=(bool Value) {
if (Value == false) return;
@@ -1266,7 +1262,7 @@ public:
outs() << "\n\n";
// Compute the maximum argument length...
- MaxArgLen = 0;
+ size_t MaxArgLen = 0;
for (size_t i = 0, e = Opts.size(); i != e; ++i)
MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth());
diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp
index cd430f2..1ee69b6 100644
--- a/lib/Support/FileOutputBuffer.cpp
+++ b/lib/Support/FileOutputBuffer.cpp
@@ -70,8 +70,8 @@ error_code FileOutputBuffer::create(StringRef FilePath,
if (EC)
return EC;
- OwningPtr<mapped_file_region> MappedFile(
- new mapped_file_region(FD, mapped_file_region::readwrite, Size, 0, EC));
+ OwningPtr<mapped_file_region> MappedFile(new mapped_file_region(
+ FD, true, mapped_file_region::readwrite, Size, 0, EC));
if (EC)
return EC;
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 691b6f5..8042237 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -72,13 +72,15 @@ static void CopyStringRef(char *Memory, StringRef Data) {
Memory[Data.size()] = 0; // Null terminate string.
}
-/// GetNamedBuffer - Allocates a new MemoryBuffer with Name copied after it.
-template <typename T>
-static T *GetNamedBuffer(StringRef Buffer, StringRef Name,
- bool RequiresNullTerminator) {
- char *Mem = static_cast<char*>(operator new(sizeof(T) + Name.size() + 1));
- CopyStringRef(Mem + sizeof(T), Name);
- return new (Mem) T(Buffer, RequiresNullTerminator);
+struct NamedBufferAlloc {
+ StringRef Name;
+ NamedBufferAlloc(StringRef Name) : Name(Name) {}
+};
+
+void *operator new(size_t N, const NamedBufferAlloc &Alloc) {
+ char *Mem = static_cast<char *>(operator new(N + Alloc.Name.size() + 1));
+ CopyStringRef(Mem + N, Alloc.Name);
+ return Mem;
}
namespace {
@@ -105,8 +107,8 @@ public:
MemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData,
StringRef BufferName,
bool RequiresNullTerminator) {
- return GetNamedBuffer<MemoryBufferMem>(InputData, BufferName,
- RequiresNullTerminator);
+ return new (NamedBufferAlloc(BufferName))
+ MemoryBufferMem(InputData, RequiresNullTerminator);
}
/// getMemBufferCopy - Open the specified memory range as a MemoryBuffer,
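NamedBufferAlloc keeps the old GetNamedBuffer trick, storing the buffer's identifier string in the same allocation immediately after the object, but expresses it as a placement operator new that any buffer class can use. A standalone sketch of the idiom with hypothetical types and a trivial constructor assumed:

#include <cstddef>
#include <cstdio>
#include <cstring>
#include <new>

struct NameAlloc {
  const char *Name;
  explicit NameAlloc(const char *N) : Name(N) {}
};

// Allocate room for the object plus its NUL-terminated name right after it.
void *operator new(std::size_t N, const NameAlloc &A) {
  char *Mem = static_cast<char *>(::operator new(N + std::strlen(A.Name) + 1));
  std::strcpy(Mem + N, A.Name);
  return Mem;
}

struct Buffer {
  // The identifier sits immediately after the most-derived object.
  const char *identifier() const {
    return reinterpret_cast<const char *>(this + 1);
  }
};

int main() {
  Buffer *B = new (NameAlloc("input.ll")) Buffer();
  std::printf("%s\n", B->identifier()); // prints "input.ll"
  B->~Buffer();
  ::operator delete(B); // one allocation holds both the object and the name
  return 0;
}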
@@ -183,24 +185,38 @@ error_code MemoryBuffer::getFileOrSTDIN(const char *Filename,
//===----------------------------------------------------------------------===//
namespace {
-/// MemoryBufferMMapFile - This represents a file that was mapped in with the
-/// sys::Path::MapInFilePages method. When destroyed, it calls the
-/// sys::Path::UnMapFilePages method.
-class MemoryBufferMMapFile : public MemoryBufferMem {
-public:
- MemoryBufferMMapFile(StringRef Buffer, bool RequiresNullTerminator)
- : MemoryBufferMem(Buffer, RequiresNullTerminator) { }
+/// \brief Memory-maps a file descriptor using sys::fs::mapped_file_region.
+///
+/// This handles converting the offset into a legal offset on the platform.
+class MemoryBufferMMapFile : public MemoryBuffer {
+ sys::fs::mapped_file_region MFR;
+
+ static uint64_t getLegalMapOffset(uint64_t Offset) {
+ return Offset & ~(sys::fs::mapped_file_region::alignment() - 1);
+ }
- ~MemoryBufferMMapFile() {
- static int PageSize = sys::process::get_self()->page_size();
+ static uint64_t getLegalMapSize(uint64_t Len, uint64_t Offset) {
+ return Len + (Offset - getLegalMapOffset(Offset));
+ }
- uintptr_t Start = reinterpret_cast<uintptr_t>(getBufferStart());
- size_t Size = getBufferSize();
- uintptr_t RealStart = Start & ~(PageSize - 1);
- size_t RealSize = Size + (Start - RealStart);
+ const char *getStart(uint64_t Len, uint64_t Offset) {
+ return MFR.const_data() + (Offset - getLegalMapOffset(Offset));
+ }
- sys::Path::UnMapFilePages(reinterpret_cast<const char*>(RealStart),
- RealSize);
+public:
+ MemoryBufferMMapFile(bool RequiresNullTerminator, int FD, uint64_t Len,
+ uint64_t Offset, error_code EC)
+ : MFR(FD, false, sys::fs::mapped_file_region::readonly,
+ getLegalMapSize(Len, Offset), getLegalMapOffset(Offset), EC) {
+ if (!EC) {
+ const char *Start = getStart(Len, Offset);
+ init(Start, Start + Len, RequiresNullTerminator);
+ }
+ }
+
+ virtual const char *getBufferIdentifier() const LLVM_OVERRIDE {
+ // The name is stored after the class itself.
+ return reinterpret_cast<const char *>(this + 1);
}
virtual BufferKind getBufferKind() const LLVM_OVERRIDE {
@@ -244,6 +260,8 @@ error_code MemoryBuffer::getFile(const char *Filename,
OwningPtr<MemoryBuffer> &result,
int64_t FileSize,
bool RequiresNullTerminator) {
+  // FIXME: Review if this check is unnecessary on Windows as well.
+#ifdef LLVM_ON_WIN32
// First check that the "file" is not a directory
bool is_dir = false;
error_code err = sys::fs::is_directory(Filename, is_dir);
@@ -251,6 +269,7 @@ error_code MemoryBuffer::getFile(const char *Filename,
return err;
if (is_dir)
return make_error_code(errc::is_a_directory);
+#endif
int OpenFlags = O_RDONLY;
#ifdef O_BINARY
@@ -341,17 +360,11 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator,
PageSize)) {
- off_t RealMapOffset = Offset & ~(PageSize - 1);
- off_t Delta = Offset - RealMapOffset;
- size_t RealMapSize = MapSize + Delta;
-
- if (const char *Pages = sys::Path::MapInFilePages(FD,
- RealMapSize,
- RealMapOffset)) {
- result.reset(GetNamedBuffer<MemoryBufferMMapFile>(
- StringRef(Pages + Delta, MapSize), Filename, RequiresNullTerminator));
+ error_code EC;
+ result.reset(new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile(
+ RequiresNullTerminator, FD, MapSize, Offset, EC));
+ if (!EC)
return error_code::success();
- }
}
MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
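The new MemoryBufferMMapFile must map at an offset the OS accepts: the requested offset is rounded down to the mapping alignment, the length grows by the amount rounded off, and the buffer start is advanced by the same delta. The arithmetic as a standalone sketch, assuming a 4096-byte alignment:

#include <cassert>
#include <stdint.h>

static uint64_t legalOffset(uint64_t Offset, uint64_t Align) {
  return Offset & ~(Align - 1);       // round down to an alignment boundary
}
static uint64_t legalSize(uint64_t Len, uint64_t Offset, uint64_t Align) {
  return Len + (Offset - legalOffset(Offset, Align)); // grow by what was lost
}

int main() {
  const uint64_t Align = 4096;        // assumed mapping granularity
  uint64_t Offset = 10000, Len = 100; // caller asked for [10000, 10100)
  uint64_t MapOff = legalOffset(Offset, Align);    // 8192
  uint64_t MapLen = legalSize(Len, Offset, Align); // 1908
  uint64_t Delta = Offset - MapOff;                // 1808
  // The mapping covers the requested range; data starts Delta bytes in.
  assert(MapOff == 8192 && MapLen == 1908 && Delta == 1808);
  assert(MapOff + Delta == Offset && Delta + Len <= MapLen);
  return 0;
}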
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
index 3a65221..9c28176 100644
--- a/lib/Support/Statistic.cpp
+++ b/lib/Support/Statistic.cpp
@@ -40,7 +40,9 @@ namespace llvm { extern raw_ostream *CreateInfoOutputFile(); }
/// what they did.
///
static cl::opt<bool>
-Enabled("stats", cl::desc("Enable statistics output from program"));
+Enabled(
+ "stats",
+ cl::desc("Enable statistics output from program (available with Asserts)"));
namespace {
@@ -142,6 +144,7 @@ void llvm::PrintStatistics(raw_ostream &OS) {
}
void llvm::PrintStatistics() {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
StatisticInfo &Stats = *StatInfo;
// Statistics not enabled?
@@ -151,4 +154,17 @@ void llvm::PrintStatistics() {
raw_ostream &OutStream = *CreateInfoOutputFile();
PrintStatistics(OutStream);
delete &OutStream; // Close the file.
+#else
+ // Check if the -stats option is set instead of checking
+ // !Stats.Stats.empty(). In release builds, Statistics operators
+  // do nothing, so stats are never registered.
+ if (Enabled) {
+ // Get the stream to write to.
+ raw_ostream &OutStream = *CreateInfoOutputFile();
+ OutStream << "Statistics are disabled. "
+ << "Build with asserts or with -DLLVM_ENABLE_STATS\n";
+ OutStream.flush();
+ delete &OutStream; // Close the file.
+ }
+#endif
}
diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc
index e00394e..e9b26bd 100644
--- a/lib/Support/Unix/Memory.inc
+++ b/lib/Support/Unix/Memory.inc
@@ -332,7 +332,16 @@ void Memory::InvalidateInstructionCache(const void *Addr,
__clear_cache(const_cast<char *>(Start), const_cast<char *>(End));
# elif defined(__mips__)
const char *Start = static_cast<const char *>(Addr);
+# if defined(ANDROID)
+ // The declaration of "cacheflush" in Android bionic:
+ // extern int cacheflush(long start, long end, long flags);
+ const char *End = Start + Len;
+ long LStart = reinterpret_cast<long>(const_cast<char *>(Start));
+ long LEnd = reinterpret_cast<long>(const_cast<char *>(End));
+ cacheflush(LStart, LEnd, BCACHE);
+# else
cacheflush(const_cast<char *>(Start), Len, BCACHE);
+# endif
# endif
#endif // end apple
diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc
index c343455..11c5b05 100644
--- a/lib/Support/Unix/PathV2.inc
+++ b/lib/Support/Unix/PathV2.inc
@@ -419,6 +419,10 @@ retry_random_path:
RandomPath[i] = "0123456789abcdef"[sys::Process::GetRandomNumber() & 15];
}
+ // Make sure we don't fall into an infinite loop by constantly trying
+ // to create the parent path.
+ bool TriedToCreateParent = false;
+
// Try to open + create the file.
rety_open_create:
int RandomFD = ::open(RandomPath.c_str(), O_RDWR | O_CREAT | O_EXCL, mode);
@@ -429,7 +433,9 @@ rety_open_create:
goto retry_random_path;
// If path prefix doesn't exist, try to create it.
if (SavedErrno == errc::no_such_file_or_directory &&
- !exists(path::parent_path(RandomPath))) {
+ !exists(path::parent_path(RandomPath)) &&
+ !TriedToCreateParent) {
+ TriedToCreateParent = true;
StringRef p(RandomPath);
SmallString<64> dir_to_create;
for (path::const_iterator i = path::begin(p),
@@ -471,12 +477,14 @@ rety_open_create:
return error_code::success();
}
-error_code mapped_file_region::init(int fd, uint64_t offset) {
- AutoFD FD(fd);
+error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
+ AutoFD ScopedFD(FD);
+ if (!CloseFD)
+ ScopedFD.take();
// Figure out how large the file is.
struct stat FileInfo;
- if (fstat(fd, &FileInfo) == -1)
+ if (fstat(FD, &FileInfo) == -1)
return error_code(errno, system_category());
uint64_t FileSize = FileInfo.st_size;
@@ -484,7 +492,7 @@ error_code mapped_file_region::init(int fd, uint64_t offset) {
Size = FileSize;
else if (FileSize < Size) {
// We need to grow the file.
- if (ftruncate(fd, Size) == -1)
+ if (ftruncate(FD, Size) == -1)
return error_code(errno, system_category());
}
@@ -493,7 +501,7 @@ error_code mapped_file_region::init(int fd, uint64_t offset) {
#ifdef MAP_FILE
flags |= MAP_FILE;
#endif
- Mapping = ::mmap(0, Size, prot, flags, fd, offset);
+ Mapping = ::mmap(0, Size, prot, flags, FD, Offset);
if (Mapping == MAP_FAILED)
return error_code(errno, system_category());
return error_code::success();
@@ -522,12 +530,13 @@ mapped_file_region::mapped_file_region(const Twine &path,
return;
}
- ec = init(ofd, offset);
+ ec = init(ofd, true, offset);
if (ec)
Mapping = 0;
}
mapped_file_region::mapped_file_region(int fd,
+ bool closefd,
mapmode mode,
uint64_t length,
uint64_t offset,
@@ -541,7 +550,7 @@ mapped_file_region::mapped_file_region(int fd,
return;
}
- ec = init(fd, offset);
+ ec = init(fd, closefd, offset);
if (ec)
Mapping = 0;
}
diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc
index 2e6cc96..23f3d14 100644
--- a/lib/Support/Windows/PathV2.inc
+++ b/lib/Support/Windows/PathV2.inc
@@ -593,6 +593,10 @@ retry_random_path:
random_path_utf16.push_back(0);
random_path_utf16.pop_back();
+ // Make sure we don't fall into an infinite loop by constantly trying
+ // to create the parent path.
+ bool TriedToCreateParent = false;
+
// Try to create + open the path.
retry_create_file:
HANDLE TempFileHandle = ::CreateFileW(random_path_utf16.begin(),
@@ -610,7 +614,9 @@ retry_create_file:
if (ec == windows_error::file_exists)
goto retry_random_path;
// Check for non-existing parent directories.
- if (ec == windows_error::path_not_found) {
+ if (ec == windows_error::path_not_found && !TriedToCreateParent) {
+ TriedToCreateParent = true;
+
// Create the directories using result_path as temp storage.
if (error_code ec = UTF16ToUTF8(random_path_utf16.begin(),
random_path_utf16.size(), result_path))
@@ -705,13 +711,14 @@ error_code get_magic(const Twine &path, uint32_t len,
return error_code::success();
}
-error_code mapped_file_region::init(int FD, uint64_t Offset) {
+error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
FileDescriptor = FD;
// Make sure that the requested size fits within SIZE_T.
if (Size > std::numeric_limits<SIZE_T>::max()) {
- if (FileDescriptor)
- _close(FileDescriptor);
- else
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor);
+ } else
::CloseHandle(FileHandle);
return make_error_code(errc::invalid_argument);
}
@@ -732,9 +739,10 @@ error_code mapped_file_region::init(int FD, uint64_t Offset) {
0);
if (FileMappingHandle == NULL) {
error_code ec = windows_error(GetLastError());
- if (FileDescriptor)
- _close(FileDescriptor);
- else
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor);
+ } else
::CloseHandle(FileHandle);
return ec;
}
@@ -754,9 +762,10 @@ error_code mapped_file_region::init(int FD, uint64_t Offset) {
if (Mapping == NULL) {
error_code ec = windows_error(GetLastError());
::CloseHandle(FileMappingHandle);
- if (FileDescriptor)
- _close(FileDescriptor);
- else
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor);
+ } else
::CloseHandle(FileHandle);
return ec;
}
@@ -768,14 +777,24 @@ error_code mapped_file_region::init(int FD, uint64_t Offset) {
error_code ec = windows_error(GetLastError());
::UnmapViewOfFile(Mapping);
::CloseHandle(FileMappingHandle);
- if (FileDescriptor)
- _close(FileDescriptor);
- else
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor);
+ } else
::CloseHandle(FileHandle);
return ec;
}
Size = mbi.RegionSize;
}
+
+  // Close all the handles except for the view; the view keeps the underlying
+  // file mapping alive.
+ ::CloseHandle(FileMappingHandle);
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor); // Also closes FileHandle.
+ } else
+ ::CloseHandle(FileHandle);
return error_code::success();
}
@@ -815,7 +834,7 @@ mapped_file_region::mapped_file_region(const Twine &path,
}
FileDescriptor = 0;
- ec = init(FileDescriptor, offset);
+ ec = init(FileDescriptor, true, offset);
if (ec) {
Mapping = FileMappingHandle = 0;
FileHandle = INVALID_HANDLE_VALUE;
@@ -824,6 +843,7 @@ mapped_file_region::mapped_file_region(const Twine &path,
}
mapped_file_region::mapped_file_region(int fd,
+ bool closefd,
mapmode mode,
uint64_t length,
uint64_t offset,
@@ -836,13 +856,14 @@ mapped_file_region::mapped_file_region(int fd,
, FileMappingHandle() {
FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
if (FileHandle == INVALID_HANDLE_VALUE) {
- _close(FileDescriptor);
+ if (closefd)
+ _close(FileDescriptor);
FileDescriptor = 0;
ec = make_error_code(errc::bad_file_descriptor);
return;
}
- ec = init(FileDescriptor, offset);
+ ec = init(FileDescriptor, closefd, offset);
if (ec) {
Mapping = FileMappingHandle = 0;
FileHandle = INVALID_HANDLE_VALUE;
@@ -853,12 +874,6 @@ mapped_file_region::mapped_file_region(int fd,
mapped_file_region::~mapped_file_region() {
if (Mapping)
::UnmapViewOfFile(Mapping);
- if (FileMappingHandle)
- ::CloseHandle(FileMappingHandle);
- if (FileDescriptor)
- _close(FileDescriptor);
- else if (FileHandle != INVALID_HANDLE_VALUE)
- ::CloseHandle(FileHandle);
}
#if LLVM_HAS_RVALUE_REFERENCES
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index f71abd3..da26a37 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -306,7 +306,12 @@ raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) {
if (LLVM_UNLIKELY(OutBufCur == OutBufStart)) {
size_t BytesToWrite = Size - (Size % NumBytes);
write_impl(Ptr, BytesToWrite);
- copy_to_buffer(Ptr + BytesToWrite, Size - BytesToWrite);
+ size_t BytesRemaining = Size - BytesToWrite;
+ if (BytesRemaining > size_t(OutBufEnd - OutBufCur)) {
+ // Too much left over to copy into our buffer.
+ return write(Ptr + BytesToWrite, BytesRemaining);
+ }
+ copy_to_buffer(Ptr + BytesToWrite, BytesRemaining);
return *this;
}
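
The added check re-routes the leftover bytes back through write() when they no longer fit, rather than letting copy_to_buffer overflow. A self-contained toy with the same control flow, using illustrative names:

#include <cstddef>
#include <cstring>
#include <string>

// Toy buffered writer mirroring the large-write path above: push out whole
// buffer-sized chunks directly, then either buffer the tail or, if the tail
// no longer fits, send it back through write().  In raw_ostream the guard
// matters because write_impl may change the buffer underneath us; here the
// buffer is fixed, so the branch only demonstrates the shape of the fix.
struct ToyStream {
  std::string Sink;   // stands in for the underlying write_impl target
  char Buf[16];
  size_t Cur;

  ToyStream() : Cur(0) {}
  size_t spaceLeft() const { return sizeof(Buf) - Cur; }
  void flush() { Sink.append(Buf, Cur); Cur = 0; }

  void write(const char *Ptr, size_t Size) {
    if (Size <= spaceLeft()) {                 // fast path: fits as-is
      std::memcpy(Buf + Cur, Ptr, Size);
      Cur += Size;
      return;
    }
    if (Cur == 0) {                            // empty buffer, oversized write
      size_t Direct = Size - (Size % sizeof(Buf));
      Sink.append(Ptr, Direct);                // the "write_impl" call
      size_t Remaining = Size - Direct;
      if (Remaining > spaceLeft()) {           // the guard added by the patch
        write(Ptr + Direct, Remaining);
        return;
      }
      std::memcpy(Buf, Ptr + Direct, Remaining);
      Cur = Remaining;
      return;
    }
    flush();                                   // make room and retry
    write(Ptr, Size);
  }
};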
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index cca6d12..572617c 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -349,59 +349,6 @@ AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF,
return TopOfFrameOffset - FrameRegPos;
}
-/// Estimate and return the size of the frame.
-static unsigned estimateStackSize(MachineFunction &MF) {
- // FIXME: Make generic? Really consider after upstreaming. This code is now
- // shared between PEI, ARM *and* here.
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
- unsigned MaxAlign = MFI->getMaxAlignment();
- int Offset = 0;
-
- // This code is very, very similar to PEI::calculateFrameObjectOffsets().
- // It really should be refactored to share code. Until then, changes
- // should keep in mind that there's tight coupling between the two.
-
- for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
- int FixedOff = -MFI->getObjectOffset(i);
- if (FixedOff > Offset) Offset = FixedOff;
- }
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
- if (MFI->isDeadObjectIndex(i))
- continue;
- Offset += MFI->getObjectSize(i);
- unsigned Align = MFI->getObjectAlignment(i);
- // Adjust to alignment boundary
- Offset = (Offset+Align-1)/Align*Align;
-
- MaxAlign = std::max(Align, MaxAlign);
- }
-
- if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
- Offset += MFI->getMaxCallFrameSize();
-
- // Round up the size to a multiple of the alignment. If the function has
- // any calls or alloca's, align to the target's StackAlignment value to
- // ensure that the callee's frame or the alloca data is suitably aligned;
- // otherwise, for leaf functions, align to the TransientStackAlignment
- // value.
- unsigned StackAlign;
- if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
- (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
- StackAlign = TFI->getStackAlignment();
- else
- StackAlign = TFI->getTransientStackAlignment();
-
- // If the frame pointer is eliminated, all frame offsets will be relative to
- // SP not FP. Align to MaxAlign so this works.
- StackAlign = std::max(StackAlign, MaxAlign);
- unsigned AlignMask = StackAlign - 1;
- Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
-
- return (unsigned)Offset;
-}
-
void
AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
@@ -422,7 +369,7 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// callee-save register for this purpose or allocate an extra spill slot.
bool BigStack =
- (RS && estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
+ (RS && MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
|| MFI->hasVarSizedObjects() // Access will be from X29: messes things up
|| (MFI->adjustsStack() && !hasReservedCallFrame(MF));
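
The local estimateStackSize helper is replaced by the shared MachineFrameInfo::estimateStackSize (the identical ARM copy further down gets the same treatment). The arithmetic the removed code leaned on repeatedly is plain round-up-to-alignment, shown here in isolation for reference:

#include <cassert>
#include <stdint.h>

// Round Offset up to the next multiple of Align.  The divide form works for
// any non-zero Align; the mask form is the usual shortcut when Align is a
// power of two, as the MFI alignments are.
static uint64_t roundUpDiv(uint64_t Offset, uint64_t Align) {
  return (Offset + Align - 1) / Align * Align;
}

static uint64_t roundUpMask(uint64_t Offset, uint64_t Align) {
  assert((Align & (Align - 1)) == 0 && "mask form needs a power of two");
  uint64_t AlignMask = Align - 1;
  return (Offset + AlignMask) & ~AlignMask;
}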
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index cea7f91..e9f4497 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -200,6 +200,8 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
// Virtually no operation on f128 is legal, but LLVM can't expand them when
// there's a valid register class, so we need custom operations in most cases.
@@ -217,6 +219,7 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
setOperationAction(ISD::FREM, MVT::f128, Expand);
setOperationAction(ISD::FRINT, MVT::f128, Expand);
setOperationAction(ISD::FSIN, MVT::f128, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
setOperationAction(ISD::FSQRT, MVT::f128, Expand);
setOperationAction(ISD::FSUB, MVT::f128, Custom);
setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
@@ -341,8 +344,7 @@ AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// ldxr dest, ptr
// <binop> scratch, dest, incr
// stxr stxr_status, scratch, ptr
- // cmp stxr_status, #0
- // b.ne loopMBB
+ // cbnz stxr_status, loopMBB
// fallthrough --> exitMBB
BB = loopMBB;
BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
@@ -364,10 +366,8 @@ AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr);
- BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp))
- .addReg(stxr_status).addImm(0);
- BuildMI(BB, dl, TII->get(AArch64::Bcc))
- .addImm(A64CC::NE).addMBB(loopMBB);
+ BuildMI(BB, dl, TII->get(AArch64::CBNZw))
+ .addReg(stxr_status).addMBB(loopMBB);
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
@@ -437,8 +437,7 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
// cmp incr, dest (, sign extend if necessary)
// csel scratch, dest, incr, cond
// stxr stxr_status, scratch, ptr
- // cmp stxr_status, #0
- // b.ne loopMBB
+ // cbnz stxr_status, loopMBB
// fallthrough --> exitMBB
BB = loopMBB;
BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
@@ -457,10 +456,8 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
BuildMI(BB, dl, TII->get(strOpc), stxr_status)
.addReg(scratch).addReg(ptr);
- BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp))
- .addReg(stxr_status).addImm(0);
- BuildMI(BB, dl, TII->get(AArch64::Bcc))
- .addImm(A64CC::NE).addMBB(loopMBB);
+ BuildMI(BB, dl, TII->get(AArch64::CBNZw))
+ .addReg(stxr_status).addMBB(loopMBB);
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
@@ -533,17 +530,14 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
// loop2MBB:
// strex stxr_status, newval, [ptr]
- // cmp stxr_status, #0
- // b.ne loop1MBB
+ // cbnz stxr_status, loop1MBB
BB = loop2MBB;
unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(newval).addReg(ptr);
- BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp))
- .addReg(stxr_status).addImm(0);
- BuildMI(BB, dl, TII->get(AArch64::Bcc))
- .addImm(A64CC::NE).addMBB(loop1MBB);
+ BuildMI(BB, dl, TII->get(AArch64::CBNZw))
+ .addReg(stxr_status).addMBB(loop1MBB);
BB->addSuccessor(loop1MBB);
BB->addSuccessor(exitMBB);
@@ -1861,11 +1855,10 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
const GlobalValue *GV = GN->getGlobal();
unsigned Alignment = GV->getAlignment();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
-
- if (GV->isWeakForLinker() && RelocM == Reloc::Static) {
- // Weak symbols can't use ADRP/ADD pair since they should evaluate to
- // zero when undefined. In PIC mode the GOT can take care of this, but in
- // absolute mode we use a constant pool load.
+ if (GV->isWeakForLinker() && GV->isDeclaration() && RelocM == Reloc::Static) {
+ // Weak undefined symbols can't use ADRP/ADD pair since they should evaluate
+ // to zero when they remain undefined. In PIC mode the GOT can take care of
+ // this, but in absolute mode we use a constant pool load.
SDValue PoolAddr;
PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
@@ -1873,10 +1866,16 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
AArch64II::MO_LO12),
DAG.getConstant(8, MVT::i32));
- return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr,
- MachinePointerInfo::getConstantPool(),
- /*isVolatile=*/ false, /*isNonTemporal=*/ true,
- /*isInvariant=*/ true, 8);
+ SDValue GlobalAddr = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr,
+ MachinePointerInfo::getConstantPool(),
+ /*isVolatile=*/ false,
+ /*isNonTemporal=*/ true,
+ /*isInvariant=*/ true, 8);
+ if (GN->getOffset() != 0)
+ return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
+ DAG.getConstant(GN->getOffset(), PtrVT));
+
+ return GlobalAddr;
}
if (Alignment == 0) {
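
The extra ADD covers global addresses that carry a non-zero offset on the node itself, which the old constant-pool path simply dropped. A source-level case that can produce such a node under the static relocation model might look like this (illustrative only, not taken from the patch's tests):

// A weak declaration that may remain undefined at link time; taking the
// address of an element other than the first yields GlobalAddress(WeakArr)
// plus a constant byte offset in the DAG.
extern int WeakArr[16] __attribute__((weak));

int *addr_into_weak_array() {
  return &WeakArr[3];   // symbol address + 12 bytes
}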
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 562a7f6..319ec97 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -159,7 +159,7 @@ let Defs = [XSP], Uses = [XSP] in {
// Atomic operation pseudo-instructions
//===----------------------------------------------------------------------===//
-let usesCustomInserter = 1, Defs = [NZCV] in {
+let usesCustomInserter = 1 in {
multiclass AtomicSizes<string opname> {
def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
[(set GPR32:$dst, (!cast<SDNode>(opname # "_8") GPR64:$ptr, GPR32:$incr))]>;
@@ -178,11 +178,14 @@ defm ATOMIC_LOAD_AND : AtomicSizes<"atomic_load_and">;
defm ATOMIC_LOAD_OR : AtomicSizes<"atomic_load_or">;
defm ATOMIC_LOAD_XOR : AtomicSizes<"atomic_load_xor">;
defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">;
-defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">;
-defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">;
-defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">;
-defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">;
defm ATOMIC_SWAP : AtomicSizes<"atomic_swap">;
+let Defs = [NZCV] in {
+ // These operations need a CMP to calculate the correct value
+ defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">;
+ defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">;
+ defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">;
+ defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">;
+}
let usesCustomInserter = 1, Defs = [NZCV] in {
def ATOMIC_CMP_SWAP_I8
@@ -1942,43 +1945,41 @@ def fpz32 : Operand<f32>,
ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> {
let ParserMatchClass = fpzero_asmoperand;
let PrintMethod = "printFPZeroOperand";
+ let DecoderMethod = "DecodeFPZeroOperand";
}
def fpz64 : Operand<f64>,
ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> {
let ParserMatchClass = fpzero_asmoperand;
let PrintMethod = "printFPZeroOperand";
+ let DecoderMethod = "DecodeFPZeroOperand";
}
-multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, string asmop2,
- dag pattern> {
+multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, dag pattern> {
def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0},
- (outs), ins, !strconcat("fcmp\t$Rn, ", asmop2),
- [pattern], NoItinerary> {
+ (outs), ins, "fcmp\t$Rn, $Rm", [pattern],
+ NoItinerary> {
let Defs = [NZCV];
}
def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0},
- (outs), ins, !strconcat("fcmpe\t$Rn, ", asmop2),
- [], NoItinerary> {
+ (outs), ins, "fcmpe\t$Rn, $Rm", [], NoItinerary> {
let Defs = [NZCV];
}
}
-defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm), "$Rm",
+defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm),
(set NZCV, (A64cmp (f32 FPR32:$Rn), FPR32:$Rm))>;
-defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm), "$Rm",
+defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm),
(set NZCV, (A64cmp (f64 FPR64:$Rn), FPR64:$Rm))>;
-// What would be Rm should be written as 0, but anything is valid for
-// disassembly so we can't set the bits
-let PostEncoderMethod = "fixFCMPImm" in {
- defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Imm), "$Imm",
- (set NZCV, (A64cmp (f32 FPR32:$Rn), fpz32:$Imm))>;
+// What would be Rm should be written as 0; note that even though it's called
+// "$Rm" here to fit in with the InstrFormats, it's actually an immediate.
+defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Rm),
+ (set NZCV, (A64cmp (f32 FPR32:$Rn), fpz32:$Rm))>;
- defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Imm), "$Imm",
- (set NZCV, (A64cmp (f64 FPR64:$Rn), fpz64:$Imm))>;
-}
+defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Rm),
+ (set NZCV, (A64cmp (f64 FPR64:$Rn), fpz64:$Rm))>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index c1695da..69bb80a 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -160,44 +160,53 @@ private:
SMLoc StartLoc, EndLoc;
+ struct ImmWithLSLOp {
+ const MCExpr *Val;
+ unsigned ShiftAmount;
+ bool ImplicitAmount;
+ };
+
+ struct CondCodeOp {
+ A64CC::CondCodes Code;
+ };
+
+ struct FPImmOp {
+ double Val;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ struct ShiftExtendOp {
+ A64SE::ShiftExtSpecifiers ShiftType;
+ unsigned Amount;
+ bool ImplicitAmount;
+ };
+
+ struct SysRegOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
union {
- struct {
- const MCExpr *Val;
- unsigned ShiftAmount;
- bool ImplicitAmount;
- } ImmWithLSL;
-
- struct {
- A64CC::CondCodes Code;
- } CondCode;
-
- struct {
- double Val;
- } FPImm;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- struct {
- unsigned RegNum;
- } Reg;
-
- struct {
- A64SE::ShiftExtSpecifiers ShiftType;
- unsigned Amount;
- bool ImplicitAmount;
- } ShiftExtend;
-
- struct {
- const char *Data;
- unsigned Length;
- } SysReg;
-
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
+ struct ImmWithLSLOp ImmWithLSL;
+ struct CondCodeOp CondCode;
+ struct FPImmOp FPImm;
+ struct ImmOp Imm;
+ struct RegOp Reg;
+ struct ShiftExtendOp ShiftExtend;
+ struct SysRegOp SysReg;
+ struct TokOp Tok;
};
AArch64Operand(KindTy K, SMLoc S, SMLoc E)
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index eba7666..12c1b8f 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -106,6 +106,11 @@ static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
+ unsigned RmBits,
+ uint64_t Address,
+ const void *Decoder);
+
template<int RegWidth>
static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
unsigned FullImm,
@@ -381,6 +386,17 @@ static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
+ unsigned RmBits,
+ uint64_t Address,
+ const void *Decoder) {
+ // Any bits are valid in the instruction (they're architecturally ignored),
+ // but a code generator should insert 0.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return MCDisassembler::Success;
+}
+
template<int RegWidth>
static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 756e037..a5c591e 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -106,8 +106,6 @@ public:
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const;
- unsigned fixFCMPImm(const MCInst &MI, unsigned EncodedValue) const;
-
template<int hasRs, int hasRt2> unsigned
fixLoadStoreExclusive(const MCInst &MI, unsigned EncodedValue) const;
@@ -423,15 +421,6 @@ AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
return Result | getAddressWithFixup(UImm16MO, requestedFixup, Fixups);
}
-unsigned AArch64MCCodeEmitter::fixFCMPImm(const MCInst &MI,
- unsigned EncodedValue) const {
- // For FCMP[E] Rn, #0.0, the Rm field has a canonical representation
- // with 0s, but is architecturally ignored
- EncodedValue &= ~0x1f0000u;
-
- return EncodedValue;
-}
-
template<int hasRs, int hasRt2> unsigned
AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI,
unsigned EncodedValue) const {
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
new file mode 100644
index 0000000..f0d4dbe
--- /dev/null
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -0,0 +1,704 @@
+//=== A15SDOptimizer.cpp - Optimize DPR and SPR register accesses on A15 ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The Cortex-A15 processor employs a tracking scheme in its register renaming
+// in order to process each instruction's micro-ops speculatively and
+// out-of-order with appropriate forwarding. The ARM architecture allows VFP
+// instructions to read and write 32-bit S-registers. Each S-register
+// corresponds to one half (upper or lower) of an overlaid 64-bit D-register.
+//
+// There are several instruction patterns which can be used to provide this
+// capability, and some of them perform better than other, potentially more
+// direct, patterns. The difference shows up when one micro-op reads a
+// D-register operand that has recently been written as one or more
+// S-register results.
+//
+// This file defines a pre-regalloc pass which looks for SPR producers that
+// are going to be used by DPR (or QPR) consumers and creates the more
+// optimized access pattern.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "a15-sd-optimizer"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMISelLowering.h"
+#include "ARMTargetMachine.h"
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <set>
+
+using namespace llvm;
+
+namespace {
+ struct A15SDOptimizer : public MachineFunctionPass {
+ static char ID;
+ A15SDOptimizer() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM A15 S->D optimizer";
+ }
+
+ private:
+ const ARMBaseInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ bool runOnInstruction(MachineInstr *MI);
+
+ //
+ // Instruction builder helpers
+ //
+ unsigned createDupLane(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg, unsigned Lane,
+ bool QPR=false);
+
+ unsigned createExtractSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned DReg, unsigned Lane,
+ const TargetRegisterClass *TRC);
+
+ unsigned createVExt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Ssub0, unsigned Ssub1);
+
+ unsigned createRegSequence(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg1, unsigned Reg2);
+
+ unsigned createInsertSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL, unsigned DReg, unsigned Lane,
+ unsigned ToInsert);
+
+ unsigned createImplicitDef(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL);
+
+ //
+ // Various property checkers
+ //
+ bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC);
+ bool hasPartialWrite(MachineInstr *MI);
+ SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI);
+ unsigned getDPRLaneFromSPR(unsigned SReg);
+
+ //
+ // Methods used for getting the definitions of partial registers
+ //
+
+ MachineInstr *elideCopies(MachineInstr *MI);
+ void elideCopiesAndPHIs(MachineInstr *MI,
+ SmallVectorImpl<MachineInstr*> &Outs);
+
+ //
+ // Pattern optimization methods
+ //
+ unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg);
+ unsigned optimizeSDPattern(MachineInstr *MI);
+ unsigned getPrefSPRLane(unsigned SReg);
+
+ //
+    // Sanitizing method - used to make sure we don't leave dead code around.
+ //
+ void eraseInstrWithNoUses(MachineInstr *MI);
+
+ //
+ // A map used to track the changes done by this pass.
+ //
+ std::map<MachineInstr*, unsigned> Replacements;
+ std::set<MachineInstr *> DeadInstr;
+ };
+ char A15SDOptimizer::ID = 0;
+} // end anonymous namespace
+
+// Returns true if MO is a register operand belonging to the given class.
+bool A15SDOptimizer::usesRegClass(MachineOperand &MO,
+ const TargetRegisterClass *TRC) {
+ if (!MO.isReg())
+ return false;
+ unsigned Reg = MO.getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return MRI->getRegClass(Reg)->hasSuperClassEq(TRC);
+ else
+ return TRC->contains(Reg);
+}
+
+unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) {
+ unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1,
+ &ARM::DPRRegClass);
+ if (DReg != ARM::NoRegister) return ARM::ssub_1;
+ return ARM::ssub_0;
+}
+
+// Get the subreg type that is most likely to be coalesced
+// for an SPR register that will be used in VDUP32d pseudo.
+unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
+ if (!TRI->isVirtualRegister(SReg))
+ return getDPRLaneFromSPR(SReg);
+
+ MachineInstr *MI = MRI->getVRegDef(SReg);
+ if (!MI) return ARM::ssub_0;
+ MachineOperand *MO = MI->findRegisterDefOperand(SReg);
+
+  if (!MO) return ARM::ssub_0;
+  assert(MO->isReg() && "Non-register operand found!");
+
+ if (MI->isCopy() && usesRegClass(MI->getOperand(1),
+ &ARM::SPRRegClass)) {
+ SReg = MI->getOperand(1).getReg();
+ }
+
+ if (TargetRegisterInfo::isVirtualRegister(SReg)) {
+ if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1;
+ return ARM::ssub_0;
+ }
+ return getDPRLaneFromSPR(SReg);
+}
+
+// MI is known to be dead. Figure out what instructions
+// are also made dead by this and mark them for removal.
+void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) {
+ SmallVector<MachineInstr *, 8> Front;
+ DeadInstr.insert(MI);
+
+ DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n");
+ Front.push_back(MI);
+
+ while (Front.size() != 0) {
+ MI = Front.back();
+ Front.pop_back();
+
+ // MI is already known to be dead. We need to see
+ // if other instructions can also be removed.
+ for (unsigned int i = 0; i < MI->getNumOperands(); ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if ((!MO.isReg()) || (!MO.isUse()))
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TRI->isVirtualRegister(Reg))
+ continue;
+ MachineOperand *Op = MI->findRegisterDefOperand(Reg);
+
+ if (!Op)
+ continue;
+
+ MachineInstr *Def = Op->getParent();
+
+ // We don't need to do anything if we have already marked
+ // this instruction as being dead.
+ if (DeadInstr.find(Def) != DeadInstr.end())
+ continue;
+
+ // Check if all the uses of this instruction are marked as
+ // dead. If so, we can also mark this instruction as being
+ // dead.
+ bool IsDead = true;
+ for (unsigned int j = 0; j < Def->getNumOperands(); ++j) {
+ MachineOperand &MODef = Def->getOperand(j);
+ if ((!MODef.isReg()) || (!MODef.isDef()))
+ continue;
+ unsigned DefReg = MODef.getReg();
+ if (!TRI->isVirtualRegister(DefReg)) {
+ IsDead = false;
+ break;
+ }
+ for (MachineRegisterInfo::use_iterator II = MRI->use_begin(Reg),
+ EE = MRI->use_end();
+ II != EE; ++II) {
+ // We don't care about self references.
+ if (&*II == Def)
+ continue;
+ if (DeadInstr.find(&*II) == DeadInstr.end()) {
+ IsDead = false;
+ break;
+ }
+ }
+ }
+
+ if (!IsDead) continue;
+
+ DEBUG(dbgs() << "Deleting instruction " << *Def << "\n");
+ DeadInstr.insert(Def);
+ }
+ }
+}
+
+// Creates the more optimized patterns and generally does all the code
+// transformations in this pass.
+unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
+ if (MI->isCopy()) {
+ return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg());
+ }
+
+ if (MI->isInsertSubreg()) {
+ unsigned DPRReg = MI->getOperand(1).getReg();
+ unsigned SPRReg = MI->getOperand(2).getReg();
+
+ if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) {
+ MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg());
+ MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg());
+
+ if (DPRMI && SPRMI) {
+ // See if the first operand of this insert_subreg is IMPLICIT_DEF
+ MachineInstr *ECDef = elideCopies(DPRMI);
+ if (ECDef != 0 && ECDef->isImplicitDef()) {
+ // Another corner case - if we're inserting something that is purely
+ // a subreg copy of a DPR, just use that DPR.
+
+ MachineInstr *EC = elideCopies(SPRMI);
+ // Is it a subreg copy of ssub_0?
+ if (EC && EC->isCopy() &&
+ EC->getOperand(1).getSubReg() == ARM::ssub_0) {
+ DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI);
+
+ // Find the thing we're subreg copying out of - is it of the same
+ // regclass as DPRMI? (i.e. a DPR or QPR).
+ unsigned FullReg = SPRMI->getOperand(1).getReg();
+ const TargetRegisterClass *TRC =
+ MRI->getRegClass(MI->getOperand(1).getReg());
+ if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) {
+ DEBUG(dbgs() << "Subreg copy is compatible - returning ");
+ DEBUG(dbgs() << PrintReg(FullReg) << "\n");
+ eraseInstrWithNoUses(MI);
+ return FullReg;
+ }
+ }
+
+ return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg());
+ }
+ }
+ }
+ return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
+ }
+
+ if (MI->isRegSequence() && usesRegClass(MI->getOperand(1),
+ &ARM::SPRRegClass)) {
+ // See if all bar one of the operands are IMPLICIT_DEF and insert the
+ // optimizer pattern accordingly.
+ unsigned NumImplicit = 0, NumTotal = 0;
+ unsigned NonImplicitReg = ~0U;
+
+ for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) {
+ if (!MI->getOperand(I).isReg())
+ continue;
+ ++NumTotal;
+ unsigned OpReg = MI->getOperand(I).getReg();
+
+ if (!TRI->isVirtualRegister(OpReg))
+ break;
+
+ MachineInstr *Def = MRI->getVRegDef(OpReg);
+ if (!Def)
+ break;
+ if (Def->isImplicitDef())
+ ++NumImplicit;
+ else
+ NonImplicitReg = MI->getOperand(I).getReg();
+ }
+
+ if (NumImplicit == NumTotal - 1)
+ return optimizeAllLanesPattern(MI, NonImplicitReg);
+ else
+ return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
+ }
+
+ assert(0 && "Unhandled update pattern!");
+ return 0;
+}
+
+// Return true if this MachineInstr inserts a scalar (SPR) value into
+// a D or Q register.
+bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) {
+ // The only way we can do a partial register update is through a COPY,
+ // INSERT_SUBREG or REG_SEQUENCE.
+ if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
+ return true;
+
+ if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2),
+ &ARM::SPRRegClass))
+ return true;
+
+ if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
+ return true;
+
+ return false;
+}
+
+// Looks through full copies to get the instruction that defines the input
+// operand for MI.
+MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) {
+ if (!MI->isFullCopy())
+ return MI;
+ if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
+ return NULL;
+ MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg());
+ if (!Def)
+ return NULL;
+ return elideCopies(Def);
+}
+
+// Look through full copies and PHIs to get the set of non-copy MachineInstrs
+// that can produce MI.
+void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
+ SmallVectorImpl<MachineInstr*> &Outs) {
+ // Looking through PHIs may create loops so we need to track what
+ // instructions we have visited before.
+ std::set<MachineInstr *> Reached;
+ SmallVector<MachineInstr *, 8> Front;
+ Front.push_back(MI);
+ while (Front.size() != 0) {
+ MI = Front.back();
+ Front.pop_back();
+
+ // If we have already explored this MachineInstr, ignore it.
+ if (Reached.find(MI) != Reached.end())
+ continue;
+ Reached.insert(MI);
+ if (MI->isPHI()) {
+ for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
+ unsigned Reg = MI->getOperand(I).getReg();
+ if (!TRI->isVirtualRegister(Reg)) {
+ continue;
+ }
+ MachineInstr *NewMI = MRI->getVRegDef(Reg);
+ if (!NewMI)
+ continue;
+ Front.push_back(NewMI);
+ }
+ } else if (MI->isFullCopy()) {
+ if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
+ continue;
+ MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg());
+ if (!NewMI)
+ continue;
+ Front.push_back(NewMI);
+ } else {
+ DEBUG(dbgs() << "Found partial copy" << *MI <<"\n");
+ Outs.push_back(MI);
+ }
+ }
+}
+
+// Return the DPR virtual registers that are read by this machine instruction
+// (if any).
+SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) {
+ if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() ||
+ MI->isKill())
+ return SmallVector<unsigned, 8>();
+
+ SmallVector<unsigned, 8> Defs;
+ for (unsigned i = 0; i < MI->getNumOperands(); ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ if (!usesRegClass(MO, &ARM::DPRRegClass) &&
+ !usesRegClass(MO, &ARM::QPRRegClass))
+ continue;
+
+ Defs.push_back(MO.getReg());
+ }
+ return Defs;
+}
+
+// Creates a DPR register from an SPR one by using a VDUP.
+unsigned
+A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg, unsigned Lane, bool QPR) {
+ unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass :
+ &ARM::DPRRegClass);
+ AddDefaultPred(BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d),
+ Out)
+ .addReg(Reg)
+ .addImm(Lane));
+
+ return Out;
+}
+
+// Creates a SPR register from a DPR by copying the value in lane 0.
+unsigned
+A15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned DReg, unsigned Lane,
+ const TargetRegisterClass *TRC) {
+ unsigned Out = MRI->createVirtualRegister(TRC);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::COPY), Out)
+ .addReg(DReg, 0, Lane);
+
+ return Out;
+}
+
+// Takes two SPR registers and creates a DPR by using a REG_SEQUENCE.
+unsigned
+A15SDOptimizer::createRegSequence(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg1, unsigned Reg2) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::REG_SEQUENCE), Out)
+ .addReg(Reg1)
+ .addImm(ARM::dsub_0)
+ .addReg(Reg2)
+ .addImm(ARM::dsub_1);
+ return Out;
+}
+
+// Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1)
+// and merges them into one DPR register.
+unsigned
+A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Ssub0, unsigned Ssub1) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
+ AddDefaultPred(BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(ARM::VEXTd32), Out)
+ .addReg(Ssub0)
+ .addReg(Ssub1)
+ .addImm(1));
+ return Out;
+}
+
+unsigned
+A15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL, unsigned DReg, unsigned Lane,
+ unsigned ToInsert) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::INSERT_SUBREG), Out)
+ .addReg(DReg)
+ .addReg(ToInsert)
+ .addImm(Lane);
+
+ return Out;
+}
+
+unsigned
+A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::IMPLICIT_DEF), Out);
+ return Out;
+}
+
+// This function inserts instructions in order to optimize interactions between
+// SPR registers and DPR/QPR registers. It does so by performing VDUPs on all
+// lanes, and then using VEXT instructions to recompose the result.
+unsigned
+A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
+ MachineBasicBlock::iterator InsertPt(MI);
+ DebugLoc DL = MI->getDebugLoc();
+ MachineBasicBlock &MBB = *MI->getParent();
+ InsertPt++;
+ unsigned Out;
+
+ if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass)) {
+ unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg,
+ ARM::dsub_0, &ARM::DPRRegClass);
+ unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg,
+ ARM::dsub_1, &ARM::DPRRegClass);
+
+ unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0);
+ unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1);
+ Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
+
+ unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0);
+ unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1);
+ Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4);
+
+ Out = createRegSequence(MBB, InsertPt, DL, Out, Out2);
+
+ } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) {
+ unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0);
+ unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1);
+ Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
+
+ } else {
+ assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) &&
+ "Found unexpected regclass!");
+
+ unsigned PrefLane = getPrefSPRLane(Reg);
+ unsigned Lane;
+ switch (PrefLane) {
+ case ARM::ssub_0: Lane = 0; break;
+ case ARM::ssub_1: Lane = 1; break;
+ default: llvm_unreachable("Unknown preferred lane!");
+ }
+
+ bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass);
+
+ Out = createImplicitDef(MBB, InsertPt, DL);
+ Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg);
+ Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR);
+ eraseInstrWithNoUses(MI);
+ }
+ return Out;
+}
+
+bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
+ // We look for instructions that write S registers that are then read as
+ // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and
+ // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or
+  // merge two SPR values to form a DPR register. In order to avoid false
+ // positives we make sure that there is an SPR producer so we look past
+ // COPY and PHI nodes to find it.
+ //
+ // The best code pattern for when an SPR producer is going to be used by a
+ // DPR or QPR consumer depends on whether the other lanes of the
+ // corresponding DPR/QPR are currently defined.
+ //
+ // We can handle these efficiently, depending on the type of
+ // pseudo-instruction that is producing the pattern
+ //
+ // * COPY: * VDUP all lanes and merge the results together
+ // using VEXTs.
+ //
+ // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR
+ // lane, and the other lane(s) of the DPR/QPR register
+ // that we are inserting in are undefined, use the
+ // original DPR/QPR value.
+  //                    * Otherwise, fall back on the same strategy as COPY.
+ //
+ // * REG_SEQUENCE: * If all except one of the input operands are
+ // IMPLICIT_DEFs, insert the VDUP pattern for just the
+ // defined input operand
+  //                    * Otherwise, fall back on the same strategy as COPY.
+ //
+
+ // First, get all the reads of D-registers done by this instruction.
+ SmallVector<unsigned, 8> Defs = getReadDPRs(MI);
+ bool Modified = false;
+
+ for (SmallVector<unsigned, 8>::iterator I = Defs.begin(), E = Defs.end();
+ I != E; ++I) {
+ // Follow the def-use chain for this DPR through COPYs, and also through
+ // PHIs (which are essentially multi-way COPYs). It is because of PHIs that
+ // we can end up with multiple defs of this DPR.
+
+ SmallVector<MachineInstr *, 8> DefSrcs;
+ if (!TRI->isVirtualRegister(*I))
+ continue;
+ MachineInstr *Def = MRI->getVRegDef(*I);
+ if (!Def)
+ continue;
+
+ elideCopiesAndPHIs(Def, DefSrcs);
+
+ for (SmallVector<MachineInstr*, 8>::iterator II = DefSrcs.begin(),
+ EE = DefSrcs.end(); II != EE; ++II) {
+ MachineInstr *MI = *II;
+
+ // If we've already analyzed and replaced this operand, don't do
+ // anything.
+ if (Replacements.find(MI) != Replacements.end())
+ continue;
+
+ // Now, work out if the instruction causes a SPR->DPR dependency.
+ if (!hasPartialWrite(MI))
+ continue;
+
+ // Collect all the uses of this MI's DPR def for updating later.
+ SmallVector<MachineOperand*, 8> Uses;
+ unsigned DPRDefReg = MI->getOperand(0).getReg();
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg),
+ E = MRI->use_end(); I != E; ++I)
+ Uses.push_back(&I.getOperand());
+
+ // We can optimize this.
+ unsigned NewReg = optimizeSDPattern(MI);
+
+ if (NewReg != 0) {
+ Modified = true;
+ for (SmallVector<MachineOperand*, 8>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ DEBUG(dbgs() << "Replacing operand "
+ << **I << " with "
+ << PrintReg(NewReg) << "\n");
+ (*I)->substVirtReg(NewReg, 0, *TRI);
+ }
+ }
+ Replacements[MI] = NewReg;
+ }
+ }
+ return Modified;
+}
+
+bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
+ TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
+ TRI = Fn.getTarget().getRegisterInfo();
+ MRI = &Fn.getRegInfo();
+ bool Modified = false;
+
+ DEBUG(dbgs() << "Running on function " << Fn.getName()<< "\n");
+
+ DeadInstr.clear();
+ Replacements.clear();
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+
+ for (MachineBasicBlock::iterator MI = MFI->begin(), ME = MFI->end();
+ MI != ME;) {
+ Modified |= runOnInstruction(MI++);
+ }
+
+ }
+
+ for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(),
+ E = DeadInstr.end();
+ I != E; ++I) {
+ (*I)->eraseFromParent();
+ }
+
+ return Modified;
+}
+
+FunctionPass *llvm::createA15SDOptimizerPass() {
+ return new A15SDOptimizer();
+}
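
The def-chasing in elideCopiesAndPHIs is a standard worklist walk with a visited set so that PHI cycles terminate. The same skeleton, detached from MachineInstr and shown with a toy node type, is:

#include <set>
#include <vector>

struct Node {                       // stands in for a MachineInstr
  bool IsCopyOrPHI;
  std::vector<Node *> Inputs;       // defs feeding this node
};

// Collect the non-copy, non-PHI producers reachable from Start, visiting
// each node at most once even when PHIs form a cycle.
static void CollectProducers(Node *Start, std::vector<Node *> &Outs) {
  std::set<Node *> Reached;
  std::vector<Node *> Front(1, Start);
  while (!Front.empty()) {
    Node *N = Front.back();
    Front.pop_back();
    if (!Reached.insert(N).second)  // already explored: skip (breaks cycles)
      continue;
    if (N->IsCopyOrPHI)
      Front.insert(Front.end(), N->Inputs.begin(), N->Inputs.end());
    else
      Outs.push_back(N);            // a real producer of the value
  }
}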
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 5faf8c3..80e5f37 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -35,6 +35,7 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
JITCodeEmitter &JCE);
+FunctionPass *createA15SDOptimizerPass();
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
FunctionPass *createARMGlobalBaseRegPass();
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 58c7798..13ec208 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -1357,7 +1357,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
.addReg(ARM::PC)
- .addImm(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(0).getReg())
// Add predicate operands.
.addImm(ARMCC::AL)
.addReg(0)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index ed001ea..ed8b9cd 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1125,7 +1125,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
// copyPhysReg() calls. Look for VMOVS instructions that can legally be
// widened to VMOVD. We prefer the VMOVD when possible because it may be
// changed into a VORR that can go down the NEON pipeline.
- if (!WidenVMOVS || !MI->isCopy())
+ if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15())
return false;
// Look for a copy between even S-registers. That is where we keep floats
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 0ca6450..3b12408 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -1038,58 +1038,6 @@ static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
return FnSize;
}
-/// estimateStackSize - Estimate and return the size of the frame.
-/// FIXME: Make generic?
-static unsigned estimateStackSize(MachineFunction &MF) {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
- unsigned MaxAlign = MFI->getMaxAlignment();
- int Offset = 0;
-
- // This code is very, very similar to PEI::calculateFrameObjectOffsets().
- // It really should be refactored to share code. Until then, changes
- // should keep in mind that there's tight coupling between the two.
-
- for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
- int FixedOff = -MFI->getObjectOffset(i);
- if (FixedOff > Offset) Offset = FixedOff;
- }
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
- if (MFI->isDeadObjectIndex(i))
- continue;
- Offset += MFI->getObjectSize(i);
- unsigned Align = MFI->getObjectAlignment(i);
- // Adjust to alignment boundary
- Offset = (Offset+Align-1)/Align*Align;
-
- MaxAlign = std::max(Align, MaxAlign);
- }
-
- if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
- Offset += MFI->getMaxCallFrameSize();
-
- // Round up the size to a multiple of the alignment. If the function has
- // any calls or alloca's, align to the target's StackAlignment value to
- // ensure that the callee's frame or the alloca data is suitably aligned;
- // otherwise, for leaf functions, align to the TransientStackAlignment
- // value.
- unsigned StackAlign;
- if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
- (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
- StackAlign = TFI->getStackAlignment();
- else
- StackAlign = TFI->getTransientStackAlignment();
-
- // If the frame pointer is eliminated, all frame offsets will be relative to
- // SP not FP. Align to MaxAlign so this works.
- StackAlign = std::max(StackAlign, MaxAlign);
- unsigned AlignMask = StackAlign - 1;
- Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
-
- return (unsigned)Offset;
-}
-
/// estimateRSStackSizeLimit - Look at each instruction that references stack
/// frames and return the stack size limit beyond which some of these
/// instructions will require a scratch register during their expansion later.
@@ -1235,7 +1183,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// we've used all the registers and so R4 is already used, so not marking
// it here will be OK.
// FIXME: It will be better just to find spare register here.
- unsigned StackSize = estimateStackSize(MF);
+ unsigned StackSize = MFI->estimateStackSize(MF);
if (MFI->hasVarSizedObjects() || StackSize > 508)
MRI.setPhysRegUsed(ARM::R4);
}
@@ -1330,7 +1278,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// worth the effort and added fragility?
bool BigStack =
(RS &&
- (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
+ (MFI->estimateStackSize(MF) +
+ ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
estimateRSStackSizeLimit(MF, this)))
|| MFI->hasVarSizedObjects()
|| (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index a83f052..2c51de2 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -3155,7 +3155,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
// Remap uses.
- SDValue Glue = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
+ SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
if (!SDValue(N, 0).use_empty()) {
SDValue Result;
if (isThumb)
@@ -3163,9 +3163,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
else {
SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- dl, MVT::i32, MVT::Glue, SDValue(Ld, 0), SubRegIdx, Glue);
+ dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
Result = SDValue(ResNode,0);
- Glue = Result.getValue(1);
}
ReplaceUses(SDValue(N, 0), Result);
}
@@ -3176,13 +3175,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
else {
SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- dl, MVT::i32, MVT::Glue, SDValue(Ld, 0), SubRegIdx, Glue);
+ dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
Result = SDValue(ResNode,0);
- Glue = Result.getValue(1);
}
ReplaceUses(SDValue(N, 1), Result);
}
- ReplaceUses(SDValue(N, 2), Glue);
+ ReplaceUses(SDValue(N, 2), OutChain);
return NULL;
}
@@ -3195,9 +3193,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
// Store exclusive double return a i32 value which is the return status
// of the issued store.
- std::vector<EVT> ResTys;
- ResTys.push_back(MVT::i32);
- ResTys.push_back(MVT::Other);
+ EVT ResTys[] = { MVT::i32, MVT::Other };
bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
// Place arguments in the right order.
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index ef96e56..514971f 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -504,6 +504,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMA, MVT::v2f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
@@ -521,6 +522,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
+ // Mark v2f32 intrinsics.
+ setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
+ setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
+
// Neon does not support some operations on v1i64 and v2i64 types.
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
// Custom handling for some quad-vector types to detect VMULL.
@@ -554,6 +572,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
+ // NEON only has FMA instructions as of VFP4.
+ if (!Subtarget->hasVFP4()) {
+ setOperationAction(ISD::FMA, MVT::v2f32, Expand);
+ setOperationAction(ISD::FMA, MVT::v4f32, Expand);
+ }
+
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
@@ -1581,7 +1605,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// On ELF targets for PIC code, direct calls should go through the PLT
unsigned OpFlags = 0;
if (Subtarget->isTargetELF() &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
OpFlags = ARMII::MO_PLT;
Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
}
@@ -2247,8 +2271,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- Reloc::Model RelocM = getTargetMachine().getRelocationModel();
- if (RelocM == Reloc::PIC_) {
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GV,
@@ -2292,8 +2315,6 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
DebugLoc dl = Op.getDebugLoc();
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
- MachineFunction &MF = DAG.getMachineFunction();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// FIXME: Enable this for static codegen when tool issues are fixed. Also
// update ARMFastISel::ARMMaterializeGV.
@@ -2321,6 +2342,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
if (RelocM == Reloc::Static) {
CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
} else {
+ ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
ARMPCLabelIndex = AFI->createPICLabelUId();
unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
ARMConstantPoolValue *CPV =
@@ -2401,7 +2423,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
EVT PtrVT = getPointerTy();
- DebugLoc dl = Op.getDebugLoc();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
SDValue CPAddr;
unsigned PCAdj = (RelocM != Reloc::PIC_)
@@ -2661,7 +2682,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
CCInfo.AnalyzeFormalArguments(Ins,
CCAssignFnForNode(CallConv, /* Return*/ false,
isVarArg));
-
+
SmallVector<SDValue, 16> ArgValues;
int lastInsIndex = -1;
SDValue ArgValue;
@@ -2776,7 +2797,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
} else {
int FI = MFI->CreateFixedObject(Flags.getByValSize(),
VA.getLocMemOffset(), false);
- InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
+ InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
}
} else {
int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
@@ -3573,7 +3594,7 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
-/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
+/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
/// [b0 b1 b2 b3 b4 b5 b6 b7]
/// +[b1 b0 b3 b2 b5 b4 b7 b6]
/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
@@ -3594,7 +3615,7 @@ static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
/// bit-count for each 16-bit element from the operand. We need slightly
/// different sequencing for v4i16 and v8i16 to stay within NEON's available
/// 64/128-bit registers.
-///
+///
/// Trace for v4i16:
/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
@@ -3625,7 +3646,7 @@ static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
/// input = [v0 v1 ] (vi: 32-bit elements)
/// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
-/// vrev: N0 = [k1 k0 k3 k2 ]
+/// vrev: N0 = [k1 k0 k3 k2 ]
/// [k0 k1 k2 k3 ]
/// N1 =+[k1 k0 k3 k2 ]
/// [k0 k2 k1 k3 ]
@@ -4403,7 +4424,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
ValueCounts.insert(std::make_pair(V, 0));
unsigned &Count = ValueCounts[V];
-
+
// Is this value dominant? (takes up more than half of the lanes)
if (++Count > (NumElts / 2)) {
hasDominantValue = true;
@@ -4431,8 +4452,11 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// If we are VDUPing a value that comes directly from a vector, that will
// cause an unnecessary move to and from a GPR, where instead we could
- // just use VDUPLANE.
- if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ // just use VDUPLANE. We can only do this if the lane being extracted
+ // is at a constant index, as the VDUP from lane instructions only have
+ // constant-index forms.
+ if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(Value->getOperand(1))) {
// We need to create a new undef vector to use for the VDUPLANE if the
// size of the vector from which we get the value is different than the
// size of the vector that we need to create. We will insert the element
@@ -4447,12 +4471,10 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
Value, DAG.getConstant(index, MVT::i32)),
DAG.getConstant(index, MVT::i32));
- } else {
+ } else
N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
Value->getOperand(0), Value->getOperand(1));
- }
- }
- else
+ } else
N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
if (!usesOnlyOneValue) {
@@ -4484,7 +4506,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (usesOnlyOneValue) {
SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
if (isConstant && Val.getNode())
- return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
}
}
@@ -6329,6 +6351,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
unsigned MJTI = JTI->createJumpTableIndex(LPadList);
unsigned UId = AFI->createJumpTableUId();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
// Create the MBBs for the dispatch code.
@@ -6338,14 +6361,11 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
unsigned trap_opcode;
- if (Subtarget->isThumb()) {
+ if (Subtarget->isThumb())
trap_opcode = ARM::tTRAP;
- } else {
- if (Subtarget->useNaClTrap())
- trap_opcode = ARM::TRAPNaCl;
- else
- trap_opcode = ARM::TRAP;
- }
+ else
+ trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
+
BuildMI(TrapBB, dl, TII->get(trap_opcode));
DispatchBB->addSuccessor(TrapBB);
@@ -6492,11 +6512,14 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
.addImm(0)
.addMemOperand(JTMMOLd));
- unsigned NewVReg6 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
- .addReg(ARM::CPSR, RegState::Define)
- .addReg(NewVReg5, RegState::Kill)
- .addReg(NewVReg3));
+ unsigned NewVReg6 = NewVReg5;
+ if (RelocM == Reloc::PIC_) {
+ NewVReg6 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg5, RegState::Kill)
+ .addReg(NewVReg3));
+ }
BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
.addReg(NewVReg6, RegState::Kill)
@@ -6576,11 +6599,18 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
.addImm(0)
.addMemOperand(JTMMOLd));
- BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
- .addReg(NewVReg5, RegState::Kill)
- .addReg(NewVReg4)
- .addJumpTableIndex(MJTI)
- .addImm(UId);
+ if (RelocM == Reloc::PIC_) {
+ BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
+ .addReg(NewVReg5, RegState::Kill)
+ .addReg(NewVReg4)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId);
+ } else {
+ BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
+ .addReg(NewVReg5, RegState::Kill)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId);
+ }
}
// Add the jump table entries as successors to the MBB.
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 7745218..3003760 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -28,6 +28,11 @@ EnableGlobalMerge("global-merge", cl::Hidden,
cl::desc("Enable global merge pass"),
cl::init(true));
+static cl::opt<bool>
+DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden,
+ cl::desc("Inhibit optimization of S->D register accesses on A15"),
+ cl::init(false));
+
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
@@ -164,6 +169,12 @@ bool ARMPassConfig::addPreRegAlloc() {
addPass(createARMLoadStoreOptimizationPass(true));
if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isLikeA9())
addPass(createMLxExpansionPass());
+ // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
+ // enabled when NEON is available.
+ if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA15() &&
+ getARMSubtarget().hasNEON() && !DisableA15SDOptimization) {
+ addPass(createA15SDOptimizerPass());
+ }
return true;
}
@@ -174,7 +185,8 @@ bool ARMPassConfig::addPreSched2() {
addPass(createARMLoadStoreOptimizationPass());
printAndVerify("After ARM load / store optimizer");
}
- if (getARMSubtarget().hasNEON())
+ if ((DisableA15SDOptimization || !getARMSubtarget().isCortexA15()) &&
+ getARMSubtarget().hasNEON())
addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
}
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 01c04b4..140a8db 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -177,6 +177,23 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ // Single to/from double precision conversions.
+ static const CostTblEntry<MVT> NEONFltDblTbl[] = {
+ // Vector fptrunc/fpext conversions.
+ { ISD::FP_ROUND, MVT::v2f64, 2 },
+ { ISD::FP_EXTEND, MVT::v2f32, 2 },
+ { ISD::FP_EXTEND, MVT::v4f32, 4 }
+ };
+
+ if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
+ ISD == ISD::FP_EXTEND)) {
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ int Idx = CostTableLookup<MVT>(NEONFltDblTbl, array_lengthof(NEONFltDblTbl),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * NEONFltDblTbl[Idx].Cost;
+ }
+
EVT SrcTy = TLI->getValueType(Src);
EVT DstTy = TLI->getValueType(Dst);
@@ -194,17 +211,63 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
+ // Operations that we legalize using load/stores to the stack.
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 16*2 + 4*4 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 16*2 + 4*3 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 8*2 + 2*4 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 8*2 + 2*3 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 4*1 + 16*2 + 2*1 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2*1 + 8*2 + 1 },
+
// Vector float <-> i32 conversions.
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
+
{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
+ { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
+ { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
// Vector double <-> i32 conversions.
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 },
+ { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 },
+ { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 }
};
if (SrcTy.isVector() && ST->hasNEON()) {
@@ -247,7 +310,6 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
return NEONFloatConversionTbl[Idx].Cost;
}
-
// Scalar integer to float conversions.
static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = {
{ ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
@@ -303,7 +365,6 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
return ARMIntegerConversionTbl[Idx].Cost;
}
-
return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
}
@@ -326,6 +387,25 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // On NEON a vector select gets lowered to vbsl.
if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
+ // Lowering of some vector selects is currently far from perfect.
+ static const TypeConversionCostTblEntry<MVT> NEONVectorSelectTbl[] = {
+ { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 },
+ { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
+ };
+
+ EVT SelCondTy = TLI->getValueType(CondTy);
+ EVT SelValTy = TLI->getValueType(ValTy);
+ int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl,
+ array_lengthof(NEONVectorSelectTbl),
+ ISD, SelCondTy.getSimpleVT(),
+ SelValTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONVectorSelectTbl[Idx].Cost;
+
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
return LT.first;
}
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 6c678fd..c897efd 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -316,103 +316,127 @@ class ARMOperand : public MCParsedAsmOperand {
SMLoc StartLoc, EndLoc;
SmallVector<unsigned, 8> Registers;
+ struct CCOp {
+ ARMCC::CondCodes Val;
+ };
+
+ struct CopOp {
+ unsigned Val;
+ };
+
+ struct CoprocOptionOp {
+ unsigned Val;
+ };
+
+ struct ITMaskOp {
+ unsigned Mask:4;
+ };
+
+ struct MBOptOp {
+ ARM_MB::MemBOpt Val;
+ };
+
+ struct IFlagsOp {
+ ARM_PROC::IFlags Val;
+ };
+
+ struct MMaskOp {
+ unsigned Val;
+ };
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ // A vector register list is a sequential list of 1 to 4 registers.
+ struct VectorListOp {
+ unsigned RegNum;
+ unsigned Count;
+ unsigned LaneIndex;
+ bool isDoubleSpaced;
+ };
+
+ struct VectorIndexOp {
+ unsigned Val;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ /// Combined record for all forms of ARM address expressions.
+ struct MemoryOp {
+ unsigned BaseRegNum;
+ // Offset is in OffsetReg or OffsetImm. If both are zero, no offset
+ // was specified.
+ const MCConstantExpr *OffsetImm; // Offset immediate value
+ unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL
+ ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg
+ unsigned ShiftImm; // shift for OffsetReg.
+ unsigned Alignment; // 0 = no alignment specified
+ // n = alignment in bytes (2, 4, 8, 16, or 32)
+ unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit)
+ };
+
+ struct PostIdxRegOp {
+ unsigned RegNum;
+ bool isAdd;
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned ShiftImm;
+ };
+
+ struct ShifterImmOp {
+ bool isASR;
+ unsigned Imm;
+ };
+
+ struct RegShiftedRegOp {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftReg;
+ unsigned ShiftImm;
+ };
+
+ struct RegShiftedImmOp {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftImm;
+ };
+
+ struct RotImmOp {
+ unsigned Imm;
+ };
+
+ struct BitfieldOp {
+ unsigned LSB;
+ unsigned Width;
+ };
+
union {
- struct {
- ARMCC::CondCodes Val;
- } CC;
-
- struct {
- unsigned Val;
- } Cop;
-
- struct {
- unsigned Val;
- } CoprocOption;
-
- struct {
- unsigned Mask:4;
- } ITMask;
-
- struct {
- ARM_MB::MemBOpt Val;
- } MBOpt;
-
- struct {
- ARM_PROC::IFlags Val;
- } IFlags;
-
- struct {
- unsigned Val;
- } MMask;
-
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNum;
- } Reg;
-
- // A vector register list is a sequential list of 1 to 4 registers.
- struct {
- unsigned RegNum;
- unsigned Count;
- unsigned LaneIndex;
- bool isDoubleSpaced;
- } VectorList;
-
- struct {
- unsigned Val;
- } VectorIndex;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- /// Combined record for all forms of ARM address expressions.
- struct {
- unsigned BaseRegNum;
- // Offset is in OffsetReg or OffsetImm. If both are zero, no offset
- // was specified.
- const MCConstantExpr *OffsetImm; // Offset immediate value
- unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL
- ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg
- unsigned ShiftImm; // shift for OffsetReg.
- unsigned Alignment; // 0 = no alignment specified
- // n = alignment in bytes (2, 4, 8, 16, or 32)
- unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit)
- } Memory;
-
- struct {
- unsigned RegNum;
- bool isAdd;
- ARM_AM::ShiftOpc ShiftTy;
- unsigned ShiftImm;
- } PostIdxReg;
-
- struct {
- bool isASR;
- unsigned Imm;
- } ShifterImm;
- struct {
- ARM_AM::ShiftOpc ShiftTy;
- unsigned SrcReg;
- unsigned ShiftReg;
- unsigned ShiftImm;
- } RegShiftedReg;
- struct {
- ARM_AM::ShiftOpc ShiftTy;
- unsigned SrcReg;
- unsigned ShiftImm;
- } RegShiftedImm;
- struct {
- unsigned Imm;
- } RotImm;
- struct {
- unsigned LSB;
- unsigned Width;
- } Bitfield;
+ struct CCOp CC;
+ struct CopOp Cop;
+ struct CoprocOptionOp CoprocOption;
+ struct MBOptOp MBOpt;
+ struct ITMaskOp ITMask;
+ struct IFlagsOp IFlags;
+ struct MMaskOp MMask;
+ struct TokOp Tok;
+ struct RegOp Reg;
+ struct VectorListOp VectorList;
+ struct VectorIndexOp VectorIndex;
+ struct ImmOp Imm;
+ struct MemoryOp Memory;
+ struct PostIdxRegOp PostIdxReg;
+ struct ShifterImmOp ShifterImm;
+ struct RegShiftedRegOp RegShiftedReg;
+ struct RegShiftedImmOp RegShiftedImm;
+ struct RotImmOp RotImm;
+ struct BitfieldOp Bitfield;
};
ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 586834c..b832508 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -15,6 +15,7 @@ tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler)
add_public_tablegen_target(ARMCommonTableGen)
add_llvm_target(ARMCodeGen
+ A15SDOptimizer.cpp
ARMAsmPrinter.cpp
ARMBaseInstrInfo.cpp
ARMBaseRegisterInfo.cpp
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 09e15af..7a59a7d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -655,15 +655,28 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
int32_t offset = MO.getImm();
uint32_t Val = 0x2000;
+ int SoImmVal;
if (offset == INT32_MIN) {
Val = 0x1000;
- offset = 0;
+ SoImmVal = 0;
} else if (offset < 0) {
Val = 0x1000;
offset *= -1;
+ SoImmVal = ARM_AM::getSOImmVal(offset);
+    if (SoImmVal == -1) {
+ Val = 0x2000;
+ offset *= -1;
+ SoImmVal = ARM_AM::getSOImmVal(offset);
+ }
+ } else {
+ SoImmVal = ARM_AM::getSOImmVal(offset);
+    if (SoImmVal == -1) {
+ Val = 0x1000;
+ offset *= -1;
+ SoImmVal = ARM_AM::getSOImmVal(offset);
+ }
}
- int SoImmVal = ARM_AM::getSOImmVal(offset);
assert(SoImmVal != -1 && "Not a valid so_imm value!");
Val |= SoImmVal;
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index fac931a..0a8b1af 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -103,6 +103,16 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
return false;
}
+ if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ ValVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
if (LocVT == MVT::i32 || LocVT == MVT::f32) {
ofst = State.AllocateStack(4, 4);
State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
@@ -1024,6 +1034,14 @@ SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op,
return DAG.getNode(HexagonISD::CONST32, dl, getPointerTy(), Result);
}
+SDValue
+HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ SDValue BA_SD = DAG.getTargetBlockAddress(BA, MVT::i32);
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), BA_SD);
+}
+
//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//
@@ -1297,6 +1315,7 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
// Custom legalize GlobalAddress nodes into CONST32.
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
// Truncate action?
setOperationAction(ISD::TRUNCATE, MVT::i64, Expand);
@@ -1342,7 +1361,6 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
}
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
setOperationAction(ISD::BRIND, MVT::Other, Expand);
if (EmitJumpTables) {
setOperationAction(ISD::BR_JT, MVT::Other, Custom);
@@ -1352,6 +1370,9 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
// Increase jump tables cutover to 5, was 4.
setMinimumJumpTableEntries(5);
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i1, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Expand);
setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
@@ -1365,6 +1386,29 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+
+ // In V4, we have double word add/sub with carry. The problem with
+ // modelling this instruction is that it produces 2 results - Rdd and Px.
+ // To model update of Px, we will have to use Defs[p0..p3] which will
+  // cause any predicate live range to spill. So, we pretend we don't
+ // have these instructions.
+ setOperationAction(ISD::ADDE, MVT::i8, Expand);
+ setOperationAction(ISD::ADDE, MVT::i16, Expand);
+ setOperationAction(ISD::ADDE, MVT::i32, Expand);
+ setOperationAction(ISD::ADDE, MVT::i64, Expand);
+ setOperationAction(ISD::SUBE, MVT::i8, Expand);
+ setOperationAction(ISD::SUBE, MVT::i16, Expand);
+ setOperationAction(ISD::SUBE, MVT::i32, Expand);
+ setOperationAction(ISD::SUBE, MVT::i64, Expand);
+ setOperationAction(ISD::ADDC, MVT::i8, Expand);
+ setOperationAction(ISD::ADDC, MVT::i16, Expand);
+ setOperationAction(ISD::ADDC, MVT::i32, Expand);
+ setOperationAction(ISD::ADDC, MVT::i64, Expand);
+ setOperationAction(ISD::SUBC, MVT::i8, Expand);
+ setOperationAction(ISD::SUBC, MVT::i16, Expand);
+ setOperationAction(ISD::SUBC, MVT::i32, Expand);
+ setOperationAction(ISD::SUBC, MVT::i64, Expand);
+
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::CTPOP, MVT::i64, Expand);
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
@@ -1436,6 +1480,8 @@ HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return 0;
case HexagonISD::CONST32: return "HexagonISD::CONST32";
+ case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP";
+ case HexagonISD::CONST32_Int_Real: return "HexagonISD::CONST32_Int_Real";
case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC";
case HexagonISD::CMPICC: return "HexagonISD::CMPICC";
case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC";
@@ -1484,6 +1530,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG);
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 65dab85..3279cc6 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -27,6 +27,7 @@ namespace llvm {
CONST32,
CONST32_GP, // For marking data present in GP.
+ CONST32_Int_Real,
FCONST32,
SETCC,
ADJDYNALLOC,
@@ -106,6 +107,7 @@ namespace llvm {
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index d30cdda..96a252e 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -557,218 +557,43 @@ unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
}
bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const {
- switch(MI->getOpcode()) {
- default: return false;
- // JMP_EQri
- case Hexagon::JMP_EQriPt_nv_V4:
- case Hexagon::JMP_EQriPnt_nv_V4:
- case Hexagon::JMP_EQriNotPt_nv_V4:
- case Hexagon::JMP_EQriNotPnt_nv_V4:
-
- // JMP_EQri - with -1
- case Hexagon::JMP_EQriPtneg_nv_V4:
- case Hexagon::JMP_EQriPntneg_nv_V4:
- case Hexagon::JMP_EQriNotPtneg_nv_V4:
- case Hexagon::JMP_EQriNotPntneg_nv_V4:
-
- // JMP_EQrr
- case Hexagon::JMP_EQrrPt_nv_V4:
- case Hexagon::JMP_EQrrPnt_nv_V4:
- case Hexagon::JMP_EQrrNotPt_nv_V4:
- case Hexagon::JMP_EQrrNotPnt_nv_V4:
-
- // JMP_GTri
- case Hexagon::JMP_GTriPt_nv_V4:
- case Hexagon::JMP_GTriPnt_nv_V4:
- case Hexagon::JMP_GTriNotPt_nv_V4:
- case Hexagon::JMP_GTriNotPnt_nv_V4:
-
- // JMP_GTri - with -1
- case Hexagon::JMP_GTriPtneg_nv_V4:
- case Hexagon::JMP_GTriPntneg_nv_V4:
- case Hexagon::JMP_GTriNotPtneg_nv_V4:
- case Hexagon::JMP_GTriNotPntneg_nv_V4:
-
- // JMP_GTrr
- case Hexagon::JMP_GTrrPt_nv_V4:
- case Hexagon::JMP_GTrrPnt_nv_V4:
- case Hexagon::JMP_GTrrNotPt_nv_V4:
- case Hexagon::JMP_GTrrNotPnt_nv_V4:
-
- // JMP_GTrrdn
- case Hexagon::JMP_GTrrdnPt_nv_V4:
- case Hexagon::JMP_GTrrdnPnt_nv_V4:
- case Hexagon::JMP_GTrrdnNotPt_nv_V4:
- case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
-
- // JMP_GTUri
- case Hexagon::JMP_GTUriPt_nv_V4:
- case Hexagon::JMP_GTUriPnt_nv_V4:
- case Hexagon::JMP_GTUriNotPt_nv_V4:
- case Hexagon::JMP_GTUriNotPnt_nv_V4:
-
- // JMP_GTUrr
- case Hexagon::JMP_GTUrrPt_nv_V4:
- case Hexagon::JMP_GTUrrPnt_nv_V4:
- case Hexagon::JMP_GTUrrNotPt_nv_V4:
- case Hexagon::JMP_GTUrrNotPnt_nv_V4:
+ // Constant extenders are allowed only for V4 and above.
+ if (!Subtarget.hasV4TOps())
+ return false;
- // JMP_GTUrrdn
- case Hexagon::JMP_GTUrrdnPt_nv_V4:
- case Hexagon::JMP_GTUrrdnPnt_nv_V4:
- case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
- case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
+ const MCInstrDesc &MID = MI->getDesc();
+ const uint64_t F = MID.TSFlags;
+ if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask)
+ return true;
- // TFR_FI
+ // TODO: This is largely obsolete now. Will need to be removed
+  // in subsequent patches.
+ switch(MI->getOpcode()) {
+  // TFR_FI remains a special case.
case Hexagon::TFR_FI:
return true;
+ default:
+ return false;
}
+ return false;
}
+// This returns true in two cases:
+// - The opcode itself indicates that this is an extended instruction.
+// - One of the operands has been marked with the HMOTF_ConstExtended flag.
bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
- switch(MI->getOpcode()) {
- default: return false;
- // JMP_EQri
- case Hexagon::JMP_EQriPt_ie_nv_V4:
- case Hexagon::JMP_EQriPnt_ie_nv_V4:
- case Hexagon::JMP_EQriNotPt_ie_nv_V4:
- case Hexagon::JMP_EQriNotPnt_ie_nv_V4:
-
- // JMP_EQri - with -1
- case Hexagon::JMP_EQriPtneg_ie_nv_V4:
- case Hexagon::JMP_EQriPntneg_ie_nv_V4:
- case Hexagon::JMP_EQriNotPtneg_ie_nv_V4:
- case Hexagon::JMP_EQriNotPntneg_ie_nv_V4:
-
- // JMP_EQrr
- case Hexagon::JMP_EQrrPt_ie_nv_V4:
- case Hexagon::JMP_EQrrPnt_ie_nv_V4:
- case Hexagon::JMP_EQrrNotPt_ie_nv_V4:
- case Hexagon::JMP_EQrrNotPnt_ie_nv_V4:
-
- // JMP_GTri
- case Hexagon::JMP_GTriPt_ie_nv_V4:
- case Hexagon::JMP_GTriPnt_ie_nv_V4:
- case Hexagon::JMP_GTriNotPt_ie_nv_V4:
- case Hexagon::JMP_GTriNotPnt_ie_nv_V4:
-
- // JMP_GTri - with -1
- case Hexagon::JMP_GTriPtneg_ie_nv_V4:
- case Hexagon::JMP_GTriPntneg_ie_nv_V4:
- case Hexagon::JMP_GTriNotPtneg_ie_nv_V4:
- case Hexagon::JMP_GTriNotPntneg_ie_nv_V4:
-
- // JMP_GTrr
- case Hexagon::JMP_GTrrPt_ie_nv_V4:
- case Hexagon::JMP_GTrrPnt_ie_nv_V4:
- case Hexagon::JMP_GTrrNotPt_ie_nv_V4:
- case Hexagon::JMP_GTrrNotPnt_ie_nv_V4:
-
- // JMP_GTrrdn
- case Hexagon::JMP_GTrrdnPt_ie_nv_V4:
- case Hexagon::JMP_GTrrdnPnt_ie_nv_V4:
- case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4:
- case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4:
-
- // JMP_GTUri
- case Hexagon::JMP_GTUriPt_ie_nv_V4:
- case Hexagon::JMP_GTUriPnt_ie_nv_V4:
- case Hexagon::JMP_GTUriNotPt_ie_nv_V4:
- case Hexagon::JMP_GTUriNotPnt_ie_nv_V4:
-
- // JMP_GTUrr
- case Hexagon::JMP_GTUrrPt_ie_nv_V4:
- case Hexagon::JMP_GTUrrPnt_ie_nv_V4:
- case Hexagon::JMP_GTUrrNotPt_ie_nv_V4:
- case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4:
-
- // JMP_GTUrrdn
- case Hexagon::JMP_GTUrrdnPt_ie_nv_V4:
- case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4:
- case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4:
- case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4:
-
- // V4 absolute set addressing.
- case Hexagon::LDrid_abs_setimm_V4:
- case Hexagon::LDriw_abs_setimm_V4:
- case Hexagon::LDrih_abs_setimm_V4:
- case Hexagon::LDrib_abs_setimm_V4:
- case Hexagon::LDriuh_abs_setimm_V4:
- case Hexagon::LDriub_abs_setimm_V4:
-
- case Hexagon::STrid_abs_setimm_V4:
- case Hexagon::STrib_abs_setimm_V4:
- case Hexagon::STrih_abs_setimm_V4:
- case Hexagon::STriw_abs_setimm_V4:
-
- // V4 global address load.
- case Hexagon::LDd_GP_cPt_V4 :
- case Hexagon::LDd_GP_cNotPt_V4 :
- case Hexagon::LDd_GP_cdnPt_V4 :
- case Hexagon::LDd_GP_cdnNotPt_V4 :
- case Hexagon::LDb_GP_cPt_V4 :
- case Hexagon::LDb_GP_cNotPt_V4 :
- case Hexagon::LDb_GP_cdnPt_V4 :
- case Hexagon::LDb_GP_cdnNotPt_V4 :
- case Hexagon::LDub_GP_cPt_V4 :
- case Hexagon::LDub_GP_cNotPt_V4 :
- case Hexagon::LDub_GP_cdnPt_V4 :
- case Hexagon::LDub_GP_cdnNotPt_V4 :
- case Hexagon::LDh_GP_cPt_V4 :
- case Hexagon::LDh_GP_cNotPt_V4 :
- case Hexagon::LDh_GP_cdnPt_V4 :
- case Hexagon::LDh_GP_cdnNotPt_V4 :
- case Hexagon::LDuh_GP_cPt_V4 :
- case Hexagon::LDuh_GP_cNotPt_V4 :
- case Hexagon::LDuh_GP_cdnPt_V4 :
- case Hexagon::LDuh_GP_cdnNotPt_V4 :
- case Hexagon::LDw_GP_cPt_V4 :
- case Hexagon::LDw_GP_cNotPt_V4 :
- case Hexagon::LDw_GP_cdnPt_V4 :
- case Hexagon::LDw_GP_cdnNotPt_V4 :
-
- // V4 global address store.
- case Hexagon::STd_GP_cPt_V4 :
- case Hexagon::STd_GP_cNotPt_V4 :
- case Hexagon::STd_GP_cdnPt_V4 :
- case Hexagon::STd_GP_cdnNotPt_V4 :
- case Hexagon::STb_GP_cPt_V4 :
- case Hexagon::STb_GP_cNotPt_V4 :
- case Hexagon::STb_GP_cdnPt_V4 :
- case Hexagon::STb_GP_cdnNotPt_V4 :
- case Hexagon::STh_GP_cPt_V4 :
- case Hexagon::STh_GP_cNotPt_V4 :
- case Hexagon::STh_GP_cdnPt_V4 :
- case Hexagon::STh_GP_cdnNotPt_V4 :
- case Hexagon::STw_GP_cPt_V4 :
- case Hexagon::STw_GP_cNotPt_V4 :
- case Hexagon::STw_GP_cdnPt_V4 :
- case Hexagon::STw_GP_cdnNotPt_V4 :
-
- // V4 predicated global address new value store.
- case Hexagon::STb_GP_cPt_nv_V4 :
- case Hexagon::STb_GP_cNotPt_nv_V4 :
- case Hexagon::STb_GP_cdnPt_nv_V4 :
- case Hexagon::STb_GP_cdnNotPt_nv_V4 :
- case Hexagon::STh_GP_cPt_nv_V4 :
- case Hexagon::STh_GP_cNotPt_nv_V4 :
- case Hexagon::STh_GP_cdnPt_nv_V4 :
- case Hexagon::STh_GP_cdnNotPt_nv_V4 :
- case Hexagon::STw_GP_cPt_nv_V4 :
- case Hexagon::STw_GP_cNotPt_nv_V4 :
- case Hexagon::STw_GP_cdnPt_nv_V4 :
- case Hexagon::STw_GP_cdnNotPt_nv_V4 :
-
- // TFR_FI
- case Hexagon::TFR_FI_immext_V4:
-
- // TFRI_F
- case Hexagon::TFRI_f:
- case Hexagon::TFRI_cPt_f:
- case Hexagon::TFRI_cNotPt_f:
- case Hexagon::CONST64_Float_Real:
+ // First check if this is permanently extended op code.
+ const uint64_t F = MI->getDesc().TSFlags;
+ if ((F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask)
+ return true;
+ // Use MO operand flags to determine if one of MI's operands
+ // has HMOTF_ConstExtended flag set.
+ for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
+ E = MI->operands_end(); I != E; ++I) {
+    if (I->getTargetFlags() & HexagonII::HMOTF_ConstExtended)
return true;
}
+ return false;
}
bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
@@ -877,258 +702,6 @@ bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
}
}
-unsigned HexagonInstrInfo::getImmExtForm(const MachineInstr* MI) const {
- switch(MI->getOpcode()) {
- default: llvm_unreachable("Unknown type of instruction.");
- // JMP_EQri
- case Hexagon::JMP_EQriPt_nv_V4:
- return Hexagon::JMP_EQriPt_ie_nv_V4;
- case Hexagon::JMP_EQriNotPt_nv_V4:
- return Hexagon::JMP_EQriNotPt_ie_nv_V4;
- case Hexagon::JMP_EQriPnt_nv_V4:
- return Hexagon::JMP_EQriPnt_ie_nv_V4;
- case Hexagon::JMP_EQriNotPnt_nv_V4:
- return Hexagon::JMP_EQriNotPnt_ie_nv_V4;
-
- // JMP_EQri -- with -1
- case Hexagon::JMP_EQriPtneg_nv_V4:
- return Hexagon::JMP_EQriPtneg_ie_nv_V4;
- case Hexagon::JMP_EQriNotPtneg_nv_V4:
- return Hexagon::JMP_EQriNotPtneg_ie_nv_V4;
- case Hexagon::JMP_EQriPntneg_nv_V4:
- return Hexagon::JMP_EQriPntneg_ie_nv_V4;
- case Hexagon::JMP_EQriNotPntneg_nv_V4:
- return Hexagon::JMP_EQriNotPntneg_ie_nv_V4;
-
- // JMP_EQrr
- case Hexagon::JMP_EQrrPt_nv_V4:
- return Hexagon::JMP_EQrrPt_ie_nv_V4;
- case Hexagon::JMP_EQrrNotPt_nv_V4:
- return Hexagon::JMP_EQrrNotPt_ie_nv_V4;
- case Hexagon::JMP_EQrrPnt_nv_V4:
- return Hexagon::JMP_EQrrPnt_ie_nv_V4;
- case Hexagon::JMP_EQrrNotPnt_nv_V4:
- return Hexagon::JMP_EQrrNotPnt_ie_nv_V4;
-
- // JMP_GTri
- case Hexagon::JMP_GTriPt_nv_V4:
- return Hexagon::JMP_GTriPt_ie_nv_V4;
- case Hexagon::JMP_GTriNotPt_nv_V4:
- return Hexagon::JMP_GTriNotPt_ie_nv_V4;
- case Hexagon::JMP_GTriPnt_nv_V4:
- return Hexagon::JMP_GTriPnt_ie_nv_V4;
- case Hexagon::JMP_GTriNotPnt_nv_V4:
- return Hexagon::JMP_GTriNotPnt_ie_nv_V4;
-
- // JMP_GTri -- with -1
- case Hexagon::JMP_GTriPtneg_nv_V4:
- return Hexagon::JMP_GTriPtneg_ie_nv_V4;
- case Hexagon::JMP_GTriNotPtneg_nv_V4:
- return Hexagon::JMP_GTriNotPtneg_ie_nv_V4;
- case Hexagon::JMP_GTriPntneg_nv_V4:
- return Hexagon::JMP_GTriPntneg_ie_nv_V4;
- case Hexagon::JMP_GTriNotPntneg_nv_V4:
- return Hexagon::JMP_GTriNotPntneg_ie_nv_V4;
-
- // JMP_GTrr
- case Hexagon::JMP_GTrrPt_nv_V4:
- return Hexagon::JMP_GTrrPt_ie_nv_V4;
- case Hexagon::JMP_GTrrNotPt_nv_V4:
- return Hexagon::JMP_GTrrNotPt_ie_nv_V4;
- case Hexagon::JMP_GTrrPnt_nv_V4:
- return Hexagon::JMP_GTrrPnt_ie_nv_V4;
- case Hexagon::JMP_GTrrNotPnt_nv_V4:
- return Hexagon::JMP_GTrrNotPnt_ie_nv_V4;
-
- // JMP_GTrrdn
- case Hexagon::JMP_GTrrdnPt_nv_V4:
- return Hexagon::JMP_GTrrdnPt_ie_nv_V4;
- case Hexagon::JMP_GTrrdnNotPt_nv_V4:
- return Hexagon::JMP_GTrrdnNotPt_ie_nv_V4;
- case Hexagon::JMP_GTrrdnPnt_nv_V4:
- return Hexagon::JMP_GTrrdnPnt_ie_nv_V4;
- case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
- return Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4;
-
- // JMP_GTUri
- case Hexagon::JMP_GTUriPt_nv_V4:
- return Hexagon::JMP_GTUriPt_ie_nv_V4;
- case Hexagon::JMP_GTUriNotPt_nv_V4:
- return Hexagon::JMP_GTUriNotPt_ie_nv_V4;
- case Hexagon::JMP_GTUriPnt_nv_V4:
- return Hexagon::JMP_GTUriPnt_ie_nv_V4;
- case Hexagon::JMP_GTUriNotPnt_nv_V4:
- return Hexagon::JMP_GTUriNotPnt_ie_nv_V4;
-
- // JMP_GTUrr
- case Hexagon::JMP_GTUrrPt_nv_V4:
- return Hexagon::JMP_GTUrrPt_ie_nv_V4;
- case Hexagon::JMP_GTUrrNotPt_nv_V4:
- return Hexagon::JMP_GTUrrNotPt_ie_nv_V4;
- case Hexagon::JMP_GTUrrPnt_nv_V4:
- return Hexagon::JMP_GTUrrPnt_ie_nv_V4;
- case Hexagon::JMP_GTUrrNotPnt_nv_V4:
- return Hexagon::JMP_GTUrrNotPnt_ie_nv_V4;
-
- // JMP_GTUrrdn
- case Hexagon::JMP_GTUrrdnPt_nv_V4:
- return Hexagon::JMP_GTUrrdnPt_ie_nv_V4;
- case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
- return Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4;
- case Hexagon::JMP_GTUrrdnPnt_nv_V4:
- return Hexagon::JMP_GTUrrdnPnt_ie_nv_V4;
- case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
- return Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4;
-
- case Hexagon::TFR_FI:
- return Hexagon::TFR_FI_immext_V4;
-
- case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMw_ORr_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDi_MEM_V4 :
- case Hexagon::MEMw_SUBi_MEM_V4 :
- case Hexagon::MEMw_ADDr_MEM_V4 :
- case Hexagon::MEMw_SUBr_MEM_V4 :
- case Hexagon::MEMw_ANDr_MEM_V4 :
- case Hexagon::MEMw_ORr_MEM_V4 :
- case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMh_ORr_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDi_MEM_V4 :
- case Hexagon::MEMh_SUBi_MEM_V4 :
- case Hexagon::MEMh_ADDr_MEM_V4 :
- case Hexagon::MEMh_SUBr_MEM_V4 :
- case Hexagon::MEMh_ANDr_MEM_V4 :
- case Hexagon::MEMh_ORr_MEM_V4 :
- case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMb_ORr_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDi_MEM_V4 :
- case Hexagon::MEMb_SUBi_MEM_V4 :
- case Hexagon::MEMb_ADDr_MEM_V4 :
- case Hexagon::MEMb_SUBr_MEM_V4 :
- case Hexagon::MEMb_ANDr_MEM_V4 :
- case Hexagon::MEMb_ORr_MEM_V4 :
- llvm_unreachable("Needs implementing.");
- }
-}
-
-unsigned HexagonInstrInfo::getNormalBranchForm(const MachineInstr* MI) const {
- switch(MI->getOpcode()) {
- default: llvm_unreachable("Unknown type of jump instruction.");
- // JMP_EQri
- case Hexagon::JMP_EQriPt_ie_nv_V4:
- return Hexagon::JMP_EQriPt_nv_V4;
- case Hexagon::JMP_EQriNotPt_ie_nv_V4:
- return Hexagon::JMP_EQriNotPt_nv_V4;
- case Hexagon::JMP_EQriPnt_ie_nv_V4:
- return Hexagon::JMP_EQriPnt_nv_V4;
- case Hexagon::JMP_EQriNotPnt_ie_nv_V4:
- return Hexagon::JMP_EQriNotPnt_nv_V4;
-
- // JMP_EQri -- with -1
- case Hexagon::JMP_EQriPtneg_ie_nv_V4:
- return Hexagon::JMP_EQriPtneg_nv_V4;
- case Hexagon::JMP_EQriNotPtneg_ie_nv_V4:
- return Hexagon::JMP_EQriNotPtneg_nv_V4;
- case Hexagon::JMP_EQriPntneg_ie_nv_V4:
- return Hexagon::JMP_EQriPntneg_nv_V4;
- case Hexagon::JMP_EQriNotPntneg_ie_nv_V4:
- return Hexagon::JMP_EQriNotPntneg_nv_V4;
-
- // JMP_EQrr
- case Hexagon::JMP_EQrrPt_ie_nv_V4:
- return Hexagon::JMP_EQrrPt_nv_V4;
- case Hexagon::JMP_EQrrNotPt_ie_nv_V4:
- return Hexagon::JMP_EQrrNotPt_nv_V4;
- case Hexagon::JMP_EQrrPnt_ie_nv_V4:
- return Hexagon::JMP_EQrrPnt_nv_V4;
- case Hexagon::JMP_EQrrNotPnt_ie_nv_V4:
- return Hexagon::JMP_EQrrNotPnt_nv_V4;
-
- // JMP_GTri
- case Hexagon::JMP_GTriPt_ie_nv_V4:
- return Hexagon::JMP_GTriPt_nv_V4;
- case Hexagon::JMP_GTriNotPt_ie_nv_V4:
- return Hexagon::JMP_GTriNotPt_nv_V4;
- case Hexagon::JMP_GTriPnt_ie_nv_V4:
- return Hexagon::JMP_GTriPnt_nv_V4;
- case Hexagon::JMP_GTriNotPnt_ie_nv_V4:
- return Hexagon::JMP_GTriNotPnt_nv_V4;
-
- // JMP_GTri -- with -1
- case Hexagon::JMP_GTriPtneg_ie_nv_V4:
- return Hexagon::JMP_GTriPtneg_nv_V4;
- case Hexagon::JMP_GTriNotPtneg_ie_nv_V4:
- return Hexagon::JMP_GTriNotPtneg_nv_V4;
- case Hexagon::JMP_GTriPntneg_ie_nv_V4:
- return Hexagon::JMP_GTriPntneg_nv_V4;
- case Hexagon::JMP_GTriNotPntneg_ie_nv_V4:
- return Hexagon::JMP_GTriNotPntneg_nv_V4;
-
- // JMP_GTrr
- case Hexagon::JMP_GTrrPt_ie_nv_V4:
- return Hexagon::JMP_GTrrPt_nv_V4;
- case Hexagon::JMP_GTrrNotPt_ie_nv_V4:
- return Hexagon::JMP_GTrrNotPt_nv_V4;
- case Hexagon::JMP_GTrrPnt_ie_nv_V4:
- return Hexagon::JMP_GTrrPnt_nv_V4;
- case Hexagon::JMP_GTrrNotPnt_ie_nv_V4:
- return Hexagon::JMP_GTrrNotPnt_nv_V4;
-
- // JMP_GTrrdn
- case Hexagon::JMP_GTrrdnPt_ie_nv_V4:
- return Hexagon::JMP_GTrrdnPt_nv_V4;
- case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4:
- return Hexagon::JMP_GTrrdnNotPt_nv_V4;
- case Hexagon::JMP_GTrrdnPnt_ie_nv_V4:
- return Hexagon::JMP_GTrrdnPnt_nv_V4;
- case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4:
- return Hexagon::JMP_GTrrdnNotPnt_nv_V4;
-
- // JMP_GTUri
- case Hexagon::JMP_GTUriPt_ie_nv_V4:
- return Hexagon::JMP_GTUriPt_nv_V4;
- case Hexagon::JMP_GTUriNotPt_ie_nv_V4:
- return Hexagon::JMP_GTUriNotPt_nv_V4;
- case Hexagon::JMP_GTUriPnt_ie_nv_V4:
- return Hexagon::JMP_GTUriPnt_nv_V4;
- case Hexagon::JMP_GTUriNotPnt_ie_nv_V4:
- return Hexagon::JMP_GTUriNotPnt_nv_V4;
-
- // JMP_GTUrr
- case Hexagon::JMP_GTUrrPt_ie_nv_V4:
- return Hexagon::JMP_GTUrrPt_nv_V4;
- case Hexagon::JMP_GTUrrNotPt_ie_nv_V4:
- return Hexagon::JMP_GTUrrNotPt_nv_V4;
- case Hexagon::JMP_GTUrrPnt_ie_nv_V4:
- return Hexagon::JMP_GTUrrPnt_nv_V4;
- case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4:
- return Hexagon::JMP_GTUrrNotPnt_nv_V4;
-
- // JMP_GTUrrdn
- case Hexagon::JMP_GTUrrdnPt_ie_nv_V4:
- return Hexagon::JMP_GTUrrdnPt_nv_V4;
- case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4:
- return Hexagon::JMP_GTUrrdnNotPt_nv_V4;
- case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4:
- return Hexagon::JMP_GTUrrdnPnt_nv_V4;
- case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4:
- return Hexagon::JMP_GTUrrdnNotPnt_nv_V4;
- }
-}
-
-
bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
switch (MI->getOpcode()) {
default: return false;
@@ -1326,6 +899,16 @@ bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const {
}
}
+bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const {
+ if (isNewValueJump(MI))
+ return true;
+
+ if (isNewValueStore(MI))
+ return true;
+
+ return false;
+}
+
bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const {
return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4;
}
@@ -2366,6 +1949,10 @@ isValidOffset(const int Opcode, const int Offset) const {
// the given "Opcode". If "Offset" is not in the correct range, "ADD_ri" is
// inserted to calculate the final address. Due to this reason, the function
// assumes that the "Offset" has correct alignment.
+  // We used to assert if the offset was not properly aligned; however, there
+  // are cases where a misaligned pointer recast can cause this problem, and
+  // we need to allow for it. The front end warns of such misalignments with
+  // respect to the load size.
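+  // (Illustrative assumption, not from this change: source such as
+  //   "int *p = (int *)((char *)q + 2); *p = 0;"
+  // can reach here as a word store whose offset is only 2-byte aligned.)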
switch(Opcode) {
@@ -2375,7 +1962,6 @@ isValidOffset(const int Opcode, const int Offset) const {
case Hexagon::STriw_indexed:
case Hexagon::STriw:
case Hexagon::STriw_f:
- assert((Offset % 4 == 0) && "Offset has incorrect alignment");
return (Offset >= Hexagon_MEMW_OFFSET_MIN) &&
(Offset <= Hexagon_MEMW_OFFSET_MAX);
@@ -2385,14 +1971,12 @@ isValidOffset(const int Opcode, const int Offset) const {
case Hexagon::STrid:
case Hexagon::STrid_indexed:
case Hexagon::STrid_f:
- assert((Offset % 8 == 0) && "Offset has incorrect alignment");
return (Offset >= Hexagon_MEMD_OFFSET_MIN) &&
(Offset <= Hexagon_MEMD_OFFSET_MAX);
case Hexagon::LDrih:
case Hexagon::LDriuh:
case Hexagon::STrih:
- assert((Offset % 2 == 0) && "Offset has incorrect alignment");
return (Offset >= Hexagon_MEMH_OFFSET_MIN) &&
(Offset <= Hexagon_MEMH_OFFSET_MAX);
@@ -2419,7 +2003,6 @@ isValidOffset(const int Opcode, const int Offset) const {
case Hexagon::MEMw_SUBr_MEM_V4 :
case Hexagon::MEMw_ANDr_MEM_V4 :
case Hexagon::MEMw_ORr_MEM_V4 :
- assert ((Offset % 4) == 0 && "MEMOPw offset is not aligned correctly." );
return (0 <= Offset && Offset <= 255);
case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
@@ -2434,7 +2017,6 @@ isValidOffset(const int Opcode, const int Offset) const {
case Hexagon::MEMh_SUBr_MEM_V4 :
case Hexagon::MEMh_ANDr_MEM_V4 :
case Hexagon::MEMh_ORr_MEM_V4 :
- assert ((Offset % 2) == 0 && "MEMOPh offset is not aligned correctly." );
return (0 <= Offset && Offset <= 127);
case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
@@ -2800,7 +2382,26 @@ isConditionalStore (const MachineInstr* MI) const {
}
}
+unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+
+ return((F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask);
+}
+/// immediateExtend - Changes the instruction in place to one using an immediate
+/// extender.
+void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const {
+  assert((isExtendable(MI) || isConstExtended(MI)) &&
+ "Instruction must be extendable");
+ // Find which operand is extendable.
+ short ExtOpNum = getCExtOpNum(MI);
+ MachineOperand &MO = MI->getOperand(ExtOpNum);
+ // This needs to be something we understand.
+ assert((MO.isMBB() || MO.isImm()) &&
+ "Branch with unknown extendable field type");
+ // Mark given operand as extended.
+ MO.addTargetFlag(HexagonII::HMOTF_ConstExtended);
+}
DFAPacketizer *HexagonInstrInfo::
CreateTargetScheduleState(const TargetMachine *TM,
@@ -2827,3 +2428,155 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
return false;
}
+
+bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const {
+
+ // Constant extenders are allowed only for V4 and above.
+ if (!Subtarget.hasV4TOps())
+ return false;
+
+ const uint64_t F = MI->getDesc().TSFlags;
+ unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
+ if (isExtended) // Instruction must be extended.
+ return true;
+
+ unsigned isExtendable = (F >> HexagonII::ExtendablePos)
+ & HexagonII::ExtendableMask;
+ if (!isExtendable)
+ return false;
+
+ short ExtOpNum = getCExtOpNum(MI);
+ const MachineOperand &MO = MI->getOperand(ExtOpNum);
+ // Use MO operand flags to determine if MO
+ // has the HMOTF_ConstExtended flag set.
+  if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended)
+ return true;
+ // If this is a Machine BB address we are talking about, and it is
+ // not marked as extended, say so.
+ if (MO.isMBB())
+ return false;
+
+  // We could be using an instruction with an extendable immediate and
+  // shoehorning a global address into it. If it is a global address it will
+  // be constant extended. We do this for COMBINE.
+  // We currently handle isGlobal() and isSymbol(), as those are the kinds of
+  // objects we expect to end up with here for now.
+  // In the future we may need to handle other operand kinds as well.
+ if (MO.isGlobal() || MO.isSymbol())
+ return true;
+
+ // If the extendable operand is not 'Immediate' type, the instruction should
+ // have 'isExtended' flag set.
+ assert(MO.isImm() && "Extendable operand must be Immediate type");
+
+ int MinValue = getMinValue(MI);
+ int MaxValue = getMaxValue(MI);
+ int ImmValue = MO.getImm();
+
+ return (ImmValue < MinValue || ImmValue > MaxValue);
+}
+
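+// (Worked example under an assumed signed 16-bit extent: immediates from
+// -32768 to 32767 fit directly, so "#70000" makes isConstExtended return true
+// and the packetizer must reserve resources for the extender word.)
+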
+// Returns true if a particular operand is extendable for an instruction.
+bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI,
+ unsigned short OperandNum) const {
+ // Constant extenders are allowed only for V4 and above.
+ if (!Subtarget.hasV4TOps())
+ return false;
+
+ const uint64_t F = MI->getDesc().TSFlags;
+
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask)
+ == OperandNum;
+}
+
+// Returns Operand Index for the constant extended instruction.
+unsigned short HexagonInstrInfo::getCExtOpNum(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask);
+}
+
+// Returns the min value that doesn't need to be extended.
+int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+ & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos)
+ & HexagonII::ExtentBitsMask;
+
+ if (isSigned) // if value is signed
+ return -1 << (bits - 1);
+ else
+ return 0;
+}
+
+// Returns the max value that doesn't need to be extended.
+int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+ & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos)
+ & HexagonII::ExtentBitsMask;
+
+ if (isSigned) // if value is signed
+ return ~(-1 << (bits - 1));
+ else
+ return ~(-1 << bits);
+}
+
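+// (Illustrative arithmetic, not from this change: for a signed 12-bit extent,
+// getMinValue returns -1 << 11 = -2048 and getMaxValue returns ~(-1 << 11) =
+// 2047; for an unsigned 12-bit extent the range is 0 to ~(-1 << 12) = 4095.)
+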
+// Returns true if an instruction can be converted into a non-extended
+// equivalent instruction.
+bool HexagonInstrInfo::NonExtEquivalentExists (const MachineInstr *MI) const {
+
+ short NonExtOpcode;
+  // Check if the instruction has a register form that uses a register in
+  // place of the extended operand; if so, a non-extended equivalent exists.
+ if (Hexagon::getRegForm(MI->getOpcode()) >= 0)
+ return true;
+
+ if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) {
+    // Check addressing mode and retrieve non-ext equivalent instruction.
+
+ switch (getAddrMode(MI)) {
+ case HexagonII::Absolute :
+ // Load/store with absolute addressing mode can be converted into
+ // base+offset mode.
+ NonExtOpcode = Hexagon::getBasedWithImmOffset(MI->getOpcode());
+ break;
+ case HexagonII::BaseImmOffset :
+ // Load/store with base+offset addressing mode can be converted into
+      // base+register offset addressing mode. However, the left-shift operand
+      // should be set to 0.
+ NonExtOpcode = Hexagon::getBaseWithRegOffset(MI->getOpcode());
+ break;
+ default:
+ return false;
+ }
+ if (NonExtOpcode < 0)
+ return false;
+ return true;
+ }
+ return false;
+}
+
+// Returns opcode of the non-extended equivalent instruction.
+short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const {
+
+  // Check if the instruction has a register form that uses a register in
+  // place of the extended operand; if so, return that as the non-extended form.
+ short NonExtOpcode = Hexagon::getRegForm(MI->getOpcode());
+ if (NonExtOpcode >= 0)
+ return NonExtOpcode;
+
+ if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) {
+    // Check addressing mode and retrieve non-ext equivalent instruction.
+ switch (getAddrMode(MI)) {
+ case HexagonII::Absolute :
+ return Hexagon::getBasedWithImmOffset(MI->getOpcode());
+ case HexagonII::BaseImmOffset :
+ return Hexagon::getBaseWithRegOffset(MI->getOpcode());
+ default:
+ return -1;
+ }
+ }
+ return -1;
+}
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 4e36dfb..d2f059a 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -169,6 +169,7 @@ public:
bool isConditionalALU32 (const MachineInstr* MI) const;
bool isConditionalLoad (const MachineInstr* MI) const;
bool isConditionalStore(const MachineInstr* MI) const;
+ bool isNewValueInst(const MachineInstr* MI) const;
bool isDeallocRet(const MachineInstr *MI) const;
unsigned getInvertedPredicatedOpcode(const int Opc) const;
bool isExtendable(const MachineInstr* MI) const;
@@ -177,9 +178,18 @@ public:
bool isNewValueStore(const MachineInstr* MI) const;
bool isNewValueJump(const MachineInstr* MI) const;
bool isNewValueJumpCandidate(const MachineInstr *MI) const;
- unsigned getImmExtForm(const MachineInstr* MI) const;
- unsigned getNormalBranchForm(const MachineInstr* MI) const;
+
+ void immediateExtend(MachineInstr *MI) const;
+ bool isConstExtended(MachineInstr *MI) const;
+ unsigned getAddrMode(const MachineInstr* MI) const;
+ bool isOperandExtended(const MachineInstr *MI,
+ unsigned short OperandNum) const;
+ unsigned short getCExtOpNum(const MachineInstr *MI) const;
+ int getMinValue(const MachineInstr *MI) const;
+ int getMaxValue(const MachineInstr *MI) const;
+ bool NonExtEquivalentExists (const MachineInstr *MI) const;
+ short getNonExtOpcode(const MachineInstr *MI) const;
private:
int getMatchingCondBranchOpcode(int Opc, bool sense) const;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
index 082772a..d7bab20 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -251,28 +251,55 @@ multiclass TFR_base<string CextOp> {
}
}
+class T_TFR64_Pred<bit PredNot, bit isPredNew>
+ : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2),
+ !if(PredNot, "if (!$src1", "if ($src1")#
+ !if(isPredNew, ".new) ", ") ")#"$dst = $src2", []>
+{
+ bits<5> dst;
+ bits<2> src1;
+ bits<5> src2;
+
+ let IClass = 0b1111;
+ let Inst{27-24} = 0b1101;
+ let Inst{13} = isPredNew;
+ let Inst{7} = PredNot;
+ let Inst{4-0} = dst;
+ let Inst{6-5} = src1;
+ let Inst{20-17} = src2{4-1};
+ let Inst{16} = 0b1;
+ let Inst{12-9} = src2{4-1};
+ let Inst{8} = 0b0;
+}
+
multiclass TFR64_Pred<bit PredNot> {
let PredSense = !if(PredNot, "false", "true") in {
- def _c#NAME : ALU32_rr<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, DoubleRegs:$src2),
- !if(PredNot, "if (!$src1", "if ($src1")#") $dst = $src2",
- []>;
- // Predicate new
+ def _c#NAME : T_TFR64_Pred<PredNot, 0>;
+
let PNewValue = "new" in
- def _cdn#NAME : ALU32_rr<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, DoubleRegs:$src2),
- !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = $src2",
- []>;
+ def _cdn#NAME : T_TFR64_Pred<PredNot, 1>; // Predicate new
}
}
-let InputType = "reg", neverHasSideEffects = 1 in
-multiclass TFR64_base<string CextOp> {
- let CextOpcode = CextOp, BaseOpcode = CextOp in {
+let neverHasSideEffects = 1 in
+multiclass TFR64_base<string BaseName> {
+ let BaseOpcode = BaseName in {
let isPredicable = 1 in
- def NAME : ALU32_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
- "$dst = $src1",
- []>;
+ def NAME : ALU32Inst <(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1),
+ "$dst = $src1" > {
+ bits<5> dst;
+ bits<5> src1;
+
+ let IClass = 0b1111;
+ let Inst{27-23} = 0b01010;
+ let Inst{4-0} = dst;
+ let Inst{20-17} = src1{4-1};
+ let Inst{16} = 0b1;
+ let Inst{12-9} = src1{4-1};
+ let Inst{8} = 0b0;
+ }
let isPredicated = 1 in {
defm Pt : TFR64_Pred<0>;
@@ -281,9 +308,8 @@ multiclass TFR64_base<string CextOp> {
}
}
-
multiclass TFRI_Pred<bit PredNot> {
- let PredSense = !if(PredNot, "false", "true") in {
+ let isMoveImm = 1, PredSense = !if(PredNot, "false", "true") in {
def _c#NAME : ALU32_ri<(outs IntRegs:$dst),
(ins PredRegs:$src1, s12Ext:$src2),
!if(PredNot, "if (!$src1", "if ($src1")#") $dst = #$src2",
@@ -301,8 +327,8 @@ multiclass TFRI_Pred<bit PredNot> {
let InputType = "imm", isExtendable = 1, isExtentSigned = 1 in
multiclass TFRI_base<string CextOp> {
let CextOpcode = CextOp, BaseOpcode = CextOp#I in {
- let opExtendable = 1, opExtentBits = 16, isMoveImm = 1, isPredicable = 1,
- isReMaterializable = 1 in
+    let isAsCheapAsAMove = 1, opExtendable = 1, opExtentBits = 16,
+ isMoveImm = 1, isPredicable = 1, isReMaterializable = 1 in
def NAME : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1),
"$dst = #$src1",
[(set (i32 IntRegs:$dst), s16ExtPred:$src1)]>;
@@ -317,7 +343,7 @@ multiclass TFRI_base<string CextOp> {
defm TFRI : TFRI_base<"TFR">, ImmRegRel, PredNewRel;
defm TFR : TFR_base<"TFR">, ImmRegRel, PredNewRel;
-defm TFR64 : TFR64_base<"TFR64">, ImmRegRel, PredNewRel;
+defm TFR64 : TFR64_base<"TFR64">, PredNewRel;
// Transfer control register.
let neverHasSideEffects = 1 in
@@ -2050,6 +2076,10 @@ def CONST32_Int_Real : LDInst2<(outs IntRegs:$dst), (ins i32imm:$global),
"$dst = CONST32(#$global)",
[(set (i32 IntRegs:$dst), imm:$global) ]>;
+// Map BlockAddress lowering to CONST32_Int_Real
+def : Pat<(HexagonCONST32_GP tblockaddress:$addr),
+ (CONST32_Int_Real tblockaddress:$addr)>;
+
let isReMaterializable = 1, isMoveImm = 1 in
def CONST32_Label : LDInst2<(outs IntRegs:$dst), (ins bblabel:$label),
"$dst = CONST32($label)",
@@ -2845,6 +2875,18 @@ def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
s11_0ExtPred:$offset)))>,
Requires<[NoV4T]>;
+// i1 -> i64
+def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>,
+ Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi1 (add (i32 IntRegs:$src1),
+ s11_0ExtPred:$offset))),
+ (i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1,
+ s11_0ExtPred:$offset)))>,
+ Requires<[NoV4T]>;
+
// i16 -> i64
def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
(i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>,
@@ -3020,6 +3062,11 @@ def BR_JT : JRInst<(outs), (ins IntRegs:$src),
"jumpr $src",
[(HexagonBR_JT (i32 IntRegs:$src))]>;
+let isBranch=1, isIndirectBranch=1, isTerminator=1 in
+def BRIND : JRInst<(outs), (ins IntRegs:$src),
+ "jumpr $src",
+ [(brind (i32 IntRegs:$src))]>;
+
def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>;
def : Pat<(HexagonWrapperJT tjumptable:$dst),
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index e1b2f88..1d0643d 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -940,6 +940,18 @@ def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
s11_0ExtPred:$offset)))>,
Requires<[HasV4T]>;
+// zext i1->i64
+def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDriub ADDRriS11_0:$src1)))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi1 (add (i32 IntRegs:$src1),
+ s11_0ExtPred:$offset))),
+ (i64 (COMBINE_Ir_V4 0, (LDriub_indexed IntRegs:$src1,
+ s11_0ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
+
// zext i16->i64
def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
(i64 (COMBINE_Ir_V4 0, (LDriuh ADDRriS11_1:$src1)))>,
@@ -3790,6 +3802,11 @@ def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$src1),
[(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>,
Requires<[HasV4T]>;
+// Transfer a block address into a register
+def : Pat<(HexagonCONST32_GP tblockaddress:$src1),
+ (TFRI_V4 tblockaddress:$src1)>,
+ Requires<[HasV4T]>;
+
let AddedComplexity=50, neverHasSideEffects = 1, isPredicated = 1 in
def TFRI_cPt_V4 : ALU32_ri<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$src2),
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index ced17b3..1388ad4 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -15,7 +15,8 @@
#define DEBUG_TYPE "misched"
#include "HexagonMachineScheduler.h"
-#include <queue>
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/IR/Function.h"
using namespace llvm;
@@ -159,6 +160,9 @@ void VLIWMachineScheduler::schedule() {
SchedImpl->initialize(this);
// To view Height/Depth correctly, they should be accessed at least once.
+ //
+  // FIXME: SUnit::dumpAll always recomputes depth and height now. The max
+ // depth/height could be computed directly from the roots and leaves.
DEBUG(unsigned maxH = 0;
for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
if (SUnits[su].getHeight() > maxH)
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index aff6b86..866beb1 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -17,35 +17,36 @@
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "packets"
-#include "Hexagon.h"
-#include "HexagonMachineFunctionInfo.h"
-#include "HexagonRegisterInfo.h"
-#include "HexagonSubtarget.h"
-#include "HexagonTargetMachine.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/DFAPacketizer.h"
-#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+
#include <map>
using namespace llvm;
@@ -257,7 +258,7 @@ void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) {
bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- assert(QII->isExtended(MI) &&
+ assert((QII->isExtended(MI) || QII->isConstExtended(MI)) &&
"Should only be called for constant extended instructions");
MachineFunction *MF = MI->getParent()->getParent();
MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i),
@@ -350,17 +351,6 @@ static bool IsControlFlow(MachineInstr* MI) {
return (MI->getDesc().isTerminator() || MI->getDesc().isCall());
}
-bool HexagonPacketizerList::isNewValueInst(MachineInstr* MI) {
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- if (QII->isNewValueJump(MI))
- return true;
-
- if (QII->isNewValueStore(MI))
- return true;
-
- return false;
-}
-
// Function returns true if an instruction can be promoted to the new-value
// store. It will always return false for v2 and v3.
// It lists all the conditional and unconditional stores that can be promoted
@@ -2165,7 +2155,8 @@ static bool GetPredicateSense(MachineInstr* MI,
}
bool HexagonPacketizerList::isDotNewInst(MachineInstr* MI) {
- if (isNewValueInst(MI))
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ if (QII->isNewValueInst(MI))
return true;
switch (MI->getOpcode()) {
@@ -2893,13 +2884,13 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// dealloc_return and memop always take SLOT0.
// Arch spec 3.4.4.2
if (QRI->Subtarget.hasV4TOps()) {
-
- if (MCIDI.mayStore() && MCIDJ.mayStore() && isNewValueInst(J)) {
+ if (MCIDI.mayStore() && MCIDJ.mayStore() &&
+ (QII->isNewValueInst(J) || QII->isMemOp(J) || QII->isMemOp(I))) {
Dependence = true;
return false;
}
- if ( (QII->isMemOp(J) && MCIDI.mayStore())
+ if ((QII->isMemOp(J) && MCIDI.mayStore())
|| (MCIDJ.mayStore() && QII->isMemOp(I))
|| (QII->isMemOp(J) && QII->isMemOp(I))) {
Dependence = true;
@@ -3196,7 +3187,7 @@ HexagonPacketizerList::addToPacket(MachineInstr *MI) {
MachineInstr *nvjMI = MII;
assert(ResourceTracker->canReserveResources(MI));
ResourceTracker->reserveResources(MI);
- if (QII->isExtended(MI) &&
+ if ((QII->isExtended(MI) || QII->isConstExtended(MI)) &&
!tryAllocateResourcesForConstExt(MI)) {
endPacket(MBB, MI);
ResourceTracker->reserveResources(MI);
@@ -3216,7 +3207,7 @@ HexagonPacketizerList::addToPacket(MachineInstr *MI) {
&& (!tryAllocateResourcesForConstExt(nvjMI)
|| !ResourceTracker->canReserveResources(nvjMI)))
|| // For non-extended instruction, no need to allocate extra 4 bytes.
- (!QII->isExtended(nvjMI) &&
+ (!QII->isExtended(nvjMI) &&
!ResourceTracker->canReserveResources(nvjMI)))
{
endPacket(MBB, MI);
@@ -3232,7 +3223,7 @@ HexagonPacketizerList::addToPacket(MachineInstr *MI) {
CurrentPacketMIs.push_back(MI);
CurrentPacketMIs.push_back(nvjMI);
} else {
- if ( QII->isExtended(MI)
+ if ( (QII->isExtended(MI) || QII->isConstExtended(MI))
&& ( !tryAllocateResourcesForConstExt(MI)
|| !ResourceTracker->canReserveResources(MI)))
{
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index 5f9718b..d4a93b5 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -154,6 +154,28 @@ namespace HexagonII {
// *** The code above must match HexagonInstrFormat*.td *** //
+ // Hexagon specific MO operand flag mask.
+ enum HexagonMOTargetFlagVal {
+ //===------------------------------------------------------------------===//
+ // Hexagon Specific MachineOperand flags.
+ MO_NO_FLAG,
+
+ HMOTF_ConstExtended = 1,
+
+ /// MO_PCREL - On a symbol operand, indicates a PC-relative relocation
+ /// Used for computing a global address for PIC compilations
+ MO_PCREL,
+
+ /// MO_GOT - Indicates a GOT-relative relocation
+ MO_GOT,
+
+ // Low or high part of a symbol.
+ MO_LO16, MO_HI16,
+
+ // Offset from the base of the SDA.
+ MO_GPREL
+ };
+
} // End namespace HexagonII.
} // End namespace llvm.
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
index ad495ff..dda6e24 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -82,29 +82,35 @@ struct MBlazeOperand : public MCParsedAsmOperand {
SMLoc StartLoc, EndLoc;
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct MemOp {
+ unsigned Base;
+ unsigned OffReg;
+ const MCExpr *Off;
+ };
+
+ struct FslImmOp {
+ const MCExpr *Val;
+ };
+
union {
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNum;
- } Reg;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- struct {
- unsigned Base;
- unsigned OffReg;
- const MCExpr *Off;
- } Mem;
-
- struct {
- const MCExpr *Val;
- } FslImm;
+ struct TokOp Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ struct MemOp Mem;
+ struct FslImmOp FslImm;
};
MBlazeOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 7664c60..d4f9432 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -160,7 +160,8 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
// Operations not directly supported by MBlaze.
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTR, MVT::i32, Expand);
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index d0fd7dc..bd83afc 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -122,7 +122,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
}
void MBlazeRegisterInfo::
-processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *) const {
// Set the stack offset where GP must be saved/loaded from.
MachineFrameInfo *MFI = MF.getFrameInfo();
MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
index 99a2fac..497f386 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -55,7 +55,8 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
/// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
index ae2e556..e504011 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -285,8 +285,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
}
void
-MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
- const {
+MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *) const {
// Create a frame entry for the FPW register that must be saved.
if (hasFP(MF)) {
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
index a077dd7..c673f59 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -50,7 +50,8 @@ public:
bool hasFP(const MachineFunction &MF) const;
bool hasReservedCallFrame(const MachineFunction &MF) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
};
} // End llvm namespace
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index ab4e64e..e0ed870 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -73,7 +73,7 @@ namespace llvm {
public:
explicit MSP430TargetLowering(MSP430TargetMachine &TM);
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
/// LowerOperation - Provide custom lowering hooks for some operations.
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index ade6084..ebe12c9 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -211,25 +211,30 @@ private:
MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+ struct Token {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ RegisterKind Kind;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct MemOp {
+ unsigned Base;
+ const MCExpr *Off;
+ };
+
union {
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNum;
- RegisterKind Kind;
- } Reg;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- struct {
- unsigned Base;
- const MCExpr *Off;
- } Mem;
+ struct Token Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ struct MemOp Mem;
};
SMLoc StartLoc, EndLoc;
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index d6fac0c..cf8bb18 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -16,10 +16,13 @@ add_public_tablegen_target(MipsCommonTableGen)
add_llvm_target(MipsCodeGen
Mips16FrameLowering.cpp
Mips16InstrInfo.cpp
+ Mips16ISelDAGToDAG.cpp
+ Mips16ISelLowering.cpp
Mips16RegisterInfo.cpp
MipsAnalyzeImmediate.cpp
MipsAsmPrinter.cpp
MipsCodeEmitter.cpp
+ MipsConstantIslandPass.cpp
MipsDelaySlotFiller.cpp
MipsJITInfo.cpp
MipsInstrInfo.cpp
@@ -32,6 +35,8 @@ add_llvm_target(MipsCodeGen
MipsRegisterInfo.cpp
MipsSEFrameLowering.cpp
MipsSEInstrInfo.cpp
+ MipsSEISelDAGToDAG.cpp
+ MipsSEISelLowering.cpp
MipsSERegisterInfo.cpp
MipsSubtarget.cpp
MipsTargetMachine.cpp
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index 2963f7e..8c65bb4 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -27,6 +27,7 @@ namespace llvm {
FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
JITCodeEmitter &JCE);
+ FunctionPass *createMipsConstantIslandPass(MipsTargetMachine &tm);
} // end namespace llvm;
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 1326623..eefb02a 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -44,8 +44,6 @@ def FeatureN64 : SubtargetFeature<"n64", "MipsABI", "N64",
"Enable n64 ABI">;
def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI",
"Enable eabi ABI">;
-def FeatureAndroid : SubtargetFeature<"android", "IsAndroid", "true",
- "Target is android">;
def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
"true", "Enable vector FPU instructions.">;
def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true",
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
new file mode 100644
index 0000000..00b3449
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -0,0 +1,308 @@
+//===-- Mips16ISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Mips16 ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-isel"
+#include "Mips16ISelDAGToDAG.h"
+#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+/// Select multiply instructions.
+std::pair<SDNode*, SDNode*>
+Mips16DAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty,
+ bool HasLo, bool HasHi) {
+ SDNode *Lo = 0, *Hi = 0;
+ SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0),
+ N->getOperand(1));
+ SDValue InFlag = SDValue(Mul, 0);
+
+ if (HasLo) {
+ unsigned Opcode = Mips::Mflo16;
+ Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag);
+ InFlag = SDValue(Lo, 1);
+ }
+ if (HasHi) {
+ unsigned Opcode = Mips::Mfhi16;
+ Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag);
+ }
+ return std::make_pair(Lo, Hi);
+}
+
+void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ if (!MipsFI->globalBaseRegSet())
+ return;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
+ const TargetRegisterClass *RC =
+ (const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
+
+ V0 = RegInfo.createVirtualRegister(RC);
+ V1 = RegInfo.createVirtualRegister(RC);
+ V2 = RegInfo.createVirtualRegister(RC);
+
+ BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0)
+ .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1)
+ .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
+ BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16);
+ BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg)
+ .addReg(V1).addReg(V2);
+}
+
+// Insert instructions to initialize the Mips16 SP Alias register in the
+// first MBB of the function.
+//
+void Mips16DAGToDAGISel::initMips16SPAliasReg(MachineFunction &MF) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ if (!MipsFI->mips16SPAliasRegSet())
+ return;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg();
+
+ BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg)
+ .addReg(Mips::SP);
+}
+
+void Mips16DAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
+ initGlobalBaseReg(MF);
+ initMips16SPAliasReg(MF);
+}
+
+/// getMips16SPAliasReg - Return a register operand for the Mips16-accessible
+/// register that aliases SP (set up by initMips16SPAliasReg).
+SDValue Mips16DAGToDAGISel::getMips16SPAliasReg() {
+ unsigned Mips16SPAliasReg =
+ MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg();
+ return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy());
+}
+
+void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
+ SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy());
+ if (Parent) {
+ switch (Parent->getOpcode()) {
+ case ISD::LOAD: {
+ LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent);
+ switch (SD->getMemoryVT().getSizeInBits()) {
+ case 8:
+ case 16:
+ AliasReg = TM.getFrameLowering()->hasFP(*MF)?
+ AliasFPReg: getMips16SPAliasReg();
+ return;
+ }
+ break;
+ }
+ case ISD::STORE: {
+ StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent);
+ switch (SD->getMemoryVT().getSizeInBits()) {
+ case 8:
+ case 16:
+ AliasReg = TM.getFrameLowering()->hasFP(*MF)?
+ AliasFPReg: getMips16SPAliasReg();
+ return;
+ }
+ break;
+ }
+ }
+ }
+ AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy());
+ return;
+
+}
+
+bool Mips16DAGToDAGISel::selectAddr16(
+ SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset,
+ SDValue &Alias) {
+ EVT ValTy = Addr.getValueType();
+
+ Alias = CurDAG->getTargetConstant(0, ValTy);
+
+ // if Address is FI, get the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ getMips16SPRefReg(Parent, Alias);
+ return true;
+ }
+  // On PIC code, load the global address (GA).
+ if (Addr.getOpcode() == MipsISD::Wrapper) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+ if (TM.getRelocationModel() != Reloc::PIC_) {
+ if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress))
+ return false;
+ }
+ // Addresses of the form FI+const or FI|const
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ if (isInt<16>(CN->getSExtValue())) {
+
+ // If the first operand is a FI, get the TargetFI Node
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0))) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ getMips16SPRefReg(Parent, Alias);
+ }
+ else
+ Base = Addr.getOperand(0);
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
+ return true;
+ }
+ }
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ // When loading from constant pools, load the lower address part in
+ // the instruction itself. Example, instead of:
+ // lui $2, %hi($CPI1_0)
+ // addiu $2, $2, %lo($CPI1_0)
+ // lwc1 $f0, 0($2)
+ // Generate:
+ // lui $2, %hi($CPI1_0)
+ // lwc1 $f0, %lo($CPI1_0)($2)
+ if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
+ Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
+ SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
+ if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
+ isa<JumpTableSDNode>(Opnd0)) {
+ Base = Addr.getOperand(0);
+ Offset = Opnd0;
+ return true;
+ }
+ }
+
+ // If an indexed floating point load/store can be emitted, return false.
+ const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
+
+ if (LS &&
+ (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
+ Subtarget.hasFPIdx())
+ return false;
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ return true;
+}
+
+/// Select instructions not customized! Used for
+/// expanded, promoted and normal instructions
+std::pair<bool, SDNode*> Mips16DAGToDAGISel::selectNode(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc DL = Node->getDebugLoc();
+
+ ///
+ // Instruction Selection not handled by the auto-generated
+ // tablegen selection should be handled here.
+ ///
+ EVT NodeTy = Node->getValueType(0);
+ unsigned MultOpc;
+
+ switch(Opcode) {
+ default: break;
+
+ case ISD::SUBE:
+ case ISD::ADDE: {
+ SDValue InFlag = Node->getOperand(2), CmpLHS;
+ unsigned Opc = InFlag.getOpcode(); (void)Opc;
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+ unsigned MOp;
+ if (Opcode == ISD::ADDE) {
+ CmpLHS = InFlag.getValue(0);
+ MOp = Mips::AdduRxRyRz16;
+ } else {
+ CmpLHS = InFlag.getOperand(0);
+ MOp = Mips::SubuRxRyRz16;
+ }
+
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+
+ EVT VT = LHS.getValueType();
+
+ unsigned Sltu_op = Mips::SltuRxRyRz16;
+ SDNode *Carry = CurDAG->getMachineNode(Sltu_op, DL, VT, Ops, 2);
+ unsigned Addu_op = Mips::AdduRxRyRz16;
+ SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, DL, VT,
+ SDValue(Carry,0), RHS);
+
+ SDNode *Result = CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS,
+ SDValue(AddCarry,0));
+ return std::make_pair(true, Result);
+ }
+
+ /// Mul with two results
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 : Mips::MultRxRy16);
+ std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy,
+ true, true);
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
+
+ if (!SDValue(Node, 1).use_empty())
+ ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
+
+ return std::make_pair(true, (SDNode*)NULL);
+ }
+
+ case ISD::MULHS:
+ case ISD::MULHU: {
+ MultOpc = (Opcode == ISD::MULHU ? Mips::MultuRxRy16 : Mips::MultRxRy16);
+ SDNode *Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second;
+ return std::make_pair(true, Result);
+ }
+ }
+
+ return std::make_pair(false, (SDNode*)NULL);
+}
+
+FunctionPass *llvm::createMips16ISelDag(MipsTargetMachine &TM) {
+ return new Mips16DAGToDAGISel(TM);
+}
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.h b/lib/Target/Mips/Mips16ISelDAGToDAG.h
new file mode 100644
index 0000000..baa8587
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.h
@@ -0,0 +1,51 @@
+//===---- Mips16ISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16ISELDAGTODAG_H
+#define MIPS16ISELDAGTODAG_H
+
+#include "MipsISelDAGToDAG.h"
+
+namespace llvm {
+
+class Mips16DAGToDAGISel : public MipsDAGToDAGISel {
+public:
+ explicit Mips16DAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {}
+
+private:
+ std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc DL,
+ EVT Ty, bool HasLo, bool HasHi);
+
+ SDValue getMips16SPAliasReg();
+
+ void getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg);
+
+ virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Alias);
+
+ virtual std::pair<bool, SDNode*> selectNode(SDNode *Node);
+
+ virtual void processFunctionAfterISel(MachineFunction &MF);
+
+ // Insert instructions to initialize the global base register in the
+ // first MBB of the function.
+ void initGlobalBaseReg(MachineFunction &MF);
+
+ void initMips16SPAliasReg(MachineFunction &MF);
+};
+
+FunctionPass *createMips16ISelDag(MipsTargetMachine &TM);
+
+}
+
+#endif
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
new file mode 100644
index 0000000..23eb537
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -0,0 +1,689 @@
+//===-- Mips16ISelLowering.cpp - Mips16 DAG Lowering Interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "mips-lower"
+#include "Mips16ISelLowering.h"
+#include "MipsRegisterInfo.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <set>
+
+using namespace llvm;
+
+static cl::opt<bool>
+Mips16HardFloat("mips16-hard-float", cl::NotHidden,
+ cl::desc("MIPS: mips16 hard float enable."),
+ cl::init(false));
+
+static cl::opt<bool> DontExpandCondPseudos16(
+ "mips16-dont-expand-cond-pseudo",
+ cl::init(false),
+  cl::desc("Don't expand conditional move related "
+ "pseudos for Mips 16"),
+ cl::Hidden);
+
+namespace {
+ std::set<const char*, MipsTargetLowering::LTStr> NoHelperNeeded;
+}
+
+Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM)
+ : MipsTargetLowering(TM) {
+ //
+ // set up as if mips32 and then revert so we can test the mechanism
+ // for switching
+ addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
+ addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
+ computeRegisterProperties();
+ clearRegisterClasses();
+
+ // Set up the register classes
+ addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass);
+
+ if (Mips16HardFloat)
+ setMips16HardFloatLibCalls();
+
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+
+ computeRegisterProperties();
+}
+
+const MipsTargetLowering *
+llvm::createMips16TargetLowering(MipsTargetMachine &TM) {
+ return new Mips16TargetLowering(TM);
+}
+
+bool
+Mips16TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
+ return false;
+}
+
+MachineBasicBlock *
+Mips16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ switch (MI->getOpcode()) {
+ default:
+ return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case Mips::SelBeqZ:
+ return emitSel16(Mips::BeqzRxImm16, MI, BB);
+ case Mips::SelBneZ:
+ return emitSel16(Mips::BnezRxImm16, MI, BB);
+ case Mips::SelTBteqZCmpi:
+ return emitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::SelTBteqZSlti:
+ return emitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SelTBteqZSltiu:
+ return emitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SelTBtneZCmpi:
+ return emitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::SelTBtneZSlti:
+ return emitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SelTBtneZSltiu:
+ return emitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SelTBteqZCmp:
+ return emitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::SelTBteqZSlt:
+ return emitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
+ case Mips::SelTBteqZSltu:
+ return emitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::SelTBtneZCmp:
+ return emitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::SelTBtneZSlt:
+ return emitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
+ case Mips::SelTBtneZSltu:
+ return emitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BteqzT8CmpX16:
+ return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::BteqzT8SltX16:
+ return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
+ case Mips::BteqzT8SltuX16:
+ // TBD: figure out a way to get this or remove the instruction
+ // altogether.
+ return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BtnezT8CmpX16:
+ return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::BtnezT8SltX16:
+ return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
+ case Mips::BtnezT8SltuX16:
+ // TBD: figure out a way to get this or remove the instruction
+ // altogether.
+ return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BteqzT8CmpiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::BteqzT8SltiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::BteqzT8SltiuX16: return emitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::BtnezT8CmpiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::BtnezT8SltiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::BtnezT8SltiuX16: return emitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ break;
+ case Mips::SltCCRxRy16:
+ return emitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB);
+ break;
+ case Mips::SltiCCRxImmX16:
+ return emitFEXT_CCRXI16_ins
+ (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SltiuCCRxImmX16:
+ return emitFEXT_CCRXI16_ins
+ (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SltuCCRxRy16:
+ return emitFEXT_CCRX16_ins
+ (Mips::SltuRxRy16, MI, BB);
+ }
+}
+
+bool Mips16TargetLowering::
+isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const {
+ // No tail call optimization for mips16.
+ return false;
+}
+
+void Mips16TargetLowering::setMips16LibcallName
+ (RTLIB::Libcall L, const char *Name) {
+ setLibcallName(L, Name);
+ NoHelperNeeded.insert(Name);
+}
+
+void Mips16TargetLowering::setMips16HardFloatLibCalls() {
+ setMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3");
+ setMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3");
+ setMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3");
+ setMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3");
+ setMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3");
+ setMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3");
+ setMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3");
+ setMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3");
+ setMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2");
+ setMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2");
+ setMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi");
+ setMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi");
+ setMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf");
+ setMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf");
+ setMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf");
+ setMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf");
+ setMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2");
+ setMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2");
+ setMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2");
+ setMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2");
+ setMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2");
+ setMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2");
+ setMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2");
+ setMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2");
+ setMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2");
+ setMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2");
+ setMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2");
+ setMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2");
+ setMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2");
+ setMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2");
+ setMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2");
+ setMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2");
+}
+
+
+//
+// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much
+// cleaner way to do all of this but it will have to wait until the traditional
+// gcc mechanism is completed.
+//
+// For PIC, in order for Mips16 code to call Mips32 code which, according to
+// the ABI, has either arguments or return values placed in floating point
+// registers, we use a set of helper functions. (This includes functions that
+// return complex types, which on Mips are returned in a pair of floating
+// point registers.)
+//
+// This is an encoding that we inherited from gcc.
+// In Mips traditional O32, N32 ABI, floating point numbers are passed in
+// floating point argument registers 1,2 only when the first and optionally
+// the second arguments are float (sf) or double (df).
+// For Mips16 we are only concerned with the situations where floating point
+// arguments are being passed in floating point registers by the ABI, because
+// Mips16 mode code cannot execute floating point instructions to load those
+// values and hence helper functions are needed.
+// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf), (df, df);
+// the helper function suffixes for these are:
+// 0, 1, 5, 9, 2, 6, 10
+// The suffix can be calculated as follows:
+// for a given argument Arg:
+//   Arg1x, Arg2x = 1 : Arg is sf
+//                  2 : Arg is df
+//                  0 : Arg is neither sf nor df
+// The stub is then the string for the number Arg1x + Arg2x*4.
+// Not all numbers between 0 and 10 are possible; we check anyway and assert
+// if an impossible value occurs.
+//
+
+unsigned int Mips16TargetLowering::getMips16HelperFunctionStubNumber
+ (ArgListTy &Args) const {
+ unsigned int resultNum = 0;
+ if (Args.size() >= 1) {
+ Type *t = Args[0].Ty;
+ if (t->isFloatTy()) {
+ resultNum = 1;
+ }
+ else if (t->isDoubleTy()) {
+ resultNum = 2;
+ }
+ }
+ if (resultNum) {
+ if (Args.size() >=2) {
+ Type *t = Args[1].Ty;
+ if (t->isFloatTy()) {
+ resultNum += 4;
+ }
+ else if (t->isDoubleTy()) {
+ resultNum += 8;
+ }
+ }
+ }
+ return resultNum;
+}
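+
+// Worked examples of the mapping above (the argument lists are hypothetical
+// and shown only for illustration):
+//   f()               -> stub 0 (a helper is needed only when the return
+//                        type itself is sf/df/sc/dc)
+//   f(float)          -> Arg1x = 1            -> stub 1
+//   f(float, double)  -> Arg1x = 1, Arg2x = 2 -> 1 + 2*4 = 9
+//   f(double, float)  -> Arg1x = 2, Arg2x = 1 -> 2 + 1*4 = 6
+//   f(double, double) -> Arg1x = 2, Arg2x = 2 -> 2 + 2*4 = 10
+// Arguments beyond the second do not change the stub number.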
+
+//
+// Prefixes are attached to stub numbers depending on the return type:
+//   return type:  float           sf_
+//                 double          df_
+//                 single complex  sc_
+//                 double complex  dc_
+//                 others          NO PREFIX
+//
+//
+// The full name of a helper function is __mips16_call_stub_ +
+// return type dependent prefix + stub number.
+//
+//
+// This is something that probably should be in a different source file and
+// perhaps done differently, but my main purpose is to not waste runtime
+// on something that we can enumerate in the source. Another possibility is
+// to have a python script generate these mapping tables. This will do
+// for now. There is a whole series of helper function mapping arrays, one
+// for each return type class as outlined above. There are 11 possible
+// entries; ones with 0 are ones which should never be selected.
+//
+// All the arrays are similar except for the one for functions that return
+// neither sf, df, sc, nor dc; for that one we only care about calls which
+// have sf or df as the first parameter.
+//
+#define P_ "__mips16_call_stub_"
+#define MAX_STUB_NUMBER 10
+#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10"
+#define T P "0" , T1
+#define P P_
+static char const * vMips16Helper[MAX_STUB_NUMBER+1] =
+ {0, T1 };
+#undef P
+#define P P_ "sf_"
+static char const * sfMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "df_"
+static char const * dfMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "sc_"
+static char const * scMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "dc_"
+static char const * dcMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#undef P_
+
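+// For example, a call to a hypothetical function
+//   double g(float, double);
+// has stub number 9 and a double return type, so the helper looked up below
+// is dfMips16Helper[9], i.e. "__mips16_call_stub_df_9". A call involving no
+// sf/df/sc/dc return value and no sf/df first argument gets stub number 0
+// and needs no helper at all.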
+
+const char* Mips16TargetLowering::
+ getMips16HelperFunction
+ (Type* RetTy, ArgListTy &Args, bool &needHelper) const {
+ const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args);
+#ifndef NDEBUG
+ const unsigned int maxStubNum = 10;
+ assert(stubNum <= maxStubNum);
+ const bool validStubNum[maxStubNum+1] =
+ {true, true, true, false, false, true, true, false, false, true, true};
+ assert(validStubNum[stubNum]);
+#endif
+ const char *result;
+ if (RetTy->isFloatTy()) {
+ result = sfMips16Helper[stubNum];
+ }
+ else if (RetTy ->isDoubleTy()) {
+ result = dfMips16Helper[stubNum];
+ }
+ else if (RetTy->isStructTy()) {
+ // check if it's complex
+ if (RetTy->getNumContainedTypes() == 2) {
+ if ((RetTy->getContainedType(0)->isFloatTy()) &&
+ (RetTy->getContainedType(1)->isFloatTy())) {
+ result = scMips16Helper[stubNum];
+ }
+ else if ((RetTy->getContainedType(0)->isDoubleTy()) &&
+ (RetTy->getContainedType(1)->isDoubleTy())) {
+ result = dcMips16Helper[stubNum];
+ }
+ else {
+ llvm_unreachable("Uncovered condition");
+ }
+ }
+ else {
+ llvm_unreachable("Uncovered condition");
+ }
+ }
+ else {
+ if (stubNum == 0) {
+ needHelper = false;
+ return "";
+ }
+ result = vMips16Helper[stubNum];
+ }
+ needHelper = true;
+ return result;
+}
+
+void Mips16TargetLowering::
+getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ SelectionDAG &DAG = CLI.DAG;
+ const char* Mips16HelperFunction = 0;
+ bool NeedMips16Helper = false;
+
+ if (getTargetMachine().Options.UseSoftFloat && Mips16HardFloat) {
+ //
+    // Currently we don't have symbols tagged with the mips16 or mips32
+    // qualifier, so we will assume that we don't know what kind it is
+    // and generate the helper.
+ //
+ bool LookupHelper = true;
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) {
+ if (NoHelperNeeded.find(S->getSymbol()) != NoHelperNeeded.end()) {
+ LookupHelper = false;
+ }
+ }
+ if (LookupHelper) Mips16HelperFunction =
+ getMips16HelperFunction(CLI.RetTy, CLI.Args, NeedMips16Helper);
+
+ }
+
+ SDValue JumpTarget = Callee;
+
+ // T9 should contain the address of the callee function if
+  // -relocation-model=pic or it is an indirect call.
+ if (IsPICCall || !GlobalOrExternal) {
+ unsigned V0Reg = Mips::V0;
+ if (NeedMips16Helper) {
+ RegsToPass.push_front(std::make_pair(V0Reg, Callee));
+ JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy());
+ JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT);
+ } else
+ RegsToPass.push_front(std::make_pair((unsigned)Mips::T9, Callee));
+ }
+
+ Ops.push_back(JumpTarget);
+
+ MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
+ InternalLinkage, CLI, Callee, Chain);
+}
+
+MachineBasicBlock *Mips16TargetLowering::
+emitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, DL, TII->get(Opc)).addReg(MI->getOperand(3).getReg())
+ .addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitSelT16
+ (unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
+ .addReg(MI->getOperand(4).getReg());
+ BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitSeliT16
+ (unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
+ .addImm(MI->getOperand(4).getImm());
+ BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+
+}
+
+MachineBasicBlock
+ *Mips16TargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned regX = MI->getOperand(0).getReg();
+ unsigned regY = MI->getOperand(1).getReg();
+ MachineBasicBlock *target = MI->getOperand(2).getMBB();
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY);
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitFEXT_T8I8I16_ins(
+ unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned regX = MI->getOperand(0).getReg();
+ int64_t imm = MI->getOperand(1).getImm();
+ MachineBasicBlock *target = MI->getOperand(2).getMBB();
+ unsigned CmpOpc;
+ if (isUInt<8>(imm))
+ CmpOpc = CmpiOpc;
+ else if (isUInt<16>(imm))
+ CmpOpc = CmpiXOpc;
+ else
+ llvm_unreachable("immediate field not usable");
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+static unsigned Mips16WhichOp8uOr16simm
+ (unsigned shortOp, unsigned longOp, int64_t Imm) {
+ if (isUInt<8>(Imm))
+ return shortOp;
+ else if (isInt<16>(Imm))
+ return longOp;
+ else
+ llvm_unreachable("immediate field not usable");
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRX16_ins(
+ unsigned SltOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned CC = MI->getOperand(0).getReg();
+ unsigned regX = MI->getOperand(1).getReg();
+ unsigned regY = MI->getOperand(2).getReg();
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(SltOpc)).addReg(regX).addReg(regY);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRXI16_ins(
+ unsigned SltiOpc, unsigned SltiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB )const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned CC = MI->getOperand(0).getReg();
+ unsigned regX = MI->getOperand(1).getReg();
+ int64_t Imm = MI->getOperand(2).getImm();
+ unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(SltOpc)).addReg(regX).addImm(Imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+
+}
diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h
new file mode 100644
index 0000000..b23e2a1
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelLowering.h
@@ -0,0 +1,80 @@
+//===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef Mips16ISELLOWERING_H
+#define Mips16ISELLOWERING_H
+
+#include "MipsISelLowering.h"
+
+namespace llvm {
+ class Mips16TargetLowering : public MipsTargetLowering {
+ public:
+ explicit Mips16TargetLowering(MipsTargetMachine &TM);
+
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ private:
+ virtual bool
+ isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const;
+
+ void setMips16LibcallName(RTLIB::Libcall, const char *Name);
+
+ void setMips16HardFloatLibCalls();
+
+ unsigned int
+ getMips16HelperFunctionStubNumber(ArgListTy &Args) const;
+
+ const char *getMips16HelperFunction
+ (Type* RetTy, ArgListTy &Args, bool &needHelper) const;
+
+ virtual void
+ getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
+
+ MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitSeliT16(unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitSelT16(unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_T8I8I16_ins(
+ unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_CCRX16_ins(
+ unsigned SltOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_CCRXI16_ins(
+ unsigned SltiOpc, unsigned SltiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB )const;
+ };
+}
+
+#endif // Mips16ISELLOWERING_H
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index a9e9c52..6293829 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -15,7 +15,7 @@
// Mips Address
//
def addr16 :
- ComplexPattern<iPTR, 3, "SelectAddr16", [frameindex], [SDNPWantParent]>;
+ ComplexPattern<iPTR, 3, "selectAddr16", [frameindex], [SDNPWantParent]>;
//
// Address operand
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 494ba87..5903b9e 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -313,7 +313,7 @@ def : InstAlias<"move $dst, $src",
(DADDu CPU64RegsOpnd:$dst, CPU64RegsOpnd:$src, ZERO_64), 1>,
Requires<[HasMips64]>;
def : InstAlias<"move $dst, $src",
- (OR64 CPU64RegsOpnd:$dst, CPU64RegsOpnd:$src, ZERO_64), 0>,
+ (OR64 CPU64RegsOpnd:$dst, CPU64RegsOpnd:$src, ZERO_64), 1>,
Requires<[HasMips64]>;
def : InstAlias<"and $rs, $rt, $imm",
(DANDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm),
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 78cf140..462def7 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -96,6 +96,12 @@ def RetCC_MipsN : CallingConv<[
CCIfType<[f64], CCAssignToReg<[D0_64, D2_64]>>
]>;
+// In soft-float mode, register A0_64, instead of V1_64, is used to return a
+// long double value.
+def RetCC_F128Soft : CallingConv<[
+ CCIfType<[i64], CCAssignToReg<[V0_64, A0_64]>>
+]>;
+
//===----------------------------------------------------------------------===//
// Mips EABI Calling Convention
//===----------------------------------------------------------------------===//
@@ -139,17 +145,6 @@ def RetCC_MipsEABI : CallingConv<[
]>;
//===----------------------------------------------------------------------===//
-// Mips Android Calling Convention
-//===----------------------------------------------------------------------===//
-
-def RetCC_MipsAndroid : CallingConv<[
- // f32 are returned in registers F0, F2, F1, F3
- CCIfType<[f32], CCAssignToReg<[F0, F2, F1, F3]>>,
-
- CCDelegateTo<RetCC_MipsO32>
-]>;
-
-//===----------------------------------------------------------------------===//
// Mips FastCC Calling Convention
//===----------------------------------------------------------------------===//
def CC_MipsO32_FastCC : CallingConv<[
@@ -209,7 +204,6 @@ def RetCC_Mips : CallingConv<[
CCIfSubtarget<"isABI_EABI()", CCDelegateTo<RetCC_MipsEABI>>,
CCIfSubtarget<"isABI_N32()", CCDelegateTo<RetCC_MipsN>>,
CCIfSubtarget<"isABI_N64()", CCDelegateTo<RetCC_MipsN>>,
- CCIfSubtarget<"isAndroid()", CCDelegateTo<RetCC_MipsAndroid>>,
CCDelegateTo<RetCC_MipsO32>
]>;
diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td
index 559370b..42e4c99 100644
--- a/lib/Target/Mips/MipsCondMov.td
+++ b/lib/Target/Mips/MipsCondMov.td
@@ -58,21 +58,23 @@ multiclass MovzPats0<RegisterClass CRC, RegisterClass DRC,
Instruction SLTiuOp> {
def : MipsPat<(select (i32 (setge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
(MOVZInst DRC:$T, (SLTOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
- def : MipsPat<
- (select (i32 (setuge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTuOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
- def : MipsPat<
- (select (i32 (setge CRC:$lhs, immSExt16:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, immSExt16:$rhs), DRC:$F)>;
- def : MipsPat<
- (select (i32 (setuge CRC:$lh, immSExt16:$rh)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTiuOp CRC:$lh, immSExt16:$rh), DRC:$F)>;
- def : MipsPat<
- (select (i32 (setle CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
- def : MipsPat<
- (select (i32 (setule CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTuOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (setuge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTuOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (setge CRC:$lhs, immSExt16:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, immSExt16:$rhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (setuge CRC:$lh, immSExt16:$rh)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiuOp CRC:$lh, immSExt16:$rh), DRC:$F)>;
+ def : MipsPat<(select (i32 (setle CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (setule CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTuOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (setgt CRC:$lhs, immSExt16Plus1:$rhs)),
+ DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, (Plus1 imm:$rhs)), DRC:$F)>;
+ def : MipsPat<(select (i32 (setugt CRC:$lhs, immSExt16Plus1:$rhs)),
+ DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiuOp CRC:$lhs, (Plus1 imm:$rhs)),
+ DRC:$F)>;
}
multiclass MovzPats1<RegisterClass CRC, RegisterClass DRC,
diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp
new file mode 100644
index 0000000..b5de1eb
--- /dev/null
+++ b/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -0,0 +1,85 @@
+//===-- MipsConstantIslandPass.cpp - Emit PC-relative loads ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+// This pass is used to make PC-relative loads of constants.
+// For now, only Mips16 will use this. While it has the same name and
+// uses many ideas from the LLVM ARM Constant Island Pass, it's not intended
+// to reuse any of the code from the ARM version.
+//
+// Loading constants inline is expensive on Mips16; it is generally better
+// to place the constant nearby in code space, where it can then be loaded
+// with a simple 16-bit load instruction.
+//
+// The constants can be not just numbers but also addresses of functions and
+// labels. This can be particularly helpful in static relocation mode for
+// embedded non-Linux targets.
+//
+//
+
+#define DEBUG_TYPE "mips-constant-islands"
+
+#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+ typedef MachineBasicBlock::iterator Iter;
+ typedef MachineBasicBlock::reverse_iterator ReverseIter;
+
+ class MipsConstantIslands : public MachineFunctionPass {
+
+ public:
+ static char ID;
+ MipsConstantIslands(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm),
+ TII(static_cast<const MipsInstrInfo*>(tm.getInstrInfo())),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_),
+ ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()) {}
+
+ virtual const char *getPassName() const {
+ return "Mips Constant Islands";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ private:
+
+
+ const TargetMachine &TM;
+ const MipsInstrInfo *TII;
+ bool IsPIC;
+ unsigned ABI;
+
+ };
+
+ char MipsConstantIslands::ID = 0;
+} // end of anonymous namespace
+
+/// createMipsConstantIslandPass - Returns a pass that emits PC-relative loads
+/// of constants by placing them in constant islands near their users.
+FunctionPass *llvm::createMipsConstantIslandPass(MipsTargetMachine &tm) {
+ return new MipsConstantIslands(tm);
+}
+
+bool MipsConstantIslands::runOnMachineFunction(MachineFunction &F) {
+ return true;
+}
+
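The new file above is still a stub (runOnMachineFunction changes nothing yet), but its header comment already describes the intended transformation: keep constants in small islands of literals close to their users so that a 16-bit PC-relative load can reach them. The toy model below sketches that placement idea in plain C++; the reach limit, instruction sizes, and the greedy strategy are illustrative assumptions, not details taken from the patch.

#include <cstdio>
#include <vector>

struct Insn { unsigned Size; bool NeedsConstant; };

int main() {
  // Assumed reach of the 16-bit PC-relative load; the real Mips16 limit
  // depends on the encoding and is not specified by this patch.
  const unsigned MaxReach = 48;

  // A toy stream of 2-byte instructions, some of which need a pooled
  // 4-byte constant.
  std::vector<Insn> Stream(40);
  for (unsigned I = 0; I != Stream.size(); ++I) {
    Stream[I].Size = 2;
    Stream[I].NeedsConstant = (I % 7 == 4);
  }

  unsigned Offset = 0, FirstPendingRef = 0, Pending = 0;
  for (unsigned I = 0; I != Stream.size(); ++I) {
    if (Stream[I].NeedsConstant && Pending++ == 0)
      FirstPendingRef = Offset;               // earliest unresolved user
    Offset += Stream[I].Size;

    // Place an island once the distance from the earliest user to the end of
    // a would-be island exceeds the load's reach.
    if (Pending && Offset + 4 * Pending - FirstPendingRef > MaxReach) {
      std::printf("emit island (%u literals) at offset %u\n", Pending, Offset);
      Offset += 4 * Pending;                  // the island occupies code space
      Pending = 0;
    }
  }
  if (Pending)
    std::printf("emit trailing island (%u literals) at offset %u\n",
                Pending, Offset);
  return 0;
}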
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index d62b166..e265590 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -14,11 +14,17 @@
#define DEBUG_TYPE "delay-slot-filler"
#include "Mips.h"
+#include "MipsInstrInfo.h"
#include "MipsTargetMachine.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -36,18 +42,59 @@ static cl::opt<bool> DisableDelaySlotFiller(
cl::desc("Fill all delay slots with NOPs."),
cl::Hidden);
-// This option can be used to silence complaints by machine verifier passes.
-static cl::opt<bool> SkipDelaySlotFiller(
- "skip-mips-delay-filler",
+static cl::opt<bool> DisableForwardSearch(
+ "disable-mips-df-forward-search",
+ cl::init(true),
+ cl::desc("Disallow MIPS delay filler to search forward."),
+ cl::Hidden);
+
+static cl::opt<bool> DisableSuccBBSearch(
+ "disable-mips-df-succbb-search",
+ cl::init(true),
+ cl::desc("Disallow MIPS delay filler to search successor basic blocks."),
+ cl::Hidden);
+
+static cl::opt<bool> DisableBackwardSearch(
+ "disable-mips-df-backward-search",
cl::init(false),
- cl::desc("Skip MIPS' delay slot filling pass."),
+ cl::desc("Disallow MIPS delay filler to search backward."),
cl::Hidden);
namespace {
+ typedef MachineBasicBlock::iterator Iter;
+ typedef MachineBasicBlock::reverse_iterator ReverseIter;
+ typedef SmallDenseMap<MachineBasicBlock*, MachineInstr*, 2> BB2BrMap;
+
+ /// \brief A functor comparing the edge weights of two blocks.
+ struct CmpWeight {
+ CmpWeight(const MachineBasicBlock &S,
+ const MachineBranchProbabilityInfo &P) : Src(S), Prob(P) {}
+
+ bool operator()(const MachineBasicBlock *Dst0,
+ const MachineBasicBlock *Dst1) const {
+ return Prob.getEdgeWeight(&Src, Dst0) < Prob.getEdgeWeight(&Src, Dst1);
+ }
+
+ const MachineBasicBlock &Src;
+ const MachineBranchProbabilityInfo &Prob;
+ };
+
class RegDefsUses {
public:
RegDefsUses(TargetMachine &TM);
void init(const MachineInstr &MI);
+
+ /// This function sets all caller-saved registers in Defs.
+ void setCallerSaved(const MachineInstr &MI);
+
+ /// This function sets all unallocatable registers in Defs.
+ void setUnallocatableRegs(const MachineFunction &MF);
+
+ /// Set bits in Uses corresponding to MBB's live-out registers except for
+ /// the registers that are live-in to SuccBB.
+ void addLiveOut(const MachineBasicBlock &MBB,
+ const MachineBasicBlock &SuccBB);
+
bool update(const MachineInstr &MI, unsigned Begin, unsigned End);
private:
@@ -61,6 +108,72 @@ namespace {
BitVector Defs, Uses;
};
+ /// Base class for inspecting loads and stores.
+ class InspectMemInstr {
+ public:
+ InspectMemInstr(bool ForbidMemInstr_)
+ : OrigSeenLoad(false), OrigSeenStore(false), SeenLoad(false),
+ SeenStore(false), ForbidMemInstr(ForbidMemInstr_) {}
+
+ /// Return true if MI cannot be moved to delay slot.
+ bool hasHazard(const MachineInstr &MI);
+
+ virtual ~InspectMemInstr() {}
+
+ protected:
+ /// Flags indicating whether loads or stores have been seen.
+ bool OrigSeenLoad, OrigSeenStore, SeenLoad, SeenStore;
+
+ /// Memory instructions are not allowed to be moved to the delay slot if
+ /// this flag is true.
+ bool ForbidMemInstr;
+
+ private:
+ virtual bool hasHazard_(const MachineInstr &MI) = 0;
+ };
+
+ /// This subclass rejects any memory instructions.
+ class NoMemInstr : public InspectMemInstr {
+ public:
+ NoMemInstr() : InspectMemInstr(true) {}
+ private:
+ virtual bool hasHazard_(const MachineInstr &MI) { return true; }
+ };
+
+ /// This subclass accepts loads from the stack and constant loads.
+ class LoadFromStackOrConst : public InspectMemInstr {
+ public:
+ LoadFromStackOrConst() : InspectMemInstr(false) {}
+ private:
+ virtual bool hasHazard_(const MachineInstr &MI);
+ };
+
+ /// This subclass uses memory dependence information to determine whether a
+ /// memory instruction can be moved to a delay slot.
+ class MemDefsUses : public InspectMemInstr {
+ public:
+ MemDefsUses(const MachineFrameInfo *MFI);
+
+ private:
+ virtual bool hasHazard_(const MachineInstr &MI);
+
+ /// Update Defs and Uses. Return true if there exist dependences between V
+ /// and the values in Uses and Defs that disqualify the delay-slot
+ /// candidate.
+ bool updateDefsUses(const Value *V, bool MayStore);
+
+ /// Get the list of underlying objects of MI's memory operand.
+ bool getUnderlyingObjects(const MachineInstr &MI,
+ SmallVectorImpl<const Value *> &Objects) const;
+
+ const MachineFrameInfo *MFI;
+ SmallPtrSet<const Value*, 4> Uses, Defs;
+
+ /// Flags indicating whether loads or stores with no underlying objects have
+ /// been seen.
+ bool SeenNoObjLoad, SeenNoObjStore;
+ };
+
class Filler : public MachineFunctionPass {
public:
Filler(TargetMachine &tm)
@@ -71,9 +184,6 @@ namespace {
}
bool runOnMachineFunction(MachineFunction &F) {
- if (SkipDelaySlotFiller)
- return false;
-
bool Changed = false;
for (MachineFunction::iterator FI = F.begin(), FE = F.end();
FI != FE; ++FI)
@@ -81,19 +191,54 @@ namespace {
return Changed;
}
- private:
- typedef MachineBasicBlock::iterator Iter;
- typedef MachineBasicBlock::reverse_iterator ReverseIter;
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ private:
bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
/// This function checks if it is valid to move Candidate to the delay slot
- /// and returns true if it isn't. It also updates load and store flags and
- /// register defs and uses.
- bool delayHasHazard(const MachineInstr &Candidate, bool &SawLoad,
- bool &SawStore, RegDefsUses &RegDU) const;
-
- bool findDelayInstr(MachineBasicBlock &MBB, Iter slot, Iter &Filler) const;
+ /// and returns true if it isn't. It also updates memory and register
+ /// dependence information.
+ bool delayHasHazard(const MachineInstr &Candidate, RegDefsUses &RegDU,
+ InspectMemInstr &IM) const;
+
+ /// This function searches the range [Begin, End) for an instruction that
+ /// can be moved to the delay slot. Returns true on success.
+ template<typename IterTy>
+ bool searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
+ RegDefsUses &RegDU, InspectMemInstr &IM,
+ IterTy &Filler) const;
+
+ /// This function searches in the backward direction for an instruction that
+ /// can be moved to the delay slot. Returns true on success.
+ bool searchBackward(MachineBasicBlock &MBB, Iter Slot) const;
+
+ /// This function searches MBB in the forward direction for an instruction
+ /// that can be moved to the delay slot. Returns true on success.
+ bool searchForward(MachineBasicBlock &MBB, Iter Slot) const;
+
+ /// This function searches MBB's successor blocks for an instruction that
+ /// can be moved to the delay slot and inserts clones of the instruction
+ /// into the successor blocks.
+ bool searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const;
+
+ /// Pick a successor block of MBB. Return NULL if MBB doesn't have a
+ /// successor block that is not a landing pad.
+ MachineBasicBlock *selectSuccBB(MachineBasicBlock &B) const;
+
+ /// This function analyzes MBB and returns an instruction with an unoccupied
+ /// slot that branches to Dst.
+ std::pair<MipsInstrInfo::BranchType, MachineInstr *>
+ getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const;
+
+ /// Examine Pred and see if it is possible to insert an instruction into
+ /// the delay slot of one of its branches or at its end.
+ bool examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ,
+ RegDefsUses &RegDU, bool &HasMultipleSuccs,
+ BB2BrMap &BrMap) const;
bool terminateSearch(const MachineInstr &Candidate) const;
@@ -105,6 +250,45 @@ namespace {
char Filler::ID = 0;
} // end of anonymous namespace
+static bool hasUnoccupiedSlot(const MachineInstr *MI) {
+ return MI->hasDelaySlot() && !MI->isBundledWithSucc();
+}
+
+/// This function inserts clones of Filler into predecessor blocks.
+static void insertDelayFiller(Iter Filler, const BB2BrMap &BrMap) {
+ MachineFunction *MF = Filler->getParent()->getParent();
+
+ for (BB2BrMap::const_iterator I = BrMap.begin(); I != BrMap.end(); ++I) {
+ if (I->second) {
+ MIBundleBuilder(I->second).append(MF->CloneMachineInstr(&*Filler));
+ ++UsefulSlots;
+ } else {
+ I->first->insert(I->first->end(), MF->CloneMachineInstr(&*Filler));
+ }
+ }
+}
+
+/// This function adds registers Filler defines to MBB's live-in register list.
+static void addLiveInRegs(Iter Filler, MachineBasicBlock &MBB) {
+ for (unsigned I = 0, E = Filler->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = Filler->getOperand(I);
+ unsigned R;
+
+ if (!MO.isReg() || !MO.isDef() || !(R = MO.getReg()))
+ continue;
+
+#ifndef NDEBUG
+ const MachineFunction &MF = *MBB.getParent();
+ assert(MF.getTarget().getRegisterInfo()->getAllocatableSet(MF).test(R) &&
+ "Shouldn't move an instruction with unallocatable registers across "
+ "basic block boundaries.");
+#endif
+
+ if (!MBB.isLiveIn(R))
+ MBB.addLiveIn(R);
+ }
+}
+
RegDefsUses::RegDefsUses(TargetMachine &TM)
: TRI(*TM.getRegisterInfo()), Defs(TRI.getNumRegs(), false),
Uses(TRI.getNumRegs(), false) {}
@@ -126,6 +310,45 @@ void RegDefsUses::init(const MachineInstr &MI) {
}
}
+void RegDefsUses::setCallerSaved(const MachineInstr &MI) {
+ assert(MI.isCall());
+
+ // If MI is a call, add all caller-saved registers to Defs.
+ BitVector CallerSavedRegs(TRI.getNumRegs(), true);
+
+ CallerSavedRegs.reset(Mips::ZERO);
+ CallerSavedRegs.reset(Mips::ZERO_64);
+
+ for (const MCPhysReg *R = TRI.getCalleeSavedRegs(); *R; ++R)
+ for (MCRegAliasIterator AI(*R, &TRI, true); AI.isValid(); ++AI)
+ CallerSavedRegs.reset(*AI);
+
+ Defs |= CallerSavedRegs;
+}
+
+void RegDefsUses::setUnallocatableRegs(const MachineFunction &MF) {
+ BitVector AllocSet = TRI.getAllocatableSet(MF);
+
+ for (int R = AllocSet.find_first(); R != -1; R = AllocSet.find_next(R))
+ for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI)
+ AllocSet.set(*AI);
+
+ AllocSet.set(Mips::ZERO);
+ AllocSet.set(Mips::ZERO_64);
+
+ Defs |= AllocSet.flip();
+}
+
+void RegDefsUses::addLiveOut(const MachineBasicBlock &MBB,
+ const MachineBasicBlock &SuccBB) {
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI)
+ if (*SI != &SuccBB)
+ for (MachineBasicBlock::livein_iterator LI = (*SI)->livein_begin(),
+ LE = (*SI)->livein_end(); LI != LE; ++LI)
+ Uses.set(*LI);
+}
+
bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) {
BitVector NewDefs(TRI.getNumRegs()), NewUses(TRI.getNumRegs());
bool HasHazard = false;
@@ -164,28 +387,134 @@ bool RegDefsUses::isRegInSet(const BitVector &RegSet, unsigned Reg) const {
return false;
}
+bool InspectMemInstr::hasHazard(const MachineInstr &MI) {
+ if (!MI.mayStore() && !MI.mayLoad())
+ return false;
+
+ if (ForbidMemInstr)
+ return true;
+
+ OrigSeenLoad = SeenLoad;
+ OrigSeenStore = SeenStore;
+ SeenLoad |= MI.mayLoad();
+ SeenStore |= MI.mayStore();
+
+ // If MI is an ordered or volatile memory reference, disallow moving
+ // subsequent loads and stores to delay slot.
+ if (MI.hasOrderedMemoryRef() && (OrigSeenLoad || OrigSeenStore)) {
+ ForbidMemInstr = true;
+ return true;
+ }
+
+ return hasHazard_(MI);
+}
+
+bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) {
+ if (MI.mayStore())
+ return true;
+
+ if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getValue())
+ return true;
+
+ const Value *V = (*MI.memoperands_begin())->getValue();
+
+ if (isa<FixedStackPseudoSourceValue>(V))
+ return false;
+
+ if (const PseudoSourceValue *PSV = dyn_cast<const PseudoSourceValue>(V))
+ return !PSV->PseudoSourceValue::isConstant(0) &&
+ (V != PseudoSourceValue::getStack());
+
+ return true;
+}
+
+MemDefsUses::MemDefsUses(const MachineFrameInfo *MFI_)
+ : InspectMemInstr(false), MFI(MFI_), SeenNoObjLoad(false),
+ SeenNoObjStore(false) {}
+
+bool MemDefsUses::hasHazard_(const MachineInstr &MI) {
+ bool HasHazard = false;
+ SmallVector<const Value *, 4> Objs;
+
+ // Check underlying object list.
+ if (getUnderlyingObjects(MI, Objs)) {
+ for (SmallVector<const Value *, 4>::const_iterator I = Objs.begin();
+ I != Objs.end(); ++I)
+ HasHazard |= updateDefsUses(*I, MI.mayStore());
+
+ return HasHazard;
+ }
+
+ // No underlying objects found.
+ HasHazard = MI.mayStore() && (OrigSeenLoad || OrigSeenStore);
+ HasHazard |= MI.mayLoad() || OrigSeenStore;
+
+ SeenNoObjLoad |= MI.mayLoad();
+ SeenNoObjStore |= MI.mayStore();
+
+ return HasHazard;
+}
+
+bool MemDefsUses::updateDefsUses(const Value *V, bool MayStore) {
+ if (MayStore)
+ return !Defs.insert(V) || Uses.count(V) || SeenNoObjStore || SeenNoObjLoad;
+
+ Uses.insert(V);
+ return Defs.count(V) || SeenNoObjStore;
+}
+
+bool MemDefsUses::
+getUnderlyingObjects(const MachineInstr &MI,
+ SmallVectorImpl<const Value *> &Objects) const {
+ if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getValue())
+ return false;
+
+ const Value *V = (*MI.memoperands_begin())->getValue();
+
+ SmallVector<Value *, 4> Objs;
+ GetUnderlyingObjects(const_cast<Value *>(V), Objs);
+
+ for (SmallVector<Value*, 4>::iterator I = Objs.begin(), E = Objs.end();
+ I != E; ++I) {
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(*I)) {
+ if (PSV->isAliased(MFI))
+ return false;
+ } else if (!isIdentifiedObject(V))
+ return false;
+
+ Objects.push_back(*I);
+ }
+
+ return true;
+}
+
/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
/// We assume there is only one delay slot per delayed instruction.
bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
for (Iter I = MBB.begin(); I != MBB.end(); ++I) {
- if (!I->hasDelaySlot())
+ if (!hasUnoccupiedSlot(&*I))
continue;
++FilledSlots;
Changed = true;
- Iter D;
// Delay slot filling is disabled at -O0.
- if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None) &&
- findDelayInstr(MBB, I, D)) {
- MBB.splice(llvm::next(I), &MBB, D);
- ++UsefulSlots;
- } else
- BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
+ if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None)) {
+ if (searchBackward(MBB, I))
+ continue;
+
+ if (I->isTerminator()) {
+ if (searchSuccBBs(MBB, I))
+ continue;
+ } else if (searchForward(MBB, I)) {
+ continue;
+ }
+ }
- // Bundle the delay slot filler to the instruction with the delay slot.
+ // Bundle the NOP with the instruction that has the delay slot.
+ BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
MIBundleBuilder(MBB, I, llvm::next(llvm::next(I)));
}
@@ -198,16 +527,11 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
return new Filler(tm);
}
-bool Filler::findDelayInstr(MachineBasicBlock &MBB, Iter Slot,
- Iter &Filler) const {
- RegDefsUses RegDU(TM);
-
- RegDU.init(*Slot);
-
- bool SawLoad = false;
- bool SawStore = false;
-
- for (ReverseIter I(Slot); I != MBB.rend(); ++I) {
+template<typename IterTy>
+bool Filler::searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
+ RegDefsUses &RegDU, InspectMemInstr& IM,
+ IterTy &Filler) const {
+ for (IterTy I = Begin; I != End; ++I) {
// skip debug value
if (I->isDebugValue())
continue;
@@ -215,33 +539,176 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB, Iter Slot,
if (terminateSearch(*I))
break;
- if (delayHasHazard(*I, SawLoad, SawStore, RegDU))
+ assert((!I->isCall() && !I->isReturn() && !I->isBranch()) &&
+ "Cannot put calls, returns or branches in delay slot.");
+
+ if (delayHasHazard(*I, RegDU, IM))
continue;
- Filler = llvm::next(I).base();
+ Filler = I;
return true;
}
return false;
}
-bool Filler::delayHasHazard(const MachineInstr &Candidate, bool &SawLoad,
- bool &SawStore, RegDefsUses &RegDU) const {
- bool HasHazard = (Candidate.isImplicitDef() || Candidate.isKill());
+bool Filler::searchBackward(MachineBasicBlock &MBB, Iter Slot) const {
+ if (DisableBackwardSearch)
+ return false;
+
+ RegDefsUses RegDU(TM);
+ MemDefsUses MemDU(MBB.getParent()->getFrameInfo());
+ ReverseIter Filler;
+
+ RegDU.init(*Slot);
- // Loads or stores cannot be moved past a store to the delay slot
- // and stores cannot be moved past a load.
- if (Candidate.mayStore() || Candidate.hasOrderedMemoryRef()) {
- HasHazard |= SawStore | SawLoad;
- SawStore = true;
- } else if (Candidate.mayLoad()) {
- HasHazard |= SawStore;
- SawLoad = true;
+ if (searchRange(MBB, ReverseIter(Slot), MBB.rend(), RegDU, MemDU, Filler)) {
+ MBB.splice(llvm::next(Slot), &MBB, llvm::next(Filler).base());
+ MIBundleBuilder(MBB, Slot, llvm::next(llvm::next(Slot)));
+ ++UsefulSlots;
+ return true;
}
- assert((!Candidate.isCall() && !Candidate.isReturn()) &&
- "Cannot put calls or returns in delay slot.");
+ return false;
+}
+
+bool Filler::searchForward(MachineBasicBlock &MBB, Iter Slot) const {
+ // Can handle only calls.
+ if (DisableForwardSearch || !Slot->isCall())
+ return false;
+
+ RegDefsUses RegDU(TM);
+ NoMemInstr NM;
+ Iter Filler;
+
+ RegDU.setCallerSaved(*Slot);
+
+ if (searchRange(MBB, llvm::next(Slot), MBB.end(), RegDU, NM, Filler)) {
+ MBB.splice(llvm::next(Slot), &MBB, Filler);
+ MIBundleBuilder(MBB, Slot, llvm::next(llvm::next(Slot)));
+ ++UsefulSlots;
+ return true;
+ }
+
+ return false;
+}
+
+bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {
+ if (DisableSuccBBSearch)
+ return false;
+
+ MachineBasicBlock *SuccBB = selectSuccBB(MBB);
+
+ if (!SuccBB)
+ return false;
+
+ RegDefsUses RegDU(TM);
+ bool HasMultipleSuccs = false;
+ BB2BrMap BrMap;
+ OwningPtr<InspectMemInstr> IM;
+ Iter Filler;
+
+ // Iterate over SuccBB's predecessor list.
+ for (MachineBasicBlock::pred_iterator PI = SuccBB->pred_begin(),
+ PE = SuccBB->pred_end(); PI != PE; ++PI)
+ if (!examinePred(**PI, *SuccBB, RegDU, HasMultipleSuccs, BrMap))
+ return false;
+
+ // Do not allow moving instructions which have unallocatable register
+ // operands across basic block boundaries.
+ RegDU.setUnallocatableRegs(*MBB.getParent());
+
+ // Only allow moving loads from stack or constants if any of the SuccBB's
+ // predecessors have multiple successors.
+ if (HasMultipleSuccs) {
+ IM.reset(new LoadFromStackOrConst());
+ } else {
+ const MachineFrameInfo *MFI = MBB.getParent()->getFrameInfo();
+ IM.reset(new MemDefsUses(MFI));
+ }
+
+ if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Filler))
+ return false;
+
+ insertDelayFiller(Filler, BrMap);
+ addLiveInRegs(Filler, *SuccBB);
+ Filler->eraseFromParent();
+
+ return true;
+}
+
+MachineBasicBlock *Filler::selectSuccBB(MachineBasicBlock &B) const {
+ if (B.succ_empty())
+ return NULL;
+
+ // Select the successor with the largest edge weight.
+ CmpWeight Cmp(B, getAnalysis<MachineBranchProbabilityInfo>());
+ MachineBasicBlock *S = *std::max_element(B.succ_begin(), B.succ_end(), Cmp);
+ return S->isLandingPad() ? NULL : S;
+}
+
+std::pair<MipsInstrInfo::BranchType, MachineInstr *>
+Filler::getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const {
+ const MipsInstrInfo *TII =
+ static_cast<const MipsInstrInfo*>(TM.getInstrInfo());
+ MachineBasicBlock *TrueBB = 0, *FalseBB = 0;
+ SmallVector<MachineInstr*, 2> BranchInstrs;
+ SmallVector<MachineOperand, 2> Cond;
+
+ MipsInstrInfo::BranchType R =
+ TII->AnalyzeBranch(MBB, TrueBB, FalseBB, Cond, false, BranchInstrs);
+
+ if ((R == MipsInstrInfo::BT_None) || (R == MipsInstrInfo::BT_NoBranch))
+ return std::make_pair(R, (MachineInstr*)NULL);
+
+ if (R != MipsInstrInfo::BT_CondUncond) {
+ if (!hasUnoccupiedSlot(BranchInstrs[0]))
+ return std::make_pair(MipsInstrInfo::BT_None, (MachineInstr*)NULL);
+
+ assert(((R != MipsInstrInfo::BT_Uncond) || (TrueBB == &Dst)));
+
+ return std::make_pair(R, BranchInstrs[0]);
+ }
+
+ assert((TrueBB == &Dst) || (FalseBB == &Dst));
+
+ // Examine the conditional branch. See if its slot is occupied.
+ if (hasUnoccupiedSlot(BranchInstrs[0]))
+ return std::make_pair(MipsInstrInfo::BT_Cond, BranchInstrs[0]);
+
+ // If that fails, try the unconditional branch.
+ if (hasUnoccupiedSlot(BranchInstrs[1]) && (FalseBB == &Dst))
+ return std::make_pair(MipsInstrInfo::BT_Uncond, BranchInstrs[1]);
+
+ return std::make_pair(MipsInstrInfo::BT_None, (MachineInstr*)NULL);
+}
+
+bool Filler::examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ,
+ RegDefsUses &RegDU, bool &HasMultipleSuccs,
+ BB2BrMap &BrMap) const {
+ std::pair<MipsInstrInfo::BranchType, MachineInstr *> P =
+ getBranch(Pred, Succ);
+
+ // Return if either getBranch wasn't able to analyze the branches or there
+ // were no branches with unoccupied slots.
+ if (P.first == MipsInstrInfo::BT_None)
+ return false;
+
+ if ((P.first != MipsInstrInfo::BT_Uncond) &&
+ (P.first != MipsInstrInfo::BT_NoBranch)) {
+ HasMultipleSuccs = true;
+ RegDU.addLiveOut(Pred, Succ);
+ }
+
+ BrMap[&Pred] = P.second;
+ return true;
+}
+
+bool Filler::delayHasHazard(const MachineInstr &Candidate, RegDefsUses &RegDU,
+ InspectMemInstr &IM) const {
+ bool HasHazard = (Candidate.isImplicitDef() || Candidate.isKill());
+ HasHazard |= IM.hasHazard(Candidate);
HasHazard |= RegDU.update(Candidate, 0, Candidate.getNumOperands());
return HasHazard;
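The rewritten filler above threads two kinds of state through every search: register dependences (RegDefsUses) and memory dependences (the InspectMemInstr hierarchy). The register half boils down to two bit vectors, and the standalone sketch below reproduces that bookkeeping with std::bitset; the type names and the two-instruction example are invented for illustration and are not taken from the patch.

#include <bitset>
#include <cstdio>

static const unsigned NumRegs = 32;

struct Insn {
  const char *Name;
  std::bitset<NumRegs> Defs, Uses;
};

// Mirrors the idea of RegDefsUses: Defs/Uses accumulate the registers touched
// by everything the candidate would be moved across (starting with the branch
// that owns the delay slot). A candidate is rejected if it defines a register
// already recorded as defined or used, or uses a register already recorded as
// defined.
struct RegState {
  std::bitset<NumRegs> Defs, Uses;

  bool update(const Insn &MI) {
    bool Hazard = (MI.Defs & (Defs | Uses)).any() || (MI.Uses & Defs).any();
    Defs |= MI.Defs;
    Uses |= MI.Uses;
    return Hazard;
  }
};

static std::bitset<NumRegs> reg(unsigned R) {
  std::bitset<NumRegs> B;
  B.set(R);
  return B;
}

int main() {
  // beq $4, $5, L reads $4 and $5; scan backward over the two instructions
  // above it, nearest first, as Filler::searchBackward does.
  Insn Branch = { "beq $4, $5, L", std::bitset<NumRegs>(), reg(4) | reg(5) };
  Insn Above[2] = {
    { "addiu $6, $7, 1", reg(6), reg(7) },  // independent: can fill the slot
    { "addiu $4, $4, 4", reg(4), reg(4) }   // defines $4, read by the branch
  };

  RegState RegDU;
  RegDU.update(Branch);  // seed with the branch, like RegDU.init(*Slot)
  for (unsigned I = 0; I != 2; ++I)
    std::printf("%-16s -> %s\n", Above[I].Name,
                RegDU.update(Above[I]) ? "hazard" : "can fill delay slot");
  return 0;
}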
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index df52d92..14268d2 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -39,7 +39,7 @@ protected:
uint64_t estimateStackSize(const MachineFunction &MF) const;
};
-/// Create MipsInstrInfo objects.
+/// Create MipsFrameLowering objects.
const MipsFrameLowering *createMips16FrameLowering(const MipsSubtarget &ST);
const MipsFrameLowering *createMipsSEFrameLowering(const MipsSubtarget &ST);
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 78c74ef..77b08cb 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -12,19 +12,19 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-isel"
+#include "MipsISelDAGToDAG.h"
+#include "Mips16ISelDAGToDAG.h"
+#include "MipsSEISelDAGToDAG.h"
#include "Mips.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsAnalyzeImmediate.h"
#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
-#include "MipsSubtarget.h"
-#include "MipsTargetMachine.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
@@ -45,270 +45,11 @@ using namespace llvm;
// MipsDAGToDAGISel - MIPS specific code to select MIPS machine
// instructions for SelectionDAG operations.
//===----------------------------------------------------------------------===//
-namespace {
-
-class MipsDAGToDAGISel : public SelectionDAGISel {
-
- /// TM - Keep a reference to MipsTargetMachine.
- MipsTargetMachine &TM;
-
- /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can
- /// make the right decision when generating code for different targets.
- const MipsSubtarget &Subtarget;
-
-public:
- explicit MipsDAGToDAGISel(MipsTargetMachine &tm) :
- SelectionDAGISel(tm),
- TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {}
-
- // Pass Name
- virtual const char *getPassName() const {
- return "MIPS DAG->DAG Pattern Instruction Selection";
- }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
-private:
- // Include the pieces autogenerated from the target description.
- #include "MipsGenDAGISel.inc"
-
- /// getTargetMachine - Return a reference to the TargetMachine, casted
- /// to the target-specific type.
- const MipsTargetMachine &getTargetMachine() {
- return static_cast<const MipsTargetMachine &>(TM);
- }
-
- /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
- /// to the target-specific type.
- const MipsInstrInfo *getInstrInfo() {
- return getTargetMachine().getInstrInfo();
- }
-
- SDNode *getGlobalBaseReg();
-
- SDValue getMips16SPAliasReg();
-
- void getMips16SPRefReg(SDNode *parent, SDValue &AliasReg);
-
- std::pair<SDNode*, SDNode*> SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl,
- EVT Ty, bool HasLo, bool HasHi);
-
- SDNode *Select(SDNode *N);
-
- // Complex Pattern.
- /// (reg + imm).
- bool selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const;
-
- /// Fall back on this function if all else fails.
- bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const;
-
- /// Match integer address pattern.
- bool selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const;
-
- bool SelectAddr16(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset,
- SDValue &Alias);
-
- // getImm - Return a target constant with the specified value.
- inline SDValue getImm(const SDNode *Node, unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
- }
-
- void ProcessFunctionAfterISel(MachineFunction &MF);
- bool ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
- void InitGlobalBaseReg(MachineFunction &MF);
- void InitMips16SPAliasReg(MachineFunction &MF);
-
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps);
-};
-
-}
-
-// Insert instructions to initialize the global base register in the
-// first MBB of the function. When the ABI is O32 and the relocation model is
-// PIC, the necessary instructions are emitted later to prevent optimization
-// passes from moving them.
-void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
-
- if (!MipsFI->globalBaseRegSet())
- return;
-
- MachineBasicBlock &MBB = MF.front();
- MachineBasicBlock::iterator I = MBB.begin();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
- unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
- const TargetRegisterClass *RC;
-
- if (Subtarget.isABI_N64())
- RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass;
- else if (Subtarget.inMips16Mode())
- RC = (const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
- else
- RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass;
-
- V0 = RegInfo.createVirtualRegister(RC);
- V1 = RegInfo.createVirtualRegister(RC);
- V2 = RegInfo.createVirtualRegister(RC);
-
- if (Subtarget.isABI_N64()) {
- MF.getRegInfo().addLiveIn(Mips::T9_64);
- MBB.addLiveIn(Mips::T9_64);
-
- // lui $v0, %hi(%neg(%gp_rel(fname)))
- // daddu $v1, $v0, $t9
- // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
- const GlobalValue *FName = MF.getFunction();
- BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0)
- .addReg(Mips::T9_64);
- BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
- return;
- }
-
- if (Subtarget.inMips16Mode()) {
- BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0)
- .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1)
- .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
- BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16);
- BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg)
- .addReg(V1).addReg(V2);
- return;
- }
-
- if (MF.getTarget().getRelocationModel() == Reloc::Static) {
- // Set global register to __gnu_local_gp.
- //
- // lui $v0, %hi(__gnu_local_gp)
- // addiu $globalbasereg, $v0, %lo(__gnu_local_gp)
- BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
- .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
- .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
- return;
- }
-
- MF.getRegInfo().addLiveIn(Mips::T9);
- MBB.addLiveIn(Mips::T9);
-
- if (Subtarget.isABI_N32()) {
- // lui $v0, %hi(%neg(%gp_rel(fname)))
- // addu $v1, $v0, $t9
- // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
- const GlobalValue *FName = MF.getFunction();
- BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
- return;
- }
-
- assert(Subtarget.isABI_O32());
-
- // For O32 ABI, the following instruction sequence is emitted to initialize
- // the global base register:
- //
- // 0. lui $2, %hi(_gp_disp)
- // 1. addiu $2, $2, %lo(_gp_disp)
- // 2. addu $globalbasereg, $2, $t9
- //
- // We emit only the last instruction here.
- //
- // GNU linker requires that the first two instructions appear at the beginning
- // of a function and no instructions be inserted before or between them.
- // The two instructions are emitted during lowering to MC layer in order to
- // avoid any reordering.
- //
- // Register $2 (Mips::V0) is added to the list of live-in registers to ensure
- // the value instruction 1 (addiu) defines is valid when instruction 2 (addu)
- // reads it.
- MF.getRegInfo().addLiveIn(Mips::V0);
- MBB.addLiveIn(Mips::V0);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
- .addReg(Mips::V0).addReg(Mips::T9);
-}
-
-// Insert instructions to initialize the Mips16 SP Alias register in the
-// first MBB of the function.
-//
-void MipsDAGToDAGISel::InitMips16SPAliasReg(MachineFunction &MF) {
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
-
- if (!MipsFI->mips16SPAliasRegSet())
- return;
-
- MachineBasicBlock &MBB = MF.front();
- MachineBasicBlock::iterator I = MBB.begin();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
- unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg();
-
- BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg)
- .addReg(Mips::SP);
-}
-
-
-bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI,
- const MachineInstr& MI) {
- unsigned DstReg = 0, ZeroReg = 0;
-
- // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
- if ((MI.getOpcode() == Mips::ADDiu) &&
- (MI.getOperand(1).getReg() == Mips::ZERO) &&
- (MI.getOperand(2).getImm() == 0)) {
- DstReg = MI.getOperand(0).getReg();
- ZeroReg = Mips::ZERO;
- } else if ((MI.getOpcode() == Mips::DADDiu) &&
- (MI.getOperand(1).getReg() == Mips::ZERO_64) &&
- (MI.getOperand(2).getImm() == 0)) {
- DstReg = MI.getOperand(0).getReg();
- ZeroReg = Mips::ZERO_64;
- }
-
- if (!DstReg)
- return false;
-
- // Replace uses with ZeroReg.
- for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
- E = MRI->use_end(); U != E;) {
- MachineOperand &MO = U.getOperand();
- unsigned OpNo = U.getOperandNo();
- MachineInstr *MI = MO.getParent();
- ++U;
-
- // Do not replace if it is a phi's operand or is tied to def operand.
- if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo())
- continue;
-
- MO.setReg(ZeroReg);
- }
-
- return true;
-}
-
-void MipsDAGToDAGISel::ProcessFunctionAfterISel(MachineFunction &MF) {
- InitGlobalBaseReg(MF);
- InitMips16SPAliasReg(MF);
-
- MachineRegisterInfo *MRI = &MF.getRegInfo();
-
- for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE;
- ++MFI)
- for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
- ReplaceUsesWithZeroReg(MRI, *I);
-}
bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
bool Ret = SelectionDAGISel::runOnMachineFunction(MF);
- ProcessFunctionAfterISel(MF);
+ processFunctionAfterISel(MF);
return Ret;
}
@@ -320,233 +61,36 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}
-/// getMips16SPAliasReg - Output the instructions required to put the
-/// SP into a Mips16 accessible aliased register.
-SDValue MipsDAGToDAGISel::getMips16SPAliasReg() {
- unsigned Mips16SPAliasReg =
- MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg();
- return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy());
-}
-
/// ComplexPattern used on MipsInstrInfo
/// Used on Mips Load/Store instructions
bool MipsDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
- EVT ValTy = Addr.getValueType();
-
- // if Address is FI, get the TargetFrameIndex.
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- Offset = CurDAG->getTargetConstant(0, ValTy);
- return true;
- }
-
- // on PIC code Load GA
- if (Addr.getOpcode() == MipsISD::Wrapper) {
- Base = Addr.getOperand(0);
- Offset = Addr.getOperand(1);
- return true;
- }
-
- if (TM.getRelocationModel() != Reloc::PIC_) {
- if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress))
- return false;
- }
-
- // Addresses of the form FI+const or FI|const
- if (CurDAG->isBaseWithConstantOffset(Addr)) {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- if (isInt<16>(CN->getSExtValue())) {
-
- // If the first operand is a FI, get the TargetFI Node
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
- (Addr.getOperand(0)))
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- else
- Base = Addr.getOperand(0);
-
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
- return true;
- }
- }
-
- // Operand is a result from an ADD.
- if (Addr.getOpcode() == ISD::ADD) {
- // When loading from constant pools, load the lower address part in
- // the instruction itself. Example, instead of:
- // lui $2, %hi($CPI1_0)
- // addiu $2, $2, %lo($CPI1_0)
- // lwc1 $f0, 0($2)
- // Generate:
- // lui $2, %hi($CPI1_0)
- // lwc1 $f0, %lo($CPI1_0)($2)
- if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
- Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
- SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
- if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
- isa<JumpTableSDNode>(Opnd0)) {
- Base = Addr.getOperand(0);
- Offset = Opnd0;
- return true;
- }
- }
- }
-
+ llvm_unreachable("Unimplemented function.");
return false;
}
bool MipsDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, Addr.getValueType());
- return true;
+ llvm_unreachable("Unimplemented function.");
+ return false;
}
bool MipsDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
- return selectAddrRegImm(Addr, Base, Offset) ||
- selectAddrDefault(Addr, Base, Offset);
-}
-
-void MipsDAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
- SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy());
- if (Parent) {
- switch (Parent->getOpcode()) {
- case ISD::LOAD: {
- LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent);
- switch (SD->getMemoryVT().getSizeInBits()) {
- case 8:
- case 16:
- AliasReg = TM.getFrameLowering()->hasFP(*MF)?
- AliasFPReg: getMips16SPAliasReg();
- return;
- }
- break;
- }
- case ISD::STORE: {
- StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent);
- switch (SD->getMemoryVT().getSizeInBits()) {
- case 8:
- case 16:
- AliasReg = TM.getFrameLowering()->hasFP(*MF)?
- AliasFPReg: getMips16SPAliasReg();
- return;
- }
- break;
- }
- }
- }
- AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy());
- return;
-
-}
-bool MipsDAGToDAGISel::SelectAddr16(
- SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset,
- SDValue &Alias) {
- EVT ValTy = Addr.getValueType();
-
- Alias = CurDAG->getTargetConstant(0, ValTy);
-
- // if Address is FI, get the TargetFrameIndex.
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- Offset = CurDAG->getTargetConstant(0, ValTy);
- getMips16SPRefReg(Parent, Alias);
- return true;
- }
- // on PIC code Load GA
- if (Addr.getOpcode() == MipsISD::Wrapper) {
- Base = Addr.getOperand(0);
- Offset = Addr.getOperand(1);
- return true;
- }
- if (TM.getRelocationModel() != Reloc::PIC_) {
- if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress))
- return false;
- }
- // Addresses of the form FI+const or FI|const
- if (CurDAG->isBaseWithConstantOffset(Addr)) {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- if (isInt<16>(CN->getSExtValue())) {
-
- // If the first operand is a FI, get the TargetFI Node
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
- (Addr.getOperand(0))) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- getMips16SPRefReg(Parent, Alias);
- }
- else
- Base = Addr.getOperand(0);
-
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
- return true;
- }
- }
- // Operand is a result from an ADD.
- if (Addr.getOpcode() == ISD::ADD) {
- // When loading from constant pools, load the lower address part in
- // the instruction itself. Example, instead of:
- // lui $2, %hi($CPI1_0)
- // addiu $2, $2, %lo($CPI1_0)
- // lwc1 $f0, 0($2)
- // Generate:
- // lui $2, %hi($CPI1_0)
- // lwc1 $f0, %lo($CPI1_0)($2)
- if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
- Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
- SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
- if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
- isa<JumpTableSDNode>(Opnd0)) {
- Base = Addr.getOperand(0);
- Offset = Opnd0;
- return true;
- }
- }
-
- // If an indexed floating point load/store can be emitted, return false.
- const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
-
- if (LS &&
- (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
- Subtarget.hasFPIdx())
- return false;
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, ValTy);
- return true;
+ llvm_unreachable("Unimplemented function.");
+ return false;
}
-/// Select multiply instructions.
-std::pair<SDNode*, SDNode*>
-MipsDAGToDAGISel::SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, EVT Ty,
- bool HasLo, bool HasHi) {
- SDNode *Lo = 0, *Hi = 0;
- SDNode *Mul = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N->getOperand(0),
- N->getOperand(1));
- SDValue InFlag = SDValue(Mul, 0);
-
- if (HasLo) {
- unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mflo16 :
- (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64);
- Lo = CurDAG->getMachineNode(Opcode, dl, Ty, MVT::Glue, InFlag);
- InFlag = SDValue(Lo, 1);
- }
- if (HasHi) {
- unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mfhi16 :
- (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64);
- Hi = CurDAG->getMachineNode(Opcode, dl, Ty, InFlag);
- }
- return std::make_pair(Lo, Hi);
+bool MipsDAGToDAGISel::selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Alias) {
+ llvm_unreachable("Unimplemented function.");
+ return false;
}
-
/// Select instructions not customized! Used for
/// expanded, promoted and normal instructions
SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
unsigned Opcode = Node->getOpcode();
- DebugLoc dl = Node->getDebugLoc();
// Dump information about the Node being selected
DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
@@ -557,167 +101,19 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
return NULL;
}
- ///
- // Instruction Selection not handled by the auto-generated
- // tablegen selection should be handled here.
- ///
- EVT NodeTy = Node->getValueType(0);
- unsigned MultOpc;
+ // See if subclasses can handle this node.
+ std::pair<bool, SDNode*> Ret = selectNode(Node);
+
+ if (Ret.first)
+ return Ret.second;
switch(Opcode) {
default: break;
- case ISD::SUBE:
- case ISD::ADDE: {
- bool inMips16Mode = Subtarget.inMips16Mode();
- SDValue InFlag = Node->getOperand(2), CmpLHS;
- unsigned Opc = InFlag.getOpcode(); (void)Opc;
- assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
- (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
- "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
-
- unsigned MOp;
- if (Opcode == ISD::ADDE) {
- CmpLHS = InFlag.getValue(0);
- if (inMips16Mode)
- MOp = Mips::AdduRxRyRz16;
- else
- MOp = Mips::ADDu;
- } else {
- CmpLHS = InFlag.getOperand(0);
- if (inMips16Mode)
- MOp = Mips::SubuRxRyRz16;
- else
- MOp = Mips::SUBu;
- }
-
- SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
-
- SDValue LHS = Node->getOperand(0);
- SDValue RHS = Node->getOperand(1);
-
- EVT VT = LHS.getValueType();
-
- unsigned Sltu_op = inMips16Mode? Mips::SltuRxRyRz16: Mips::SLTu;
- SDNode *Carry = CurDAG->getMachineNode(Sltu_op, dl, VT, Ops, 2);
- unsigned Addu_op = inMips16Mode? Mips::AdduRxRyRz16 : Mips::ADDu;
- SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, dl, VT,
- SDValue(Carry,0), RHS);
-
- return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue,
- LHS, SDValue(AddCarry,0));
- }
-
- /// Mul with two results
- case ISD::SMUL_LOHI:
- case ISD::UMUL_LOHI: {
- if (NodeTy == MVT::i32) {
- if (Subtarget.inMips16Mode())
- MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 :
- Mips::MultRxRy16);
- else
- MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
- }
- else
- MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT);
-
- std::pair<SDNode*, SDNode*> LoHi = SelectMULT(Node, MultOpc, dl, NodeTy,
- true, true);
-
- if (!SDValue(Node, 0).use_empty())
- ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
-
- if (!SDValue(Node, 1).use_empty())
- ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
-
- return NULL;
- }
-
- /// Special Muls
- case ISD::MUL: {
- // Mips32 has a 32-bit three operand mul instruction.
- if (Subtarget.hasMips32() && NodeTy == MVT::i32)
- break;
- return SelectMULT(Node, NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT,
- dl, NodeTy, true, false).first;
- }
- case ISD::MULHS:
- case ISD::MULHU: {
- if (NodeTy == MVT::i32) {
- if (Subtarget.inMips16Mode())
- MultOpc = (Opcode == ISD::MULHU ?
- Mips::MultuRxRy16 : Mips::MultRxRy16);
- else
- MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
- }
- else
- MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT);
-
- return SelectMULT(Node, MultOpc, dl, NodeTy, false, true).second;
- }
-
// Get target GOT address.
case ISD::GLOBAL_OFFSET_TABLE:
return getGlobalBaseReg();
- case ISD::ConstantFP: {
- ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
- if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
- if (Subtarget.hasMips64()) {
- SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- Mips::ZERO_64, MVT::i64);
- return CurDAG->getMachineNode(Mips::DMTC1, dl, MVT::f64, Zero);
- }
-
- SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- Mips::ZERO, MVT::i32);
- return CurDAG->getMachineNode(Mips::BuildPairF64, dl, MVT::f64, Zero,
- Zero);
- }
- break;
- }
-
- case ISD::Constant: {
- const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node);
- unsigned Size = CN->getValueSizeInBits(0);
-
- if (Size == 32)
- break;
-
- MipsAnalyzeImmediate AnalyzeImm;
- int64_t Imm = CN->getSExtValue();
-
- const MipsAnalyzeImmediate::InstSeq &Seq =
- AnalyzeImm.Analyze(Imm, Size, false);
-
- MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
- DebugLoc DL = CN->getDebugLoc();
- SDNode *RegOpnd;
- SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
- MVT::i64);
-
- // The first instruction can be a LUi which is different from other
- // instructions (ADDiu, ORI and SLL) in that it does not have a register
- // operand.
- if (Inst->Opc == Mips::LUi64)
- RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd);
- else
- RegOpnd =
- CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
- CurDAG->getRegister(Mips::ZERO_64, MVT::i64),
- ImmOpnd);
-
- // The remaining instructions in the sequence are handled here.
- for (++Inst; Inst != Seq.end(); ++Inst) {
- ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
- MVT::i64);
- RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
- SDValue(RegOpnd, 0), ImmOpnd);
- }
-
- return RegOpnd;
- }
-
#ifndef NDEBUG
case ISD::LOAD:
case ISD::STORE:
@@ -726,31 +122,6 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
"Unexpected unaligned loads/stores.");
break;
#endif
-
- case MipsISD::ThreadPointer: {
- EVT PtrVT = TLI.getPointerTy();
- unsigned RdhwrOpc, SrcReg, DestReg;
-
- if (PtrVT == MVT::i32) {
- RdhwrOpc = Mips::RDHWR;
- SrcReg = Mips::HWR29;
- DestReg = Mips::V1;
- } else {
- RdhwrOpc = Mips::RDHWR64;
- SrcReg = Mips::HWR29_64;
- DestReg = Mips::V1_64;
- }
-
- SDNode *Rdhwr =
- CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(),
- Node->getValueType(0),
- CurDAG->getRegister(SrcReg, PtrVT));
- SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg,
- SDValue(Rdhwr, 0));
- SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, PtrVT);
- ReplaceUses(SDValue(Node, 0), ResNode);
- return ResNode.getNode();
- }
}
// Select the default instruction
@@ -776,5 +147,8 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
/// createMipsISelDag - This pass converts a legalized DAG into a
/// MIPS-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) {
- return new MipsDAGToDAGISel(TM);
+ if (TM.getSubtargetImpl()->inMips16Mode())
+ return llvm::createMips16ISelDag(TM);
+
+ return llvm::createMipsSEISelDag(TM);
}
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
new file mode 100644
index 0000000..cf0f9c5
--- /dev/null
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -0,0 +1,93 @@
+//===---- MipsISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MIPS target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSISELDAGTODAG_H
+#define MIPSISELDAGTODAG_H
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsDAGToDAGISel - MIPS specific code to select MIPS machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+namespace llvm {
+
+class MipsDAGToDAGISel : public SelectionDAGISel {
+public:
+ explicit MipsDAGToDAGISel(MipsTargetMachine &TM)
+ : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<MipsSubtarget>()) {}
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "MIPS DAG->DAG Pattern Instruction Selection";
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+protected:
+ SDNode *getGlobalBaseReg();
+
+ /// Keep a pointer to the MipsSubtarget around so that we can make the right
+ /// decision when generating code for different targets.
+ const MipsSubtarget &Subtarget;
+
+private:
+ // Include the pieces autogenerated from the target description.
+ #include "MipsGenDAGISel.inc"
+
+ // Complex Pattern.
+ /// (reg + imm).
+ virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ /// Fall back on this function if all else fails.
+ virtual bool selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ /// Match integer address pattern.
+ virtual bool selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Alias);
+
+ virtual SDNode *Select(SDNode *N);
+
+ virtual std::pair<bool, SDNode*> selectNode(SDNode *Node) = 0;
+
+ // getImm - Return a target constant with the specified value.
+ inline SDValue getImm(const SDNode *Node, uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
+ }
+
+ virtual void processFunctionAfterISel(MachineFunction &MF) = 0;
+
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+};
+
+/// createMipsISelDag - This pass converts a legalized DAG into a
+/// MIPS-specific DAG, ready for instruction scheduling.
+FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
+
+}
+
+#endif
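The header above reduces MipsDAGToDAGISel to a thin base class: Select() gives the pure virtual selectNode() hook first refusal and only then falls back to the shared paths, while processFunctionAfterISel() is deferred entirely to the Mips16 and SE subclasses added elsewhere in this patch. The toy program below mirrors that shape with plain C++ types; the class and opcode names are made up for illustration.

#include <cstdio>
#include <utility>

struct Node { int Opcode; };

class ToyDAGISelBase {
public:
  virtual ~ToyDAGISelBase() {}

  // Mirrors MipsDAGToDAGISel::Select: give the subclass hook first refusal,
  // then fall back to the selection logic shared by all subtargets.
  const char *select(const Node &N) {
    std::pair<bool, const char *> Ret = selectNode(N);
    if (Ret.first)
      return Ret.second;
    return "shared selection path";
  }

private:
  virtual std::pair<bool, const char *> selectNode(const Node &N) = 0;
};

class ToySESelector : public ToyDAGISelBase {
private:
  virtual std::pair<bool, const char *> selectNode(const Node &N) {
    if (N.Opcode == 42)  // pretend 42 is an SE-only node
      return std::make_pair(true, "SE-specific selection");
    return std::make_pair(false, "");
  }
};

int main() {
  ToySESelector ISel;
  Node A = { 42 }, B = { 7 };
  std::printf("opcode 42 -> %s\n", ISel.select(A));
  std::printf("opcode  7 -> %s\n", ISel.select(B));
  return 0;
}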
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 36e1a15..4bf43f4 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-lower"
-#include <set>
#include "MipsISelLowering.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "MCTargetDesc/MipsBaseInfo.h"
@@ -30,7 +29,6 @@
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
@@ -43,26 +41,9 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool>
-EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
- cl::desc("MIPS: Enable tail calls."), cl::init(false));
-
-static cl::opt<bool>
LargeGOT("mxgot", cl::Hidden,
cl::desc("MIPS: Enable GOT larger than 64k."), cl::init(false));
-static cl::opt<bool>
-Mips16HardFloat("mips16-hard-float", cl::NotHidden,
- cl::desc("MIPS: mips16 hard float enable."),
- cl::init(false));
-
-static cl::opt<bool> DontExpandCondPseudos16(
- "mips16-dont-expand-cond-pseudo",
- cl::init(false),
- cl::desc("Dont expand conditional move related "
- "pseudos for Mips 16"),
- cl::Hidden);
-
-
static const uint16_t O32IntRegs[4] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
};
@@ -80,7 +61,7 @@ static const uint16_t Mips64DPRegs[8] = {
// If I is a shifted mask, set the size (Size) and the first bit of the
// mask (Pos), and return true.
// For example, if I is 0x003ff800, (Pos, Size) = (11, 11).
-static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
+static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
if (!isShiftedMask_64(I))
return false;
@@ -89,7 +70,7 @@ static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
return true;
}
-static SDValue GetGlobalReg(SelectionDAG &DAG, EVT Ty) {
+SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const {
MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>();
return DAG.getRegister(FI->getGlobalBaseReg(), Ty);
}
@@ -124,11 +105,12 @@ static SDValue getAddrNonPIC(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(MipsISD::Lo, DL, Ty, Lo));
}
-static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) {
+SDValue MipsTargetLowering::getAddrLocal(SDValue Op, SelectionDAG &DAG,
+ bool HasMips64) const {
DebugLoc DL = Op.getDebugLoc();
EVT Ty = Op.getValueType();
unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
- SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, GetGlobalReg(DAG, Ty),
+ SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
getTargetNode(Op, DAG, GOTFlag));
SDValue Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), GOT,
MachinePointerInfo::getGOT(), false, false, false,
@@ -138,21 +120,23 @@ static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) {
return DAG.getNode(ISD::ADD, DL, Ty, Load, Lo);
}
-static SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) {
+SDValue MipsTargetLowering::getAddrGlobal(SDValue Op, SelectionDAG &DAG,
+ unsigned Flag) const {
DebugLoc DL = Op.getDebugLoc();
EVT Ty = Op.getValueType();
- SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, GetGlobalReg(DAG, Ty),
+ SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
getTargetNode(Op, DAG, Flag));
return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Tgt,
MachinePointerInfo::getGOT(), false, false, false, 0);
}
-static SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
- unsigned HiFlag, unsigned LoFlag) {
+SDValue MipsTargetLowering::getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
+ unsigned HiFlag,
+ unsigned LoFlag) const {
DebugLoc DL = Op.getDebugLoc();
EVT Ty = Op.getValueType();
SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(Op, DAG, HiFlag));
- Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, GetGlobalReg(DAG, Ty));
+ Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, getGlobalReg(DAG, Ty));
SDValue Wrapper = DAG.getNode(MipsISD::Wrapper, DL, Ty, Hi,
getTargetNode(Op, DAG, LoFlag));
return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Wrapper,
@@ -204,7 +188,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::MTHLIP: return "MipsISD::MTHLIP";
case MipsISD::MULT: return "MipsISD::MULT";
case MipsISD::MULTU: return "MipsISD::MULTU";
- case MipsISD::MADD_DSP: return "MipsISD::MADD_DSPDSP";
+ case MipsISD::MADD_DSP: return "MipsISD::MADD_DSP";
case MipsISD::MADDU_DSP: return "MipsISD::MADDU_DSP";
case MipsISD::MSUB_DSP: return "MipsISD::MSUB_DSP";
case MipsISD::MSUBU_DSP: return "MipsISD::MSUBU_DSP";
@@ -212,110 +196,17 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
-namespace {
- struct ltstr {
- bool operator()(const char *s1, const char *s2) const
- {
- return strcmp(s1, s2) < 0;
- }
- };
-
- std::set<const char*, ltstr> noHelperNeeded;
-}
-
-void MipsTargetLowering::SetMips16LibcallName
- (RTLIB::Libcall l, const char *Name) {
- setLibcallName(l, Name);
- noHelperNeeded.insert(Name);
-}
-
-void MipsTargetLowering::setMips16HardFloatLibCalls() {
- SetMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3");
- SetMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3");
- SetMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3");
- SetMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3");
- SetMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3");
- SetMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3");
- SetMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3");
- SetMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3");
- SetMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2");
- SetMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2");
- SetMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi");
- SetMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi");
- SetMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf");
- SetMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf");
- SetMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf");
- SetMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf");
- SetMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2");
- SetMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2");
- SetMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2");
- SetMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2");
- SetMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2");
- SetMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2");
- SetMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2");
- SetMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2");
- SetMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2");
- SetMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2");
- SetMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2");
- SetMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2");
- SetMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2");
- SetMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2");
- SetMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2");
- SetMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2");
-}
-
MipsTargetLowering::
MipsTargetLowering(MipsTargetMachine &TM)
: TargetLowering(TM, new MipsTargetObjectFile()),
Subtarget(&TM.getSubtarget<MipsSubtarget>()),
HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()),
IsO32(Subtarget->isABI_O32()) {
-
  // Mips does not have an i1 type, so use i32 for
  // the results of setcc operations (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
- // Set up the register classes
- addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
-
- if (HasMips64)
- addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass);
-
- if (Subtarget->inMips16Mode()) {
- addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass);
- if (Mips16HardFloat)
- setMips16HardFloatLibCalls();
- }
-
- if (Subtarget->hasDSP()) {
- MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
-
- for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
- addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass);
-
- // Expand all builtin opcodes.
- for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
- setOperationAction(Opc, VecTys[i], Expand);
-
- setOperationAction(ISD::LOAD, VecTys[i], Legal);
- setOperationAction(ISD::STORE, VecTys[i], Legal);
- setOperationAction(ISD::BITCAST, VecTys[i], Legal);
- }
- }
-
- if (!TM.Options.UseSoftFloat) {
- addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
-
- // When dealing with single precision only, use libcalls
- if (!Subtarget->isSingleFloat()) {
- if (HasMips64)
- addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
- else
- addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
- }
- }
-
  // Load extended operations for i1 types must be promoted
setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
@@ -332,6 +223,7 @@ MipsTargetLowering(MipsTargetMachine &TM)
AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
// Mips Custom Operations
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
@@ -348,18 +240,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
- if (Subtarget->inMips16Mode()) {
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
- }
- else {
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
- }
- if (!Subtarget->inMips16Mode()) {
- setOperationAction(ISD::LOAD, MVT::i32, Custom);
- setOperationAction(ISD::STORE, MVT::i32, Custom);
- }
if (!TM.Options.NoNaNsFPMath) {
setOperationAction(ISD::FABS, MVT::f32, Custom);
@@ -397,8 +277,10 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::UREM, MVT::i64, Expand);
// Operations not directly supported by Mips.
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
@@ -470,21 +352,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
- if (Subtarget->inMips16Mode()) {
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
- }
-
setInsertFencesForAtomic(true);
if (!Subtarget->hasSEInReg()) {
@@ -521,7 +388,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setMinFunctionAlignment(HasMips64 ? 3 : 2);
setStackPointerRegisterToSaveRestore(IsN64 ? Mips::SP_64 : Mips::SP);
- computeRegisterProperties();
setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0);
setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1);
@@ -529,22 +395,11 @@ MipsTargetLowering(MipsTargetMachine &TM)
MaxStoresPerMemcpy = 16;
}
-bool
-MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
- MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
-
- if (Subtarget->inMips16Mode())
- return false;
+const MipsTargetLowering *MipsTargetLowering::create(MipsTargetMachine &TM) {
+ if (TM.getSubtargetImpl()->inMips16Mode())
+ return llvm::createMips16TargetLowering(TM);
- switch (SVT) {
- case MVT::i64:
- case MVT::i32:
- if (Fast)
- *Fast = true;
- return true;
- default:
- return false;
- }
+ return llvm::createMipsSETargetLowering(TM);
}
EVT MipsTargetLowering::getSetCCResultType(EVT VT) const {
@@ -553,7 +408,7 @@ EVT MipsTargetLowering::getSetCCResultType(EVT VT) const {
return VT.changeVectorElementTypeToInteger();
}
-// SelectMadd -
+// selectMADD -
// Transforms a subgraph in CurDAG if the following pattern is found:
// (addc multLo, Lo0), (adde multHi, Hi0),
// where,
@@ -561,7 +416,7 @@ EVT MipsTargetLowering::getSetCCResultType(EVT VT) const {
// Lo0: initial value of Lo register
// Hi0: initial value of Hi register
// Return true if pattern matching was successful.
-static bool SelectMadd(SDNode *ADDENode, SelectionDAG *CurDAG) {
+static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) {
// ADDENode's second operand must be a flag output of an ADDC node in order
// for the matching to be successful.
SDNode *ADDCNode = ADDENode->getOperand(2).getNode();
@@ -598,21 +453,21 @@ static bool SelectMadd(SDNode *ADDENode, SelectionDAG *CurDAG) {
return false;
SDValue Chain = CurDAG->getEntryNode();
- DebugLoc dl = ADDENode->getDebugLoc();
+ DebugLoc DL = ADDENode->getDebugLoc();
// create MipsMAdd(u) node
MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
- SDValue MAdd = CurDAG->getNode(MultOpc, dl, MVT::Glue,
+ SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Glue,
MultNode->getOperand(0),// Factor 0
MultNode->getOperand(1),// Factor 1
ADDCNode->getOperand(1),// Lo0
ADDENode->getOperand(1));// Hi0
// create CopyFromReg nodes
- SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, dl, Mips::LO, MVT::i32,
+ SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, DL, Mips::LO, MVT::i32,
MAdd);
- SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), dl,
+ SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), DL,
Mips::HI, MVT::i32,
CopyFromLo.getValue(2));
@@ -626,7 +481,7 @@ static bool SelectMadd(SDNode *ADDENode, SelectionDAG *CurDAG) {
return true;
}
-// SelectMsub -
+// selectMSUB -
// Transforms a subgraph in CurDAG if the following pattern is found:
// (addc Lo0, multLo), (sube Hi0, multHi),
// where,
@@ -634,7 +489,7 @@ static bool SelectMadd(SDNode *ADDENode, SelectionDAG *CurDAG) {
// Lo0: initial value of Lo register
// Hi0: initial value of Hi register
// Return true if pattern matching was successful.
-static bool SelectMsub(SDNode *SUBENode, SelectionDAG *CurDAG) {
+static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) {
// SUBENode's second operand must be a flag output of an SUBC node in order
// for the matching to be successful.
SDNode *SUBCNode = SUBENode->getOperand(2).getNode();
@@ -671,21 +526,21 @@ static bool SelectMsub(SDNode *SUBENode, SelectionDAG *CurDAG) {
return false;
SDValue Chain = CurDAG->getEntryNode();
- DebugLoc dl = SUBENode->getDebugLoc();
+ DebugLoc DL = SUBENode->getDebugLoc();
// create MipsSub(u) node
MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;
- SDValue MSub = CurDAG->getNode(MultOpc, dl, MVT::Glue,
+ SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue,
MultNode->getOperand(0),// Factor 0
MultNode->getOperand(1),// Factor 1
SUBCNode->getOperand(0),// Lo0
SUBENode->getOperand(0));// Hi0
// create CopyFromReg nodes
- SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, dl, Mips::LO, MVT::i32,
+ SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, DL, Mips::LO, MVT::i32,
MSub);
- SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), dl,
+ SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), DL,
Mips::HI, MVT::i32,
CopyFromLo.getValue(2));
@@ -699,33 +554,33 @@ static bool SelectMsub(SDNode *SUBENode, SelectionDAG *CurDAG) {
return true;
}
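As a rough illustration (the function names below are hypothetical and not part of this patch), a 64-bit multiply-accumulate compiled for a 32-bit MIPS target is the kind of source that typically legalizes into the MUL_LOHI-fed (ADDC, ADDE) and (SUBC, SUBE) subgraphs these two combines look for, allowing them to be folded into single MipsISD::MAdd(u)/MSub(u) nodes:

// Hypothetical examples only: on MIPS32, i64 arithmetic is split into
// 32-bit halves, so these typically become MUL_LOHI plus an ADDC/ADDE
// (or SUBC/SUBE) pair, which selectMADD/selectMSUB then match.
long long madd64(long long acc, int a, int b) { return acc + (long long)a * b; }
long long msub64(long long acc, int a, int b) { return acc - (long long)a * b; }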
-static SDValue PerformADDECombine(SDNode *N, SelectionDAG &DAG,
+static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget *Subtarget) {
if (DCI.isBeforeLegalize())
return SDValue();
if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
- SelectMadd(N, &DAG))
+ selectMADD(N, &DAG))
return SDValue(N, 0);
return SDValue();
}
-static SDValue PerformSUBECombine(SDNode *N, SelectionDAG &DAG,
+static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget *Subtarget) {
if (DCI.isBeforeLegalize())
return SDValue();
if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
- SelectMsub(N, &DAG))
+ selectMSUB(N, &DAG))
return SDValue(N, 0);
return SDValue();
}
-static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
@@ -734,18 +589,18 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG &DAG,
EVT Ty = N->getValueType(0);
unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64;
unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64;
- unsigned opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem :
+ unsigned Opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem :
MipsISD::DivRemU;
- DebugLoc dl = N->getDebugLoc();
+ DebugLoc DL = N->getDebugLoc();
- SDValue DivRem = DAG.getNode(opc, dl, MVT::Glue,
+ SDValue DivRem = DAG.getNode(Opc, DL, MVT::Glue,
N->getOperand(0), N->getOperand(1));
SDValue InChain = DAG.getEntryNode();
SDValue InGlue = DivRem;
// insert MFLO
if (N->hasAnyUseOfValue(0)) {
- SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, LO, Ty,
+ SDValue CopyFromLo = DAG.getCopyFromReg(InChain, DL, LO, Ty,
InGlue);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyFromLo);
InChain = CopyFromLo.getValue(1);
@@ -754,7 +609,7 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG &DAG,
// insert MFHI
if (N->hasAnyUseOfValue(1)) {
- SDValue CopyFromHi = DAG.getCopyFromReg(InChain, dl,
+ SDValue CopyFromHi = DAG.getCopyFromReg(InChain, DL,
HI, Ty, InGlue);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), CopyFromHi);
}
@@ -790,7 +645,7 @@ static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
// Returns true if condition code has to be inverted.
-static bool InvertFPCondCode(Mips::CondCode CC) {
+static bool invertFPCondCode(Mips::CondCode CC) {
if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
return false;
@@ -802,7 +657,7 @@ static bool InvertFPCondCode(Mips::CondCode CC) {
// Creates and returns an FPCmp node from a setcc node.
// Returns Op if setcc is not a floating point comparison.
-static SDValue CreateFPCmp(SelectionDAG &DAG, const SDValue &Op) {
+static SDValue createFPCmp(SelectionDAG &DAG, const SDValue &Op) {
// must be a SETCC node
if (Op.getOpcode() != ISD::SETCC)
return Op;
@@ -813,20 +668,20 @@ static SDValue CreateFPCmp(SelectionDAG &DAG, const SDValue &Op) {
return Op;
SDValue RHS = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
// Assume the 3rd operand is a CondCodeSDNode. Add code to check the type of
// node if necessary.
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
- return DAG.getNode(MipsISD::FPCmp, dl, MVT::Glue, LHS, RHS,
+ return DAG.getNode(MipsISD::FPCmp, DL, MVT::Glue, LHS, RHS,
DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32));
}
// Creates and returns a CMovFPT/F node.
-static SDValue CreateCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True,
+static SDValue createCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True,
SDValue False, DebugLoc DL) {
- bool invert = InvertFPCondCode((Mips::CondCode)
+ bool invert = invertFPCondCode((Mips::CondCode)
cast<ConstantSDNode>(Cond.getOperand(2))
->getSExtValue());
@@ -834,7 +689,7 @@ static SDValue CreateCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True,
True.getValueType(), True, False, Cond);
}
-static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
@@ -867,7 +722,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True);
}
-static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget *Subtarget) {
// Pattern match EXT.
@@ -893,7 +748,7 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG,
// Op's second operand must be a shifted mask.
if (!(CN = dyn_cast<ConstantSDNode>(Mask)) ||
- !IsShiftedMask(CN->getZExtValue(), SMPos, SMSize))
+ !isShiftedMask(CN->getZExtValue(), SMPos, SMSize))
return SDValue();
// Return if the shifted mask does not start at bit 0 or the sum of its size
@@ -907,7 +762,7 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(SMSize, MVT::i32));
}
-static SDValue PerformORCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget *Subtarget) {
// Pattern match INS.
@@ -926,7 +781,7 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
if (!(CN = dyn_cast<ConstantSDNode>(And0.getOperand(1))) ||
- !IsShiftedMask(~CN->getSExtValue(), SMPos0, SMSize0))
+ !isShiftedMask(~CN->getSExtValue(), SMPos0, SMSize0))
return SDValue();
// See if Op's second operand matches (and (shl $src, pos), mask1).
@@ -934,7 +789,7 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
if (!(CN = dyn_cast<ConstantSDNode>(And1.getOperand(1))) ||
- !IsShiftedMask(CN->getZExtValue(), SMPos1, SMSize1))
+ !isShiftedMask(CN->getZExtValue(), SMPos1, SMSize1))
return SDValue();
// The shift masks must have the same position and size.
@@ -961,7 +816,7 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(SMSize0, MVT::i32), And0.getOperand(0));
}
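For context, bit-field extract and insert expressions like the hypothetical helpers below are roughly the shapes these two combines recognize: (and (srl $src, pos), mask) with a contiguous mask starting at bit 0 can become a single MipsISD::Ext node, and the masked-merge form can become MipsISD::Ins, on subtargets that provide those instructions:

// Illustrative only; the identifiers are made up for this sketch.
unsigned get_rs(unsigned insn) {            // (insn >> 21) & 0x1f: ext-style pattern
  return (insn >> 21) & 0x1f;
}
unsigned set_rs(unsigned insn, unsigned rs) {  // masked merge: ins-style pattern
  return (insn & ~(0x1fu << 21)) | ((rs & 0x1fu) << 21);
}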
-static SDValue PerformADDCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget *Subtarget) {
// (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt))
@@ -991,25 +846,25 @@ static SDValue PerformADDCombine(SDNode *N, SelectionDAG &DAG,
SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
const {
SelectionDAG &DAG = DCI.DAG;
- unsigned opc = N->getOpcode();
+ unsigned Opc = N->getOpcode();
- switch (opc) {
+ switch (Opc) {
default: break;
case ISD::ADDE:
- return PerformADDECombine(N, DAG, DCI, Subtarget);
+ return performADDECombine(N, DAG, DCI, Subtarget);
case ISD::SUBE:
- return PerformSUBECombine(N, DAG, DCI, Subtarget);
+ return performSUBECombine(N, DAG, DCI, Subtarget);
case ISD::SDIVREM:
case ISD::UDIVREM:
- return PerformDivRemCombine(N, DAG, DCI, Subtarget);
+ return performDivRemCombine(N, DAG, DCI, Subtarget);
case ISD::SELECT:
- return PerformSELECTCombine(N, DAG, DCI, Subtarget);
+ return performSELECTCombine(N, DAG, DCI, Subtarget);
case ISD::AND:
- return PerformANDCombine(N, DAG, DCI, Subtarget);
+ return performANDCombine(N, DAG, DCI, Subtarget);
case ISD::OR:
- return PerformORCombine(N, DAG, DCI, Subtarget);
+ return performORCombine(N, DAG, DCI, Subtarget);
case ISD::ADD:
- return PerformADDCombine(N, DAG, DCI, Subtarget);
+ return performADDCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
@@ -1040,31 +895,32 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
switch (Op.getOpcode())
{
- case ISD::BRCOND: return LowerBRCOND(Op, DAG);
- case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
- case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
- case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
- case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
- case ISD::JumpTable: return LowerJumpTable(Op, DAG);
- case ISD::SELECT: return LowerSELECT(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
- case ISD::SETCC: return LowerSETCC(Op, DAG);
- case ISD::VASTART: return LowerVASTART(Op, DAG);
- case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
- case ISD::FABS: return LowerFABS(Op, DAG);
- case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
- case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
- case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
- case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG);
- case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
- case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
- case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG, true);
- case ISD::SRL_PARTS: return LowerShiftRightParts(Op, DAG, false);
- case ISD::LOAD: return LowerLOAD(Op, DAG);
- case ISD::STORE: return LowerSTORE(Op, DAG);
- case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
- case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
- case ISD::ADD: return LowerADD(Op, DAG);
+ case ISD::BR_JT: return lowerBR_JT(Op, DAG);
+ case ISD::BRCOND: return lowerBRCOND(Op, DAG);
+ case ISD::ConstantPool: return lowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return lowerGlobalAddress(Op, DAG);
+ case ISD::BlockAddress: return lowerBlockAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return lowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return lowerJumpTable(Op, DAG);
+ case ISD::SELECT: return lowerSELECT(Op, DAG);
+ case ISD::SELECT_CC: return lowerSELECT_CC(Op, DAG);
+ case ISD::SETCC: return lowerSETCC(Op, DAG);
+ case ISD::VASTART: return lowerVASTART(Op, DAG);
+ case ISD::FCOPYSIGN: return lowerFCOPYSIGN(Op, DAG);
+ case ISD::FABS: return lowerFABS(Op, DAG);
+ case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG);
+ case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG);
+ case ISD::EH_RETURN: return lowerEH_RETURN(Op, DAG);
+ case ISD::MEMBARRIER: return lowerMEMBARRIER(Op, DAG);
+ case ISD::ATOMIC_FENCE: return lowerATOMIC_FENCE(Op, DAG);
+ case ISD::SHL_PARTS: return lowerShiftLeftParts(Op, DAG);
+ case ISD::SRA_PARTS: return lowerShiftRightParts(Op, DAG, true);
+ case ISD::SRL_PARTS: return lowerShiftRightParts(Op, DAG, false);
+ case ISD::LOAD: return lowerLOAD(Op, DAG);
+ case ISD::STORE: return lowerSTORE(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG);
+ case ISD::ADD: return lowerADD(Op, DAG);
}
return SDValue();
}
@@ -1073,11 +929,11 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
// Lower helper functions
//===----------------------------------------------------------------------===//
-// AddLiveIn - This helper function adds the specified physical register to the
+// addLiveIn - This helper function adds the specified physical register to the
// MachineFunction as a live in value. It also creates a corresponding
// virtual register for it.
static unsigned
-AddLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC)
+addLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC)
{
unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
MF.getRegInfo().addLiveIn(PReg, VReg);
@@ -1085,7 +941,7 @@ AddLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC)
}
// Get fp branch code (not opcode) from condition code.
-static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) {
+static Mips::FPBranchCode getFPBranchCodeFromCond(Mips::CondCode CC) {
if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
return Mips::BRANCH_T;
@@ -1095,425 +951,6 @@ static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) {
return Mips::BRANCH_F;
}
-/*
-static MachineBasicBlock* ExpandCondMov(MachineInstr *MI, MachineBasicBlock *BB,
- DebugLoc dl,
- const MipsSubtarget *Subtarget,
- const TargetInstrInfo *TII,
- bool isFPCmp, unsigned Opc) {
- // There is no need to expand CMov instructions if target has
- // conditional moves.
- if (Subtarget->hasCondMov())
- return BB;
-
- // To "insert" a SELECT_CC instruction, we actually have to insert the
- // diamond control-flow pattern. The incoming instruction knows the
- // destination vreg to set, the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // setcc r1, r2, r3
- // bNE r1, r0, copy1MBB
- // fallthrough --> copy0MBB
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, copy0MBB);
- F->insert(It, sinkMBB);
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // Next, add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
- // Emit the right instruction according to the type of the operands compared
- if (isFPCmp)
- BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB);
- else
- BuildMI(BB, dl, TII->get(Opc)).addReg(MI->getOperand(2).getReg())
- .addReg(Mips::ZERO).addMBB(sinkMBB);
-
- // copy0MBB:
- // %FalseValue = ...
- // # fallthrough to sinkMBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
- // ...
- BB = sinkMBB;
-
- if (isFPCmp)
- BuildMI(*BB, BB->begin(), dl,
- TII->get(Mips::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB)
- .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB);
- else
- BuildMI(*BB, BB->begin(), dl,
- TII->get(Mips::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB)
- .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
-*/
-
-MachineBasicBlock *
-MipsTargetLowering::EmitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
- // $bb:
- // bposge32_pseudo $vr0
- // =>
- // $bb:
- // bposge32 $tbb
- // $fbb:
- // li $vr2, 0
- // b $sink
- // $tbb:
- // li $vr1, 1
- // $sink:
- // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
-
- MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- const TargetRegisterClass *RC = &Mips::CPURegsRegClass;
- DebugLoc DL = MI->getDebugLoc();
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB));
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, FBB);
- F->insert(It, TBB);
- F->insert(It, Sink);
-
- // Transfer the remainder of BB and its successor edges to Sink.
- Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- Sink->transferSuccessorsAndUpdatePHIs(BB);
-
- // Add successors.
- BB->addSuccessor(FBB);
- BB->addSuccessor(TBB);
- FBB->addSuccessor(Sink);
- TBB->addSuccessor(Sink);
-
- // Insert the real bposge32 instruction to $BB.
- BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
-
- // Fill $FBB.
- unsigned VR2 = RegInfo.createVirtualRegister(RC);
- BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
- .addReg(Mips::ZERO).addImm(0);
- BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
-
- // Fill $TBB.
- unsigned VR1 = RegInfo.createVirtualRegister(RC);
- BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
- .addReg(Mips::ZERO).addImm(1);
-
- // Insert phi function to $Sink.
- BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
- MI->getOperand(0).getReg())
- .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return Sink;
-}
-
-MachineBasicBlock *MipsTargetLowering::EmitSel16(unsigned Opc, MachineInstr *MI,
- MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
- // To "insert" a SELECT_CC instruction, we actually have to insert the
- // diamond control-flow pattern. The incoming instruction knows the
- // destination vreg to set, the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // setcc r1, r2, r3
- // bNE r1, r0, copy1MBB
- // fallthrough --> copy0MBB
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, copy0MBB);
- F->insert(It, sinkMBB);
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // Next, add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
- BuildMI(BB, dl, TII->get(Opc)).addReg(MI->getOperand(3).getReg())
- .addMBB(sinkMBB);
-
- // copy0MBB:
- // %FalseValue = ...
- // # fallthrough to sinkMBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
- // ...
- BB = sinkMBB;
-
- BuildMI(*BB, BB->begin(), dl,
- TII->get(Mips::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
-
-MachineBasicBlock *MipsTargetLowering::EmitSelT16
- (unsigned Opc1, unsigned Opc2,
- MachineInstr *MI, MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
- // To "insert" a SELECT_CC instruction, we actually have to insert the
- // diamond control-flow pattern. The incoming instruction knows the
- // destination vreg to set, the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // setcc r1, r2, r3
- // bNE r1, r0, copy1MBB
- // fallthrough --> copy0MBB
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, copy0MBB);
- F->insert(It, sinkMBB);
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // Next, add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
- BuildMI(BB, dl, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
- .addReg(MI->getOperand(4).getReg());
- BuildMI(BB, dl, TII->get(Opc1)).addMBB(sinkMBB);
-
- // copy0MBB:
- // %FalseValue = ...
- // # fallthrough to sinkMBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
- // ...
- BB = sinkMBB;
-
- BuildMI(*BB, BB->begin(), dl,
- TII->get(Mips::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-
-}
-
-
-MachineBasicBlock *MipsTargetLowering::EmitSeliT16
- (unsigned Opc1, unsigned Opc2,
- MachineInstr *MI, MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
- // To "insert" a SELECT_CC instruction, we actually have to insert the
- // diamond control-flow pattern. The incoming instruction knows the
- // destination vreg to set, the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // setcc r1, r2, r3
- // bNE r1, r0, copy1MBB
- // fallthrough --> copy0MBB
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, copy0MBB);
- F->insert(It, sinkMBB);
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // Next, add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
- BuildMI(BB, dl, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
- .addImm(MI->getOperand(4).getImm());
- BuildMI(BB, dl, TII->get(Opc1)).addMBB(sinkMBB);
-
- // copy0MBB:
- // %FalseValue = ...
- // # fallthrough to sinkMBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
- // ...
- BB = sinkMBB;
-
- BuildMI(*BB, BB->begin(), dl,
- TII->get(Mips::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-
-}
-
-
-MachineBasicBlock
- *MipsTargetLowering::EmitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
- MachineInstr *MI,
- MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- unsigned regX = MI->getOperand(0).getReg();
- unsigned regY = MI->getOperand(1).getReg();
- MachineBasicBlock *target = MI->getOperand(2).getMBB();
- BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY);
- BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
-
-
-MachineBasicBlock *MipsTargetLowering::EmitFEXT_T8I8I16_ins(
- unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
- MachineInstr *MI, MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- unsigned regX = MI->getOperand(0).getReg();
- int64_t imm = MI->getOperand(1).getImm();
- MachineBasicBlock *target = MI->getOperand(2).getMBB();
- unsigned CmpOpc;
- if (isUInt<8>(imm))
- CmpOpc = CmpiOpc;
- else if (isUInt<16>(imm))
- CmpOpc = CmpiXOpc;
- else
- llvm_unreachable("immediate field not usable");
- BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm);
- BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
-
-
-static unsigned Mips16WhichOp8uOr16simm
- (unsigned shortOp, unsigned longOp, int64_t Imm) {
- if (isUInt<8>(Imm))
- return shortOp;
- else if (isInt<16>(Imm))
- return longOp;
- else
- llvm_unreachable("immediate field not usable");
-}
-
-MachineBasicBlock *MipsTargetLowering::EmitFEXT_CCRX16_ins(
- unsigned SltOpc,
- MachineInstr *MI, MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- unsigned CC = MI->getOperand(0).getReg();
- unsigned regX = MI->getOperand(1).getReg();
- unsigned regY = MI->getOperand(2).getReg();
- BuildMI(*BB, MI, MI->getDebugLoc(),
- TII->get(SltOpc)).addReg(regX).addReg(regY);
- BuildMI(*BB, MI, MI->getDebugLoc(),
- TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
-MachineBasicBlock *MipsTargetLowering::EmitFEXT_CCRXI16_ins(
- unsigned SltiOpc, unsigned SltiXOpc,
- MachineInstr *MI, MachineBasicBlock *BB )const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- unsigned CC = MI->getOperand(0).getReg();
- unsigned regX = MI->getOperand(1).getReg();
- int64_t Imm = MI->getOperand(2).getImm();
- unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm);
- BuildMI(*BB, MI, MI->getDebugLoc(),
- TII->get(SltOpc)).addReg(regX).addImm(Imm);
- BuildMI(*BB, MI, MI->getDebugLoc(),
- TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-
-}
MachineBasicBlock *
MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
@@ -1522,185 +959,114 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
llvm_unreachable("Unexpected instr type to insert");
case Mips::ATOMIC_LOAD_ADD_I8:
case Mips::ATOMIC_LOAD_ADD_I8_P8:
- return EmitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu);
+ return emitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu);
case Mips::ATOMIC_LOAD_ADD_I16:
case Mips::ATOMIC_LOAD_ADD_I16_P8:
- return EmitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu);
+ return emitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu);
case Mips::ATOMIC_LOAD_ADD_I32:
case Mips::ATOMIC_LOAD_ADD_I32_P8:
- return EmitAtomicBinary(MI, BB, 4, Mips::ADDu);
+ return emitAtomicBinary(MI, BB, 4, Mips::ADDu);
case Mips::ATOMIC_LOAD_ADD_I64:
case Mips::ATOMIC_LOAD_ADD_I64_P8:
- return EmitAtomicBinary(MI, BB, 8, Mips::DADDu);
+ return emitAtomicBinary(MI, BB, 8, Mips::DADDu);
case Mips::ATOMIC_LOAD_AND_I8:
case Mips::ATOMIC_LOAD_AND_I8_P8:
- return EmitAtomicBinaryPartword(MI, BB, 1, Mips::AND);
+ return emitAtomicBinaryPartword(MI, BB, 1, Mips::AND);
case Mips::ATOMIC_LOAD_AND_I16:
case Mips::ATOMIC_LOAD_AND_I16_P8:
- return EmitAtomicBinaryPartword(MI, BB, 2, Mips::AND);
+ return emitAtomicBinaryPartword(MI, BB, 2, Mips::AND);
case Mips::ATOMIC_LOAD_AND_I32:
case Mips::ATOMIC_LOAD_AND_I32_P8:
- return EmitAtomicBinary(MI, BB, 4, Mips::AND);
+ return emitAtomicBinary(MI, BB, 4, Mips::AND);
case Mips::ATOMIC_LOAD_AND_I64:
case Mips::ATOMIC_LOAD_AND_I64_P8:
- return EmitAtomicBinary(MI, BB, 8, Mips::AND64);
+ return emitAtomicBinary(MI, BB, 8, Mips::AND64);
case Mips::ATOMIC_LOAD_OR_I8:
case Mips::ATOMIC_LOAD_OR_I8_P8:
- return EmitAtomicBinaryPartword(MI, BB, 1, Mips::OR);
+ return emitAtomicBinaryPartword(MI, BB, 1, Mips::OR);
case Mips::ATOMIC_LOAD_OR_I16:
case Mips::ATOMIC_LOAD_OR_I16_P8:
- return EmitAtomicBinaryPartword(MI, BB, 2, Mips::OR);
+ return emitAtomicBinaryPartword(MI, BB, 2, Mips::OR);
case Mips::ATOMIC_LOAD_OR_I32:
case Mips::ATOMIC_LOAD_OR_I32_P8:
- return EmitAtomicBinary(MI, BB, 4, Mips::OR);
+ return emitAtomicBinary(MI, BB, 4, Mips::OR);
case Mips::ATOMIC_LOAD_OR_I64:
case Mips::ATOMIC_LOAD_OR_I64_P8:
- return EmitAtomicBinary(MI, BB, 8, Mips::OR64);
+ return emitAtomicBinary(MI, BB, 8, Mips::OR64);
case Mips::ATOMIC_LOAD_XOR_I8:
case Mips::ATOMIC_LOAD_XOR_I8_P8:
- return EmitAtomicBinaryPartword(MI, BB, 1, Mips::XOR);
+ return emitAtomicBinaryPartword(MI, BB, 1, Mips::XOR);
case Mips::ATOMIC_LOAD_XOR_I16:
case Mips::ATOMIC_LOAD_XOR_I16_P8:
- return EmitAtomicBinaryPartword(MI, BB, 2, Mips::XOR);
+ return emitAtomicBinaryPartword(MI, BB, 2, Mips::XOR);
case Mips::ATOMIC_LOAD_XOR_I32:
case Mips::ATOMIC_LOAD_XOR_I32_P8:
- return EmitAtomicBinary(MI, BB, 4, Mips::XOR);
+ return emitAtomicBinary(MI, BB, 4, Mips::XOR);
case Mips::ATOMIC_LOAD_XOR_I64:
case Mips::ATOMIC_LOAD_XOR_I64_P8:
- return EmitAtomicBinary(MI, BB, 8, Mips::XOR64);
+ return emitAtomicBinary(MI, BB, 8, Mips::XOR64);
case Mips::ATOMIC_LOAD_NAND_I8:
case Mips::ATOMIC_LOAD_NAND_I8_P8:
- return EmitAtomicBinaryPartword(MI, BB, 1, 0, true);
+ return emitAtomicBinaryPartword(MI, BB, 1, 0, true);
case Mips::ATOMIC_LOAD_NAND_I16:
case Mips::ATOMIC_LOAD_NAND_I16_P8:
- return EmitAtomicBinaryPartword(MI, BB, 2, 0, true);
+ return emitAtomicBinaryPartword(MI, BB, 2, 0, true);
case Mips::ATOMIC_LOAD_NAND_I32:
case Mips::ATOMIC_LOAD_NAND_I32_P8:
- return EmitAtomicBinary(MI, BB, 4, 0, true);
+ return emitAtomicBinary(MI, BB, 4, 0, true);
case Mips::ATOMIC_LOAD_NAND_I64:
case Mips::ATOMIC_LOAD_NAND_I64_P8:
- return EmitAtomicBinary(MI, BB, 8, 0, true);
+ return emitAtomicBinary(MI, BB, 8, 0, true);
case Mips::ATOMIC_LOAD_SUB_I8:
case Mips::ATOMIC_LOAD_SUB_I8_P8:
- return EmitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu);
+ return emitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu);
case Mips::ATOMIC_LOAD_SUB_I16:
case Mips::ATOMIC_LOAD_SUB_I16_P8:
- return EmitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu);
+ return emitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu);
case Mips::ATOMIC_LOAD_SUB_I32:
case Mips::ATOMIC_LOAD_SUB_I32_P8:
- return EmitAtomicBinary(MI, BB, 4, Mips::SUBu);
+ return emitAtomicBinary(MI, BB, 4, Mips::SUBu);
case Mips::ATOMIC_LOAD_SUB_I64:
case Mips::ATOMIC_LOAD_SUB_I64_P8:
- return EmitAtomicBinary(MI, BB, 8, Mips::DSUBu);
+ return emitAtomicBinary(MI, BB, 8, Mips::DSUBu);
case Mips::ATOMIC_SWAP_I8:
case Mips::ATOMIC_SWAP_I8_P8:
- return EmitAtomicBinaryPartword(MI, BB, 1, 0);
+ return emitAtomicBinaryPartword(MI, BB, 1, 0);
case Mips::ATOMIC_SWAP_I16:
case Mips::ATOMIC_SWAP_I16_P8:
- return EmitAtomicBinaryPartword(MI, BB, 2, 0);
+ return emitAtomicBinaryPartword(MI, BB, 2, 0);
case Mips::ATOMIC_SWAP_I32:
case Mips::ATOMIC_SWAP_I32_P8:
- return EmitAtomicBinary(MI, BB, 4, 0);
+ return emitAtomicBinary(MI, BB, 4, 0);
case Mips::ATOMIC_SWAP_I64:
case Mips::ATOMIC_SWAP_I64_P8:
- return EmitAtomicBinary(MI, BB, 8, 0);
+ return emitAtomicBinary(MI, BB, 8, 0);
case Mips::ATOMIC_CMP_SWAP_I8:
case Mips::ATOMIC_CMP_SWAP_I8_P8:
- return EmitAtomicCmpSwapPartword(MI, BB, 1);
+ return emitAtomicCmpSwapPartword(MI, BB, 1);
case Mips::ATOMIC_CMP_SWAP_I16:
case Mips::ATOMIC_CMP_SWAP_I16_P8:
- return EmitAtomicCmpSwapPartword(MI, BB, 2);
+ return emitAtomicCmpSwapPartword(MI, BB, 2);
case Mips::ATOMIC_CMP_SWAP_I32:
case Mips::ATOMIC_CMP_SWAP_I32_P8:
- return EmitAtomicCmpSwap(MI, BB, 4);
+ return emitAtomicCmpSwap(MI, BB, 4);
case Mips::ATOMIC_CMP_SWAP_I64:
case Mips::ATOMIC_CMP_SWAP_I64_P8:
- return EmitAtomicCmpSwap(MI, BB, 8);
- case Mips::BPOSGE32_PSEUDO:
- return EmitBPOSGE32(MI, BB);
- case Mips::SelBeqZ:
- return EmitSel16(Mips::BeqzRxImm16, MI, BB);
- case Mips::SelBneZ:
- return EmitSel16(Mips::BnezRxImm16, MI, BB);
- case Mips::SelTBteqZCmpi:
- return EmitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB);
- case Mips::SelTBteqZSlti:
- return EmitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB);
- case Mips::SelTBteqZSltiu:
- return EmitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB);
- case Mips::SelTBtneZCmpi:
- return EmitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB);
- case Mips::SelTBtneZSlti:
- return EmitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB);
- case Mips::SelTBtneZSltiu:
- return EmitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB);
- case Mips::SelTBteqZCmp:
- return EmitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
- case Mips::SelTBteqZSlt:
- return EmitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
- case Mips::SelTBteqZSltu:
- return EmitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
- case Mips::SelTBtneZCmp:
- return EmitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
- case Mips::SelTBtneZSlt:
- return EmitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
- case Mips::SelTBtneZSltu:
- return EmitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
- case Mips::BteqzT8CmpX16:
- return EmitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
- case Mips::BteqzT8SltX16:
- return EmitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
- case Mips::BteqzT8SltuX16:
- // TBD: figure out a way to get this or remove the instruction
- // altogether.
- return EmitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
- case Mips::BtnezT8CmpX16:
- return EmitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
- case Mips::BtnezT8SltX16:
- return EmitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
- case Mips::BtnezT8SltuX16:
- // TBD: figure out a way to get this or remove the instruction
- // altogether.
- return EmitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
- case Mips::BteqzT8CmpiX16: return EmitFEXT_T8I8I16_ins(
- Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
- case Mips::BteqzT8SltiX16: return EmitFEXT_T8I8I16_ins(
- Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
- case Mips::BteqzT8SltiuX16: return EmitFEXT_T8I8I16_ins(
- Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
- case Mips::BtnezT8CmpiX16: return EmitFEXT_T8I8I16_ins(
- Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
- case Mips::BtnezT8SltiX16: return EmitFEXT_T8I8I16_ins(
- Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
- case Mips::BtnezT8SltiuX16: return EmitFEXT_T8I8I16_ins(
- Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
- break;
- case Mips::SltCCRxRy16:
- return EmitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB);
- break;
- case Mips::SltiCCRxImmX16:
- return EmitFEXT_CCRXI16_ins
- (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
- case Mips::SltiuCCRxImmX16:
- return EmitFEXT_CCRXI16_ins
- (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
- case Mips::SltuCCRxRy16:
- return EmitFEXT_CCRX16_ins
- (Mips::SltuRxRy16, MI, BB);
+ return emitAtomicCmpSwap(MI, BB, 8);
}
}
// This function also handles Mips::ATOMIC_SWAP_I32 (when BinOpcode == 0), and
// Mips::ATOMIC_LOAD_NAND_I32 (when Nand == true)
MachineBasicBlock *
-MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode,
bool Nand) const {
assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicBinary.");
@@ -1709,7 +1075,7 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc DL = MI->getDebugLoc();
unsigned LL, SC, AND, NOR, ZERO, BEQ;
if (Size == 4) {
@@ -1765,20 +1131,20 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// sc success, storeval, 0(ptr)
// beq success, $0, loopMBB
BB = loopMBB;
- BuildMI(BB, dl, TII->get(LL), OldVal).addReg(Ptr).addImm(0);
+ BuildMI(BB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0);
if (Nand) {
// and andres, oldval, incr
// nor storeval, $0, andres
- BuildMI(BB, dl, TII->get(AND), AndRes).addReg(OldVal).addReg(Incr);
- BuildMI(BB, dl, TII->get(NOR), StoreVal).addReg(ZERO).addReg(AndRes);
+ BuildMI(BB, DL, TII->get(AND), AndRes).addReg(OldVal).addReg(Incr);
+ BuildMI(BB, DL, TII->get(NOR), StoreVal).addReg(ZERO).addReg(AndRes);
} else if (BinOpcode) {
// <binop> storeval, oldval, incr
- BuildMI(BB, dl, TII->get(BinOpcode), StoreVal).addReg(OldVal).addReg(Incr);
+ BuildMI(BB, DL, TII->get(BinOpcode), StoreVal).addReg(OldVal).addReg(Incr);
} else {
StoreVal = Incr;
}
- BuildMI(BB, dl, TII->get(SC), Success).addReg(StoreVal).addReg(Ptr).addImm(0);
- BuildMI(BB, dl, TII->get(BEQ)).addReg(Success).addReg(ZERO).addMBB(loopMBB);
+ BuildMI(BB, DL, TII->get(SC), Success).addReg(StoreVal).addReg(Ptr).addImm(0);
+ BuildMI(BB, DL, TII->get(BEQ)).addReg(Success).addReg(ZERO).addMBB(loopMBB);
MI->eraseFromParent(); // The instruction is gone now.
@@ -1786,7 +1152,7 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
}
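As a sketch of where this expansion is used (the caller below is hypothetical), a sequentially consistent read-modify-write on an aligned 32-bit location is what reaches this point as an ATOMIC_LOAD_* pseudo and gets turned into the ll/sc retry loop outlined in the comments above:

// Hypothetical caller: roughly becomes Mips::ATOMIC_LOAD_ADD_I32 and is
// expanded by emitAtomicBinary into
//   loop: ll oldval,0(ptr); addu storeval,oldval,incr;
//         sc success,storeval,0(ptr); beq success,$0,loop
int fetch_add(int *p, int v) {
  return __atomic_fetch_add(p, v, __ATOMIC_SEQ_CST);
}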
MachineBasicBlock *
-MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
+MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode,
bool Nand) const {
@@ -1797,7 +1163,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc DL = MI->getDebugLoc();
unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL;
unsigned SC = IsN64 ? Mips::SC_P8 : Mips::SC;
@@ -1856,18 +1222,18 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
// sll incr2,incr,shiftamt
int64_t MaskImm = (Size == 1) ? 255 : 65535;
- BuildMI(BB, dl, TII->get(Mips::ADDiu), MaskLSB2)
+ BuildMI(BB, DL, TII->get(Mips::ADDiu), MaskLSB2)
.addReg(Mips::ZERO).addImm(-4);
- BuildMI(BB, dl, TII->get(Mips::AND), AlignedAddr)
+ BuildMI(BB, DL, TII->get(Mips::AND), AlignedAddr)
.addReg(Ptr).addReg(MaskLSB2);
- BuildMI(BB, dl, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3);
- BuildMI(BB, dl, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
- BuildMI(BB, dl, TII->get(Mips::ORi), MaskUpper)
+ BuildMI(BB, DL, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3);
+ BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
+ BuildMI(BB, DL, TII->get(Mips::ORi), MaskUpper)
.addReg(Mips::ZERO).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLLV), Mask)
+ BuildMI(BB, DL, TII->get(Mips::SLLV), Mask)
.addReg(ShiftAmt).addReg(MaskUpper);
- BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
- BuildMI(BB, dl, TII->get(Mips::SLLV), Incr2).addReg(ShiftAmt).addReg(Incr);
+ BuildMI(BB, DL, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
+ BuildMI(BB, DL, TII->get(Mips::SLLV), Incr2).addReg(ShiftAmt).addReg(Incr);
// atomic.load.binop
// loopMBB:
@@ -1889,32 +1255,32 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
// beq success,$0,loopMBB
BB = loopMBB;
- BuildMI(BB, dl, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
+ BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
if (Nand) {
// and andres, oldval, incr2
// nor binopres, $0, andres
// and newval, binopres, mask
- BuildMI(BB, dl, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr2);
- BuildMI(BB, dl, TII->get(Mips::NOR), BinOpRes)
+ BuildMI(BB, DL, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr2);
+ BuildMI(BB, DL, TII->get(Mips::NOR), BinOpRes)
.addReg(Mips::ZERO).addReg(AndRes);
- BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
+ BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
} else if (BinOpcode) {
// <binop> binopres, oldval, incr2
// and newval, binopres, mask
- BuildMI(BB, dl, TII->get(BinOpcode), BinOpRes).addReg(OldVal).addReg(Incr2);
- BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
+ BuildMI(BB, DL, TII->get(BinOpcode), BinOpRes).addReg(OldVal).addReg(Incr2);
+ BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
} else {// atomic.swap
// and newval, incr2, mask
- BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask);
+ BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask);
}
- BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0)
+ BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0)
.addReg(OldVal).addReg(Mask2);
- BuildMI(BB, dl, TII->get(Mips::OR), StoreVal)
+ BuildMI(BB, DL, TII->get(Mips::OR), StoreVal)
.addReg(MaskedOldVal0).addReg(NewVal);
- BuildMI(BB, dl, TII->get(SC), Success)
+ BuildMI(BB, DL, TII->get(SC), Success)
.addReg(StoreVal).addReg(AlignedAddr).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::BEQ))
+ BuildMI(BB, DL, TII->get(Mips::BEQ))
.addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB);
// sinkMBB:
@@ -1925,13 +1291,13 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
BB = sinkMBB;
int64_t ShiftImm = (Size == 1) ? 24 : 16;
- BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal1)
+ BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1)
.addReg(OldVal).addReg(Mask);
- BuildMI(BB, dl, TII->get(Mips::SRLV), SrlRes)
+ BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes)
.addReg(ShiftAmt).addReg(MaskedOldVal1);
- BuildMI(BB, dl, TII->get(Mips::SLL), SllRes)
+ BuildMI(BB, DL, TII->get(Mips::SLL), SllRes)
.addReg(SrlRes).addImm(ShiftImm);
- BuildMI(BB, dl, TII->get(Mips::SRA), Dest)
+ BuildMI(BB, DL, TII->get(Mips::SRA), Dest)
.addReg(SllRes).addImm(ShiftImm);
MI->eraseFromParent(); // The instruction is gone now.
@@ -1940,7 +1306,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
}
MachineBasicBlock *
-MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
+MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size) const {
assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap.");
@@ -1949,7 +1315,7 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc DL = MI->getDebugLoc();
unsigned LL, SC, ZERO, BNE, BEQ;
if (Size == 4) {
@@ -2003,17 +1369,17 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
// ll dest, 0(ptr)
// bne dest, oldval, exitMBB
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(LL), Dest).addReg(Ptr).addImm(0);
- BuildMI(BB, dl, TII->get(BNE))
+ BuildMI(BB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0);
+ BuildMI(BB, DL, TII->get(BNE))
.addReg(Dest).addReg(OldVal).addMBB(exitMBB);
// loop2MBB:
// sc success, newval, 0(ptr)
// beq success, $0, loop1MBB
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(SC), Success)
+ BuildMI(BB, DL, TII->get(SC), Success)
.addReg(NewVal).addReg(Ptr).addImm(0);
- BuildMI(BB, dl, TII->get(BEQ))
+ BuildMI(BB, DL, TII->get(BEQ))
.addReg(Success).addReg(ZERO).addMBB(loop1MBB);
MI->eraseFromParent(); // The instruction is gone now.
@@ -2022,7 +1388,7 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
}
MachineBasicBlock *
-MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
+MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size) const {
assert((Size == 1 || Size == 2) &&
@@ -2032,7 +1398,7 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc DL = MI->getDebugLoc();
unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL;
unsigned SC = IsN64 ? Mips::SC_P8 : Mips::SC;
@@ -2099,24 +1465,24 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
// andi maskednewval,newval,255
// sll shiftednewval,maskednewval,shiftamt
int64_t MaskImm = (Size == 1) ? 255 : 65535;
- BuildMI(BB, dl, TII->get(Mips::ADDiu), MaskLSB2)
+ BuildMI(BB, DL, TII->get(Mips::ADDiu), MaskLSB2)
.addReg(Mips::ZERO).addImm(-4);
- BuildMI(BB, dl, TII->get(Mips::AND), AlignedAddr)
+ BuildMI(BB, DL, TII->get(Mips::AND), AlignedAddr)
.addReg(Ptr).addReg(MaskLSB2);
- BuildMI(BB, dl, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3);
- BuildMI(BB, dl, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
- BuildMI(BB, dl, TII->get(Mips::ORi), MaskUpper)
+ BuildMI(BB, DL, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3);
+ BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
+ BuildMI(BB, DL, TII->get(Mips::ORi), MaskUpper)
.addReg(Mips::ZERO).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLLV), Mask)
+ BuildMI(BB, DL, TII->get(Mips::SLLV), Mask)
.addReg(ShiftAmt).addReg(MaskUpper);
- BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
- BuildMI(BB, dl, TII->get(Mips::ANDi), MaskedCmpVal)
+ BuildMI(BB, DL, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
+ BuildMI(BB, DL, TII->get(Mips::ANDi), MaskedCmpVal)
.addReg(CmpVal).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLLV), ShiftedCmpVal)
+ BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedCmpVal)
.addReg(ShiftAmt).addReg(MaskedCmpVal);
- BuildMI(BB, dl, TII->get(Mips::ANDi), MaskedNewVal)
+ BuildMI(BB, DL, TII->get(Mips::ANDi), MaskedNewVal)
.addReg(NewVal).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLLV), ShiftedNewVal)
+ BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedNewVal)
.addReg(ShiftAmt).addReg(MaskedNewVal);
// loop1MBB:
@@ -2124,10 +1490,10 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
// and maskedoldval0,oldval,mask
// bne maskedoldval0,shiftedcmpval,sinkMBB
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0)
+ BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
+ BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0)
.addReg(OldVal).addReg(Mask);
- BuildMI(BB, dl, TII->get(Mips::BNE))
+ BuildMI(BB, DL, TII->get(Mips::BNE))
.addReg(MaskedOldVal0).addReg(ShiftedCmpVal).addMBB(sinkMBB);
// loop2MBB:
@@ -2136,13 +1502,13 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
// sc success,storeval,0(alignedaddr)
// beq success,$0,loop1MBB
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal1)
+ BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1)
.addReg(OldVal).addReg(Mask2);
- BuildMI(BB, dl, TII->get(Mips::OR), StoreVal)
+ BuildMI(BB, DL, TII->get(Mips::OR), StoreVal)
.addReg(MaskedOldVal1).addReg(ShiftedNewVal);
- BuildMI(BB, dl, TII->get(SC), Success)
+ BuildMI(BB, DL, TII->get(SC), Success)
.addReg(StoreVal).addReg(AlignedAddr).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::BEQ))
+ BuildMI(BB, DL, TII->get(Mips::BEQ))
.addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB);
// sinkMBB:
@@ -2152,11 +1518,11 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
BB = sinkMBB;
int64_t ShiftImm = (Size == 1) ? 24 : 16;
- BuildMI(BB, dl, TII->get(Mips::SRLV), SrlRes)
+ BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes)
.addReg(ShiftAmt).addReg(MaskedOldVal0);
- BuildMI(BB, dl, TII->get(Mips::SLL), SllRes)
+ BuildMI(BB, DL, TII->get(Mips::SLL), SllRes)
.addReg(SrlRes).addImm(ShiftImm);
- BuildMI(BB, dl, TII->get(Mips::SRA), Dest)
+ BuildMI(BB, DL, TII->get(Mips::SRA), Dest)
.addReg(SllRes).addImm(ShiftImm);
MI->eraseFromParent(); // The instruction is gone now.
@@ -2167,16 +1533,46 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
//===----------------------------------------------------------------------===//
// Misc Lower Operation implementation
//===----------------------------------------------------------------------===//
+SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+ EVT PTy = getPointerTy();
+ unsigned EntrySize =
+ DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(*getDataLayout());
+
+ Index = DAG.getNode(ISD::MUL, DL, PTy, Index,
+ DAG.getConstant(EntrySize, PTy));
+ SDValue Addr = DAG.getNode(ISD::ADD, DL, PTy, Index, Table);
+
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
+ Addr = DAG.getExtLoad(ISD::SEXTLOAD, DL, PTy, Chain, Addr,
+ MachinePointerInfo::getJumpTable(), MemVT, false, false,
+ 0);
+ Chain = Addr.getValue(1);
+
+ if ((getTargetMachine().getRelocationModel() == Reloc::PIC_) || IsN64) {
+ // For PIC, the sequence is:
+ // BRIND(load(Jumptable + index) + RelocBase)
+ // RelocBase can be JumpTable, GOT or some sort of global base.
+ Addr = DAG.getNode(ISD::ADD, DL, PTy, Addr,
+ getPICJumpTableRelocBase(Table, DAG));
+ }
+
+ return DAG.getNode(ISD::BRIND, DL, MVT::Other, Chain, Addr);
+}
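
The new lowerBR_JT hook expands BR_JT into an explicit extending load from the jump table followed by an indirect branch; under PIC or N64 the loaded entry is an offset that has to be rebased first. A rough scalar analogue of the address computation, with a hypothetical helper and 4-byte table entries assumed:

    #include <cstdint>

    // Hypothetical sketch of the BR_JT expansion: Addr = Table + Index * EntrySize,
    // entry = sign-extended load of Addr, then (PIC only) entry += RelocBase.
    intptr_t jumpTargetSketch(const int32_t *Table, unsigned Index,
                              intptr_t RelocBase, bool IsPIC) {
      intptr_t Entry = static_cast<intptr_t>(Table[Index]); // SEXTLOAD of MemVT
      if (IsPIC)
        Entry += RelocBase;   // BRIND(load(JumpTable + Index) + RelocBase)
      return Entry;           // the value branched to indirectly
    }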
+
SDValue MipsTargetLowering::
-LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
+lowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
// The first operand is the chain, the second is the condition, the third is
// the block to branch to if the condition is true.
SDValue Chain = Op.getOperand(0);
SDValue Dest = Op.getOperand(2);
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
- SDValue CondRes = CreateFPCmp(DAG, Op.getOperand(1));
+ SDValue CondRes = createFPCmp(DAG, Op.getOperand(1));
// Return if flag is not set by a floating point comparison.
if (CondRes.getOpcode() != MipsISD::FPCmp)
@@ -2185,27 +1581,27 @@ LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
SDValue CCNode = CondRes.getOperand(2);
Mips::CondCode CC =
(Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue();
- SDValue BrCode = DAG.getConstant(GetFPBranchCodeFromCond(CC), MVT::i32);
+ SDValue BrCode = DAG.getConstant(getFPBranchCodeFromCond(CC), MVT::i32);
- return DAG.getNode(MipsISD::FPBrcond, dl, Op.getValueType(), Chain, BrCode,
+ return DAG.getNode(MipsISD::FPBrcond, DL, Op.getValueType(), Chain, BrCode,
Dest, CondRes);
}
SDValue MipsTargetLowering::
-LowerSELECT(SDValue Op, SelectionDAG &DAG) const
+lowerSELECT(SDValue Op, SelectionDAG &DAG) const
{
- SDValue Cond = CreateFPCmp(DAG, Op.getOperand(0));
+ SDValue Cond = createFPCmp(DAG, Op.getOperand(0));
// Return if flag is not set by a floating point comparison.
if (Cond.getOpcode() != MipsISD::FPCmp)
return Op;
- return CreateCMovFP(DAG, Cond, Op.getOperand(1), Op.getOperand(2),
+ return createCMovFP(DAG, Cond, Op.getOperand(1), Op.getOperand(2),
Op.getDebugLoc());
}
SDValue MipsTargetLowering::
-LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
+lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
DebugLoc DL = Op.getDebugLoc();
EVT Ty = Op.getOperand(0).getValueType();
@@ -2217,8 +1613,8 @@ LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
Op.getOperand(3));
}
-SDValue MipsTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
- SDValue Cond = CreateFPCmp(DAG, Op);
+SDValue MipsTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = createFPCmp(DAG, Op);
assert(Cond.getOpcode() == MipsISD::FPCmp &&
"Floating point operand expected.");
@@ -2226,13 +1622,13 @@ SDValue MipsTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
SDValue True = DAG.getConstant(1, MVT::i32);
SDValue False = DAG.getConstant(0, MVT::i32);
- return CreateCMovFP(DAG, Cond, True, False, Op.getDebugLoc());
+ return createCMovFP(DAG, Cond, True, False, Op.getDebugLoc());
}
-SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
+SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
// FIXME there isn't actually debug info here
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) {
@@ -2241,12 +1637,12 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
// %gp_rel relocation
if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
- SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, 0,
MipsII::MO_GPREL);
- SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl,
+ SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, DL,
DAG.getVTList(MVT::i32), &GA, 1);
SDValue GPReg = DAG.getRegister(Mips::GP, MVT::i32);
- return DAG.getNode(ISD::ADD, dl, MVT::i32, GPReg, GPRelNode);
+ return DAG.getNode(ISD::ADD, DL, MVT::i32, GPReg, GPRelNode);
}
// %hi/%lo relocation
@@ -2264,7 +1660,7 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
HasMips64 ? MipsII::MO_GOT_DISP : MipsII::MO_GOT16);
}
-SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
+SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64)
return getAddrNonPIC(Op, DAG);
@@ -2273,14 +1669,14 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
}
SDValue MipsTargetLowering::
-LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
+lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
{
// If the relocation model is PIC, use the General Dynamic TLS Model or
// Local Dynamic TLS model, otherwise use the Initial Exec or
// Local Exec TLS Model.
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
- DebugLoc dl = GA->getDebugLoc();
+ DebugLoc DL = GA->getDebugLoc();
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy();
@@ -2291,9 +1687,9 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
unsigned Flag = (model == TLSModel::LocalDynamic) ? MipsII::MO_TLSLDM
: MipsII::MO_TLSGD;
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Flag);
- SDValue Argument = DAG.getNode(MipsISD::Wrapper, dl, PtrVT,
- GetGlobalReg(DAG, PtrVT), TGA);
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, Flag);
+ SDValue Argument = DAG.getNode(MipsISD::Wrapper, DL, PtrVT,
+ getGlobalReg(DAG, PtrVT), TGA);
unsigned PtrSize = PtrVT.getSizeInBits();
IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize);
@@ -2307,9 +1703,9 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
TargetLowering::CallLoweringInfo CLI(DAG.getEntryNode(), PtrTy,
false, false, false, false, 0, CallingConv::C,
- /*isTailCall=*/false, /*doesNotRet=*/false,
+ /*IsTailCall=*/false, /*doesNotRet=*/false,
/*isReturnValueUsed=*/true,
- TlsGetAddr, Args, DAG, dl);
+ TlsGetAddr, Args, DAG, DL);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
SDValue Ret = CallResult.first;
@@ -2317,44 +1713,44 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
if (model != TLSModel::LocalDynamic)
return Ret;
- SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
MipsII::MO_DTPREL_HI);
- SDValue Hi = DAG.getNode(MipsISD::Hi, dl, PtrVT, TGAHi);
- SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ SDValue Hi = DAG.getNode(MipsISD::Hi, DL, PtrVT, TGAHi);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
MipsII::MO_DTPREL_LO);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, TGALo);
- SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Ret);
- return DAG.getNode(ISD::ADD, dl, PtrVT, Add, Lo);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, DL, PtrVT, TGALo);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Ret);
+ return DAG.getNode(ISD::ADD, DL, PtrVT, Add, Lo);
}
SDValue Offset;
if (model == TLSModel::InitialExec) {
// Initial Exec TLS Model
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
MipsII::MO_GOTTPREL);
- TGA = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, GetGlobalReg(DAG, PtrVT),
+ TGA = DAG.getNode(MipsISD::Wrapper, DL, PtrVT, getGlobalReg(DAG, PtrVT),
TGA);
- Offset = DAG.getLoad(PtrVT, dl,
+ Offset = DAG.getLoad(PtrVT, DL,
DAG.getEntryNode(), TGA, MachinePointerInfo(),
false, false, false, 0);
} else {
// Local Exec TLS Model
assert(model == TLSModel::LocalExec);
- SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
MipsII::MO_TPREL_HI);
- SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
MipsII::MO_TPREL_LO);
- SDValue Hi = DAG.getNode(MipsISD::Hi, dl, PtrVT, TGAHi);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, TGALo);
- Offset = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+ SDValue Hi = DAG.getNode(MipsISD::Hi, DL, PtrVT, TGAHi);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, DL, PtrVT, TGALo);
+ Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}
- SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT);
- return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+ SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT);
+ return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadPointer, Offset);
}
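
Once the model-specific pieces are materialized, the Initial-Exec and Local-Exec paths above reduce to adding an offset to the thread pointer, and the Local-Dynamic path wraps the %dtprel halves around the __tls_get_addr result. A minimal sketch of those final combines; the parameters stand in for the SDValues built above and the helper names are hypothetical:

    #include <cstdint>

    // Hypothetical sketch of the final address arithmetic in the TLS lowering.
    uintptr_t localDynamicSketch(uintptr_t Ret, uintptr_t DtprelHi,
                                 uintptr_t DtprelLo) {
      return (DtprelHi + Ret) + DtprelLo;   // ADD(ADD(Hi, Ret), Lo)
    }

    uintptr_t execModelSketch(uintptr_t ThreadPointer, uintptr_t Offset) {
      // InitialExec: Offset was loaded through the GOT (MO_GOTTPREL).
      // LocalExec:   Offset is Hi + Lo of the %tprel relocations.
      return ThreadPointer + Offset;        // ADD(ThreadPointer, Offset)
    }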
SDValue MipsTargetLowering::
-LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
+lowerJumpTable(SDValue Op, SelectionDAG &DAG) const
{
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64)
return getAddrNonPIC(Op, DAG);
@@ -2363,7 +1759,7 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
}
SDValue MipsTargetLowering::
-LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
+lowerConstantPool(SDValue Op, SelectionDAG &DAG) const
{
// gp_rel relocation
// FIXME: we should reference the constant pool using small data sections,
@@ -2381,22 +1777,22 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
return getAddrLocal(Op, DAG, HasMips64);
}
-SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
getPointerTy());
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
+ return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
MachinePointerInfo(SV), false, false, 0);
}
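
As the comment notes, the whole VASTART lowering is a single store of the var-args area address into the va_list object. A minimal C++ analogue with hypothetical names:

    #include <cstring>

    // Hypothetical sketch: VASTART stores the address of the VarArgsFrameIndex
    // slot into the memory pointed to by the va_list operand.
    void vastartSketch(void *VaListMem, void *VarArgsArea) {
      std::memcpy(VaListMem, &VarArgsArea, sizeof VarArgsArea); // the single store
    }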
-static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+static SDValue lowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
EVT TyX = Op.getOperand(0).getValueType();
EVT TyY = Op.getOperand(1).getValueType();
SDValue Const1 = DAG.getConstant(1, MVT::i32);
@@ -2441,7 +1837,7 @@ static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
}
-static SDValue LowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
unsigned WidthX = Op.getOperand(0).getValueSizeInBits();
unsigned WidthY = Op.getOperand(1).getValueSizeInBits();
EVT TyX = MVT::getIntegerVT(WidthX), TyY = MVT::getIntegerVT(WidthY);
@@ -2490,14 +1886,14 @@ static SDValue LowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
}
SDValue
-MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
+MipsTargetLowering::lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->hasMips64())
- return LowerFCOPYSIGN64(Op, DAG, Subtarget->hasMips32r2());
+ return lowerFCOPYSIGN64(Op, DAG, Subtarget->hasMips32r2());
- return LowerFCOPYSIGN32(Op, DAG, Subtarget->hasMips32r2());
+ return lowerFCOPYSIGN32(Op, DAG, Subtarget->hasMips32r2());
}
-static SDValue LowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+static SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
SDValue Res, Const1 = DAG.getConstant(1, MVT::i32);
DebugLoc DL = Op.getDebugLoc();
@@ -2526,7 +1922,7 @@ static SDValue LowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
}
-static SDValue LowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+static SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
SDValue Res, Const1 = DAG.getConstant(1, MVT::i32);
DebugLoc DL = Op.getDebugLoc();
@@ -2547,15 +1943,15 @@ static SDValue LowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
}
SDValue
-MipsTargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
+MipsTargetLowering::lowerFABS(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->hasMips64() && (Op.getValueType() == MVT::f64))
- return LowerFABS64(Op, DAG, Subtarget->hasMips32r2());
+ return lowerFABS64(Op, DAG, Subtarget->hasMips32r2());
- return LowerFABS32(Op, DAG, Subtarget->hasMips32r2());
+ return lowerFABS32(Op, DAG, Subtarget->hasMips32r2());
}
SDValue MipsTargetLowering::
-LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
// check the depth
assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) &&
"Frame address can only be determined for current frame.");
@@ -2563,13 +1959,13 @@ LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
- SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
IsN64 ? Mips::FP_64 : Mips::FP, VT);
return FrameAddr;
}
-SDValue MipsTargetLowering::LowerRETURNADDR(SDValue Op,
+SDValue MipsTargetLowering::lowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
// check the depth
assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) &&
@@ -2590,7 +1986,7 @@ SDValue MipsTargetLowering::LowerRETURNADDR(SDValue Op,
// generated from __builtin_eh_return (offset, handler)
// The effect of this is to adjust the stack pointer by "offset"
// and then branch to "handler".
-SDValue MipsTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
+SDValue MipsTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
const {
MachineFunction &MF = DAG.getMachineFunction();
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
@@ -2616,24 +2012,24 @@ SDValue MipsTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
// TODO: set SType according to the desired memory barrier behavior.
SDValue
-MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const {
+MipsTargetLowering::lowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const {
unsigned SType = 0;
- DebugLoc dl = Op.getDebugLoc();
- return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0),
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(MipsISD::Sync, DL, MVT::Other, Op.getOperand(0),
DAG.getConstant(SType, MVT::i32));
}
-SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op,
+SDValue MipsTargetLowering::lowerATOMIC_FENCE(SDValue Op,
SelectionDAG &DAG) const {
// FIXME: Need pseudo-fence for 'singlethread' fences
// FIXME: Set SType for weaker fences where supported/appropriate.
unsigned SType = 0;
- DebugLoc dl = Op.getDebugLoc();
- return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0),
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(MipsISD::Sync, DL, MVT::Other, Op.getOperand(0),
DAG.getConstant(SType, MVT::i32));
}
-SDValue MipsTargetLowering::LowerShiftLeftParts(SDValue Op,
+SDValue MipsTargetLowering::lowerShiftLeftParts(SDValue Op,
SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1);
@@ -2664,7 +2060,7 @@ SDValue MipsTargetLowering::LowerShiftLeftParts(SDValue Op,
return DAG.getMergeValues(Ops, 2, DL);
}
-SDValue MipsTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
+SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
bool IsSRA) const {
DebugLoc DL = Op.getDebugLoc();
SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1);
@@ -2723,7 +2119,7 @@ static SDValue CreateLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD,
}
// Expand an unaligned 32 or 64-bit integer load node.
-SDValue MipsTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
LoadSDNode *LD = cast<LoadSDNode>(Op);
EVT MemVT = LD->getMemoryVT();
@@ -2801,7 +2197,7 @@ static SDValue CreateStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD,
}
// Expand an unaligned 32 or 64-bit integer store node.
-SDValue MipsTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
StoreSDNode *SD = cast<StoreSDNode>(Op);
EVT MemVT = SD->getMemoryVT();
@@ -2849,7 +2245,7 @@ SDValue MipsTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// v1 = copy hi
// out64 = merge-values (v0, v1)
//
-static SDValue LowerDSPIntr(SDValue Op, SelectionDAG &DAG,
+static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG,
unsigned Opc, bool HasI64In, bool HasI64Out) {
DebugLoc DL = Op.getDebugLoc();
bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
@@ -2894,95 +2290,95 @@ static SDValue LowerDSPIntr(SDValue Op, SelectionDAG &DAG,
return DAG.getMergeValues(Vals, 2, DL);
}
-SDValue MipsTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+SDValue MipsTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) {
default:
return SDValue();
case Intrinsic::mips_shilo:
- return LowerDSPIntr(Op, DAG, MipsISD::SHILO, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::SHILO, true, true);
case Intrinsic::mips_dpau_h_qbl:
- return LowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL, true, true);
case Intrinsic::mips_dpau_h_qbr:
- return LowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR, true, true);
case Intrinsic::mips_dpsu_h_qbl:
- return LowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL, true, true);
case Intrinsic::mips_dpsu_h_qbr:
- return LowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR, true, true);
case Intrinsic::mips_dpa_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH, true, true);
case Intrinsic::mips_dps_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH, true, true);
case Intrinsic::mips_dpax_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH, true, true);
case Intrinsic::mips_dpsx_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH, true, true);
case Intrinsic::mips_mulsa_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH, true, true);
case Intrinsic::mips_mult:
- return LowerDSPIntr(Op, DAG, MipsISD::MULT, false, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MULT, false, true);
case Intrinsic::mips_multu:
- return LowerDSPIntr(Op, DAG, MipsISD::MULTU, false, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MULTU, false, true);
case Intrinsic::mips_madd:
- return LowerDSPIntr(Op, DAG, MipsISD::MADD_DSP, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MADD_DSP, true, true);
case Intrinsic::mips_maddu:
- return LowerDSPIntr(Op, DAG, MipsISD::MADDU_DSP, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MADDU_DSP, true, true);
case Intrinsic::mips_msub:
- return LowerDSPIntr(Op, DAG, MipsISD::MSUB_DSP, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MSUB_DSP, true, true);
case Intrinsic::mips_msubu:
- return LowerDSPIntr(Op, DAG, MipsISD::MSUBU_DSP, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MSUBU_DSP, true, true);
}
}
-SDValue MipsTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
+SDValue MipsTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) {
default:
return SDValue();
case Intrinsic::mips_extp:
- return LowerDSPIntr(Op, DAG, MipsISD::EXTP, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTP, true, false);
case Intrinsic::mips_extpdp:
- return LowerDSPIntr(Op, DAG, MipsISD::EXTPDP, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP, true, false);
case Intrinsic::mips_extr_w:
- return LowerDSPIntr(Op, DAG, MipsISD::EXTR_W, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W, true, false);
case Intrinsic::mips_extr_r_w:
- return LowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W, true, false);
case Intrinsic::mips_extr_rs_w:
- return LowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W, true, false);
case Intrinsic::mips_extr_s_h:
- return LowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H, true, false);
case Intrinsic::mips_mthlip:
- return LowerDSPIntr(Op, DAG, MipsISD::MTHLIP, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP, true, true);
case Intrinsic::mips_mulsaq_s_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH, true, true);
case Intrinsic::mips_maq_s_w_phl:
- return LowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL, true, true);
case Intrinsic::mips_maq_s_w_phr:
- return LowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR, true, true);
case Intrinsic::mips_maq_sa_w_phl:
- return LowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL, true, true);
case Intrinsic::mips_maq_sa_w_phr:
- return LowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR, true, true);
case Intrinsic::mips_dpaq_s_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH, true, true);
case Intrinsic::mips_dpsq_s_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH, true, true);
case Intrinsic::mips_dpaq_sa_l_w:
- return LowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W, true, true);
case Intrinsic::mips_dpsq_sa_l_w:
- return LowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W, true, true);
case Intrinsic::mips_dpaqx_s_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH, true, true);
case Intrinsic::mips_dpaqx_sa_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH, true, true);
case Intrinsic::mips_dpsqx_s_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH, true, true);
case Intrinsic::mips_dpsqx_sa_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH, true, true);
}
}
-SDValue MipsTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const {
+SDValue MipsTargetLowering::lowerADD(SDValue Op, SelectionDAG &DAG) const {
if (Op->getOperand(0).getOpcode() != ISD::FRAMEADDR
|| cast<ConstantSDNode>
(Op->getOperand(0).getOperand(0))->getZExtValue() != 0
@@ -3119,28 +2515,6 @@ static unsigned getNextIntArgReg(unsigned Reg) {
return (Reg == Mips::A0) ? Mips::A1 : Mips::A3;
}
-/// IsEligibleForTailCallOptimization - Check whether the call is eligible
-/// for tail call optimization.
-bool MipsTargetLowering::
-IsEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
- unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const {
- if (!EnableMipsTailCalls)
- return false;
-
- // No tail call optimization for mips16.
- if (Subtarget->inMips16Mode())
- return false;
-
- // Return false if either the callee or caller has a byval argument.
- if (MipsCCInfo.hasByValArg() || FI.hasByvalArg())
- return false;
-
- // Return true if the callee's argument area is no larger than the
- // caller's.
- return NextStackOffset <= FI.getIncomingArgSize();
-}
-
SDValue
MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset,
SDValue Chain, SDValue Arg, DebugLoc DL,
@@ -3159,161 +2533,48 @@ MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset,
/*isVolatile=*/ true, false, 0);
}
-//
-// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much
-// cleaner way to do all of this but it will have to wait until the traditional
-// gcc mechanism is completed.
-//
-// For Pic, in order for Mips16 code to call Mips32 code which according the abi
-// have either arguments or returned values placed in floating point registers,
-// we use a set of helper functions. (This includes functions which return type
-// complex which on Mips are returned in a pair of floating point registers).
-//
-// This is an encoding that we inherited from gcc.
-// In Mips traditional O32, N32 ABI, floating point numbers are passed in
-// floating point argument registers 1,2 only when the first and optionally
-// the second arguments are float (sf) or double (df).
-// For Mips16 we are only concerned with the situations where floating point
-// arguments are being passed in floating point registers by the ABI, because
-// Mips16 mode code cannot execute floating point instructions to load those
-// values and hence helper functions are needed.
-// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf), (df, df)
-// the helper function suffixs for these are:
-// 0, 1, 5, 9, 2, 6, 10
-// this suffix can then be calculated as follows:
-// for a given argument Arg:
-// Arg1x, Arg2x = 1 : Arg is sf
-// 2 : Arg is df
-// 0: Arg is neither sf or df
-// So this stub is the string for number Arg1x + Arg2x*4.
-// However not all numbers between 0 and 10 are possible, we check anyway and
-// assert if the impossible exists.
-//
-
-unsigned int MipsTargetLowering::getMips16HelperFunctionStubNumber
- (ArgListTy &Args) const {
- unsigned int resultNum = 0;
- if (Args.size() >= 1) {
- Type *t = Args[0].Ty;
- if (t->isFloatTy()) {
- resultNum = 1;
- }
- else if (t->isDoubleTy()) {
- resultNum = 2;
- }
- }
- if (resultNum) {
- if (Args.size() >=2) {
- Type *t = Args[1].Ty;
- if (t->isFloatTy()) {
- resultNum += 4;
- }
- else if (t->isDoubleTy()) {
- resultNum += 8;
- }
- }
+void MipsTargetLowering::
+getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ // Insert node "GP copy globalreg" before call to function.
+ //
+ // R_MIPS_CALL* operators (emitted when non-internal functions are called
+ // in PIC mode) allow symbols to be resolved via lazy binding.
+ // The lazy binding stub requires GP to point to the GOT.
+ if (IsPICCall && !InternalLinkage) {
+ unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP;
+ EVT Ty = IsN64 ? MVT::i64 : MVT::i32;
+ RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(CLI.DAG, Ty)));
}
- return resultNum;
-}
-//
-// prefixs are attached to stub numbers depending on the return type .
-// return type: float sf_
-// double df_
-// single complex sc_
-// double complext dc_
-// others NO PREFIX
-//
-//
-// The full name of a helper function is__mips16_call_stub +
-// return type dependent prefix + stub number
-//
-//
-// This is something that probably should be in a different source file and
-// perhaps done differently but my main purpose is to not waste runtime
-// on something that we can enumerate in the source. Another possibility is
-// to have a python script to generate these mapping tables. This will do
-// for now. There are a whole series of helper function mapping arrays, one
-// for each return type class as outlined above. There there are 11 possible
-// entries. Ones with 0 are ones which should never be selected
-//
-// All the arrays are similar except for ones which return neither
-// sf, df, sc, dc, in which only care about ones which have sf or df as a
-// first parameter.
-//
-#define P_ "__mips16_call_stub_"
-#define MAX_STUB_NUMBER 10
-#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10"
-#define T P "0" , T1
-#define P P_
-static char const * vMips16Helper[MAX_STUB_NUMBER+1] =
- {0, T1 };
-#undef P
-#define P P_ "sf_"
-static char const * sfMips16Helper[MAX_STUB_NUMBER+1] =
- { T };
-#undef P
-#define P P_ "df_"
-static char const * dfMips16Helper[MAX_STUB_NUMBER+1] =
- { T };
-#undef P
-#define P P_ "sc_"
-static char const * scMips16Helper[MAX_STUB_NUMBER+1] =
- { T };
-#undef P
-#define P P_ "dc_"
-static char const * dcMips16Helper[MAX_STUB_NUMBER+1] =
- { T };
-#undef P
-#undef P_
-
-
-const char* MipsTargetLowering::
- getMips16HelperFunction
- (Type* RetTy, ArgListTy &Args, bool &needHelper) const {
- const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args);
-#ifndef NDEBUG
- const unsigned int maxStubNum = 10;
- assert(stubNum <= maxStubNum);
- const bool validStubNum[maxStubNum+1] =
- {true, true, true, false, false, true, true, false, false, true, true};
- assert(validStubNum[stubNum]);
-#endif
- const char *result;
- if (RetTy->isFloatTy()) {
- result = sfMips16Helper[stubNum];
- }
- else if (RetTy ->isDoubleTy()) {
- result = dfMips16Helper[stubNum];
- }
- else if (RetTy->isStructTy()) {
- // check if it's complex
- if (RetTy->getNumContainedTypes() == 2) {
- if ((RetTy->getContainedType(0)->isFloatTy()) &&
- (RetTy->getContainedType(1)->isFloatTy())) {
- result = scMips16Helper[stubNum];
- }
- else if ((RetTy->getContainedType(0)->isDoubleTy()) &&
- (RetTy->getContainedType(1)->isDoubleTy())) {
- result = dcMips16Helper[stubNum];
- }
- else {
- llvm_unreachable("Uncovered condition");
- }
- }
- else {
- llvm_unreachable("Uncovered condition");
- }
- }
- else {
- if (stubNum == 0) {
- needHelper = false;
- return "";
- }
- result = vMips16Helper[stubNum];
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
}
- needHelper = true;
- return result;
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(CLI.DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CLI.CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(CLI.DAG.getRegisterMask(Mask));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
}
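
The new getOpndList hook centralizes the operand list that used to be built inline in LowerCall: an optional GP-to-GOT copy for lazy-binding PIC calls, the chained CopyToReg glue, the outgoing argument registers, the call-preserved register mask, and finally the glue value. A simplified standalone sketch of that ordering; Operand and the container types are stand-ins rather than LLVM types:

    #include <utility>
    #include <vector>

    struct Operand { int Id; };  // stand-in for SDValue

    // Hypothetical sketch of the operand order getOpndList produces after the
    // chain/target entries that LowerCall has already placed in Ops.
    void buildCallOperandsSketch(
        std::vector<Operand> &Ops,
        const std::vector<std::pair<unsigned, Operand>> &RegsToPass,
        Operand RegMask, const Operand *Glue) {
      for (const auto &Reg : RegsToPass)
        Ops.push_back(Reg.second); // argument registers, known live into the call
      Ops.push_back(RegMask);      // call-preserved register mask
      if (Glue)
        Ops.push_back(*Glue);      // glue from the last CopyToReg, if any
    }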
/// LowerCall - function arguments are copied from virtual regs to
@@ -3322,36 +2583,16 @@ SDValue
MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
- DebugLoc &dl = CLI.DL;
+ DebugLoc &DL = CLI.DL;
SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
- bool &isTailCall = CLI.IsTailCall;
+ bool &IsTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
- bool isVarArg = CLI.IsVarArg;
-
- const char* mips16HelperFunction = 0;
- bool needMips16Helper = false;
-
- if (Subtarget->inMips16Mode() && getTargetMachine().Options.UseSoftFloat &&
- Mips16HardFloat) {
- //
- // currently we don't have symbols tagged with the mips16 or mips32
- // qualifier so we will assume that we don't know what kind it is.
- // and generate the helper
- //
- bool lookupHelper = true;
- if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- if (noHelperNeeded.find(S->getSymbol()) != noHelperNeeded.end()) {
- lookupHelper = false;
- }
- }
- if (lookupHelper) mips16HelperFunction =
- getMips16HelperFunction(CLI.RetTy, CLI.Args, needMips16Helper);
+ bool IsVarArg = CLI.IsVarArg;
- }
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering();
@@ -3359,22 +2600,24 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
- MipsCCInfo.analyzeCallOperands(Outs, isVarArg);
+ MipsCCInfo.analyzeCallOperands(Outs, IsVarArg,
+ getTargetMachine().Options.UseSoftFloat,
+ Callee.getNode(), CLI.Args);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NextStackOffset = CCInfo.getNextStackOffset();
// Check if it's really possible to do a tail call.
- if (isTailCall)
- isTailCall =
- IsEligibleForTailCallOptimization(MipsCCInfo, NextStackOffset,
+ if (IsTailCall)
+ IsTailCall =
+ isEligibleForTailCallOptimization(MipsCCInfo, NextStackOffset,
*MF.getInfo<MipsFunctionInfo>());
- if (isTailCall)
+ if (IsTailCall)
++NumTailCalls;
// Chain is the output chain of the last Load/Store or CopyToReg node.
@@ -3384,10 +2627,10 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
NextStackOffset = RoundUpToAlignment(NextStackOffset, StackAlignment);
SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true);
- if (!isTailCall)
+ if (!IsTailCall)
Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal);
- SDValue StackPtr = DAG.getCopyFromReg(Chain, dl,
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, DL,
IsN64 ? Mips::SP_64 : Mips::SP,
getPointerTy());
@@ -3408,9 +2651,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert(Flags.getByValSize() &&
"ByVal args of size 0 should have been ignored by front-end.");
assert(ByValArg != MipsCCInfo.byval_end());
- assert(!isTailCall &&
+ assert(!IsTailCall &&
"Do not tail-call optimize if there is a byval argument.");
- passByValArg(Chain, dl, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg,
+ passByValArg(Chain, DL, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg,
MipsCCInfo, *ByValArg, Flags, Subtarget->isLittle());
++ByValArg;
continue;
@@ -3422,12 +2665,13 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
case CCValAssign::Full:
if (VA.isRegLoc()) {
if ((ValVT == MVT::f32 && LocVT == MVT::i32) ||
- (ValVT == MVT::f64 && LocVT == MVT::i64))
- Arg = DAG.getNode(ISD::BITCAST, dl, LocVT, Arg);
+ (ValVT == MVT::f64 && LocVT == MVT::i64) ||
+ (ValVT == MVT::i64 && LocVT == MVT::f64))
+ Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg);
else if (ValVT == MVT::f64 && LocVT == MVT::i32) {
- SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
Arg, DAG.getConstant(0, MVT::i32));
- SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
Arg, DAG.getConstant(1, MVT::i32));
if (!Subtarget->isLittle())
std::swap(Lo, Hi);
@@ -3440,13 +2684,13 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
break;
case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, LocVT, Arg);
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, LocVT, Arg);
break;
case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, LocVT, Arg);
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, LocVT, Arg);
break;
case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, dl, LocVT, Arg);
+ Arg = DAG.getNode(ISD::ANY_EXTEND, DL, LocVT, Arg);
break;
}
@@ -3463,13 +2707,13 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
      // emit ISD::STORE which stores the
      // parameter value to a stack location
MemOpChains.push_back(passArgOnStack(StackPtr, VA.getLocMemOffset(),
- Chain, Arg, dl, isTailCall, DAG));
+ Chain, Arg, DL, IsTailCall, DAG));
}
// Transform all store nodes into one single node because all store
// nodes are independent of each other.
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
&MemOpChains[0], MemOpChains.size());
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
@@ -3491,7 +2735,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
else
Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_CALL);
} else
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), 0,
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy(), 0,
MipsII::MO_NO_FLAG);
GlobalOrExternal = true;
}
@@ -3508,80 +2752,17 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
GlobalOrExternal = true;
}
- SDValue JumpTarget = Callee;
-
- // T9 should contain the address of the callee function if
- // -reloction-model=pic or it is an indirect call.
- if (IsPICCall || !GlobalOrExternal) {
- unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
- unsigned V0Reg = Mips::V0;
- if (needMips16Helper) {
- RegsToPass.push_front(std::make_pair(V0Reg, Callee));
- JumpTarget = DAG.getExternalSymbol(
- mips16HelperFunction, getPointerTy());
- JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT);
- }
- else {
- RegsToPass.push_front(std::make_pair(T9Reg, Callee));
-
- if (!Subtarget->inMips16Mode())
- JumpTarget = SDValue();
- }
- }
-
- // Insert node "GP copy globalreg" before call to function.
- //
- // R_MIPS_CALL* operators (emitted when non-internal functions are called
- // in PIC mode) allow symbols to be resolved via lazy binding.
- // The lazy binding stub requires GP to point to the GOT.
- if (IsPICCall && !InternalLinkage) {
- unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP;
- EVT Ty = IsN64 ? MVT::i64 : MVT::i32;
- RegsToPass.push_back(std::make_pair(GPReg, GetGlobalReg(DAG, Ty)));
- }
-
- // Build a sequence of copy-to-reg nodes chained together with token
- // chain and flag operands which copy the outgoing args into registers.
- // The InFlag in necessary since all emitted instructions must be
- // stuck together.
- SDValue InFlag;
-
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // MipsJmpLink = #chain, #target_address, #opt_in_flags...
- // = Chain, Callee, Reg#1, Reg#2, ...
- //
- // Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops(1, Chain);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- if (JumpTarget.getNode())
- Ops.push_back(JumpTarget);
-
- // Add argument registers to the end of the list so that they are
- // known live into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
-
- // Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
- assert(Mask && "Missing call preserved mask for calling convention");
- Ops.push_back(DAG.getRegisterMask(Mask));
+ getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, InternalLinkage,
+ CLI, Callee, Chain);
- if (InFlag.getNode())
- Ops.push_back(InFlag);
+ if (IsTailCall)
+ return DAG.getNode(MipsISD::TailCall, DL, MVT::Other, &Ops[0], Ops.size());
- if (isTailCall)
- return DAG.getNode(MipsISD::TailCall, dl, MVT::Other, &Ops[0], Ops.size());
-
- Chain = DAG.getNode(MipsISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
+ Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, &Ops[0], Ops.size());
+ SDValue InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal,
@@ -3590,31 +2771,40 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
- Ins, dl, DAG, InVals);
+ return LowerCallResult(Chain, InFlag, CallConv, IsVarArg,
+ Ins, DL, DAG, InVals, CLI.Callee.getNode(), CLI.RetTy);
}
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
- CallingConv::ID CallConv, bool isVarArg,
+ CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ const SDNode *CallNode,
+ const Type *RetTy) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
+ MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
- CCInfo.AnalyzeCallResult(Ins, RetCC_Mips);
+ MipsCCInfo.analyzeCallResult(Ins, getTargetMachine().Options.UseSoftFloat,
+ CallNode, RetTy);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
- Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
- RVLocs[i].getValVT(), InFlag).getValue(1);
- InFlag = Chain.getValue(2);
- InVals.push_back(Chain.getValue(0));
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, RVLocs[i].getLocReg(),
+ RVLocs[i].getLocVT(), InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+
+ if (RVLocs[i].getValVT() != RVLocs[i].getLocVT())
+ Val = DAG.getNode(ISD::BITCAST, DL, RVLocs[i].getValVT(), Val);
+
+ InVals.push_back(Val);
}
return Chain;
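
The reworked copy-out loop above reads each result in its register type (LocVT) and bitcasts back to the expected type (ValVT) when the two differ, which is what happens when the pieces of an f128 soft-float libcall result come back in f64 registers under N32/N64. A minimal sketch of that step, with hypothetical names and f64-to-i64 assumed:

    #include <cstdint>
    #include <cstring>

    // Hypothetical sketch: value arrives in an FPR (LocVT = f64) but the DAG
    // expects an i64 (ValVT), so reinterpret the bits, as ISD::BITCAST does.
    uint64_t copyOutResultSketch(double RegValue) {
      static_assert(sizeof(uint64_t) == sizeof(double),
                    "bitcast requires equal widths");
      uint64_t Val;
      std::memcpy(&Val, &RegValue, sizeof Val);
      return Val;
    }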
@@ -3628,9 +2818,9 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SDValue
MipsTargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
- bool isVarArg,
+ bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ DebugLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -3644,16 +2834,17 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
+ Function::const_arg_iterator FuncArg =
+ DAG.getMachineFunction().getFunction()->arg_begin();
+ bool UseSoftFloat = getTargetMachine().Options.UseSoftFloat;
- MipsCCInfo.analyzeFormalArguments(Ins);
+ MipsCCInfo.analyzeFormalArguments(Ins, UseSoftFloat, FuncArg);
MipsFI->setFormalArgInfo(CCInfo.getNextStackOffset(),
MipsCCInfo.hasByValArg());
- Function::const_arg_iterator FuncArg =
- DAG.getMachineFunction().getFunction()->arg_begin();
unsigned CurArgIdx = 0;
MipsCC::byval_iterator ByValArg = MipsCCInfo.byval_begin();
@@ -3669,7 +2860,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
assert(Flags.getByValSize() &&
"ByVal args of size 0 should have been ignored by front-end.");
assert(ByValArg != MipsCCInfo.byval_end());
- copyByValRegs(Chain, dl, OutChains, DAG, Flags, InVals, &*FuncArg,
+ copyByValRegs(Chain, DL, OutChains, DAG, Flags, InVals, &*FuncArg,
MipsCCInfo, *ByValArg);
++ByValArg;
continue;
@@ -3695,8 +2886,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Transform the arguments stored on
// physical registers into virtual ones
- unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgReg, RC);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+ unsigned Reg = addLiveIn(DAG.getMachineFunction(), ArgReg, RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
// If this is an 8 or 16-bit value, it has been passed promoted
// to 32 bits. Insert an assert[sz]ext to capture this, then
@@ -3708,22 +2899,24 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
else if (VA.getLocInfo() == CCValAssign::ZExt)
Opcode = ISD::AssertZext;
if (Opcode)
- ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue,
+ ArgValue = DAG.getNode(Opcode, DL, RegVT, ArgValue,
DAG.getValueType(ValVT));
- ArgValue = DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
+ ArgValue = DAG.getNode(ISD::TRUNCATE, DL, ValVT, ArgValue);
}
- // Handle floating point arguments passed in integer registers.
+ // Handle floating point arguments passed in integer registers and
+ // long double arguments passed in floating point registers.
if ((RegVT == MVT::i32 && ValVT == MVT::f32) ||
- (RegVT == MVT::i64 && ValVT == MVT::f64))
- ArgValue = DAG.getNode(ISD::BITCAST, dl, ValVT, ArgValue);
+ (RegVT == MVT::i64 && ValVT == MVT::f64) ||
+ (RegVT == MVT::f64 && ValVT == MVT::i64))
+ ArgValue = DAG.getNode(ISD::BITCAST, DL, ValVT, ArgValue);
else if (IsO32 && RegVT == MVT::i32 && ValVT == MVT::f64) {
- unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(),
+ unsigned Reg2 = addLiveIn(DAG.getMachineFunction(),
getNextIntArgReg(ArgReg), RC);
- SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT);
+ SDValue ArgValue2 = DAG.getCopyFromReg(Chain, DL, Reg2, RegVT);
if (!Subtarget->isLittle())
std::swap(ArgValue, ArgValue2);
- ArgValue = DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64,
+ ArgValue = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64,
ArgValue, ArgValue2);
}
@@ -3739,7 +2932,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Create load nodes to retrieve arguments from the stack
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
- InVals.push_back(DAG.getLoad(ValVT, dl, Chain, FIN,
+ InVals.push_back(DAG.getLoad(ValVT, DL, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
false, false, false, 0));
}
@@ -3755,18 +2948,18 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
createVirtualRegister(getRegClassFor(IsN64 ? MVT::i64 : MVT::i32));
MipsFI->setSRetReturnReg(Reg);
}
- SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[0]);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
}
- if (isVarArg)
- writeVarArgRegs(OutChains, MipsCCInfo, Chain, dl, DAG);
+ if (IsVarArg)
+ writeVarArgRegs(OutChains, MipsCCInfo, Chain, DL, DAG);
// All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
if (!OutChains.empty()) {
OutChains.push_back(Chain);
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
&OutChains[0], OutChains.size());
}
@@ -3779,42 +2972,48 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
bool
MipsTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
- MachineFunction &MF, bool isVarArg,
+ MachineFunction &MF, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ CCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(),
RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_Mips);
}
SDValue
MipsTargetLowering::LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
+ CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
-
+ DebugLoc DL, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of
// the return value to a location
SmallVector<CCValAssign, 16> RVLocs;
+ MachineFunction &MF = DAG.getMachineFunction();
// CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(), RVLocs,
+ *DAG.getContext());
+ MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
- // Analize return values.
- CCInfo.AnalyzeReturn(Outs, RetCC_Mips);
+ // Analyze return values.
+ MipsCCInfo.analyzeReturn(Outs, getTargetMachine().Options.UseSoftFloat,
+ MF.getFunction()->getReturnType());
SDValue Flag;
SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ SDValue Val = OutVals[i];
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
+ if (RVLocs[i].getValVT() != RVLocs[i].getLocVT())
+ Val = DAG.getNode(ISD::BITCAST, DL, RVLocs[i].getLocVT(), Val);
+
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag);
// Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
@@ -3825,17 +3024,16 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
// the sret argument into $v0 for the return. We saved the argument into
// a virtual register in the entry block, so now we copy the value out
// and into $v0.
- if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
- MachineFunction &MF = DAG.getMachineFunction();
+ if (MF.getFunction()->hasStructRetAttr()) {
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
unsigned Reg = MipsFI->getSRetReturnReg();
if (!Reg)
llvm_unreachable("sret virtual register not created in the entry block");
- SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
unsigned V0 = IsN64 ? Mips::V0_64 : Mips::V0;
- Chain = DAG.getCopyToReg(Chain, dl, V0, Val, Flag);
+ Chain = DAG.getCopyToReg(Chain, DL, V0, Val, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(V0, getPointerTy()));
}
@@ -3847,7 +3045,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
RetOps.push_back(Flag);
// Return on Mips is always a "jr $ra"
- return DAG.getNode(MipsISD::Ret, dl, MVT::Other, &RetOps[0], RetOps.size());
+ return DAG.getNode(MipsISD::Ret, DL, MVT::Other, &RetOps[0], RetOps.size());
}
//===----------------------------------------------------------------------===//
@@ -3880,6 +3078,8 @@ getConstraintType(const std::string &Constraint) const
case 'l':
case 'x':
return C_RegisterClass;
+ case 'R':
+ return C_Memory;
}
}
return TargetLowering::getConstraintType(Constraint);
@@ -3928,6 +3128,9 @@ MipsTargetLowering::getSingleConstraintMatchWeight(
if (isa<ConstantInt>(CallOperandVal))
weight = CW_Constant;
break;
+ case 'R':
+ weight = CW_Memory;
+ break;
}
return weight;
}
@@ -4128,6 +3331,46 @@ unsigned MipsTargetLowering::getJumpTableEncoding() const {
return TargetLowering::getJumpTableEncoding();
}
+/// This function returns true if CallSym is a long double emulation routine.
+static bool isF128SoftLibCall(const char *CallSym) {
+ const char *const LibCalls[] =
+ {"__addtf3", "__divtf3", "__eqtf2", "__extenddftf2", "__extendsftf2",
+ "__fixtfdi", "__fixtfsi", "__fixtfti", "__fixunstfdi", "__fixunstfsi",
+ "__fixunstfti", "__floatditf", "__floatsitf", "__floattitf",
+ "__floatunditf", "__floatunsitf", "__floatuntitf", "__getf2", "__gttf2",
+ "__letf2", "__lttf2", "__multf3", "__netf2", "__powitf2", "__subtf3",
+ "__trunctfdf2", "__trunctfsf2", "__unordtf2",
+ "ceill", "copysignl", "cosl", "exp2l", "expl", "floorl", "fmal", "fmodl",
+ "log10l", "log2l", "logl", "nearbyintl", "powl", "rintl", "sinl", "sqrtl",
+ "truncl"};
+
+ const char * const *End = LibCalls + array_lengthof(LibCalls);
+
+ // Check that LibCalls is sorted alphabetically.
+ MipsTargetLowering::LTStr Comp;
+
+#ifndef NDEBUG
+ for (const char * const *I = LibCalls; I < End - 1; ++I)
+ assert(Comp(*I, *(I + 1)));
+#endif
+
+ return std::binary_search(LibCalls, End, CallSym, Comp);
+}
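
isF128SoftLibCall depends on the table staying sorted so that std::binary_search with the LTStr comparator (declared in MipsISelLowering.h below) gives a logarithmic lookup; the NDEBUG block asserts exactly that. A minimal standalone sketch of the same pattern with a shortened, hypothetical table:

    #include <algorithm>
    #include <cstring>
    #include <iterator>

    struct LTStrSketch {  // mirrors MipsTargetLowering::LTStr
      bool operator()(const char *S1, const char *S2) const {
        return std::strcmp(S1, S2) < 0;
      }
    };

    // Hypothetical sketch: look a symbol up in an alphabetically sorted table.
    bool isLongDoubleLibCallSketch(const char *Sym) {
      static const char *const Calls[] = {"__addtf3", "__divtf3", "__multf3",
                                          "__subtf3", "sqrtl",    "truncl"};
      return std::binary_search(std::begin(Calls), std::end(Calls), Sym,
                                LTStrSketch());
    }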
+
+/// This function returns true if Ty is fp128 or i128 which was originally a
+/// fp128.
+static bool originalTypeIsF128(const Type *Ty, const SDNode *CallNode) {
+ if (Ty->isFP128Ty())
+ return true;
+
+ const ExternalSymbolSDNode *ES =
+ dyn_cast_or_null<const ExternalSymbolSDNode>(CallNode);
+
+ // If the Ty is i128 and the function being called is a long double emulation
+ // routine, then the original type is f128.
+ return (ES && Ty->isIntegerTy(128) && isF128SoftLibCall(ES->getSymbol()));
+}
+
MipsTargetLowering::MipsCC::MipsCC(CallingConv::ID CC, bool IsO32_,
CCState &Info)
: CCInfo(Info), CallConv(CC), IsO32(IsO32_) {
@@ -4137,7 +3380,8 @@ MipsTargetLowering::MipsCC::MipsCC(CallingConv::ID CC, bool IsO32_,
void MipsTargetLowering::MipsCC::
analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args,
- bool IsVarArg) {
+ bool IsVarArg, bool IsSoftFloat, const SDNode *CallNode,
+ std::vector<ArgListEntry> &FuncArgs) {
assert((CallConv != CallingConv::Fast || !IsVarArg) &&
"CallingConv::Fast shouldn't be used for vararg functions.");
@@ -4156,8 +3400,11 @@ analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args,
if (IsVarArg && !Args[I].IsFixed)
R = VarFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
- else
- R = FixedFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
+ else {
+ MVT RegVT = getRegVT(ArgVT, FuncArgs[Args[I].OrigArgIndex].Ty, CallNode,
+ IsSoftFloat);
+ R = FixedFn(I, ArgVT, RegVT, CCValAssign::Full, ArgFlags, CCInfo);
+ }
if (R) {
#ifndef NDEBUG
@@ -4170,20 +3417,26 @@ analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args,
}
void MipsTargetLowering::MipsCC::
-analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args) {
+analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args,
+ bool IsSoftFloat, Function::const_arg_iterator FuncArg) {
unsigned NumArgs = Args.size();
llvm::CCAssignFn *FixedFn = fixedArgFn();
+ unsigned CurArgIdx = 0;
for (unsigned I = 0; I != NumArgs; ++I) {
MVT ArgVT = Args[I].VT;
ISD::ArgFlagsTy ArgFlags = Args[I].Flags;
+ std::advance(FuncArg, Args[I].OrigArgIndex - CurArgIdx);
+ CurArgIdx = Args[I].OrigArgIndex;
if (ArgFlags.isByVal()) {
handleByValArg(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags);
continue;
}
- if (!FixedFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo))
+ MVT RegVT = getRegVT(ArgVT, FuncArg->getType(), 0, IsSoftFloat);
+
+ if (!FixedFn(I, ArgVT, RegVT, CCValAssign::Full, ArgFlags, CCInfo))
continue;
#ifndef NDEBUG
@@ -4194,6 +3447,44 @@ analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args) {
}
}
+template<typename Ty>
+void MipsTargetLowering::MipsCC::
+analyzeReturn(const SmallVectorImpl<Ty> &RetVals, bool IsSoftFloat,
+ const SDNode *CallNode, const Type *RetTy) const {
+ CCAssignFn *Fn;
+
+ if (IsSoftFloat && originalTypeIsF128(RetTy, CallNode))
+ Fn = RetCC_F128Soft;
+ else
+ Fn = RetCC_Mips;
+
+ for (unsigned I = 0, E = RetVals.size(); I < E; ++I) {
+ MVT VT = RetVals[I].VT;
+ ISD::ArgFlagsTy Flags = RetVals[I].Flags;
+ MVT RegVT = this->getRegVT(VT, RetTy, CallNode, IsSoftFloat);
+
+ if (Fn(I, VT, RegVT, CCValAssign::Full, Flags, this->CCInfo)) {
+#ifndef NDEBUG
+ dbgs() << "Call result #" << I << " has unhandled type "
+ << EVT(VT).getEVTString() << '\n';
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+void MipsTargetLowering::MipsCC::
+analyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsSoftFloat,
+ const SDNode *CallNode, const Type *RetTy) const {
+ analyzeReturn(Ins, IsSoftFloat, CallNode, RetTy);
+}
+
+void MipsTargetLowering::MipsCC::
+analyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsSoftFloat,
+ const Type *RetTy) const {
+ analyzeReturn(Outs, IsSoftFloat, 0, RetTy);
+}
+
void
MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT,
MVT LocVT,
@@ -4268,6 +3559,21 @@ void MipsTargetLowering::MipsCC::allocateRegs(ByValArgInfo &ByVal,
CCInfo.AllocateReg(IntArgRegs[I], ShadowRegs[I]);
}
+MVT MipsTargetLowering::MipsCC::getRegVT(MVT VT, const Type *OrigTy,
+ const SDNode *CallNode,
+ bool IsSoftFloat) const {
+ if (IsSoftFloat || IsO32)
+ return VT;
+
+ // Check if the original type was fp128.
+ if (originalTypeIsF128(OrigTy, CallNode)) {
+ assert(VT == MVT::i64);
+ return MVT::f64;
+ }
+
+ return VT;
+}
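// Illustrative note: under the hard-float N32/N64 ABIs an fp128 value is
// softened into i64 pieces; reporting MVT::f64 for those pieces presumably lets
// the calling convention assign them to 64-bit FPU registers rather than GPRs.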
+
void MipsTargetLowering::
copyByValRegs(SDValue Chain, DebugLoc DL, std::vector<SDValue> &OutChains,
SelectionDAG &DAG, const ISD::ArgFlagsTy &Flags,
@@ -4300,7 +3606,7 @@ copyByValRegs(SDValue Chain, DebugLoc DL, std::vector<SDValue> &OutChains,
for (unsigned I = 0; I < ByVal.NumRegs; ++I) {
unsigned ArgReg = CC.intArgRegs()[ByVal.FirstIdx + I];
- unsigned VReg = AddLiveIn(MF, ArgReg, RC);
+ unsigned VReg = addLiveIn(MF, ArgReg, RC);
unsigned Offset = I * CC.regSize();
SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrTy, FIN,
DAG.getConstant(Offset, PtrTy));
@@ -4442,7 +3748,7 @@ MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
// in the caller's stack frame, while for N32/64, it is allocated in the
// callee's stack frame.
for (unsigned I = Idx; I < NumRegs; ++I, VaArgOffset += RegSize) {
- unsigned Reg = AddLiveIn(MF, ArgRegs[I], RC);
+ unsigned Reg = addLiveIn(MF, ArgRegs[I], RC);
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy);
FI = MFI->CreateFixedObject(RegSize, VaArgOffset, true);
SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index f0f3782..71977d7 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -19,6 +19,7 @@
#include "MipsSubtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/Function.h"
#include "llvm/Target/TargetLowering.h"
#include <deque>
#include <string>
@@ -151,9 +152,9 @@ namespace llvm {
public:
explicit MipsTargetLowering(MipsTargetMachine &TM);
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+ static const MipsTargetLowering *create(MipsTargetMachine &TM);
- virtual bool allowsUnalignedMemoryAccesses (EVT VT, bool *Fast) const;
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
virtual void LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
@@ -176,17 +177,34 @@ namespace llvm {
EVT getSetCCResultType(EVT VT) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- private:
- void SetMips16LibcallName(RTLIB::Libcall, const char *Name);
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ struct LTStr {
+ bool operator()(const char *S1, const char *S2) const {
+ return strcmp(S1, S2) < 0;
+ }
+ };
+
+ protected:
+ SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const;
- void setMips16HardFloatLibCalls();
+ SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) const;
- unsigned int
- getMips16HelperFunctionStubNumber(ArgListTy &Args) const;
+ SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) const;
- const char *getMips16HelperFunction
- (Type* RetTy, ArgListTy &Args, bool &needHelper) const;
+ SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
+ unsigned HiFlag, unsigned LoFlag) const;
+
+ /// This function fills Ops, which is the list of operands that will later
+ /// be used when a function call node is created. It also generates
+ /// copyToReg nodes to set up argument registers.
+ virtual void
+ getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
/// ByValArgInfo - Byval argument information.
struct ByValArgInfo {
@@ -204,8 +222,20 @@ namespace llvm {
MipsCC(CallingConv::ID CallConv, bool IsO32, CCState &Info);
void analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
- bool IsVarArg);
- void analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins);
+ bool IsVarArg, bool IsSoftFloat,
+ const SDNode *CallNode,
+ std::vector<ArgListEntry> &FuncArgs);
+ void analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ bool IsSoftFloat,
+ Function::const_arg_iterator FuncArg);
+
+ void analyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ bool IsSoftFloat, const SDNode *CallNode,
+ const Type *RetTy) const;
+
+ void analyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ bool IsSoftFloat, const Type *RetTy) const;
+
const CCState &getCCInfo() const { return CCInfo; }
/// hasByValArg - Returns true if function has byval arguments.
@@ -248,6 +278,17 @@ namespace llvm {
void allocateRegs(ByValArgInfo &ByVal, unsigned ByValSize,
unsigned Align);
+ /// Return the type of the register which is used to pass an argument or
+ /// return a value. This function returns f64 if the argument is an i64
+ /// value which has been generated as a result of softening an f128 value.
+ /// Otherwise, it just returns VT.
+ MVT getRegVT(MVT VT, const Type *OrigTy, const SDNode *CallNode,
+ bool IsSoftFloat) const;
+
+ template<typename Ty>
+ void analyzeReturn(const SmallVectorImpl<Ty> &RetVals, bool IsSoftFloat,
+ const SDNode *CallNode, const Type *RetTy) const;
+
CCState &CCInfo;
CallingConv::ID CallConv;
bool IsO32;
@@ -259,45 +300,49 @@ namespace llvm {
bool HasMips64, IsN64, IsO32;
+ private:
// Lower Operand helpers
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals,
+ const SDNode *CallNode, const Type *RetTy) const;
// Lower Operand specifics
- SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
- SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
- SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const;
- SDValue LowerShiftRightParts(SDValue Op, SelectionDAG& DAG,
+ SDValue lowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
+ SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
+ SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const;
+ SDValue lowerShiftRightParts(SDValue Op, SelectionDAG& DAG,
bool IsSRA) const;
- SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerADD(SDValue Op, SelectionDAG &DAG) const;
- /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
- bool IsEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
- unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const;
+ virtual bool
+ isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const = 0;
/// copyByValArg - Copy argument registers which were used to pass a byval
/// argument to the stack. Create a stack frame object for the byval
@@ -351,10 +396,6 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
- virtual MachineBasicBlock *
- EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
-
// Inline asm support
ConstraintType getConstraintType(const std::string &Constraint) const;
@@ -393,40 +434,20 @@ namespace llvm {
virtual unsigned getJumpTableEncoding() const;
- MachineBasicBlock *EmitBPOSGE32(MachineInstr *MI,
- MachineBasicBlock *BB) const;
- MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ MachineBasicBlock *emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode, bool Nand = false) const;
- MachineBasicBlock *EmitAtomicBinaryPartword(MachineInstr *MI,
+ MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr *MI,
MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode,
bool Nand = false) const;
- MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *emitAtomicCmpSwap(MachineInstr *MI,
MachineBasicBlock *BB, unsigned Size) const;
- MachineBasicBlock *EmitAtomicCmpSwapPartword(MachineInstr *MI,
+ MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr *MI,
MachineBasicBlock *BB, unsigned Size) const;
- MachineBasicBlock *EmitSel16(unsigned Opc, MachineInstr *MI,
- MachineBasicBlock *BB) const;
- MachineBasicBlock *EmitSeliT16(unsigned Opc1, unsigned Opc2,
- MachineInstr *MI,
- MachineBasicBlock *BB) const;
-
- MachineBasicBlock *EmitSelT16(unsigned Opc1, unsigned Opc2,
- MachineInstr *MI,
- MachineBasicBlock *BB) const;
- MachineBasicBlock *EmitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
- MachineInstr *MI,
- MachineBasicBlock *BB) const;
- MachineBasicBlock *EmitFEXT_T8I8I16_ins(
- unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
- MachineInstr *MI, MachineBasicBlock *BB) const;
- MachineBasicBlock *EmitFEXT_CCRX16_ins(
- unsigned SltOpc,
- MachineInstr *MI, MachineBasicBlock *BB) const;
- MachineBasicBlock *EmitFEXT_CCRXI16_ins(
- unsigned SltiOpc, unsigned SltiXOpc,
- MachineInstr *MI, MachineBasicBlock *BB )const;
-
};
+
+ /// Create MipsTargetLowering objects.
+ const MipsTargetLowering *createMips16TargetLowering(MipsTargetMachine &TM);
+ const MipsTargetLowering *createMipsSETargetLowering(MipsTargetMachine &TM);
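  /// A plausible definition for MipsTargetLowering::create (declared above; the
  /// body lives in MipsISelLowering.cpp and is assumed here): dispatch on the
  /// subtarget mode to one of the two factory functions.
  ///
  ///   const MipsTargetLowering *MipsTargetLowering::create(MipsTargetMachine &TM) {
  ///     if (TM.getSubtargetImpl()->inMips16Mode())
  ///       return llvm::createMips16TargetLowering(TM);
  ///     return llvm::createMipsSETargetLowering(TM);
  ///   }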
}
#endif // MipsISELLOWERING_H
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 76644c1..ad92d41 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -93,81 +93,11 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const
-{
-
- MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
-
- // Skip all the debug instructions.
- while (I != REnd && I->isDebugValue())
- ++I;
-
- if (I == REnd || !isUnpredicatedTerminator(&*I)) {
- // If this block ends with no branches (it just falls through to its succ)
- // just return false, leaving TBB/FBB null.
- TBB = FBB = NULL;
- return false;
- }
-
- MachineInstr *LastInst = &*I;
- unsigned LastOpc = LastInst->getOpcode();
-
- // Not an analyzable branch (must be an indirect jump).
- if (!GetAnalyzableBrOpc(LastOpc))
- return true;
-
- // Get the second to last instruction in the block.
- unsigned SecondLastOpc = 0;
- MachineInstr *SecondLastInst = NULL;
-
- if (++I != REnd) {
- SecondLastInst = &*I;
- SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode());
-
- // Not an analyzable branch (must be an indirect jump).
- if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc)
- return true;
- }
-
- // If there is only one terminator instruction, process it.
- if (!SecondLastOpc) {
- // Unconditional branch
- if (LastOpc == UncondBrOpc) {
- TBB = LastInst->getOperand(0).getMBB();
- return false;
- }
-
- // Conditional branch
- AnalyzeCondBr(LastInst, LastOpc, TBB, Cond);
- return false;
- }
+ bool AllowModify) const {
+ SmallVector<MachineInstr*, 2> BranchInstrs;
+ BranchType BT = AnalyzeBranch(MBB, TBB, FBB, Cond, AllowModify, BranchInstrs);
- // If we reached here, there are two branches.
- // If there are three terminators, we don't know what sort of block this is.
- if (++I != REnd && isUnpredicatedTerminator(&*I))
- return true;
-
- // If second to last instruction is an unconditional branch,
- // analyze it and remove the last instruction.
- if (SecondLastOpc == UncondBrOpc) {
- // Return if the last instruction cannot be removed.
- if (!AllowModify)
- return true;
-
- TBB = SecondLastInst->getOperand(0).getMBB();
- LastInst->eraseFromParent();
- return false;
- }
-
- // Conditional branch followed by an unconditional branch.
- // The last one must be unconditional.
- if (LastOpc != UncondBrOpc)
- return true;
-
- AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
- FBB = LastInst->getOperand(0).getMBB();
-
- return false;
+ return (BT == BT_None) || (BT == BT_Indirect);
}
void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB,
@@ -256,6 +186,90 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
return false;
}
+MipsInstrInfo::BranchType MipsInstrInfo::
+AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify,
+ SmallVectorImpl<MachineInstr*> &BranchInstrs) const {
+
+ MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
+
+ // Skip all the debug instructions.
+ while (I != REnd && I->isDebugValue())
+ ++I;
+
+ if (I == REnd || !isUnpredicatedTerminator(&*I)) {
+ // This block ends with no branches (it just falls through to its succ).
+ // Leave TBB/FBB null.
+ TBB = FBB = NULL;
+ return BT_NoBranch;
+ }
+
+ MachineInstr *LastInst = &*I;
+ unsigned LastOpc = LastInst->getOpcode();
+ BranchInstrs.push_back(LastInst);
+
+ // Not an analyzable branch (e.g., indirect jump).
+ if (!GetAnalyzableBrOpc(LastOpc))
+ return LastInst->isIndirectBranch() ? BT_Indirect : BT_None;
+
+ // Get the second to last instruction in the block.
+ unsigned SecondLastOpc = 0;
+ MachineInstr *SecondLastInst = NULL;
+
+ if (++I != REnd) {
+ SecondLastInst = &*I;
+ SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode());
+
+ // Not an analyzable branch (must be an indirect jump).
+ if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc)
+ return BT_None;
+ }
+
+ // If there is only one terminator instruction, process it.
+ if (!SecondLastOpc) {
+ // Unconditional branch
+ if (LastOpc == UncondBrOpc) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return BT_Uncond;
+ }
+
+ // Conditional branch
+ AnalyzeCondBr(LastInst, LastOpc, TBB, Cond);
+ return BT_Cond;
+ }
+
+ // If we reached here, there are two branches.
+ // If there are three terminators, we don't know what sort of block this is.
+ if (++I != REnd && isUnpredicatedTerminator(&*I))
+ return BT_None;
+
+ BranchInstrs.insert(BranchInstrs.begin(), SecondLastInst);
+
+ // If second to last instruction is an unconditional branch,
+ // analyze it and remove the last instruction.
+ if (SecondLastOpc == UncondBrOpc) {
+ // Return if the last instruction cannot be removed.
+ if (!AllowModify)
+ return BT_None;
+
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ LastInst->eraseFromParent();
+ BranchInstrs.pop_back();
+ return BT_Uncond;
+ }
+
+ // Conditional branch followed by an unconditional branch.
+ // The last one must be unconditional.
+ if (LastOpc != UncondBrOpc)
+ return BT_None;
+
+ AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
+ FBB = LastInst->getOperand(0).getMBB();
+
+ return BT_CondUncond;
+}
+
/// Return the number of bytes of code the specified instruction may be.
unsigned MipsInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
switch (MI->getOpcode()) {
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index aca2bc7..3cd9088 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -31,6 +31,15 @@ protected:
unsigned UncondBrOpc;
public:
+ enum BranchType {
+ BT_None, // Couldn't analyze branch.
+ BT_NoBranch, // No branches found.
+ BT_Uncond, // One unconditional branch.
+ BT_Cond, // One conditional branch.
+ BT_CondUncond, // A conditional branch followed by an unconditional branch.
+ BT_Indirect // One indirect branch.
+ };
+
explicit MipsInstrInfo(MipsTargetMachine &TM, unsigned UncondBrOpc);
static const MipsInstrInfo *create(MipsTargetMachine &TM);
@@ -51,6 +60,12 @@ public:
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+ BranchType AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify,
+ SmallVectorImpl<MachineInstr*> &BranchInstrs) const;
+
virtual MachineInstr* emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx, uint64_t Offset,
const MDNode *MDPtr,
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index de09c9e..25b5d24 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -299,6 +299,9 @@ def HI16 : SDNodeXForm<imm, [{
return getImm(N, (N->getZExtValue() >> 16) & 0xFFFF);
}]>;
+// Plus 1.
+def Plus1 : SDNodeXForm<imm, [{ return getImm(N, N->getSExtValue() + 1); }]>;
+
// Node immediate fits as 16-bit sign extended on target immediate.
// e.g. addi, andi
def immSExt8 : PatLeaf<(imm), [{ return isInt<8>(N->getSExtValue()); }]>;
@@ -331,6 +334,11 @@ def immLow16Zero : PatLeaf<(imm), [{
// shamt field must fit in 5 bits.
def immZExt5 : ImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>;
+// True if (N + 1) fits in a 16-bit field.
+def immSExt16Plus1 : PatLeaf<(imm), [{
+ return isInt<17>(N->getSExtValue()) && isInt<16>(N->getSExtValue() + 1);
+}]>;
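// Illustrative note (assumed usage): this predicate lets a "less than or equal
// to C" comparison be selected as "less than C + 1", with Plus1 above producing
// the folded immediate. For C = 0x7FFE the rewritten immediate 0x7FFF still
// fits in a signed 16-bit field, so the pattern may fire; for C = 0x7FFF it
// would overflow to 0x8000 and the predicate rejects it.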
+
 // Mips Address Mode! SDNode frameindex could possibly be a match
 // since load and store instructions from the stack use it.
def addr :
@@ -977,7 +985,7 @@ def : InstAlias<"move $dst, $src",
(ADDu CPURegsOpnd:$dst, CPURegsOpnd:$src,ZERO), 1>,
Requires<[NotMips64]>;
def : InstAlias<"move $dst, $src",
- (OR CPURegsOpnd:$dst, CPURegsOpnd:$src,ZERO), 0>,
+ (OR CPURegsOpnd:$dst, CPURegsOpnd:$src,ZERO), 1>,
Requires<[NotMips64]>;
def : InstAlias<"bal $offset", (BGEZAL RA, brtarget:$offset), 1>;
def : InstAlias<"addu $rs, $rt, $imm",
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 13b2a6a..3c210e7 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -58,7 +58,8 @@ public:
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
/// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index f93dd86..6d76e8a 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -18,6 +18,10 @@ def sub_lo : SubRegIndex;
def sub_hi : SubRegIndex;
}
+class Unallocatable {
+ bit isAllocatable = 0;
+}
+
// We have banks of 32 registers each.
class MipsReg<bits<16> Enc, string n> : Register<n> {
let HWEncoding = Enc;
@@ -291,9 +295,9 @@ def CPU16Regs : RegisterClass<"Mips", [i32], 32, (add
// Callee save
S0, S1)>;
-def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>;
+def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>, Unallocatable;
-def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>;
+def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>, Unallocatable;
// 64bit fp:
// * FGR64 - 32 64-bit registers
@@ -319,18 +323,19 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>;
// Condition Register for floating point operations
-def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>;
+def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>, Unallocatable;
// Hi/Lo Registers
-def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>;
-def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>;
+def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>, Unallocatable;
+def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>, Unallocatable;
// Hardware registers
-def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>;
-def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>;
+def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>, Unallocatable;
+def HWRegs64 : RegisterClass<"Mips", [i64], 64, (add HWR29_64)>, Unallocatable;
// Accumulator Registers
-def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>;
+def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>,
+ Unallocatable;
def CPURegsAsmOperand : AsmOperandClass {
let Name = "CPURegsAsm";
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
new file mode 100644
index 0000000..e22c3c8
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -0,0 +1,460 @@
+//===-- MipsSEISelDAGToDAG.cpp - A Dag to Dag Inst Selector for MipsSE ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-isel"
+#include "MipsSEISelDAGToDAG.h"
+#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
+ const MachineInstr& MI) {
+ unsigned DstReg = 0, ZeroReg = 0;
+
+ // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
+ if ((MI.getOpcode() == Mips::ADDiu) &&
+ (MI.getOperand(1).getReg() == Mips::ZERO) &&
+ (MI.getOperand(2).getImm() == 0)) {
+ DstReg = MI.getOperand(0).getReg();
+ ZeroReg = Mips::ZERO;
+ } else if ((MI.getOpcode() == Mips::DADDiu) &&
+ (MI.getOperand(1).getReg() == Mips::ZERO_64) &&
+ (MI.getOperand(2).getImm() == 0)) {
+ DstReg = MI.getOperand(0).getReg();
+ ZeroReg = Mips::ZERO_64;
+ }
+
+ if (!DstReg)
+ return false;
+
+ // Replace uses with ZeroReg.
+ for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
+ E = MRI->use_end(); U != E;) {
+ MachineOperand &MO = U.getOperand();
+ unsigned OpNo = U.getOperandNo();
+ MachineInstr *MI = MO.getParent();
+ ++U;
+
+ // Do not replace if it is a phi's operand or is tied to def operand.
+ if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo())
+ continue;
+
+ MO.setReg(ZeroReg);
+ }
+
+ return true;
+}
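// Illustrative effect (hypothetical virtual-register numbers):
//   %vreg1 = ADDiu $zero, 0
//   %vreg2 = ADDu  %vreg3, %vreg1
// becomes
//   %vreg1 = ADDiu $zero, 0        ; left in place, now possibly dead
//   %vreg2 = ADDu  %vreg3, $zero
// PHI operands and uses tied to a def keep the original register.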
+
+void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ if (!MipsFI->globalBaseRegSet())
+ return;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg();
+ const TargetRegisterClass *RC;
+
+ if (Subtarget.isABI_N64())
+ RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass;
+ else
+ RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass;
+
+ V0 = RegInfo.createVirtualRegister(RC);
+ V1 = RegInfo.createVirtualRegister(RC);
+
+ if (Subtarget.isABI_N64()) {
+ MF.getRegInfo().addLiveIn(Mips::T9_64);
+ MBB.addLiveIn(Mips::T9_64);
+
+ // lui $v0, %hi(%neg(%gp_rel(fname)))
+ // daddu $v1, $v0, $t9
+ // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+ const GlobalValue *FName = MF.getFunction();
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0)
+ .addReg(Mips::T9_64);
+ BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ return;
+ }
+
+ if (MF.getTarget().getRelocationModel() == Reloc::Static) {
+ // Set global register to __gnu_local_gp.
+ //
+ // lui $v0, %hi(__gnu_local_gp)
+ // addiu $globalbasereg, $v0, %lo(__gnu_local_gp)
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+ .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
+ .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
+ return;
+ }
+
+ MF.getRegInfo().addLiveIn(Mips::T9);
+ MBB.addLiveIn(Mips::T9);
+
+ if (Subtarget.isABI_N32()) {
+ // lui $v0, %hi(%neg(%gp_rel(fname)))
+ // addu $v1, $v0, $t9
+ // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+ const GlobalValue *FName = MF.getFunction();
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ return;
+ }
+
+ assert(Subtarget.isABI_O32());
+
+ // For O32 ABI, the following instruction sequence is emitted to initialize
+ // the global base register:
+ //
+ // 0. lui $2, %hi(_gp_disp)
+ // 1. addiu $2, $2, %lo(_gp_disp)
+ // 2. addu $globalbasereg, $2, $t9
+ //
+ // We emit only the last instruction here.
+ //
+ // GNU linker requires that the first two instructions appear at the beginning
+ // of a function and no instructions be inserted before or between them.
+ // The two instructions are emitted during lowering to MC layer in order to
+ // avoid any reordering.
+ //
+ // Register $2 (Mips::V0) is added to the list of live-in registers to ensure
+ // the value instruction 1 (addiu) defines is valid when instruction 2 (addu)
+ // reads it.
+ MF.getRegInfo().addLiveIn(Mips::V0);
+ MBB.addLiveIn(Mips::V0);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
+ .addReg(Mips::V0).addReg(Mips::T9);
+}
+
+void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
+ initGlobalBaseReg(MF);
+
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+
+ for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE;
+ ++MFI)
+ for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
+ replaceUsesWithZeroReg(MRI, *I);
+}
+
+/// Select multiply instructions.
+std::pair<SDNode*, SDNode*>
+MipsSEDAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty,
+ bool HasLo, bool HasHi) {
+ SDNode *Lo = 0, *Hi = 0;
+ SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0),
+ N->getOperand(1));
+ SDValue InFlag = SDValue(Mul, 0);
+
+ if (HasLo) {
+ unsigned Opcode = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64);
+ Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag);
+ InFlag = SDValue(Lo, 1);
+ }
+ if (HasHi) {
+ unsigned Opcode = (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64);
+ Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag);
+ }
+ return std::make_pair(Lo, Hi);
+}
+
+SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
+ SDValue CmpLHS, DebugLoc DL,
+ SDNode *Node) const {
+ unsigned Opc = InFlag.getOpcode(); (void)Opc;
+
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+ SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1);
+ EVT VT = LHS.getValueType();
+
+ SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops, 2);
+ SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, DL, VT,
+ SDValue(Carry, 0), RHS);
+ return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS,
+ SDValue(AddCarry, 0));
+}
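// Illustrative expansion (hypothetical register names) for a 64-bit add split
// across 32-bit registers, matching the selection above:
//   addu  $lo_sum, $a_lo,   $b_lo      ; ADDC
//   sltu  $carry,  $lo_sum, $b_lo      ; carry-out: unsigned (sum < operand)
//   addu  $tmp,    $carry,  $b_hi
//   addu  $hi_sum, $a_hi,   $tmp       ; ADDE selected by selectAddESubE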
+
+/// ComplexPattern used by MipsInstrInfo.
+/// Used by Mips load/store instructions.
+bool MipsSEDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ EVT ValTy = Addr.getValueType();
+
+ // If Address is a frame index, get the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ return true;
+ }
+
+ // In PIC code, global addresses are loaded via MipsISD::Wrapper nodes.
+ if (Addr.getOpcode() == MipsISD::Wrapper) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+
+ if (TM.getRelocationModel() != Reloc::PIC_) {
+ if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress))
+ return false;
+ }
+
+ // Addresses of the form FI+const or FI|const
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ if (isInt<16>(CN->getSExtValue())) {
+
+ // If the first operand is a FI, get the TargetFI Node
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0)))
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ else
+ Base = Addr.getOperand(0);
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
+ return true;
+ }
+ }
+
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ // When loading from constant pools, load the lower address part in
+ // the instruction itself. For example, instead of:
+ // lui $2, %hi($CPI1_0)
+ // addiu $2, $2, %lo($CPI1_0)
+ // lwc1 $f0, 0($2)
+ // Generate:
+ // lui $2, %hi($CPI1_0)
+ // lwc1 $f0, %lo($CPI1_0)($2)
+ if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
+ Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
+ SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
+ if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
+ isa<JumpTableSDNode>(Opnd0)) {
+ Base = Addr.getOperand(0);
+ Offset = Opnd0;
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool MipsSEDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType());
+ return true;
+}
+
+bool MipsSEDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ return selectAddrRegImm(Addr, Base, Offset) ||
+ selectAddrDefault(Addr, Base, Offset);
+}
+
+std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc DL = Node->getDebugLoc();
+
+ ///
+ // Instruction selection not handled by the auto-generated TableGen-based
+ // selector should be handled here.
+ ///
+ EVT NodeTy = Node->getValueType(0);
+ SDNode *Result;
+ unsigned MultOpc;
+
+ switch(Opcode) {
+ default: break;
+
+ case ISD::SUBE: {
+ SDValue InFlag = Node->getOperand(2);
+ Result = selectAddESubE(Mips::SUBu, InFlag, InFlag.getOperand(0), DL, Node);
+ return std::make_pair(true, Result);
+ }
+
+ case ISD::ADDE: {
+ SDValue InFlag = Node->getOperand(2);
+ Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node);
+ return std::make_pair(true, Result);
+ }
+
+ /// Mul with two results
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ if (NodeTy == MVT::i32)
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
+ else
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT);
+
+ std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy,
+ true, true);
+
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
+
+ if (!SDValue(Node, 1).use_empty())
+ ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
+
+ return std::make_pair(true, (SDNode*)NULL);
+ }
+
+ /// Special Muls
+ case ISD::MUL: {
+ // Mips32 has a 32-bit three operand mul instruction.
+ if (Subtarget.hasMips32() && NodeTy == MVT::i32)
+ break;
+ MultOpc = NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT;
+ Result = selectMULT(Node, MultOpc, DL, NodeTy, true, false).first;
+ return std::make_pair(true, Result);
+ }
+ case ISD::MULHS:
+ case ISD::MULHU: {
+ if (NodeTy == MVT::i32)
+ MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
+ else
+ MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT);
+
+ Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second;
+ return std::make_pair(true, Result);
+ }
+
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
+ if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
+ if (Subtarget.hasMips64()) {
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+ Mips::ZERO_64, MVT::i64);
+ Result = CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero);
+ } else {
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+ Mips::ZERO, MVT::i32);
+ Result = CurDAG->getMachineNode(Mips::BuildPairF64, DL, MVT::f64, Zero,
+ Zero);
+ }
+
+ return std::make_pair(true, Result);
+ }
+ break;
+ }
+
+ case ISD::Constant: {
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node);
+ unsigned Size = CN->getValueSizeInBits(0);
+
+ if (Size == 32)
+ break;
+
+ MipsAnalyzeImmediate AnalyzeImm;
+ int64_t Imm = CN->getSExtValue();
+
+ const MipsAnalyzeImmediate::InstSeq &Seq =
+ AnalyzeImm.Analyze(Imm, Size, false);
+
+ MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
+ DebugLoc DL = CN->getDebugLoc();
+ SDNode *RegOpnd;
+ SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
+ MVT::i64);
+
+ // The first instruction can be a LUi which is different from other
+ // instructions (ADDiu, ORI and SLL) in that it does not have a register
+ // operand.
+ if (Inst->Opc == Mips::LUi64)
+ RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd);
+ else
+ RegOpnd =
+ CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
+ CurDAG->getRegister(Mips::ZERO_64, MVT::i64),
+ ImmOpnd);
+
+ // The remaining instructions in the sequence are handled here.
+ for (++Inst; Inst != Seq.end(); ++Inst) {
+ ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
+ MVT::i64);
+ RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
+ SDValue(RegOpnd, 0), ImmOpnd);
+ }
+
+ return std::make_pair(true, RegOpnd);
+ }
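  // Illustrative note: for a full 64-bit constant the analyzer typically yields
  // a lui/ori/dsll chain; e.g. 0x123456789ABCDEF0 might expand roughly as
  //   lui  $r, 0x1234
  //   ori  $r, $r, 0x5678
  //   dsll $r, $r, 16
  //   ori  $r, $r, 0x9abc
  //   dsll $r, $r, 16
  //   ori  $r, $r, 0xdef0
  // (the exact opcodes and splits are decided by MipsAnalyzeImmediate).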
+
+ case MipsISD::ThreadPointer: {
+ EVT PtrVT = TLI.getPointerTy();
+ unsigned RdhwrOpc, SrcReg, DestReg;
+
+ if (PtrVT == MVT::i32) {
+ RdhwrOpc = Mips::RDHWR;
+ SrcReg = Mips::HWR29;
+ DestReg = Mips::V1;
+ } else {
+ RdhwrOpc = Mips::RDHWR64;
+ SrcReg = Mips::HWR29_64;
+ DestReg = Mips::V1_64;
+ }
+
+ SDNode *Rdhwr =
+ CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(),
+ Node->getValueType(0),
+ CurDAG->getRegister(SrcReg, PtrVT));
+ SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg,
+ SDValue(Rdhwr, 0));
+ SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT);
+ ReplaceUses(SDValue(Node, 0), ResNode);
+ return std::make_pair(true, ResNode.getNode());
+ }
+ }
+
+ return std::make_pair(false, (SDNode*)NULL);
+}
+
+FunctionPass *llvm::createMipsSEISelDag(MipsTargetMachine &TM) {
+ return new MipsSEDAGToDAGISel(TM);
+}
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
new file mode 100644
index 0000000..6137ab0
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -0,0 +1,57 @@
+//===-- MipsSEISelDAGToDAG.h - A Dag to Dag Inst Selector for MipsSE -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSEISELDAGTODAG_H
+#define MIPSSEISELDAGTODAG_H
+
+#include "MipsISelDAGToDAG.h"
+
+namespace llvm {
+
+class MipsSEDAGToDAGISel : public MipsDAGToDAGISel {
+
+public:
+ explicit MipsSEDAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {}
+
+private:
+ bool replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
+
+ std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc dl,
+ EVT Ty, bool HasLo, bool HasHi);
+
+ SDNode *selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS,
+ DebugLoc DL, SDNode *Node) const;
+
+ virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual std::pair<bool, SDNode*> selectNode(SDNode *Node);
+
+ virtual void processFunctionAfterISel(MachineFunction &MF);
+
+ // Insert instructions to initialize the global base register in the
+ // first MBB of the function.
+ void initGlobalBaseReg(MachineFunction &MF);
+};
+
+FunctionPass *createMipsSEISelDag(MipsTargetMachine &TM);
+
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
new file mode 100644
index 0000000..287e2ed
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -0,0 +1,197 @@
+//===-- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+#include "MipsSEISelLowering.h"
+#include "MipsRegisterInfo.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
+ cl::desc("MIPS: Enable tail calls."), cl::init(false));
+
+MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
+ : MipsTargetLowering(TM) {
+ // Set up the register classes
+ addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
+
+ if (HasMips64)
+ addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass);
+
+ if (Subtarget->hasDSP()) {
+ MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
+
+ for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
+ addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass);
+
+ // Expand all builtin opcodes.
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
+ setOperationAction(Opc, VecTys[i], Expand);
+
+ setOperationAction(ISD::LOAD, VecTys[i], Legal);
+ setOperationAction(ISD::STORE, VecTys[i], Legal);
+ setOperationAction(ISD::BITCAST, VecTys[i], Legal);
+ }
+ }
+
+ if (!TM.Options.UseSoftFloat) {
+ addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
+
+ // When dealing with single precision only, use libcalls
+ if (!Subtarget->isSingleFloat()) {
+ if (HasMips64)
+ addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
+ else
+ addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
+ }
+ }
+
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+
+ computeRegisterProperties();
+}
+
+const MipsTargetLowering *
+llvm::createMipsSETargetLowering(MipsTargetMachine &TM) {
+ return new MipsSETargetLowering(TM);
+}
+
+bool
+MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
+ MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
+
+ switch (SVT) {
+ case MVT::i64:
+ case MVT::i32:
+ if (Fast)
+ *Fast = true;
+ return true;
+ default:
+ return false;
+ }
+}
+
+MachineBasicBlock *
+MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ switch (MI->getOpcode()) {
+ default:
+ return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case Mips::BPOSGE32_PSEUDO:
+ return emitBPOSGE32(MI, BB);
+ }
+}
+
+bool MipsSETargetLowering::
+isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const {
+ if (!EnableMipsTailCalls)
+ return false;
+
+ // Return false if either the callee or caller has a byval argument.
+ if (MipsCCInfo.hasByValArg() || FI.hasByvalArg())
+ return false;
+
+ // Return true if the callee's argument area is no larger than the
+ // caller's.
+ return NextStackOffset <= FI.getIncomingArgSize();
+}
+
+void MipsSETargetLowering::
+getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ // T9 should contain the address of the callee function if
+ // -relocation-model=pic or it is an indirect call.
+ if (IsPICCall || !GlobalOrExternal) {
+ unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
+ RegsToPass.push_front(std::make_pair(T9Reg, Callee));
+ } else
+ Ops.push_back(Callee);
+
+ MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
+ InternalLinkage, CLI, Callee, Chain);
+}
+
+MachineBasicBlock * MipsSETargetLowering::
+emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const {
+ // $bb:
+ // bposge32_pseudo $vr0
+ // =>
+ // $bb:
+ // bposge32 $tbb
+ // $fbb:
+ // li $vr2, 0
+ // b $sink
+ // $tbb:
+ // li $vr1, 1
+ // $sink:
+ // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
+
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetRegisterClass *RC = &Mips::CPURegsRegClass;
+ DebugLoc DL = MI->getDebugLoc();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB));
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, FBB);
+ F->insert(It, TBB);
+ F->insert(It, Sink);
+
+ // Transfer the remainder of BB and its successor edges to Sink.
+ Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ Sink->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add successors.
+ BB->addSuccessor(FBB);
+ BB->addSuccessor(TBB);
+ FBB->addSuccessor(Sink);
+ TBB->addSuccessor(Sink);
+
+ // Insert the real bposge32 instruction to $BB.
+ BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
+
+ // Fill $FBB.
+ unsigned VR2 = RegInfo.createVirtualRegister(RC);
+ BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
+ .addReg(Mips::ZERO).addImm(0);
+ BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
+
+ // Fill $TBB.
+ unsigned VR1 = RegInfo.createVirtualRegister(RC);
+ BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
+ .addReg(Mips::ZERO).addImm(1);
+
+ // Insert phi function to $Sink.
+ BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return Sink;
+}
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
new file mode 100644
index 0000000..04a28ce
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -0,0 +1,46 @@
+//===-- MipsSEISelLowering.h - MipsSE DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MipsSEISELLOWERING_H
+#define MipsSEISELLOWERING_H
+
+#include "MipsISelLowering.h"
+
+namespace llvm {
+ class MipsSETargetLowering : public MipsTargetLowering {
+ public:
+ explicit MipsSETargetLowering(MipsTargetMachine &TM);
+
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ private:
+ virtual bool
+ isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const;
+
+ virtual void
+ getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
+
+ MachineBasicBlock *emitBPOSGE32(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ };
+}
+
+#endif // MipsSEISELLOWERING_H
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 75b4c98..e11e5d1 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -33,7 +33,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
IsLinux(true), HasSEInReg(false), HasCondMov(false), HasSwap(false),
HasBitCount(false), HasFPIdx(false),
InMips16Mode(false), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false),
- IsAndroid(false), RM(_RM)
+ RM(_RM)
{
std::string CPUName = CPU;
if (CPUName.empty())
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 32baa3d..7a2e47c 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -95,9 +95,6 @@ protected:
// HasDSP, HasDSPR2 -- supports DSP ASE.
bool HasDSP, HasDSPR2;
- // IsAndroid -- target is android
- bool IsAndroid;
-
InstrItineraryData InstrItins;
// The instance to the register info section object
@@ -144,7 +141,6 @@ public:
bool inMicroMipsMode() const { return InMicroMipsMode; }
bool hasDSP() const { return HasDSP; }
bool hasDSPR2() const { return HasDSPR2; }
- bool isAndroid() const { return IsAndroid; }
bool isLinux() const { return IsLinux; }
bool useSmallSection() const { return UseSmallSection; }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 1b91e8b..3336358 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -54,7 +54,7 @@ MipsTargetMachine(const Target &T, StringRef TT,
"E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64")),
InstrInfo(MipsInstrInfo::create(*this)),
FrameLowering(MipsFrameLowering::create(*this, Subtarget)),
- TLInfo(*this), TSInfo(*this), JITInfo() {
+ TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), JITInfo() {
}
void MipsebTargetMachine::anchor() { }
@@ -116,6 +116,8 @@ bool MipsPassConfig::addPreEmitPass() {
// NOTE: long branch has not been implemented for mips16.
if (TM.getSubtarget<MipsSubtarget>().hasStandardEncoding())
addPass(createMipsLongBranchPass(TM));
+ if (TM.getSubtarget<MipsSubtarget>().inMips16Mode())
+ addPass(createMipsConstantIslandPass(TM));
return true;
}
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index c4928c2..7e5f192 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -34,7 +34,7 @@ class MipsTargetMachine : public LLVMTargetMachine {
const DataLayout DL; // Calculates type size & alignment
OwningPtr<const MipsInstrInfo> InstrInfo;
OwningPtr<const MipsFrameLowering> FrameLowering;
- MipsTargetLowering TLInfo;
+ OwningPtr<const MipsTargetLowering> TLInfo;
MipsSelectionDAGInfo TSInfo;
MipsJITInfo JITInfo;
@@ -63,7 +63,7 @@ public:
}
virtual const MipsTargetLowering *getTargetLowering() const {
- return &TLInfo;
+ return TLInfo.get();
}
virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const {
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 5ee747a..e9a9fbf 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -101,7 +101,13 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
// Operations not directly supported by NVPTX.
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i1, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i8, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i64, Expand);
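  // Note (assumption): BR_CC legality is queried with the type of the values
  // being compared rather than with MVT::Other, so each comparison type has to
  // be expanded individually for the setting to take effect.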
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
@@ -716,16 +722,15 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
unsigned sz = Ins[i].VT.getSizeInBits();
if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8;
- std::vector<EVT> LoadRetVTs;
- LoadRetVTs.push_back(Ins[i].VT);
- LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue);
- std::vector<SDValue> LoadRetOps;
- LoadRetOps.push_back(Chain);
- LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
- LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32));
- LoadRetOps.push_back(InFlag);
+ EVT LoadRetVTs[] = { Ins[i].VT, MVT::Other, MVT::Glue };
+ SDValue LoadRetOps[] = {
+ Chain,
+ DAG.getConstant(1, MVT::i32),
+ DAG.getConstant(resoffset, MVT::i32),
+ InFlag
+ };
SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs,
- &LoadRetOps[0], LoadRetOps.size());
+ LoadRetOps, array_lengthof(LoadRetOps));
Chain = retval.getValue(1);
InFlag = retval.getValue(2);
InVals.push_back(retval);
@@ -750,16 +755,15 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
std::vector<SDValue> tempRetVals;
for (unsigned j=0; j<numelems; ++j) {
- std::vector<EVT> MoveRetVTs;
- MoveRetVTs.push_back(elemtype);
- MoveRetVTs.push_back(MVT::Other); MoveRetVTs.push_back(MVT::Glue);
- std::vector<SDValue> MoveRetOps;
- MoveRetOps.push_back(Chain);
- MoveRetOps.push_back(DAG.getConstant(0, MVT::i32));
- MoveRetOps.push_back(DAG.getConstant(paramNum, MVT::i32));
- MoveRetOps.push_back(InFlag);
+ EVT MoveRetVTs[] = { elemtype, MVT::Other, MVT::Glue };
+ SDValue MoveRetOps[] = {
+ Chain,
+ DAG.getConstant(0, MVT::i32),
+ DAG.getConstant(paramNum, MVT::i32),
+ InFlag
+ };
SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs,
- &MoveRetOps[0], MoveRetOps.size());
+ MoveRetOps, array_lengthof(MoveRetOps));
Chain = retval.getValue(1);
InFlag = retval.getValue(2);
tempRetVals.push_back(retval);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 95e7b55..14afc14 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -136,7 +136,7 @@ public:
NVPTXTargetMachine *nvTM;
// PTX always uses 32-bit shift amounts
- virtual MVT getShiftAmountTy(EVT LHSTy) const {
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const {
return MVT::i32;
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index 7917f77..709daa4 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -12,6 +12,8 @@
#include "llvm/MC/MCFixup.h"
+#undef PPC
+
namespace llvm {
namespace PPC {
enum Fixups {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 4a42092..38a7420 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -47,6 +47,10 @@ MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
uint8_t OSABI);
} // End llvm namespace
+// Generated files will use "namespace PPC". To avoid symbol clash,
+// undefine PPC here. PPC may be predefined on some hosts.
+#undef PPC
+
// Defines symbolic names for PowerPC registers. This defines a mapping from
// register name to register number.
//
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index 972e138..b0680fb 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -17,6 +17,10 @@
// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC
+// Generated files will use "namespace PPC". To avoid symbol clash,
+// undefine PPC here. PPC may be predefined on some hosts.
+#undef PPC
+
namespace llvm {
namespace PPC {
/// Predicate - These are "(BI << 5) | BO" for various predicates.
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index b98cc48..ecece8c 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -189,12 +189,23 @@ INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
/// isCompareEquals - Returns true if the instruction is a compare equals
/// instruction with an immediate operand.
-static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) {
- if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) {
+static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp,
+ bool &Int64Cmp) {
+ if (MI->getOpcode() == PPC::CMPWI) {
SignedCmp = true;
+ Int64Cmp = false;
return true;
- } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) {
+ } else if (MI->getOpcode() == PPC::CMPDI) {
+ SignedCmp = true;
+ Int64Cmp = true;
+ return true;
+ } else if (MI->getOpcode() == PPC::CMPLWI) {
+ SignedCmp = false;
+ Int64Cmp = false;
+ return true;
+ } else if (MI->getOpcode() == PPC::CMPLDI) {
SignedCmp = false;
+ Int64Cmp = true;
return true;
}
@@ -353,9 +364,9 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
RI != RE; ++RI) {
IV_Opnd = &RI.getOperand();
- bool SignedCmp;
+ bool SignedCmp, Int64Cmp;
MachineInstr *MI = IV_Opnd->getParent();
- if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) &&
+ if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) &&
MI->getOperand(0).getReg() == PredReg) {
OldInsts.push_back(MI);
@@ -380,14 +391,14 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
assert(InitialValue->isReg() && "Expecting register for init value");
unsigned InitialValueReg = InitialValue->getReg();
- const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
+ MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
// Here we need to look for an immediate load (an li or lis/ori pair).
if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 ||
DefInstr->getOpcode() == PPC::ORI)) {
int64_t start = (short) DefInstr->getOperand(2).getImm();
- const MachineInstr *DefInstr2 =
- MRI->getVRegDef(DefInstr->getOperand(0).getReg());
+ MachineInstr *DefInstr2 =
+ MRI->getVRegDef(DefInstr->getOperand(1).getReg());
if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 ||
DefInstr2->getOpcode() == PPC::LIS)) {
DEBUG(dbgs() << " initial constant: " << *DefInstr);
@@ -399,17 +410,33 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
if ((count % iv_value) != 0) {
return 0;
}
- return new CountValue(count/iv_value);
+
+ OldInsts.push_back(DefInstr);
+ OldInsts.push_back(DefInstr2);
+
+ // count/iv_value, the trip count, should be positive here. If it
+ // is negative, that indicates that the counter will wrap.
+ if (Int64Cmp)
+ return new CountValue(count/iv_value);
+ else
+ return new CountValue(uint32_t(count/iv_value));
}
} else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 ||
DefInstr->getOpcode() == PPC::LI)) {
DEBUG(dbgs() << " initial constant: " << *DefInstr);
- int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm()));
+ int64_t count = ImmVal -
+ int64_t(short(DefInstr->getOperand(1).getImm()));
if ((count % iv_value) != 0) {
return 0;
}
- return new CountValue(count/iv_value);
+
+ OldInsts.push_back(DefInstr);
+
+ if (Int64Cmp)
+ return new CountValue(count/iv_value);
+ else
+ return new CountValue(uint32_t(count/iv_value));
} else if (iv_value == 1 || iv_value == -1) {
// We can't determine a constant starting value.
if (ImmVal == 0) {
@@ -417,8 +444,8 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
}
// FIXME: handle non-zero end value.
}
- // FIXME: handle non-unit increments (we might not want to introduce division
- // but we can handle some 2^n cases with shifts).
+ // FIXME: handle non-unit increments (we might not want to introduce
+ // division but we can handle some 2^n cases with shifts).
}
}
@@ -489,9 +516,10 @@ bool PPCCTRLoops::isDead(const MachineInstr *MI,
if (MO.isReg() && MO.isDef()) {
unsigned Reg = MO.getReg();
if (!MRI->use_nodbg_empty(Reg)) {
- // This instruction has users, but if the only user is the phi node for the
- // parent block, and the only use of that phi node is this instruction, then
- // this instruction is dead: both it (and the phi node) can be removed.
+ // This instruction has users, but if the only user is the phi node for
+ // the parent block, and the only use of that phi node is this
+ // instruction, then this instruction is dead: both it (and the phi
+ // node) can be removed.
MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg);
if (llvm::next(I) == MRI->use_end() &&
I.getOperand().getParent()->isPHI()) {
@@ -594,6 +622,16 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
DEBUG(dbgs() << "failed to get trip count!\n");
return false;
}
+
+ if (TripCount->isImm()) {
+ DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n");
+
+ // FIXME: We currently can't form 64-bit constants
+ // (including 32-bit unsigned constants)
+ if (!isInt<32>(TripCount->getImm()))
+ return false;
+ }
+
// Does the loop contain any invalid instructions?
if (containsInvalidInstruction(L)) {
return false;
@@ -664,13 +702,14 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
// Put the trip count in a register for transfer into the count register.
int64_t CountImm = TripCount->getImm();
- assert(!TripCount->isNeg() && "Constant trip count must be positive");
+ if (TripCount->isNeg())
+ CountImm = -CountImm;
CountReg = MF->getRegInfo().createVirtualRegister(RC);
- if (CountImm > 0xFFFF) {
+ if (abs64(CountImm) > 0x7FFF) {
BuildMI(*Preheader, InsertPos, dl,
TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS),
- CountReg).addImm(CountImm >> 16);
+ CountReg).addImm((CountImm >> 16) & 0xFFFF);
unsigned CountReg1 = CountReg;
CountReg = MF->getRegInfo().createVirtualRegister(RC);
BuildMI(*Preheader, InsertPos, dl,
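To make the trip-count handling above easier to follow: the pass divides the distance between the initial value and the compare immediate by the induction step, and when the controlling compare is only 32 bits wide the quotient has to be reinterpreted at 32-bit width before it is used as a trip count. The snippet below is a standalone sketch of just that arithmetic, with invented names; it is not the pass itself.

    #include <cstdint>
    #include <cstdio>

    // Hypothetical mirror of the arithmetic in getTripCount(): divide the
    // distance by the step, then truncate to 32 bits if the hardware compare
    // that ends the loop is only a 32-bit compare.
    static int64_t constantTripCount(int64_t start, int64_t limit, int64_t step,
                                     bool int64Cmp) {
      int64_t dist = limit - start;
      if (step == 0 || dist % step != 0)
        return -1;                                  // no usable constant count
      int64_t trips = dist / step;
      return int64Cmp ? trips : (int64_t)(uint32_t)trips;
    }

    int main() {
      // Same distance, different compare widths: the 32-bit case wraps to 0.
      printf("%lld\n", (long long)constantTripCount(0, 1LL << 33, 1, true));
      printf("%lld\n", (long long)constantTripCount(0, 1LL << 33, 1, false));
      return 0;
    }
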
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 0a396e6..353560d 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -188,13 +188,26 @@ static bool spillsCR(const MachineFunction &MF) {
return FuncInfo->isCRSpilled();
}
+static bool hasSpills(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->hasSpills();
+}
+
+static bool hasNonRISpills(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->hasNonRISpills();
+}
+
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
-void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
+ bool UpdateMF,
+ bool UseEstimate) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
// Get the number of bytes to allocate from the FrameInfo
- unsigned FrameSize = MFI->getStackSize();
+ unsigned FrameSize =
+ UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize();
// Get the alignments provided by the target, and the maximum alignment
// (if any) of the fixed frame objects.
@@ -223,8 +236,9 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
&& spillsCR(MF)) &&
(!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
// No need for frame
- MFI->setStackSize(0);
- return;
+ if (UpdateMF)
+ MFI->setStackSize(0);
+ return 0;
}
// Get the maximum call frame size of all the calls.
@@ -241,7 +255,8 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
// Update maximum call frame size.
- MFI->setMaxCallFrameSize(maxCallFrameSize);
+ if (UpdateMF)
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
// Include call frame size in total.
FrameSize += maxCallFrameSize;
@@ -250,7 +265,10 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
FrameSize = (FrameSize + AlignMask) & ~AlignMask;
// Update frame info.
- MFI->setStackSize(FrameSize);
+ if (UpdateMF)
+ MFI->setStackSize(FrameSize);
+
+ return FrameSize;
}
// hasFP - Return true if the specified function actually has a dedicated frame
@@ -311,11 +329,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
MBBI = MBB.begin();
// Work out frame sizes.
- // FIXME: determineFrameLayout() may change the frame size. This should be
- // moved upper, to some hook.
- determineFrameLayout(MF);
- unsigned FrameSize = MFI->getStackSize();
-
+ unsigned FrameSize = determineFrameLayout(MF);
int NegFrameSize = -FrameSize;
// Get processor type.
@@ -780,7 +794,7 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
void
PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
+ RegScavenger *) const {
const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
// Save and clear the LR state.
@@ -822,30 +836,15 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
FI->setCRSpillFrameIndex(FrameIdx);
}
-
- // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
- // a large stack, which will require scavenging a register to materialize a
- // large offset.
- // FIXME: this doesn't actually check stack size, so is a bit pessimistic
- // FIXME: doesn't detect whether or not we need to spill vXX, which requires
- // r0 for now.
-
- if (RegInfo->requiresRegisterScavenging(MF))
- if (needsFP(MF) || spillsCR(MF)) {
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
- }
}
-void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
- const {
+void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS) const {
// Early exit if not using the SVR4 ABI.
- if (!Subtarget.isSVR4ABI())
+ if (!Subtarget.isSVR4ABI()) {
+ addScavengingSpillSlot(MF, RS);
return;
+ }
// Get callee saved register information.
MachineFrameInfo *FFI = MF.getFrameInfo();
@@ -853,6 +852,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
// Early exit if no callee saved registers are modified!
if (CSI.empty() && !needsFP(MF)) {
+ addScavengingSpillSlot(MF, RS);
return;
}
@@ -1031,6 +1031,37 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
}
}
+
+ addScavengingSpillSlot(MF, RS);
+}
+
+void
+PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
+ // a large stack, which will require scavenging a register to materialize a
+ // large offset.
+
+ // We need to have a scavenger spill slot for spills if the frame size is
+ // large. In case there is no free register for large-offset addressing,
+ // this slot is used for the necessary emergency spill. Also, we need the
+ // slot for dynamic stack allocations.
+
+ // The scavenger might be invoked if the frame offset does not fit into
+ // the 16-bit immediate. We don't know the complete frame size here
+ // because we've not yet computed callee-saved register spills or the
+ // needed alignment padding.
+ unsigned StackSize = determineFrameLayout(MF, false, true);
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (MFI->hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
+ (hasSpills(MF) && !isInt<16>(StackSize))) {
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
}
bool
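As a compact restatement of when the new addScavengingSpillSlot() above actually reserves an emergency slot, here is a standalone predicate over the same inputs; the struct and the sample values are invented for illustration, and only the condition mirrors the hunk.

    #include <cstdint>
    #include <cstdio>

    struct FrameFacts {
      bool hasVarSizedObjects;   // dynamic allocas
      bool spillsCR;             // condition-register spills
      bool hasNonRISpills;       // spills that only have r+r (indexed) forms
      bool hasSpills;            // any spill at all
      uint64_t estimatedStackSize;
    };

    static bool fitsInSigned16(int64_t v) { return v >= -32768 && v <= 32767; }

    // A slot is reserved whenever a frame offset might not be encodable as a
    // 16-bit immediate, or the spill/restore has no register+immediate form.
    static bool needsScavengingSlot(const FrameFacts &f) {
      return f.hasVarSizedObjects || f.spillsCR || f.hasNonRISpills ||
             (f.hasSpills && !fitsInSigned16((int64_t)f.estimatedStackSize));
    }

    int main() {
      FrameFacts smallFrame = {false, false, false, true, 4096};
      FrameFacts bigFrame   = {false, false, false, true, 1u << 20};
      printf("%d %d\n", needsScavengingSlot(smallFrame), needsScavengingSlot(bigFrame));
      return 0;
    }
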
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index d09e47f..53ee326 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -32,7 +32,9 @@ public:
Subtarget(sti) {
}
- void determineFrameLayout(MachineFunction &MF) const;
+ unsigned determineFrameLayout(MachineFunction &MF,
+ bool UpdateMF = true,
+ bool UseEstimate = false) const;
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
@@ -44,7 +46,9 @@ public:
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+ void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
@@ -144,6 +148,9 @@ public:
return 0;
}
+ // Note that the offsets here overlap, but this is fixed up in
+ // processFunctionBeforeFrameFinalized.
+
static const SpillSlot Offsets[] = {
// Floating-point register save area offsets.
{PPC::F31, -8},
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index cf1f459..741e25e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -57,6 +57,9 @@ cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
+static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
+cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
+
static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
if (TM.getSubtargetImpl()->isDarwin())
return new TargetLoweringObjectFileMachO();
@@ -1180,13 +1183,15 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SDValue Ptr;
EVT VT;
+ unsigned Alignment;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
-
+ Alignment = LD->getAlignment();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
VT = ST->getMemoryVT();
+ Alignment = ST->getAlignment();
} else
return false;
@@ -1205,6 +1210,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
return false;
} else {
+ // LDU/STU need an address with at least 4-byte alignment.
+ if (Alignment < 4)
+ return false;
+
// reg + imm * 4.
if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
return false;
@@ -4786,12 +4795,13 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
EVT VT = Op.getValueType();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- std::vector<EVT> NodeTys;
SDValue MFFSreg, InFlag;
// Save FP Control Word to register
- NodeTys.push_back(MVT::f64); // return register
- NodeTys.push_back(MVT::Glue); // unused in this context
+ EVT NodeTys[] = {
+ MVT::f64, // return register
+ MVT::Glue // unused in this context
+ };
SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
// Save FP register to stack slot
@@ -5408,9 +5418,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(3), // RHS
DAG.getConstant(CompareOpc, MVT::i32)
};
- std::vector<EVT> VTs;
- VTs.push_back(Op.getOperand(2).getValueType());
- VTs.push_back(MVT::Glue);
+ EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
// Now that we have the comparison, emit a copy from the CR to a GPR.
@@ -6466,14 +6474,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
// Create the PPCISD altivec 'dot' comparison node.
- std::vector<EVT> VTs;
SDValue Ops[] = {
LHS.getOperand(2), // LHS of compare
LHS.getOperand(3), // RHS of compare
DAG.getConstant(CompareOpc, MVT::i32)
};
- VTs.push_back(LHS.getOperand(2).getValueType());
- VTs.push_back(MVT::Glue);
+ EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
// Unpack the result based on how the target uses it.
@@ -6854,6 +6860,32 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
}
}
+bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+ bool *Fast) const {
+ if (DisablePPCUnaligned)
+ return false;
+
+ // PowerPC supports unaligned memory access for simple non-vector types.
+ // Although accessing unaligned addresses is not as efficient as accessing
+ // aligned addresses, it is generally more efficient than manual expansion,
+ // and generally only traps for software emulation when crossing page
+ // boundaries.
+
+ if (!VT.isSimple())
+ return false;
+
+ if (VT.getSimpleVT().isVector())
+ return false;
+
+ if (VT == MVT::ppcf128)
+ return false;
+
+ if (Fast)
+ *Fast = true;
+
+ return true;
+}
+
/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
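The allowsUnalignedMemoryAccesses hook added above lets the lowering issue a misaligned access directly instead of expanding it by hand. The standalone sketch below contrasts the two paths a consumer of such a hook chooses between; the hook stand-in, names, and little-endian byte order are arbitrary choices for this sketch, not PPC lowering code.

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    static bool allowsUnaligned() { return true; }   // stand-in for the target hook

    // Manual expansion: assemble a 32-bit value from four byte loads
    // (little-endian order chosen arbitrarily for the sketch).
    static uint32_t loadExpanded(const unsigned char *p) {
      return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
             ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
    }

    // Direct access via memcpy, which compilers turn into a single
    // (possibly unaligned) load on targets that permit it.
    static uint32_t loadDirect(const unsigned char *p) {
      uint32_t v;
      memcpy(&v, p, sizeof v);
      return v;
    }

    int main() {
      unsigned char buf[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      const unsigned char *p = buf + 1;               // misaligned pointer
      uint32_t v = allowsUnaligned() ? loadDirect(p) : loadExpanded(p);
      printf("%08x\n", v);
      return 0;
    }
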
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index f5d418c..8d44d9f 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -329,7 +329,7 @@ namespace llvm {
/// DAG node.
virtual const char *getTargetNodeName(unsigned Opcode) const;
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
/// getSetCCResultType - Return the ISD::SETCC ValueType
virtual EVT getSetCCResultType(EVT VT) const;
@@ -449,6 +449,10 @@ namespace llvm {
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
MachineFunction &MF) const;
+ /// Is unaligned memory access allowed for the given type, and is it fast
+ /// relative to software emulation?
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const;
+
/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 0120130..bca1bd5 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -555,7 +555,8 @@ def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src),
PPC970_DGroup_Cracked;
def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src),
"lwa $rD, $src", LdStLWA,
- [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64,
+ [(set G8RC:$rD,
+ (aligned4sextloadi32 ixaddr:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src),
"lhax $rD, $src", LdStLHA,
@@ -648,7 +649,7 @@ def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result),
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src),
"ld $rD, $src", LdStLD,
- [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64;
+ [(set G8RC:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
def LDrs : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrs:$src),
"ld $rD, $src", LdStLD,
[]>, isPPC64;
@@ -682,6 +683,10 @@ def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
"ldx $rD, $src", LdStLD,
[(set G8RC:$rD, (load xaddr:$src))]>, isPPC64;
+let isCodeGenOnly = 1 in
+def LDXu : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
+ "ldx $rD, $src", LdStLD,
+ [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64;
let mayLoad = 1 in
def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr),
@@ -798,7 +803,7 @@ def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst),
// Normal 8-byte stores.
def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst),
"std $rS, $dst", LdStSTD,
- [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64;
+ [(aligned4store G8RC:$rS, ixaddr:$dst)]>, isPPC64;
def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
"stdx $rS, $dst", LdStSTD,
[(store G8RC:$rS, xaddr:$dst)]>, isPPC64,
@@ -833,8 +838,9 @@ def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
s16immX4:$ptroff, ptr_rc:$ptrreg),
"stdu $rS, $ptroff($ptrreg)", LdStSTDU,
- [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
+ [(set ptr_rc:$ea_res,
+ (aligned4pre_store G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">,
isPPC64;
@@ -979,3 +985,13 @@ def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)),
(ADDIS8 G8RC:$in, tjumptable:$g)>;
def : Pat<(add G8RC:$in, (PPChi tblockaddress:$g, 0)),
(ADDIS8 G8RC:$in, tblockaddress:$g)>;
+
+// Patterns to match r+r indexed loads and stores for
+// addresses without at least 4-byte alignment.
+def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)),
+ (LWAX xoaddr:$src)>;
+def : Pat<(i64 (unaligned4load xoaddr:$src)),
+ (LDX xoaddr:$src)>;
+def : Pat<(unaligned4store G8RC:$rS, xoaddr:$dst),
+ (STDX G8RC:$rS, xoaddr:$dst)>;
+
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 0cf28ae..0ed7ff2 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -182,6 +182,9 @@ class VX2_Int<bits<11> xo, string opc, Intrinsic IntID>
//===----------------------------------------------------------------------===//
// Instruction Definitions.
+def HasAltivec : Predicate<"PPCSubTarget.hasAltivec()">;
+let Predicates = [HasAltivec] in {
+
def DSS : DSS_Form<822, (outs),
(ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
"dss $STRM", LdStLoad /*FIXME*/, []>;
@@ -733,3 +736,6 @@ def : Pat<(v4f32 (ftrunc (v4f32 VRRC:$vA))),
(VRFIZ VRRC:$vA)>;
def : Pat<(v4f32 (fnearbyint (v4f32 VRRC:$vA))),
(VRFIN VRRC:$vA)>;
+
+} // end HasAltivec
+
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index a0517a8..7fe7880 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -33,11 +33,6 @@
#define GET_INSTRINFO_CTOR
#include "PPCGenInstrInfo.inc"
-namespace llvm {
-extern cl::opt<bool> DisablePPC32RS;
-extern cl::opt<bool> DisablePPC64RS;
-}
-
using namespace llvm;
static cl::
@@ -444,7 +439,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
unsigned SrcReg, bool isKill,
int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const{
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI) const{
DebugLoc DL;
if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
if (SrcReg != PPC::LR) {
@@ -489,47 +485,11 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
} else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
- if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
- (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
- .addReg(SrcReg,
- getKillRegState(isKill)),
- FrameIdx));
- return true;
- } else {
- // FIXME: We need a scatch reg here. The trouble with using R0 is that
- // it's possible for the stack frame to be so big the save location is
- // out of range of immediate offsets, necessitating another register.
- // We hack this on Darwin by reserving R2. It's probably broken on Linux
- // at the moment.
-
- bool is64Bit = TM.getSubtargetImpl()->isPPC64();
- // We need to store the CR in the low 4-bits of the saved value. First,
- // issue a MFCR to save all of the CRBits.
- unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- (is64Bit ? PPC::X2 : PPC::R2) :
- (is64Bit ? PPC::X0 : PPC::R0);
- NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud :
- PPC::MFCRpseud), ScratchReg)
- .addReg(SrcReg, getKillRegState(isKill)));
-
- // If the saved register wasn't CR0, shift the bits left so that they are
- // in CR0's slot.
- if (SrcReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4;
- // rlwinm scratch, scratch, ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::RLWINM8 :
- PPC::RLWINM), ScratchReg)
- .addReg(ScratchReg).addImm(ShiftBits)
- .addImm(0).addImm(31));
- }
-
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ?
- PPC::STW8 : PPC::STW))
- .addReg(ScratchReg,
- getKillRegState(isKill)),
- FrameIdx));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ return true;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
// FIXME: We use CRi here because there is no mtcrf on a bit. Since the
// backend currently only uses CR1EQ as an individual bit, this should
@@ -562,23 +522,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
Reg = PPC::CR7;
return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
- &PPC::CRRCRegClass, NewMIs);
+ &PPC::CRRCRegClass, NewMIs, NonRI);
} else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
- // We don't have indexed addressing for vector loads. Emit:
- // R0 = ADDI FI#
- // STVX VAL, 0, R0
- //
- // FIXME: We use R0 here, because it isn't available for RA.
- bool Is64Bit = TM.getSubtargetImpl()->isPPC64();
- unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
- unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0),
- FrameIdx, 0, 0));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX))
- .addReg(SrcReg, getKillRegState(isKill))
- .addReg(GPR0)
- .addReg(GPR0));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -595,10 +546,15 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
SmallVector<MachineInstr*, 4> NewMIs;
- if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) {
- PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasSpills();
+
+ bool NonRI = false;
+ if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs, NonRI))
FuncInfo->setSpillsCR();
- }
+
+ if (NonRI)
+ FuncInfo->setHasNonRISpills();
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
@@ -616,7 +572,8 @@ bool
PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs)const{
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI) const{
if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
if (DestReg != PPC::LR) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
@@ -642,37 +599,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
FrameIdx));
} else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
- if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
- (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
- get(PPC::RESTORE_CR), DestReg)
- , FrameIdx));
- return true;
- } else {
- // FIXME: We need a scatch reg here. The trouble with using R0 is that
- // it's possible for the stack frame to be so big the save location is
- // out of range of immediate offsets, necessitating another register.
- // We hack this on Darwin by reserving R2. It's probably broken on Linux
- // at the moment.
- unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- PPC::R2 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
- ScratchReg), FrameIdx));
-
- // If the reloaded register isn't CR0, shift the bits right so that they are
- // in the right CR's slot.
- if (DestReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
- // rlwinm r11, r11, 32-ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
- .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
- .addImm(31));
- }
-
- NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ?
- PPC::MTCRF8 : PPC::MTCRF), DestReg)
- .addReg(ScratchReg));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_CR), DestReg),
+ FrameIdx));
+ return true;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
unsigned Reg = 0;
@@ -702,21 +632,12 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
Reg = PPC::CR7;
return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
- &PPC::CRRCRegClass, NewMIs);
+ &PPC::CRRCRegClass, NewMIs, NonRI);
} else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
- // We don't have indexed addressing for vector loads. Emit:
- // R0 = ADDI FI#
- // Dest = LVX 0, R0
- //
- // FIXME: We use R0 here, because it isn't available for RA.
- bool Is64Bit = TM.getSubtargetImpl()->isPPC64();
- unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
- unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0),
- FrameIdx, 0, 0));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(GPR0)
- .addReg(GPR0));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg),
+ FrameIdx));
+ NonRI = true;
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -734,10 +655,17 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
SmallVector<MachineInstr*, 4> NewMIs;
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
- if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) {
- PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasSpills();
+
+ bool NonRI = false;
+ if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs, NonRI))
FuncInfo->setSpillsCR();
- }
+
+ if (NonRI)
+ FuncInfo->setHasNonRISpills();
+
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 374213e..5d4ae91 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -71,11 +71,13 @@ class PPCInstrInfo : public PPCGenInstrInfo {
bool StoreRegToStackSlot(MachineFunction &MF,
unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI) const;
bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI) const;
public:
explicit PPCInstrInfo(PPCTargetMachine &TM);
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 460e943..3f181aa 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -278,6 +278,38 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{
return N->getZExtValue() == (uint64_t)(int)N->getZExtValue();
}], HI16>;
+// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
+// restricted memrix (offset/4) constants are alignment sensitive. If these
+// offsets are hidden behind TOC entries then the values of the lower-order
+// bits cannot be checked directly. As a result, we need to also incorporate
+// an alignment check into the relevant patterns.
+
+def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4pre_store : PatFrag<
+ (ops node:$val, node:$base, node:$offset),
+ (pre_store node:$val, node:$base, node:$offset), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+
+def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
+def unaligned4store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() < 4;
+}]>;
+def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
//===----------------------------------------------------------------------===//
// PowerPC Flag Definitions.
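The aligned4*/unaligned4* PatFrags above exist because DS-form instructions such as ld and std encode their displacement as offset/4, so they can only address 4-byte-aligned locations, while the X-form (r+r) variants have no such restriction. A tiny standalone sketch of the same selection rule; the names are hypothetical.

    #include <cstdio>

    enum class Form { DS, X };   // ld/std (imm encoded as offset/4) vs ldx/stdx (r+r)

    // Only accesses known to be at least 4-byte aligned may use the DS-form
    // encoding; everything else falls back to the indexed form.
    static Form pickForm(unsigned knownAlignment) {
      return knownAlignment >= 4 ? Form::DS : Form::X;
    }

    int main() {
      printf("%s\n", pickForm(8) == Form::DS ? "ld" : "ldx");
      printf("%s\n", pickForm(2) == Form::DS ? "ld" : "ldx");
      return 0;
    }
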
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 045b375..b1636a2 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -37,6 +37,13 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// PEI.
bool MustSaveLR;
+ /// Does this function have any stack spills.
+ bool HasSpills;
+
+ /// Does this function spill using instructions with only r+r (not r+i)
+ /// forms.
+ bool HasNonRISpills;
+
/// SpillsCR - Indicates whether CR is spilled in the current function.
bool SpillsCR;
@@ -78,6 +85,8 @@ public:
explicit PPCFunctionInfo(MachineFunction &MF)
: FramePointerSaveIndex(0),
ReturnAddrSaveIndex(0),
+ HasSpills(false),
+ HasNonRISpills(false),
SpillsCR(false),
LRStoreRequired(false),
MinReservedArea(0),
@@ -109,6 +118,12 @@ public:
void setMustSaveLR(bool U) { MustSaveLR = U; }
bool mustSaveLR() const { return MustSaveLR; }
+ void setHasSpills() { HasSpills = true; }
+ bool hasSpills() const { return HasSpills; }
+
+ void setHasNonRISpills() { HasNonRISpills = true; }
+ bool hasNonRISpills() const { return HasNonRISpills; }
+
void setSpillsCR() { SpillsCR = true; }
bool isCRSpilled() const { return SpillsCR; }
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index df245cc..e2c7221 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -46,26 +46,8 @@
#define GET_REGINFO_TARGET_DESC
#include "PPCGenRegisterInfo.inc"
-namespace llvm {
-cl::opt<bool> DisablePPC32RS("disable-ppc32-regscavenger",
- cl::init(false),
- cl::desc("Disable PPC32 register scavenger"),
- cl::Hidden);
-cl::opt<bool> DisablePPC64RS("disable-ppc64-regscavenger",
- cl::init(false),
- cl::desc("Disable PPC64 register scavenger"),
- cl::Hidden);
-}
-
using namespace llvm;
-// FIXME (64-bit): Should be inlined.
-bool
-PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const {
- return ((!DisablePPC32RS && !Subtarget.isPPC64()) ||
- (!DisablePPC64RS && Subtarget.isPPC64()));
-}
-
PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
const TargetInstrInfo &tii)
: PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
@@ -89,12 +71,6 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32;
}
-bool
-PPCRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
- return requiresRegisterScavenging(MF);
-}
-
-
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
const TargetRegisterClass *
@@ -139,12 +115,6 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R2); // System-reserved register
Reserved.set(PPC::R13); // Small Data Area pointer register
}
- // Reserve R2 on Darwin to hack around the problem of save/restore of CR
- // when the stack frame is too big to address directly; we need two regs.
- // This is a hack.
- if (Subtarget.isDarwinABI()) {
- Reserved.set(PPC::R2);
- }
// On PPC64, r13 is the thread pointer. Never allocate this register.
// Note that this is over conservative, as it also prevents allocation of R31
@@ -162,12 +132,6 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (Subtarget.isSVR4ABI()) {
Reserved.set(PPC::X2);
}
- // Reserve X2 on Darwin to hack around the problem of save/restore of CR
- // when the stack frame is too big to address directly; we need two regs.
- // This is a hack.
- if (Subtarget.isDarwinABI()) {
- Reserved.set(PPC::X2);
- }
}
if (PPCFI->needsFP(MF))
@@ -199,38 +163,10 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
-bool
-PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
- switch (RC->getID()) {
- case PPC::G8RCRegClassID:
- case PPC::GPRCRegClassID:
- case PPC::F8RCRegClassID:
- case PPC::F4RCRegClassID:
- case PPC::VRRCRegClassID:
- return true;
- default:
- return false;
- }
-}
-
//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
-/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered
-/// register first and then a spilled callee-saved register if that fails.
-static
-unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS,
- const TargetRegisterClass *RC, int SPAdj) {
- assert(RS && "Register scavenging must be on");
- unsigned Reg = RS->FindUnusedReg(RC);
- // FIXME: move ARM callee-saved reg scan to target independent code, then
- // search for already spilled CS register here.
- if (Reg == 0)
- Reg = RS->scavengeRegister(RC, II, SPAdj);
- return Reg;
-}
-
/// lowerDynamicAlloc - Generate the code for allocating an object in the
/// current frame. The sequence of code with be in the general form
///
@@ -271,28 +207,16 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
// Fortunately, a frame greater than 32K is rare.
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *RC = LP64 ? G8RC : GPRC;
-
- // FIXME (64-bit): Use "findScratchRegister"
- unsigned Reg;
- if (requiresRegisterScavenging(MF))
- Reg = findScratchRegister(II, RS, RC, SPAdj);
- else
- Reg = PPC::R0;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
.addReg(PPC::R31)
.addImm(FrameSize);
} else if (LP64) {
- if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
- BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
- .addImm(0)
- .addReg(PPC::X1);
- else
- BuildMI(MBB, II, dl, TII.get(PPC::LD), PPC::X0)
- .addImm(0)
- .addReg(PPC::X1);
+ BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
+ .addImm(0)
+ .addReg(PPC::X1);
} else {
BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg)
.addImm(0)
@@ -302,17 +226,10 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
// Grow the stack and update the stack pointer link, then determine the
// address of new allocated space.
if (LP64) {
- if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
- .addReg(Reg, RegState::Kill)
- .addReg(PPC::X1)
- .addReg(MI.getOperand(1).getReg());
- else
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
- .addReg(PPC::X0, RegState::Kill)
- .addReg(PPC::X1)
- .addReg(MI.getOperand(1).getReg());
-
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(MI.getOperand(1).getReg());
if (!MI.getOperand(1).isKill())
BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
.addReg(PPC::X1)
@@ -369,8 +286,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
(void) RS;
bool LP64 = Subtarget.isPPC64();
- unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
- (LP64 ? PPC::X0 : PPC::R0);
+ unsigned Reg = LP64 ? PPC::X0 : PPC::R0;
unsigned SrcReg = MI.getOperand(0).getReg();
// We need to store the CR in the low 4-bits of the saved value. First, issue
@@ -412,8 +328,7 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
(void) RS;
bool LP64 = Subtarget.isPPC64();
- unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
- (LP64 ? PPC::X0 : PPC::R0);
+ unsigned Reg = LP64 ? PPC::X0 : PPC::R0;
unsigned DestReg = MI.getOperand(0).getReg();
assert(MI.definesRegister(DestReg) &&
"RESTORE_CR does not define its destination");
@@ -499,14 +414,12 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
// Special case for pseudo-ops SPILL_CR and RESTORE_CR.
- if (requiresRegisterScavenging(MF)) {
- if (OpC == PPC::SPILL_CR) {
- lowerCRSpilling(II, FrameIndex, SPAdj, RS);
- return;
- } else if (OpC == PPC::RESTORE_CR) {
- lowerCRRestore(II, FrameIndex, SPAdj, RS);
- return;
- }
+ if (OpC == PPC::SPILL_CR) {
+ lowerCRSpilling(II, FrameIndex, SPAdj, RS);
+ return;
+ } else if (OpC == PPC::RESTORE_CR) {
+ lowerCRRestore(II, FrameIndex, SPAdj, RS);
+ return;
}
// Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
@@ -529,7 +442,25 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
isIXAddr = true;
break;
}
-
+
+ bool noImmForm = false;
+ switch (OpC) {
+ case PPC::LVEBX:
+ case PPC::LVEHX:
+ case PPC::LVEWX:
+ case PPC::LVX:
+ case PPC::LVXL:
+ case PPC::LVSL:
+ case PPC::LVSR:
+ case PPC::STVEBX:
+ case PPC::STVEHX:
+ case PPC::STVEWX:
+ case PPC::STVX:
+ case PPC::STVXL:
+ noImmForm = true;
+ break;
+ }
+
// Now add the frame object offset to the offset from r1.
int Offset = MFI->getObjectOffset(FrameIndex);
if (!isIXAddr)
@@ -553,7 +484,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// only "std" to a stack slot that is at least 4-byte aligned, but it can
// happen in invalid code.
if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
- (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) {
+ (!noImmForm &&
+ isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) {
if (isIXAddr)
Offset >>= 2; // The actual encoded value has the low two bits zero.
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
@@ -563,13 +495,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// The offset doesn't fit into a single register, scavenge one to build the
// offset in.
- unsigned SReg;
- if (requiresRegisterScavenging(MF)) {
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- SReg = findScratchRegister(II, RS, is64Bit ? G8RC : GPRC, SPAdj);
- } else
- SReg = is64Bit ? PPC::X0 : PPC::R0;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ unsigned SReg = MF.getRegInfo().createVirtualRegister(is64Bit ? G8RC : GPRC);
// Insert a set of rA with the full offset value before the ld, st, or add
BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SReg)
@@ -584,7 +512,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0
unsigned OperandBase;
- if (OpC != TargetOpcode::INLINEASM) {
+ if (noImmForm)
+ OperandBase = 1;
+ else if (OpC != TargetOpcode::INLINEASM) {
assert(ImmToIdxMap.count(OpC) &&
"No indexed form of load or store available!");
unsigned NewOpcode = ImmToIdxMap.find(OpC)->second;
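For the large-offset path above: once an offset stops fitting a signed 16-bit immediate, it is rebuilt in a scratch register from a high half (lis) and a low half ORed in afterwards. The standalone check below only demonstrates that this hi16/lo16 split round-trips a 32-bit offset; it is a sketch of the arithmetic, not the MachineInstr building.

    #include <cassert>
    #include <cstdint>

    // lis supplies the upper half and an OR of the low 16 bits fills in the
    // rest, so the original 32-bit offset is reconstructed exactly.
    static uint32_t rebuildOffset(int32_t offset) {
      int32_t hi = offset >> 16;                 // immediate handed to lis
      uint32_t lo = (uint32_t)offset & 0xFFFF;   // low half ORed in afterwards
      return ((uint32_t)hi << 16) | lo;
    }

    int main() {
      for (int32_t off : {0, 0x7FFF, 0x12345678, -4, -0x40000})
        assert(rebuildOffset(off) == (uint32_t)off);
      return 0;
    }
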
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 9840666..5f89f63 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -47,13 +47,18 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const;
- virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const;
-
- /// requiresRegisterScavenging - We require a register scavenger.
- /// FIXME (64-bit): Should be inlined.
- bool requiresRegisterScavenging(const MachineFunction &MF) const;
-
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
+ /// We require the register scavenger.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ return true;
+ }
void lowerDynamicAlloc(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS) const;
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index ba87918..e099a9f 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -23,11 +23,9 @@ class AMDGPUTargetMachine;
// R600 Passes
FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
-FunctionPass *createR600LowerConstCopy(TargetMachine &tm);
// SI Passes
FunctionPass *createSIAnnotateControlFlowPass();
-FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
FunctionPass *createSIInsertWaits(TargetMachine &tm);
diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
index 40f4741..1a26c77 100644
--- a/lib/Target/R600/AMDGPU.td
+++ b/lib/Target/R600/AMDGPU.td
@@ -38,3 +38,4 @@ include "AMDGPUInstrInfo.td"
include "AMDGPUIntrinsics.td"
include "AMDGPURegisterInfo.td"
include "AMDGPUInstructions.td"
+include "AMDGPUCallingConv.td"
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index c30dbe4..f600144 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -141,5 +141,5 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
- OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4);
+ OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
}
diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td
new file mode 100644
index 0000000..45ae37e
--- /dev/null
+++ b/lib/Target/R600/AMDGPUCallingConv.td
@@ -0,0 +1,42 @@
+//===---- AMDGPUCallingConv.td - Calling Conventions for Radeon GPUs ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the AMD Radeon GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+// Inversion of CCIfInReg
+class CCIfNotInReg<CCAction A> : CCIf<"!ArgFlags.isInReg()", A> {}
+
+// Calling convention for SI
+def CC_SI : CallingConv<[
+
+ CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[
+ SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
+ SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15
+ ]>>>,
+
+ CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow<
+ [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
+ [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR12, SGPR15 ]
+ >>>,
+
+ CCIfNotInReg<CCIfType<[f32, i32] , CCAssignToReg<[
+ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
+ VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
+ VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
+ VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
+ ]>>>
+
+]>;
+
+def CC_AMDGPU : CallingConv<[
+ CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().device()"#
+ "->getGeneration() == AMDGPUDeviceInfo::HD7XXX", CCDelegateTo<CC_SI>>
+]>;
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 0a33264..5995b6f 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -14,7 +14,10 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUISelLowering.h"
+#include "AMDGPURegisterInfo.h"
#include "AMDILIntrinsicInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -22,6 +25,8 @@
using namespace llvm;
+#include "AMDGPUGenCallingConv.inc"
+
AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
TargetLowering(TM, new TargetLoweringObjectFileELF()) {
@@ -64,17 +69,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
// TargetLowering Callbacks
//===---------------------------------------------------------------------===//
-SDValue AMDGPUTargetLowering::LowerFormalArguments(
- SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc DL, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
- for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
- InVals.push_back(SDValue());
- }
- return Chain;
+void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
+ const SmallVectorImpl<ISD::InputArg> &Ins) const {
+
+ State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
}
SDValue AMDGPUTargetLowering::LowerReturn(
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index 9e7d997..f31b646 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -39,15 +39,12 @@ protected:
bool isHWTrueValue(SDValue Op) const;
bool isHWFalseValue(SDValue Op) const;
+ void AnalyzeFormalArguments(CCState &State,
+ const SmallVectorImpl<ISD::InputArg> &Ins) const;
+
public:
AMDGPUTargetLowering(TargetMachine &TM);
- virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc DL, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
diff --git a/lib/Target/R600/AMDGPUIndirectAddressing.cpp b/lib/Target/R600/AMDGPUIndirectAddressing.cpp
index 15840b3..ed6c8ec 100644
--- a/lib/Target/R600/AMDGPUIndirectAddressing.cpp
+++ b/lib/Target/R600/AMDGPUIndirectAddressing.cpp
@@ -289,7 +289,6 @@ bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) {
// We only need to use REG_SEQUENCE for explicit defs, since the
// register coalescer won't do anything with the implicit defs.
- MachineInstr *DefInstr = MRI.getVRegDef(Reg);
if (!regHasExplicitDef(MRI, Reg)) {
continue;
}
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index 960f108..e740348 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -132,13 +132,6 @@ class FNEG <RegisterClass rc> : AMDGPUShaderInst <
[(set rc:$dst, (fneg rc:$src0))]
>;
-def SHADER_TYPE : AMDGPUShaderInst <
- (outs),
- (ins i32imm:$type),
- "SHADER_TYPE $type",
- [(int_AMDGPU_shader_type imm:$type)]
->;
-
} // usesCustomInserter = 1
multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
@@ -209,8 +202,8 @@ class Vector2_Build <ValueType vecType, RegisterClass vectorClass,
(vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1)
>;
-class Vector_Build <ValueType vecType, RegisterClass vectorClass,
- ValueType elemType, RegisterClass elemClass> : Pat <
+class Vector4_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
(vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
(elemType elemClass:$z), (elemType elemClass:$w))),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td
index 2ba2d4b..eecb25b 100644
--- a/lib/Target/R600/AMDGPUIntrinsics.td
+++ b/lib/Target/R600/AMDGPUIntrinsics.td
@@ -50,8 +50,6 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-
- def int_AMDGPU_shader_type : Intrinsic<[], [llvm_i32_ty], []>;
}
let TargetPrefix = "TGSI", isTarget = 1 in {
diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
index 26f842e..b723433 100644
--- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp
+++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
@@ -243,6 +243,7 @@ public:
initializeRegionInfoPass(*PassRegistry::getPassRegistry());
}
+ using Pass::doInitialization;
virtual bool doInitialization(Region *R, RGPassManager &RGM);
virtual bool runOnRegion(Region *R, RGPassManager &RGM);
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index e2f00be..0185747 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -17,6 +17,7 @@
#include "AMDGPU.h"
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
+#include "R600MachineScheduler.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "llvm/Analysis/Passes.h"
@@ -39,6 +40,14 @@ extern "C" void LLVMInitializeR600Target() {
RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
}
+static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMI(C, new R600SchedStrategy());
+}
+
+static MachineSchedRegistry
+SchedCustomRegistry("r600", "Run R600's custom scheduler",
+ createR600MachineScheduler);
+
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
TargetOptions Options,
@@ -70,7 +79,13 @@ namespace {
class AMDGPUPassConfig : public TargetPassConfig {
public:
AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ enablePass(&MachineSchedulerID);
+ MachineSchedRegistry::setDefault(createR600MachineScheduler);
+ }
+ }
AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
return getTM<AMDGPUTargetMachine>();
@@ -112,11 +127,6 @@ bool AMDGPUPassConfig::addInstSelector() {
}
bool AMDGPUPassConfig::addPreRegAlloc() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-
- if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
- addPass(createSIAssignInterpRegsPass(*TM));
- }
addPass(createAMDGPUConvertToISAPass(*TM));
return false;
}
@@ -143,7 +153,6 @@ bool AMDGPUPassConfig::addPreEmitPass() {
addPass(createAMDGPUCFGStructurizerPass(*TM));
addPass(createR600ExpandSpecialInstrsPass(*TM));
addPass(&FinalizeMachineBundlesID);
- addPass(createR600LowerConstCopy(*TM));
} else {
addPass(createSILowerControlFlowPass(*TM));
}
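The target-machine change above registers a factory under the name "r600" with the machine-scheduler registry and enables the machine scheduler only for pre-SI generations. The snippet below is a generic, standalone sketch of that register-a-factory-by-name pattern; none of it is LLVM API.

    #include <cstdio>
    #include <functional>
    #include <map>
    #include <string>

    struct Scheduler { std::string name; };

    // Name -> factory map: targets register a constructor under a string key,
    // and a default key can be selected later.
    static std::map<std::string, std::function<Scheduler*()>> schedRegistry;

    static Scheduler *createToyR600Scheduler() { return new Scheduler{"r600 custom"}; }

    int main() {
      schedRegistry["r600"] = createToyR600Scheduler;
      Scheduler *s = schedRegistry["r600"]();
      printf("%s\n", s->name.c_str());
      delete s;
      return 0;
    }
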
diff --git a/lib/Target/R600/AMDIL.h b/lib/Target/R600/AMDIL.h
index b39fbdb..39ab664 100644
--- a/lib/Target/R600/AMDIL.h
+++ b/lib/Target/R600/AMDIL.h
@@ -96,24 +96,23 @@ enum AddressSpaces {
ADDRESS_NONE = 5, ///< Address space for unknown memory.
PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0)
PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1)
- USER_SGPR_ADDRESS = 8, ///< Address space for USER_SGPRS on SI
- CONSTANT_BUFFER_0 = 9,
- CONSTANT_BUFFER_1 = 10,
- CONSTANT_BUFFER_2 = 11,
- CONSTANT_BUFFER_3 = 12,
- CONSTANT_BUFFER_4 = 13,
- CONSTANT_BUFFER_5 = 14,
- CONSTANT_BUFFER_6 = 15,
- CONSTANT_BUFFER_7 = 16,
- CONSTANT_BUFFER_8 = 17,
- CONSTANT_BUFFER_9 = 18,
- CONSTANT_BUFFER_10 = 19,
- CONSTANT_BUFFER_11 = 20,
- CONSTANT_BUFFER_12 = 21,
- CONSTANT_BUFFER_13 = 22,
- CONSTANT_BUFFER_14 = 23,
- CONSTANT_BUFFER_15 = 24,
- LAST_ADDRESS = 25
+ CONSTANT_BUFFER_0 = 8,
+ CONSTANT_BUFFER_1 = 9,
+ CONSTANT_BUFFER_2 = 10,
+ CONSTANT_BUFFER_3 = 11,
+ CONSTANT_BUFFER_4 = 12,
+ CONSTANT_BUFFER_5 = 13,
+ CONSTANT_BUFFER_6 = 14,
+ CONSTANT_BUFFER_7 = 15,
+ CONSTANT_BUFFER_8 = 16,
+ CONSTANT_BUFFER_9 = 17,
+ CONSTANT_BUFFER_10 = 18,
+ CONSTANT_BUFFER_11 = 19,
+ CONSTANT_BUFFER_12 = 20,
+ CONSTANT_BUFFER_13 = 21,
+ CONSTANT_BUFFER_14 = 22,
+ CONSTANT_BUFFER_15 = 23,
+ LAST_ADDRESS = 24
};
} // namespace AMDGPUAS
diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
index aa8ab6b..b0cd0f9 100644
--- a/lib/Target/R600/AMDILCFGStructurizer.cpp
+++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -2595,6 +2595,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
static int getBranchNzeroOpcode(int oldOpcode) {
switch(oldOpcode) {
+ case AMDGPU::JUMP_COND:
case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
case AMDGPU::BRANCH_COND_i32:
case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
@@ -2606,6 +2607,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
static int getBranchZeroOpcode(int oldOpcode) {
switch(oldOpcode) {
+ case AMDGPU::JUMP_COND:
case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
case AMDGPU::BRANCH_COND_i32:
case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
@@ -2617,6 +2619,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
static int getContinueNzeroOpcode(int oldOpcode) {
switch(oldOpcode) {
+ case AMDGPU::JUMP_COND:
case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
default:
assert(0 && "internal error");
@@ -2626,6 +2629,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
static int getContinueZeroOpcode(int oldOpcode) {
switch(oldOpcode) {
+ case AMDGPU::JUMP_COND:
case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
default:
assert(0 && "internal error");
@@ -2654,8 +2658,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
static bool isCondBranch(MachineInstr *instr) {
switch (instr->getOpcode()) {
- case AMDGPU::JUMP:
- return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0;
+ case AMDGPU::JUMP_COND:
case AMDGPU::BRANCH_COND_i32:
case AMDGPU::BRANCH_COND_f32:
break;
@@ -2668,7 +2671,6 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
static bool isUncondBranch(MachineInstr *instr) {
switch (instr->getOpcode()) {
case AMDGPU::JUMP:
- return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0;
case AMDGPU::BRANCH:
return true;
default:
diff --git a/lib/Target/R600/AMDILDevice.cpp b/lib/Target/R600/AMDILDevice.cpp
index eec5059..db8e01e 100644
--- a/lib/Target/R600/AMDILDevice.cpp
+++ b/lib/Target/R600/AMDILDevice.cpp
@@ -115,10 +115,18 @@ bool AMDGPUDevice::usesSoftware(AMDGPUDeviceInfo::Caps Mode) const {
std::string
AMDGPUDevice::getDataLayout() const {
- return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
- "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
- "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
- "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
- "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
- "-n8:16:32:64");
+ std::string DataLayout = std::string(
+ "e"
+ "-p:32:32:32"
+ "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128"
+ "-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+ "-n32:64"
+ );
+
+ if (usesHardware(AMDGPUDeviceInfo::DoubleOps)) {
+ DataLayout.append("-f64:64:64");
+ }
+
+ return DataLayout;
}
diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp
index e77b9dc..fa8f62d 100644
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -162,6 +162,35 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
switch (Opc) {
default: break;
+ case ISD::BUILD_VECTOR: {
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ break;
+ }
+ // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREGs,
+ // which add a 128-bit register copy when going through the
+ // TwoAddressInstructions pass. We want to avoid 128-bit copies as much as
+ // possible because they can't be bundled by our scheduler.
+ SDValue RegSeqArgs[9] = {
+ CurDAG->getTargetConstant(AMDGPU::R600_Reg128RegClassID, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
+ };
+ bool IsRegSeq = true;
+ for (unsigned i = 0; i < N->getNumOperands(); i++) {
+ if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
+ IsRegSeq = false;
+ break;
+ }
+ RegSeqArgs[2 * i + 1] = N->getOperand(i);
+ }
+ if (!IsRegSeq)
+ break;
+ return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
+ RegSeqArgs, 2 * N->getNumOperands() + 1);
+ }
case ISD::ConstantFP:
case ISD::Constant: {
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
@@ -336,17 +365,34 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
SDValue Operand = Ops[OperandIdx[i] - 1];
switch (Operand.getOpcode()) {
case AMDGPUISD::CONST_ADDRESS: {
- if (i == 2)
- break;
SDValue CstOffset;
- if (!Operand.getValueType().isVector() &&
- SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) {
- Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
- Ops[SelIdx[i] - 1] = CstOffset;
- return true;
+ if (Operand.getValueType().isVector() ||
+ !SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset))
+ break;
+
+ // Gather other constant values already used by this instruction group
+ std::vector<unsigned> Consts;
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = OperandIdx[j];
+ if (SrcIdx < 0)
+ break;
+ if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
+ if (Reg->getReg() == AMDGPU::ALU_CONST) {
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
+ Consts.push_back(Cst->getZExtValue());
+ }
+ }
}
+
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
+ Consts.push_back(Cst->getZExtValue());
+ if (!TII->fitsConstReadLimitations(Consts))
+ break;
+
+ Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
+ Ops[SelIdx[i] - 1] = CstOffset;
+ return true;
}
- break;
case ISD::FNEG:
if (NegIdx[i] < 0)
break;
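For the BUILD_VECTOR case added above: a REG_SEQUENCE node takes the register class as its first operand, followed by (value, sub-register index) pairs, so a 4-element build_vector yields 2 * 4 + 1 = 9 operands, matching RegSeqArgs[9]. A standalone model of that operand interleaving (plain C++, no LLVM dependencies, illustrative names only):

#include <cstdio>

int main() {
  // build_vector a, b, c, d selects to:
  //   REG_SEQUENCE R600_Reg128, a, sub0, b, sub1, c, sub2, d, sub3
  const char *Elts[4]    = {"a", "b", "c", "d"};
  const char *SubRegs[4] = {"sub0", "sub1", "sub2", "sub3"};
  std::printf("REG_SEQUENCE R600_Reg128");
  for (unsigned i = 0; i != 4; ++i)
    std::printf(", %s, %s", Elts[i], SubRegs[i]);
  std::printf("\n");
  return 0;
}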
diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp
index f65e1f3..922cac1 100644
--- a/lib/Target/R600/AMDILISelLowering.cpp
+++ b/lib/Target/R600/AMDILISelLowering.cpp
@@ -33,11 +33,6 @@
using namespace llvm;
//===----------------------------------------------------------------------===//
-// Calling Convention Implementation
-//===----------------------------------------------------------------------===//
-#include "AMDGPUGenCallingConv.inc"
-
-//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions End
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/AMDILSIDevice.cpp b/lib/Target/R600/AMDILSIDevice.cpp
index 3096c22..0d1de3d 100644
--- a/lib/Target/R600/AMDILSIDevice.cpp
+++ b/lib/Target/R600/AMDILSIDevice.cpp
@@ -36,10 +36,13 @@ AMDGPUSIDevice::getGeneration() const {
std::string
AMDGPUSIDevice::getDataLayout() const {
- return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"
- "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
- "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
- "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
- "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
- "-n8:16:32:64");
+ return std::string(
+ "e"
+ "-p:64:64:64"
+ "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128"
+ "-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"
+ "-v2048:2048:2048"
+ "-n32:64"
+ );
}
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
index 00f8b10..63c59e1 100644
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -37,11 +37,10 @@ add_llvm_target(R600CodeGen
R600ExpandSpecialInstrs.cpp
R600InstrInfo.cpp
R600ISelLowering.cpp
- R600LowerConstCopy.cpp
R600MachineFunctionInfo.cpp
+ R600MachineScheduler.cpp
R600RegisterInfo.cpp
SIAnnotateControlFlow.cpp
- SIAssignInterpRegs.cpp
SIInsertWaits.cpp
SIInstrInfo.cpp
SIISelLowering.cpp
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
index 8721f80..cd3a7ce 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -33,15 +33,6 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const {
return 0;
}
-
- virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
- return 0;
- }
- virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
- return 0;
- }
};
} // End namespace llvm
diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
index 6cc0077..e27abcc 100644
--- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -42,9 +42,6 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
const MCSubtargetInfo &STI;
MCContext &Ctx;
- /// \brief Encode a sequence of registers with the correct alignment.
- unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const;
-
/// \brief Can this operand also contain immediate values?
bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
@@ -65,14 +62,6 @@ public:
/// \returns the encoding for an MCOperand.
virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups) const;
-
- /// \brief Encoding for when 2 consecutive registers are used
- virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixup) const;
-
- /// \brief Encoding for when 4 consectuive registers are used
- virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixup) const;
};
} // End anonymous namespace
@@ -212,24 +201,3 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
return 0;
}
-//===----------------------------------------------------------------------===//
-// Custom Operand Encodings
-//===----------------------------------------------------------------------===//
-
-unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo,
- unsigned shift) const {
- unsigned regCode = MRI.getEncodingValue(MI.getOperand(OpNo).getReg());
- return (regCode & 0xff) >> shift;
-}
-
-unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI,
- unsigned OpNo ,
- SmallVectorImpl<MCFixup> &Fixup) const {
- return GPRAlign(MI, OpNo, 1);
-}
-
-unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI,
- unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixup) const {
- return GPRAlign(MI, OpNo, 2);
-}
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index b5c2a93..a73691d 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -50,8 +50,8 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::UREM, MVT::v4i32, Expand);
setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
- setOperationAction(ISD::BR_CC, MVT::i32, Custom);
- setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
setOperationAction(ISD::FSUB, MVT::f32, Expand);
@@ -65,8 +65,8 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
- setOperationAction(ISD::SETCC, MVT::i32, Custom);
- setOperationAction(ISD::SETCC, MVT::f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
@@ -94,6 +94,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::SELECT_CC);
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
setSchedulingPreference(Sched::VLIW);
}
@@ -105,7 +106,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
switch (MI->getOpcode()) {
default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
- case AMDGPU::SHADER_TYPE: break;
case AMDGPU::CLAMP_R600: {
MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
AMDGPU::MOV,
@@ -150,7 +150,13 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
MI->getOperand(1).getImm());
break;
-
+ case AMDGPU::CONST_COPY: {
+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
+ MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
+ TII->setImmOperand(NewMI, R600Operands::SRC0_SEL,
+ MI->getOperand(1).getImm());
+ break;
+ }
case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
@@ -215,8 +221,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
case AMDGPU::BRANCH:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
- .addOperand(MI->getOperand(0))
- .addReg(0);
+ .addOperand(MI->getOperand(0));
break;
case AMDGPU::BRANCH_COND_f32: {
@@ -227,7 +232,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
.addImm(OPCODE_IS_NOT_ZERO)
.addImm(0); // Flags
TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
.addOperand(MI->getOperand(0))
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
break;
@@ -241,7 +246,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
.addImm(OPCODE_IS_NOT_ZERO_INT)
.addImm(0); // Flags
TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
.addOperand(MI->getOperand(0))
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
break;
@@ -306,11 +311,9 @@ using namespace llvm::AMDGPUIntrinsic;
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
- case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::ROTL: return LowerROTL(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
- case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::FPOW: return LowerFPOW(Op, DAG);
@@ -470,44 +473,6 @@ SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
);
}
-SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
- SDValue Chain = Op.getOperand(0);
- SDValue CC = Op.getOperand(1);
- SDValue LHS = Op.getOperand(2);
- SDValue RHS = Op.getOperand(3);
- SDValue JumpT = Op.getOperand(4);
- SDValue CmpValue;
- SDValue Result;
-
- if (LHS.getValueType() == MVT::i32) {
- CmpValue = DAG.getNode(
- ISD::SELECT_CC,
- Op.getDebugLoc(),
- MVT::i32,
- LHS, RHS,
- DAG.getConstant(-1, MVT::i32),
- DAG.getConstant(0, MVT::i32),
- CC);
- } else if (LHS.getValueType() == MVT::f32) {
- CmpValue = DAG.getNode(
- ISD::SELECT_CC,
- Op.getDebugLoc(),
- MVT::f32,
- LHS, RHS,
- DAG.getConstantFP(1.0f, MVT::f32),
- DAG.getConstantFP(0.0f, MVT::f32),
- CC);
- } else {
- assert(0 && "Not valid type for br_cc");
- }
- Result = DAG.getNode(
- AMDGPUISD::BRANCH_COND,
- CmpValue.getDebugLoc(),
- MVT::Other, Chain,
- JumpT, CmpValue);
- return Result;
-}
-
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
DebugLoc DL,
unsigned DwordOffset) const {
@@ -576,12 +541,37 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
// Check if we can lower this to a native operation.
+ // Try to lower to a SET* instruction:
+ //
+ // SET* can match the following patterns:
+ //
+ // select_cc f32, f32, -1, 0, cc_any
+ // select_cc f32, f32, 1.0f, 0.0f, cc_any
+ // select_cc i32, i32, -1, 0, cc_any
+ //
+
+ // Move hardware True/False values to the correct operand.
+ if (isHWTrueValue(False) && isHWFalseValue(True)) {
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ std::swap(False, True);
+ CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
+ }
+
+ if (isHWTrueValue(True) && isHWFalseValue(False) &&
+ (CompareVT == VT || VT == MVT::i32)) {
+ // This can be matched by a SET* instruction.
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
+ }
+
// Try to lower to a CND* instruction:
- // CND* instructions requires RHS to be zero. Some SELECT_CC nodes that
- // can be lowered to CND* instructions can also be lowered to SET*
- // instructions. CND* instructions are cheaper, because they dont't
- // require additional instructions to convert their result to the correct
- // value type, so this check should be first.
+ //
+ // CND* can match the following patterns:
+ //
+ // select_cc f32, 0.0, f32, f32, cc_any
+ // select_cc f32, 0.0, i32, i32, cc_any
+ // select_cc i32, 0, f32, f32, cc_any
+ // select_cc i32, 0, i32, i32, cc_any
+ //
if (isZero(LHS) || isZero(RHS)) {
SDValue Cond = (isZero(LHS) ? RHS : LHS);
SDValue Zero = (isZero(LHS) ? LHS : RHS);
@@ -623,38 +613,6 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
}
- // Try to lower to a SET* instruction:
- //
- // CompareVT == MVT::f32 and VT == MVT::i32 is supported by the hardware,
- // but for the other case where CompareVT != VT, all operands of
- // SELECT_CC need to have the same value type, so we need to change True and
- // False to be the same type as LHS and RHS, and then convert the result of
- // the select_cc back to the correct type.
-
- // Move hardware True/False values to the correct operand.
- if (isHWTrueValue(False) && isHWFalseValue(True)) {
- ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
- std::swap(False, True);
- CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
- }
-
- if (isHWTrueValue(True) && isHWFalseValue(False)) {
- if (CompareVT != VT && VT == MVT::f32 && CompareVT == MVT::i32) {
- SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
- LHS, RHS,
- DAG.getConstant(-1, MVT::i32),
- DAG.getConstant(0, MVT::i32),
- CC);
- // Convert integer values of true (-1) and false (0) to fp values of
- // true (1.0f) and false (0.0f).
- SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
- DAG.getConstant(1, MVT::i32));
- return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
- } else {
- // This SELECT_CC is already legal.
- return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
- }
- }
// Possible Min/Max pattern
SDValue MinMax = LowerMinMax(Op, DAG);
@@ -698,48 +656,6 @@ SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
DAG.getCondCode(ISD::SETNE));
}
-SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
- SDValue Cond;
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue CC = Op.getOperand(2);
- DebugLoc DL = Op.getDebugLoc();
- assert(Op.getValueType() == MVT::i32);
- if (LHS.getValueType() == MVT::i32) {
- Cond = DAG.getNode(
- ISD::SELECT_CC,
- Op.getDebugLoc(),
- MVT::i32,
- LHS, RHS,
- DAG.getConstant(-1, MVT::i32),
- DAG.getConstant(0, MVT::i32),
- CC);
- } else if (LHS.getValueType() == MVT::f32) {
- Cond = DAG.getNode(
- ISD::SELECT_CC,
- Op.getDebugLoc(),
- MVT::f32,
- LHS, RHS,
- DAG.getConstantFP(1.0f, MVT::f32),
- DAG.getConstantFP(0.0f, MVT::f32),
- CC);
- Cond = DAG.getNode(
- ISD::FP_TO_SINT,
- DL,
- MVT::i32,
- Cond);
- } else {
- assert(0 && "Not valid type for set_cc");
- }
- Cond = DAG.getNode(
- ISD::AND,
- DL,
- MVT::i32,
- DAG.getConstant(1, MVT::i32),
- Cond);
- return Cond;
-}
-
/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
@@ -918,7 +834,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
if (ConstantBlock > -1) {
SDValue Result;
if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
- dyn_cast<Constant>(LoadNode->getSrcValue())) {
+ dyn_cast<Constant>(LoadNode->getSrcValue()) ||
+ dyn_cast<ConstantSDNode>(Ptr)) {
SDValue Slots[4];
for (unsigned i = 0; i < 4; i++) {
// We want Const position encoded with the following formula :
@@ -934,7 +851,9 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
} else {
// non constant ptr cant be folded, keeps it as a v4f32 load
Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
- DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32))
+ DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
+ DAG.getConstant(LoadNode->getAddressSpace() -
+ AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
);
}
@@ -1122,6 +1041,9 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SELECT_CC: {
// fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
// selectcc x, y, a, b, inv(cc)
+ //
+ // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
+ // selectcc x, y, a, b, cc
SDValue LHS = N->getOperand(0);
if (LHS.getOpcode() != ISD::SELECT_CC) {
return SDValue();
@@ -1130,24 +1052,30 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
SDValue RHS = N->getOperand(1);
SDValue True = N->getOperand(2);
SDValue False = N->getOperand(3);
+ ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
if (LHS.getOperand(2).getNode() != True.getNode() ||
LHS.getOperand(3).getNode() != False.getNode() ||
- RHS.getNode() != False.getNode() ||
- cast<CondCodeSDNode>(N->getOperand(4))->get() != ISD::SETEQ) {
+ RHS.getNode() != False.getNode()) {
return SDValue();
}
- ISD::CondCode CCOpcode = cast<CondCodeSDNode>(LHS->getOperand(4))->get();
- CCOpcode = ISD::getSetCCInverse(
- CCOpcode, LHS.getOperand(0).getValueType().isInteger());
- return DAG.getSelectCC(N->getDebugLoc(),
- LHS.getOperand(0),
- LHS.getOperand(1),
- LHS.getOperand(2),
- LHS.getOperand(3),
- CCOpcode);
+ switch (NCC) {
+ default: return SDValue();
+ case ISD::SETNE: return LHS;
+ case ISD::SETEQ: {
+ ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
+ LHSCC = ISD::getSetCCInverse(LHSCC,
+ LHS.getOperand(0).getValueType().isInteger());
+ return DAG.getSelectCC(N->getDebugLoc(),
+ LHS.getOperand(0),
+ LHS.getOperand(1),
+ LHS.getOperand(2),
+ LHS.getOperand(3),
+ LHSCC);
}
+ }
+ }
case AMDGPUISD::EXPORT: {
SDValue Arg = N->getOperand(1);
if (Arg.getOpcode() != ISD::BUILD_VECTOR)
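LowerSELECT_CC now tries the SET* patterns first: when the hardware true value (-1 for i32, 1.0f for f32) sits in the False slot, the operands are swapped and the condition code inverted before matching. A standalone sketch of that canonicalisation for the i32 case only; the CondCode enum and invert() below are stand-ins, not LLVM's ISD::CondCode or getSetCCInverse:

#include <algorithm>
#include <cassert>

enum CondCode { SETLT, SETGE }; // stand-in, two codes are enough for the demo
static CondCode invert(CondCode CC) { return CC == SETLT ? SETGE : SETLT; }

static bool isHWTrue(int V)  { return V == -1; }
static bool isHWFalse(int V) { return V == 0;  }

// Returns true when "select_cc lhs, rhs, True, False, CC" can be matched by SET*.
static bool canonicalizeForSet(int &True, int &False, CondCode &CC) {
  if (isHWTrue(False) && isHWFalse(True)) {
    std::swap(True, False); // move -1 into the True slot ...
    CC = invert(CC);        // ... and compensate by inverting the condition
  }
  return isHWTrue(True) && isHWFalse(False);
}

int main() {
  int True = 0, False = -1;
  CondCode CC = SETLT;
  assert(canonicalizeForSet(True, False, CC));
  assert(True == -1 && False == 0 && CC == SETGE);
  return 0;
}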
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h
index afa3897..5cb4b91 100644
--- a/lib/Target/R600/R600ISelLowering.h
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -52,14 +52,11 @@ private:
void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
MachineRegisterInfo & MRI, unsigned dword_offset) const;
- SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
-
/// \brief Lower ROTL opcode to BITALIGN
SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 7e3f005..0865098 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -139,6 +139,60 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
(TargetFlags & R600_InstFlag::OP3));
}
+bool
+R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
+ const {
+ assert (Consts.size() <= 12 && "Too many operands in instruction group");
+ unsigned Pair1 = 0, Pair2 = 0;
+ for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
+ unsigned ReadConstHalf = Consts[i] & 2;
+ unsigned ReadConstIndex = Consts[i] & (~3);
+ unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
+ if (!Pair1) {
+ Pair1 = ReadHalfConst;
+ continue;
+ }
+ if (Pair1 == ReadHalfConst)
+ continue;
+ if (!Pair2) {
+ Pair2 = ReadHalfConst;
+ continue;
+ }
+ if (Pair2 != ReadHalfConst)
+ return false;
+ }
+ return true;
+}
+
+bool
+R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
+ std::vector<unsigned> Consts;
+ for (unsigned i = 0, n = MIs.size(); i < n; i++) {
+ const MachineInstr *MI = MIs[i];
+
+ const R600Operands::Ops OpTable[3][2] = {
+ {R600Operands::SRC0, R600Operands::SRC0_SEL},
+ {R600Operands::SRC1, R600Operands::SRC1_SEL},
+ {R600Operands::SRC2, R600Operands::SRC2_SEL},
+ };
+
+ if (!isALUInstr(MI->getOpcode()))
+ continue;
+
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
+ if (SrcIdx < 0)
+ break;
+ if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) {
+ unsigned Const = MI->getOperand(
+ getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
+ Consts.push_back(Const);
+ }
+ }
+ }
+ return fitsConstReadLimitations(Consts);
+}
+
DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
const ScheduleDAG *DAG) const {
const InstrItineraryData *II = TM->getInstrItineraryData();
@@ -168,6 +222,11 @@ findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
return NULL;
}
+static
+bool isJump(unsigned Opcode) {
+ return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
+}
+
bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
@@ -186,7 +245,7 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return false;
--I;
}
- if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
+ if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
return false;
}
@@ -196,22 +255,20 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// If there is only one terminator instruction, process it.
unsigned LastOpc = LastInst->getOpcode();
if (I == MBB.begin() ||
- static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
+ !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
if (LastOpc == AMDGPU::JUMP) {
- if(!isPredicated(LastInst)) {
- TBB = LastInst->getOperand(0).getMBB();
- return false;
- } else {
- MachineInstr *predSet = I;
- while (!isPredicateSetter(predSet->getOpcode())) {
- predSet = --I;
- }
- TBB = LastInst->getOperand(0).getMBB();
- Cond.push_back(predSet->getOperand(1));
- Cond.push_back(predSet->getOperand(2));
- Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
- return false;
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (LastOpc == AMDGPU::JUMP_COND) {
+ MachineInstr *predSet = I;
+ while (!isPredicateSetter(predSet->getOpcode())) {
+ predSet = --I;
}
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(predSet->getOperand(1));
+ Cond.push_back(predSet->getOperand(2));
+ Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+ return false;
}
return true; // Can't handle indirect branch.
}
@@ -221,10 +278,7 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
unsigned SecondLastOpc = SecondLastInst->getOpcode();
// If the block ends with a B and a Bcc, handle it.
- if (SecondLastOpc == AMDGPU::JUMP &&
- isPredicated(SecondLastInst) &&
- LastOpc == AMDGPU::JUMP &&
- !isPredicated(LastInst)) {
+ if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
MachineInstr *predSet = --I;
while (!isPredicateSetter(predSet->getOpcode())) {
predSet = --I;
@@ -261,7 +315,7 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
if (FBB == 0) {
if (Cond.empty()) {
- BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
return 1;
} else {
MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
@@ -269,7 +323,7 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
addFlag(PredSet, 0, MO_FLAG_PUSH);
PredSet->getOperand(2).setImm(Cond[1].getImm());
- BuildMI(&MBB, DL, get(AMDGPU::JUMP))
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
.addMBB(TBB)
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
return 1;
@@ -279,10 +333,10 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
assert(PredSet && "No previous predicate !");
addFlag(PredSet, 0, MO_FLAG_PUSH);
PredSet->getOperand(2).setImm(Cond[1].getImm());
- BuildMI(&MBB, DL, get(AMDGPU::JUMP))
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
.addMBB(TBB)
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
- BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
return 2;
}
}
@@ -302,11 +356,13 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
switch (I->getOpcode()) {
default:
return 0;
+ case AMDGPU::JUMP_COND: {
+ MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
+ clearFlag(predSet, 0, MO_FLAG_PUSH);
+ I->eraseFromParent();
+ break;
+ }
case AMDGPU::JUMP:
- if (isPredicated(I)) {
- MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
- clearFlag(predSet, 0, MO_FLAG_PUSH);
- }
I->eraseFromParent();
break;
}
@@ -320,11 +376,13 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
// FIXME: only one case??
default:
return 1;
+ case AMDGPU::JUMP_COND: {
+ MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
+ clearFlag(predSet, 0, MO_FLAG_PUSH);
+ I->eraseFromParent();
+ break;
+ }
case AMDGPU::JUMP:
- if (isPredicated(I)) {
- MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
- clearFlag(predSet, 0, MO_FLAG_PUSH);
- }
I->eraseFromParent();
break;
}
@@ -356,6 +414,8 @@ R600InstrInfo::isPredicable(MachineInstr *MI) const {
if (MI->getOpcode() == AMDGPU::KILLGT) {
return false;
+ } else if (isVector(*MI)) {
+ return false;
} else {
return AMDGPUInstrInfo::isPredicable(MI);
}
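The fitsConstReadLimitations() added above enforces the R600 restriction that a single ALU instruction group may read from at most two constant-cache line/half pairs; each SEL value is reduced to a key made of its cache-line index plus the half bit. The same algorithm, copied into a standalone runnable form with two example inputs (the SEL values in main are illustrative only):

#include <cassert>
#include <vector>

// Same logic as R600InstrInfo::fitsConstReadLimitations in the hunk above.
static bool fitsConstReadLimitations(const std::vector<unsigned> &Consts) {
  unsigned Pair1 = 0, Pair2 = 0;
  for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
    unsigned ReadConstHalf = Consts[i] & 2;
    unsigned ReadConstIndex = Consts[i] & (~3);
    unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
    if (!Pair1) { Pair1 = ReadHalfConst; continue; }
    if (Pair1 == ReadHalfConst) continue;
    if (!Pair2) { Pair2 = ReadHalfConst; continue; }
    if (Pair2 != ReadHalfConst) return false;
  }
  return true;
}

int main() {
  // SELs 4 and 5 share one line/half key, 8 and 9 share another: accepted.
  assert(fitsConstReadLimitations(std::vector<unsigned>{4, 5, 8, 9}));
  // SELs 4, 6 and 8 map to three distinct line/half keys: rejected.
  assert(!fitsConstReadLimitations(std::vector<unsigned>{4, 6, 8}));
  return 0;
}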
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
index efe721c..bf9569e 100644
--- a/lib/Target/R600/R600InstrInfo.h
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -53,6 +53,9 @@ namespace llvm {
/// \returns true if this \p Opcode represents an ALU instruction.
bool isALUInstr(unsigned Opcode) const;
+ bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
+ bool canBundle(const std::vector<MachineInstr *> &) const;
+
/// \breif Vector instructions are instructions that must fill all
/// instruction slots within an instruction group.
bool isVector(const MachineInstr &MI) const;
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 8242df9..8c50d54 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -512,8 +512,8 @@ def INTERP_PAIR_ZW : AMDGPUShaderInst <
[]>;
def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
- SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
- [SDNPMayLoad]
+ SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
+ [SDNPVariadic]
>;
//===----------------------------------------------------------------------===//
@@ -1090,12 +1090,12 @@ class COS_Common <bits<11> inst> : R600_1OP <
multiclass DIV_Common <InstR600 recip_ieee> {
def : Pat<
(int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1),
- (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
+ (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
>;
def : Pat<
(fdiv R600_Reg32:$src0, R600_Reg32:$src1),
- (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
+ (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
>;
}
@@ -1169,12 +1169,12 @@ let Predicates = [isR600] in {
// cards.
class COS_PAT <InstR600 trig> : Pat<
(fcos R600_Reg32:$src),
- (trig (MUL (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
+ (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
>;
class SIN_PAT <InstR600 trig> : Pat<
(fsin R600_Reg32:$src),
- (trig (MUL (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
+ (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
>;
//===----------------------------------------------------------------------===//
@@ -1587,19 +1587,28 @@ def PRED_X : InstR600 <
(ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
"", [], NullALU> {
let FlagOperandIdx = 3;
- let isTerminator = 1;
}
-let isTerminator = 1, isBranch = 1, isBarrier = 1 in {
-
-def JUMP : InstR600 <0x10,
+let isTerminator = 1, isBranch = 1 in {
+def JUMP_COND : InstR600 <0x10,
(outs),
- (ins brtarget:$target, R600_Pred:$p),
+ (ins brtarget:$target, R600_Predicate_Bit:$p),
"JUMP $target ($p)",
[], AnyALU
>;
-} // End isTerminator = 1, isBranch = 1, isBarrier = 1
+def JUMP : InstR600 <0x10,
+ (outs),
+ (ins brtarget:$target),
+ "JUMP $target",
+ [], AnyALU
+ >
+{
+ let isPredicable = 1;
+ let isBarrier = 1;
+}
+
+} // End isTerminator = 1, isBranch = 1
let usesCustomInserter = 1 in {
@@ -1639,7 +1648,7 @@ def FNEG_R600 : FNEG<R600_Reg32>;
//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//
-let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1,
+let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
usesCustomInserter = 1 in {
def RETURN : ILFormat<(outs), (ins variable_ops),
"RETURN", [(IL_retflag)]>;
@@ -1650,27 +1659,27 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1,
// Constant Buffer Addressing Support
//===----------------------------------------------------------------------===//
-let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
+let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
def CONST_COPY : Instruction {
let OutOperandList = (outs R600_Reg32:$dst);
let InOperandList = (ins i32imm:$src);
- let Pattern = [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
+ let Pattern =
+ [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
let AsmString = "CONST_COPY";
let neverHasSideEffects = 1;
let isAsCheapAsAMove = 1;
let Itinerary = NullALU;
}
-} // end isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
+} // end usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
def TEX_VTX_CONSTBUF :
- InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr",
- [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr))]>,
+ InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr",
+ [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
VTX_WORD1_GPR, VTX_WORD0 {
let VC_INST = 0;
let FETCH_TYPE = 2;
let FETCH_WHOLE_QUAD = 0;
- let BUFFER_ID = 0;
let SRC_REL = 0;
let SRC_SEL_X = 0;
let DST_REL = 0;
@@ -1840,6 +1849,18 @@ let isTerminator=1 in {
// ISel Patterns
//===----------------------------------------------------------------------===//
+// CND*_INT patterns for f32 True / False values
+
+class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat <
+ (selectcc (i32 R600_Reg32:$src0), 0, (f32 R600_Reg32:$src1),
+ R600_Reg32:$src2, cc),
+ (cnd R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2)
+>;
+
+def : CND_INT_f32 <CNDE_INT, SETEQ>;
+def : CND_INT_f32 <CNDGT_INT, SETGT>;
+def : CND_INT_f32 <CNDGE_INT, SETGE>;
+
//CNDGE_INT extra pattern
def : Pat <
(selectcc (i32 R600_Reg32:$src0), -1, (i32 R600_Reg32:$src1),
@@ -1958,8 +1979,8 @@ def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>;
-def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
-def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
+def : Vector4_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
+def : Vector4_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
// bitconvert patterns
diff --git a/lib/Target/R600/R600LowerConstCopy.cpp b/lib/Target/R600/R600LowerConstCopy.cpp
deleted file mode 100644
index 3ebe653..0000000
--- a/lib/Target/R600/R600LowerConstCopy.cpp
+++ /dev/null
@@ -1,222 +0,0 @@
-//===-- R600LowerConstCopy.cpp - Propagate ConstCopy / lower them to MOV---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This pass is intended to handle remaining ConstCopy pseudo MachineInstr.
-/// ISel will fold each Const Buffer read inside scalar ALU. However it cannot
-/// fold them inside vector instruction, like DOT4 or Cube ; ISel emits
-/// ConstCopy instead. This pass (executed after ExpandingSpecialInstr) will try
-/// to fold them if possible or replace them by MOV otherwise.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "R600InstrInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/IR/GlobalValue.h"
-
-namespace llvm {
-
-class R600LowerConstCopy : public MachineFunctionPass {
-private:
- static char ID;
- const R600InstrInfo *TII;
-
- struct ConstPairs {
- unsigned XYPair;
- unsigned ZWPair;
- };
-
- bool canFoldInBundle(ConstPairs &UsedConst, unsigned ReadConst) const;
-public:
- R600LowerConstCopy(TargetMachine &tm);
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- const char *getPassName() const { return "R600 Eliminate Symbolic Operand"; }
-};
-
-char R600LowerConstCopy::ID = 0;
-
-R600LowerConstCopy::R600LowerConstCopy(TargetMachine &tm) :
- MachineFunctionPass(ID),
- TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo()))
-{
-}
-
-bool R600LowerConstCopy::canFoldInBundle(ConstPairs &UsedConst,
- unsigned ReadConst) const {
- unsigned ReadConstChan = ReadConst & 3;
- unsigned ReadConstIndex = ReadConst & (~3);
- if (ReadConstChan < 2) {
- if (!UsedConst.XYPair) {
- UsedConst.XYPair = ReadConstIndex;
- }
- return UsedConst.XYPair == ReadConstIndex;
- } else {
- if (!UsedConst.ZWPair) {
- UsedConst.ZWPair = ReadConstIndex;
- }
- return UsedConst.ZWPair == ReadConstIndex;
- }
-}
-
-static bool isControlFlow(const MachineInstr &MI) {
- return (MI.getOpcode() == AMDGPU::IF_PREDICATE_SET) ||
- (MI.getOpcode() == AMDGPU::ENDIF) ||
- (MI.getOpcode() == AMDGPU::ELSE) ||
- (MI.getOpcode() == AMDGPU::WHILELOOP) ||
- (MI.getOpcode() == AMDGPU::BREAK);
-}
-
-bool R600LowerConstCopy::runOnMachineFunction(MachineFunction &MF) {
-
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
- DenseMap<unsigned, MachineInstr *> RegToConstIndex;
- for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
- E = MBB.instr_end(); I != E;) {
-
- if (I->getOpcode() == AMDGPU::CONST_COPY) {
- MachineInstr &MI = *I;
- I = llvm::next(I);
- unsigned DstReg = MI.getOperand(0).getReg();
- DenseMap<unsigned, MachineInstr *>::iterator SrcMI =
- RegToConstIndex.find(DstReg);
- if (SrcMI != RegToConstIndex.end()) {
- SrcMI->second->eraseFromParent();
- RegToConstIndex.erase(SrcMI);
- }
- MachineInstr *NewMI =
- TII->buildDefaultInstruction(MBB, &MI, AMDGPU::MOV,
- MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
- TII->setImmOperand(NewMI, R600Operands::SRC0_SEL,
- MI.getOperand(1).getImm());
- RegToConstIndex[DstReg] = NewMI;
- MI.eraseFromParent();
- continue;
- }
-
- std::vector<unsigned> Defs;
- // We consider all Instructions as bundled because algorithm that handle
- // const read port limitations inside an IG is still valid with single
- // instructions.
- std::vector<MachineInstr *> Bundle;
-
- if (I->isBundle()) {
- unsigned BundleSize = I->getBundleSize();
- for (unsigned i = 0; i < BundleSize; i++) {
- I = llvm::next(I);
- Bundle.push_back(I);
- }
- } else if (TII->isALUInstr(I->getOpcode())){
- Bundle.push_back(I);
- } else if (isControlFlow(*I)) {
- RegToConstIndex.clear();
- I = llvm::next(I);
- continue;
- } else {
- MachineInstr &MI = *I;
- for (MachineInstr::mop_iterator MOp = MI.operands_begin(),
- MOpE = MI.operands_end(); MOp != MOpE; ++MOp) {
- MachineOperand &MO = *MOp;
- if (!MO.isReg())
- continue;
- if (MO.isDef()) {
- Defs.push_back(MO.getReg());
- } else {
- // Either a TEX or an Export inst, prevent from erasing def of used
- // operand
- RegToConstIndex.erase(MO.getReg());
- for (MCSubRegIterator SR(MO.getReg(), &TII->getRegisterInfo());
- SR.isValid(); ++SR) {
- RegToConstIndex.erase(*SR);
- }
- }
- }
- }
-
-
- R600Operands::Ops OpTable[3][2] = {
- {R600Operands::SRC0, R600Operands::SRC0_SEL},
- {R600Operands::SRC1, R600Operands::SRC1_SEL},
- {R600Operands::SRC2, R600Operands::SRC2_SEL},
- };
-
- for(std::vector<MachineInstr *>::iterator It = Bundle.begin(),
- ItE = Bundle.end(); It != ItE; ++It) {
- MachineInstr *MI = *It;
- if (TII->isPredicated(MI)) {
- // We don't want to erase previous assignment
- RegToConstIndex.erase(MI->getOperand(0).getReg());
- } else {
- int WriteIDX = TII->getOperandIdx(MI->getOpcode(), R600Operands::WRITE);
- if (WriteIDX < 0 || MI->getOperand(WriteIDX).getImm())
- Defs.push_back(MI->getOperand(0).getReg());
- }
- }
-
- ConstPairs CP = {0,0};
- for (unsigned SrcOp = 0; SrcOp < 3; SrcOp++) {
- for(std::vector<MachineInstr *>::iterator It = Bundle.begin(),
- ItE = Bundle.end(); It != ItE; ++It) {
- MachineInstr *MI = *It;
- int SrcIdx = TII->getOperandIdx(MI->getOpcode(), OpTable[SrcOp][0]);
- if (SrcIdx < 0)
- continue;
- MachineOperand &MO = MI->getOperand(SrcIdx);
- DenseMap<unsigned, MachineInstr *>::iterator SrcMI =
- RegToConstIndex.find(MO.getReg());
- if (SrcMI != RegToConstIndex.end()) {
- MachineInstr *CstMov = SrcMI->second;
- int ConstMovSel =
- TII->getOperandIdx(CstMov->getOpcode(), R600Operands::SRC0_SEL);
- unsigned ConstIndex = CstMov->getOperand(ConstMovSel).getImm();
- if (MI->isInsideBundle() && canFoldInBundle(CP, ConstIndex)) {
- TII->setImmOperand(MI, OpTable[SrcOp][1], ConstIndex);
- MI->getOperand(SrcIdx).setReg(AMDGPU::ALU_CONST);
- } else {
- RegToConstIndex.erase(SrcMI);
- }
- }
- }
- }
-
- for (std::vector<unsigned>::iterator It = Defs.begin(), ItE = Defs.end();
- It != ItE; ++It) {
- DenseMap<unsigned, MachineInstr *>::iterator SrcMI =
- RegToConstIndex.find(*It);
- if (SrcMI != RegToConstIndex.end()) {
- SrcMI->second->eraseFromParent();
- RegToConstIndex.erase(SrcMI);
- }
- }
- I = llvm::next(I);
- }
-
- if (MBB.succ_empty()) {
- for (DenseMap<unsigned, MachineInstr *>::iterator
- DI = RegToConstIndex.begin(), DE = RegToConstIndex.end();
- DI != DE; ++DI) {
- DI->second->eraseFromParent();
- }
- }
- }
- return false;
-}
-
-FunctionPass *createR600LowerConstCopy(TargetMachine &tm) {
- return new R600LowerConstCopy(tm);
-}
-
-}
-
-
diff --git a/lib/Target/R600/R600MachineFunctionInfo.cpp b/lib/Target/R600/R600MachineFunctionInfo.cpp
index 40aec83..b07a585 100644
--- a/lib/Target/R600/R600MachineFunctionInfo.cpp
+++ b/lib/Target/R600/R600MachineFunctionInfo.cpp
@@ -14,5 +14,4 @@ using namespace llvm;
R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
: MachineFunctionInfo() {
- memset(Outputs, 0, sizeof(Outputs));
}
diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h
index 4b901f4..13a46b8 100644
--- a/lib/Target/R600/R600MachineFunctionInfo.h
+++ b/lib/Target/R600/R600MachineFunctionInfo.h
@@ -26,7 +26,6 @@ public:
R600MachineFunctionInfo(const MachineFunction &MF);
SmallVector<unsigned, 4> LiveOuts;
std::vector<unsigned> IndirectRegs;
- SDNode *Outputs[16];
};
} // End llvm namespace
diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
new file mode 100644
index 0000000..9074364
--- /dev/null
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -0,0 +1,427 @@
+//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 Machine Scheduler interface
+// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "misched"
+
+#include "R600MachineScheduler.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/raw_ostream.h"
+#include <set>
+
+using namespace llvm;
+
+void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
+
+ DAG = dag;
+ TII = static_cast<const R600InstrInfo*>(DAG->TII);
+ TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
+ MRI = &DAG->MRI;
+ Available[IDAlu]->clear();
+ Available[IDFetch]->clear();
+ Available[IDOther]->clear();
+ CurInstKind = IDOther;
+ CurEmitted = 0;
+ OccupedSlotsMask = 15;
+ InstKindLimit[IDAlu] = 120; // 120 minus 8 for security
+
+
+ const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD5XXX) {
+ InstKindLimit[IDFetch] = 7; // 8 minus 1 for security
+ } else {
+ InstKindLimit[IDFetch] = 15; // 16 minus 1 for security
+ }
+}
+
+void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst)
+{
+ if (QSrc->empty())
+ return;
+ for (ReadyQueue::iterator I = QSrc->begin(),
+ E = QSrc->end(); I != E; ++I) {
+ (*I)->NodeQueueId &= ~QSrc->getID();
+ QDst->push(*I);
+ }
+ QSrc->clear();
+}
+
+SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
+ SUnit *SU = 0;
+ IsTopNode = true;
+ NextInstKind = IDOther;
+
+ // check if we might want to switch current clause type
+ bool AllowSwitchToAlu = (CurInstKind == IDOther) ||
+ (CurEmitted > InstKindLimit[CurInstKind]) ||
+ (Available[CurInstKind]->empty());
+ bool AllowSwitchFromAlu = (CurEmitted > InstKindLimit[CurInstKind]) &&
+ (!Available[IDFetch]->empty() || !Available[IDOther]->empty());
+
+ if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
+ (!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
+ // try to pick ALU
+ SU = pickAlu();
+ if (SU) {
+ if (CurEmitted > InstKindLimit[IDAlu])
+ CurEmitted = 0;
+ NextInstKind = IDAlu;
+ }
+ }
+
+ if (!SU) {
+ // try to pick FETCH
+ SU = pickOther(IDFetch);
+ if (SU)
+ NextInstKind = IDFetch;
+ }
+
+ // try to pick other
+ if (!SU) {
+ SU = pickOther(IDOther);
+ if (SU)
+ NextInstKind = IDOther;
+ }
+
+ DEBUG(
+ if (SU) {
+ dbgs() << "picked node: ";
+ SU->dump(DAG);
+ } else {
+ dbgs() << "NO NODE ";
+ for (int i = 0; i < IDLast; ++i) {
+ Available[i]->dump();
+ Pending[i]->dump();
+ }
+ for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
+ const SUnit &S = DAG->SUnits[i];
+ if (!S.isScheduled)
+ S.dump(DAG);
+ }
+ }
+ );
+
+ return SU;
+}
+
+void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
+
+ DEBUG(dbgs() << "scheduled: ");
+ DEBUG(SU->dump(DAG));
+
+ if (NextInstKind != CurInstKind) {
+ DEBUG(dbgs() << "Instruction Type Switch\n");
+ if (NextInstKind != IDAlu)
+ OccupedSlotsMask = 15;
+ CurEmitted = 0;
+ CurInstKind = NextInstKind;
+ }
+
+ if (CurInstKind == IDAlu) {
+ switch (getAluKind(SU)) {
+ case AluT_XYZW:
+ CurEmitted += 4;
+ break;
+ case AluDiscarded:
+ break;
+ default: {
+ ++CurEmitted;
+ for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
+ E = SU->getInstr()->operands_end(); It != E; ++It) {
+ MachineOperand &MO = *It;
+ if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
+ ++CurEmitted;
+ }
+ }
+ }
+ } else {
+ ++CurEmitted;
+ }
+
+
+ DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");
+
+ if (CurInstKind != IDFetch) {
+ MoveUnits(Pending[IDFetch], Available[IDFetch]);
+ }
+ MoveUnits(Pending[IDOther], Available[IDOther]);
+}
+
+void R600SchedStrategy::releaseTopNode(SUnit *SU) {
+ int IK = getInstKind(SU);
+
+ DEBUG(dbgs() << IK << " <= ");
+ DEBUG(SU->dump(DAG));
+
+ Pending[IK]->push(SU);
+}
+
+void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
+}
+
+bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
+ const TargetRegisterClass *RC) const {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ return RC->contains(Reg);
+ } else {
+ return MRI->getRegClass(Reg) == RC;
+ }
+}
+
+R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
+ MachineInstr *MI = SU->getInstr();
+
+ switch (MI->getOpcode()) {
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ return AluT_XYZW;
+ case AMDGPU::COPY:
+ if (TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) {
+ // %vregX = COPY Tn_X is likely to be discarded in favor of an
+ // assignement of Tn_X to %vregX, don't considers it in scheduling
+ return AluDiscarded;
+ }
+ else if (MI->getOperand(1).isUndef()) {
+ // MI will become a KILL, don't considers it in scheduling
+ return AluDiscarded;
+ }
+ default:
+ break;
+ }
+
+ // Does the instruction take a whole IG?
+ if(TII->isVector(*MI) ||
+ TII->isCubeOp(MI->getOpcode()) ||
+ TII->isReductionOp(MI->getOpcode()))
+ return AluT_XYZW;
+
+ // Is the result already assigned to a channel?
+ unsigned DestSubReg = MI->getOperand(0).getSubReg();
+ switch (DestSubReg) {
+ case AMDGPU::sub0:
+ return AluT_X;
+ case AMDGPU::sub1:
+ return AluT_Y;
+ case AMDGPU::sub2:
+ return AluT_Z;
+ case AMDGPU::sub3:
+ return AluT_W;
+ default:
+ break;
+ }
+
+ // Is the result already a member of an X/Y/Z/W register class?
+ unsigned DestReg = MI->getOperand(0).getReg();
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
+ regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
+ return AluT_X;
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
+ return AluT_Y;
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
+ return AluT_Z;
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
+ return AluT_W;
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
+ return AluT_XYZW;
+
+ return AluAny;
+
+}
+
+int R600SchedStrategy::getInstKind(SUnit* SU) {
+ int Opcode = SU->getInstr()->getOpcode();
+
+ if (TII->isALUInstr(Opcode)) {
+ return IDAlu;
+ }
+
+ switch (Opcode) {
+ case AMDGPU::COPY:
+ case AMDGPU::CONST_COPY:
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ case AMDGPU::DOT4_eg_pseudo:
+ case AMDGPU::DOT4_r600_pseudo:
+ return IDAlu;
+ case AMDGPU::TEX_VTX_CONSTBUF:
+ case AMDGPU::TEX_VTX_TEXBUF:
+ case AMDGPU::TEX_LD:
+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+ case AMDGPU::TEX_GET_GRADIENTS_H:
+ case AMDGPU::TEX_GET_GRADIENTS_V:
+ case AMDGPU::TEX_SET_GRADIENTS_H:
+ case AMDGPU::TEX_SET_GRADIENTS_V:
+ case AMDGPU::TEX_SAMPLE:
+ case AMDGPU::TEX_SAMPLE_C:
+ case AMDGPU::TEX_SAMPLE_L:
+ case AMDGPU::TEX_SAMPLE_C_L:
+ case AMDGPU::TEX_SAMPLE_LB:
+ case AMDGPU::TEX_SAMPLE_C_LB:
+ case AMDGPU::TEX_SAMPLE_G:
+ case AMDGPU::TEX_SAMPLE_C_G:
+ case AMDGPU::TXD:
+ case AMDGPU::TXD_SHADOW:
+ return IDFetch;
+ default:
+ DEBUG(
+ dbgs() << "other inst: ";
+ SU->dump(DAG);
+ );
+ return IDOther;
+ }
+}
+
+SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
+ if (Q.empty())
+ return NULL;
+ for (std::multiset<SUnit *, CompareSUnit>::iterator It = Q.begin(), E = Q.end();
+ It != E; ++It) {
+ SUnit *SU = *It;
+ InstructionsGroupCandidate.push_back(SU->getInstr());
+ if (TII->canBundle(InstructionsGroupCandidate)) {
+ InstructionsGroupCandidate.pop_back();
+ Q.erase(It);
+ return SU;
+ } else {
+ InstructionsGroupCandidate.pop_back();
+ }
+ }
+ return NULL;
+}
+
+void R600SchedStrategy::LoadAlu() {
+ ReadyQueue *QSrc = Pending[IDAlu];
+ for (ReadyQueue::iterator I = QSrc->begin(),
+ E = QSrc->end(); I != E; ++I) {
+ (*I)->NodeQueueId &= ~QSrc->getID();
+ AluKind AK = getAluKind(*I);
+ AvailableAlus[AK].insert(*I);
+ }
+ QSrc->clear();
+}
+
+void R600SchedStrategy::PrepareNextSlot() {
+ DEBUG(dbgs() << "New Slot\n");
+ assert (OccupedSlotsMask && "Slot wasn't filled");
+ OccupedSlotsMask = 0;
+ InstructionsGroupCandidate.clear();
+ LoadAlu();
+}
+
+void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
+ unsigned DestReg = MI->getOperand(0).getReg();
+ // PressureRegister crashes if an operand is def and used in the same inst
+ // and we try to constrain its regclass
+ for (MachineInstr::mop_iterator It = MI->operands_begin(),
+ E = MI->operands_end(); It != E; ++It) {
+ MachineOperand &MO = *It;
+ if (MO.isReg() && !MO.isDef() &&
+ MO.getReg() == MI->getOperand(0).getReg())
+ return;
+ }
+ // Constrain the regclass of DestReg to assign it to Slot
+ switch (Slot) {
+ case 0:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
+ break;
+ case 1:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
+ break;
+ case 2:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
+ break;
+ case 3:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
+ break;
+ }
+}
+
+SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
+ static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
+ SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
+ SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
+ if (!UnslotedSU) {
+ return SlotedSU;
+ } else if (!SlotedSU) {
+ AssignSlot(UnslotedSU->getInstr(), Slot);
+ return UnslotedSU;
+ } else {
+ // Determine which one to pick (the lesser one)
+ if (CompareSUnit()(SlotedSU, UnslotedSU)) {
+ AvailableAlus[AluAny].insert(UnslotedSU);
+ return SlotedSU;
+ } else {
+ AvailableAlus[IndexToID[Slot]].insert(SlotedSU);
+ AssignSlot(UnslotedSU->getInstr(), Slot);
+ return UnslotedSU;
+ }
+ }
+}
+
+bool R600SchedStrategy::isAvailablesAluEmpty() const {
+ return Pending[IDAlu]->empty() && AvailableAlus[AluAny].empty() &&
+ AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
+ AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
+ AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty();
+}
+
+SUnit* R600SchedStrategy::pickAlu() {
+ while (!isAvailablesAluEmpty()) {
+ if (!OccupedSlotsMask) {
+ // Flush physical reg copies (RA will discard them)
+ if (!AvailableAlus[AluDiscarded].empty()) {
+ OccupedSlotsMask = 15;
+ return PopInst(AvailableAlus[AluDiscarded]);
+ }
+ // If there is a T_XYZW alu available, use it
+ if (!AvailableAlus[AluT_XYZW].empty()) {
+ OccupedSlotsMask = 15;
+ return PopInst(AvailableAlus[AluT_XYZW]);
+ }
+ }
+ for (unsigned Chan = 0; Chan < 4; ++Chan) {
+ bool isOccupied = OccupedSlotsMask & (1 << Chan);
+ if (!isOccupied) {
+ SUnit *SU = AttemptFillSlot(Chan);
+ if (SU) {
+ OccupedSlotsMask |= (1 << Chan);
+ InstructionsGroupCandidate.push_back(SU->getInstr());
+ return SU;
+ }
+ }
+ }
+ PrepareNextSlot();
+ }
+ return NULL;
+}
+
+SUnit* R600SchedStrategy::pickOther(int QID) {
+ SUnit *SU = 0;
+ ReadyQueue *AQ = Available[QID];
+
+ if (AQ->empty()) {
+ MoveUnits(Pending[QID], AQ);
+ }
+ if (!AQ->empty()) {
+ SU = *AQ->begin();
+ AQ->remove(AQ->begin());
+ }
+ return SU;
+}
+
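A standalone model of the OccupedSlotsMask bookkeeping used by pickAlu() and PrepareNextSlot() above: one bit per ALU channel (X, Y, Z, W), whole-group instructions set all four bits at once, and a new instruction group is opened only when no free channel remains. The helper name below is illustrative, not taken from the patch:

#include <cassert>

// Returns the channel filled (0..3), or -1 when the current group is full and
// the caller should open a new slot (PrepareNextSlot resets the mask to 0).
static int fillNextChannel(unsigned &SlotsMask) {
  for (unsigned Chan = 0; Chan < 4; ++Chan) {
    if (!(SlotsMask & (1u << Chan))) {
      SlotsMask |= 1u << Chan;
      return static_cast<int>(Chan);
    }
  }
  return -1;
}

int main() {
  unsigned Mask = 0; // empty instruction group
  assert(fillNextChannel(Mask) == 0 && fillNextChannel(Mask) == 1);
  assert(fillNextChannel(Mask) == 2 && fillNextChannel(Mask) == 3);
  assert(Mask == 15 && fillNextChannel(Mask) == -1); // group is full
  return 0;
}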
diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h
new file mode 100644
index 0000000..3d0367f
--- /dev/null
+++ b/lib/Target/R600/R600MachineScheduler.h
@@ -0,0 +1,120 @@
+//===-- R600MachineScheduler.h - R600 Scheduler Interface -*- C++ -*-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 Machine Scheduler interface
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600MACHINESCHEDULER_H_
+#define R600MACHINESCHEDULER_H_
+
+#include "R600InstrInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/PriorityQueue.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+class CompareSUnit {
+public:
+ bool operator()(const SUnit *S1, const SUnit *S2) {
+ return S1->getDepth() > S2->getDepth();
+ }
+};
+
+class R600SchedStrategy : public MachineSchedStrategy {
+
+ const ScheduleDAGMI *DAG;
+ const R600InstrInfo *TII;
+ const R600RegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ enum InstQueue {
+ QAlu = 1,
+ QFetch = 2,
+ QOther = 4
+ };
+
+ enum InstKind {
+ IDAlu,
+ IDFetch,
+ IDOther,
+ IDLast
+ };
+
+ enum AluKind {
+ AluAny,
+ AluT_X,
+ AluT_Y,
+ AluT_Z,
+ AluT_W,
+ AluT_XYZW,
+ AluDiscarded, // LLVM Instructions that are going to be eliminated
+ AluLast
+ };
+
+ ReadyQueue *Available[IDLast], *Pending[IDLast];
+ std::multiset<SUnit *, CompareSUnit> AvailableAlus[AluLast];
+
+ InstKind CurInstKind;
+ int CurEmitted;
+ InstKind NextInstKind;
+
+ int InstKindLimit[IDLast];
+
+ int OccupedSlotsMask;
+
+public:
+ R600SchedStrategy() :
+ DAG(0), TII(0), TRI(0), MRI(0) {
+ Available[IDAlu] = new ReadyQueue(QAlu, "AAlu");
+ Available[IDFetch] = new ReadyQueue(QFetch, "AFetch");
+ Available[IDOther] = new ReadyQueue(QOther, "AOther");
+ Pending[IDAlu] = new ReadyQueue(QAlu<<4, "PAlu");
+ Pending[IDFetch] = new ReadyQueue(QFetch<<4, "PFetch");
+ Pending[IDOther] = new ReadyQueue(QOther<<4, "POther");
+ }
+
+ virtual ~R600SchedStrategy() {
+ for (unsigned I = 0; I < IDLast; ++I) {
+ delete Available[I];
+ delete Pending[I];
+ }
+ }
+
+ virtual void initialize(ScheduleDAGMI *dag);
+ virtual SUnit *pickNode(bool &IsTopNode);
+ virtual void schedNode(SUnit *SU, bool IsTopNode);
+ virtual void releaseTopNode(SUnit *SU);
+ virtual void releaseBottomNode(SUnit *SU);
+
+private:
+ std::vector<MachineInstr *> InstructionsGroupCandidate;
+
+ int getInstKind(SUnit *SU);
+ bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
+ AluKind getAluKind(SUnit *SU) const;
+ void LoadAlu();
+ bool isAvailablesAluEmpty() const;
+ SUnit *AttemptFillSlot (unsigned Slot);
+ void PrepareNextSlot();
+ SUnit *PopInst(std::multiset<SUnit *, CompareSUnit> &Q);
+
+ void AssignSlot(MachineInstr *MI, unsigned Slot);
+ SUnit* pickAlu();
+ SUnit* pickOther(int QID);
+ void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst);
+};
+
+} // namespace llvm
+
+#endif /* R600MACHINESCHEDULER_H_ */
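Editor's note: the AvailableAlus queues declared above are multisets ordered by CompareSUnit, so begin() is always the node with the greatest depth, i.e. the longest dependence chain still below it, which is what PopInst removes. A minimal sketch of that ordering with a simplified stand-in for SUnit (FakeSU is hypothetical, not an LLVM type; not part of the patch):

    #include <cassert>
    #include <set>

    struct FakeSU {
      unsigned Depth;
      unsigned getDepth() const { return Depth; }
    };

    struct CompareByDepth {
      bool operator()(const FakeSU *A, const FakeSU *B) const {
        return A->getDepth() > B->getDepth();  // deeper nodes sort first
      }
    };

    int main() {
      FakeSU A = {1}, B = {5}, C = {3};
      std::multiset<FakeSU *, CompareByDepth> Q = {&A, &B, &C};
      assert((*Q.begin())->getDepth() == 5);   // deepest candidate comes first
      Q.erase(Q.begin());                      // what PopInst effectively does
      assert((*Q.begin())->getDepth() == 3);
      return 0;
    }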
diff --git a/lib/Target/R600/SIAssignInterpRegs.cpp b/lib/Target/R600/SIAssignInterpRegs.cpp
deleted file mode 100644
index 832e44d..0000000
--- a/lib/Target/R600/SIAssignInterpRegs.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief This pass maps the pseudo interpolation registers to the correct physical
-/// registers.
-//
-/// Prior to executing a fragment shader, the GPU loads interpolation
-/// parameters into physical registers. The specific physical register that each
-/// interpolation parameter ends up in depends on the type of the interpolation
-/// parameter as well as how many interpolation parameters are used by the
-/// shader.
-//
-//===----------------------------------------------------------------------===//
-
-
-
-#include "AMDGPU.h"
-#include "AMDIL.h"
-#include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-using namespace llvm;
-
-namespace {
-
-class SIAssignInterpRegsPass : public MachineFunctionPass {
-
-private:
- static char ID;
- TargetMachine &TM;
-
- void addLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
- unsigned physReg, unsigned virtReg);
-
-public:
- SIAssignInterpRegsPass(TargetMachine &tm) :
- MachineFunctionPass(ID), TM(tm) { }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- const char *getPassName() const { return "SI Assign intrpolation registers"; }
-};
-
-} // End anonymous namespace
-
-char SIAssignInterpRegsPass::ID = 0;
-
-#define INTERP_VALUES 16
-#define REQUIRED_VALUE_MAX_INDEX 7
-
-struct InterpInfo {
- bool Enabled;
- unsigned Regs[3];
- unsigned RegCount;
-};
-
-
-FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) {
- return new SIAssignInterpRegsPass(tm);
-}
-
-bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF) {
-
- struct InterpInfo InterpUse[INTERP_VALUES] = {
- {false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2},
- {false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2},
- {false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2},
- {false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3},
- {false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2},
- {false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2},
- {false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2},
- {false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1},
- {false, {AMDGPU::POS_X_FLOAT}, 1},
- {false, {AMDGPU::POS_Y_FLOAT}, 1},
- {false, {AMDGPU::POS_Z_FLOAT}, 1},
- {false, {AMDGPU::POS_W_FLOAT}, 1},
- {false, {AMDGPU::FRONT_FACE}, 1},
- {false, {AMDGPU::ANCILLARY}, 1},
- {false, {AMDGPU::SAMPLE_COVERAGE}, 1},
- {false, {AMDGPU::POS_FIXED_PT}, 1}
- };
-
- SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
- // This pass is only needed for pixel shaders.
- if (MFI->ShaderType != ShaderType::PIXEL) {
- return false;
- }
- MachineRegisterInfo &MRI = MF.getRegInfo();
- bool ForceEnable = true;
-
- // First pass, mark the interpolation values that are used.
- for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
- for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
- RegIdx++) {
- InterpUse[InterpIdx].Enabled = InterpUse[InterpIdx].Enabled ||
- !MRI.use_empty(InterpUse[InterpIdx].Regs[RegIdx]);
- if (InterpUse[InterpIdx].Enabled &&
- InterpIdx <= REQUIRED_VALUE_MAX_INDEX) {
- ForceEnable = false;
- }
- }
- }
-
- // At least one interpolation mode must be enabled or else the GPU will hang.
- if (ForceEnable) {
- InterpUse[0].Enabled = true;
- }
-
- unsigned UsedVgprs = 0;
-
- // Second pass, replace with VGPRs.
- for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
- if (!InterpUse[InterpIdx].Enabled) {
- continue;
- }
- MFI->SPIPSInputAddr |= (1 << InterpIdx);
-
- for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
- RegIdx++, UsedVgprs++) {
- unsigned NewReg = AMDGPU::VReg_32RegClass.getRegister(UsedVgprs);
- unsigned VirtReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
- MRI.replaceRegWith(InterpUse[InterpIdx].Regs[RegIdx], VirtReg);
- addLiveIn(&MF, MRI, NewReg, VirtReg);
- }
- }
-
- return false;
-}
-
-void SIAssignInterpRegsPass::addLiveIn(MachineFunction * MF,
- MachineRegisterInfo & MRI,
- unsigned physReg, unsigned virtReg) {
- const TargetInstrInfo * TII = TM.getInstrInfo();
- if (!MRI.isLiveIn(physReg)) {
- MRI.addLiveIn(physReg, virtReg);
- MF->front().addLiveIn(physReg);
- BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
- TII->get(TargetOpcode::COPY), virtReg)
- .addReg(physReg);
- } else {
- MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg));
- }
-}
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 0a0fbd9..93f8c38 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -14,10 +14,13 @@
#include "SIISelLowering.h"
#include "AMDIL.h"
+#include "AMDGPU.h"
#include "AMDILIntrinsicInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -28,30 +31,41 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
AMDGPUTargetLowering(TM),
TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())),
TRI(TM.getRegisterInfo()) {
- addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
- addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
- addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
- addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
+
addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
+ addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
+
+ addRegisterClass(MVT::v16i8, &AMDGPU::SReg_128RegClass);
+ addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
+ addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
+
+ addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
+ addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
addRegisterClass(MVT::v1i32, &AMDGPU::VReg_32RegClass);
+
addRegisterClass(MVT::v2i32, &AMDGPU::VReg_64RegClass);
+ addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass);
+
addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass);
+ addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
+
addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
+ addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
+
addRegisterClass(MVT::v16i32, &AMDGPU::VReg_512RegClass);
+ addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
computeRegisterProperties();
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);
+
setOperationAction(ISD::ADD, MVT::i64, Legal);
setOperationAction(ISD::ADD, MVT::i32, Legal);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
-
- // We need to custom lower loads from the USER_SGPR address space, so we can
- // add the SGPRs as livein registers.
- setOperationAction(ISD::LOAD, MVT::i32, Custom);
- setOperationAction(ISD::LOAD, MVT::i64, Custom);
-
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
@@ -59,6 +73,137 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::SETCC);
+
+ setSchedulingPreference(Sched::Source);
+}
+
+SDValue SITargetLowering::LowerFormalArguments(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ FunctionType *FType = MF.getFunction()->getFunctionType();
+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
+ assert(CallConv == CallingConv::C);
+
+ SmallVector<ISD::InputArg, 16> Splits;
+ uint32_t Skipped = 0;
+
+ for (unsigned i = 0, e = Ins.size(), PSInputNum = 0; i != e; ++i) {
+ const ISD::InputArg &Arg = Ins[i];
+
+ // First, check if it's a PS input addr
+ if (Info->ShaderType == ShaderType::PIXEL && !Arg.Flags.isInReg()) {
+
+ assert((PSInputNum <= 15) && "Too many PS inputs!");
+
+ if (!Arg.Used) {
+ // We can safely skip PS inputs
+ Skipped |= 1 << i;
+ ++PSInputNum;
+ continue;
+ }
+
+ Info->PSInputAddr |= 1 << PSInputNum++;
+ }
+
+ // Second, split vertices into their elements
+ if (Arg.VT.isVector()) {
+ ISD::InputArg NewArg = Arg;
+ NewArg.Flags.setSplit();
+ NewArg.VT = Arg.VT.getVectorElementType();
+
+ // We REALLY want the ORIGINAL number of vertex elements here, e.g. a
+ // three or five element vertex only needs three or five registers,
+ // NOT four or eight.
+ Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
+ unsigned NumElements = ParamType->getVectorNumElements();
+
+ for (unsigned j = 0; j != NumElements; ++j) {
+ Splits.push_back(NewArg);
+ NewArg.PartOffset += NewArg.VT.getStoreSize();
+ }
+
+ } else {
+ Splits.push_back(Arg);
+ }
+ }
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // At least one interpolation mode must be enabled or else the GPU will hang.
+ if (Info->ShaderType == ShaderType::PIXEL && (Info->PSInputAddr & 0x7F) == 0) {
+ Info->PSInputAddr |= 1;
+ CCInfo.AllocateReg(AMDGPU::VGPR0);
+ CCInfo.AllocateReg(AMDGPU::VGPR1);
+ }
+
+ AnalyzeFormalArguments(CCInfo, Splits);
+
+ for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
+
+ if (Skipped & (1 << i)) {
+ InVals.push_back(SDValue());
+ continue;
+ }
+
+ CCValAssign &VA = ArgLocs[ArgIdx++];
+ assert(VA.isRegLoc() && "Parameter must be in a register!");
+
+ unsigned Reg = VA.getLocReg();
+ MVT VT = VA.getLocVT();
+
+ if (VT == MVT::i64) {
+ // For now, assume it is a pointer
+ Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0,
+ &AMDGPU::SReg_64RegClass);
+ Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass);
+ InVals.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
+ continue;
+ }
+
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
+
+ Reg = MF.addLiveIn(Reg, RC);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+
+ const ISD::InputArg &Arg = Ins[i];
+ if (Arg.VT.isVector()) {
+
+ // Build a vector from the registers
+ Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
+ unsigned NumElements = ParamType->getVectorNumElements();
+
+ SmallVector<SDValue, 4> Regs;
+ Regs.push_back(Val);
+ for (unsigned j = 1; j != NumElements; ++j) {
+ Reg = ArgLocs[ArgIdx++].getLocReg();
+ Reg = MF.addLiveIn(Reg, RC);
+ Regs.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
+ }
+
+ // Fill up the missing vector elements
+ NumElements = Arg.VT.getVectorNumElements() - NumElements;
+ for (unsigned j = 0; j != NumElements; ++j)
+ Regs.push_back(DAG.getUNDEF(VT));
+
+ InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT,
+ Regs.data(), Regs.size()));
+ continue;
+ }
+
+ InVals.push_back(Val);
+ }
+ return Chain;
}
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
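Editor's note: LowerFormalArguments above keeps two bitmasks for pixel shaders: Skipped remembers which incoming arguments were unused (they later become empty SDValues), while PSInputAddr records, per consecutive PS input slot, which parameters the hardware must actually load. A minimal standalone sketch of that bookkeeping, assuming four declared inputs of which only 0 and 2 are used (not part of the patch):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      std::vector<bool> Used = {true, false, true, false};

      uint32_t Skipped = 0, PSInputAddr = 0;
      unsigned PSInputNum = 0;
      for (unsigned i = 0; i < Used.size(); ++i) {
        if (!Used[i]) {                      // unused input: remember and skip
          Skipped |= 1 << i;
          ++PSInputNum;
          continue;
        }
        PSInputAddr |= 1 << PSInputNum++;    // used input: enable its bit
      }

      assert(Skipped == 0xA);                // args 1 and 3 get empty SDValues
      assert(PSInputAddr == 0x5);            // hardware loads parameters 0 and 2
      return 0;
    }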
@@ -70,15 +215,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
case AMDGPU::BRANCH: return BB;
- case AMDGPU::SHADER_TYPE:
- BB->getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType =
- MI->getOperand(0).getImm();
- MI->eraseFromParent();
- break;
-
- case AMDGPU::SI_INTERP:
- LowerSI_INTERP(MI, *BB, I, MRI);
- break;
case AMDGPU::SI_WQM:
LowerSI_WQM(MI, *BB, I, MRI);
break;
@@ -94,41 +230,14 @@ void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
MI->eraseFromParent();
}
-void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
- unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
- unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
- MachineOperand dst = MI->getOperand(0);
- MachineOperand iReg = MI->getOperand(1);
- MachineOperand jReg = MI->getOperand(2);
- MachineOperand attr_chan = MI->getOperand(3);
- MachineOperand attr = MI->getOperand(4);
- MachineOperand params = MI->getOperand(5);
-
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
- .addOperand(params);
-
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp)
- .addOperand(iReg)
- .addOperand(attr_chan)
- .addOperand(attr)
- .addReg(M0);
-
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32))
- .addOperand(dst)
- .addReg(tmp)
- .addOperand(jReg)
- .addOperand(attr_chan)
- .addOperand(attr)
- .addReg(M0);
-
- MI->eraseFromParent();
-}
-
EVT SITargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i1;
}
+MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const {
+ return MVT::i32;
+}
+
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
@@ -137,20 +246,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
- case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
- case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntrinsicID =
- cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- EVT VT = Op.getValueType();
- switch (IntrinsicID) {
- case AMDGPUIntrinsic::SI_vs_load_buffer_index:
- return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
- AMDGPU::VGPR0, VT);
- default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
- }
- break;
- }
}
return SDValue();
}
@@ -249,47 +345,6 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
return Chain;
}
-SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- LoadSDNode *Ptr = dyn_cast<LoadSDNode>(Op);
-
- assert(Ptr);
-
- unsigned AddrSpace = Ptr->getPointerInfo().getAddrSpace();
-
- // We only need to lower USER_SGPR address space loads
- if (AddrSpace != AMDGPUAS::USER_SGPR_ADDRESS) {
- return SDValue();
- }
-
- // Loads from the USER_SGPR address space can only have constant value
- // pointers.
- ConstantSDNode *BasePtr = dyn_cast<ConstantSDNode>(Ptr->getBasePtr());
- assert(BasePtr);
-
- unsigned TypeDwordWidth = VT.getSizeInBits() / 32;
- const TargetRegisterClass * dstClass;
- switch (TypeDwordWidth) {
- default:
- assert(!"USER_SGPR value size not implemented");
- return SDValue();
- case 1:
- dstClass = &AMDGPU::SReg_32RegClass;
- break;
- case 2:
- dstClass = &AMDGPU::SReg_64RegClass;
- break;
- }
- uint64_t Index = BasePtr->getZExtValue();
- assert(Index % TypeDwordWidth == 0 && "USER_SGPR not properly aligned");
- unsigned SGPRIndex = Index / TypeDwordWidth;
- unsigned Reg = dstClass->getRegister(SGPRIndex);
-
- DAG.ReplaceAllUsesOfValueWith(Op, CreateLiveInRegister(DAG, dstClass, Reg,
- VT));
- return SDValue();
-}
-
SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index 737162f..d656225 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -24,14 +24,9 @@ class SITargetLowering : public AMDGPUTargetLowering {
const SIInstrInfo * TII;
const TargetRegisterInfo * TRI;
- void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, unsigned Opocde) const;
- void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
- SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
@@ -43,9 +38,17 @@ class SITargetLowering : public AMDGPUTargetLowering {
public:
SITargetLowering(TargetMachine &tm);
+
+ SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
MachineBasicBlock * BB) const;
virtual EVT getSetCCResultType(EVT VT) const;
+ virtual MVT getScalarShiftAmountTy(EVT VT) const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
index 24fc929..98bd3db 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -88,6 +88,9 @@ private:
MachineBasicBlock::iterator I,
const Counters &Counts);
+ /// \brief Do we need def2def checks?
+ bool unorderedDefines(MachineInstr &MI);
+
/// \brief Resolve all operand dependencies to counter requirements
Counters handleOperands(MachineInstr &MI);
@@ -125,7 +128,7 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
// Only consider stores or EXP for EXP_CNT
Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
- (MI.getOpcode() == AMDGPU::EXP || !MI.getDesc().mayStore()));
+ (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
 // LGKM may use larger values
if (TSFlags & SIInstrFlags::LGKM_CNT) {
@@ -311,8 +314,10 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
RegInterval Interval = getRegInterval(Op);
for (unsigned j = Interval.first; j < Interval.second; ++j) {
- if (Op.isDef())
+ if (Op.isDef()) {
increaseCounters(Result, UsedRegs[j]);
+ increaseCounters(Result, DefinedRegs[j]);
+ }
if (Op.isUse())
increaseCounters(Result, DefinedRegs[j]);
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index fe417d6..3891ddb 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -129,12 +129,12 @@ class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm,
list<dag> pattern> : Enc32<outs, ins, asm, pattern> {
bits<7> SDST;
- bits<6> SBASE;
+ bits<7> SBASE;
bits<8> OFFSET;
let Inst{7-0} = OFFSET;
let Inst{8} = imm;
- let Inst{14-9} = SBASE;
+ let Inst{14-9} = SBASE{6-1};
let Inst{21-15} = SDST;
let Inst{26-22} = op;
let Inst{31-27} = 0x18; //encoding
@@ -292,7 +292,7 @@ class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
bits<1> ADDR64;
bits<1> LDS;
bits<8> VADDR;
- bits<5> SRSRC;
+ bits<7> SRSRC;
bits<1> SLC;
bits<1> TFE;
bits<8> SOFFSET;
@@ -307,7 +307,7 @@ class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Inst{31-26} = 0x38; //encoding
let Inst{39-32} = VADDR;
let Inst{47-40} = VDATA;
- let Inst{52-48} = SRSRC;
+ let Inst{52-48} = SRSRC{6-2};
let Inst{54} = SLC;
let Inst{55} = TFE;
let Inst{63-56} = SOFFSET;
@@ -330,7 +330,7 @@ class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
bits<4> DFMT;
bits<3> NFMT;
bits<8> VADDR;
- bits<5> SRSRC;
+ bits<7> SRSRC;
bits<1> SLC;
bits<1> TFE;
bits<8> SOFFSET;
@@ -346,7 +346,7 @@ class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Inst{31-26} = 0x3a; //encoding
let Inst{39-32} = VADDR;
let Inst{47-40} = VDATA;
- let Inst{52-48} = SRSRC;
+ let Inst{52-48} = SRSRC{6-2};
let Inst{54} = SLC;
let Inst{55} = TFE;
let Inst{63-56} = SOFFSET;
@@ -370,8 +370,8 @@ class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
bits<1> LWE;
bits<1> SLC;
bits<8> VADDR;
- bits<5> SRSRC;
- bits<5> SSAMP;
+ bits<7> SRSRC;
+ bits<7> SSAMP;
let Inst{11-8} = DMASK;
let Inst{12} = UNORM;
@@ -385,8 +385,8 @@ class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Inst{31-26} = 0x3c;
let Inst{39-32} = VADDR;
let Inst{47-40} = VDATA;
- let Inst{52-48} = SRSRC;
- let Inst{57-53} = SSAMP;
+ let Inst{52-48} = SRSRC{6-2};
+ let Inst{57-53} = SSAMP{6-2};
let VM_CNT = 1;
let EXP_CNT = 1;
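Editor's note: with the GPR2Align/GPR4Align operand wrappers removed (see the SIInstrInfo.td hunk below), the alignment is now expressed directly by the bit slice in the encoding: SBASE{6-1} drops the low bit of the 7-bit register number, while SRSRC{6-2} and SSAMP{6-2} drop the low two bits. A minimal arithmetic sketch, assuming the field value is simply the first register's number in the 64-bit pair or 128-bit quad (not part of the patch):

    #include <cassert>

    int main() {
      unsigned SBase = 6;                 // e.g. a 64-bit base starting at SGPR6
      unsigned EncodedSBase = SBase >> 1; // SBASE{6-1}: 2-dword alignment
      assert(EncodedSBase == 3);

      unsigned SRsrc = 8;                 // a 128-bit resource starting at SGPR8
      unsigned EncodedSRsrc = SRsrc >> 2; // SRSRC{6-2}: 4-dword alignment
      assert(EncodedSRsrc == 2);
      return 0;
    }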
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index d9dbd6a..de2373b 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -33,36 +33,108 @@ const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const {
void
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+
// If we are trying to copy to or from SCC, there is a bug somewhere else in
// the backend. While it may be theoretically possible to do this, it should
// never be necessary.
assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
- if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
- assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
- AMDGPU::SReg_64RegClass.contains(SrcReg));
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), RI.getSubReg(DestReg, AMDGPU::sub0))
- .addReg(RI.getSubReg(SrcReg, AMDGPU::sub0), getKillRegState(KillSrc))
- .addReg(DestReg, RegState::Define | RegState::Implicit);
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), RI.getSubReg(DestReg, AMDGPU::sub1))
- .addReg(RI.getSubReg(SrcReg, AMDGPU::sub1), getKillRegState(KillSrc));
+ const int16_t Sub0_15[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+ AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
+ AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
+ };
+
+ const int16_t Sub0_7[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
+ };
+
+ const int16_t Sub0_3[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
+ };
+
+ const int16_t Sub0_1[] = {
+ AMDGPU::sub0, AMDGPU::sub1, 0
+ };
+
+ unsigned Opcode;
+ const int16_t *SubIndices;
+
+ if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+
} else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+
+ } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
+ Opcode = AMDGPU::S_MOV_B32;
+ SubIndices = Sub0_3;
+
+ } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
+ Opcode = AMDGPU::S_MOV_B32;
+ SubIndices = Sub0_7;
+
+ } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
+ Opcode = AMDGPU::S_MOV_B32;
+ SubIndices = Sub0_15;
+
} else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
- AMDGPU::SReg_32RegClass.contains(SrcReg));
+ AMDGPU::SReg_32RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+
+ } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_64RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_1;
+
+ } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_128RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_3;
+
+ } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_256RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_7;
+
+ } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_512RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_15;
+
} else {
- assert(AMDGPU::SReg_32RegClass.contains(DestReg));
- assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
- BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ llvm_unreachable("Can't copy register!");
+ }
+
+ while (unsigned SubIdx = *SubIndices++) {
+ MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
+ get(Opcode), RI.getSubReg(DestReg, SubIdx));
+
+ Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc));
+
+ if (*SubIndices)
+ Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
}
}
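Editor's note: the rewritten copyPhysReg walks a zero-terminated list of sub-register indices and emits one 32-bit move per entry; every copy except the last also carries an implicit def of the full destination register so the wide register stays defined while the sequence is in flight. A minimal standalone sketch of that loop shape (not part of the patch; the index values are placeholders):

    #include <cassert>

    int main() {
      const short Sub0_3[] = {1, 2, 3, 4, 0};   // stand-ins for sub0..sub3
      const short *SubIndices = Sub0_3;
      unsigned Copies = 0, ImplicitDefs = 0;

      while (unsigned SubIdx = *SubIndices++) {
        (void)SubIdx;
        ++Copies;                      // one MOV per 32-bit sub-register
        if (*SubIndices)               // more copies follow: keep DestReg live
          ++ImplicitDefs;
      }

      assert(Copies == 4 && ImplicitDefs == 3);
      return 0;
    }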
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index d6c3f06..2f10c38 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -53,16 +53,6 @@ def SIOperand {
int VCC = 0x6A;
}
-class GPR4Align <RegisterClass rc> : Operand <vAny> {
- let EncoderMethod = "GPR4AlignEncode";
- let MIOperandInfo = (ops rc:$reg);
-}
-
-class GPR2Align <RegisterClass rc> : Operand <iPTR> {
- let EncoderMethod = "GPR2AlignEncode";
- let MIOperandInfo = (ops rc:$reg);
-}
-
include "SIInstrFormats.td"
//===----------------------------------------------------------------------===//
@@ -125,16 +115,17 @@ class SOPK_64 <bits<5> op, string opName, list<dag> pattern> : SOPK <
opName#" $dst, $src0", pattern
>;
-multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass> {
+multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass baseClass,
+ RegisterClass dstClass> {
def _IMM : SMRD <
op, 1, (outs dstClass:$dst),
- (ins GPR2Align<SReg_64>:$sbase, i32imm:$offset),
+ (ins baseClass:$sbase, i32imm:$offset),
asm#" $dst, $sbase, $offset", []
>;
def _SGPR : SMRD <
op, 0, (outs dstClass:$dst),
- (ins GPR2Align<SReg_64>:$sbase, SReg_32:$soff),
+ (ins baseClass:$sbase, SReg_32:$soff),
asm#" $dst, $sbase, $soff", []
>;
}
@@ -276,7 +267,7 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU
(outs),
(ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr,
- GPR4Align<SReg_128>:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
+ SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
asm#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
#" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset",
[]> {
@@ -288,7 +279,7 @@ class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF
op,
(outs regClass:$dst),
(ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
- i1imm:$lds, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc, i1imm:$slc,
+ i1imm:$lds, VReg_32:$vaddr, SReg_128:$srsrc, i1imm:$slc,
i1imm:$tfe, SSrc_32:$soffset),
asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, "
#"$lds, $vaddr, $srsrc, $slc, $tfe, $soffset",
@@ -301,7 +292,7 @@ class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF
op,
(outs regClass:$dst),
(ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
- i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc,
+ i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, SReg_128:$srsrc,
i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
#" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset",
@@ -315,7 +306,7 @@ class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
(outs VReg_128:$vdata),
(ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_32:$vaddr,
- GPR4Align<SReg_256>:$srsrc, GPR4Align<SReg_128>:$ssamp),
+ SReg_256:$srsrc, SReg_128:$ssamp),
asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
#" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp",
[]> {
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index af116f0..05b04a9 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -403,9 +403,9 @@ def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT
//def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>;
//def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>;
//def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>;
-//def BUFFER_LOAD_DWORD : MUBUF_ <0x0000000c, "BUFFER_LOAD_DWORD", []>;
-//def BUFFER_LOAD_DWORDX2 : MUBUF_DWORDX2 <0x0000000d, "BUFFER_LOAD_DWORDX2", []>;
-//def BUFFER_LOAD_DWORDX4 : MUBUF_DWORDX4 <0x0000000e, "BUFFER_LOAD_DWORDX4", []>;
+def BUFFER_LOAD_DWORD : MUBUF_Load_Helper <0x0000000c, "BUFFER_LOAD_DWORD", VReg_32>;
+def BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64>;
+def BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>;
//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>;
//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>;
//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>;
@@ -458,17 +458,31 @@ def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORM
let mayLoad = 1 in {
-defm S_LOAD_DWORD : SMRD_Helper <0x00000000, "S_LOAD_DWORD", SReg_32>;
+defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SReg_32>;
+defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>;
+defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>;
+defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>;
+defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>;
-//def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>;
-defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128>;
-defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256>;
-//def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>;
-//def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>;
-//def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>;
-//def S_BUFFER_LOAD_DWORDX4 : SMRD_DWORDX4 <0x0000000a, "S_BUFFER_LOAD_DWORDX4", []>;
-//def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>;
-//def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>;
+defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
+ 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SReg_32
+>;
+
+defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
+ 0x09, "S_BUFFER_LOAD_DWORDX2", SReg_128, SReg_64
+>;
+
+defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper <
+ 0x0a, "S_BUFFER_LOAD_DWORDX4", SReg_128, SReg_128
+>;
+
+defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper <
+ 0x0b, "S_BUFFER_LOAD_DWORDX8", SReg_128, SReg_256
+>;
+
+defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
+ 0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512
+>;
} // mayLoad = 1
@@ -840,7 +854,9 @@ defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>;
defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>;
defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>;
defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>;
-defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>;
+defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
+ [(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))]
+>;
defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>;
let isCommutable = 1 in {
@@ -1044,13 +1060,6 @@ def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
let isCodeGenOnly = 1, isPseudo = 1 in {
-def SET_M0 : InstSI <
- (outs SReg_32:$dst),
- (ins i32imm:$src0),
- "SET_M0 $dst, $src0",
- [(set SReg_32:$dst, (int_SI_set_M0 imm:$src0))]
->;
-
def LOAD_CONST : AMDGPUShaderInst <
(outs GPRF32:$dst),
(ins i32imm:$src),
@@ -1060,13 +1069,6 @@ def LOAD_CONST : AMDGPUShaderInst <
let usesCustomInserter = 1 in {
-def SI_INTERP : InstSI <
- (outs VReg_32:$dst),
- (ins VReg_32:$i, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, SReg_32:$params),
- "SI_INTERP $dst, $i, $j, $attr_chan, $attr, $params",
- []
->;
-
def SI_WQM : InstSI <
(outs),
(ins),
@@ -1147,6 +1149,31 @@ def SI_KILL : InstSI <
} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
// Uses = [EXEC], Defs = [EXEC]
+let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
+
+def SI_INDIRECT_SRC : InstSI <
+ (outs VReg_32:$dst, SReg_64:$temp),
+ (ins unknown:$src, VSrc_32:$idx, i32imm:$off),
+ "SI_INDIRECT_SRC $dst, $temp, $src, $idx, $off",
+ []
+>;
+
+class SI_INDIRECT_DST<RegisterClass rc> : InstSI <
+ (outs rc:$dst, SReg_64:$temp),
+ (ins unknown:$src, VSrc_32:$idx, i32imm:$off, VReg_32:$val),
+ "SI_INDIRECT_DST $dst, $temp, $src, $idx, $off, $val",
+ []
+> {
+ let Constraints = "$src = $dst";
+}
+
+def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
+def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
+def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
+def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
+
+} // Uses = [EXEC], Defs = [EXEC,VCC,M0]
+
} // end IsCodeGenOnly, isPseudo
def : Pat<
@@ -1255,22 +1282,83 @@ defm : SamplePatterns<VReg_128, v4i32>;
defm : SamplePatterns<VReg_256, v8i32>;
defm : SamplePatterns<VReg_512, v16i32>;
-def : Extract_Element <f32, v4f32, VReg_128, 0, sub0>;
-def : Extract_Element <f32, v4f32, VReg_128, 1, sub1>;
-def : Extract_Element <f32, v4f32, VReg_128, 2, sub2>;
-def : Extract_Element <f32, v4f32, VReg_128, 3, sub3>;
+/********** ============================================ **********/
+/********** Extraction, Insertion, Building and Casting **********/
+/********** ============================================ **********/
+
+foreach Index = 0-2 in {
+ def Extract_Element_v2i32_#Index : Extract_Element <
+ i32, v2i32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v2i32_#Index : Insert_Element <
+ i32, v2i32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v2f32_#Index : Extract_Element <
+ f32, v2f32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v2f32_#Index : Insert_Element <
+ f32, v2f32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-3 in {
+ def Extract_Element_v4i32_#Index : Extract_Element <
+ i32, v4i32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v4i32_#Index : Insert_Element <
+ i32, v4i32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sub0>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sub1>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sub2>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sub3>;
+ def Extract_Element_v4f32_#Index : Extract_Element <
+ f32, v4f32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v4f32_#Index : Insert_Element <
+ f32, v4f32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-7 in {
+ def Extract_Element_v8i32_#Index : Extract_Element <
+ i32, v8i32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v8i32_#Index : Insert_Element <
+ i32, v8i32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v8f32_#Index : Extract_Element <
+ f32, v8f32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v8f32_#Index : Insert_Element <
+ f32, v8f32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-15 in {
+ def Extract_Element_v16i32_#Index : Extract_Element <
+ i32, v16i32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v16i32_#Index : Insert_Element <
+ i32, v16i32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v16f32_#Index : Extract_Element <
+ f32, v16f32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v16f32_#Index : Insert_Element <
+ f32, v16f32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
def : Vector1_Build <v1i32, VReg_32, i32, VReg_32>;
def : Vector2_Build <v2i32, VReg_64, i32, VReg_32>;
-def : Vector_Build <v4f32, VReg_128, f32, VReg_32>;
-def : Vector_Build <v4i32, VReg_128, i32, VReg_32>;
+def : Vector2_Build <v2f32, VReg_64, f32, VReg_32>;
+def : Vector4_Build <v4i32, VReg_128, i32, VReg_32>;
+def : Vector4_Build <v4f32, VReg_128, f32, VReg_32>;
def : Vector8_Build <v8i32, VReg_256, i32, VReg_32>;
+def : Vector8_Build <v8f32, VReg_256, f32, VReg_32>;
def : Vector16_Build <v16i32, VReg_512, i32, VReg_32>;
+def : Vector16_Build <v16f32, VReg_512, f32, VReg_32>;
def : BitConvert <i32, f32, SReg_32>;
def : BitConvert <i32, f32, VReg_32>;
@@ -1305,11 +1393,6 @@ def : Pat <
/********** ================== **********/
def : Pat <
- (i1 imm:$imm),
- (S_MOV_B64 imm:$imm)
->;
-
-def : Pat <
(i32 imm:$imm),
(V_MOV_B32_e32 imm:$imm)
>;
@@ -1320,13 +1403,8 @@ def : Pat <
>;
def : Pat <
- (i32 imm:$imm),
- (S_MOV_B32 imm:$imm)
->;
-
-def : Pat <
- (f32 fpimm:$imm),
- (S_MOV_B32 fpimm:$imm)
+ (i1 imm:$imm),
+ (S_MOV_B64 imm:$imm)
>;
def : Pat <
@@ -1347,58 +1425,16 @@ def : Pat <
/********** ===================== **********/
def : Pat <
- (int_SI_fs_interp_constant imm:$attr_chan, imm:$attr, SReg_32:$params),
- (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr,
- (S_MOV_B32 SReg_32:$params))
->;
-
-def : Pat <
- (int_SI_fs_interp_linear_center imm:$attr_chan, imm:$attr, SReg_32:$params),
- (SI_INTERP (f32 LINEAR_CENTER_I), (f32 LINEAR_CENTER_J), imm:$attr_chan,
- imm:$attr, SReg_32:$params)
+ (int_SI_fs_constant imm:$attr_chan, imm:$attr, M0Reg:$params),
+ (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, M0Reg:$params)
>;
def : Pat <
- (int_SI_fs_interp_linear_centroid imm:$attr_chan, imm:$attr, SReg_32:$params),
- (SI_INTERP (f32 LINEAR_CENTROID_I), (f32 LINEAR_CENTROID_J), imm:$attr_chan,
- imm:$attr, SReg_32:$params)
->;
-
-def : Pat <
- (int_SI_fs_interp_persp_center imm:$attr_chan, imm:$attr, SReg_32:$params),
- (SI_INTERP (f32 PERSP_CENTER_I), (f32 PERSP_CENTER_J), imm:$attr_chan,
- imm:$attr, SReg_32:$params)
->;
-
-def : Pat <
- (int_SI_fs_interp_persp_centroid imm:$attr_chan, imm:$attr, SReg_32:$params),
- (SI_INTERP (f32 PERSP_CENTROID_I), (f32 PERSP_CENTROID_J), imm:$attr_chan,
- imm:$attr, SReg_32:$params)
->;
-
-def : Pat <
- (int_SI_fs_read_face),
- (f32 FRONT_FACE)
->;
-
-def : Pat <
- (int_SI_fs_read_pos 0),
- (f32 POS_X_FLOAT)
->;
-
-def : Pat <
- (int_SI_fs_read_pos 1),
- (f32 POS_Y_FLOAT)
->;
-
-def : Pat <
- (int_SI_fs_read_pos 2),
- (f32 POS_Z_FLOAT)
->;
-
-def : Pat <
- (int_SI_fs_read_pos 3),
- (f32 POS_W_FLOAT)
+ (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, VReg_64:$ij),
+ (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG VReg_64:$ij, sub0),
+ imm:$attr_chan, imm:$attr, M0Reg:$params),
+ (EXTRACT_SUBREG VReg_64:$ij, sub1),
+ imm:$attr_chan, imm:$attr, M0Reg:$params)
>;
/********** ================== **********/
@@ -1455,6 +1491,24 @@ def : Pat <
(V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0)
>;
+// 1. Offset as an 8-bit DWORD immediate
+def : Pat <
+ (int_SI_load_const SReg_128:$sbase, IMM8bitDWORD:$offset),
+ (S_BUFFER_LOAD_DWORD_IMM SReg_128:$sbase, IMM8bitDWORD:$offset)
+>;
+
+// 2. Offset loaded in a 32-bit SGPR
+def : Pat <
+ (int_SI_load_const SReg_128:$sbase, imm:$offset),
+ (S_BUFFER_LOAD_DWORD_SGPR SReg_128:$sbase, (S_MOV_B32 imm:$offset))
+>;
+
+// 3. Offset in a 32-bit VGPR
+def : Pat <
+ (int_SI_load_const SReg_128:$sbase, VReg_32:$voff),
+ (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0)
+>;
+
/********** ================== **********/
/********** VOP3 Patterns **********/
/********** ================== **********/
@@ -1489,7 +1543,51 @@ multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>;
+defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v16i8>;
+defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
+
+/********** ====================== **********/
+/********** Indirect addressing **********/
+/********** ====================== **********/
+
+multiclass SI_INDIRECT_Pattern <RegisterClass rc, ValueType vt,
+ SI_INDIRECT_DST IndDst> {
+ // 1. Extract with offset
+ def : Pat<
+ (vector_extract (vt rc:$vec),
+ (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
+ ),
+ (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off))
+ >;
+
+ // 2. Extract without offset
+ def : Pat<
+ (vector_extract (vt rc:$vec),
+ (i64 (zext (i32 VReg_32:$idx)))
+ ),
+ (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0))
+ >;
+
+ // 3. Insert with offset
+ def : Pat<
+ (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
+ (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
+ ),
+ (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off, VReg_32:$val))
+ >;
+
+ // 4. Insert without offset
+ def : Pat<
+ (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
+ (i64 (zext (i32 VReg_32:$idx)))
+ ),
+ (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0, VReg_32:$val))
+ >;
+}
+
+defm : SI_INDIRECT_Pattern <VReg_64, v2f32, SI_INDIRECT_DST_V2>;
+defm : SI_INDIRECT_Pattern <VReg_128, v4f32, SI_INDIRECT_DST_V4>;
+defm : SI_INDIRECT_Pattern <VReg_256, v8f32, SI_INDIRECT_DST_V8>;
+defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>;
} // End isSI predicate
diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
index 611b9c4..33bb815 100644
--- a/lib/Target/R600/SIIntrinsics.td
+++ b/lib/Target/R600/SIIntrinsics.td
@@ -16,13 +16,11 @@ let TargetPrefix = "SI", isTarget = 1 in {
def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
- /* XXX: We may need a seperate intrinsic here for loading integer values */
- def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
- def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
- def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ;
+ def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrReadMem]>;
+ def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ;
def int_SI_wqm : Intrinsic <[], [], []>;
- class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v8i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrReadMem]>;
+ class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrReadMem]>;
def int_SI_sample : Sample;
def int_SI_sampleb : Sample;
@@ -30,17 +28,8 @@ let TargetPrefix = "SI", isTarget = 1 in {
/* Interpolation Intrinsics */
- def int_SI_set_M0 : Intrinsic <[llvm_i32_ty], [llvm_i32_ty]>;
- class Interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
-
- def int_SI_fs_interp_linear_center : Interp;
- def int_SI_fs_interp_linear_centroid : Interp;
- def int_SI_fs_interp_persp_center : Interp;
- def int_SI_fs_interp_persp_centroid : Interp;
- def int_SI_fs_interp_constant : Interp;
-
- def int_SI_fs_read_face : Intrinsic <[llvm_float_ty], [], [IntrNoMem]>;
- def int_SI_fs_read_pos : Intrinsic <[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
+ def int_SI_fs_interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty], [IntrReadMem]>;
/* Control flow Intrinsics */
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index b215aa2..9a027e7 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -66,6 +66,7 @@ private:
static const unsigned SkipThreshold = 12;
static char ID;
+ const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
@@ -84,9 +85,14 @@ private:
void Kill(MachineInstr &MI);
void Branch(MachineInstr &MI);
+ void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
+ void IndirectSrc(MachineInstr &MI);
+ void IndirectDst(MachineInstr &MI);
+
public:
SILowerControlFlowPass(TargetMachine &tm) :
- MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
+ MachineFunctionPass(ID), TRI(tm.getRegisterInfo()),
+ TII(tm.getInstrInfo()) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -302,6 +308,104 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
MI.eraseFromParent();
}
+void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ MachineBasicBlock::iterator I = MI;
+
+ unsigned Save = MI.getOperand(1).getReg();
+ unsigned Idx = MI.getOperand(3).getReg();
+
+ if (AMDGPU::SReg_32RegClass.contains(Idx)) {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(Idx);
+ MBB.insert(I, MovRel);
+ MI.eraseFromParent();
+ return;
+ }
+
+ assert(AMDGPU::SReg_64RegClass.contains(Save));
+ assert(AMDGPU::VReg_32RegClass.contains(Idx));
+
+ // Save the EXEC mask
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save)
+ .addReg(AMDGPU::EXEC);
+
+ // Read the next variant into VCC (lower 32 bits) <- also loop target
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32_e32), AMDGPU::VCC)
+ .addReg(Idx);
+
+ // Move index from VCC into M0
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(AMDGPU::VCC);
+
+ // Compare the just-read M0 value to all possible Idx values
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
+ .addReg(AMDGPU::M0)
+ .addReg(Idx);
+
+ // Update EXEC, save the original EXEC value to VCC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
+ .addReg(AMDGPU::VCC);
+
+ // Do the actual move
+ MBB.insert(I, MovRel);
+
+ // Update EXEC, switch all done bits to 0 and all todo bits to 1
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(AMDGPU::VCC);
+
+ // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addImm(-7)
+ .addReg(AMDGPU::EXEC);
+
+ // Restore EXEC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ .addReg(Save);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Vec = MI.getOperand(2).getReg();
+ unsigned Off = MI.getOperand(4).getImm();
+
+ MachineInstr *MovRel =
+ BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
+ .addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off)
+ .addReg(AMDGPU::M0, RegState::Implicit)
+ .addReg(Vec, RegState::Implicit);
+
+ LoadM0(MI, MovRel);
+}
+
+void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Off = MI.getOperand(4).getImm();
+ unsigned Val = MI.getOperand(5).getReg();
+
+ MachineInstr *MovRel =
+ BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
+ .addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define)
+ .addReg(Val)
+ .addReg(AMDGPU::M0, RegState::Implicit)
+ .addReg(Dst, RegState::Implicit);
+
+ LoadM0(MI, MovRel);
+}
+
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
bool HaveKill = false;
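Editor's note: the LoadM0 helper above builds a per-lane "waterfall" loop: V_READFIRSTLANE_B32 picks the index held by the first still-active lane, all lanes holding that same index execute the MOVREL and are masked out of EXEC, and the loop repeats until EXEC is empty. A minimal scalar simulation of that control flow, assuming a toy 4-lane wavefront instead of the real 64 lanes (not part of the patch):

    #include <cassert>
    #include <vector>

    int main() {
      std::vector<unsigned> Idx = {2, 0, 2, 1};   // per-lane indices (VGPR)
      std::vector<bool> Exec = {true, true, true, true};
      unsigned Iterations = 0;

      while (true) {
        int First = -1;
        for (unsigned L = 0; L < Exec.size(); ++L)
          if (Exec[L]) { First = L; break; }
        if (First < 0) break;                     // EXEC empty: loop finished

        unsigned M0 = Idx[First];                 // V_READFIRSTLANE_B32
        ++Iterations;                             // one MOVREL with this index
        for (unsigned L = 0; L < Exec.size(); ++L)
          if (Exec[L] && Idx[L] == M0)
            Exec[L] = false;                      // served lanes leave EXEC
      }

      assert(Iterations == 3);                    // three distinct index values
      return 0;
    }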
@@ -363,6 +467,17 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::S_BRANCH:
Branch(MI);
break;
+
+ case AMDGPU::SI_INDIRECT_SRC:
+ IndirectSrc(MI);
+ break;
+
+ case AMDGPU::SI_INDIRECT_DST_V2:
+ case AMDGPU::SI_INDIRECT_DST_V4:
+ case AMDGPU::SI_INDIRECT_DST_V8:
+ case AMDGPU::SI_INDIRECT_DST_V16:
+ IndirectDst(MI);
+ break;
}
}
}
diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp
index 7e59b42..1a4e4cb 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.cpp
+++ b/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -10,11 +10,25 @@
#include "SIMachineFunctionInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
using namespace llvm;
+const char *SIMachineFunctionInfo::ShaderTypeAttribute = "ShaderType";
+
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: MachineFunctionInfo(),
- SPIPSInputAddr(0),
- ShaderType(0)
- { }
+ ShaderType(0),
+ PSInputAddr(0) {
+
+ AttributeSet Set = MF.getFunction()->getAttributes();
+ Attribute A = Set.getAttribute(AttributeSet::FunctionIndex,
+ ShaderTypeAttribute);
+
+ if (A.isStringAttribute()) {
+ StringRef Str = A.getValueAsString();
+ if (Str.getAsInteger(0, ShaderType))
+ llvm_unreachable("Can't parse shader type!");
+ }
+}
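Editor's note: the constructor above relies on StringRef::getAsInteger reporting failure by returning true, so the llvm_unreachable only fires when the "ShaderType" attribute value is not numeric. A minimal sketch of that convention (not part of the patch; it only assumes the LLVM StringRef header is available):

    #include <cassert>
    #include "llvm/ADT/StringRef.h"

    int main() {
      unsigned ShaderType = 0;
      llvm::StringRef Good("1"), Bad("pixel");
      assert(!Good.getAsInteger(0, ShaderType) && ShaderType == 1);
      assert(Bad.getAsInteger(0, ShaderType));  // parse failure reported as true
      return 0;
    }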
diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h
index 47271f5..91a809b 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.h
+++ b/lib/Target/R600/SIMachineFunctionInfo.h
@@ -23,9 +23,11 @@ namespace llvm {
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo : public MachineFunctionInfo {
public:
+ static const char *ShaderTypeAttribute;
+
SIMachineFunctionInfo(const MachineFunction &MF);
- unsigned SPIPSInputAddr;
unsigned ShaderType;
+ unsigned PSInputAddr;
};
} // End namespace llvm
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 9e04e24..4f14931 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -34,32 +34,6 @@ foreach Index = 0-255 in {
}
}
-// virtual Interpolation registers
-def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">;
-def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">;
-def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">;
-def PERSP_CENTER_J : SIReg <"PERSP_CENTER_J">;
-def PERSP_CENTROID_I : SIReg <"PERSP_CENTROID_I">;
-def PERSP_CENTROID_J : SIReg <"PERP_CENTROID_J">;
-def PERSP_I_W : SIReg <"PERSP_I_W">;
-def PERSP_J_W : SIReg <"PERSP_J_W">;
-def PERSP_1_W : SIReg <"PERSP_1_W">;
-def LINEAR_SAMPLE_I : SIReg <"LINEAR_SAMPLE_I">;
-def LINEAR_SAMPLE_J : SIReg <"LINEAR_SAMPLE_J">;
-def LINEAR_CENTER_I : SIReg <"LINEAR_CENTER_I">;
-def LINEAR_CENTER_J : SIReg <"LINEAR_CENTER_J">;
-def LINEAR_CENTROID_I : SIReg <"LINEAR_CENTROID_I">;
-def LINEAR_CENTROID_J : SIReg <"LINEAR_CENTROID_J">;
-def LINE_STIPPLE_TEX_COORD : SIReg <"LINE_STIPPLE_TEX_COORD">;
-def POS_X_FLOAT : SIReg <"POS_X_FLOAT">;
-def POS_Y_FLOAT : SIReg <"POS_Y_FLOAT">;
-def POS_Z_FLOAT : SIReg <"POS_Z_FLOAT">;
-def POS_W_FLOAT : SIReg <"POS_W_FLOAT">;
-def FRONT_FACE : SIReg <"FRONT_FACE">;
-def ANCILLARY : SIReg <"ANCILLARY">;
-def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">;
-def POS_FIXED_PT : SIReg <"POS_FIXED_PT">;
-
//===----------------------------------------------------------------------===//
// Groupings using register classes and tuples
//===----------------------------------------------------------------------===//
@@ -177,22 +151,22 @@ def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64,
(add SGPR_64, VCCReg, EXECReg)
>;
-def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>;
+def SReg_128 : RegisterClass<"AMDGPU", [v16i8], 128, (add SGPR_128)>;
-def SReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add SGPR_256)>;
+def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>;
-def SReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add SGPR_512)>;
+def SReg_512 : RegisterClass<"AMDGPU", [v64i8], 512, (add SGPR_512)>;
 // Register class for all vector registers (VGPRs + Interpolation Registers)
-def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, (add VGPR_32)>;
+def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>;
-def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>;
+def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>;
-def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>;
+def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
-def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>;
+def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>;
-def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>;
+def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
//===----------------------------------------------------------------------===//
// [SV]Src_* register classes, can have either an immediate or an register
@@ -200,28 +174,9 @@ def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>;
def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
-def SSrc_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SReg_64)>;
-
-def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
- (add VReg_32, SReg_32,
- PERSP_SAMPLE_I, PERSP_SAMPLE_J,
- PERSP_CENTER_I, PERSP_CENTER_J,
- PERSP_CENTROID_I, PERSP_CENTROID_J,
- PERSP_I_W, PERSP_J_W, PERSP_1_W,
- LINEAR_SAMPLE_I, LINEAR_SAMPLE_J,
- LINEAR_CENTER_I, LINEAR_CENTER_J,
- LINEAR_CENTROID_I, LINEAR_CENTROID_J,
- LINE_STIPPLE_TEX_COORD,
- POS_X_FLOAT,
- POS_Y_FLOAT,
- POS_Z_FLOAT,
- POS_W_FLOAT,
- FRONT_FACE,
- ANCILLARY,
- SAMPLE_COVERAGE,
- POS_FIXED_PT
- )
->;
+def SSrc_64 : RegisterClass<"AMDGPU", [i64, f64, i1], 64, (add SReg_64)>;
+
+def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>;
-def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add VReg_64, SReg_64)>;
+def VSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 138b92d..28ac02a 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -955,9 +955,7 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
// Get the condition flag.
SDValue CompareFlag;
if (LHS.getValueType() == MVT::i32) {
- std::vector<EVT> VTs;
- VTs.push_back(MVT::i32);
- VTs.push_back(MVT::Glue);
+ EVT VTs[] = { MVT::i32, MVT::Glue };
SDValue Ops[2] = { LHS, RHS };
CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
@@ -986,9 +984,8 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
SDValue CompareFlag;
if (LHS.getValueType() == MVT::i32) {
- std::vector<EVT> VTs;
- VTs.push_back(LHS.getValueType()); // subcc returns a value
- VTs.push_back(MVT::Glue);
+ // subcc returns a value
+ EVT VTs[] = { LHS.getValueType(), MVT::Glue };
SDValue Ops[2] = { LHS, RHS };
CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
Opc = SPISD::SELECT_ICC;
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 357879b..b53a1ed 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -40,7 +40,8 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index 3a9ace4..ee88ce7 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -24,6 +24,8 @@ void TargetLibraryInfo::anchor() { }
const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
{
+ "_IO_getc",
+ "_IO_putc",
"_ZdaPv",
"_ZdlPv",
"_Znaj",
@@ -38,8 +40,14 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"__cxa_guard_abort",
"__cxa_guard_acquire",
"__cxa_guard_release",
+ "__isoc99_scanf",
+ "__isoc99_sscanf",
"__memcpy_chk",
+ "__strdup",
+ "__strndup",
+ "__strtok_r",
"abs",
+ "access",
"acos",
"acosf",
"acosh",
@@ -61,6 +69,13 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"atanhf",
"atanhl",
"atanl",
+ "atof",
+ "atoi",
+ "atol",
+ "atoll",
+ "bcmp",
+ "bcopy",
+ "bzero",
"calloc",
"cbrt",
"cbrtf",
@@ -68,6 +83,10 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"ceil",
"ceilf",
"ceill",
+ "chmod",
+ "chown",
+ "clearerr",
+ "closedir",
"copysign",
"copysignf",
"copysignl",
@@ -77,6 +96,7 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"coshf",
"coshl",
"cosl",
+ "ctermid",
"exp",
"exp10",
"exp10f",
@@ -92,25 +112,66 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"fabs",
"fabsf",
"fabsl",
+ "fclose",
+ "fdopen",
+ "feof",
+ "ferror",
+ "fflush",
"ffs",
"ffsl",
"ffsll",
+ "fgetc",
+ "fgetpos",
+ "fgets",
+ "fileno",
"fiprintf",
+ "flockfile",
"floor",
"floorf",
"floorl",
"fmod",
"fmodf",
"fmodl",
+ "fopen",
+ "fopen64",
"fprintf",
"fputc",
"fputs",
+ "fread",
"free",
+ "frexp",
+ "frexpf",
+ "frexpl",
+ "fscanf",
+ "fseek",
+ "fseeko",
+ "fseeko64",
+ "fsetpos",
+ "fstat",
+ "fstat64",
+ "fstatvfs",
+ "fstatvfs64",
+ "ftell",
+ "ftello",
+ "ftello64",
+ "ftrylockfile",
+ "funlockfile",
"fwrite",
+ "getc",
+ "getc_unlocked",
+ "getchar",
+ "getenv",
+ "getitimer",
+ "getlogin_r",
+ "getpwnam",
+ "gets",
+ "htonl",
+ "htons",
"iprintf",
"isascii",
"isdigit",
"labs",
+ "lchown",
"llabs",
"log",
"log10",
@@ -127,31 +188,64 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"logbl",
"logf",
"logl",
+ "lstat",
+ "lstat64",
"malloc",
+ "memalign",
+ "memccpy",
"memchr",
"memcmp",
"memcpy",
"memmove",
+ "memrchr",
"memset",
"memset_pattern16",
+ "mkdir",
+ "mktime",
+ "modf",
+ "modff",
+ "modfl",
"nearbyint",
"nearbyintf",
"nearbyintl",
+ "ntohl",
+ "ntohs",
+ "open",
+ "open64",
+ "opendir",
+ "pclose",
+ "perror",
+ "popen",
"posix_memalign",
"pow",
"powf",
"powl",
+ "pread",
"printf",
+ "putc",
"putchar",
"puts",
+ "pwrite",
+ "qsort",
+ "read",
+ "readlink",
"realloc",
"reallocf",
+ "realpath",
+ "remove",
+ "rename",
+ "rewind",
"rint",
"rintf",
"rintl",
+ "rmdir",
"round",
"roundf",
"roundl",
+ "scanf",
+ "setbuf",
+ "setitimer",
+ "setvbuf",
"sin",
"sinf",
"sinh",
@@ -159,18 +253,28 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"sinhl",
"sinl",
"siprintf",
+ "snprintf",
"sprintf",
"sqrt",
"sqrtf",
"sqrtl",
+ "sscanf",
+ "stat",
+ "stat64",
+ "statvfs",
+ "statvfs64",
"stpcpy",
+ "stpncpy",
+ "strcasecmp",
"strcat",
"strchr",
"strcmp",
+ "strcoll",
"strcpy",
"strcspn",
"strdup",
"strlen",
+ "strncasecmp",
"strncat",
"strncmp",
"strncpy",
@@ -182,22 +286,43 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"strstr",
"strtod",
"strtof",
+ "strtok",
+ "strtok_r",
"strtol",
"strtold",
"strtoll",
"strtoul",
"strtoull",
+ "strxfrm",
+ "system",
"tan",
"tanf",
"tanh",
"tanhf",
"tanhl",
"tanl",
+ "times",
+ "tmpfile",
+ "tmpfile64",
"toascii",
"trunc",
"truncf",
"truncl",
- "valloc"
+ "uname",
+ "ungetc",
+ "unlink",
+ "unsetenv",
+ "utime",
+ "utimes",
+ "valloc",
+ "vfprintf",
+ "vfscanf",
+ "vprintf",
+ "vscanf",
+ "vsnprintf",
+ "vsprintf",
+ "vsscanf",
+ "write"
};
/// initialize - Initialize the set of available library functions based on the
@@ -259,7 +384,9 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T,
TLI.setUnavailable(LibFunc::fabsl);
TLI.setUnavailable(LibFunc::floorl);
TLI.setUnavailable(LibFunc::fmodl);
+ TLI.setUnavailable(LibFunc::frexpl);
TLI.setUnavailable(LibFunc::logl);
+ TLI.setUnavailable(LibFunc::modfl);
TLI.setUnavailable(LibFunc::powl);
TLI.setUnavailable(LibFunc::sinl);
TLI.setUnavailable(LibFunc::sinhl);
@@ -336,16 +463,67 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T,
TLI.setUnavailable(LibFunc::tanhf);
}
- // Win32 does *not* provide stpcpy. It is provided on POSIX systems:
- // http://pubs.opengroup.org/onlinepubs/9699919799/functions/stpcpy.html
- TLI.setUnavailable(LibFunc::stpcpy);
-
- // Win32 does *not* provide ffs. It is provided on POSIX systems:
- // http://pubs.opengroup.org/onlinepubs/009695399/functions/ffs.html
+ // Win32 does *not* provide these functions, but they are
+ // generally available on POSIX-compliant systems:
+ TLI.setUnavailable(LibFunc::access);
+ TLI.setUnavailable(LibFunc::bcmp);
+ TLI.setUnavailable(LibFunc::bcopy);
+ TLI.setUnavailable(LibFunc::bzero);
+ TLI.setUnavailable(LibFunc::chmod);
+ TLI.setUnavailable(LibFunc::chown);
+ TLI.setUnavailable(LibFunc::closedir);
+ TLI.setUnavailable(LibFunc::ctermid);
+ TLI.setUnavailable(LibFunc::fdopen);
TLI.setUnavailable(LibFunc::ffs);
+ TLI.setUnavailable(LibFunc::fileno);
+ TLI.setUnavailable(LibFunc::flockfile);
+ TLI.setUnavailable(LibFunc::fseeko);
+ TLI.setUnavailable(LibFunc::fstat);
+ TLI.setUnavailable(LibFunc::fstatvfs);
+ TLI.setUnavailable(LibFunc::ftello);
+ TLI.setUnavailable(LibFunc::ftrylockfile);
+ TLI.setUnavailable(LibFunc::funlockfile);
+ TLI.setUnavailable(LibFunc::getc_unlocked);
+ TLI.setUnavailable(LibFunc::getitimer);
+ TLI.setUnavailable(LibFunc::getlogin_r);
+ TLI.setUnavailable(LibFunc::getpwnam);
+ TLI.setUnavailable(LibFunc::htonl);
+ TLI.setUnavailable(LibFunc::htons);
+ TLI.setUnavailable(LibFunc::lchown);
+ TLI.setUnavailable(LibFunc::lstat);
+ TLI.setUnavailable(LibFunc::memccpy);
+ TLI.setUnavailable(LibFunc::mkdir);
+ TLI.setUnavailable(LibFunc::ntohl);
+ TLI.setUnavailable(LibFunc::ntohs);
+ TLI.setUnavailable(LibFunc::open);
+ TLI.setUnavailable(LibFunc::opendir);
+ TLI.setUnavailable(LibFunc::pclose);
+ TLI.setUnavailable(LibFunc::popen);
+ TLI.setUnavailable(LibFunc::pread);
+ TLI.setUnavailable(LibFunc::pwrite);
+ TLI.setUnavailable(LibFunc::read);
+ TLI.setUnavailable(LibFunc::readlink);
+ TLI.setUnavailable(LibFunc::realpath);
+ TLI.setUnavailable(LibFunc::rmdir);
+ TLI.setUnavailable(LibFunc::setitimer);
+ TLI.setUnavailable(LibFunc::stat);
+ TLI.setUnavailable(LibFunc::statvfs);
+ TLI.setUnavailable(LibFunc::stpcpy);
+ TLI.setUnavailable(LibFunc::stpncpy);
+ TLI.setUnavailable(LibFunc::strcasecmp);
+ TLI.setUnavailable(LibFunc::strncasecmp);
+ TLI.setUnavailable(LibFunc::times);
+ TLI.setUnavailable(LibFunc::uname);
+ TLI.setUnavailable(LibFunc::unlink);
+ TLI.setUnavailable(LibFunc::unsetenv);
+ TLI.setUnavailable(LibFunc::utime);
+ TLI.setUnavailable(LibFunc::utimes);
+ TLI.setUnavailable(LibFunc::write);
- // Win32 does *not* provide llabs. It is defined in ISO/IEC 9899:1999,
- // but Visual C++ does not support it.
+ // Win32 does *not* provide these functions, but they are
+ // specified by C99:
+ TLI.setUnavailable(LibFunc::atoll);
+ TLI.setUnavailable(LibFunc::frexpf);
TLI.setUnavailable(LibFunc::llabs);
}
@@ -375,6 +553,27 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T,
default:
TLI.setUnavailable(LibFunc::ffsll);
}
+
+ // The following functions are available on at least Linux:
+ if (T.getOS() != Triple::Linux) {
+ TLI.setUnavailable(LibFunc::dunder_strdup);
+ TLI.setUnavailable(LibFunc::dunder_strtok_r);
+ TLI.setUnavailable(LibFunc::dunder_isoc99_scanf);
+ TLI.setUnavailable(LibFunc::dunder_isoc99_sscanf);
+ TLI.setUnavailable(LibFunc::under_IO_getc);
+ TLI.setUnavailable(LibFunc::under_IO_putc);
+ TLI.setUnavailable(LibFunc::memalign);
+ TLI.setUnavailable(LibFunc::fopen64);
+ TLI.setUnavailable(LibFunc::fseeko64);
+ TLI.setUnavailable(LibFunc::fstat64);
+ TLI.setUnavailable(LibFunc::fstatvfs64);
+ TLI.setUnavailable(LibFunc::ftello64);
+ TLI.setUnavailable(LibFunc::lstat64);
+ TLI.setUnavailable(LibFunc::open64);
+ TLI.setUnavailable(LibFunc::stat64);
+ TLI.setUnavailable(LibFunc::statvfs64);
+ TLI.setUnavailable(LibFunc::tmpfile64);
+ }
}
@@ -398,11 +597,40 @@ TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI)
CustomNames = TLI.CustomNames;
}
+namespace {
+struct StringComparator {
+ /// Compare two strings and return true if LHS is lexicographically less than
+ /// RHS. Requires that RHS doesn't contain any zero bytes.
+ bool operator()(const char *LHS, StringRef RHS) const {
+ // Compare prefixes with strncmp. If prefixes match we know that LHS is
+ // greater or equal to RHS as RHS can't contain any '\0'.
+ return std::strncmp(LHS, RHS.data(), RHS.size()) < 0;
+ }
+
+ // Provided for compatibility with MSVC's debug mode.
+ bool operator()(StringRef LHS, const char *RHS) const { return LHS < RHS; }
+ bool operator()(StringRef LHS, StringRef RHS) const { return LHS < RHS; }
+ bool operator()(const char *LHS, const char *RHS) const {
+ return std::strcmp(LHS, RHS) < 0;
+ }
+};
+}
+
bool TargetLibraryInfo::getLibFunc(StringRef funcName,
LibFunc::Func &F) const {
const char **Start = &StandardNames[0];
const char **End = &StandardNames[LibFunc::NumLibFuncs];
- const char **I = std::lower_bound(Start, End, funcName);
+
+ // Filter out empty names and names containing null bytes; those can't be in
+ // our table.
+ if (funcName.empty() || funcName.find('\0') != StringRef::npos)
+ return false;
+
+ // Check for \01 prefix that is used to mangle __asm declarations and
+ // strip it if present.
+ if (funcName.front() == '\01')
+ funcName = funcName.substr(1);
+ const char **I = std::lower_bound(Start, End, funcName, StringComparator());
if (I != End && *I == funcName) {
F = (LibFunc::Func)(I - Start);
return true;
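
The lookup above switches getLibFunc to a binary search with a custom comparator so the StringRef key can be compared against the sorted const char* name table without building temporaries. As a reading aid, here is a minimal, self-contained sketch of the same lower_bound-with-heterogeneous-comparator pattern; the Table, NameLess, and lookup names are illustrative stand-ins, not LLVM code.

#include <algorithm>
#include <cstring>
#include <iostream>
#include <string>

// Sorted table of C strings searched with std::lower_bound and a comparator
// that accepts the key type directly, mirroring StringComparator above.
struct NameLess {
  // Table entry vs. key: compare only the key's length; if the prefixes match,
  // the entry is >= the key, since the key contains no '\0'.
  bool operator()(const char *Entry, const std::string &Key) const {
    return std::strncmp(Entry, Key.data(), Key.size()) < 0;
  }
};

static const char *const Table[] = {"cos", "exp", "log", "pow", "sin", "sqrt"};

static bool lookup(const std::string &Key, unsigned &Index) {
  const char *const *Begin = std::begin(Table);
  const char *const *End = std::end(Table);
  const char *const *I = std::lower_bound(Begin, End, Key, NameLess());
  if (I != End && *I == Key) {
    Index = static_cast<unsigned>(I - Begin);
    return true;
  }
  return false;
}

int main() {
  unsigned Idx;
  std::cout << (lookup("pow", Idx) ? "found at " + std::to_string(Idx)
                                   : std::string("not found"))
            << "\n";
}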
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 7d8b49c..e728251 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -61,6 +63,30 @@ TargetMachine::~TargetMachine() {
delete AsmInfo;
}
+/// \brief Reset the target options based on the function's attributes.
+void TargetMachine::resetTargetOptions(const MachineFunction *MF) const {
+ const Function *F = MF->getFunction();
+ TargetOptions &TO = MF->getTarget().Options;
+
+#define RESET_OPTION(X, Y) \
+ do { \
+ if (F->hasFnAttribute(Y)) \
+ TO.X = \
+ (F->getAttributes(). \
+ getAttribute(AttributeSet::FunctionIndex, \
+ Y).getValueAsString() == "true"); \
+ } while (0)
+
+ RESET_OPTION(NoFramePointerElim, "no-frame-pointer-elim");
+ RESET_OPTION(NoFramePointerElimNonLeaf, "no-frame-pointer-elim-non-leaf");
+ RESET_OPTION(LessPreciseFPMADOption, "less-precise-fpmad");
+ RESET_OPTION(UnsafeFPMath, "unsafe-fp-math");
+ RESET_OPTION(NoInfsFPMath, "no-infs-fp-math");
+ RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math");
+ RESET_OPTION(UseSoftFloat, "use-soft-float");
+ RESET_OPTION(DisableTailCalls, "disable-tail-calls");
+}
+
/// getRelocationModel - Returns the code generation relocation model. The
/// choices are static, PIC, and dynamic-no-pic, and target default.
Reloc::Model TargetMachine::getRelocationModel() const {
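
The RESET_OPTION macro above copies string-valued function attributes such as "unsafe-fp-math" into the matching TargetOptions fields. The standalone sketch below models that attribute-to-option override with plain containers; Options, Attrs, and resetOption are invented stand-ins for illustration, not the LLVM types.

#include <iostream>
#include <map>
#include <string>

// Toy model of resetTargetOptions: per-function string attributes override
// boolean codegen options, and an option is only touched if the attribute
// is present. The attribute names mirror the ones reset above.
struct Options {
  bool UnsafeFPMath = false;
  bool NoFramePointerElim = false;
};

using Attrs = std::map<std::string, std::string>;

static void resetOption(bool &Opt, const Attrs &A, const std::string &Name) {
  auto It = A.find(Name);
  if (It != A.end())              // attribute present on this function
    Opt = (It->second == "true"); // "true"/"false" string value, as in IR
}

int main() {
  Options TO;
  Attrs FnAttrs = {{"unsafe-fp-math", "true"}};
  resetOption(TO.UnsafeFPMath, FnAttrs, "unsafe-fp-math");
  resetOption(TO.NoFramePointerElim, FnAttrs, "no-frame-pointer-elim");
  std::cout << TO.UnsafeFPMath << " " << TO.NoFramePointerElim << "\n"; // 1 0
}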
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
index 1e4c195..79f74bd 100644
--- a/lib/Target/TargetMachineC.cpp
+++ b/lib/Target/TargetMachineC.cpp
@@ -184,7 +184,7 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
}
if (TM->addPassesToEmitFile(pass, destf, ft)) {
- error = "No DataLayout in TargetMachine";
+ error = "TargetMachine can't emit a file of this type";
*ErrorMessage = strdup(error.c_str());
return true;
}
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index b2c6d55..4ed5534a6 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -170,30 +170,35 @@ struct X86Operand : public MCParsedAsmOperand {
SMLoc OffsetOfLoc;
bool AddressOf;
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNo;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ bool NeedAsmRewrite;
+ };
+
+ struct MemOp {
+ unsigned SegReg;
+ const MCExpr *Disp;
+ unsigned BaseReg;
+ unsigned IndexReg;
+ unsigned Scale;
+ unsigned Size;
+ bool NeedSizeDir;
+ };
+
union {
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNo;
- } Reg;
-
- struct {
- const MCExpr *Val;
- bool NeedAsmRewrite;
- } Imm;
-
- struct {
- unsigned SegReg;
- const MCExpr *Disp;
- unsigned BaseReg;
- unsigned IndexReg;
- unsigned Scale;
- unsigned Size;
- bool NeedSizeDir;
- } Mem;
+ struct TokOp Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ struct MemOp Mem;
};
X86Operand(KindTy K, SMLoc Start, SMLoc End)
@@ -1734,242 +1739,74 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
return false;
}
-bool X86AsmParser::
-processInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
- switch (Inst.getOpcode()) {
- default: return false;
- case X86::AND16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::AND16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::AND32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::AND32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::AND64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::AND64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::XOR16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::XOR16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::XOR32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::XOR32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::XOR64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::XOR64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::OR16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::OR16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::OR32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::OR32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::OR64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::OR64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::CMP16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::CMP16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::CMP32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::CMP32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::CMP64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
+static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
+ bool isCmp) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode);
+ if (!isCmp)
+ TmpInst.addOperand(MCOperand::CreateReg(Reg));
+ TmpInst.addOperand(MCOperand::CreateReg(Reg));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::CMP64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::ADD16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
+static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
+ bool isCmp = false) {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
+ return false;
- MCInst TmpInst;
- TmpInst.setOpcode(X86::ADD16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::ADD32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
+ return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::ADD32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::ADD64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
+static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
+ bool isCmp = false) {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
+ return false;
- MCInst TmpInst;
- TmpInst.setOpcode(X86::ADD64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::SUB16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
+ return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::SUB16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::SUB32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
+static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
+ bool isCmp = false) {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
+ return false;
- MCInst TmpInst;
- TmpInst.setOpcode(X86::SUB32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::SUB64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
+ return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::SUB64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
+bool X86AsmParser::
+processInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
+ switch (Inst.getOpcode()) {
+ default: return false;
+ case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
+ case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
+ case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
+ case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
+ case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
+ case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
+ case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
+ case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
+ case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
+ case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
+ case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
+ case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
+ case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
+ case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
+ case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
+ case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
+ case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
+ case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
+ case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
+ case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
+ case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
+ case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
+ case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
+ case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
}
}
@@ -2080,7 +1917,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// Check for the various suffix matches.
Tmp[Base.size()] = Suffixes[0];
unsigned ErrorInfoIgnore;
- unsigned ErrorInfoMissingFeature;
+ unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
unsigned Match1, Match2, Match3, Match4;
Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index acc90ec..598ddee 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -315,18 +315,18 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
return true;
}
- // Write an optimal sequence for the first 15 bytes.
- const uint64_t OptimalCount = (Count < 16) ? Count : 15;
- const uint64_t Prefixes = OptimalCount <= 10 ? 0 : OptimalCount - 10;
- for (uint64_t i = 0, e = Prefixes; i != e; i++)
- OW->Write8(0x66);
- const uint64_t Rest = OptimalCount - Prefixes;
- for (uint64_t i = 0, e = Rest; i != e; i++)
- OW->Write8(Nops[Rest - 1][i]);
-
- // Finish with single byte nops.
- for (uint64_t i = OptimalCount, e = Count; i != e; ++i)
- OW->Write8(0x90);
+ // 15 bytes is the longest single nop instruction. Emit as many 15-byte nops as
+ // needed, then emit a nop of the remaining length.
+ do {
+ const uint8_t ThisNopLength = (uint8_t) std::min(Count, (uint64_t) 15);
+ const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
+ for (uint8_t i = 0; i < Prefixes; i++)
+ OW->Write8(0x66);
+ const uint8_t Rest = ThisNopLength - Prefixes;
+ for (uint8_t i = 0; i < Rest; i++)
+ OW->Write8(Nops[Rest - 1][i]);
+ Count -= ThisNopLength;
+ } while (Count != 0);
return true;
}
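
The rewritten writeNopData above emits padding as a sequence of at-most-15-byte nops, adding 0x66 prefixes once a chunk is longer than the 10-byte table entries. The sketch below exercises just that chunking arithmetic in isolation; the example Count and the printed text are assumptions for the demo, and only the length/prefix math mirrors the loop above.

#include <algorithm>
#include <cstdint>
#include <iostream>

// Standalone model of the chunking in writeNopData: split Count bytes of
// padding into nops of at most 15 bytes, using 0x66 prefixes for any chunk
// longer than 10 bytes. Only the lengths are computed here.
int main() {
  uint64_t Count = 37; // example padding size, chosen for the demo
  while (Count != 0) {
    uint8_t ThisNopLength = static_cast<uint8_t>(std::min<uint64_t>(Count, 15));
    uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
    uint8_t Rest = ThisNopLength - Prefixes;
    std::cout << "emit " << unsigned(Prefixes) << " x 0x66 prefix + "
              << unsigned(Rest) << "-byte nop\n";
    Count -= ThisNopLength;
  }
}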
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 122204a..5fbefae 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -446,6 +446,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
raw_ostream &OS) const {
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
// VEX_R: opcode extension equivalent to REX.R in
// 1's complement (inverted) form
@@ -650,12 +651,19 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// dst(ModR/M), src1(ModR/M)
// dst(ModR/M), src1(ModR/M), imm8
//
+ // FMA4:
+ // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
CurOp++;
if (HasVEX_4V)
VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (HasMemOp4) // Skip second register source (encoded in I8IMM)
+ CurOp++;
+
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
CurOp++;
@@ -666,9 +674,15 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// MRMDestReg instructions forms:
// dst(ModR/M), src(ModR/M)
// dst(ModR/M), src(ModR/M), imm8
- if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ // dst(ModR/M), src1(VEX_4V), src2(ModR/M)
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
- if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ CurOp++;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
break;
case X86II::MRM0r: case X86II::MRM1r:
@@ -1038,9 +1052,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRMDestReg:
EmitByte(BaseOpcode, CurByte, OS);
+ SrcRegNum = CurOp + 1;
+
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ ++SrcRegNum;
+
EmitRegModRMByte(MI.getOperand(CurOp),
- GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS);
- CurOp += 2;
+ GetX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS);
+ CurOp = SrcRegNum + 1;
break;
case X86II::MRMDestMem:
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index ece38aa..2518e02 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -816,6 +816,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
const MCInstrDesc *Desc) const {
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
// VEX_R: opcode extension equivalent to REX.R in
// 1's complement (inverted) form
@@ -1032,6 +1033,10 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
if (HasVEX_4V)
VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (HasMemOp4) // Skip second register source (encoded in I8IMM)
+ CurOp++;
+
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
CurOp++;
@@ -1042,9 +1047,15 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
// MRMDestReg instructions forms:
// dst(ModR/M), src(ModR/M)
// dst(ModR/M), src(ModR/M), imm8
- if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ // dst(ModR/M), src1(VEX_4V), src2(ModR/M)
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
- if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ CurOp++;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
break;
case X86II::MRM0r: case X86II::MRM1r:
@@ -1279,9 +1290,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case X86II::MRMDestReg: {
MCE.emitByte(BaseOpcode);
+
+ unsigned SrcRegNum = CurOp+1;
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ SrcRegNum++;
+
emitRegModRMByte(MI.getOperand(CurOp).getReg(),
- getX86RegNum(MI.getOperand(CurOp+1).getReg()));
- CurOp += 2;
+ getX86RegNum(MI.getOperand(SrcRegNum).getReg()));
+ CurOp = SrcRegNum + 1;
break;
}
case X86II::MRMDestMem: {
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index b5c3270..85155f5 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1526,6 +1526,9 @@ bool X86FastISel::FastLowerArguments() {
if (!FuncInfo.CanLowerReturn)
return false;
+ if (Subtarget->isTargetWindows())
+ return false;
+
const Function *F = FuncInfo.Fn;
if (F->isVarArg())
return false;
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index a05cf5c..54cbd40 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -1386,7 +1386,6 @@ HasNestArgument(const MachineFunction *MF) {
return false;
}
-
/// GetScratchRegister - Get a temp register for performing work in the
/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
/// and the properties of the function either one or two registers will be
@@ -1612,22 +1611,21 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
#endif
}
-// Erlang programs may need a special prologue to handle the stack size they
-// might need at runtime. That is because Erlang/OTP does not implement a C
-// stack but uses a custom implementation of hybrid stack/heap
-// architecture. (for more information see Eric Stenman's Ph.D. thesis:
-// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
-//
-//
-// CheckStack:
-// temp0 = sp - MaxStack
-// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
-// OldStart:
-// ...
-// IncStack:
-// call inc_stack # doubles the stack space
-// temp0 = sp - MaxStack
-// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
+/// Erlang programs may need a special prologue to handle the stack size they
+/// might need at runtime. That is because Erlang/OTP does not implement a C
+/// stack but uses a custom implementation of a hybrid stack/heap architecture.
+/// (for more information see Eric Stenman's Ph.D. thesis:
+/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
+///
+/// CheckStack:
+/// temp0 = sp - MaxStack
+/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
+/// OldStart:
+/// ...
+/// IncStack:
+/// call inc_stack # doubles the stack space
+/// temp0 = sp - MaxStack
+/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
const X86InstrInfo &TII = *TM.getInstrInfo();
MachineFrameInfo *MFI = MF.getFrameInfo();
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1c3b9ae..e6858bc 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -85,6 +85,11 @@ static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
* ElemsPerChunk);
+ // If the input is a buildvector just emit a smaller one.
+ if (Vec.getOpcode() == ISD::BUILD_VECTOR)
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
+ Vec->op_begin()+NormalizedIdxVal, ElemsPerChunk);
+
SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
VecIdx);
@@ -181,9 +186,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setSchedulingPreference(Sched::RegPressure);
setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
- // Bypass i32 with i8 on Atom when compiling with O2
- if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default)
+ // Bypass expensive divides on Atom when compiling with O2
+ if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) {
addBypassSlowDiv(32, 8);
+ if (Subtarget->is64Bit())
+ addBypassSlowDiv(64, 16);
+ }
if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
// Setup Windows compiler runtime calls.
@@ -368,7 +376,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::BR_JT , MVT::Other, Expand);
setOperationAction(ISD::BRCOND , MVT::Other, Custom);
- setOperationAction(ISD::BR_CC , MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC , MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC , MVT::f64, Expand);
+ setOperationAction(ISD::BR_CC , MVT::f80, Expand);
+ setOperationAction(ISD::BR_CC , MVT::i8, Expand);
+ setOperationAction(ISD::BR_CC , MVT::i16, Expand);
+ setOperationAction(ISD::BR_CC , MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC , MVT::i64, Expand);
setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
@@ -4956,7 +4970,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(Opc, dl, ShVT, SrcOp,
DAG.getConstant(NumBits,
- TLI.getShiftAmountTy(SrcOp.getValueType()))));
+ TLI.getScalarShiftAmountTy(SrcOp.getValueType()))));
}
SDValue
@@ -7820,7 +7834,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
Chain.getValue(1));
}
- if (Subtarget->isTargetWindows()) {
+ if (Subtarget->isTargetWindows() || Subtarget->isTargetMingw()) {
// Just use the implicit TLS architecture
// Need to generate something similar to:
// mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage
@@ -7840,18 +7854,19 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = DAG.getEntryNode();
// Get the Thread Pointer, which is %fs:__tls_array (32-bit) or
- // %gs:0x58 (64-bit).
+ // %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly
+ // use its literal value of 0x2C.
Value *Ptr = Constant::getNullValue(Subtarget->is64Bit()
? Type::getInt8PtrTy(*DAG.getContext(),
256)
: Type::getInt32PtrTy(*DAG.getContext(),
257));
- SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain,
- Subtarget->is64Bit()
- ? DAG.getIntPtrConstant(0x58)
- : DAG.getExternalSymbol("_tls_array",
- getPointerTy()),
+ SDValue TlsArray = Subtarget->is64Bit() ? DAG.getIntPtrConstant(0x58) :
+ (Subtarget->isTargetMingw() ? DAG.getIntPtrConstant(0x2C) :
+ DAG.getExternalSymbol("_tls_array", getPointerTy()));
+
+ SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain, TlsArray,
MachinePointerInfo(Ptr),
false, false, false, 0);
@@ -12248,7 +12263,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::UINT_TO_FP: {
- if (N->getOperand(0).getValueType() != MVT::v2i32 &&
+ assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
+ if (N->getOperand(0).getValueType() != MVT::v2i32 ||
N->getValueType(0) != MVT::v2f32)
return;
SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64,
@@ -12890,13 +12906,16 @@ static unsigned getPseudoCMOVOpc(EVT VT) {
// to
//
// ...
-// EAX = LOAD MI.addr
+// t1 = LOAD MI.addr
// loop:
-// t1 = OP MI.val, EAX
-// LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
+// t4 = phi(t1, t3 / loop)
+// t2 = OP MI.val, t4
+// EAX = t4
+// LCMPXCHG [MI.addr], t2, [EAX is implicitly used & defined]
+// t3 = EAX
// JNE loop
// sink:
-// dst = EAX
+// dst = t3
// ...
MachineBasicBlock *
X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
@@ -12933,7 +12952,11 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
MVT::SimpleValueType VT = *RC->vt_begin();
- unsigned AccPhyReg = getX86SubSuperRegister(X86::EAX, VT);
+ unsigned t1 = MRI.createVirtualRegister(RC);
+ unsigned t2 = MRI.createVirtualRegister(RC);
+ unsigned t3 = MRI.createVirtualRegister(RC);
+ unsigned t4 = MRI.createVirtualRegister(RC);
+ unsigned PhyReg = getX86SubSuperRegister(X86::EAX, VT);
unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT);
unsigned LOADOpc = getLoadOpcode(VT);
@@ -12941,12 +12964,16 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
// For the atomic load-arith operator, we generate
//
// thisMBB:
- // EAX = LOAD [MI.addr]
+ // t1 = LOAD [MI.addr]
// mainMBB:
+ // t4 = phi(t1 / thisMBB, t3 / mainMBB)
// t1 = OP MI.val, EAX
+ // EAX = t4
// LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
+ // t3 = EAX
// JNE mainMBB
// sinkMBB:
+ // dst = t3
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
@@ -12962,23 +12989,34 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// thisMBB:
- MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), AccPhyReg);
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
- MIB.addOperand(MI->getOperand(MemOpndSlot + i));
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+ if (NewMO.isReg())
+ NewMO.setIsKill(false);
+ MIB.addOperand(NewMO);
+ }
+ for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
+ unsigned flags = (*MMOI)->getFlags();
+ flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
+ MachineMemOperand *MMO =
+ MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags,
+ (*MMOI)->getSize(),
+ (*MMOI)->getBaseAlignment(),
+ (*MMOI)->getTBAAInfo(),
+ (*MMOI)->getRanges());
+ MIB.addMemOperand(MMO);
+ }
thisMBB->addSuccessor(mainMBB);
// mainMBB:
MachineBasicBlock *origMainMBB = mainMBB;
- mainMBB->addLiveIn(AccPhyReg);
- // Copy AccPhyReg as it is used more than once.
- unsigned AccReg = MRI.createVirtualRegister(RC);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccReg)
- .addReg(AccPhyReg);
+ // Add a PHI.
+ MachineInstr *Phi = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4)
+ .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB);
- unsigned t1 = MRI.createVirtualRegister(RC);
unsigned Opc = MI->getOpcode();
switch (Opc) {
default:
@@ -12996,20 +13034,20 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
case X86::ATOMXOR32:
case X86::ATOMXOR64: {
unsigned ARITHOpc = getNonAtomicOpcode(Opc);
- BuildMI(mainMBB, DL, TII->get(ARITHOpc), t1).addReg(SrcReg)
- .addReg(AccReg);
+ BuildMI(mainMBB, DL, TII->get(ARITHOpc), t2).addReg(SrcReg)
+ .addReg(t4);
break;
}
case X86::ATOMNAND8:
case X86::ATOMNAND16:
case X86::ATOMNAND32:
case X86::ATOMNAND64: {
- unsigned t2 = MRI.createVirtualRegister(RC);
+ unsigned Tmp = MRI.createVirtualRegister(RC);
unsigned NOTOpc;
unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc);
- BuildMI(mainMBB, DL, TII->get(ANDOpc), t2).addReg(SrcReg)
- .addReg(AccReg);
- BuildMI(mainMBB, DL, TII->get(NOTOpc), t1).addReg(t2);
+ BuildMI(mainMBB, DL, TII->get(ANDOpc), Tmp).addReg(SrcReg)
+ .addReg(t4);
+ BuildMI(mainMBB, DL, TII->get(NOTOpc), t2).addReg(Tmp);
break;
}
case X86::ATOMMAX8:
@@ -13033,20 +13071,22 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
BuildMI(mainMBB, DL, TII->get(CMPOpc))
.addReg(SrcReg)
- .addReg(AccReg);
+ .addReg(t4);
if (Subtarget->hasCMov()) {
if (VT != MVT::i8) {
// Native support
- BuildMI(mainMBB, DL, TII->get(CMOVOpc), t1)
+ BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2)
.addReg(SrcReg)
- .addReg(AccReg);
+ .addReg(t4);
} else {
// Promote i8 to i32 to use CMOV32
- const TargetRegisterClass *RC32 = getRegClassFor(MVT::i32);
+ const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterClass *RC32 =
+ TRI->getSubClassWithSubReg(getRegClassFor(MVT::i32), X86::sub_8bit);
unsigned SrcReg32 = MRI.createVirtualRegister(RC32);
unsigned AccReg32 = MRI.createVirtualRegister(RC32);
- unsigned t2 = MRI.createVirtualRegister(RC32);
+ unsigned Tmp = MRI.createVirtualRegister(RC32);
unsigned Undef = MRI.createVirtualRegister(RC32);
BuildMI(mainMBB, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Undef);
@@ -13057,15 +13097,15 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
.addImm(X86::sub_8bit);
BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), AccReg32)
.addReg(Undef)
- .addReg(AccReg)
+ .addReg(t4)
.addImm(X86::sub_8bit);
- BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2)
+ BuildMI(mainMBB, DL, TII->get(CMOVOpc), Tmp)
.addReg(SrcReg32)
.addReg(AccReg32);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t1)
- .addReg(t2, 0, X86::sub_8bit);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t2)
+ .addReg(Tmp, 0, X86::sub_8bit);
}
} else {
// Use pseudo select and lower them.
@@ -13074,36 +13114,47 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
unsigned SelOpc = getPseudoCMOVOpc(VT);
X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc);
assert(CC != X86::COND_INVALID && "Invalid atomic-load-op transformation!");
- MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t1)
- .addReg(SrcReg).addReg(AccReg)
+ MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t2)
+ .addReg(SrcReg).addReg(t4)
.addImm(CC);
mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ // Replace the original PHI node as mainMBB is changed after CMOV
+ // lowering.
+ BuildMI(*origMainMBB, Phi, DL, TII->get(X86::PHI), t4)
+ .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB);
+ Phi->eraseFromParent();
}
break;
}
}
- // Copy AccPhyReg back from virtual register.
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccPhyReg)
- .addReg(AccReg);
+ // Copy PhyReg back from virtual register.
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), PhyReg)
+ .addReg(t4);
MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
- MIB.addOperand(MI->getOperand(MemOpndSlot + i));
- MIB.addReg(t1);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+ if (NewMO.isReg())
+ NewMO.setIsKill(false);
+ MIB.addOperand(NewMO);
+ }
+ MIB.addReg(t2);
MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Copy PhyReg back to virtual register.
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3)
+ .addReg(PhyReg);
+
BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
mainMBB->addSuccessor(origMainMBB);
mainMBB->addSuccessor(sinkMBB);
// sinkMBB:
- sinkMBB->addLiveIn(AccPhyReg);
-
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(TargetOpcode::COPY), DstReg)
- .addReg(AccPhyReg);
+ .addReg(t3);
MI->eraseFromParent();
return sinkMBB;
@@ -13120,15 +13171,24 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
// to
//
// ...
-// EAX = LOAD [MI.addr + 0]
-// EDX = LOAD [MI.addr + 4]
+// t1L = LOAD [MI.addr + 0]
+// t1H = LOAD [MI.addr + 4]
// loop:
-// EBX = OP MI.val.lo, EAX
-// ECX = OP MI.val.hi, EDX
+// t4L = phi(t1L, t3L / loop)
+// t4H = phi(t1H, t3H / loop)
+// t2L = OP MI.val.lo, t4L
+// t2H = OP MI.val.hi, t4H
+// EAX = t4L
+// EDX = t4H
+// EBX = t2L
+// ECX = t2H
// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
+// t3L = EAX
+// t3H = EDX
// JNE loop
// sink:
-// dst = EDX:EAX
+// dstL = t3L
+// dstH = t3H
// ...
MachineBasicBlock *
X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
@@ -13169,20 +13229,37 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
const TargetRegisterClass *RC = &X86::GR32RegClass;
const TargetRegisterClass *RC8 = &X86::GR8RegClass;
+ unsigned t1L = MRI.createVirtualRegister(RC);
+ unsigned t1H = MRI.createVirtualRegister(RC);
+ unsigned t2L = MRI.createVirtualRegister(RC);
+ unsigned t2H = MRI.createVirtualRegister(RC);
+ unsigned t3L = MRI.createVirtualRegister(RC);
+ unsigned t3H = MRI.createVirtualRegister(RC);
+ unsigned t4L = MRI.createVirtualRegister(RC);
+ unsigned t4H = MRI.createVirtualRegister(RC);
+
unsigned LCMPXCHGOpc = X86::LCMPXCHG8B;
unsigned LOADOpc = X86::MOV32rm;
// For the atomic load-arith operator, we generate
//
// thisMBB:
- // EAX = LOAD [MI.addr + 0]
- // EDX = LOAD [MI.addr + 4]
+ // t1L = LOAD [MI.addr + 0]
+ // t1H = LOAD [MI.addr + 4]
// mainMBB:
- // EBX = OP MI.vallo, EAX
- // ECX = OP MI.valhi, EDX
+ // t4L = phi(t1L / thisMBB, t3L / mainMBB)
+ // t4H = phi(t1H / thisMBB, t3H / mainMBB)
+ // t2L = OP MI.val.lo, t4L
+ // t2H = OP MI.val.hi, t4H
+ // EBX = t2L
+ // ECX = t2H
// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
- // JNE mainMBB
+ // t3L = EAX
+ // t3H = EDX
+ // JNE loop
// sinkMBB:
+ // dstL = t3L
+ // dstH = t3H
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
@@ -13199,35 +13276,50 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
// thisMBB:
// Lo
- MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EAX);
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
- MIB.addOperand(MI->getOperand(MemOpndSlot + i));
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1L);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+ if (NewMO.isReg())
+ NewMO.setIsKill(false);
+ MIB.addOperand(NewMO);
+ }
+ for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
+ unsigned flags = (*MMOI)->getFlags();
+ flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
+ MachineMemOperand *MMO =
+ MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags,
+ (*MMOI)->getSize(),
+ (*MMOI)->getBaseAlignment(),
+ (*MMOI)->getTBAAInfo(),
+ (*MMOI)->getRanges());
+ MIB.addMemOperand(MMO);
+ };
+ MachineInstr *LowMI = MIB;
+
// Hi
- MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EDX);
+ MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1H);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
- if (i == X86::AddrDisp)
+ if (i == X86::AddrDisp) {
MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32)
- else
- MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ } else {
+ MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+ if (NewMO.isReg())
+ NewMO.setIsKill(false);
+ MIB.addOperand(NewMO);
+ }
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(LowMI->memoperands_begin(), LowMI->memoperands_end());
thisMBB->addSuccessor(mainMBB);
// mainMBB:
MachineBasicBlock *origMainMBB = mainMBB;
- mainMBB->addLiveIn(X86::EAX);
- mainMBB->addLiveIn(X86::EDX);
-
- // Copy EDX:EAX as they are used more than once.
- unsigned LoReg = MRI.createVirtualRegister(RC);
- unsigned HiReg = MRI.createVirtualRegister(RC);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), LoReg).addReg(X86::EAX);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), HiReg).addReg(X86::EDX);
- unsigned t1L = MRI.createVirtualRegister(RC);
- unsigned t1H = MRI.createVirtualRegister(RC);
+ // Add PHIs.
+ MachineInstr *PhiL = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4L)
+ .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB);
+ MachineInstr *PhiH = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4H)
+ .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB);
unsigned Opc = MI->getOpcode();
switch (Opc) {
@@ -13240,19 +13332,23 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
case X86::ATOMSUB6432: {
unsigned HiOpc;
unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
- BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(LoReg).addReg(SrcLoReg);
- BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(HiReg).addReg(SrcHiReg);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(t4L)
+ .addReg(SrcLoReg);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(t4H)
+ .addReg(SrcHiReg);
break;
}
case X86::ATOMNAND6432: {
unsigned HiOpc, NOTOpc;
unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc);
- unsigned t2L = MRI.createVirtualRegister(RC);
- unsigned t2H = MRI.createVirtualRegister(RC);
- BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg).addReg(LoReg);
- BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg).addReg(HiReg);
- BuildMI(mainMBB, DL, TII->get(NOTOpc), t1L).addReg(t2L);
- BuildMI(mainMBB, DL, TII->get(NOTOpc), t1H).addReg(t2H);
+ unsigned TmpL = MRI.createVirtualRegister(RC);
+ unsigned TmpH = MRI.createVirtualRegister(RC);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), TmpL).addReg(SrcLoReg)
+ .addReg(t4L);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), TmpH).addReg(SrcHiReg)
+ .addReg(t4H);
+ BuildMI(mainMBB, DL, TII->get(NOTOpc), t2L).addReg(TmpL);
+ BuildMI(mainMBB, DL, TII->get(NOTOpc), t2H).addReg(TmpH);
break;
}
case X86::ATOMMAX6432:
@@ -13268,12 +13364,12 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
unsigned cc = MRI.createVirtualRegister(RC);
// cl := cmp src_lo, lo
BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
- .addReg(SrcLoReg).addReg(LoReg);
+ .addReg(SrcLoReg).addReg(t4L);
BuildMI(mainMBB, DL, TII->get(LoOpc), cL);
BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL);
// ch := cmp src_hi, hi
BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
- .addReg(SrcHiReg).addReg(HiReg);
+ .addReg(SrcHiReg).addReg(t4H);
BuildMI(mainMBB, DL, TII->get(HiOpc), cH);
BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH);
// cc := if (src_hi == hi) ? cl : ch;
@@ -13288,58 +13384,74 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
}
BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc);
if (Subtarget->hasCMov()) {
- BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1L)
- .addReg(SrcLoReg).addReg(LoReg);
- BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1H)
- .addReg(SrcHiReg).addReg(HiReg);
+ BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2L)
+ .addReg(SrcLoReg).addReg(t4L);
+ BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2H)
+ .addReg(SrcHiReg).addReg(t4H);
} else {
- MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1L)
- .addReg(SrcLoReg).addReg(LoReg)
+ MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2L)
+ .addReg(SrcLoReg).addReg(t4L)
.addImm(X86::COND_NE);
mainMBB = EmitLoweredSelect(MIB, mainMBB);
- MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1H)
- .addReg(SrcHiReg).addReg(HiReg)
+ // As the lowered CMOV won't clobber EFLAGS, we could reuse it for the
+ // 2nd CMOV lowering.
+ mainMBB->addLiveIn(X86::EFLAGS);
+ MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2H)
+ .addReg(SrcHiReg).addReg(t4H)
.addImm(X86::COND_NE);
mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ // Replace the original PHI node as mainMBB is changed after CMOV
+ // lowering.
+ BuildMI(*origMainMBB, PhiL, DL, TII->get(X86::PHI), t4L)
+ .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB);
+ BuildMI(*origMainMBB, PhiH, DL, TII->get(X86::PHI), t4H)
+ .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB);
+ PhiL->eraseFromParent();
+ PhiH->eraseFromParent();
}
break;
}
case X86::ATOMSWAP6432: {
unsigned HiOpc;
unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
- BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(SrcLoReg);
- BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(SrcHiReg);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg);
break;
}
}
// Copy EDX:EAX back from t4H:t4L
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(LoReg);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(HiReg);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(t4L);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(t4H);
// Copy ECX:EBX from t2H:t2L
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t1L);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t1H);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t2L);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t2H);
MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
- MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+ if (NewMO.isReg())
+ NewMO.setIsKill(false);
+ MIB.addOperand(NewMO);
+ }
MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Copy EDX:EAX back to t3H:t3L
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3L).addReg(X86::EAX);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3H).addReg(X86::EDX);
+
BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
mainMBB->addSuccessor(origMainMBB);
mainMBB->addSuccessor(sinkMBB);
// sinkMBB:
- sinkMBB->addLiveIn(X86::EAX);
- sinkMBB->addLiveIn(X86::EDX);
-
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(TargetOpcode::COPY), DstLoReg)
- .addReg(X86::EAX);
+ .addReg(t3L);
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(TargetOpcode::COPY), DstHiReg)
- .addReg(X86::EDX);
+ .addReg(t3H);
MI->eraseFromParent();
return sinkMBB;
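
The EmitAtomicLoadArith and EmitAtomicLoadArith6432 changes above rewrite the atomic read-modify-write pseudo instructions as an explicit LCMPXCHG loop over virtual registers (t1..t4) instead of pinning EAX/EDX across the loop. At the source level, the loop being built corresponds roughly to the compare-exchange sketch below; OR stands in for the generic operation, and the function illustrates the loop shape only, not the lowered machine code.

#include <atomic>
#include <iostream>

// Source-level analogue of the LCMPXCHG loop built above: load the old value,
// apply the operation, and retry the compare-exchange until no other thread
// intervened. OR stands in for the generic "OP MI.val, t4".
int atomic_or_fetch_old(std::atomic<int> &Addr, int Val) {
  int Old = Addr.load();
  int New;
  do {
    New = Old | Val;                                // t2 = OP MI.val, t4
  } while (!Addr.compare_exchange_weak(Old, New));  // LCMPXCHG; Old reloads on failure
  return Old;                                       // dst = t3, the pre-update value
}

int main() {
  std::atomic<int> X(5);
  int Prev = atomic_or_fetch_old(X, 2);
  std::cout << Prev << " -> " << X.load() << "\n"; // 5 -> 7
}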
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 958ceb0..da1dad0 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -471,7 +471,7 @@ namespace llvm {
virtual unsigned getJumpTableEncoding() const;
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
virtual const MCExpr *
LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index d86a406..f406416 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -14,7 +14,7 @@
//===----------------------------------------------------------------------===//
// LEA - Load Effective Address
-
+let SchedRW = [WriteLEA] in {
let neverHasSideEffects = 1 in
def LEA16r : I<0x8D, MRMSrcMem,
(outs GR16:$dst), (ins i32mem:$src),
@@ -36,41 +36,52 @@ let isReMaterializable = 1 in
def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src),
"lea{q}\t{$src|$dst}, {$dst|$src}",
[(set GR64:$dst, lea64addr:$src)], IIC_LEA>;
-
-
+} // SchedRW
//===----------------------------------------------------------------------===//
// Fixed-Register Multiplication and Division Instructions.
//
+// SchedModel info for an instruction that loads one value and gets the second
+// (and possibly third) value from a register.
+// This is used for instructions that put the memory operands before other
+// uses.
+class SchedLoadReg<SchedWrite SW> : Sched<[SW,
+ // Memory operand.
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ // Register reads (implicit or explicit).
+ ReadAfterLd, ReadAfterLd]>;
+
// Extra precision multiplication
// AL is really implied by AX, but the registers in Defs must match the
// SDNode results (i8, i32).
+// AL,AH = AL*GR8
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
// FIXME: Used for 8-bit mul, ignore result upper 8 bits.
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, GR8:$src)),
- (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*GR8
-
+ (implicit EFLAGS)], IIC_MUL8>, Sched<[WriteIMul]>;
+// AX,DX = AX*GR16
let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
"mul{w}\t$src",
- [], IIC_MUL16_REG>, OpSize; // AX,DX = AX*GR16
-
+ [], IIC_MUL16_REG>, OpSize, Sched<[WriteIMul]>;
+// EAX,EDX = EAX*GR32
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
- "mul{l}\t$src", // EAX,EDX = EAX*GR32
+ "mul{l}\t$src",
[/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/],
- IIC_MUL32_REG>;
+ IIC_MUL32_REG>, Sched<[WriteIMul]>;
+// RAX,RDX = RAX*GR64
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
- "mul{q}\t$src", // RAX,RDX = RAX*GR64
+ "mul{q}\t$src",
[/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/],
- IIC_MUL64>;
-
+ IIC_MUL64>, Sched<[WriteIMul]>;
+// AL,AH = AL*[mem8]
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
"mul{b}\t$src",
@@ -78,51 +89,60 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, (loadi8 addr:$src))),
- (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*[mem8]
-
+ (implicit EFLAGS)], IIC_MUL8>, SchedLoadReg<WriteIMulLd>;
+// AX,DX = AX*[mem16]
let mayLoad = 1, neverHasSideEffects = 1 in {
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
"mul{w}\t$src",
- [], IIC_MUL16_MEM>, OpSize; // AX,DX = AX*[mem16]
-
+ [], IIC_MUL16_MEM>, OpSize, SchedLoadReg<WriteIMulLd>;
+// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
"mul{l}\t$src",
- [], IIC_MUL32_MEM>; // EAX,EDX = EAX*[mem32]
+ [], IIC_MUL32_MEM>, SchedLoadReg<WriteIMulLd>;
+// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
- "mul{q}\t$src", [], IIC_MUL64>; // RAX,RDX = RAX*[mem64]
+ "mul{q}\t$src", [], IIC_MUL64>, SchedLoadReg<WriteIMulLd>;
}
let neverHasSideEffects = 1 in {
+// AL,AH = AL*GR8
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", [],
- IIC_IMUL8>; // AL,AH = AL*GR8
+ IIC_IMUL8>, Sched<[WriteIMul]>;
+// AX,DX = AX*GR16
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", [],
- IIC_IMUL16_RR>, OpSize; // AX,DX = AX*GR16
+ IIC_IMUL16_RR>, OpSize, Sched<[WriteIMul]>;
+// EAX,EDX = EAX*GR32
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", [],
- IIC_IMUL32_RR>; // EAX,EDX = EAX*GR32
+ IIC_IMUL32_RR>, Sched<[WriteIMul]>;
+// RAX,RDX = RAX*GR64
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", [],
- IIC_IMUL64_RR>; // RAX,RDX = RAX*GR64
+ IIC_IMUL64_RR>, Sched<[WriteIMul]>;
let mayLoad = 1 in {
+// AL,AH = AL*[mem8]
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
- "imul{b}\t$src", [], IIC_IMUL8>; // AL,AH = AL*[mem8]
+ "imul{b}\t$src", [], IIC_IMUL8>, SchedLoadReg<WriteIMulLd>;
+// AX,DX = AX*[mem16]
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
- "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize;
- // AX,DX = AX*[mem16]
+ "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize,
+ SchedLoadReg<WriteIMulLd>;
+// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
- "imul{l}\t$src", [], IIC_IMUL32_MEM>; // EAX,EDX = EAX*[mem32]
+ "imul{l}\t$src", [], IIC_IMUL32_MEM>, SchedLoadReg<WriteIMulLd>;
+// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
- "imul{q}\t$src", [], IIC_IMUL64>; // RAX,RDX = RAX*[mem64]
+ "imul{q}\t$src", [], IIC_IMUL64>, SchedLoadReg<WriteIMulLd>;
}
} // neverHasSideEffects
@@ -130,7 +150,8 @@ def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
+let isCommutable = 1, SchedRW = [WriteIMul] in {
+// X = IMUL Y, Z --> X = IMUL Z, Y
// Register-Register Signed Integer Multiply
def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
@@ -148,9 +169,10 @@ def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, GR64:$src2))], IIC_IMUL64_RR>,
TB;
-}
+} // isCommutable, SchedRW
// Register-Memory Signed Integer Multiply
+let SchedRW = [WriteIMulLd, ReadAfterLd] in {
def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
(ins GR16:$src1, i16mem:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
@@ -172,12 +194,14 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
(X86smul_flag GR64:$src1, (load addr:$src2)))],
IIC_IMUL64_RM>,
TB;
+} // SchedRW
} // Constraints = "$src1 = $dst"
} // Defs = [EFLAGS]
// Surprisingly enough, these are not two address instructions!
let Defs = [EFLAGS] in {
+let SchedRW = [WriteIMul] in {
// Register-Integer Signed Integer Multiply
def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
@@ -216,9 +240,10 @@ def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, i64immSExt8:$src2))],
IIC_IMUL64_RRI>;
-
+} // SchedRW
// Memory-Integer Signed Integer Multiply
+let SchedRW = [WriteIMulLd] in {
def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
(outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -260,6 +285,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
(X86smul_flag (load addr:$src1),
i64immSExt8:$src2))],
IIC_IMUL64_RMI>;
+} // SchedRW
} // Defs = [EFLAGS]
@@ -267,6 +293,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
// unsigned division/remainder
let hasSideEffects = 1 in { // so that we don't speculatively execute
+let SchedRW = [WriteIDiv] in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
"div{b}\t$src", [], IIC_DIV8_REG>;
@@ -280,24 +307,30 @@ def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
"div{q}\t$src", [], IIC_DIV64>;
+} // SchedRW
let mayLoad = 1 in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "div{b}\t$src", [], IIC_DIV8_MEM>;
+ "div{b}\t$src", [], IIC_DIV8_MEM>,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "div{w}\t$src", [], IIC_DIV16>, OpSize;
+ "div{w}\t$src", [], IIC_DIV16>, OpSize,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
- "div{l}\t$src", [], IIC_DIV32>;
+ "div{l}\t$src", [], IIC_DIV32>,
+ SchedLoadReg<WriteIDivLd>;
// RDX:RAX/[mem64] = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
- "div{q}\t$src", [], IIC_DIV64>;
+ "div{q}\t$src", [], IIC_DIV64>,
+ SchedLoadReg<WriteIDivLd>;
}
// Signed division/remainder.
+let SchedRW = [WriteIDiv] in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
"idiv{b}\t$src", [], IIC_IDIV8>;
@@ -311,20 +344,25 @@ def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
"idiv{q}\t$src", [], IIC_IDIV64>;
+} // SchedRW
let mayLoad = 1 in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "idiv{b}\t$src", [], IIC_IDIV8>;
+ "idiv{b}\t$src", [], IIC_IDIV8>,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "idiv{w}\t$src", [], IIC_IDIV16>, OpSize;
+ "idiv{w}\t$src", [], IIC_IDIV16>, OpSize,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
- "idiv{l}\t$src", [], IIC_IDIV32>;
+ "idiv{l}\t$src", [], IIC_IDIV32>,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
- "idiv{q}\t$src", [], IIC_IDIV64>;
+ "idiv{q}\t$src", [], IIC_IDIV64>,
+ SchedLoadReg<WriteIDivLd>;
}
} // hasSideEffects = 0
@@ -335,7 +373,7 @@ def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
// unary instructions
let CodeSize = 2 in {
let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
"neg{b}\t$dst",
[(set GR8:$dst, (ineg GR8:$src1)),
@@ -351,8 +389,10 @@ def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst",
[(set GR64:$dst, (ineg GR64:$src1)),
(implicit EFLAGS)], IIC_UNARY_REG>;
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
+// Read-modify-write negate.
+let SchedRW = [WriteALULd, WriteRMW] in {
def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
"neg{b}\t$dst",
[(store (ineg (loadi8 addr:$dst)), addr:$dst),
@@ -368,12 +408,13 @@ def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
[(store (ineg (loadi64 addr:$dst)), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
+} // SchedRW
} // Defs = [EFLAGS]
// Note: NOT does not set EFLAGS!
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
// Match xor -1 to not. Favors these over a move imm + xor to save code size.
let AddedComplexity = 15 in {
def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
@@ -388,8 +429,9 @@ def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst",
[(set GR64:$dst, (not GR64:$src1))], IIC_UNARY_REG>;
}
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
+let SchedRW = [WriteALULd, WriteRMW] in {
def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
"not{b}\t$dst",
[(store (not (loadi8 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
@@ -402,11 +444,12 @@ def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
[(store (not (loadi32 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
[(store (not (loadi64 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
+} // SchedRW
} // CodeSize
// TODO: inc/dec is slow for P4, but fast for Pentium-M.
let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
let CodeSize = 2 in
def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"inc{b}\t$dst",
@@ -454,9 +497,9 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
Requires<[In64BitMode]>;
} // isConvertibleToThreeAddress = 1, CodeSize = 2
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
-let CodeSize = 2 in {
+let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
[(store (add (loadi8 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
@@ -491,9 +534,9 @@ def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
[(store (add (loadi32 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>,
Requires<[In64BitMode]>;
-} // CodeSize = 2
+} // CodeSize = 2, SchedRW
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
let CodeSize = 2 in
def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"dec{b}\t$dst",
@@ -514,10 +557,10 @@ def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
[(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))],
IIC_UNARY_REG>;
} // CodeSize = 2
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
-let CodeSize = 2 in {
+let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
[(store (add (loadi8 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
@@ -532,7 +575,7 @@ let CodeSize = 2 in {
def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
[(store (add (loadi64 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
-} // CodeSize = 2
+} // CodeSize = 2, SchedRW
} // Defs = [EFLAGS]
@@ -646,7 +689,8 @@ class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f = MRMDestReg>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>;
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>,
+ Sched<[WriteALU]>;
// BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has
// just a regclass (no eflags) as a result.
@@ -689,7 +733,8 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: ITy<opcode, MRMSrcReg, typeinfo,
(outs typeinfo.RegClass:$dst),
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
// The disassembler should know about this, but not the asmparser.
let isCodeGenOnly = 1;
let hasSideEffects = 0;
@@ -699,7 +744,8 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: ITy<opcode, MRMSrcReg, typeinfo, (outs),
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
// The disassembler should know about this, but not the asmparser.
let isCodeGenOnly = 1;
let hasSideEffects = 0;
@@ -710,7 +756,8 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
dag outlist, list<dag> pattern>
: ITy<opcode, MRMSrcMem, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>;
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ Sched<[WriteALULd, ReadAfterLd]>;
// BinOpRM_R - Instructions like "add reg, reg, [mem]".
class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -746,7 +793,8 @@ class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, dag outlist, list<dag> pattern>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
let ImmT = typeinfo.ImmEncoding;
}
@@ -783,7 +831,8 @@ class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, dag outlist, list<dag> pattern>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
let ImmT = Imm8; // Always 8-bit immediate.
}
@@ -821,7 +870,8 @@ class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
list<dag> pattern>
: ITy<opcode, MRMDestMem, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>;
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ Sched<[WriteALULd, WriteRMW]>;
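// Note (assumed reading of these SchedRW lists; not spelled out in the
// patch): memory-destination forms such as BinOpMR get
// [WriteALULd, WriteRMW], i.e. a load-folded ALU op followed by the
// store-back of the result, whereas register-destination forms with a
// memory source (BinOpRM above) use [WriteALULd, ReadAfterLd], where the
// register operand is only needed once the loaded value is available.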
// BinOpMR_RMW - Instructions like "add [mem], reg".
class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -849,7 +899,8 @@ class BinOpMI<string mnemonic, X86TypeInfo typeinfo,
Format f, list<dag> pattern, bits<8> opcode = 0x80>
: ITy<opcode, f, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> {
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ Sched<[WriteALULd, WriteRMW]> {
let ImmT = typeinfo.ImmEncoding;
}
@@ -1210,11 +1261,12 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))],
- IIC_BIN_NONMEM>;
+ IIC_BIN_NONMEM>, Sched<[WriteALU]>;
def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS,
- (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>;
+ (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>,
+ Sched<[WriteALULd, ReadAfterLd]>;
}
let Predicates = [HasBMI], Defs = [EFLAGS] in {
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 0979752..105963f 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -16,6 +16,8 @@
class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
InstrItinClass rr = arg_rr;
InstrItinClass rm = arg_rm;
+ // InstrSchedModel info.
+ X86FoldableSchedWrite Sched = WriteFAdd;
}
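// Usage sketch (mirrors the defs later in this patch, nothing new): the
// Sched field defaults to WriteFAdd and is overridden per itinerary group,
// e.g.
//
//   let Sched = WriteFMul in
//   def SSE_MUL_F32S : OpndItins<
//     IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F64S_RM
//   >;
//
// Instruction multiclasses then attach Sched<[itins.Sched]> to register
// forms and Sched<[itins.Sched.Folded, ReadAfterLd]> to load-folded forms.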
class SizeItins<OpndItins arg_s, OpndItins arg_d> {
@@ -45,6 +47,7 @@ def SSE_ALU_ITINS_S : SizeItins<
SSE_ALU_F32S, SSE_ALU_F64S
>;
+let Sched = WriteFMul in {
def SSE_MUL_F32S : OpndItins<
IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F64S_RM
>;
@@ -52,11 +55,13 @@ def SSE_MUL_F32S : OpndItins<
def SSE_MUL_F64S : OpndItins<
IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM
>;
+}
def SSE_MUL_ITINS_S : SizeItins<
SSE_MUL_F32S, SSE_MUL_F64S
>;
+let Sched = WriteFDiv in {
def SSE_DIV_F32S : OpndItins<
IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F64S_RM
>;
@@ -64,6 +69,7 @@ def SSE_DIV_F32S : OpndItins<
def SSE_DIV_F64S : OpndItins<
IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM
>;
+}
def SSE_DIV_ITINS_S : SizeItins<
SSE_DIV_F32S, SSE_DIV_F64S
@@ -82,6 +88,7 @@ def SSE_ALU_ITINS_P : SizeItins<
SSE_ALU_F32P, SSE_ALU_F64P
>;
+let Sched = WriteFMul in {
def SSE_MUL_F32P : OpndItins<
IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F64P_RM
>;
@@ -89,11 +96,13 @@ def SSE_MUL_F32P : OpndItins<
def SSE_MUL_F64P : OpndItins<
IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM
>;
+}
def SSE_MUL_ITINS_P : SizeItins<
SSE_MUL_F32P, SSE_MUL_F64P
>;
+let Sched = WriteFDiv in {
def SSE_DIV_F32P : OpndItins<
IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F64P_RM
>;
@@ -101,6 +110,7 @@ def SSE_DIV_F32P : OpndItins<
def SSE_DIV_F64P : OpndItins<
IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM
>;
+}
def SSE_DIV_ITINS_P : SizeItins<
SSE_DIV_F32P, SSE_DIV_F64P
@@ -110,6 +120,7 @@ def SSE_BIT_ITINS_P : OpndItins<
IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM
>;
+let Sched = WriteVecALU in {
def SSE_INTALU_ITINS_P : OpndItins<
IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;
@@ -117,7 +128,9 @@ def SSE_INTALU_ITINS_P : OpndItins<
def SSE_INTALUQ_ITINS_P : OpndItins<
IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM
>;
+}
+let Sched = WriteVecIMul in
def SSE_INTMUL_ITINS_P : OpndItins<
IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM
>;
@@ -148,13 +161,15 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>;
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
}
def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>;
+ [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
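// Background note (assumed; based on the X86FoldableSchedWrite type of the
// Sched field added above): itins.Sched.Folded names the SchedWrite for the
// load-folded form of the same operation, and pairing it with ReadAfterLd
// marks register operands that are not needed until the loaded value is
// available.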
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
@@ -189,14 +204,16 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
+ Sched<[itins.Sched]>;
let mayLoad = 1 in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
- itins.rm, d>;
+ itins.rm, d>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
@@ -209,12 +226,14 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- pat_rr, IIC_DEFAULT, d>;
+ pat_rr, IIC_DEFAULT, d>,
+ Sched<[WriteVecLogic]>;
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- pat_rm, IIC_DEFAULT, d>;
+ pat_rm, IIC_DEFAULT, d>,
+ Sched<[WriteVecLogicLd, ReadAfterLd]>;
}
//===----------------------------------------------------------------------===//
@@ -444,7 +463,7 @@ multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
!strconcat(base_opc, asm_opr),
[(set VR128:$dst, (vt (OpNode VR128:$src1,
(scalar_to_vector RC:$src2))))],
- IIC_SSE_MOV_S_RR>;
+ IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>;
// For the disassembler
let isCodeGenOnly = 1, hasSideEffects = 0 in
@@ -464,7 +483,7 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
- VEX, VEX_LIG;
+ VEX, VEX_LIG, Sched<[WriteStore]>;
// SSE1 & 2
let Constraints = "$src1 = $dst" in {
defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
@@ -473,7 +492,8 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;
+ [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+ Sched<[WriteStore]>;
}
// Loading from memory automatically zeroing upper bits.
@@ -482,11 +502,11 @@ multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))],
- IIC_SSE_MOV_S_RM>, VEX, VEX_LIG;
+ IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>;
def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))],
- IIC_SSE_MOV_S_RM>;
+ IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>;
}
defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS;
@@ -745,11 +765,13 @@ multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
bit IsReMaterializable = 1> {
let neverHasSideEffects = 1 in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>;
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>,
+ Sched<[WriteMove]>;
let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>;
+ [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>,
+ Sched<[WriteLoad]>;
}
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
@@ -790,6 +812,7 @@ defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
"movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
TB, OpSize;
+let SchedRW = [WriteStore] in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)],
@@ -822,6 +845,7 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v4f64 VR256:$src), addr:$dst)],
IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
+} // SchedRW
// For disassembler
let isCodeGenOnly = 1, hasSideEffects = 0 in {
@@ -880,6 +904,7 @@ def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
(VMOVUPDYmr addr:$dst, VR256:$src)>;
+let SchedRW = [WriteStore] in {
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)],
@@ -896,6 +921,7 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v2f64 VR128:$src), addr:$dst)],
IIC_SSE_MOVU_P_MR>;
+} // SchedRW
// For disassembler
let isCodeGenOnly = 1, hasSideEffects = 0 in {
@@ -1009,7 +1035,7 @@ let Predicates = [HasAVX] in {
(VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
def : Pat<(store (v8i16 (extract_subvector
(v16i16 VR256:$src), (iPTR 0))), addr:$dst),
- (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
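// Note (assumed intent): the pattern above now stores the low v8i16
// subvector with the unaligned VMOVUPSmr, matching the sibling v4i32 and
// v16i8 patterns, instead of the aligned VMOVAPSmr, which assumed an
// alignment the pattern cannot guarantee.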
def : Pat<(store (v16i8 (extract_subvector
(v32i8 VR256:$src), (iPTR 0))), addr:$dst),
(VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
@@ -1095,14 +1121,16 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
[(set VR128:$dst,
(psnode VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
- itin, SSEPackedSingle>, TB;
+ itin, SSEPackedSingle>, TB,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
def PDrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "d", asm_opr),
[(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))],
- itin, SSEPackedDouble>, TB, OpSize;
+ itin, SSEPackedDouble>, TB, OpSize,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
@@ -1123,6 +1151,7 @@ let AddedComplexity = 20 in {
IIC_SSE_MOV_LH>;
}
+let SchedRW = [WriteStore] in {
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
@@ -1143,6 +1172,7 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
[(store (f64 (vector_extract (v2f64 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOV_LH>;
+} // SchedRW
let Predicates = [HasAVX] in {
// Shuffle with VMOVLPS
@@ -1222,6 +1252,7 @@ let AddedComplexity = 20 in {
IIC_SSE_MOV_LH>;
}
+let SchedRW = [WriteStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
@@ -1246,6 +1277,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
[(store (f64 (vector_extract
(v2f64 (X86Unpckh VR128:$src, VR128:$src)),
(iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
+} // SchedRW
let Predicates = [HasAVX] in {
// VMOVHPS patterns
@@ -1296,14 +1328,14 @@ let AddedComplexity = 20 in {
[(set VR128:$dst,
(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
IIC_SSE_MOV_LH>,
- VEX_4V;
+ VEX_4V, Sched<[WriteShuffle]>;
def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
IIC_SSE_MOV_LH>,
- VEX_4V;
+ VEX_4V, Sched<[WriteShuffle]>;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
@@ -1311,13 +1343,13 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
"movlhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
}
let Predicates = [HasAVX] in {
@@ -1352,22 +1384,27 @@ def SSE_CVT_PD : OpndItins<
IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM
>;
+let Sched = WriteCvtI2F in
def SSE_CVT_PS : OpndItins<
IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM
>;
+let Sched = WriteCvtI2F in
def SSE_CVT_Scalar : OpndItins<
IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM
>;
+let Sched = WriteCvtF2I in
def SSE_CVT_SS2SI_32 : OpndItins<
IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM
>;
+let Sched = WriteCvtF2I in
def SSE_CVT_SS2SI_64 : OpndItins<
IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM
>;
+let Sched = WriteCvtF2I in
def SSE_CVT_SD2SI : OpndItins<
IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM
>;
@@ -1377,10 +1414,10 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
string asm, OpndItins itins> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
[(set DstRC:$dst, (OpNode SrcRC:$src))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
[(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded]>;
}
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
@@ -1388,10 +1425,10 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
OpndItins itins> {
let neverHasSideEffects = 1 in {
def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [], itins.rr, d>;
+ [], itins.rr, d>, Sched<[itins.Sched]>;
let mayLoad = 1 in
def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [], itins.rm, d>;
+ [], itins.rm, d>, Sched<[itins.Sched.Folded]>;
}
}
@@ -1534,10 +1571,12 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
string asm, OpndItins itins> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>;
+ [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>;
+ [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
@@ -1549,14 +1588,14 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
@@ -2193,12 +2232,13 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
def rr : SIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def rm : SIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1),
(ld_frag addr:$src2), imm:$cc))],
- itins.rm>;
+ itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Accept explicit immediate argument form instead of comparison code.
let neverHasSideEffects = 1 in {
@@ -2241,12 +2281,14 @@ multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
(ins VR128:$src1, VR128:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
VR128:$src, imm:$cc))],
- itins.rr>;
+ itins.rr>,
+ Sched<[itins.Sched]>;
def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, x86memop:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
(load addr:$src), imm:$cc))],
- itins.rm>;
+ itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
// Aliases to match intrinsics which expect XMM operand(s).
@@ -2276,12 +2318,14 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
- IIC_SSE_COMIS_RR, d>;
+ IIC_SSE_COMIS_RR, d>,
+ Sched<[WriteFAdd]>;
def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
(ld_frag addr:$src2)))],
- IIC_SSE_COMIS_RM, d>;
+ IIC_SSE_COMIS_RM, d>,
+ Sched<[WriteFAddLd, ReadAfterLd]>;
}
let Defs = [EFLAGS] in {
@@ -2338,11 +2382,13 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
def rri : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
[(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
- IIC_SSE_CMPP_RR, d>;
+ IIC_SSE_CMPP_RR, d>,
+ Sched<[WriteFAdd]>;
def rmi : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
- IIC_SSE_CMPP_RM, d>;
+ IIC_SSE_CMPP_RM, d>,
+ Sched<[WriteFAddLd, ReadAfterLd]>;
// Accept explicit immediate argument form instead of comparison code.
let neverHasSideEffects = 1 in {
@@ -2427,12 +2473,14 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, i8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
- (i8 imm:$src3))))], IIC_SSE_SHUFP, d>;
+ (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, i8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
- (i8 imm:$src3))))], IIC_SSE_SHUFP, d>;
+ (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
+ Sched<[WriteShuffle]>;
}
defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
@@ -2516,13 +2564,14 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
(outs RC:$dst), (ins RC:$src1, RC:$src2),
asm, [(set RC:$dst,
(vt (OpNode RC:$src1, RC:$src2)))],
- IIC_SSE_UNPCK, d>;
+ IIC_SSE_UNPCK, d>, Sched<[WriteShuffle]>;
def rm : PI<opc, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2),
asm, [(set RC:$dst,
(vt (OpNode RC:$src1,
(mem_frag addr:$src2))))],
- IIC_SSE_UNPCK, d>;
+ IIC_SSE_UNPCK, d>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
@@ -2613,10 +2662,11 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
Domain d> {
def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>;
+ [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>,
+ Sched<[WriteVecLogic]>;
def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"), [],
- IIC_SSE_MOVMSK, d>, REX_W;
+ IIC_SSE_MOVMSK, d>, REX_W, Sched<[WriteVecLogic]>;
}
let Predicates = [HasAVX] in {
@@ -2693,7 +2743,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>;
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
@@ -2701,7 +2752,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpVT (OpNode RC:$src1,
(bitconvert (memop_frag addr:$src2)))))],
- itins.rm>;
+ itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
} // ExeDomain = SSEPackedInt
@@ -2967,6 +3019,7 @@ let isCodeGenOnly = 1 in {
///
/// And, we have a special variant form for a full-vector intrinsic form.
+let Sched = WriteFSqrt in {
def SSE_SQRTP : OpndItins<
IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM
>;
@@ -2974,7 +3027,9 @@ def SSE_SQRTP : OpndItins<
def SSE_SQRTS : OpndItins<
IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM
>;
+}
+let Sched = WriteFRcp in {
def SSE_RCPP : OpndItins<
IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM
>;
@@ -2982,6 +3037,7 @@ def SSE_RCPP : OpndItins<
def SSE_RCPS : OpndItins<
IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM
>;
+}
/// sse1_fp_unop_s - SSE1 unops in scalar form.
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
@@ -2991,24 +3047,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
(ins FR32:$src1, FR32:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
let mayLoad = 1 in {
def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
(ins FR32:$src1,f32mem:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, ssmem:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode FR32:$src))]>;
+ [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
// For scalar unary operations, fold a load into the operation
// only in OptForSize mode. It eliminates an instruction, but it also
// eliminates a whole-register clobber (the load), so it introduces a
@@ -3016,13 +3074,15 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
- Requires<[UseSSE1, OptForSize]>;
+ Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>;
+ [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>;
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand.
@@ -3033,24 +3093,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
(ins FR32:$src1, FR32:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
let mayLoad = 1 in {
def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
(ins FR32:$src1,f32mem:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, ssmem:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode FR32:$src))]>;
+ [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
// For scalar unary operations, fold a load into the operation
// only in OptForSize mode. It eliminates an instruction, but it also
// eliminates a whole-register clobber (the load), so it introduces a
@@ -3058,17 +3120,17 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
- Requires<[UseSSE1, OptForSize]>;
+ Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
let Constraints = "$src1 = $dst" in {
def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [], itins.rr>;
+ [], itins.rr>, Sched<[itins.Sched]>;
let mayLoad = 1, hasSideEffects = 0 in
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, ssmem:$src2),
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [], itins.rm>;
+ [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
@@ -3080,30 +3142,32 @@ let Predicates = [HasAVX] in {
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (v4f32 (OpNode VR128:$src)))],
- itins.rr>, VEX;
+ itins.rr>, VEX, Sched<[itins.Sched]>;
def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))],
- itins.rm>, VEX;
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
- itins.rr>, VEX, VEX_L;
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
- itins.rm>, VEX, VEX_L;
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}
def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>;
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>,
+ Sched<[itins.Sched]>;
def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>;
+ [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
@@ -3115,33 +3179,33 @@ let Predicates = [HasAVX] in {
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int VR128:$src))],
- itins.rr>, VEX;
+ itins.rr>, VEX, Sched<[itins.Sched]>;
def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
- itins.rm>, VEX;
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (V8F32Int VR256:$src))],
- itins.rr>, VEX, VEX_L;
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst),
(ins f256mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (V8F32Int (memopv8f32 addr:$src)))],
- itins.rm>, VEX, VEX_L;
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}
def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int VR128:$src))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded]>;
}
/// sse2_fp_unop_s - SSE2 unops in scalar form.
@@ -3152,35 +3216,40 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
(ins FR64:$src1, FR64:$src2),
!strconcat("v", OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
let mayLoad = 1 in {
def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
(ins FR64:$src1,f64mem:$src2),
!strconcat("v", OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, sdmem:$src2),
!strconcat("v", OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>;
+ [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
// See the comments in sse1_fp_unop_s for why this is OptForSize.
def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
[(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
- Requires<[UseSSE2, OptForSize]>;
+ Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>;
def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>;
+ [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>;
+ [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
/// sse2_fp_unop_p - SSE2 unops in vector forms.
@@ -3191,30 +3260,32 @@ let Predicates = [HasAVX] in {
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (v2f64 (OpNode VR128:$src)))],
- itins.rr>, VEX;
+ itins.rr>, VEX, Sched<[itins.Sched]>;
def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))],
- itins.rm>, VEX;
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
- itins.rr>, VEX, VEX_L;
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
- itins.rm>, VEX, VEX_L;
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}
def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>;
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>,
+ Sched<[itins.Sched]>;
def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>;
+ [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
// Square root.
@@ -3305,52 +3376,48 @@ let Predicates = [UseSSE1] in {
//===----------------------------------------------------------------------===//
let AddedComplexity = 400 in { // Prefer non-temporal versions
- def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
- def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2f64 VR128:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
-
- let ExeDomain = SSEPackedInt in
- def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2i64 VR128:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
-
- def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
- (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
-
- def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v8f32 VR256:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
- def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f64 VR256:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
- let ExeDomain = SSEPackedInt in
- def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4i64 VR256:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
-}
+let SchedRW = [WriteStore] in {
+def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
+def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
+
+let ExeDomain = SSEPackedInt in
+def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2i64 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
+
+def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX, VEX_L;
+def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f64 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX, VEX_L;
+let ExeDomain = SSEPackedInt in
+def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4i64 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX, VEX_L;
-let AddedComplexity = 400 in { // Prefer non-temporal versions
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)],
@@ -3366,9 +3433,6 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)],
IIC_SSE_MOVNT>;
-def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
- (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>;
-
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movnti{l}\t{$src, $dst|$dst, $src}",
@@ -3380,7 +3444,14 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
[(nontemporalstore (i64 GR64:$src), addr:$dst)],
IIC_SSE_MOVNT>,
TB, Requires<[HasSSE2]>;
-}
+} // SchedRW = [WriteStore]
+
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
+
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>;
+} // AddedComplexity
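// Note (assumed reason; not stated in the patch): the two
// alignednontemporalstore patterns are re-added outside the
// `let SchedRW = [WriteStore]` scope above because anonymous Pat records
// have no SchedRW field for the let to set.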
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Prefetch and memory fence
@@ -3450,7 +3521,7 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
let ExeDomain = SSEPackedInt in { // SSE integer instructions
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, SchedRW = [WriteMove] in {
def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
VEX;
@@ -3466,7 +3537,7 @@ def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
}
// For Disassembler
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>,
@@ -3484,7 +3555,7 @@ def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
}
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
- neverHasSideEffects = 1 in {
+ neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
VEX;
@@ -3501,7 +3572,7 @@ let Predicates = [HasAVX] in {
}
}
-let mayStore = 1, neverHasSideEffects = 1 in {
+let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in {
def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
(ins i128mem:$dst, VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
@@ -3520,6 +3591,7 @@ def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
}
}
+let SchedRW = [WriteMove] in {
let neverHasSideEffects = 1 in
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>;
@@ -3538,9 +3610,10 @@ def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
}
+} // SchedRW
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
- neverHasSideEffects = 1 in {
+ neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/],
@@ -3552,7 +3625,7 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
XS, Requires<[UseSSE2]>;
}
-let mayStore = 1 in {
+let mayStore = 1, SchedRW = [WriteStore] in {
def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/],
@@ -3580,6 +3653,7 @@ def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//
+let Sched = WriteVecIMul in
def SSE_PMADD : OpndItins<
IIC_SSE_PMADD, IIC_SSE_PMADD
>;
@@ -3598,14 +3672,15 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>;
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128,
@@ -3639,20 +3714,22 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))],
- itins.rr>;
+ itins.rr>, Sched<[WriteVecShift]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, i128mem:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode RC:$src1,
- (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>;
+ (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>,
+ Sched<[WriteVecShiftLd, ReadAfterLd]>;
def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
(ins RC:$src1, i32i8imm:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>;
+ [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>,
+ Sched<[WriteVecShift]>;
}
/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types
@@ -3667,14 +3744,16 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>;
+ [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
+ Sched<[itins.Sched]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
- (bitconvert (memop_frag addr:$src2)))))]>;
+ (bitconvert (memop_frag addr:$src2)))))]>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
} // ExeDomain = SSEPackedInt
@@ -3779,7 +3858,7 @@ defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
VR128, v4i32, v4i32, bc_v4i32,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
// 128-bit logical shifts.
def VPSLLDQri : PDIi8<0x73, MRM7r,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
@@ -3825,7 +3904,7 @@ defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
VR256, v8i32, v4i32, bc_v4i32,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
// 256-bit logical shifts.
def VPSLLDQYri : PDIi8<0x73, MRM7r,
(outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
@@ -3871,7 +3950,7 @@ defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
VR128, v4i32, v4i32, bc_v4i32,
SSE_INTSHIFT_ITINS_P>;
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
// 128-bit logical shifts.
def PSLLDQri : PDIi8<0x73, MRM7r,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
@@ -3966,14 +4045,15 @@ let Predicates = [HasAVX] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>, VEX;
+ IIC_SSE_PSHUF>, VEX, Sched<[WriteShuffle]>;
def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, i8imm:$src2),
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
- (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX;
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX,
+ Sched<[WriteShuffleLd]>;
}
let Predicates = [HasAVX2] in {
@@ -3983,14 +4063,15 @@ let Predicates = [HasAVX2] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>, VEX, VEX_L;
+ IIC_SSE_PSHUF>, VEX, VEX_L, Sched<[WriteShuffle]>;
def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src1, i8imm:$src2),
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)),
- (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L;
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L,
+ Sched<[WriteShuffleLd]>;
}
let Predicates = [UseSSE2] in {
@@ -4000,14 +4081,15 @@ let Predicates = [UseSSE2] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>;
+ IIC_SSE_PSHUF>, Sched<[WriteShuffle]>;
def mi : Ii8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
- (i8 imm:$src2))))], IIC_SSE_PSHUF>;
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>,
+ Sched<[WriteShuffleLd]>;
}
}
} // ExeDomain = SSEPackedInt
@@ -4043,7 +4125,7 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))],
- IIC_SSE_UNPCK>;
+ IIC_SSE_UNPCK>, Sched<[WriteShuffle]>;
def rm : PDI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
@@ -4052,7 +4134,8 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
[(set VR128:$dst, (OpNode VR128:$src1,
(bc_frag (memopv2i64
addr:$src2))))],
- IIC_SSE_UNPCK>;
+ IIC_SSE_UNPCK>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
@@ -4060,12 +4143,14 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
def Yrr : PDI<opc, MRMSrcReg,
(outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>;
+ [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>,
+ Sched<[WriteShuffle]>;
def Yrm : PDI<opc, MRMSrcMem,
(outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (OpNode VR256:$src1,
- (bc_frag (memopv4i64 addr:$src2))))]>;
+ (bc_frag (memopv4i64 addr:$src2))))]>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
let Predicates = [HasAVX] in {
@@ -4142,7 +4227,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>;
+ (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>,
+ Sched<[WriteShuffle]>;
def rmi : Ii8<0xC4, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
i16mem:$src2, i32i8imm:$src3),
@@ -4151,7 +4237,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
- imm:$src3))], IIC_SSE_PINSRW>;
+ imm:$src3))], IIC_SSE_PINSRW>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
// Extract
@@ -4160,12 +4247,14 @@ def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
- imm:$src2))]>, TB, OpSize, VEX;
+ imm:$src2))]>, TB, OpSize, VEX,
+ Sched<[WriteShuffle]>;
def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
- imm:$src2))], IIC_SSE_PEXTRW>;
+ imm:$src2))], IIC_SSE_PEXTRW>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
// Insert
let Predicates = [HasAVX] in {
@@ -4173,7 +4262,7 @@ let Predicates = [HasAVX] in {
def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, TB, OpSize, VEX_4V;
+ []>, TB, OpSize, VEX_4V, Sched<[WriteShuffle]>;
}
let Constraints = "$src1 = $dst" in
@@ -4185,7 +4274,7 @@ let Constraints = "$src1 = $dst" in
// SSE2 - Packed Mask Creation
//===---------------------------------------------------------------------===//
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in {
def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
@@ -4213,7 +4302,7 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
// SSE2 - Conditional Store
//===---------------------------------------------------------------------===//
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in {
let Uses = [EDI] in
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
@@ -4252,41 +4341,42 @@ def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
- VEX;
+ VEX, Sched<[WriteMove]>;
def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))],
IIC_SSE_MOVDQ>,
- VEX;
+ VEX, Sched<[WriteLoad]>;
def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert GR64:$src))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>;
+ (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
+ Sched<[WriteMove]>;
def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert GR64:$src))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
//===---------------------------------------------------------------------===//
// Move Int Doubleword to Single Scalar
@@ -4294,22 +4384,22 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
IIC_SSE_MOVDQ>,
- VEX;
+ VEX, Sched<[WriteLoad]>;
def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int to Packed Double Int
@@ -4317,26 +4407,29 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
- (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX;
+ (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX,
+ Sched<[WriteMove]>;
def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
- VEX;
+ VEX, Sched<[WriteStore]>;
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
- (iPTR 0)))], IIC_SSE_MOVD_ToGP>;
+ (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
+ Sched<[WriteMove]>;
def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int first element to Doubleword Int
//
+let SchedRW = [WriteMove] in {
def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"vmov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
@@ -4349,6 +4442,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
[(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
(iPTR 0)))],
IIC_SSE_MOVD_ToGP>;
+} // SchedRW
//===---------------------------------------------------------------------===//
// Bitcast FR64 <-> GR64
@@ -4357,28 +4451,28 @@ let Predicates = [HasAVX] in
def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
- VEX;
+ VEX, Sched<[WriteLoad]>;
def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64:$src))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64:$src))],
- IIC_SSE_MOVD_ToGP>;
+ IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
//===---------------------------------------------------------------------===//
// Move Scalar Single to Double Int
diff --git a/lib/Target/X86/X86InstrTSX.td b/lib/Target/X86/X86InstrTSX.td
index ad55058..a37a8cc 100644
--- a/lib/Target/X86/X86InstrTSX.td
+++ b/lib/Target/X86/X86InstrTSX.td
@@ -22,7 +22,7 @@ def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins),
let isBranch = 1, isTerminator = 1, Defs = [EAX] in
def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget:$dst),
- "xbegin\t$dst", []>;
+ "xbegin\t$dst", []>, Requires<[HasRTM]>;
def XEND : I<0x01, MRM_D5, (outs), (ins),
"xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>;
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 3af1b3e..a8a9fd8 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -407,6 +407,57 @@ ReSimplify:
LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
break;
+ // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
+ // if one of the registers is extended, but other isn't.
+ case X86::VMOVAPDrr:
+ case X86::VMOVAPDYrr:
+ case X86::VMOVAPSrr:
+ case X86::VMOVAPSYrr:
+ case X86::VMOVDQArr:
+ case X86::VMOVDQAYrr:
+ case X86::VMOVDQUrr:
+ case X86::VMOVDQUYrr:
+ case X86::VMOVUPDrr:
+ case X86::VMOVUPDYrr:
+ case X86::VMOVUPSrr:
+ case X86::VMOVUPSYrr: {
+ if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
+ X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
+ unsigned NewOpc;
+ switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
+ case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
+ case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
+ case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
+ case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
+ case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
+ case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
+ case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
+ case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
+ case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
+ case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
+ case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
+ }
+ OutMI.setOpcode(NewOpc);
+ }
+ break;
+ }
+ case X86::VMOVSDrr:
+ case X86::VMOVSSrr: {
+ if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
+ X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
+ unsigned NewOpc;
+ switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
+ case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
+ }
+ OutMI.setOpcode(NewOpc);
+ }
+ break;
+ }
+
// TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register
// inputs modeled as normal uses instead of implicit uses. As such, truncate
// off all but the first operand (the callee). FIXME: Change isel.
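
The commuted *_REV forms selected above give a smaller encoding because the
2-byte VEX prefix can only carry the VEX.R extension bit, not VEX.B; moving the
extended register into the ModRM reg field therefore lets the encoder use the
short prefix. A minimal sketch of the predicate (hypothetical helper name; the
patch open-codes this check on the MCInst operands):

    // True when the reversed form is profitable: destination is a legacy
    // register, source is an extended one (xmm8-xmm15 / ymm8-ymm15), so the
    // extended register can be encoded via VEX.R instead of VEX.B.
    static bool shouldUseReversedMove(bool DstIsExtended, bool SrcIsExtended) {
      return !DstIsExtended && SrcIsExtended;
    }
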
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index d99d085..da0ca7d 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -7,6 +7,78 @@
//
//===----------------------------------------------------------------------===//
+// InstrSchedModel annotations for out-of-order CPUs.
+//
+// These annotations are independent of the itinerary classes defined below.
+
+// Instructions with folded loads need to read the memory operand immediately,
+// but other register operands don't have to be read until the load is ready.
+// These operands are marked with ReadAfterLd.
+def ReadAfterLd : SchedRead;
+
+// Instructions with both a load and a store folded are modeled as a folded
+// load + WriteRMW.
+def WriteRMW : SchedWrite;
+
+// Most instructions can fold loads, so almost every SchedWrite comes in two
+// variants: With and without a folded load.
+// An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite
+// with a folded load.
+class X86FoldableSchedWrite : SchedWrite {
+ // The SchedWrite to use when a load is folded into the instruction.
+ SchedWrite Folded;
+}
+
+// Multiclass that produces a linked pair of SchedWrites.
+multiclass X86SchedWritePair {
+ // Register-Memory operation.
+ def Ld : SchedWrite;
+ // Register-Register operation.
+ def NAME : X86FoldableSchedWrite {
+ let Folded = !cast<SchedWrite>(NAME#"Ld");
+ }
+}
+
+// Arithmetic.
+defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
+defm WriteIMul : X86SchedWritePair; // Integer multiplication.
+defm WriteIDiv : X86SchedWritePair; // Integer division.
+def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
+
+// Integer shifts and rotates.
+defm WriteShift : X86SchedWritePair;
+
+// Loads, stores, and moves, not folded with other operations.
+def WriteLoad : SchedWrite;
+def WriteStore : SchedWrite;
+def WriteMove : SchedWrite;
+
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
+defm WriteJump : X86SchedWritePair;
+
+// Floating point. This covers both scalar and vector operations.
+defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare.
+defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
+defm WriteFDiv : X86SchedWritePair; // Floating point division.
+defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
+defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal.
+
+// Vector integer operations.
+defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
+defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
+defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply.
+
+// Vector bitwise operations.
+// These are often used on both floating point and integer vectors.
+defm WriteVecLogic : X86SchedWritePair; // Vector and/or/xor.
+defm WriteShuffle : X86SchedWritePair; // Vector shuffles and blends.
+
+// Conversion between integer and float.
+defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
+defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
+defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion.
+
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for X86
def IIC_DEFAULT : InstrItinClass;
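
As a rough C++ analogy (illustration only; the real definitions above are
TableGen) of how these paired writes are consumed earlier in this patch: an
OpndItins such as SSE_PMADD carries Sched = WriteVecIMul, the register form of
an instruction then uses Sched<[itins.Sched]>, and the memory form uses
Sched<[itins.Sched.Folded, ReadAfterLd]>, which X86SchedWritePair resolves to
the WriteVecIMulLd twin.

    #include <string>

    // Hypothetical stand-in for an X86FoldableSchedWrite: a register-register
    // write class linked to the variant used when a load is folded.
    struct FoldableWrite {
      std::string Name;    // e.g. "WriteVecIMul"
      std::string Folded;  // e.g. "WriteVecIMulLd"
    };

    static const FoldableWrite WriteVecIMulPair = {"WriteVecIMul", "WriteVecIMulLd"};
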
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index fefb479..be2a997 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -176,18 +176,42 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
{ ISD::MUL, MVT::v8i32, 4 },
{ ISD::SUB, MVT::v8i32, 4 },
{ ISD::ADD, MVT::v8i32, 4 },
- { ISD::MUL, MVT::v4i64, 4 },
{ ISD::SUB, MVT::v4i64, 4 },
{ ISD::ADD, MVT::v4i64, 4 },
- };
+ // A v4i64 multiply is custom lowered as two split v2i64 vectors that then
+ // are lowered as a series of long multiplies(3), shifts(4) and adds(2)
+ // Because we believe v4i64 to be a legal type, we must also include the
+ // split factor of two in the cost table. Therefore, the cost here is 18
+ // instead of 9.
+ { ISD::MUL, MVT::v4i64, 18 },
+ };
// Look for AVX1 lowering tricks.
- if (ST->hasAVX()) {
- int Idx = CostTableLookup<MVT>(AVX1CostTable, array_lengthof(AVX1CostTable), ISD,
- LT.second);
+ if (ST->hasAVX() && !ST->hasAVX2()) {
+ int Idx = CostTableLookup<MVT>(AVX1CostTable, array_lengthof(AVX1CostTable),
+ ISD, LT.second);
if (Idx != -1)
return LT.first * AVX1CostTable[Idx].Cost;
}
+
+ // Custom lowering of vectors.
+ static const CostTblEntry<MVT> CustomLowered[] = {
+ // A v2i64/v4i64 multiply is custom lowered as a series of long
+ // multiplies(3), shifts(4) and adds(2).
+ { ISD::MUL, MVT::v2i64, 9 },
+ { ISD::MUL, MVT::v4i64, 9 },
+ };
+ int Idx = CostTableLookup<MVT>(CustomLowered, array_lengthof(CustomLowered),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * CustomLowered[Idx].Cost;
+
+ // Special lowering of v4i32 mul on sse2, sse3: Lower v4i32 mul as 2x shuffle,
+ // 2x pmuludq, 2x shuffle.
+ if (ISD == ISD::MUL && LT.second == MVT::v4i32 && ST->hasSSE2() &&
+ !ST->hasSSE41())
+ return 6;
+
// Fallback to the default implementation.
return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty);
}
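
To tie the two tables together: the custom lowering expands one v2i64 multiply
into 3 long multiplies, 4 shifts and 2 adds, i.e. 9 instructions, and a v4i64
multiply is first split into two v2i64 halves, which is why the AVX1 entry
above bakes in 2 * 9 = 18 while the CustomLowered table keeps the per-vector
cost of 9. A small self-contained sketch of the lookup-and-scale pattern used
here (generic names, not the LLVM cost-table API):

    #include <cstddef>

    struct CostEntry { int Opcode; int Type; unsigned Cost; };

    // Return SplitFactor * table cost for the first matching entry, or
    // Fallback when the table has no entry for (Opcode, Type).
    unsigned lookupScaledCost(const CostEntry *Table, std::size_t Len,
                              int Opcode, int Type,
                              unsigned SplitFactor, unsigned Fallback) {
      for (std::size_t I = 0; I != Len; ++I)
        if (Table[I].Opcode == Opcode && Table[I].Type == Type)
          return SplitFactor * Table[I].Cost;
      return Fallback;
    }
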
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index f8a9125..a5d2be8 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -84,7 +84,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
// XCore does not have the NodeTypes below.
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::ADDC, MVT::i32, Expand);
setOperationAction(ISD::ADDE, MVT::i32, Expand);
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 6d430ef..8d258f5 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -84,7 +84,7 @@ namespace llvm {
explicit XCoreTargetLowering(XCoreTargetMachine &TM);
virtual unsigned getJumpTableEncoding() const;
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
/// LowerOperation - Provide custom lowering hooks for some operations.
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 8a6bfc6..fa3d72d 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -60,7 +60,7 @@ namespace {
continue;
}
- bool Local = I->hasLocalLinkage();
+ bool Local = I->isDiscardableIfUnused();
if (Local)
I->setVisibility(GlobalValue::HiddenVisibility);
@@ -80,7 +80,7 @@ namespace {
continue;
}
- bool Local = I->hasLocalLinkage();
+ bool Local = I->isDiscardableIfUnused();
if (Local)
I->setVisibility(GlobalValue::HiddenVisibility);
@@ -97,7 +97,7 @@ namespace {
Module::alias_iterator CurI = I;
++I;
- if (CurI->hasLocalLinkage()) {
+ if (CurI->isDiscardableIfUnused()) {
CurI->setVisibility(GlobalValue::HiddenVisibility);
CurI->setLinkage(GlobalValue::ExternalLinkage);
}
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 47b2b51..027a9f2 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -391,9 +391,9 @@ LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
LLVMPassManagerRef PM,
- bool Internalize,
- bool RunInliner) {
+ LLVMBool Internalize,
+ LLVMBool RunInliner) {
PassManagerBuilder *Builder = unwrap(PMB);
PassManagerBase *LPM = unwrap(PM);
- Builder->populateLTOPassManager(*LPM, Internalize, RunInliner);
+ Builder->populateLTOPassManager(*LPM, Internalize != 0, RunInliner != 0);
}
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index c6d60d6..3c5781c 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -150,7 +150,9 @@ namespace {
typedef SmallVector<const FAddend*, 4> AddendVect;
Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);
-
+
+ Value *performFactorization(Instruction *I);
+
/// Convert given addend to a Value
Value *createAddendVal(const FAddend &A, bool& NeedNeg);
@@ -159,6 +161,7 @@ namespace {
Value *createFSub(Value *Opnd0, Value *Opnd1);
Value *createFAdd(Value *Opnd0, Value *Opnd1);
Value *createFMul(Value *Opnd0, Value *Opnd1);
+ Value *createFDiv(Value *Opnd0, Value *Opnd1);
Value *createFNeg(Value *V);
Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
void createInstPostProc(Instruction *NewInst);
@@ -388,6 +391,78 @@ unsigned FAddend::drillAddendDownOneStep
return BreakNum;
}
+// Try to perform the following optimization on the input instruction I. Return the
+// simplified expression if it was successful; otherwise, return 0.
+//
+// Instruction "I" is Simplified into
+// -------------------------------------------------------
+// (x * y) +/- (x * z) x * (y +/- z)
+// (y / x) +/- (z / x) (y +/- z) / x
+//
+Value *FAddCombine::performFactorization(Instruction *I) {
+ assert((I->getOpcode() == Instruction::FAdd ||
+ I->getOpcode() == Instruction::FSub) && "Expect add/sub");
+
+ Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0));
+ Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1));
+
+ if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode())
+ return 0;
+
+ bool isMpy = false;
+ if (I0->getOpcode() == Instruction::FMul)
+ isMpy = true;
+ else if (I0->getOpcode() != Instruction::FDiv)
+ return 0;
+
+ Value *Opnd0_0 = I0->getOperand(0);
+ Value *Opnd0_1 = I0->getOperand(1);
+ Value *Opnd1_0 = I1->getOperand(0);
+ Value *Opnd1_1 = I1->getOperand(1);
+
+ // Input Instr I Factor AddSub0 AddSub1
+ // ----------------------------------------------
+ // (x*y) +/- (x*z) x y z
+ // (y/x) +/- (z/x) x y z
+ //
+ Value *Factor = 0;
+ Value *AddSub0 = 0, *AddSub1 = 0;
+
+ if (isMpy) {
+ if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1)
+ Factor = Opnd0_0;
+ else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1)
+ Factor = Opnd0_1;
+
+ if (Factor) {
+ AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0;
+ AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0;
+ }
+ } else if (Opnd0_1 == Opnd1_1) {
+ Factor = Opnd0_1;
+ AddSub0 = Opnd0_0;
+ AddSub1 = Opnd1_0;
+ }
+
+ if (!Factor)
+ return 0;
+
+ // Create expression "NewAddSub = AddSub0 +/- AddSub1"
+ Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ?
+ createFAdd(AddSub0, AddSub1) :
+ createFSub(AddSub0, AddSub1);
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) {
+ const APFloat &F = CFP->getValueAPF();
+ if (!F.isNormal() || F.isDenormal())
+ return 0;
+ }
+
+ if (isMpy)
+ return createFMul(Factor, NewAddSub);
+
+ return createFDiv(NewAddSub, Factor);
+}
+
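
As a concrete instance of the table above: for I = (a*b) + (a*c), the shared
operand a becomes Factor, b and c become AddSub0 and AddSub1, and the result is
a * (b + c); the division row works the same way with a common divisor. The
rewrite is only attempted under unsafe-algebra (fast-math), which simplify()
asserts below. A tiny numeric check of the two identities (illustration only;
exact for these small integer-valued doubles):

    #include <cassert>

    int main() {
      double a = 2, b = 3, c = 5;
      // (a*b) + (a*c)  ->  a * (b + c)
      assert(a * b + a * c == a * (b + c));   // 6 + 10 == 2 * 8
      // (b/a) + (c/a)  ->  (b + c) / a
      assert(b / a + c / a == (b + c) / a);   // 1.5 + 2.5 == 8 / 2
      return 0;
    }
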
Value *FAddCombine::simplify(Instruction *I) {
assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode");
@@ -471,7 +546,8 @@ Value *FAddCombine::simplify(Instruction *I) {
return R;
}
- return 0;
+ // step 6: Try factorization as the last resort.
+ return performFactorization(I);
}
Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
@@ -627,7 +703,8 @@ Value *FAddCombine::createNaryFAdd
Value *FAddCombine::createFSub
(Value *Opnd0, Value *Opnd1) {
Value *V = Builder->CreateFSub(Opnd0, Opnd1);
- createInstPostProc(cast<Instruction>(V));
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
return V;
}
@@ -639,13 +716,22 @@ Value *FAddCombine::createFNeg(Value *V) {
Value *FAddCombine::createFAdd
(Value *Opnd0, Value *Opnd1) {
Value *V = Builder->CreateFAdd(Opnd0, Opnd1);
- createInstPostProc(cast<Instruction>(V));
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
return V;
}
Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) {
Value *V = Builder->CreateFMul(Opnd0, Opnd1);
- createInstPostProc(cast<Instruction>(V));
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
+ return V;
+}
+
+Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) {
+ Value *V = Builder->CreateFDiv(Opnd0, Opnd1);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
return V;
}
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 4332467..990cbc3 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -22,8 +22,8 @@ using namespace PatternMatch;
/// AddOne - Add one to a ConstantInt.
-static Constant *AddOne(Constant *C) {
- return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+static Constant *AddOne(ConstantInt *C) {
+ return ConstantInt::get(C->getContext(), C->getValue() + 1);
}
/// SubOne - Subtract one from a ConstantInt.
static Constant *SubOne(ConstantInt *C) {
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index a960ab2..d162223 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -104,6 +104,12 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy);
if (CastElTySize == 0 || AllocElTySize == 0) return 0;
+ // If the allocation has multiple uses, only promote it if we're not
+ // shrinking the amount of memory being allocated.
+ uint64_t AllocElTyStoreSize = TD->getTypeStoreSize(AllocElTy);
+ uint64_t CastElTyStoreSize = TD->getTypeStoreSize(CastElTy);
+ if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return 0;
+
// See if we can satisfy the modulus by pulling a scale out of the array
// size argument.
unsigned ArraySizeScale;
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index bad46b4..32fdb9b 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1333,13 +1333,14 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
// Transform (icmp pred iM (shl iM %v, N), CI)
- // -> (icmp pred i(M-N) (trunc %v iM to i(N-N)), (trunc (CI>>N))
- // Transform the shl to a trunc if (trunc (CI>>N)) has no loss.
+ // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N))
+ // Transform the shl to a trunc if (trunc (CI>>N)) has no loss and M-N.
// This enables to get rid of the shift in favor of a trunc which can be
// free on the target. It has the additional benefit of comparing to a
// smaller constant, which will be target friendly.
unsigned Amt = ShAmt->getLimitedValue(TypeBits-1);
- if (Amt != 0 && RHSV.countTrailingZeros() >= Amt) {
+ if (LHSI->hasOneUse() &&
+ Amt != 0 && RHSV.countTrailingZeros() >= Amt) {
Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt);
Constant *NCI = ConstantExpr::getTrunc(
ConstantExpr::getAShr(RHS,
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 8e4267f..173f2bf 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -402,7 +402,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
return ReplaceInstUsesWith(I, V);
}
- // (MDC +/- C1) * C2 => (MDC * C2) +/- (C1 * C2)
+ // (MDC +/- C1) * C => (MDC * C) +/- (C1 * C)
Instruction *FAddSub = dyn_cast<Instruction>(Op0);
if (FAddSub &&
(FAddSub->getOpcode() == Instruction::FAdd ||
@@ -420,8 +420,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (C1 && C1->getValueAPF().isNormal() &&
isFMulOrFDivWithConstant(Opnd0)) {
- Value *M0 = ConstantExpr::getFMul(C1, C);
- Value *M1 = isNormalFp(cast<ConstantFP>(M0)) ?
+ Value *M1 = ConstantExpr::getFMul(C1, C);
+ Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ?
foldFMulConst(cast<Instruction>(Opnd0), C, &I) :
0;
if (M0 && M1) {
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 6877475..92b42ee 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -71,7 +71,7 @@ static const char *kAsanRegisterGlobalsName = "__asan_register_globals";
static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals";
static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
-static const char *kAsanInitName = "__asan_init_v1";
+static const char *kAsanInitName = "__asan_init_v2";
static const char *kAsanHandleNoReturnName = "__asan_handle_no_return";
static const char *kAsanMappingOffsetName = "__asan_mapping_offset";
static const char *kAsanMappingScaleName = "__asan_mapping_scale";
@@ -244,7 +244,7 @@ static size_t RedzoneSizeForScale(int MappingScale) {
/// AddressSanitizer: instrument the code in module to find memory bugs.
struct AddressSanitizer : public FunctionPass {
- AddressSanitizer(bool CheckInitOrder = false,
+ AddressSanitizer(bool CheckInitOrder = true,
bool CheckUseAfterReturn = false,
bool CheckLifetime = false,
StringRef BlacklistFile = StringRef(),
@@ -315,7 +315,7 @@ struct AddressSanitizer : public FunctionPass {
class AddressSanitizerModule : public ModulePass {
public:
- AddressSanitizerModule(bool CheckInitOrder = false,
+ AddressSanitizerModule(bool CheckInitOrder = true,
StringRef BlacklistFile = StringRef(),
bool ZeroBaseShadow = false)
: ModulePass(ID),
@@ -531,9 +531,12 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
// Create a constant for Str so that we can pass it to the run-time lib.
static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) {
Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
- return new GlobalVariable(M, StrConst->getType(), true,
+ GlobalVariable *GV = new GlobalVariable(M, StrConst->getType(), true,
GlobalValue::PrivateLinkage, StrConst,
kAsanGenPrefix);
+ GV->setUnnamedAddr(true); // Ok to merge these.
+ GV->setAlignment(1); // Strings may not be merged w/o setting align 1.
+ return GV;
}
static bool GlobalWasGeneratedByAsan(GlobalVariable *G) {
@@ -885,11 +888,12 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
// size_t size;
// size_t size_with_redzone;
// const char *name;
+ // const char *module_name;
// size_t has_dynamic_init;
// We initialize an array of such structures and pass it to a run-time call.
StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
IntptrTy, IntptrTy,
- IntptrTy, NULL);
+ IntptrTy, IntptrTy, NULL);
SmallVector<Constant *, 16> Initializers(n), DynamicInit;
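
On the run-time side, each element of the array handed to
__asan_register_globals now carries six pointer-sized fields. A rough C-level
view, assumed from the field list in the comment above (the first field is not
shown in this hunk and is taken here to be the global's address; this is not
copied from compiler-rt):

    #include <stdint.h>

    struct asan_global_desc {
      uintptr_t beg;                /* assumed: address of the instrumented global */
      uintptr_t size;               /* original size of the global */
      uintptr_t size_with_redzone;  /* size including the right redzone */
      const char *name;             /* just the global's name after this change */
      const char *module_name;      /* new field: name of the translation unit */
      uintptr_t has_dynamic_init;   /* nonzero if the global has a dynamic initializer */
    };
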
@@ -901,6 +905,9 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
// this TU. Used in initialization order checking.
Value *FirstDynamic = 0, *LastDynamic = 0;
+ GlobalVariable *ModuleName = createPrivateGlobalForString(
+ M, M.getModuleIdentifier());
+
for (size_t i = 0; i < n; i++) {
static const uint64_t kMaxGlobalRedzone = 1 << 18;
GlobalVariable *G = GlobalsToChange[i];
@@ -930,11 +937,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
NewTy, G->getInitializer(),
Constant::getNullValue(RightRedZoneTy), NULL);
- SmallString<2048> DescriptionOfGlobal = G->getName();
- DescriptionOfGlobal += " (";
- DescriptionOfGlobal += M.getModuleIdentifier();
- DescriptionOfGlobal += ")";
- GlobalVariable *Name = createPrivateGlobalForString(M, DescriptionOfGlobal);
+ GlobalVariable *Name = createPrivateGlobalForString(M, G->getName());
// Create a new global variable with enough space for a redzone.
GlobalVariable *NewGlobal = new GlobalVariable(
@@ -958,6 +961,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
ConstantInt::get(IntptrTy, SizeInBytes),
ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
ConstantExpr::getPointerCast(Name, IntptrTy),
+ ConstantExpr::getPointerCast(ModuleName, IntptrTy),
ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer),
NULL);
@@ -1095,6 +1099,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
bool AddressSanitizer::runOnFunction(Function &F) {
if (BL->isIn(F)) return false;
if (&F == AsanCtorFunction) return false;
+ if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false;
DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n");
initializeCallbacks(*F.getParent());
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index eb0dc1e..e8d4ac8 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -29,6 +29,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugLoc.h"
#include "llvm/Support/InstIterator.h"
@@ -39,31 +40,57 @@
#include <utility>
using namespace llvm;
+static cl::opt<std::string>
+DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden,
+ cl::ValueRequired);
+
+GCOVOptions GCOVOptions::getDefault() {
+ GCOVOptions Options;
+ Options.EmitNotes = true;
+ Options.EmitData = true;
+ Options.UseCfgChecksum = false;
+ Options.NoRedZone = false;
+ Options.FunctionNamesInData = true;
+
+ if (DefaultGCOVVersion.size() != 4) {
+ llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") +
+ DefaultGCOVVersion);
+ }
+ memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4);
+ return Options;
+}
+
namespace {
class GCOVProfiler : public ModulePass {
public:
static char ID;
- GCOVProfiler()
- : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false),
- UseExtraChecksum(false), NoRedZone(false) {
+ GCOVProfiler() : ModulePass(ID), Options(GCOVOptions::getDefault()) {
+ ReversedVersion[0] = Options.Version[3];
+ ReversedVersion[1] = Options.Version[2];
+ ReversedVersion[2] = Options.Version[1];
+ ReversedVersion[3] = Options.Version[0];
+ ReversedVersion[4] = '\0';
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
- GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format,
- bool useExtraChecksum, bool NoRedZone_)
- : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData),
- Use402Format(use402Format), UseExtraChecksum(useExtraChecksum),
- NoRedZone(NoRedZone_) {
- assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?");
+ GCOVProfiler(const GCOVOptions &Options) : ModulePass(ID), Options(Options){
+ assert((Options.EmitNotes || Options.EmitData) &&
+ "GCOVProfiler asked to do nothing?");
+ ReversedVersion[0] = Options.Version[3];
+ ReversedVersion[1] = Options.Version[2];
+ ReversedVersion[2] = Options.Version[1];
+ ReversedVersion[3] = Options.Version[0];
+ ReversedVersion[4] = '\0';
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
virtual const char *getPassName() const {
return "GCOV Profiler";
}
+
private:
bool runOnModule(Module &M);
- // Create the GCNO files for the Module based on DebugInfo.
- void emitGCNO();
+ // Create the .gcno files for the Module based on DebugInfo.
+ void emitProfileNotes();
// Modify the program to track transitions along edges and call into the
// profiling runtime to emit .gcda files when run.
@@ -74,6 +101,7 @@ namespace {
Constant *getIncrementIndirectCounterFunc();
Constant *getEmitFunctionFunc();
Constant *getEmitArcsFunc();
+ Constant *getDeleteFlushFunctionListFunc();
Constant *getEndFileFunc();
// Create or retrieve an i32 state value that is used to represent the
@@ -84,22 +112,22 @@ namespace {
// block number.
GlobalVariable *buildEdgeLookupTable(Function *F,
GlobalVariable *Counter,
- const UniqueVector<BasicBlock *> &Preds,
- const UniqueVector<BasicBlock *> &Succs);
+ const UniqueVector<BasicBlock *>&Preds,
+ const UniqueVector<BasicBlock*>&Succs);
// Add the function to write out all our counters to the global destructor
// list.
- void insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
+ Function *insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*,
+ MDNode*> >);
+ Function *insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
void insertIndirectCounterIncrement();
- void insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
std::string mangleName(DICompileUnit CU, const char *NewStem);
- bool EmitNotes;
- bool EmitData;
- bool Use402Format;
- bool UseExtraChecksum;
- bool NoRedZone;
+ GCOVOptions Options;
+
+ // Reversed, NUL-terminated copy of Options.Version.
+ char ReversedVersion[5];
Module *M;
LLVMContext *Ctx;
@@ -110,12 +138,8 @@ char GCOVProfiler::ID = 0;
INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling",
"Insert instrumentation for GCOV profiling", false, false)
-ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData,
- bool Use402Format,
- bool UseExtraChecksum,
- bool NoRedZone) {
- return new GCOVProfiler(EmitNotes, EmitData, Use402Format, UseExtraChecksum,
- NoRedZone);
+ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) {
+ return new GCOVProfiler(Options);
}
namespace {
@@ -253,8 +277,8 @@ namespace {
// object users can construct, the blocks and lines will be rooted here.
class GCOVFunction : public GCOVRecord {
public:
- GCOVFunction(DISubprogram SP, raw_ostream *os,
- bool Use402Format, bool UseExtraChecksum) {
+ GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident,
+ bool UseCfgChecksum) {
this->os = os;
Function *F = SP.getFunction();
@@ -268,13 +292,12 @@ namespace {
writeBytes(FunctionTag, 4);
uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) +
1 + lengthOfGCOVString(SP.getFilename()) + 1;
- if (UseExtraChecksum)
+ if (UseCfgChecksum)
++BlockLen;
write(BlockLen);
- uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP);
write(Ident);
write(0); // lineno checksum
- if (UseExtraChecksum)
+ if (UseCfgChecksum)
write(0); // cfg checksum
writeGCOVString(SP.getName());
writeGCOVString(SP.getFilename());
@@ -358,12 +381,12 @@ bool GCOVProfiler::runOnModule(Module &M) {
this->M = &M;
Ctx = &M.getContext();
- if (EmitNotes) emitGCNO();
- if (EmitData) return emitProfileArcs();
+ if (Options.EmitNotes) emitProfileNotes();
+ if (Options.EmitData) return emitProfileArcs();
return false;
}
-void GCOVProfiler::emitGCNO() {
+void GCOVProfiler::emitProfileNotes() {
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
if (!CU_Nodes) return;
@@ -376,10 +399,9 @@ void GCOVProfiler::emitGCNO() {
std::string ErrorInfo;
raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo,
raw_fd_ostream::F_Binary);
- if (!Use402Format)
- out.write("oncg*404MVLL", 12);
- else
- out.write("oncg*204MVLL", 12);
+ out.write("oncg", 4);
+ out.write(ReversedVersion, 4);
+ out.write("MVLL", 4);
DIArray SPs = CU.getSubprograms();
for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
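
With the default -default-gcov-version value of "402*", the reversed copy built
in the constructors is "*204", so the header written just above is the byte
sequence "oncg" "*204" "MVLL", the same bytes the old code emitted on the
Use402Format path. A standalone check of the reversal (illustration only):

    #include <cstdio>

    int main() {
      const char Version[5] = "402*";   // value of -default-gcov-version
      char Reversed[5] = {Version[3], Version[2], Version[1], Version[0], '\0'};
      std::printf("oncg%sMVLL\n", Reversed);  // prints: oncg*204MVLL
      return 0;
    }
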
@@ -388,7 +410,7 @@ void GCOVProfiler::emitGCNO() {
Function *F = SP.getFunction();
if (!F) continue;
- GCOVFunction Func(SP, &out, Use402Format, UseExtraChecksum);
+ GCOVFunction Func(SP, &out, i, Options.UseCfgChecksum);
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
GCOVBlock &Block = Func.getBlock(BB);
@@ -469,21 +491,18 @@ bool GCOVProfiler::emitProfileArcs() {
Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
Edge);
Value *Count = Builder.CreateLoad(Counter);
- Count = Builder.CreateAdd(Count,
- ConstantInt::get(Type::getInt64Ty(*Ctx),1));
+ Count = Builder.CreateAdd(Count, Builder.getInt64(1));
Builder.CreateStore(Count, Counter);
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- Value *Sel = Builder.CreateSelect(
- BI->getCondition(),
- ConstantInt::get(Type::getInt64Ty(*Ctx), Edge),
- ConstantInt::get(Type::getInt64Ty(*Ctx), Edge + 1));
+ Value *Sel = Builder.CreateSelect(BI->getCondition(),
+ Builder.getInt64(Edge),
+ Builder.getInt64(Edge + 1));
SmallVector<Value *, 2> Idx;
- Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx)));
+ Idx.push_back(Builder.getInt64(0));
Idx.push_back(Sel);
Value *Counter = Builder.CreateInBoundsGEP(Counters, Idx);
Value *Count = Builder.CreateLoad(Counter);
- Count = Builder.CreateAdd(Count,
- ConstantInt::get(Type::getInt64Ty(*Ctx),1));
+ Count = Builder.CreateAdd(Count, Builder.getInt64(1));
Builder.CreateStore(Count, Counter);
} else {
ComplexEdgePreds.insert(BB);
@@ -500,10 +519,9 @@ bool GCOVProfiler::emitProfileArcs() {
ComplexEdgePreds, ComplexEdgeSuccs);
GlobalVariable *EdgeState = getEdgeStateValue();
- Type *Int32Ty = Type::getInt32Ty(*Ctx);
for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator());
- Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState);
+ Builder.CreateStore(Builder.getInt32(i), EdgeState);
}
for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) {
// call runtime to perform increment
@@ -522,8 +540,42 @@ bool GCOVProfiler::emitProfileArcs() {
}
}
- insertCounterWriteout(CountersBySP);
- insertFlush(CountersBySP);
+ Function *WriteoutF = insertCounterWriteout(CountersBySP);
+ Function *FlushF = insertFlush(CountersBySP);
+
+ // Create a small bit of code that registers the "__llvm_gcov_writeout" function
+ // to be executed at exit and the "__llvm_gcov_flush" function to be executed
+ // when "__gcov_flush" is called.
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
+ "__llvm_gcov_init", M);
+ F->setUnnamedAddr(true);
+ F->setLinkage(GlobalValue::InternalLinkage);
+ F->addFnAttr(Attribute::NoInline);
+ if (Options.NoRedZone)
+ F->addFnAttr(Attribute::NoRedZone);
+
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
+ IRBuilder<> Builder(BB);
+
+ FTy = FunctionType::get(Builder.getInt32Ty(),
+ PointerType::get(FTy, 0), false);
+ Constant *AtExitFn = M->getOrInsertFunction("atexit", FTy);
+ Builder.CreateCall(AtExitFn, WriteoutF);
+
+ // Register the local flush function.
+ FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ FTy = FunctionType::get(Builder.getVoidTy(),
+ PointerType::get(FTy, 0), false);
+ Constant *RegFlush =
+ M->getOrInsertFunction("llvm_register_flush_function", FTy);
+ Builder.CreateCall(RegFlush, FlushF);
+
+ // Make sure the flush function list is deleted at exit.
+ Builder.CreateCall(AtExitFn, getDeleteFlushFunctionListFunc());
+ Builder.CreateRetVoid();
+
+ appendToGlobalCtors(*M, F, 0);
}
if (InsertIndCounterIncrCode)
@@ -560,8 +612,8 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
if (Successors > 1 && !isa<BranchInst>(TI) && !isa<ReturnInst>(TI)) {
for (int i = 0; i != Successors; ++i) {
BasicBlock *Succ = TI->getSuccessor(i);
- IRBuilder<> builder(Succ);
- Value *Counter = builder.CreateConstInBoundsGEP2_64(Counters, 0,
+ IRBuilder<> Builder(Succ);
+ Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
Edge + i);
EdgeTable[((Succs.idFor(Succ)-1) * Preds.size()) +
(Preds.idFor(BB)-1)] = cast<Constant>(Counter);
@@ -581,8 +633,11 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
}
Constant *GCOVProfiler::getStartFileFunc() {
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
- Type::getInt8PtrTy(*Ctx), false);
+ Type *Args[] = {
+ Type::getInt8PtrTy(*Ctx), // const char *orig_filename
+ Type::getInt8PtrTy(*Ctx), // const char version[4]
+ };
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
return M->getOrInsertFunction("llvm_gcda_start_file", FTy);
}
@@ -598,9 +653,10 @@ Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
}
Constant *GCOVProfiler::getEmitFunctionFunc() {
- Type *Args[2] = {
+ Type *Args[3] = {
Type::getInt32Ty(*Ctx), // uint32_t ident
Type::getInt8PtrTy(*Ctx), // const char *function_name
+ Type::getInt8Ty(*Ctx), // uint8_t use_extra_checksum
};
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
return M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
@@ -611,11 +667,15 @@ Constant *GCOVProfiler::getEmitArcsFunc() {
Type::getInt32Ty(*Ctx), // uint32_t num_counters
Type::getInt64PtrTy(*Ctx), // uint64_t *counters
};
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
- Args, false);
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy);
}
+Constant *GCOVProfiler::getDeleteFlushFunctionListFunc() {
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ return M->getOrInsertFunction("llvm_delete_flush_function_list", FTy);
+}
+
Constant *GCOVProfiler::getEndFileFunc() {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
@@ -634,7 +694,7 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() {
return GV;
}
-void GCOVProfiler::insertCounterWriteout(
+Function *GCOVProfiler::insertCounterWriteout(
ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
@@ -643,7 +703,7 @@ void GCOVProfiler::insertCounterWriteout(
"__llvm_gcov_writeout", M);
WriteoutF->setUnnamedAddr(true);
WriteoutF->addFnAttr(Attribute::NoInline);
- if (NoRedZone)
+ if (Options.NoRedZone)
WriteoutF->addFnAttr(Attribute::NoRedZone);
BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
@@ -659,50 +719,31 @@ void GCOVProfiler::insertCounterWriteout(
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
DICompileUnit CU(CU_Nodes->getOperand(i));
std::string FilenameGcda = mangleName(CU, "gcda");
- Builder.CreateCall(StartFile,
- Builder.CreateGlobalStringPtr(FilenameGcda));
- for (ArrayRef<std::pair<GlobalVariable *, MDNode *> >::iterator
- I = CountersBySP.begin(), E = CountersBySP.end();
- I != E; ++I) {
- DISubprogram SP(I->second);
- intptr_t ident = reinterpret_cast<intptr_t>(I->second);
- Builder.CreateCall2(EmitFunction,
- ConstantInt::get(Type::getInt32Ty(*Ctx), ident),
- Builder.CreateGlobalStringPtr(SP.getName()));
-
- GlobalVariable *GV = I->first;
+ Builder.CreateCall2(StartFile,
+ Builder.CreateGlobalStringPtr(FilenameGcda),
+ Builder.CreateGlobalStringPtr(ReversedVersion));
+ for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) {
+ DISubprogram SP(CountersBySP[j].second);
+ Builder.CreateCall3(EmitFunction,
+ Builder.getInt32(j),
+ Options.FunctionNamesInData ?
+ Builder.CreateGlobalStringPtr(SP.getName()) :
+ Constant::getNullValue(Builder.getInt8PtrTy()),
+ Builder.getInt8(Options.UseCfgChecksum));
+
+ GlobalVariable *GV = CountersBySP[j].first;
unsigned Arcs =
cast<ArrayType>(GV->getType()->getElementType())->getNumElements();
Builder.CreateCall2(EmitArcs,
- ConstantInt::get(Type::getInt32Ty(*Ctx), Arcs),
+ Builder.getInt32(Arcs),
Builder.CreateConstGEP2_64(GV, 0, 0));
}
Builder.CreateCall(EndFile);
}
}
- Builder.CreateRetVoid();
- // Create a small bit of code that registers the "__llvm_gcov_writeout"
- // function to be executed at exit.
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
- Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
- "__llvm_gcov_init", M);
- F->setUnnamedAddr(true);
- F->setLinkage(GlobalValue::InternalLinkage);
- F->addFnAttr(Attribute::NoInline);
- if (NoRedZone)
- F->addFnAttr(Attribute::NoRedZone);
-
- BB = BasicBlock::Create(*Ctx, "entry", F);
- Builder.SetInsertPoint(BB);
-
- FTy = FunctionType::get(Type::getInt32Ty(*Ctx),
- PointerType::get(FTy, 0), false);
- Constant *AtExitFn = M->getOrInsertFunction("atexit", FTy);
- Builder.CreateCall(AtExitFn, WriteoutF);
Builder.CreateRetVoid();
-
- appendToGlobalCtors(*M, F, 0);
+ return WriteoutF;
}
void GCOVProfiler::insertIndirectCounterIncrement() {
@@ -711,13 +752,9 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
Fn->setUnnamedAddr(true);
Fn->setLinkage(GlobalValue::InternalLinkage);
Fn->addFnAttr(Attribute::NoInline);
- if (NoRedZone)
+ if (Options.NoRedZone)
Fn->addFnAttr(Attribute::NoRedZone);
- Type *Int32Ty = Type::getInt32Ty(*Ctx);
- Type *Int64Ty = Type::getInt64Ty(*Ctx);
- Constant *NegOne = ConstantInt::get(Int32Ty, 0xffffffff);
-
// Create basic blocks for function.
BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", Fn);
IRBuilder<> Builder(BB);
@@ -731,26 +768,27 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
Argument *Arg = Fn->arg_begin();
Arg->setName("predecessor");
Value *Pred = Builder.CreateLoad(Arg, "pred");
- Value *Cond = Builder.CreateICmpEQ(Pred, NegOne);
+ Value *Cond = Builder.CreateICmpEQ(Pred, Builder.getInt32(0xffffffff));
BranchInst::Create(Exit, PredNotNegOne, Cond, BB);
Builder.SetInsertPoint(PredNotNegOne);
// uint64_t *counter = counters[pred];
// if (!counter) return;
- Value *ZExtPred = Builder.CreateZExt(Pred, Int64Ty);
+ Value *ZExtPred = Builder.CreateZExt(Pred, Builder.getInt64Ty());
Arg = llvm::next(Fn->arg_begin());
Arg->setName("counters");
Value *GEP = Builder.CreateGEP(Arg, ZExtPred);
Value *Counter = Builder.CreateLoad(GEP, "counter");
Cond = Builder.CreateICmpEQ(Counter,
- Constant::getNullValue(Int64Ty->getPointerTo()));
+ Constant::getNullValue(
+ Builder.getInt64Ty()->getPointerTo()));
Builder.CreateCondBr(Cond, Exit, CounterEnd);
// ++*counter;
Builder.SetInsertPoint(CounterEnd);
Value *Add = Builder.CreateAdd(Builder.CreateLoad(Counter),
- ConstantInt::get(Int64Ty, 1));
+ Builder.getInt64(1));
Builder.CreateStore(Add, Counter);
Builder.CreateBr(Exit);
@@ -759,18 +797,18 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
Builder.CreateRetVoid();
}
-void GCOVProfiler::
+Function *GCOVProfiler::
insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
- Function *FlushF = M->getFunction("__gcov_flush");
+ Function *FlushF = M->getFunction("__llvm_gcov_flush");
if (!FlushF)
FlushF = Function::Create(FTy, GlobalValue::InternalLinkage,
- "__gcov_flush", M);
+ "__llvm_gcov_flush", M);
else
FlushF->setLinkage(GlobalValue::InternalLinkage);
FlushF->setUnnamedAddr(true);
FlushF->addFnAttr(Attribute::NoInline);
- if (NoRedZone)
+ if (Options.NoRedZone)
FlushF->addFnAttr(Attribute::NoRedZone);
BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF);
@@ -795,8 +833,10 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
if (RetTy == Type::getVoidTy(*Ctx))
Builder.CreateRetVoid();
else if (RetTy->isIntegerTy())
- // Used if __gcov_flush was implicitly declared.
+ // Used if __llvm_gcov_flush was implicitly declared.
Builder.CreateRet(ConstantInt::get(RetTy, 0));
else
- report_fatal_error("invalid return type for __gcov_flush");
+ report_fatal_error("invalid return type for __llvm_gcov_flush");
+
+ return FlushF;
}
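
With this hunk the GCOV emitters stop registering themselves (the atexit and global-ctor wiring is removed) and instead return the generated Function* so a single caller can decide how to hook up the writeout and flush helpers; the flush entry point is also renamed to __llvm_gcov_flush. Below is a minimal sketch of the underlying "build an internal helper and hand it back" pattern using the LLVM C++ API of this vintage; emitEmptyHelper and its parameter names are illustrative and not part of the pass.

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Sketch: emit an internal, no-inline "void()" helper and return it so the
    // caller decides how to register it (global ctor list, atexit thunk, ...).
    static Function *emitEmptyHelper(Module &M, StringRef Name) {
      LLVMContext &Ctx = M.getContext();
      FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), false);
      Function *F = Function::Create(FTy, GlobalValue::InternalLinkage, Name, &M);
      F->setUnnamedAddr(true);
      F->addFnAttr(Attribute::NoInline);
      BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
      IRBuilder<> Builder(BB);
      Builder.CreateRetVoid();
      return F;
    }

The returned function can then be passed to whatever registration mechanism the caller prefers, keeping that policy in one place instead of inside each emitter.
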
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 80705af..fce6513 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -418,13 +418,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
ValueMap<Value*, Value*> ShadowMap, OriginMap;
bool InsertChecks;
+ bool LoadShadow;
OwningPtr<VarArgHelper> VAHelper;
- // An unfortunate workaround for asymmetric lowering of va_arg stuff.
- // See a comment in visitCallSite for more details.
- static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
- static const unsigned AMD64FpEndOffset = 176;
-
struct ShadowOriginAndInsertPoint {
Instruction *Shadow;
Instruction *Origin;
@@ -437,11 +433,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
SmallVector<Instruction*, 16> StoreList;
MemorySanitizerVisitor(Function &F, MemorySanitizer &MS)
- : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) {
- InsertChecks = !MS.BL->isIn(F);
+ : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) {
+ LoadShadow = InsertChecks =
+ !MS.BL->isIn(F) &&
+ F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::SanitizeMemory);
+
DEBUG(if (!InsertChecks)
- dbgs() << "MemorySanitizer is not inserting checks into '"
- << F.getName() << "'\n");
+ dbgs() << "MemorySanitizer is not inserting checks into '"
+ << F.getName() << "'\n");
}
void materializeStores() {
@@ -836,15 +836,25 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRBuilder<> IRB(&I);
Type *ShadowTy = getShadowTy(&I);
Value *Addr = I.getPointerOperand();
- Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB);
- setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, I.getAlignment(), "_msld"));
+ if (LoadShadow) {
+ Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB);
+ setShadow(&I,
+ IRB.CreateAlignedLoad(ShadowPtr, I.getAlignment(), "_msld"));
+ } else {
+ setShadow(&I, getCleanShadow(&I));
+ }
if (ClCheckAccessAddress)
insertCheck(I.getPointerOperand(), &I);
if (MS.TrackOrigins) {
- unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment());
- setOrigin(&I, IRB.CreateAlignedLoad(getOriginPtr(Addr, IRB), Alignment));
+ if (LoadShadow) {
+ unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment());
+ setOrigin(&I,
+ IRB.CreateAlignedLoad(getOriginPtr(Addr, IRB), Alignment));
+ } else {
+ setOrigin(&I, getCleanOrigin());
+ }
}
}
@@ -1410,16 +1420,25 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Addr = I.getArgOperand(0);
Type *ShadowTy = getShadowTy(&I);
- Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB);
- // We don't know the pointer alignment (could be unaligned SSE load!).
- // Have to assume to worst case.
- setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, 1, "_msld"));
+ if (LoadShadow) {
+ Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB);
+ // We don't know the pointer alignment (could be unaligned SSE load!).
+ // Have to assume the worst case.
+ setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, 1, "_msld"));
+ } else {
+ setShadow(&I, getCleanShadow(&I));
+ }
+
if (ClCheckAccessAddress)
insertCheck(Addr, &I);
- if (MS.TrackOrigins)
- setOrigin(&I, IRB.CreateLoad(getOriginPtr(Addr, IRB)));
+ if (MS.TrackOrigins) {
+ if (LoadShadow)
+ setOrigin(&I, IRB.CreateLoad(getOriginPtr(Addr, IRB)));
+ else
+ setOrigin(&I, getCleanOrigin());
+ }
return true;
}
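
The new LoadShadow/InsertChecks gating means a function that is blacklisted or not marked with the sanitize_memory attribute gets clean shadow and origin values instead of real shadow loads. A small sketch of the attribute test as it works in this API generation; the helper name is illustrative.

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"

    using namespace llvm;

    // Sketch of the gating test used to initialize LoadShadow/InsertChecks:
    // only functions carrying the sanitize_memory attribute are instrumented.
    static bool wantsMsanInstrumentation(const Function &F) {
      return F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                            Attribute::SanitizeMemory);
    }
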
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index d71dd5d..015fd2e 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -154,7 +154,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
- if (TLI && TLI->isSlowDivBypassed()) {
+ if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
const DenseMap<unsigned int, unsigned int> &BypassWidths =
TLI->getBypassSlowDivWidths();
for (Function::iterator I = F.begin(); I != F.end(); I++)
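
The added !OptSize guard keeps the slow-division bypass out of size-optimized builds. The reason is visible in the shape of the code the bypass emits: a runtime width check plus both a narrow and a full-width divide. A rough C++ picture of that shape, for illustration only and not the pass's literal output:

    #include <cstdint>

    // Roughly what bypassSlowDivision turns a 64-bit udiv into on targets where
    // 32-bit division is much cheaper. The extra branch and second divide are
    // why the transformation is now skipped when optimizing for size.
    uint64_t div_bypass(uint64_t a, uint64_t b) {
      if (((a | b) >> 32) == 0)                        // both operands fit in 32 bits
        return uint64_t(uint32_t(a) / uint32_t(b));    // short, fast divide
      return a / b;                                    // full-width divide
    }
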
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index c04b447..129af8d 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1714,7 +1714,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return true;
}
-static void patchReplacementInstruction(Value *Repl, Instruction *I) {
+static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// Patch the replacement so that it is not more restrictive than the value
// being replaced.
BinaryOperator *Op = dyn_cast<BinaryOperator>(I);
@@ -1756,8 +1756,8 @@ static void patchReplacementInstruction(Value *Repl, Instruction *I) {
}
}
-static void patchAndReplaceAllUsesWith(Value *Repl, Instruction *I) {
- patchReplacementInstruction(Repl, I);
+static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
+ patchReplacementInstruction(I, Repl);
I->replaceAllUsesWith(Repl);
}
@@ -1919,7 +1919,7 @@ bool GVN::processLoad(LoadInst *L) {
}
// Remove it!
- patchAndReplaceAllUsesWith(AvailableVal, L);
+ patchAndReplaceAllUsesWith(L, AvailableVal);
if (DepLI->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(DepLI);
markInstructionForDeletion(L);
@@ -2260,7 +2260,7 @@ bool GVN::processInstruction(Instruction *I) {
}
// Remove it!
- patchAndReplaceAllUsesWith(repl, I);
+ patchAndReplaceAllUsesWith(I, repl);
if (MD && repl->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(repl);
markInstructionForDeletion(I);
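
patchReplacementInstruction now takes the instruction being replaced first and the replacement second, matching the call sites. Its job is to make sure the replacement promises no more than the original did, typically by intersecting poison-generating flags such as nsw/nuw. A minimal sketch of that intersection, assuming both values are BinaryOperators; the helper name is illustrative.

    #include "llvm/IR/InstrTypes.h"

    using namespace llvm;

    // Sketch: when Repl will stand in for I, keep a wrap flag only if *both*
    // instructions had it, so the replacement carries no guarantee the
    // original instruction lacked.
    static void intersectWrapFlags(BinaryOperator *I, BinaryOperator *Repl) {
      Repl->setHasNoSignedWrap(Repl->hasNoSignedWrap() && I->hasNoSignedWrap());
      Repl->setHasNoUnsignedWrap(Repl->hasNoUnsignedWrap() &&
                                 I->hasNoUnsignedWrap());
    }
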
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 1601a8d..14e463a 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -53,6 +53,7 @@
#define DEBUG_TYPE "global-merge"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
@@ -60,14 +61,22 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
+static cl::opt<bool>
+EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
+ cl::desc("Enable global merge pass on constants"),
+ cl::init(false));
+
STATISTIC(NumMerged , "Number of globals merged");
namespace {
class GlobalMerge : public FunctionPass {
@@ -78,6 +87,23 @@ namespace {
bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const;
+ /// \brief Check if the given variable has been identified as a must-keep variable.
+ /// \pre setMustKeepGlobalVariables must have been called on the Module that
+ /// contains GV
+ bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
+ return MustKeepGlobalVariables.count(GV);
+ }
+
+ /// Collect every variable marked as "used" or used in a landing pad
+ /// instruction for this Module.
+ void setMustKeepGlobalVariables(Module &M);
+
+ /// Collect every variable marked as "used".
+ void collectUsedGlobalVariables(Module &M);
+
+ /// Keep track of the GlobalVariables that are marked as "used".
+ SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables;
+
public:
static char ID; // Pass identification, replacement for typeid.
explicit GlobalMerge(const TargetLowering *tli = 0)
@@ -169,6 +195,46 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
return true;
}
+void GlobalMerge::collectUsedGlobalVariables(Module &M) {
+ // Extract global variables from llvm.used array
+ const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+ if (!GV || !GV->hasInitializer()) return;
+
+ // Should be an array of 'i8*'.
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (const GlobalVariable *G =
+ dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(G);
+}
+
+void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
+ // If we already processed a Module, MustKeepGlobalVariables may have been
+ // populated. Reset the information for this module.
+ MustKeepGlobalVariables.clear();
+ collectUsedGlobalVariables(M);
+
+ for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn;
+ ++IFn) {
+ for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end();
+ IBB != IEndBB; ++IBB) {
+ // Follow the invoke link to find the landing pad instruction.
+ const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator());
+ if (!II) continue;
+
+ const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst();
+ // Look for globals in the clauses of the landing pad instruction
+ for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses();
+ Idx != NumClauses; ++Idx)
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(LPInst->getClause(Idx)
+ ->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(GV);
+ }
+ }
+}
bool GlobalMerge::doInitialization(Module &M) {
DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
@@ -176,6 +242,7 @@ bool GlobalMerge::doInitialization(Module &M) {
const DataLayout *TD = TLI->getDataLayout();
unsigned MaxOffset = TLI->getMaximalGlobalOffset();
bool Changed = false;
+ setMustKeepGlobalVariables(M);
// Grab all non-const globals.
for (Module::global_iterator I = M.global_begin(),
@@ -200,6 +267,12 @@ bool GlobalMerge::doInitialization(Module &M) {
I->getName().startswith(".llvm."))
continue;
+ // Ignore all "required" globals:
+ // - the ones used for EH
+ // - the ones marked with "used" attribute
+ if (isMustKeepGlobalVariable(I))
+ continue;
+
if (TD->getTypeAllocSize(Ty) < MaxOffset) {
if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine())
.isBSSLocal())
@@ -221,11 +294,11 @@ bool GlobalMerge::doInitialization(Module &M) {
if (I->second.size() > 1)
Changed |= doMerge(I->second, M, false, I->first);
- // FIXME: This currently breaks the EH processing due to way how the
- // typeinfo detection works. We might want to detect the TIs and ignore
- // them in the future.
- // if (ConstGlobals.size() > 1)
- // Changed |= doMerge(ConstGlobals, M, true);
+ if (EnableGlobalMergeOnConst)
+ for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
+ I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I)
+ if (I->second.size() > 1)
+ Changed |= doMerge(I->second, M, true, I->first);
return Changed;
}
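
With the must-keep filtering above in place (llvm.used entries and globals referenced by landingpad clauses are excluded), constant merging can be re-enabled behind the hidden -global-merge-on-const flag. For readers unfamiliar with the pass, here is a C++-level picture of what the merge itself does; this is an analogy only, the pass works on IR globals, and the names are invented.

    // Illustration only: GlobalMerge rewrites several small globals into one
    // aggregate so a single base address plus small offsets reaches all of them.
    namespace before {
    int a, b, c;                        // three globals, three symbol addresses
    }
    namespace after {
    struct Merged { int a, b, c; };
    Merged merged;                      // one global; fields reached via offsets
    }
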
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index e90fe90..0e57e5c 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -69,112 +69,130 @@ static cl::opt<bool>
ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden);
namespace {
-/// \brief Alloca partitioning representation.
-///
-/// This class represents a partitioning of an alloca into slices, and
-/// information about the nature of uses of each slice of the alloca. The goal
-/// is that this information is sufficient to decide if and how to split the
-/// alloca apart and replace slices with scalars. It is also intended that this
-/// structure can capture the relevant information needed both to decide about
-/// and to enact these transformations.
-class AllocaPartitioning {
-public:
- /// \brief A common base class for representing a half-open byte range.
- struct ByteRange {
- /// \brief The beginning offset of the range.
- uint64_t BeginOffset;
+/// \brief A common base class for representing a half-open byte range.
+struct ByteRange {
+ /// \brief The beginning offset of the range.
+ uint64_t BeginOffset;
- /// \brief The ending offset, not included in the range.
- uint64_t EndOffset;
+ /// \brief The ending offset, not included in the range.
+ uint64_t EndOffset;
- ByteRange() : BeginOffset(), EndOffset() {}
- ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
- : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
+ ByteRange() : BeginOffset(), EndOffset() {}
+ ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
+ : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
- /// \brief Support for ordering ranges.
- ///
- /// This provides an ordering over ranges such that start offsets are
- /// always increasing, and within equal start offsets, the end offsets are
- /// decreasing. Thus the spanning range comes first in a cluster with the
- /// same start position.
- bool operator<(const ByteRange &RHS) const {
- if (BeginOffset < RHS.BeginOffset) return true;
- if (BeginOffset > RHS.BeginOffset) return false;
- if (EndOffset > RHS.EndOffset) return true;
- return false;
- }
+ /// \brief Support for ordering ranges.
+ ///
+ /// This provides an ordering over ranges such that start offsets are
+ /// always increasing, and within equal start offsets, the end offsets are
+ /// decreasing. Thus the spanning range comes first in a cluster with the
+ /// same start position.
+ bool operator<(const ByteRange &RHS) const {
+ if (BeginOffset < RHS.BeginOffset) return true;
+ if (BeginOffset > RHS.BeginOffset) return false;
+ if (EndOffset > RHS.EndOffset) return true;
+ return false;
+ }
- /// \brief Support comparison with a single offset to allow binary searches.
- friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
- return LHS.BeginOffset < RHSOffset;
- }
+ /// \brief Support comparison with a single offset to allow binary searches.
+ friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
+ return LHS.BeginOffset < RHSOffset;
+ }
- friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
- const ByteRange &RHS) {
- return LHSOffset < RHS.BeginOffset;
- }
+ friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
+ const ByteRange &RHS) {
+ return LHSOffset < RHS.BeginOffset;
+ }
- bool operator==(const ByteRange &RHS) const {
- return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
- }
- bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
- };
+ bool operator==(const ByteRange &RHS) const {
+ return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
+ }
+ bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
+};
- /// \brief A partition of an alloca.
+/// \brief A partition of an alloca.
+///
+/// This structure represents a contiguous partition of the alloca. These are
+/// formed by examining the uses of the alloca. During formation, they may
+/// overlap but once an AllocaPartitioning is built, the Partitions within it
+/// are all disjoint.
+struct Partition : public ByteRange {
+ /// \brief Whether this partition is splittable into smaller partitions.
///
- /// This structure represents a contiguous partition of the alloca. These are
- /// formed by examining the uses of the alloca. During formation, they may
- /// overlap but once an AllocaPartitioning is built, the Partitions within it
- /// are all disjoint.
- struct Partition : public ByteRange {
- /// \brief Whether this partition is splittable into smaller partitions.
- ///
- /// We flag partitions as splittable when they are formed entirely due to
- /// accesses by trivially splittable operations such as memset and memcpy.
- bool IsSplittable;
-
- /// \brief Test whether a partition has been marked as dead.
- bool isDead() const {
- if (BeginOffset == UINT64_MAX) {
- assert(EndOffset == UINT64_MAX);
- return true;
- }
- return false;
+ /// We flag partitions as splittable when they are formed entirely due to
+ /// accesses by trivially splittable operations such as memset and memcpy.
+ bool IsSplittable;
+
+ /// \brief Test whether a partition has been marked as dead.
+ bool isDead() const {
+ if (BeginOffset == UINT64_MAX) {
+ assert(EndOffset == UINT64_MAX);
+ return true;
}
+ return false;
+ }
- /// \brief Kill a partition.
- /// This is accomplished by setting both its beginning and end offset to
- /// the maximum possible value.
- void kill() {
- assert(!isDead() && "He's Dead, Jim!");
- BeginOffset = EndOffset = UINT64_MAX;
- }
+ /// \brief Kill a partition.
+ /// This is accomplished by setting both its beginning and end offset to
+ /// the maximum possible value.
+ void kill() {
+ assert(!isDead() && "He's Dead, Jim!");
+ BeginOffset = EndOffset = UINT64_MAX;
+ }
- Partition() : ByteRange(), IsSplittable() {}
- Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
- : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
- };
+ Partition() : ByteRange(), IsSplittable() {}
+ Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
+ : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
+};
+
+/// \brief A particular use of a partition of the alloca.
+///
+/// This structure is used to associate uses of a partition with it. They
+/// mark the range of bytes which are referenced by a particular instruction,
+/// and includes a handle to the user itself and the pointer value in use.
+/// The bounds of these uses are determined by intersecting the bounds of the
+/// memory use itself with a particular partition. As a consequence there is
+/// intentionally overlap between various uses of the same partition.
+class PartitionUse : public ByteRange {
+ /// \brief Combined storage for both the Use* and split state.
+ PointerIntPair<Use*, 1, bool> UsePtrAndIsSplit;
- /// \brief A particular use of a partition of the alloca.
+public:
+ PartitionUse() : ByteRange(), UsePtrAndIsSplit() {}
+ PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U,
+ bool IsSplit)
+ : ByteRange(BeginOffset, EndOffset), UsePtrAndIsSplit(U, IsSplit) {}
+
+ /// \brief The use in question. Provides access to both user and used value.
///
- /// This structure is used to associate uses of a partition with it. They
- /// mark the range of bytes which are referenced by a particular instruction,
- /// and includes a handle to the user itself and the pointer value in use.
- /// The bounds of these uses are determined by intersecting the bounds of the
- /// memory use itself with a particular partition. As a consequence there is
- /// intentionally overlap between various uses of the same partition.
- struct PartitionUse : public ByteRange {
- /// \brief The use in question. Provides access to both user and used value.
- ///
- /// Note that this may be null if the partition use is *dead*, that is, it
- /// should be ignored.
- Use *U;
+ /// Note that this may be null if the partition use is *dead*, that is, it
+ /// should be ignored.
+ Use *getUse() const { return UsePtrAndIsSplit.getPointer(); }
- PartitionUse() : ByteRange(), U() {}
- PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U)
- : ByteRange(BeginOffset, EndOffset), U(U) {}
- };
+ /// \brief Set the use for this partition use range.
+ void setUse(Use *U) { UsePtrAndIsSplit.setPointer(U); }
+
+ /// \brief Whether this use is split across multiple partitions.
+ bool isSplit() const { return UsePtrAndIsSplit.getInt(); }
+};
+}
+
+namespace llvm {
+template <> struct isPodLike<Partition> : llvm::true_type {};
+template <> struct isPodLike<PartitionUse> : llvm::true_type {};
+}
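
PartitionUse now packs the Use pointer and the new "is split" bit into a single PointerIntPair, exploiting pointer alignment to store the flag at no size cost. A self-contained sketch of the idiom; the payload type and struct name here are arbitrary, not SROA's.

    #include "llvm/ADT/PointerIntPair.h"

    // Sketch of the PointerIntPair idiom used by PartitionUse: the low bit of
    // an aligned pointer is spare, so a bool rides along for free.
    struct TaggedPtr {
      llvm::PointerIntPair<int *, 1, bool> PtrAndFlag;

      int *getPtr() const { return PtrAndFlag.getPointer(); }
      bool isSplit() const { return PtrAndFlag.getInt(); }
      void set(int *P, bool Split) {
        PtrAndFlag.setPointer(P);
        PtrAndFlag.setInt(Split);
      }
    };
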
+namespace {
+/// \brief Alloca partitioning representation.
+///
+/// This class represents a partitioning of an alloca into slices, and
+/// information about the nature of uses of each slice of the alloca. The goal
+/// is that this information is sufficient to decide if and how to split the
+/// alloca apart and replace slices with scalars. It is also intended that this
+/// structure can capture the relevant information needed both to decide about
+/// and to enact these transformations.
+class AllocaPartitioning {
+public:
/// \brief Construct a partitioning of a particular alloca.
///
/// Construction does most of the work for partitioning the alloca. This
@@ -456,10 +474,10 @@ private:
// Clamp the end offset to the end of the allocation. Note that this is
// formulated to handle even the case where "BeginOffset + Size" overflows.
- // NOTE! This may appear superficially to be something we could ignore
- // entirely, but that is not so! There may be PHI-node uses where some
- // instructions are dead but not others. We can't completely ignore the
- // PHI node, and so have to record at least the information here.
+ // This may appear superficially to be something we could ignore entirely,
+ // but that is not so! There may be widened loads or PHI-node uses where
+ // some instructions are dead but not others. We can't completely ignore
+ // them, and so have to record at least the information here.
assert(AllocSize >= BeginOffset); // Established above.
if (Size > AllocSize - BeginOffset) {
DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
@@ -474,33 +492,17 @@ private:
}
void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
- bool IsVolatile) {
- uint64_t Size = DL.getTypeStoreSize(Ty);
-
- // If this memory access can be shown to *statically* extend outside the
- // bounds of of the allocation, it's behavior is undefined, so simply
- // ignore it. Note that this is more strict than the generic clamping
- // behavior of insertUse. We also try to handle cases which might run the
- // risk of overflow.
- // FIXME: We should instead consider the pointer to have escaped if this
- // function is being instrumented for addressing bugs or race conditions.
- if (Offset.isNegative() || Size > AllocSize ||
- Offset.ugt(AllocSize - Size)) {
- DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte "
- << (isa<LoadInst>(I) ? "load" : "store") << " @" << Offset
- << " which extends past the end of the " << AllocSize
- << " byte alloca:\n"
- << " alloca: " << P.AI << "\n"
- << " use: " << I << "\n");
- return;
- }
-
+ uint64_t Size, bool IsVolatile) {
// We allow splitting of loads and stores where the type is an integer type
- // and which cover the entire alloca. Such integer loads and stores
- // often require decomposition into fine grained loads and stores.
- bool IsSplittable = false;
- if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
- IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8;
+ // and cover the entire alloca. This prevents us from splitting
+ // over-eagerly.
+ // FIXME: In the great blue eventually, we should eagerly split all integer
+ // loads and stores, and then have a separate step that merges adjacent
+ // alloca partitions into a single partition suitable for integer widening.
+ // Or we should skip the merge step and rely on GVN and other passes to
+ // merge adjacent loads and stores that survive mem2reg.
+ bool IsSplittable =
+ Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize;
insertUse(I, Offset, Size, IsSplittable);
}
@@ -512,7 +514,8 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&LI);
- return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile());
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
+ return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
}
void visitStoreInst(StoreInst &SI) {
@@ -522,9 +525,28 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&SI);
+ uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
+
+ // If this memory access can be shown to *statically* extend outside the
+ // bounds of the allocation, its behavior is undefined, so simply
+ // ignore it. Note that this is more strict than the generic clamping
+ // behavior of insertUse. We also try to handle cases which might run the
+ // risk of overflow.
+ // FIXME: We should instead consider the pointer to have escaped if this
+ // function is being instrumented for addressing bugs or race conditions.
+ if (Offset.isNegative() || Size > AllocSize ||
+ Offset.ugt(AllocSize - Size)) {
+ DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset
+ << " which extends past the end of the " << AllocSize
+ << " byte alloca:\n"
+ << " alloca: " << P.AI << "\n"
+ << " use: " << SI << "\n");
+ return;
+ }
+
assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
"All simple FCA stores should have been pre-split");
- handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile());
+ handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
}
@@ -795,13 +817,14 @@ private:
EndOffset = AllocSize;
// NB: This only works if we have zero overlapping partitions.
- iterator B = std::lower_bound(P.begin(), P.end(), BeginOffset);
- if (B != P.begin() && llvm::prior(B)->EndOffset > BeginOffset)
- B = llvm::prior(B);
- for (iterator I = B, E = P.end(); I != E && I->BeginOffset < EndOffset;
- ++I) {
+ iterator I = std::lower_bound(P.begin(), P.end(), BeginOffset);
+ if (I != P.begin() && llvm::prior(I)->EndOffset > BeginOffset)
+ I = llvm::prior(I);
+ iterator E = P.end();
+ bool IsSplit = llvm::next(I) != E && llvm::next(I)->BeginOffset < EndOffset;
+ for (; I != E && I->BeginOffset < EndOffset; ++I) {
PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset),
- std::min(I->EndOffset, EndOffset), U);
+ std::min(I->EndOffset, EndOffset), U, IsSplit);
P.use_push_back(I, NewPU);
if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser()))
P.PHIOrSelectOpMap[U]
@@ -809,20 +832,6 @@ private:
}
}
- void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset) {
- uint64_t Size = DL.getTypeStoreSize(Ty);
-
- // If this memory access can be shown to *statically* extend outside the
- // bounds of of the allocation, it's behavior is undefined, so simply
- // ignore it. Note that this is more strict than the generic clamping
- // behavior of insertUse.
- if (Offset.isNegative() || Size > AllocSize ||
- Offset.ugt(AllocSize - Size))
- return markAsDead(I);
-
- insertUse(I, Offset, Size);
- }
-
void visitBitCastInst(BitCastInst &BC) {
if (BC.use_empty())
return markAsDead(BC);
@@ -839,12 +848,23 @@ private:
void visitLoadInst(LoadInst &LI) {
assert(IsOffsetKnown);
- handleLoadOrStore(LI.getType(), LI, Offset);
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
+ insertUse(LI, Offset, Size);
}
void visitStoreInst(StoreInst &SI) {
assert(IsOffsetKnown);
- handleLoadOrStore(SI.getOperand(0)->getType(), SI, Offset);
+ uint64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType());
+
+ // If this memory access can be shown to *statically* extend outside the
+ // bounds of the allocation, its behavior is undefined, so simply
+ // ignore it. Note that this is more strict than the generic clamping
+ // behavior of insertUse.
+ if (Offset.isNegative() || Size > AllocSize ||
+ Offset.ugt(AllocSize - Size))
+ return markAsDead(SI);
+
+ insertUse(SI, Offset, Size);
}
void visitMemSetInst(MemSetInst &II) {
@@ -1089,21 +1109,21 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
Type *AllocaPartitioning::getCommonType(iterator I) const {
Type *Ty = 0;
for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
- if (!UI->U)
+ Use *U = UI->getUse();
+ if (!U)
continue; // Skip dead uses.
- if (isa<IntrinsicInst>(*UI->U->getUser()))
+ if (isa<IntrinsicInst>(*U->getUser()))
continue;
if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset)
continue;
Type *UserTy = 0;
- if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser())) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser()))
UserTy = LI->getType();
- } else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) {
+ else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser()))
UserTy = SI->getValueOperand()->getType();
- } else {
+ else
return 0; // Bail if we have weird uses.
- }
if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
// If the type is larger than the partition, skip it. We only encounter
@@ -1140,11 +1160,12 @@ void AllocaPartitioning::print(raw_ostream &OS, const_iterator I,
void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I,
StringRef Indent) const {
for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
- if (!UI->U)
+ if (!UI->getUse())
continue; // Skip dead uses.
OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") "
- << "used by: " << *UI->U->getUser() << "\n";
- if (MemTransferInst *II = dyn_cast<MemTransferInst>(UI->U->getUser())) {
+ << "used by: " << *UI->getUse()->getUser() << "\n";
+ if (MemTransferInst *II =
+ dyn_cast<MemTransferInst>(UI->getUse()->getUser())) {
const MemTransferOffsets &MTO = MemTransferInstData.lookup(II);
bool IsDest;
if (!MTO.IsSplittable)
@@ -1375,11 +1396,11 @@ public:
// may be grown during speculation. However, we never need to re-visit the
// new uses, and so we can use the initial size bound.
for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) {
- const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx);
- if (!PU.U)
+ const PartitionUse &PU = P.getUse(PI, Idx);
+ if (!PU.getUse())
continue; // Skip dead use.
- visit(cast<Instruction>(PU.U->getUser()));
+ visit(cast<Instruction>(PU.getUse()->getUser()));
}
}
@@ -1523,8 +1544,8 @@ private:
// inside the load.
AllocaPartitioning::use_iterator UI
= P.findPartitionUseForPHIOrSelectOperand(InUse);
- assert(isa<PHINode>(*UI->U->getUser()));
- UI->U = &Load->getOperandUse(Load->getPointerOperandIndex());
+ assert(isa<PHINode>(*UI->getUse()->getUser()));
+ UI->setUse(&Load->getOperandUse(Load->getPointerOperandIndex()));
}
DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
}
@@ -1571,16 +1592,16 @@ private:
void visitSelectInst(SelectInst &SI) {
DEBUG(dbgs() << " original: " << SI << "\n");
- IRBuilder<> IRB(&SI);
// If the select isn't safe to speculate, just use simple logic to emit it.
SmallVector<LoadInst *, 4> Loads;
if (!isSafeSelectToSpeculate(SI, Loads))
return;
+ IRBuilder<> IRB(&SI);
Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) };
AllocaPartitioning::iterator PIs[2];
- AllocaPartitioning::PartitionUse PUs[2];
+ PartitionUse PUs[2];
for (unsigned i = 0, e = 2; i != e; ++i) {
PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]);
if (PIs[i] != P.end()) {
@@ -1591,7 +1612,7 @@ private:
PUs[i] = *UI;
// Clear out the use here so that the offsets into the use list remain
// stable but this use is ignored when rewriting.
- UI->U = 0;
+ UI->setUse(0);
}
}
@@ -1623,8 +1644,8 @@ private:
for (unsigned i = 0, e = 2; i != e; ++i) {
if (PIs[i] != P.end()) {
Use *LoadUse = &Loads[i]->getOperandUse(0);
- assert(PUs[i].U->get() == LoadUse->get());
- PUs[i].U = LoadUse;
+ assert(PUs[i].getUse()->get() == LoadUse->get());
+ PUs[i].setUse(LoadUse);
P.use_push_back(PIs[i], PUs[i]);
}
}
@@ -1911,6 +1932,10 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
if (OldTy == NewTy)
return true;
+ if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
+ if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
+ if (NewITy->getBitWidth() >= OldITy->getBitWidth())
+ return true;
if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
return false;
if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
@@ -1939,6 +1964,10 @@ static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
"Value not convertable to type");
if (V->getType() == Ty)
return V;
+ if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType()))
+ if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty))
+ if (NewITy->getBitWidth() > OldITy->getBitWidth())
+ return IRB.CreateZExt(V, NewITy);
if (V->getType()->isIntegerTy() && Ty->isPointerTy())
return IRB.CreateIntToPtr(V, Ty);
if (V->getType()->isPointerTy() && Ty->isIntegerTy())
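
canConvertValue and convertValue now also accept integer widening, realized as a zext in convertValue. A plain C++ sketch of the rule in spirit, purely illustrative:

    #include <cstdint>

    // Spirit of the new rule: converting between integer widths is allowed as
    // long as the destination is at least as wide; the value is zero-extended.
    bool canWiden(unsigned OldBits, unsigned NewBits) { return NewBits >= OldBits; }
    uint32_t widen(uint8_t V) { return static_cast<uint32_t>(V); }   // the "zext"
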
@@ -1977,7 +2006,8 @@ static bool isVectorPromotionViable(const DataLayout &TD,
ElementSize /= 8;
for (; I != E; ++I) {
- if (!I->U)
+ Use *U = I->getUse();
+ if (!U)
continue; // Skip dead use.
uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset;
@@ -1997,24 +2027,24 @@ static bool isVectorPromotionViable(const DataLayout &TD,
= (NumElements == 1) ? Ty->getElementType()
: VectorType::get(Ty->getElementType(), NumElements);
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile())
return false;
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
const AllocaPartitioning::MemTransferOffsets &MTO
= P.getMemTransferOffsets(*MTI);
if (!MTO.IsSplittable)
return false;
}
- } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
+ } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
// Disable vector promotion when there are loads or stores of an FCA.
return false;
- } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
if (!canConvertValue(TD, PartitionTy, LI->getType()))
return false;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
if (SI->isVolatile())
return false;
if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy))
@@ -2063,7 +2093,8 @@ static bool isIntegerWideningViable(const DataLayout &TD,
// unsplittable entry (which we may make splittable later).
bool WholeAllocaOp = false;
for (; I != E; ++I) {
- if (!I->U)
+ Use *U = I->getUse();
+ if (!U)
continue; // Skip dead use.
uint64_t RelBegin = I->BeginOffset - AllocBeginOffset;
@@ -2074,7 +2105,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelEnd > Size)
return false;
- if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
if (RelBegin == 0 && RelEnd == Size)
@@ -2089,7 +2120,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelBegin != 0 || RelEnd != Size ||
!canConvertValue(TD, AllocaTy, LI->getType()))
return false;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
Type *ValueTy = SI->getValueOperand()->getType();
if (SI->isVolatile())
return false;
@@ -2105,16 +2136,16 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelBegin != 0 || RelEnd != Size ||
!canConvertValue(TD, ValueTy, AllocaTy))
return false;
- } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
return false;
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
const AllocaPartitioning::MemTransferOffsets &MTO
= P.getMemTransferOffsets(*MTI);
if (!MTO.IsSplittable)
return false;
}
- } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->U->getUser())) {
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
II->getIntrinsicID() != Intrinsic::lifetime_end)
return false;
@@ -2297,6 +2328,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
// The offset of the partition user currently being rewritten.
uint64_t BeginOffset, EndOffset;
+ bool IsSplit;
Use *OldUse;
Instruction *OldPtr;
@@ -2314,7 +2346,7 @@ public:
NewAllocaEndOffset(NewEndOffset),
NewAllocaTy(NewAI.getAllocatedType()),
VecTy(), ElementTy(), ElementSize(), IntTy(),
- BeginOffset(), EndOffset() {
+ BeginOffset(), EndOffset(), IsSplit(), OldUse(), OldPtr() {
}
/// \brief Visit the users of the alloca partition and rewrite them.
@@ -2336,14 +2368,15 @@ public:
}
bool CanSROA = true;
for (; I != E; ++I) {
- if (!I->U)
+ if (!I->getUse())
continue; // Skip dead uses.
BeginOffset = I->BeginOffset;
EndOffset = I->EndOffset;
- OldUse = I->U;
- OldPtr = cast<Instruction>(I->U->get());
+ IsSplit = I->isSplit();
+ OldUse = I->getUse();
+ OldPtr = cast<Instruction>(OldUse->get());
NamePrefix = (Twine(NewAI.getName()) + "." + Twine(BeginOffset)).str();
- CanSROA &= visit(cast<Instruction>(I->U->getUser()));
+ CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
}
if (VecTy) {
assert(CanSROA);
@@ -2450,29 +2483,12 @@ private:
DEBUG(dbgs() << " original: " << LI << "\n");
Value *OldOp = LI.getOperand(0);
assert(OldOp == OldPtr);
- IRBuilder<> IRB(&LI);
uint64_t Size = EndOffset - BeginOffset;
- bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType());
- // If this memory access can be shown to *statically* extend outside the
- // bounds of the original allocation it's behavior is undefined. Rather
- // than trying to transform it, just replace it with undef.
- // FIXME: We should do something more clever for functions being
- // instrumented by asan.
- // FIXME: Eventually, once ASan and friends can flush out bugs here, this
- // should be transformed to a load of null making it unreachable.
- uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType());
- if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) {
- LI.replaceAllUsesWith(UndefValue::get(LI.getType()));
- Pass.DeadInsts.insert(&LI);
- deleteIfTriviallyDead(OldOp);
- DEBUG(dbgs() << " to: undef!!\n");
- return true;
- }
-
- Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8)
- : LI.getType();
+ IRBuilder<> IRB(&LI);
+ Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8)
+ : LI.getType();
bool IsPtrAdjusted = false;
Value *V;
if (VecTy) {
@@ -2492,16 +2508,15 @@ private:
}
V = convertValue(TD, IRB, V, TargetTy);
- if (IsSplitIntLoad) {
+ if (IsSplit) {
assert(!LI.isVolatile());
assert(LI.getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
+ assert(Size < TD.getTypeStoreSize(LI.getType()) &&
+ "Split load isn't smaller than original load");
assert(LI.getType()->getIntegerBitWidth() ==
TD.getTypeStoreSizeInBits(LI.getType()) &&
"Non-byte-multiple bit width");
- assert(LI.getType()->getIntegerBitWidth() ==
- TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
- "Only alloca-wide loads can be split and recomposed");
// Move the insertion point just past the load so that we can refer to it.
IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
// Create a placeholder value with the same type as LI to use as the
@@ -2588,14 +2603,12 @@ private:
uint64_t Size = EndOffset - BeginOffset;
if (Size < TD.getTypeStoreSize(V->getType())) {
assert(!SI.isVolatile());
+ assert(IsSplit && "A seemingly split store isn't splittable");
assert(V->getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
assert(V->getType()->getIntegerBitWidth() ==
TD.getTypeStoreSizeInBits(V->getType()) &&
"Non-byte-multiple bit width");
- assert(V->getType()->getIntegerBitWidth() ==
- TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
- "Only alloca-wide stores can be split and recomposed");
IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset,
getName(".extract"));
@@ -3381,7 +3394,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI,
for (AllocaPartitioning::use_iterator UI = P.use_begin(PI),
UE = P.use_end(PI);
UI != UE && !IsLive; ++UI)
- if (UI->U)
+ if (UI->getUse())
IsLive = true;
if (!IsLive)
return false; // No live uses left of this partition.
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index 00cda8e..1f517d0 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -163,7 +163,7 @@ static bool insertFastDiv(Function &F,
Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
// Compare operand values and branch
- Value *ZeroV = MainBuilder.getInt32(0);
+ Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
@@ -244,7 +244,7 @@ bool llvm::bypassSlowDivision(Function &F,
// Get bitwidth of div/rem instruction
IntegerType *T = cast<IntegerType>(J->getType());
- int bitwidth = T->getBitWidth();
+ unsigned int bitwidth = T->getBitWidth();
// Continue if bitwidth is not bypassed
DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
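
Two small fixes here: the zero constant for the guard comparison is now built from the dividend's own type instead of a hard-coded i32, and the bitwidth variable is unsigned to match IntegerType::getBitWidth(). A minimal sketch of deriving a typed zero from the value it will be compared against; the helper name is illustrative.

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Value.h"

    using namespace llvm;

    // Sketch: build the constant in the operand's own type so a 64-bit bypass
    // compares an i64 against an i64 zero rather than an i32.
    static Constant *zeroLike(Value *V) {
      return Constant::getNullValue(V->getType());
    }
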
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index a309bce..63d7a1d 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -94,9 +94,12 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
//Some arguments were deleted with the VMap. Copy arguments one by one
for (Function::const_arg_iterator I = OldFunc->arg_begin(),
E = OldFunc->arg_end(); I != E; ++I)
- if (Argument* Anew = dyn_cast<Argument>(VMap[I]))
- Anew->addAttr(OldFunc->getAttributes()
- .getParamAttributes(I->getArgNo() + 1));
+ if (Argument* Anew = dyn_cast<Argument>(VMap[I])) {
+ AttributeSet attrs = OldFunc->getAttributes()
+ .getParamAttributes(I->getArgNo() + 1);
+ if (attrs.getNumSlots() > 0)
+ Anew->addAttr(attrs);
+ }
NewFunc->setAttributes(NewFunc->getAttributes()
.addAttributes(NewFunc->getContext(),
AttributeSet::ReturnIndex,
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index a63d31d..681bf9c 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2789,9 +2789,20 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
return false;
// Turn all invokes that unwind here into calls and delete the basic block.
+ bool InvokeRequiresTableEntry = false;
+ bool Changed = false;
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator());
+
+ if (II->hasFnAttr(Attribute::UWTable)) {
+ // Don't remove an `invoke' instruction if the ABI requires an entry into
+ // the table.
+ InvokeRequiresTableEntry = true;
+ continue;
+ }
+
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+
// Insert a call instruction before the invoke.
CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
Call->takeName(II);
@@ -2811,11 +2822,14 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
// Finally, delete the invoke instruction!
II->eraseFromParent();
+ Changed = true;
}
- // The landingpad is now unreachable. Zap it.
- BB->eraseFromParent();
- return true;
+ if (!InvokeRequiresTableEntry)
+ // The landingpad is now unreachable. Zap it.
+ BB->eraseFromParent();
+
+ return Changed;
}
bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
@@ -3944,11 +3958,13 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
// Load from null is undefined.
if (LoadInst *LI = dyn_cast<LoadInst>(Use))
- return LI->getPointerAddressSpace() == 0;
+ if (!LI->isVolatile())
+ return LI->getPointerAddressSpace() == 0;
// Store to null is undefined.
if (StoreInst *SI = dyn_cast<StoreInst>(Use))
- return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I;
+ if (!SI->isVolatile())
+ return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I;
}
return false;
}
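
Two behavioral fixes in this file: SimplifyResume keeps any invoke that carries the uwtable attribute (the ABI needs its unwind-table entry), and passingValueIsAlwaysUndefined no longer treats volatile accesses through a null pointer as removable. A conceptual C++ illustration of the volatile carve-out; this is an analogy for the IR-level semantics, not code from the pass.

    // Conceptual: a non-volatile access through a pointer known to be null can
    // be treated as unreachable, but a volatile access is an observable side
    // effect and must be left alone.
    void example(volatile int *mmio, int *ordinary) {
      *mmio = 1;      // volatile store: preserved even if 'mmio' were provably null
      *ordinary = 1;  // non-volatile store to a provably-null pointer may be
                      // folded away as undefined behavior
    }
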
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 8ad566c..c231704 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -15,14 +15,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
@@ -797,8 +800,7 @@ struct StrToOpt : public LibCallOptimization {
if (isa<ConstantPointerNull>(EndPtr)) {
// With a null EndPtr, this function won't capture the main argument.
// It would be readonly too, except that it still may write to errno.
- CI->addAttribute(1, Attribute::get(Callee->getContext(),
- Attribute::NoCapture));
+ CI->addAttribute(1, Attribute::NoCapture);
}
return 0;
@@ -1672,67 +1674,16 @@ class LibCallSimplifierImpl {
const TargetLibraryInfo *TLI;
const LibCallSimplifier *LCS;
bool UnsafeFPShrink;
- StringMap<LibCallOptimization*> Optimizations;
-
- // Fortified library call optimizations.
- MemCpyChkOpt MemCpyChk;
- MemMoveChkOpt MemMoveChk;
- MemSetChkOpt MemSetChk;
- StrCpyChkOpt StrCpyChk;
- StpCpyChkOpt StpCpyChk;
- StrNCpyChkOpt StrNCpyChk;
-
- // String library call optimizations.
- StrCatOpt StrCat;
- StrNCatOpt StrNCat;
- StrChrOpt StrChr;
- StrRChrOpt StrRChr;
- StrCmpOpt StrCmp;
- StrNCmpOpt StrNCmp;
- StrCpyOpt StrCpy;
- StpCpyOpt StpCpy;
- StrNCpyOpt StrNCpy;
- StrLenOpt StrLen;
- StrPBrkOpt StrPBrk;
- StrToOpt StrTo;
- StrSpnOpt StrSpn;
- StrCSpnOpt StrCSpn;
- StrStrOpt StrStr;
-
- // Memory library call optimizations.
- MemCmpOpt MemCmp;
- MemCpyOpt MemCpy;
- MemMoveOpt MemMove;
- MemSetOpt MemSet;
// Math library call optimizations.
- UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP;
- CosOpt Cos; PowOpt Pow; Exp2Opt Exp2;
-
- // Integer library call optimizations.
- FFSOpt FFS;
- AbsOpt Abs;
- IsDigitOpt IsDigit;
- IsAsciiOpt IsAscii;
- ToAsciiOpt ToAscii;
-
- // Formatting and IO library call optimizations.
- PrintFOpt PrintF;
- SPrintFOpt SPrintF;
- FPrintFOpt FPrintF;
- FWriteOpt FWrite;
- FPutsOpt FPuts;
- PutsOpt Puts;
-
- void initOptimizations();
- void addOpt(LibFunc::Func F, LibCallOptimization* Opt);
- void addOpt(LibFunc::Func F1, LibFunc::Func F2, LibCallOptimization* Opt);
+ CosOpt Cos;
+ PowOpt Pow;
+ Exp2Opt Exp2;
public:
LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI,
const LibCallSimplifier *LCS,
bool UnsafeFPShrink = false)
- : UnaryDoubleFP(false), UnsafeUnaryDoubleFP(true),
- Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) {
+ : Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) {
this->TD = TD;
this->TLI = TLI;
this->LCS = LCS;
@@ -1740,126 +1691,234 @@ public:
}
Value *optimizeCall(CallInst *CI);
+ LibCallOptimization *lookupOptimization(CallInst *CI);
+ bool hasFloatVersion(StringRef FuncName);
};
-void LibCallSimplifierImpl::initOptimizations() {
- // Fortified library call optimizations.
- Optimizations["__memcpy_chk"] = &MemCpyChk;
- Optimizations["__memmove_chk"] = &MemMoveChk;
- Optimizations["__memset_chk"] = &MemSetChk;
- Optimizations["__strcpy_chk"] = &StrCpyChk;
- Optimizations["__stpcpy_chk"] = &StpCpyChk;
- Optimizations["__strncpy_chk"] = &StrNCpyChk;
- Optimizations["__stpncpy_chk"] = &StrNCpyChk;
-
- // String library call optimizations.
- addOpt(LibFunc::strcat, &StrCat);
- addOpt(LibFunc::strncat, &StrNCat);
- addOpt(LibFunc::strchr, &StrChr);
- addOpt(LibFunc::strrchr, &StrRChr);
- addOpt(LibFunc::strcmp, &StrCmp);
- addOpt(LibFunc::strncmp, &StrNCmp);
- addOpt(LibFunc::strcpy, &StrCpy);
- addOpt(LibFunc::stpcpy, &StpCpy);
- addOpt(LibFunc::strncpy, &StrNCpy);
- addOpt(LibFunc::strlen, &StrLen);
- addOpt(LibFunc::strpbrk, &StrPBrk);
- addOpt(LibFunc::strtol, &StrTo);
- addOpt(LibFunc::strtod, &StrTo);
- addOpt(LibFunc::strtof, &StrTo);
- addOpt(LibFunc::strtoul, &StrTo);
- addOpt(LibFunc::strtoll, &StrTo);
- addOpt(LibFunc::strtold, &StrTo);
- addOpt(LibFunc::strtoull, &StrTo);
- addOpt(LibFunc::strspn, &StrSpn);
- addOpt(LibFunc::strcspn, &StrCSpn);
- addOpt(LibFunc::strstr, &StrStr);
-
- // Memory library call optimizations.
- addOpt(LibFunc::memcmp, &MemCmp);
- addOpt(LibFunc::memcpy, &MemCpy);
- addOpt(LibFunc::memmove, &MemMove);
- addOpt(LibFunc::memset, &MemSet);
+bool LibCallSimplifierImpl::hasFloatVersion(StringRef FuncName) {
+ LibFunc::Func Func;
+ SmallString<20> FloatFuncName = FuncName;
+ FloatFuncName += 'f';
+ if (TLI->getLibFunc(FloatFuncName, Func))
+ return TLI->has(Func);
+ return false;
+}
- // Math library call optimizations.
- addOpt(LibFunc::ceil, LibFunc::ceilf, &UnaryDoubleFP);
- addOpt(LibFunc::fabs, LibFunc::fabsf, &UnaryDoubleFP);
- addOpt(LibFunc::floor, LibFunc::floorf, &UnaryDoubleFP);
- addOpt(LibFunc::rint, LibFunc::rintf, &UnaryDoubleFP);
- addOpt(LibFunc::round, LibFunc::roundf, &UnaryDoubleFP);
- addOpt(LibFunc::nearbyint, LibFunc::nearbyintf, &UnaryDoubleFP);
- addOpt(LibFunc::trunc, LibFunc::truncf, &UnaryDoubleFP);
-
- if(UnsafeFPShrink) {
- addOpt(LibFunc::acos, LibFunc::acosf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::acosh, LibFunc::acoshf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::asin, LibFunc::asinf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::asinh, LibFunc::asinhf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::atan, LibFunc::atanf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::atanh, LibFunc::atanhf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::cbrt, LibFunc::cbrtf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::cosh, LibFunc::coshf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::exp, LibFunc::expf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::exp10, LibFunc::exp10f, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::expm1, LibFunc::expm1f, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::log, LibFunc::logf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::log10, LibFunc::log10f, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::log1p, LibFunc::log1pf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::log2, LibFunc::log2f, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::logb, LibFunc::logbf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::sin, LibFunc::sinf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::sinh, LibFunc::sinhf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::sqrt, LibFunc::sqrtf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::tan, LibFunc::tanf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::tanh, LibFunc::tanhf, &UnsafeUnaryDoubleFP);
+// Fortified library call optimizations.
+static MemCpyChkOpt MemCpyChk;
+static MemMoveChkOpt MemMoveChk;
+static MemSetChkOpt MemSetChk;
+static StrCpyChkOpt StrCpyChk;
+static StpCpyChkOpt StpCpyChk;
+static StrNCpyChkOpt StrNCpyChk;
+
+// String library call optimizations.
+static StrCatOpt StrCat;
+static StrNCatOpt StrNCat;
+static StrChrOpt StrChr;
+static StrRChrOpt StrRChr;
+static StrCmpOpt StrCmp;
+static StrNCmpOpt StrNCmp;
+static StrCpyOpt StrCpy;
+static StpCpyOpt StpCpy;
+static StrNCpyOpt StrNCpy;
+static StrLenOpt StrLen;
+static StrPBrkOpt StrPBrk;
+static StrToOpt StrTo;
+static StrSpnOpt StrSpn;
+static StrCSpnOpt StrCSpn;
+static StrStrOpt StrStr;
+
+// Memory library call optimizations.
+static MemCmpOpt MemCmp;
+static MemCpyOpt MemCpy;
+static MemMoveOpt MemMove;
+static MemSetOpt MemSet;
+
+// Math library call optimizations.
+static UnaryDoubleFPOpt UnaryDoubleFP(false);
+static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+
+// Integer library call optimizations.
+static FFSOpt FFS;
+static AbsOpt Abs;
+static IsDigitOpt IsDigit;
+static IsAsciiOpt IsAscii;
+static ToAsciiOpt ToAscii;
+
+// Formatting and IO library call optimizations.
+static PrintFOpt PrintF;
+static SPrintFOpt SPrintF;
+static FPrintFOpt FPrintF;
+static FWriteOpt FWrite;
+static FPutsOpt FPuts;
+static PutsOpt Puts;
+
+LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
+ LibFunc::Func Func;
+ Function *Callee = CI->getCalledFunction();
+ StringRef FuncName = Callee->getName();
+
+ // Next check for intrinsics.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::pow:
+ return &Pow;
+ case Intrinsic::exp2:
+ return &Exp2;
+ default:
+ return 0;
+ }
}
- addOpt(LibFunc::cosf, &Cos);
- addOpt(LibFunc::cos, &Cos);
- addOpt(LibFunc::cosl, &Cos);
- addOpt(LibFunc::powf, &Pow);
- addOpt(LibFunc::pow, &Pow);
- addOpt(LibFunc::powl, &Pow);
- Optimizations["llvm.pow.f32"] = &Pow;
- Optimizations["llvm.pow.f64"] = &Pow;
- Optimizations["llvm.pow.f80"] = &Pow;
- Optimizations["llvm.pow.f128"] = &Pow;
- Optimizations["llvm.pow.ppcf128"] = &Pow;
- addOpt(LibFunc::exp2l, &Exp2);
- addOpt(LibFunc::exp2, &Exp2);
- addOpt(LibFunc::exp2f, &Exp2);
- Optimizations["llvm.exp2.ppcf128"] = &Exp2;
- Optimizations["llvm.exp2.f128"] = &Exp2;
- Optimizations["llvm.exp2.f80"] = &Exp2;
- Optimizations["llvm.exp2.f64"] = &Exp2;
- Optimizations["llvm.exp2.f32"] = &Exp2;
+ // Then check for known library functions.
+ if (TLI->getLibFunc(FuncName, Func) && TLI->has(Func)) {
+ switch (Func) {
+ case LibFunc::strcat:
+ return &StrCat;
+ case LibFunc::strncat:
+ return &StrNCat;
+ case LibFunc::strchr:
+ return &StrChr;
+ case LibFunc::strrchr:
+ return &StrRChr;
+ case LibFunc::strcmp:
+ return &StrCmp;
+ case LibFunc::strncmp:
+ return &StrNCmp;
+ case LibFunc::strcpy:
+ return &StrCpy;
+ case LibFunc::stpcpy:
+ return &StpCpy;
+ case LibFunc::strncpy:
+ return &StrNCpy;
+ case LibFunc::strlen:
+ return &StrLen;
+ case LibFunc::strpbrk:
+ return &StrPBrk;
+ case LibFunc::strtol:
+ case LibFunc::strtod:
+ case LibFunc::strtof:
+ case LibFunc::strtoul:
+ case LibFunc::strtoll:
+ case LibFunc::strtold:
+ case LibFunc::strtoull:
+ return &StrTo;
+ case LibFunc::strspn:
+ return &StrSpn;
+ case LibFunc::strcspn:
+ return &StrCSpn;
+ case LibFunc::strstr:
+ return &StrStr;
+ case LibFunc::memcmp:
+ return &MemCmp;
+ case LibFunc::memcpy:
+ return &MemCpy;
+ case LibFunc::memmove:
+ return &MemMove;
+ case LibFunc::memset:
+ return &MemSet;
+ case LibFunc::cosf:
+ case LibFunc::cos:
+ case LibFunc::cosl:
+ return &Cos;
+ case LibFunc::powf:
+ case LibFunc::pow:
+ case LibFunc::powl:
+ return &Pow;
+ case LibFunc::exp2l:
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ return &Exp2;
+ case LibFunc::ffs:
+ case LibFunc::ffsl:
+ case LibFunc::ffsll:
+ return &FFS;
+ case LibFunc::abs:
+ case LibFunc::labs:
+ case LibFunc::llabs:
+ return &Abs;
+ case LibFunc::isdigit:
+ return &IsDigit;
+ case LibFunc::isascii:
+ return &IsAscii;
+ case LibFunc::toascii:
+ return &ToAscii;
+ case LibFunc::printf:
+ return &PrintF;
+ case LibFunc::sprintf:
+ return &SPrintF;
+ case LibFunc::fprintf:
+ return &FPrintF;
+ case LibFunc::fwrite:
+ return &FWrite;
+ case LibFunc::fputs:
+ return &FPuts;
+ case LibFunc::puts:
+ return &Puts;
+ case LibFunc::ceil:
+ case LibFunc::fabs:
+ case LibFunc::floor:
+ case LibFunc::rint:
+ case LibFunc::round:
+ case LibFunc::nearbyint:
+ case LibFunc::trunc:
+ if (hasFloatVersion(FuncName))
+ return &UnaryDoubleFP;
+ return 0;
+ case LibFunc::acos:
+ case LibFunc::acosh:
+ case LibFunc::asin:
+ case LibFunc::asinh:
+ case LibFunc::atan:
+ case LibFunc::atanh:
+ case LibFunc::cbrt:
+ case LibFunc::cosh:
+ case LibFunc::exp:
+ case LibFunc::exp10:
+ case LibFunc::expm1:
+ case LibFunc::log:
+ case LibFunc::log10:
+ case LibFunc::log1p:
+ case LibFunc::log2:
+ case LibFunc::logb:
+ case LibFunc::sin:
+ case LibFunc::sinh:
+ case LibFunc::sqrt:
+ case LibFunc::tan:
+ case LibFunc::tanh:
+ if (UnsafeFPShrink && hasFloatVersion(FuncName))
+ return &UnsafeUnaryDoubleFP;
+ return 0;
+ case LibFunc::memcpy_chk:
+ return &MemCpyChk;
+ default:
+ return 0;
+ }
+ }
+
+ // Finally check for fortified library calls.
+ if (FuncName.endswith("_chk")) {
+ if (FuncName == "__memmove_chk")
+ return &MemMoveChk;
+ else if (FuncName == "__memset_chk")
+ return &MemSetChk;
+ else if (FuncName == "__strcpy_chk")
+ return &StrCpyChk;
+ else if (FuncName == "__stpcpy_chk")
+ return &StpCpyChk;
+ else if (FuncName == "__strncpy_chk")
+ return &StrNCpyChk;
+ else if (FuncName == "__stpncpy_chk")
+ return &StrNCpyChk;
+ }
+
+ return 0;
- // Integer library call optimizations.
- addOpt(LibFunc::ffs, &FFS);
- addOpt(LibFunc::ffsl, &FFS);
- addOpt(LibFunc::ffsll, &FFS);
- addOpt(LibFunc::abs, &Abs);
- addOpt(LibFunc::labs, &Abs);
- addOpt(LibFunc::llabs, &Abs);
- addOpt(LibFunc::isdigit, &IsDigit);
- addOpt(LibFunc::isascii, &IsAscii);
- addOpt(LibFunc::toascii, &ToAscii);
-
- // Formatting and IO library call optimizations.
- addOpt(LibFunc::printf, &PrintF);
- addOpt(LibFunc::sprintf, &SPrintF);
- addOpt(LibFunc::fprintf, &FPrintF);
- addOpt(LibFunc::fwrite, &FWrite);
- addOpt(LibFunc::fputs, &FPuts);
- addOpt(LibFunc::puts, &Puts);
}
Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
- if (Optimizations.empty())
- initOptimizations();
-
- Function *Callee = CI->getCalledFunction();
- LibCallOptimization *LCO = Optimizations.lookup(Callee->getName());
+ LibCallOptimization *LCO = lookupOptimization(CI);
if (LCO) {
IRBuilder<> Builder(CI);
return LCO->optimizeCall(CI, TD, TLI, LCS, Builder);
@@ -1867,17 +1926,6 @@ Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
return 0;
}
-void LibCallSimplifierImpl::addOpt(LibFunc::Func F, LibCallOptimization* Opt) {
- if (TLI->has(F))
- Optimizations[TLI->getName(F)] = Opt;
-}
-
-void LibCallSimplifierImpl::addOpt(LibFunc::Func F1, LibFunc::Func F2,
- LibCallOptimization* Opt) {
- if (TLI->has(F1) && TLI->has(F2))
- Optimizations[TLI->getName(F1)] = Opt;
-}
-
LibCallSimplifier::LibCallSimplifier(const DataLayout *TD,
const TargetLibraryInfo *TLI,
bool UnsafeFPShrink) {
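The hunk above replaces the eagerly-built, name-keyed optimization table with a lazy lookup: a call is first mapped through TargetLibraryInfo's LibFunc enum, so only functions the target actually provides get simplified, and only the fortified "__*_chk" entry points, which TargetLibraryInfo does not model, fall back to raw name matching. Below is a minimal sketch of that two-stage dispatch with hypothetical handler names and the table cut down to two entries; it relies only on calls that already appear in this patch (getLibFunc, has, StringRef::endswith).

#include "llvm/ADT/StringRef.h"
#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;

struct Handler {};                               // stand-in for LibCallOptimization
static Handler StrCpyHandler, MemMoveChkHandler; // hypothetical singleton handlers

static Handler *lookupHandler(StringRef Name, const TargetLibraryInfo *TLI) {
  // Stage 1: functions the target library is known to provide.
  LibFunc::Func F;
  if (TLI->getLibFunc(Name, F) && TLI->has(F)) {
    switch (F) {
    case LibFunc::strcpy:
      return &StrCpyHandler;
    default:
      return 0;
    }
  }
  // Stage 2: fortified "__*_chk" variants are only recognized by name.
  if (Name.endswith("_chk")) {
    if (Name == "__memmove_chk")
      return &MemMoveChkHandler;
  }
  return 0;
}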
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index 7636541..17900da 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -1771,7 +1771,7 @@ namespace {
size_t MaxDepth = DAG.lookup(IJ);
DEBUG(if (DebugPairSelection) dbgs() << "BBV: found DAG for pair {"
- << IJ.first << " <-> " << IJ.second << "} of depth " <<
+ << *IJ.first << " <-> " << *IJ.second << "} of depth " <<
MaxDepth << " and size " << DAG.size() << "\n");
// At this point the DAG has been constructed, but may contain
@@ -2086,7 +2086,7 @@ namespace {
DEBUG(if (DebugPairSelection)
dbgs() << "BBV: found pruned DAG for pair {"
- << IJ.first << " <-> " << IJ.second << "} of depth " <<
+ << *IJ.first << " <-> " << *IJ.second << "} of depth " <<
MaxDepth << " and size " << PrunedDAG.size() <<
" (effective size: " << EffSize << ")\n");
if (((TTI && !UseChainDepthWithTI) ||
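The two BBVectorize hunks above are a pure debug-output fix: streaming a Value pointer into dbgs() prints an address, while streaming the pointee prints the instruction itself, which is what the pair-selection trace is meant to show. A tiny illustration of the fixed form (helper name is hypothetical, headers omitted; it uses only dbgs() and the Value stream operator already used by these DEBUG statements):

// Print a candidate pair the way the corrected DEBUG statements above do:
// dereferencing so the IR text is emitted rather than two pointer values.
static void printPair(const llvm::Value *First, const llvm::Value *Second) {
  llvm::dbgs() << "BBV: pair {" << *First << " <-> " << *Second << "}\n";
}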
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index f489393..930d9c4 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -79,6 +79,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -115,6 +116,12 @@ static const unsigned TinyTripCountUnrollThreshold = 128;
/// number of pointers. Notice that the check is quadratic!
static const unsigned RuntimeMemoryCheckThreshold = 4;
+/// We use metadata with this name to indicate that a scalar loop was
+/// vectorized and that we don't need to re-vectorize it if we run into it
+/// again.
+static const char*
+AlreadyVectorizedMDName = "llvm.vectorizer.already_vectorized";
+
namespace {
// Forward declarations.
@@ -138,10 +145,11 @@ class LoopVectorizationCostModel;
class InnerLoopVectorizer {
public:
InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
- DominatorTree *DT, DataLayout *DL, unsigned VecWidth,
+ DominatorTree *DT, DataLayout *DL,
+ const TargetLibraryInfo *TLI, unsigned VecWidth,
unsigned UnrollFactor)
- : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), VF(VecWidth),
- UF(UnrollFactor), Builder(SE->getContext()), Induction(0),
+ : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), TLI(TLI),
+ VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()), Induction(0),
OldInduction(0), WidenMap(UnrollFactor) {}
// Perform the actual loop widening (vectorization).
@@ -268,6 +276,9 @@ private:
DominatorTree *DT;
/// Data Layout.
DataLayout *DL;
+ /// Target Library Info.
+ const TargetLibraryInfo *TLI;
+
/// The vectorization SIMD factor to use. Each vector will have this many
/// vector elements.
unsigned VF;
@@ -320,8 +331,9 @@ class LoopVectorizationLegality {
public:
LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DataLayout *DL,
DominatorTree *DT, TargetTransformInfo* TTI,
- AliasAnalysis* AA)
- : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), Induction(0) {}
+ AliasAnalysis *AA, TargetLibraryInfo *TLI)
+ : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
+ Induction(0) {}
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
@@ -407,7 +419,7 @@ public:
/// Alias(Multi)Map stores the values (GEPs or underlying objects and their
/// respective Store/Load instruction(s) to calculate aliasing.
- typedef DenseMap<Value*, Instruction* > AliasMap;
+ typedef MapVector<Value*, Instruction* > AliasMap;
typedef DenseMap<Value*, std::vector<Instruction*> > AliasMultiMap;
/// Returns true if it is legal to vectorize this loop.
@@ -504,6 +516,8 @@ private:
TargetTransformInfo *TTI;
/// Alias Analysis.
AliasAnalysis *AA;
+ /// Target Library Info.
+ TargetLibraryInfo *TLI;
// --- vectorization state --- //
@@ -540,8 +554,8 @@ public:
LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
LoopVectorizationLegality *Legal,
const TargetTransformInfo &TTI,
- DataLayout *DL)
- : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL) {}
+ DataLayout *DL, const TargetLibraryInfo *TLI)
+ : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI) {}
/// Information about vectorization costs
struct VectorizationFactor {
@@ -614,6 +628,8 @@ private:
const TargetTransformInfo &TTI;
/// Target data layout information.
DataLayout *DL;
+ /// Target Library Info.
+ const TargetLibraryInfo *TLI;
};
/// The LoopVectorize Pass.
@@ -631,6 +647,7 @@ struct LoopVectorize : public LoopPass {
TargetTransformInfo *TTI;
DominatorTree *DT;
AliasAnalysis *AA;
+ TargetLibraryInfo *TLI;
virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
// We only vectorize innermost loops.
@@ -643,19 +660,20 @@ struct LoopVectorize : public LoopPass {
TTI = &getAnalysis<TargetTransformInfo>();
DT = &getAnalysis<DominatorTree>();
AA = getAnalysisIfAvailable<AliasAnalysis>();
+ TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
DEBUG(dbgs() << "LV: Checking a loop in \"" <<
L->getHeader()->getParent()->getName() << "\"\n");
// Check if it is legal to vectorize the loop.
- LoopVectorizationLegality LVL(L, SE, DL, DT, TTI, AA);
+ LoopVectorizationLegality LVL(L, SE, DL, DT, TTI, AA, TLI);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing.\n");
return false;
}
// Use the cost model.
- LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL);
+ LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI);
// Check the function attributes to find out if this function should be
// optimized for size.
@@ -689,7 +707,7 @@ struct LoopVectorize : public LoopPass {
DEBUG(dbgs() << "LV: Unroll Factor is " << UF << "\n");
// If we decided that it is *legal* to vectorize the loop then do it.
- InnerLoopVectorizer LB(L, SE, LI, DT, DL, VF.Width, UF);
+ InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF);
LB.vectorize(&LVL);
DEBUG(verifyFunction(*L->getHeader()->getParent()));
@@ -1147,6 +1165,11 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
BasicBlock *ExitBlock = OrigLoop->getExitBlock();
assert(ExitBlock && "Must have an exit block");
+ // Mark the old scalar loop with metadata that tells us not to vectorize this
+ // loop again if we run into it.
+ MDNode *MD = MDNode::get(OldBasicBlock->getContext(), ArrayRef<Value*>());
+ OldBasicBlock->getTerminator()->setMetadata(AlreadyVectorizedMDName, MD);
+
// Some loops have a single integer induction variable, while other loops
// don't. One example is c++ iterators that often have multiple pointer
// induction variables. In the code below we also support a case where we
@@ -1438,34 +1461,108 @@ getReductionIdentity(LoopVectorizationLegality::ReductionKind K, Type *Tp) {
}
}
-static bool
-isTriviallyVectorizableIntrinsic(Instruction *Inst) {
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
- if (!II)
- return false;
- switch (II->getIntrinsicID()) {
- case Intrinsic::sqrt:
- case Intrinsic::sin:
- case Intrinsic::cos:
- case Intrinsic::exp:
- case Intrinsic::exp2:
- case Intrinsic::log:
- case Intrinsic::log10:
- case Intrinsic::log2:
- case Intrinsic::fabs:
- case Intrinsic::floor:
- case Intrinsic::ceil:
- case Intrinsic::trunc:
- case Intrinsic::rint:
- case Intrinsic::nearbyint:
- case Intrinsic::pow:
- case Intrinsic::fma:
- case Intrinsic::fmuladd:
- return true;
- default:
- return false;
+static Intrinsic::ID
+getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
+ // If we have an intrinsic call, check if it is trivially vectorizable.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::sqrt:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::log:
+ case Intrinsic::log10:
+ case Intrinsic::log2:
+ case Intrinsic::fabs:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
+ case Intrinsic::pow:
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
+ return II->getIntrinsicID();
+ default:
+ return Intrinsic::not_intrinsic;
+ }
}
- return false;
+
+ if (!TLI)
+ return Intrinsic::not_intrinsic;
+
+ LibFunc::Func Func;
+ Function *F = CI->getCalledFunction();
+ // We're going to make assumptions about the semantics of the function; check
+ // that the target knows that it's available in this environment.
+ if (!F || !TLI->getLibFunc(F->getName(), Func))
+ return Intrinsic::not_intrinsic;
+
+ // Otherwise check if we have a call to a function that can be turned into a
+ // vector intrinsic.
+ switch (Func) {
+ default:
+ break;
+ case LibFunc::sin:
+ case LibFunc::sinf:
+ case LibFunc::sinl:
+ return Intrinsic::sin;
+ case LibFunc::cos:
+ case LibFunc::cosf:
+ case LibFunc::cosl:
+ return Intrinsic::cos;
+ case LibFunc::exp:
+ case LibFunc::expf:
+ case LibFunc::expl:
+ return Intrinsic::exp;
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ case LibFunc::exp2l:
+ return Intrinsic::exp2;
+ case LibFunc::log:
+ case LibFunc::logf:
+ case LibFunc::logl:
+ return Intrinsic::log;
+ case LibFunc::log10:
+ case LibFunc::log10f:
+ case LibFunc::log10l:
+ return Intrinsic::log10;
+ case LibFunc::log2:
+ case LibFunc::log2f:
+ case LibFunc::log2l:
+ return Intrinsic::log2;
+ case LibFunc::fabs:
+ case LibFunc::fabsf:
+ case LibFunc::fabsl:
+ return Intrinsic::fabs;
+ case LibFunc::floor:
+ case LibFunc::floorf:
+ case LibFunc::floorl:
+ return Intrinsic::floor;
+ case LibFunc::ceil:
+ case LibFunc::ceilf:
+ case LibFunc::ceill:
+ return Intrinsic::ceil;
+ case LibFunc::trunc:
+ case LibFunc::truncf:
+ case LibFunc::truncl:
+ return Intrinsic::trunc;
+ case LibFunc::rint:
+ case LibFunc::rintf:
+ case LibFunc::rintl:
+ return Intrinsic::rint;
+ case LibFunc::nearbyint:
+ case LibFunc::nearbyintf:
+ case LibFunc::nearbyintl:
+ return Intrinsic::nearbyint;
+ case LibFunc::pow:
+ case LibFunc::powf:
+ case LibFunc::powl:
+ return Intrinsic::pow;
+ }
+
+ return Intrinsic::not_intrinsic;
}
/// This function translates the reduction kind to an LLVM binary operator.
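getIntrinsicIDForCall above is the single classification point the rest of the patch leans on: a call either is one of the whitelisted intrinsics, or is a libm function that TargetLibraryInfo confirms is available and that maps onto the same intrinsics, or it is not vectorizable. The later hunks apply that three-way policy in the legality check, the widening code, and the cost model, each time also letting debug intrinsics pass through untouched. A hedged sketch of the shared policy as one hypothetical predicate (headers omitted; every call used here already appears in this patch):

// Illustrative helper, not part of the patch: returns true if this
// instruction does not block vectorization on account of being a call.
static bool callIsVectorizable(llvm::Instruction *I,
                               const llvm::TargetLibraryInfo *TLI) {
  llvm::CallInst *CI = llvm::dyn_cast<llvm::CallInst>(I);
  if (!CI)
    return true;                       // not a call at all
  if (llvm::isa<llvm::DbgInfoIntrinsic>(CI))
    return true;                       // debug intrinsics are simply skipped
  // A recognized intrinsic or libm call is later widened into the vector
  // overload of the matching intrinsic; anything else stops the vectorizer.
  return getIntrinsicIDForCall(CI, TLI) != llvm::Intrinsic::not_intrinsic;
}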
@@ -1546,7 +1643,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// To do so, we need to generate the 'identity' vector and override
// one of the elements with the incoming scalar reduction. We need
// to do it in the vector-loop preheader.
- Builder.SetInsertPoint(LoopBypassBlocks.back()->getTerminator());
+ Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator());
// This is the vector-clone of the value that leaves the loop.
VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
@@ -1991,17 +2088,21 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
}
case Instruction::Call: {
- assert(isTriviallyVectorizableIntrinsic(it));
+ // Ignore dbg intrinsics.
+ if (isa<DbgInfoIntrinsic>(it))
+ break;
+
Module *M = BB->getParent()->getParent();
- IntrinsicInst *II = cast<IntrinsicInst>(it);
- Intrinsic::ID ID = II->getIntrinsicID();
+ CallInst *CI = cast<CallInst>(it);
+ Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
+ assert(ID && "Not an intrinsic call!");
for (unsigned Part = 0; Part < UF; ++Part) {
SmallVector<Value*, 4> Args;
- for (unsigned i = 0, ie = II->getNumArgOperands(); i != ie; ++i) {
- VectorParts &Arg = getVectorValue(II->getArgOperand(i));
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+ VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
Args.push_back(Arg[Part]);
}
- Type *Tys[] = { VectorType::get(II->getType()->getScalarType(), VF) };
+ Type *Tys[] = { VectorType::get(CI->getType()->getScalarType(), VF) };
Function *F = Intrinsic::getDeclaration(M, ID, Tys);
Entry[Part] = Builder.CreateCall(F, Args);
}
@@ -2138,6 +2239,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
BasicBlock *PreHeader = TheLoop->getLoopPreheader();
BasicBlock *Header = TheLoop->getHeader();
+ // If we marked the scalar loop as "already vectorized" then no need
+ // to vectorize it again.
+ if (Header->getTerminator()->getMetadata(AlreadyVectorizedMDName)) {
+ DEBUG(dbgs() << "LV: This loop was vectorized before\n");
+ return false;
+ }
+
// For each block in the loop.
for (Loop::block_iterator bb = TheLoop->block_begin(),
be = TheLoop->block_end(); bb != be; ++bb) {
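The check added in the hunk above, together with the earlier createEmptyLoop change, forms the two halves of a simple marker protocol: the widened loop's builder tags the terminator of the scalar loop it leaves behind with an empty MDNode under the "llvm.vectorizer.already_vectorized" kind, and canVectorizeInstrs bails out as soon as it sees that tag, so a loop this pass has already processed is never vectorized a second time. A minimal sketch of the same round-trip as two hypothetical helpers (IR headers omitted; setMetadata, getMetadata and MDNode::get are used exactly as in the patch):

// Metadata kind name taken from this patch; the helpers are illustrative only.
static const char *AlreadyVectorizedMDName =
    "llvm.vectorizer.already_vectorized";

// Attach the marker to the scalar loop left behind after widening.
static void markAlreadyVectorized(llvm::BasicBlock *ScalarBlock) {
  llvm::MDNode *Empty = llvm::MDNode::get(ScalarBlock->getContext(),
                                          llvm::ArrayRef<llvm::Value*>());
  ScalarBlock->getTerminator()->setMetadata(AlreadyVectorizedMDName, Empty);
}

// Query the marker before rerunning the (expensive) legality analysis.
static bool wasAlreadyVectorized(llvm::BasicBlock *Header) {
  return Header->getTerminator()->getMetadata(AlreadyVectorizedMDName) != 0;
}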
@@ -2220,9 +2328,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
return false;
}// end of PHI handling
- // We still don't handle functions.
+ // We still don't handle functions. However, we can ignore dbg intrinsic
+ // calls and we do handle certain intrinsic and libm functions.
CallInst *CI = dyn_cast<CallInst>(it);
- if (CI && !isTriviallyVectorizableIntrinsic(it)) {
+ if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) {
DEBUG(dbgs() << "LV: Found a call site.\n");
return false;
}
@@ -2638,6 +2747,9 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
// Is this a bin op ?
FoundBinOp |= !isa<PHINode>(Iter);
+ // Remember the current instruction.
+ Instruction *OldIter = Iter;
+
// For each of the *users* of iter.
for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end();
it != e; ++it) {
@@ -2663,7 +2775,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
if (isa<PHINode>(Iter) && isa<PHINode>(U) &&
U->getParent() != TheLoop->getHeader() &&
TheLoop->contains(U) &&
- Iter->getNumUses() > 1)
+ Iter->hasNUsesOrMore(2))
continue;
// We can't have multiple inside users.
@@ -2683,6 +2795,10 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
Iter = U;
}
+ // If all uses were skipped this can't be a reduction variable.
+ if (Iter == OldIter)
+ return false;
+
// We found a reduction var if we have reached the original
// phi node and we only have a single instruction with out-of-loop
// users.
@@ -3152,6 +3268,10 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
// For each instruction in the old loop.
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ // Skip dbg intrinsics.
+ if (isa<DbgInfoIntrinsic>(it))
+ continue;
+
unsigned C = getInstructionCost(it, VF);
Cost += C;
DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF " <<
@@ -3218,7 +3338,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
Type *CondTy = SI->getCondition()->getType();
- if (ScalarCond)
+ if (!ScalarCond)
CondTy = VectorType::get(CondTy, VF);
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy);
@@ -3305,13 +3425,14 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
}
case Instruction::Call: {
- assert(isTriviallyVectorizableIntrinsic(I));
- IntrinsicInst *II = cast<IntrinsicInst>(I);
- Type *RetTy = ToVectorTy(II->getType(), VF);
+ CallInst *CI = cast<CallInst>(I);
+ Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
+ assert(ID && "Not an intrinsic call!");
+ Type *RetTy = ToVectorTy(CI->getType(), VF);
SmallVector<Type*, 4> Tys;
- for (unsigned i = 0, ie = II->getNumArgOperands(); i != ie; ++i)
- Tys.push_back(ToVectorTy(II->getArgOperand(i)->getType(), VF));
- return TTI.getIntrinsicInstrCost(II->getIntrinsicID(), RetTy, Tys);
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
+ Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF));
+ return TTI.getIntrinsicInstrCost(ID, RetTy, Tys);
}
default: {
// We are scalarizing the instruction. Return the cost of the scalar