Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/CMakeLists.txt | 1
-rw-r--r--  lib/Analysis/CodeMetrics.cpp | 176
-rw-r--r--  lib/Analysis/DebugInfo.cpp | 2
-rw-r--r--  lib/Analysis/IVUsers.cpp | 55
-rw-r--r--  lib/Analysis/InlineCost.cpp | 627
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp | 808
-rw-r--r--  lib/Analysis/LazyValueInfo.cpp | 7
-rw-r--r--  lib/Analysis/Loads.cpp | 16
-rw-r--r--  lib/Analysis/PHITransAddr.cpp | 2
-rw-r--r--  lib/Analysis/SparsePropagation.cpp | 5
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 372
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriter.cpp | 7
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 49
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 23
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 21
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 7
-rw-r--r--  lib/CodeGen/BranchFolding.cpp | 31
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 1
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.cpp | 34
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.h | 3
-rw-r--r--  lib/CodeGen/DFAPacketizer.cpp | 36
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 2
-rw-r--r--  lib/CodeGen/LatencyPriorityQueue.cpp | 4
-rw-r--r--  lib/CodeGen/LiveDebugVariables.cpp | 8
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 24
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 1
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 12
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 80
-rw-r--r--  lib/CodeGen/MachineInstrBundle.cpp | 2
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 5
-rw-r--r--  lib/CodeGen/MachineScheduler.cpp | 633
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 20
-rw-r--r--  lib/CodeGen/Passes.cpp | 3
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp | 140
-rw-r--r--  lib/CodeGen/RegAllocFast.cpp | 2
-rw-r--r--  lib/CodeGen/RegisterCoalescer.h | 2
-rw-r--r--  lib/CodeGen/ScheduleDAG.cpp | 52
-rw-r--r--  lib/CodeGen/ScheduleDAGEmit.cpp | 68
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 144
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.h | 306
-rw-r--r--  lib/CodeGen/ScheduleDAGPrinter.cpp | 23
-rw-r--r--  lib/CodeGen/SelectionDAG/CMakeLists.txt | 1
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 122
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 34
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 13
-rw-r--r--  lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 28
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 44
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 130
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 39
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 6
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 652
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 12
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 631
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 13
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp | 34
-rw-r--r--  lib/ExecutionEngine/CMakeLists.txt | 10
-rw-r--r--  lib/ExecutionEngine/EventListenerCommon.h | 67
-rw-r--r--  lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt | 11
-rw-r--r--  lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp | 183
-rw-r--r--  lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt | 23
-rw-r--r--  lib/ExecutionEngine/IntelJITEvents/Makefile | 17
-rw-r--r--  lib/ExecutionEngine/Interpreter/Execution.cpp | 8
-rw-r--r--  lib/ExecutionEngine/JIT/CMakeLists.txt | 2
-rw-r--r--  lib/ExecutionEngine/JIT/Intercept.cpp | 162
-rw-r--r--  lib/ExecutionEngine/JIT/JIT.cpp | 28
-rw-r--r--  lib/ExecutionEngine/JIT/JIT.h | 1
-rw-r--r--  lib/ExecutionEngine/JIT/JITMemoryManager.cpp | 153
-rw-r--r--  lib/ExecutionEngine/LLVMBuild.txt | 2
-rw-r--r--  lib/ExecutionEngine/MCJIT/CMakeLists.txt | 1
-rw-r--r--  lib/ExecutionEngine/MCJIT/Intercept.cpp | 162
-rw-r--r--  lib/ExecutionEngine/MCJIT/MCJIT.cpp | 20
-rw-r--r--  lib/ExecutionEngine/MCJIT/MCJIT.h | 1
-rw-r--r--  lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h | 39
-rw-r--r--  lib/ExecutionEngine/Makefile | 13
-rw-r--r--  lib/ExecutionEngine/OProfileJIT/CMakeLists.txt | 7
-rw-r--r--  lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt | 23
-rw-r--r--  lib/ExecutionEngine/OProfileJIT/Makefile | 18
-rw-r--r--  lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp (renamed from lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp) | 99
-rw-r--r--  lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp | 263
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 295
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 408
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h | 176
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h | 168
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp | 595
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h | 85
-rw-r--r--  lib/MC/MCParser/AsmParser.cpp | 7
-rw-r--r--  lib/MC/MCSymbol.cpp | 7
-rw-r--r--  lib/MC/WinCOFFObjectWriter.cpp | 31
-rw-r--r--  lib/Object/Archive.cpp | 2
-rw-r--r--  lib/Object/COFFObjectFile.cpp | 73
-rw-r--r--  lib/Object/MachOObjectFile.cpp | 2
-rw-r--r--  lib/Support/APInt.cpp | 33
-rw-r--r--  lib/Support/CommandLine.cpp | 2
-rw-r--r--  lib/Support/GraphWriter.cpp | 77
-rw-r--r--  lib/Support/MemoryBuffer.cpp | 6
-rw-r--r--  lib/Support/SmallPtrSet.cpp | 51
-rw-r--r--  lib/Support/StringRef.cpp | 36
-rw-r--r--  lib/Support/Triple.cpp | 10
-rw-r--r--  lib/TableGen/Record.cpp | 8
-rw-r--r--  lib/Target/ARM/ARM.h | 2
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 40
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 12
-rw-r--r--  lib/Target/ARM/ARMCallingConv.h | 20
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 44
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 71
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 15
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 95
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 3
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.h | 4
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 124
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 100
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 69
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 85
-rw-r--r--  lib/Target/ARM/ARMJITInfo.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.h | 3
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 20
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 53
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 118
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 42
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 7
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 11
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 46
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 4
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 80
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.cpp | 1
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.cpp | 1
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.h | 4
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.cpp | 3
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.h | 3
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 1
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.h | 2
-rw-r--r--  lib/Target/ARM/Thumb2RegisterInfo.cpp | 4
-rw-r--r--  lib/Target/ARM/Thumb2RegisterInfo.h | 3
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 8
-rw-r--r--  lib/Target/CBackend/CBackend.cpp | 10
-rw-r--r--  lib/Target/CellSPU/SPUFrameLowering.cpp | 2
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp | 70
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.h | 2
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.h | 2
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.cpp | 2
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.cpp | 2
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.h | 3
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 8
-rw-r--r--  lib/Target/Hexagon/Hexagon.h | 1
-rw-r--r--  lib/Target/Hexagon/HexagonAsmPrinter.cpp | 4
-rw-r--r--  lib/Target/Hexagon/HexagonCallingConvLower.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonFrameLowering.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonHardwareLoops.cpp | 4
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.cpp | 11
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.h | 2
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.h | 2
-rw-r--r--  lib/Target/Hexagon/HexagonPeephole.cpp | 7
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.h | 3
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.h | 9
-rw-r--r--  lib/Target/Hexagon/HexagonTargetObjectFile.cpp | 6
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 4
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h | 6
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/Makefile | 2
-rw-r--r--  lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp | 4
-rw-r--r--  lib/Target/MBlaze/MBlazeFrameLowering.h | 3
-rw-r--r--  lib/Target/MBlaze/MBlazeISelLowering.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeISelLowering.h | 4
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.h | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeMCInstLower.h | 1
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.cpp | 2
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.cpp | 2
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.h | 2
-rw-r--r--  lib/Target/MSP430/MSP430MCInstLower.h | 1
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.cpp | 2
-rw-r--r--  lib/Target/MSP430/MSP430Subtarget.h | 3
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.cpp | 2
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 37
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 6
-rw-r--r--  lib/Target/Mips/Mips.h | 2
-rw-r--r--  lib/Target/Mips/MipsAnalyzeImmediate.cpp | 20
-rw-r--r--  lib/Target/Mips/MipsAnalyzeImmediate.h | 10
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.cpp | 4
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.h | 2
-rw-r--r--  lib/Target/Mips/MipsFrameLowering.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 51
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 104
-rw-r--r--  lib/Target/Mips/MipsISelLowering.h | 5
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.h | 2
-rw-r--r--  lib/Target/Mips/MipsMCInstLower.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsMCInstLower.h | 4
-rw-r--r--  lib/Target/Mips/MipsMachineFunction.h | 2
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 6
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h | 6
-rw-r--r--  lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h | 2
-rw-r--r--  lib/Target/PTX/PTX.h | 1
-rw-r--r--  lib/Target/PTX/PTXAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/PTX/PTXISelLowering.cpp | 2
-rw-r--r--  lib/Target/PTX/PTXISelLowering.h | 2
-rw-r--r--  lib/Target/PTX/PTXInstrInfo.cpp | 2
-rw-r--r--  lib/Target/PTX/PTXParamManager.cpp | 2
-rw-r--r--  lib/Target/PTX/PTXParamManager.h | 1
-rw-r--r--  lib/Target/PTX/PTXRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/PTX/PTXTargetMachine.cpp | 5
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPC.h | 5
-rw-r--r--  lib/Target/PowerPC/PPCAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCCallingConv.td | 31
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCHazardRecognizers.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 52
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 4
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td | 16
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 16
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 106
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.h | 1
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.h | 2
-rw-r--r--  lib/Target/Sparc/FPMover.cpp | 8
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.cpp | 6
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.h | 2
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.h | 2
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.cpp | 4
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp | 2
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 32
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.c | 9
-rw-r--r--  lib/Target/X86/InstPrinter/X86InstComments.cpp | 8
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 2
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 58
-rw-r--r--  lib/Target/X86/README-SSE.txt | 19
-rw-r--r--  lib/Target/X86/Utils/X86ShuffleDecode.cpp | 26
-rw-r--r--  lib/Target/X86/Utils/X86ShuffleDecode.h | 28
-rw-r--r--  lib/Target/X86/X86.h | 2
-rw-r--r--  lib/Target/X86/X86AsmPrinter.cpp | 4
-rw-r--r--  lib/Target/X86/X86AsmPrinter.h | 4
-rw-r--r--  lib/Target/X86/X86COFFMachineModuleInfo.h | 2
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 2
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 6
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 4
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 399
-rw-r--r--  lib/Target/X86/X86InstrCompiler.td | 107
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 5
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 50
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 2
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 26
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 94
-rw-r--r--  lib/Target/X86/X86InstrSystem.td | 10
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp | 4
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 2
-rw-r--r--  lib/Target/X86/X86Schedule.td | 11
-rw-r--r--  lib/Target/X86/X86ScheduleAtom.td | 15
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 2
-rw-r--r--  lib/Target/X86/X86TargetMachine.h | 1
-rw-r--r--  lib/Target/X86/X86TargetObjectFile.h | 1
-rw-r--r--  lib/Target/XCore/XCoreFrameLowering.cpp | 2
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.cpp | 2
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.h | 2
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.cpp | 2
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.h | 2
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.cpp | 18
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.h | 9
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.h | 4
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 14
-rw-r--r--  lib/Transforms/IPO/InlineAlways.cpp | 28
-rw-r--r--  lib/Transforms/IPO/InlineSimple.cpp | 43
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp | 97
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 8
-rw-r--r--  lib/Transforms/InstCombine/InstCombineWorklist.h | 4
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp | 15
-rw-r--r--  lib/Transforms/Instrumentation/AddressSanitizer.cpp | 104
-rw-r--r--  lib/Transforms/Instrumentation/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/Instrumentation/FunctionBlackList.cpp | 79
-rw-r--r--  lib/Transforms/Instrumentation/FunctionBlackList.h | 37
-rw-r--r--  lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 9
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp | 9
-rw-r--r--  lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 92
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 7
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 13
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp | 16
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 15
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp | 22
-rw-r--r--  lib/Transforms/Scalar/ObjCARC.cpp | 75
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp | 17
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp | 8
-rw-r--r--  lib/Transforms/Utils/BasicInliner.cpp | 182
-rw-r--r--  lib/Transforms/Utils/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp | 8
-rw-r--r--  lib/Transforms/Utils/CodeExtractor.cpp | 3
-rw-r--r--  lib/Transforms/Utils/Local.cpp | 21
-rw-r--r--  lib/Transforms/Utils/LoopSimplify.cpp | 15
-rw-r--r--  lib/Transforms/Utils/LowerExpectIntrinsic.cpp | 6
-rw-r--r--  lib/Transforms/Utils/LowerSwitch.cpp | 8
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 74
-rw-r--r--  lib/Transforms/Utils/SimplifyIndVar.cpp | 6
-rw-r--r--  lib/VMCore/AsmWriter.cpp | 8
-rw-r--r--  lib/VMCore/ConstantsContext.h | 45
-rw-r--r--  lib/VMCore/Instructions.cpp | 9
-rw-r--r--  lib/VMCore/Type.cpp | 31
-rw-r--r--  lib/VMCore/Value.cpp | 45
-rw-r--r--  lib/VMCore/Verifier.cpp | 8
305 files changed, 7066 insertions, 5924 deletions
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index cb1e1eb..2e3ec8b 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -10,6 +10,7 @@ add_llvm_library(LLVMAnalysis
BranchProbabilityInfo.cpp
CFGPrinter.cpp
CaptureTracking.cpp
+ CodeMetrics.cpp
ConstantFolding.cpp
DIBuilder.cpp
DbgInfoPrinter.cpp
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
new file mode 100644
index 0000000..6c93f78
--- /dev/null
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -0,0 +1,176 @@
+//===- CodeMetrics.cpp - Code cost measurements ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements code cost measurement utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Function.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Target/TargetData.h"
+
+using namespace llvm;
+
+/// callIsSmall - If a call is likely to lower to a single target instruction,
+/// or is otherwise deemed small, return true.
+/// TODO: Perhaps calls like memcpy, strcpy, etc?
+bool llvm::callIsSmall(const Function *F) {
+ if (!F) return false;
+
+ if (F->hasLocalLinkage()) return false;
+
+ if (!F->hasName()) return false;
+
+ StringRef Name = F->getName();
+
+ // These will all likely lower to a single selection DAG node.
+ if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
+ Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
+ Name == "sin" || Name == "sinf" || Name == "sinl" ||
+ Name == "cos" || Name == "cosf" || Name == "cosl" ||
+ Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" )
+ return true;
+
+ // These are all likely to be optimized into something smaller.
+ if (Name == "pow" || Name == "powf" || Name == "powl" ||
+ Name == "exp2" || Name == "exp2l" || Name == "exp2f" ||
+ Name == "floor" || Name == "floorf" || Name == "ceil" ||
+ Name == "round" || Name == "ffs" || Name == "ffsl" ||
+ Name == "abs" || Name == "labs" || Name == "llabs")
+ return true;
+
+ return false;
+}
+
+/// analyzeBasicBlock - Fill in the current structure with information gleaned
+/// from the specified block.
+void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
+ const TargetData *TD) {
+ ++NumBlocks;
+ unsigned NumInstsBeforeThisBB = NumInsts;
+ for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
+ II != E; ++II) {
+ if (isa<PHINode>(II)) continue; // PHI nodes don't count.
+
+ // Special handling for calls.
+ if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
+ if (const IntrinsicInst *IntrinsicI = dyn_cast<IntrinsicInst>(II)) {
+ switch (IntrinsicI->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::objectsize:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::var_annotation:
+ // These intrinsics don't count as size.
+ continue;
+ }
+ }
+
+ ImmutableCallSite CS(cast<Instruction>(II));
+
+ if (const Function *F = CS.getCalledFunction()) {
+ // If a function is both internal and has a single use, then it is
+ // extremely likely to get inlined in the future (it was probably
+ // exposed by an interleaved devirtualization pass).
+ if (!CS.isNoInline() && F->hasInternalLinkage() && F->hasOneUse())
+ ++NumInlineCandidates;
+
+ // If this call is to the function itself, then the function is recursive.
+ // Inlining it into other functions is a bad idea, because this is
+ // basically just a form of loop peeling, and our metrics aren't useful
+ // for that case.
+ if (F == BB->getParent())
+ isRecursive = true;
+ }
+
+ if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
+ // Each argument to a call takes on average one instruction to set up.
+ NumInsts += CS.arg_size();
+
+ // We don't want inline asm to count as a call - that would prevent loop
+ // unrolling. The argument setup cost is still real, though.
+ if (!isa<InlineAsm>(CS.getCalledValue()))
+ ++NumCalls;
+ }
+ }
+
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (!AI->isStaticAlloca())
+ this->usesDynamicAlloca = true;
+ }
+
+ if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
+ ++NumVectorInsts;
+
+ if (const CastInst *CI = dyn_cast<CastInst>(II)) {
+ // Noop casts, including ptr <-> int, don't count.
+ if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) ||
+ isa<PtrToIntInst>(CI))
+ continue;
+ // trunc to a native type is free (assuming the target has compare and
+ // shift-right of the same width).
+ if (isa<TruncInst>(CI) && TD &&
+ TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType())))
+ continue;
+ // The result of a cmp instruction is often extended (to be used by other
+ // cmp instructions, logical operations, or return instructions). These
+ // are usually no-ops on most sane targets.
+ if (isa<CmpInst>(CI->getOperand(0)))
+ continue;
+ } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){
+ // If a GEP has all constant indices, it will probably be folded with
+ // a load/store.
+ if (GEPI->hasAllConstantIndices())
+ continue;
+ }
+
+ ++NumInsts;
+ }
+
+ if (isa<ReturnInst>(BB->getTerminator()))
+ ++NumRets;
+
+ // We never want to inline functions that contain an indirectbr. Inlining
+ // one would be incorrect because all the blockaddresses (in static global
+ // initializers, for example) would still refer to the original function,
+ // and the indirect jump would branch from the inlined copy of the function
+ // into the original function, which is undefined behavior.
+ // FIXME: This logic isn't really right; we can safely inline functions
+ // with indirectbr's as long as no other function or global references the
+ // blockaddress of a block within the current function. And as a QOI issue,
+ // if someone is using a blockaddress without an indirectbr, and that
+ // reference somehow ends up in another function or global, we probably
+ // don't want to inline this function.
+ if (isa<IndirectBrInst>(BB->getTerminator()))
+ containsIndirectBr = true;
+
+ // Remember NumInsts for this BB.
+ NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;
+}
+
+void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) {
+ // If this function contains a call that "returns twice" (e.g., setjmp or
+ // _setjmp) and it isn't marked with "returns twice" itself, never inline it.
+ // This is a hack because we depend on the user marking their local variables
+ // as volatile if they are live across a setjmp call, and they probably
+ // won't do this in callers.
+ exposesReturnsTwice = F->callsFunctionThatReturnsTwice() &&
+ !F->hasFnAttr(Attribute::ReturnsTwice);
+
+ // Look at the size of the callee.
+ for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ analyzeBasicBlock(&*BB, TD);
+}
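
A minimal sketch of how a client pass might drive the new CodeMetrics utility, using only the fields visible in the diff above; the helper name and the size threshold are hypothetical:

    #include "llvm/Analysis/CodeMetrics.h"
    #include "llvm/Function.h"
    #include "llvm/Target/TargetData.h"
    using namespace llvm;

    // Hypothetical helper: decide whether F is small and safe enough to
    // duplicate, based on the per-function metrics gathered above.
    static bool isCheapToDuplicate(Function *F, const TargetData *TD) {
      CodeMetrics Metrics;
      Metrics.analyzeFunction(F, TD);  // visits every basic block once
      // The hard "never" conditions collected while scanning the blocks.
      if (Metrics.isRecursive || Metrics.containsIndirectBr ||
          Metrics.exposesReturnsTwice || Metrics.usesDynamicAlloca)
        return false;
      return Metrics.NumInsts < 20;    // hypothetical size threshold
    }
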
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 585a087..e30c0a9 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -68,7 +68,7 @@ uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
return 0;
if (Elt < DbgNode->getNumOperands())
- if (ConstantInt *CI = dyn_cast<ConstantInt>(DbgNode->getOperand(Elt)))
+ if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DbgNode->getOperand(Elt)))
return CI->getZExtValue();
return 0;
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index cad22f8..463584d 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -79,10 +79,39 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
return false;
}
+/// Return true if all loop headers that dominate this block are in simplified
+/// form.
+static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT,
+ const LoopInfo *LI,
+ SmallPtrSet<Loop*,16> &SimpleLoopNests) {
+ Loop *NearestLoop = 0;
+ for (DomTreeNode *Rung = DT->getNode(BB);
+ Rung; Rung = Rung->getIDom()) {
+ BasicBlock *DomBB = Rung->getBlock();
+ Loop *DomLoop = LI->getLoopFor(DomBB);
+ if (DomLoop && DomLoop->getHeader() == DomBB) {
+ // If the domtree walk reaches a loop with no preheader, return false.
+ if (!DomLoop->isLoopSimplifyForm())
+ return false;
+ // If we have already checked this loop nest, stop checking.
+ if (SimpleLoopNests.count(DomLoop))
+ break;
+ // If we have not already checked this loop nest, remember the loop
+ // header nearest to BB. The nearest loop may not contain BB.
+ if (!NearestLoop)
+ NearestLoop = DomLoop;
+ }
+ }
+ if (NearestLoop)
+ SimpleLoopNests.insert(NearestLoop);
+ return true;
+}
+
/// AddUsersIfInteresting - Inspect the specified instruction. If it is a
/// reducible SCEV, recursively add its users to the IVUsesByStride set and
/// return true. Otherwise, return false.
-bool IVUsers::AddUsersIfInteresting(Instruction *I) {
+bool IVUsers::AddUsersIfInteresting(Instruction *I,
+ SmallPtrSet<Loop*,16> &SimpleLoopNests) {
// Add this IV user to the Processed set before returning false to ensure that
// all IV users are members of the set. See IVUsers::isIVUserOrOperand.
if (!Processed.insert(I))
@@ -117,6 +146,18 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
if (isa<PHINode>(User) && Processed.count(User))
continue;
+ // Only consider IVUsers that are dominated by simplified loop
+ // headers. Otherwise, SCEVExpander will crash.
+ BasicBlock *UseBB = User->getParent();
+ // A phi's use is live out of its predecessor block.
+ if (PHINode *PHI = dyn_cast<PHINode>(User)) {
+ unsigned OperandNo = UI.getOperandNo();
+ unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
+ UseBB = PHI->getIncomingBlock(ValNo);
+ }
+ if (!isSimplifiedLoopNest(UseBB, DT, LI, SimpleLoopNests))
+ return false;
+
// Descend recursively, but not into PHI nodes outside the current loop.
// It's important to see the entire expression outside the loop to get
// choices that depend on addressing mode use right, although we won't
@@ -126,12 +167,13 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
bool AddUserToIVUsers = false;
if (LI->getLoopFor(User->getParent()) != L) {
if (isa<PHINode>(User) || Processed.count(User) ||
- !AddUsersIfInteresting(User)) {
+ !AddUsersIfInteresting(User, SimpleLoopNests)) {
DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n'
<< " OF SCEV: " << *ISE << '\n');
AddUserToIVUsers = true;
}
- } else if (Processed.count(User) || !AddUsersIfInteresting(User)) {
+ } else if (Processed.count(User)
+ || !AddUsersIfInteresting(User, SimpleLoopNests)) {
DEBUG(dbgs() << "FOUND USER: " << *User << '\n'
<< " OF SCEV: " << *ISE << '\n');
AddUserToIVUsers = true;
@@ -180,11 +222,16 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
SE = &getAnalysis<ScalarEvolution>();
TD = getAnalysisIfAvailable<TargetData>();
+ // SCEVExpander can only handle users that are dominated by simplified loop
+ // entries. Keep track of all loops that are only dominated by other simple
+ // loops so we don't traverse the domtree for each user.
+ SmallPtrSet<Loop*,16> SimpleLoopNests;
+
// Find all uses of induction variables in this loop, and categorize
// them by stride. Start by finding all of the PHI nodes in the header for
// this loop. If they are induction variables, inspect their uses.
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
- (void)AddUsersIfInteresting(I);
+ (void)AddUsersIfInteresting(I, SimpleLoopNests);
return false;
}
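
The walk in isSimplifiedLoopNest above is a memoized climb of the dominator tree; a standalone C++ sketch of the same pattern (hypothetical types standing in for the LLVM ones) makes the early exits easier to see:

    #include <set>

    struct Node { Node *IDom; bool IsLoopHeader; bool IsSimplified; };

    // Hypothetical stand-in for isSimplifiedLoopNest: climb the immediate
    // dominators, fail at the first loop header not in simplified form,
    // stop early at a header vetted by a previous query, and memoize the
    // nearest header so later queries can stop there too.
    static bool nestIsSimple(Node *BB, std::set<Node *> &Vetted) {
      Node *Nearest = 0;
      for (Node *Rung = BB; Rung; Rung = Rung->IDom) {
        if (!Rung->IsLoopHeader) continue;
        if (!Rung->IsSimplified) return false; // e.g. no preheader
        if (Vetted.count(Rung)) break;         // nest already checked
        if (!Nearest) Nearest = Rung;
      }
      if (Nearest) Vetted.insert(Nearest);
      return true;
    }
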
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index b326ba7..dedbfeb 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -20,165 +20,27 @@
using namespace llvm;
-/// callIsSmall - If a call is likely to lower to a single target instruction,
-/// or is otherwise deemed small return true.
-/// TODO: Perhaps calls like memcpy, strcpy, etc?
-bool llvm::callIsSmall(const Function *F) {
- if (!F) return false;
-
- if (F->hasLocalLinkage()) return false;
-
- if (!F->hasName()) return false;
-
- StringRef Name = F->getName();
-
- // These will all likely lower to a single selection DAG node.
- if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
- Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
- Name == "sin" || Name == "sinf" || Name == "sinl" ||
- Name == "cos" || Name == "cosf" || Name == "cosl" ||
- Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" )
- return true;
-
- // These are all likely to be optimized into something smaller.
- if (Name == "pow" || Name == "powf" || Name == "powl" ||
- Name == "exp2" || Name == "exp2l" || Name == "exp2f" ||
- Name == "floor" || Name == "floorf" || Name == "ceil" ||
- Name == "round" || Name == "ffs" || Name == "ffsl" ||
- Name == "abs" || Name == "labs" || Name == "llabs")
- return true;
-
- return false;
-}
-
-/// analyzeBasicBlock - Fill in the current structure with information gleaned
-/// from the specified block.
-void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
- const TargetData *TD) {
- ++NumBlocks;
- unsigned NumInstsBeforeThisBB = NumInsts;
- for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
- II != E; ++II) {
- if (isa<PHINode>(II)) continue; // PHI nodes don't count.
-
- // Special handling for calls.
- if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
- if (const IntrinsicInst *IntrinsicI = dyn_cast<IntrinsicInst>(II)) {
- switch (IntrinsicI->getIntrinsicID()) {
- default: break;
- case Intrinsic::dbg_declare:
- case Intrinsic::dbg_value:
- case Intrinsic::invariant_start:
- case Intrinsic::invariant_end:
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- case Intrinsic::objectsize:
- case Intrinsic::ptr_annotation:
- case Intrinsic::var_annotation:
- // These intrinsics don't count as size.
- continue;
- }
- }
-
- ImmutableCallSite CS(cast<Instruction>(II));
-
- if (const Function *F = CS.getCalledFunction()) {
- // If a function is both internal and has a single use, then it is
- // extremely likely to get inlined in the future (it was probably
- // exposed by an interleaved devirtualization pass).
- if (!CS.isNoInline() && F->hasInternalLinkage() && F->hasOneUse())
- ++NumInlineCandidates;
-
- // If this call is to function itself, then the function is recursive.
- // Inlining it into other functions is a bad idea, because this is
- // basically just a form of loop peeling, and our metrics aren't useful
- // for that case.
- if (F == BB->getParent())
- isRecursive = true;
- }
-
- if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
- // Each argument to a call takes on average one instruction to set up.
- NumInsts += CS.arg_size();
-
- // We don't want inline asm to count as a call - that would prevent loop
- // unrolling. The argument setup cost is still real, though.
- if (!isa<InlineAsm>(CS.getCalledValue()))
- ++NumCalls;
- }
- }
-
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
- if (!AI->isStaticAlloca())
- this->usesDynamicAlloca = true;
- }
-
- if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
- ++NumVectorInsts;
-
- if (const CastInst *CI = dyn_cast<CastInst>(II)) {
- // Noop casts, including ptr <-> int, don't count.
- if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) ||
- isa<PtrToIntInst>(CI))
- continue;
- // trunc to a native type is free (assuming the target has compare and
- // shift-right of the same width).
- if (isa<TruncInst>(CI) && TD &&
- TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType())))
- continue;
- // Result of a cmp instruction is often extended (to be used by other
- // cmp instructions, logical or return instructions). These are usually
- // nop on most sane targets.
- if (isa<CmpInst>(CI->getOperand(0)))
- continue;
- } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){
- // If a GEP has all constant indices, it will probably be folded with
- // a load/store.
- if (GEPI->hasAllConstantIndices())
+unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForConstant(
+ const CodeMetrics &Metrics, Value *V) {
+ unsigned Reduction = 0;
+ SmallVector<Value *, 4> Worklist;
+ Worklist.push_back(V);
+ do {
+ Value *V = Worklist.pop_back_val();
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ User *U = *UI;
+ if (isa<BranchInst>(U) || isa<SwitchInst>(U)) {
+ // We will be able to eliminate all but one of the successors.
+ const TerminatorInst &TI = cast<TerminatorInst>(*U);
+ const unsigned NumSucc = TI.getNumSuccessors();
+ unsigned Instrs = 0;
+ for (unsigned I = 0; I != NumSucc; ++I)
+ Instrs += Metrics.NumBBInsts.lookup(TI.getSuccessor(I));
+ // We don't know which blocks will be eliminated, so use the average size.
+ Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc;
continue;
- }
-
- ++NumInsts;
- }
-
- if (isa<ReturnInst>(BB->getTerminator()))
- ++NumRets;
-
- // We never want to inline functions that contain an indirectbr. This is
- // incorrect because all the blockaddress's (in static global initializers
- // for example) would be referring to the original function, and this indirect
- // jump would jump from the inlined copy of the function into the original
- // function which is extremely undefined behavior.
- // FIXME: This logic isn't really right; we can safely inline functions
- // with indirectbr's as long as no other function or global references the
- // blockaddress of a block within the current function. And as a QOI issue,
- // if someone is using a blockaddress without an indirectbr, and that
- // reference somehow ends up in another function or global, we probably
- // don't want to inline this function.
- if (isa<IndirectBrInst>(BB->getTerminator()))
- containsIndirectBr = true;
-
- // Remember NumInsts for this BB.
- NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;
-}
+ }
-// CountCodeReductionForConstant - Figure out an approximation for how many
-// instructions will be constant folded if the specified value is constant.
-//
-unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) {
- unsigned Reduction = 0;
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
- User *U = *UI;
- if (isa<BranchInst>(U) || isa<SwitchInst>(U)) {
- // We will be able to eliminate all but one of the successors.
- const TerminatorInst &TI = cast<TerminatorInst>(*U);
- const unsigned NumSucc = TI.getNumSuccessors();
- unsigned Instrs = 0;
- for (unsigned I = 0; I != NumSucc; ++I)
- Instrs += NumBBInsts[TI.getSuccessor(I)];
- // We don't know which blocks will be eliminated, so use the average size.
- Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc;
- } else {
// Figure out if this instruction will be removed due to simple constant
// propagation.
Instruction &Inst = cast<Instruction>(*U);
@@ -200,33 +62,186 @@ unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) {
AllOperandsConstant = false;
break;
}
+ if (!AllOperandsConstant)
+ continue;
- if (AllOperandsConstant) {
- // We will get to remove this instruction...
- Reduction += InlineConstants::InstrCost;
+ // We will get to remove this instruction...
+ Reduction += InlineConstants::InstrCost;
- // And any other instructions that use it which become constants
- // themselves.
- Reduction += CountCodeReductionForConstant(&Inst);
+ // And any other instructions that use it which become constants
+ // themselves.
+ Worklist.push_back(&Inst);
+ }
+ } while (!Worklist.empty());
+ return Reduction;
+}
+
+static unsigned countCodeReductionForAllocaICmp(const CodeMetrics &Metrics,
+ ICmpInst *ICI) {
+ unsigned Reduction = 0;
+
+ // Bail if this is comparing against a non-constant; there is nothing we can
+ // do there.
+ if (!isa<Constant>(ICI->getOperand(1)))
+ return Reduction;
+
+ // An icmp pred (alloca, C) becomes true if the predicate is true when
+ // equal and false otherwise.
+ bool Result = ICI->isTrueWhenEqual();
+
+ SmallVector<Instruction *, 4> Worklist;
+ Worklist.push_back(ICI);
+ do {
+ Instruction *U = Worklist.pop_back_val();
+ Reduction += InlineConstants::InstrCost;
+ for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
+ UI != UE; ++UI) {
+ Instruction *I = dyn_cast<Instruction>(*UI);
+ if (!I || I->mayHaveSideEffects()) continue;
+ if (I->getNumOperands() == 1)
+ Worklist.push_back(I);
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ // If BO produces the same value as U, then the other operand is
+ // irrelevant and we can put it into the Worklist to continue
+ // deleting dead instructions. If BO produces the same value as the
+ // other operand, we can delete BO but that's it.
+ if (Result == true) {
+ if (BO->getOpcode() == Instruction::Or)
+ Worklist.push_back(I);
+ if (BO->getOpcode() == Instruction::And)
+ Reduction += InlineConstants::InstrCost;
+ } else {
+ if (BO->getOpcode() == Instruction::Or ||
+ BO->getOpcode() == Instruction::Xor)
+ Reduction += InlineConstants::InstrCost;
+ if (BO->getOpcode() == Instruction::And)
+ Worklist.push_back(I);
+ }
+ }
+ if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+ BasicBlock *BB = BI->getSuccessor(Result ? 0 : 1);
+ if (BB->getSinglePredecessor())
+ Reduction
+ += InlineConstants::InstrCost * Metrics.NumBBInsts.lookup(BB);
}
}
- }
+ } while (!Worklist.empty());
+
return Reduction;
}
-// CountCodeReductionForAlloca - Figure out an approximation of how much smaller
-// the function will be if it is inlined into a context where an argument
-// becomes an alloca.
-//
-unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) {
+/// \brief Compute the reduction possible for a given instruction if we are able
+/// to SROA an alloca.
+///
+/// The reduction for this instruction is added to the SROAReduction output
+/// parameter. Returns false if this instruction is expected to defeat SROA in
+/// general.
+static bool countCodeReductionForSROAInst(Instruction *I,
+ SmallVectorImpl<Value *> &Worklist,
+ unsigned &SROAReduction) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (!LI->isSimple())
+ return false;
+ SROAReduction += InlineConstants::InstrCost;
+ return true;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (!SI->isSimple())
+ return false;
+ SROAReduction += InlineConstants::InstrCost;
+ return true;
+ }
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ // If the GEP has variable indices, we won't be able to do much with it.
+ if (!GEP->hasAllConstantIndices())
+ return false;
+ // A non-zero GEP will likely become a mask operation after SROA.
+ if (GEP->hasAllZeroIndices())
+ SROAReduction += InlineConstants::InstrCost;
+ Worklist.push_back(GEP);
+ return true;
+ }
+
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
+ // Track pointer through bitcasts.
+ Worklist.push_back(BCI);
+ SROAReduction += InlineConstants::InstrCost;
+ return true;
+ }
+
+ // We just look for non-constant operands to ICmp instructions as those will
+ // defeat SROA. The actual reduction for these happens even without SROA.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
+ return isa<Constant>(ICI->getOperand(1));
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ // SROA can handle a select of an alloca iff all uses of the alloca are
+ // loads and the pointer is dereferenceable. We assume it's dereferenceable
+ // since we're told the input is an alloca.
+ for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
+ UI != UE; ++UI) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ if (LI == 0 || !LI->isSimple())
+ return false;
+ }
+ // We don't know whether we'll be deleting the rest of the chain of
+ // instructions from the SelectInst on, because we don't know whether
+ // the other side of the select is also an alloca or not.
+ return true;
+ }
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::memset:
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ // SROA can usually chew through these intrinsics.
+ SROAReduction += InlineConstants::InstrCost;
+ return true;
+ }
+ }
+
+ // If there is some other strange instruction, we're not going to be
+ // able to do much if we inline this.
+ return false;
+}
+
+unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForAlloca(
+ const CodeMetrics &Metrics, Value *V) {
if (!V->getType()->isPointerTy()) return 0; // Not a pointer
unsigned Reduction = 0;
+ unsigned SROAReduction = 0;
+ bool CanSROAAlloca = true;
- // Looking at ICmpInsts will never abort the analysis and return zero, and
- // analyzing them is expensive, so save them for last so that we don't do
- // extra work that we end up throwing out.
- SmallVector<ICmpInst *, 4> ICmpInsts;
+ SmallVector<Value *, 4> Worklist;
+ Worklist.push_back(V);
+ do {
+ Value *V = Worklist.pop_back_val();
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI){
+ Instruction *I = cast<Instruction>(*UI);
+
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
+ Reduction += countCodeReductionForAllocaICmp(Metrics, ICI);
+
+ if (CanSROAAlloca)
+ CanSROAAlloca = countCodeReductionForSROAInst(I, Worklist,
+ SROAReduction);
+ }
+ } while (!Worklist.empty());
+
+ return Reduction + (CanSROAAlloca ? SROAReduction : 0);
+}
+void InlineCostAnalyzer::FunctionInfo::countCodeReductionForPointerPair(
+ const CodeMetrics &Metrics, DenseMap<Value *, unsigned> &PointerArgs,
+ Value *V, unsigned ArgIdx) {
SmallVector<Value *, 4> Worklist;
Worklist.push_back(V);
do {
@@ -234,126 +249,57 @@ unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
UI != E; ++UI){
Instruction *I = cast<Instruction>(*UI);
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (!LI->isSimple())
- return 0;
- Reduction += InlineConstants::InstrCost;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (!SI->isSimple())
- return 0;
- Reduction += InlineConstants::InstrCost;
- } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
// If the GEP has variable indices, we won't be able to do much with it.
if (!GEP->hasAllConstantIndices())
- return 0;
- // A non-zero GEP will likely become a mask operation after SROA.
- if (GEP->hasAllZeroIndices())
- Reduction += InlineConstants::InstrCost;
+ continue;
+ // Unless the GEP is in-bounds, some comparisons will be non-constant.
+ // Fortunately, the real-world cases where this occurs use in-bounds
+ // GEPs, so we restrict the optimization to them here.
+ if (!GEP->isInBounds())
+ continue;
+
+ // Constant indices just change the constant offset. Add the resulting
+ // value both to our worklist for this argument, and to the set of
+ // viable paired values with future arguments.
+ PointerArgs[GEP] = ArgIdx;
Worklist.push_back(GEP);
- } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
- // Track pointer through bitcasts.
- Worklist.push_back(BCI);
- Reduction += InlineConstants::InstrCost;
- } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
- // SROA can handle a select of alloca iff all uses of the alloca are
- // loads, and dereferenceable. We assume it's dereferenceable since
- // we're told the input is an alloca.
- for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
- UI != UE; ++UI) {
- LoadInst *LI = dyn_cast<LoadInst>(*UI);
- if (LI == 0 || !LI->isSimple()) return 0;
- }
- // We don't know whether we'll be deleting the rest of the chain of
- // instructions from the SelectInst on, because we don't know whether
- // the other side of the select is also an alloca or not.
continue;
- } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- switch (II->getIntrinsicID()) {
- default:
- return 0;
- case Intrinsic::memset:
- case Intrinsic::memcpy:
- case Intrinsic::memmove:
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- // SROA can usually chew through these intrinsics.
- Reduction += InlineConstants::InstrCost;
- break;
- }
- } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
- if (!isa<Constant>(ICI->getOperand(1)))
- return 0;
- ICmpInsts.push_back(ICI);
- } else {
- // If there is some other strange instruction, we're not going to be
- // able to do much if we inline this.
- return 0;
}
- }
- } while (!Worklist.empty());
-
- while (!ICmpInsts.empty()) {
- ICmpInst *ICI = ICmpInsts.pop_back_val();
- // An icmp pred (alloca, C) becomes true if the predicate is true when
- // equal and false otherwise.
- bool Result = ICI->isTrueWhenEqual();
-
- SmallVector<Instruction *, 4> Worklist;
- Worklist.push_back(ICI);
- do {
- Instruction *U = Worklist.pop_back_val();
- Reduction += InlineConstants::InstrCost;
- for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
- UI != UE; ++UI) {
- Instruction *I = dyn_cast<Instruction>(*UI);
- if (!I || I->mayHaveSideEffects()) continue;
- if (I->getNumOperands() == 1)
- Worklist.push_back(I);
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
- // If BO produces the same value as U, then the other operand is
- // irrelevant and we can put it into the Worklist to continue
- // deleting dead instructions. If BO produces the same value as the
- // other operand, we can delete BO but that's it.
- if (Result == true) {
- if (BO->getOpcode() == Instruction::Or)
- Worklist.push_back(I);
- if (BO->getOpcode() == Instruction::And)
- Reduction += InlineConstants::InstrCost;
- } else {
- if (BO->getOpcode() == Instruction::Or ||
- BO->getOpcode() == Instruction::Xor)
- Reduction += InlineConstants::InstrCost;
- if (BO->getOpcode() == Instruction::And)
- Worklist.push_back(I);
- }
- }
- if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
- BasicBlock *BB = BI->getSuccessor(Result ? 0 : 1);
- if (BB->getSinglePredecessor())
- Reduction += InlineConstants::InstrCost * NumBBInsts[BB];
- }
+ // Track pointer through casts. Even when the result is not a pointer, it
+ // remains a constant relative to constants derived from other constant
+ // pointers.
+ if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ PointerArgs[CI] = ArgIdx;
+ Worklist.push_back(CI);
+ continue;
}
- } while (!Worklist.empty());
- }
- return Reduction;
-}
+ // There are two instructions which produce a strict constant value when
+ // applied to two related pointer values. Ignore everything else.
+ if (!isa<ICmpInst>(I) && I->getOpcode() != Instruction::Sub)
+ continue;
+ assert(I->getNumOperands() == 2);
+
+ // Ensure that the two operands are in our set of potentially paired
+ // pointers (or are derived from them).
+ Value *OtherArg = I->getOperand(0);
+ if (OtherArg == V)
+ OtherArg = I->getOperand(1);
+ DenseMap<Value *, unsigned>::const_iterator ArgIt
+ = PointerArgs.find(OtherArg);
+ if (ArgIt == PointerArgs.end())
+ continue;
+ std::pair<unsigned, unsigned> ArgPair(ArgIt->second, ArgIdx);
+ if (ArgPair.first > ArgPair.second)
+ std::swap(ArgPair.first, ArgPair.second);
-/// analyzeFunction - Fill in the current structure with information gleaned
-/// from the specified function.
-void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) {
- // If this function contains a call that "returns twice" (e.g., setjmp or
- // _setjmp) and it isn't marked with "returns twice" itself, never inline it.
- // This is a hack because we depend on the user marking their local variables
- // as volatile if they are live across a setjmp call, and they probably
- // won't do this in callers.
- exposesReturnsTwice = F->callsFunctionThatReturnsTwice() &&
- !F->hasFnAttr(Attribute::ReturnsTwice);
-
- // Look at the size of the callee.
- for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- analyzeBasicBlock(&*BB, TD);
+ PointerArgPairWeights[ArgPair]
+ += countCodeReductionForConstant(Metrics, I);
+ }
+ } while (!Worklist.empty());
}
/// analyzeFunction - Fill in the current structure with information gleaned
@@ -368,12 +314,25 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F,
if (Metrics.NumRets==1)
--Metrics.NumInsts;
- // Check out all of the arguments to the function, figuring out how much
- // code can be eliminated if one of the arguments is a constant.
ArgumentWeights.reserve(F->arg_size());
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
- ArgumentWeights.push_back(ArgInfo(Metrics.CountCodeReductionForConstant(I),
- Metrics.CountCodeReductionForAlloca(I)));
+ DenseMap<Value *, unsigned> PointerArgs;
+ unsigned ArgIdx = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
+ ++I, ++ArgIdx) {
+ // Count how much code can be eliminated if one of the arguments is
+ // a constant or an alloca.
+ ArgumentWeights.push_back(ArgInfo(countCodeReductionForConstant(Metrics, I),
+ countCodeReductionForAlloca(Metrics, I)));
+
+ // If the argument is a pointer, also check for pairs of pointers where
+ // knowing a fixed offset between them allows simplification. This pattern
+ // arises mostly due to STL algorithm patterns where pointers are used as
+ // random access iterators.
+ if (!I->getType()->isPointerTy())
+ continue;
+ PointerArgs[I] = ArgIdx;
+ countCodeReductionForPointerPair(Metrics, PointerArgs, I, ArgIdx);
+ }
}
/// NeverInline - returns true if the function should never be inlined into
@@ -382,43 +341,6 @@ bool InlineCostAnalyzer::FunctionInfo::NeverInline() {
return (Metrics.exposesReturnsTwice || Metrics.isRecursive ||
Metrics.containsIndirectBr);
}
-// getSpecializationBonus - The heuristic used to determine the per-call
-// performance boost for using a specialization of Callee with argument
-// specializedArgNo replaced by a constant.
-int InlineCostAnalyzer::getSpecializationBonus(Function *Callee,
- SmallVectorImpl<unsigned> &SpecializedArgNos)
-{
- if (Callee->mayBeOverridden())
- return 0;
-
- int Bonus = 0;
- // If this function uses the coldcc calling convention, prefer not to
- // specialize it.
- if (Callee->getCallingConv() == CallingConv::Cold)
- Bonus -= InlineConstants::ColdccPenalty;
-
- // Get information about the callee.
- FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-
- // If we haven't calculated this information yet, do so now.
- if (CalleeFI->Metrics.NumBlocks == 0)
- CalleeFI->analyzeFunction(Callee, TD);
-
- unsigned ArgNo = 0;
- unsigned i = 0;
- for (Function::arg_iterator I = Callee->arg_begin(), E = Callee->arg_end();
- I != E; ++I, ++ArgNo)
- if (ArgNo == SpecializedArgNos[i]) {
- ++i;
- Bonus += CountBonusForConstant(I);
- }
-
- // Calls usually take a long time, so they make the specialization gain
- // smaller.
- Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
-
- return Bonus;
-}
// ConstantFunctionBonus - Figure out how much of a bonus we can get for
// possibly devirtualizing a function. We'll subtract the size of the function
@@ -522,6 +444,15 @@ int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
}
+ const DenseMap<std::pair<unsigned, unsigned>, unsigned> &ArgPairWeights
+ = CalleeFI->PointerArgPairWeights;
+ for (DenseMap<std::pair<unsigned, unsigned>, unsigned>::const_iterator I
+ = ArgPairWeights.begin(), E = ArgPairWeights.end();
+ I != E; ++I)
+ if (CS.getArgument(I->first.first)->stripInBoundsConstantOffsets() ==
+ CS.getArgument(I->first.second)->stripInBoundsConstantOffsets())
+ InlineCost -= I->second;
+
// Each argument passed in has a cost at both the caller and the callee
// sides. Measurements show that each argument costs about the same as an
// instruction.
@@ -589,22 +520,18 @@ int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) {
// getInlineCost - The heuristic used to determine if we should inline the
// function call or not.
//
-InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
- SmallPtrSet<const Function*, 16> &NeverInline) {
- return getInlineCost(CS, CS.getCalledFunction(), NeverInline);
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS) {
+ return getInlineCost(CS, CS.getCalledFunction());
}
-InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
- Function *Callee,
- SmallPtrSet<const Function*, 16> &NeverInline) {
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee) {
Instruction *TheCall = CS.getInstruction();
Function *Caller = TheCall->getParent()->getParent();
// Don't inline functions which can be redefined at link-time to mean
// something else. Don't inline functions marked noinline or call sites
// marked noinline.
- if (Callee->mayBeOverridden() ||
- Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) ||
+ if (Callee->mayBeOverridden() || Callee->hasFnAttr(Attribute::NoInline) ||
CS.isNoInline())
return llvm::InlineCost::getNever();
@@ -655,38 +582,6 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
return llvm::InlineCost::get(InlineCost);
}
-// getSpecializationCost - The heuristic used to determine the code-size
-// impact of creating a specialized version of Callee with argument
-// SpecializedArgNo replaced by a constant.
-InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
- SmallVectorImpl<unsigned> &SpecializedArgNos)
-{
- // Don't specialize functions which can be redefined at link-time to mean
- // something else.
- if (Callee->mayBeOverridden())
- return llvm::InlineCost::getNever();
-
- // Get information about the callee.
- FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-
- // If we haven't calculated this information yet, do so now.
- if (CalleeFI->Metrics.NumBlocks == 0)
- CalleeFI->analyzeFunction(Callee, TD);
-
- int Cost = 0;
-
- // Look at the original size of the callee. Each instruction counts as 5.
- Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;
-
- // Offset that with the amount of code that can be constant-folded
- // away with the given arguments replaced by constants.
- for (SmallVectorImpl<unsigned>::iterator an = SpecializedArgNos.begin(),
- ae = SpecializedArgNos.end(); an != ae; ++an)
- Cost -= CalleeFI->ArgumentWeights[*an].ConstantWeight;
-
- return llvm::InlineCost::get(Cost);
-}
-
// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
// higher threshold to determine if the function call should be inlined.
float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
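
To see what the new PointerArgPairWeights heuristic targets, here is a hedged C++ sketch of the STL-style pattern described in the comments above (all names hypothetical): both arguments derive from the same base pointer through in-bounds constant offsets, so after inlining the pointer comparison folds and the loop gets a constant trip count.

    // Hypothetical callee: a classic begin/end pointer-range loop.
    static int sumRange(const int *Begin, const int *End) {
      int Sum = 0;
      for (const int *I = Begin; I != End; ++I)
        Sum += *I;
      return Sum;
    }

    int sumFour(const int *Buf) {
      // Both arguments strip back to Buf via constant in-bounds offsets,
      // so the argument pair gets a weight and the call site is credited
      // with the code a constant trip count would eliminate.
      return sumRange(Buf, Buf + 4);
    }
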
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 370ab96..72e33d1 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -18,6 +18,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "instsimplify"
+#include "llvm/GlobalAlias.h"
#include "llvm/Operator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -26,6 +27,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Target/TargetData.h"
@@ -38,21 +40,23 @@ STATISTIC(NumExpand, "Number of expansions");
STATISTIC(NumFactor , "Number of factorizations");
STATISTIC(NumReassoc, "Number of reassociations");
-static Value *SimplifyAndInst(Value *, Value *, const TargetData *,
- const TargetLibraryInfo *, const DominatorTree *,
- unsigned);
-static Value *SimplifyBinOp(unsigned, Value *, Value *, const TargetData *,
- const TargetLibraryInfo *, const DominatorTree *,
+struct Query {
+ const TargetData *TD;
+ const TargetLibraryInfo *TLI;
+ const DominatorTree *DT;
+
+ Query(const TargetData *td, const TargetLibraryInfo *tli,
+ const DominatorTree *dt) : TD(td), TLI(tli), DT(dt) {};
+};
+
+static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned);
+static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &,
unsigned);
-static Value *SimplifyCmpInst(unsigned, Value *, Value *, const TargetData *,
- const TargetLibraryInfo *, const DominatorTree *,
- unsigned);
-static Value *SimplifyOrInst(Value *, Value *, const TargetData *,
- const TargetLibraryInfo *, const DominatorTree *,
- unsigned);
-static Value *SimplifyXorInst(Value *, Value *, const TargetData *,
- const TargetLibraryInfo *, const DominatorTree *,
+static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &,
unsigned);
+static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned);
+static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned);
+static Value *SimplifyTruncInst(Value *, Type *, const Query &, unsigned);
/// getFalse - For a boolean type, or a vector of boolean type, return false, or
/// a vector with every element false, as appropriate for the type.
@@ -91,10 +95,20 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
// Arguments and constants dominate all instructions.
return true;
+ // If we are processing instructions (and/or basic blocks) that have not been
+ // fully added to a function, the parent nodes may still be null. Simply
+ // return the conservative answer in these cases.
+ if (!I->getParent() || !P->getParent() || !I->getParent()->getParent())
+ return false;
+
// If we have a DominatorTree then do a precise test.
- if (DT)
- return !DT->isReachableFromEntry(P->getParent()) ||
- !DT->isReachableFromEntry(I->getParent()) || DT->dominates(I, P);
+ if (DT) {
+ if (!DT->isReachableFromEntry(P->getParent()))
+ return true;
+ if (!DT->isReachableFromEntry(I->getParent()))
+ return false;
+ return DT->dominates(I, P);
+ }
// Otherwise, if the instruction is in the entry block, and is not an invoke,
// then it obviously dominates all phi nodes.
@@ -111,8 +125,7 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
/// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)".
/// Returns the simplified value, or null if no simplification was performed.
static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- unsigned OpcToExpand, const TargetData *TD,
- const TargetLibraryInfo *TLI, const DominatorTree *DT,
+ unsigned OpcToExpand, const Query &Q,
unsigned MaxRecurse) {
Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand;
// Recursion is always used, so bail out at once if we already hit the limit.
@@ -125,8 +138,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
// It does! Try turning it into "(A op C) op' (B op C)".
Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
// Do "A op C" and "B op C" both simplify?
- if (Value *L = SimplifyBinOp(Opcode, A, C, TD, TLI, DT, MaxRecurse))
- if (Value *R = SimplifyBinOp(Opcode, B, C, TD, TLI, DT, MaxRecurse)) {
+ if (Value *L = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse))
+ if (Value *R = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) {
// They do! Return "L op' R" if it simplifies or is already available.
// If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand)
@@ -135,8 +148,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return LHS;
}
// Otherwise return "L op' R" if it simplifies.
- if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, TLI, DT,
- MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) {
++NumExpand;
return V;
}
@@ -149,8 +161,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
// It does! Try turning it into "(A op B) op' (A op C)".
Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
// Do "A op B" and "A op C" both simplify?
- if (Value *L = SimplifyBinOp(Opcode, A, B, TD, TLI, DT, MaxRecurse))
- if (Value *R = SimplifyBinOp(Opcode, A, C, TD, TLI, DT, MaxRecurse)) {
+ if (Value *L = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse))
+ if (Value *R = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse)) {
// They do! Return "L op' R" if it simplifies or is already available.
// If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand)
@@ -159,8 +171,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return RHS;
}
// Otherwise return "L op' R" if it simplifies.
- if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, TLI, DT,
- MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) {
++NumExpand;
return V;
}
@@ -175,9 +186,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
/// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)".
/// Returns the simplified value, or null if no simplification was performed.
static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- unsigned OpcToExtract, const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
+ unsigned OpcToExtract, const Query &Q,
unsigned MaxRecurse) {
Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract;
// Recursion is always used, so bail out at once if we already hit the limit.
@@ -202,7 +211,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
Value *DD = A == C ? D : C;
// Form "A op' (B op DD)" if it simplifies completely.
// Does "B op DD" simplify?
- if (Value *V = SimplifyBinOp(Opcode, B, DD, TD, TLI, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, B, DD, Q, MaxRecurse)) {
// It does! Return "A op' V" if it simplifies or is already available.
// If V equals B then "A op' V" is just the LHS. If V equals DD then
// "A op' V" is just the RHS.
@@ -211,8 +220,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return V == B ? LHS : RHS;
}
// Otherwise return "A op' V" if it simplifies.
- if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, TD, TLI, DT,
- MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, Q, MaxRecurse)) {
++NumFactor;
return W;
}
@@ -226,7 +234,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
Value *CC = B == D ? C : D;
// Form "(A op CC) op' B" if it simplifies completely..
// Does "A op CC" simplify?
- if (Value *V = SimplifyBinOp(Opcode, A, CC, TD, TLI, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, A, CC, Q, MaxRecurse)) {
// It does! Return "V op' B" if it simplifies or is already available.
// If V equals A then "V op' B" is just the LHS. If V equals CC then
// "V op' B" is just the RHS.
@@ -235,8 +243,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return V == A ? LHS : RHS;
}
// Otherwise return "V op' B" if it simplifies.
- if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, TD, TLI, DT,
- MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, Q, MaxRecurse)) {
++NumFactor;
return W;
}
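
ExpandBinOp and FactorizeBinOp above are the two directions of one distributive rewrite; for the bitwise instances used later in this file (And over Or, And over Xor), the underlying identity can be checked exhaustively on small values. A self-contained check, not LLVM code:

    #include <cassert>

    int main() {
      for (unsigned a = 0; a < 16; ++a)
        for (unsigned b = 0; b < 16; ++b)
          for (unsigned c = 0; c < 16; ++c) {
            // ExpandBinOp direction: A & (B | C) -> (A & B) | (A & C).
            assert((a & (b | c)) == ((a & b) | (a & c)));
            // FactorizeBinOp direction: (A & B) | (A & C) -> A & (B | C).
            assert(((a & b) | (a & c)) == (a & (b | c)));
          }
      return 0;
    }
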
@@ -249,10 +256,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
/// SimplifyAssociativeBinOp - Generic simplifications for associative binary
/// operations. Returns the simpler value, or null if none was found.
static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
- const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc;
assert(Instruction::isAssociative(Opcode) && "Not an associative operation!");
@@ -270,12 +274,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
Value *C = RHS;
// Does "B op C" simplify?
- if (Value *V = SimplifyBinOp(Opcode, B, C, TD, TLI, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) {
// It does! Return "A op V" if it simplifies or is already available.
// If V equals B then "A op V" is just the LHS.
if (V == B) return LHS;
// Otherwise return "A op V" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, A, V, TD, TLI, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(Opcode, A, V, Q, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -289,12 +293,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
Value *C = Op1->getOperand(1);
// Does "A op B" simplify?
- if (Value *V = SimplifyBinOp(Opcode, A, B, TD, TLI, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse)) {
// It does! Return "V op C" if it simplifies or is already available.
// If V equals B then "V op C" is just the RHS.
if (V == B) return RHS;
// Otherwise return "V op C" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, V, C, TD, TLI, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(Opcode, V, C, Q, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -312,12 +316,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
Value *C = RHS;
// Does "C op A" simplify?
- if (Value *V = SimplifyBinOp(Opcode, C, A, TD, TLI, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) {
// It does! Return "V op B" if it simplifies or is already available.
// If V equals A then "V op B" is just the LHS.
if (V == A) return LHS;
// Otherwise return "V op B" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, V, B, TD, TLI, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(Opcode, V, B, Q, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -331,12 +335,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
Value *C = Op1->getOperand(1);
// Does "C op A" simplify?
- if (Value *V = SimplifyBinOp(Opcode, C, A, TD, TLI, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) {
// It does! Return "B op V" if it simplifies or is already available.
// If V equals C then "B op V" is just the RHS.
if (V == C) return RHS;
// Otherwise return "B op V" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, B, V, TD, TLI, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(Opcode, B, V, Q, MaxRecurse)) {
++NumReassoc;
return W;
}
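
All four cases above follow the same recipe: reassociate only when the newly paired operands fold, so no work is wasted. A trivial worked instance (plain C++, not LLVM code) of the first case, "(A op B) op C" with a folding inner pair:

    #include <cassert>

    int main() {
      int x = 42;
      // (A op B) op C with A = x, B = 5, C = -5: the inner "B op C"
      // constant-folds to 0, then "A op 0" folds to A, so the whole
      // expression simplifies to x.
      assert(((x + 5) + (-5)) == x);
      return 0;
    }
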
@@ -351,10 +355,7 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
/// evaluating it on both branches of the select results in the same value.
/// Returns the common value if so, otherwise returns null.
static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
- const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return 0;
@@ -371,11 +372,11 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
Value *TV;
Value *FV;
if (SI == LHS) {
- TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, TD, TLI, DT, MaxRecurse);
- FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, TD, TLI, DT, MaxRecurse);
+ TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, Q, MaxRecurse);
+ FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, Q, MaxRecurse);
} else {
- TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), TD, TLI, DT, MaxRecurse);
- FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), TD, TLI, DT, MaxRecurse);
+ TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), Q, MaxRecurse);
+ FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), Q, MaxRecurse);
}
// If they simplified to the same value, then return the common value.
@@ -426,9 +427,7 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
/// result in the same value. Returns the common value if so, otherwise returns
/// null.
static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
- Value *RHS, const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
+ Value *RHS, const Query &Q,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
@@ -447,7 +446,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
// Now that we have "cmp select(Cond, TV, FV), RHS", analyse it.
// Does "cmp TV, RHS" simplify?
- Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, TD, TLI, DT, MaxRecurse);
+ Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, Q, MaxRecurse);
if (TCmp == Cond) {
// It not only simplified, it simplified to the select condition. Replace
// it with 'true'.
@@ -461,7 +460,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
}
// Does "cmp FV, RHS" simplify?
- Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, TD, TLI, DT, MaxRecurse);
+ Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, Q, MaxRecurse);
if (FCmp == Cond) {
// It not only simplified, it simplified to the select condition. Replace
// it with 'false'.
@@ -487,19 +486,19 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
// is equal to "Cond && TCmp". This also catches the case when the false
// value simplified to false and the true value to true, returning "Cond".
if (match(FCmp, m_Zero()))
- if (Value *V = SimplifyAndInst(Cond, TCmp, TD, TLI, DT, MaxRecurse))
+ if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse))
return V;
// If the true value simplified to true, then the result of the compare
// is equal to "Cond || FCmp".
if (match(TCmp, m_One()))
- if (Value *V = SimplifyOrInst(Cond, FCmp, TD, TLI, DT, MaxRecurse))
+ if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse))
return V;
// Finally, if the false value simplified to true and the true value to
// false, then the result of the compare is equal to "!Cond".
if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
if (Value *V =
SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()),
- TD, TLI, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
return 0;
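
The logic above rests on two facts: a compare of a select can be threaded into both arms, and when one threaded arm is a known constant the result collapses to a plain and/or of the condition. A brute-force check of both facts over small values (standalone C++, not LLVM code):

    #include <cassert>

    int main() {
      for (int c = 0; c <= 1; ++c)
        for (int tv = 0; tv < 4; ++tv)
          for (int fv = 0; fv < 4; ++fv) {
            const int rhs = 2;
            bool sel  = (c ? tv : fv) < rhs;      // cmp(select(c, tv, fv), rhs)
            bool tcmp = tv < rhs;                 // cmp(tv, rhs)
            bool fcmp = fv < rhs;                 // cmp(fv, rhs)
            assert(sel == (c ? tcmp : fcmp));     // threading is sound
            if (!fcmp)
              assert(sel == (c && tcmp));         // the "Cond && TCmp" case
            if (tcmp)
              assert(sel == (c || fcmp));         // the "Cond || FCmp" case
          }
      return 0;
    }
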
@@ -510,10 +509,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
/// it on the incoming phi values yields the same result for every value. If so
/// returns the common value, otherwise returns null.
static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
- const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return 0;
@@ -522,13 +518,13 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
if (isa<PHINode>(LHS)) {
PI = cast<PHINode>(LHS);
// Bail out if RHS and the phi may be mutually interdependent due to a loop.
- if (!ValueDominatesPHI(RHS, PI, DT))
+ if (!ValueDominatesPHI(RHS, PI, Q.DT))
return 0;
} else {
assert(isa<PHINode>(RHS) && "No PHI instruction operand!");
PI = cast<PHINode>(RHS);
// Bail out if LHS and the phi may be mutually interdependent due to a loop.
- if (!ValueDominatesPHI(LHS, PI, DT))
+ if (!ValueDominatesPHI(LHS, PI, Q.DT))
return 0;
}
@@ -539,8 +535,8 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
// If the incoming value is the phi node itself, it can safely be skipped.
if (Incoming == PI) continue;
Value *V = PI == LHS ?
- SimplifyBinOp(Opcode, Incoming, RHS, TD, TLI, DT, MaxRecurse) :
- SimplifyBinOp(Opcode, LHS, Incoming, TD, TLI, DT, MaxRecurse);
+ SimplifyBinOp(Opcode, Incoming, RHS, Q, MaxRecurse) :
+ SimplifyBinOp(Opcode, LHS, Incoming, Q, MaxRecurse);
// If the operation failed to simplify, or simplified to a different value
// than it did previously, then give up.
if (!V || (CommonValue && V != CommonValue))
@@ -556,10 +552,7 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
/// incoming phi values yields the same result every time. If so returns the
/// common result, otherwise returns null.
static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
- const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return 0;
@@ -573,7 +566,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
PHINode *PI = cast<PHINode>(LHS);
// Bail out if RHS and the phi may be mutually interdependent due to a loop.
- if (!ValueDominatesPHI(RHS, PI, DT))
+ if (!ValueDominatesPHI(RHS, PI, Q.DT))
return 0;
// Evaluate the BinOp on the incoming phi values.
@@ -582,7 +575,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
Value *Incoming = PI->getIncomingValue(i);
// If the incoming value is the phi node itself, it can safely be skipped.
if (Incoming == PI) continue;
- Value *V = SimplifyCmpInst(Pred, Incoming, RHS, TD, TLI, DT, MaxRecurse);
+ Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q, MaxRecurse);
// If the operation failed to simplify, or simplified to a different value
// than it did previously, then give up.
if (!V || (CommonValue && V != CommonValue))
@@ -596,15 +589,12 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
/// SimplifyAddInst - Given operands for an Add, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
- return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(),
- Ops, TD, TLI);
+ return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(), Ops,
+ Q.TD, Q.TLI);
}
// Canonicalize the constant to the RHS.
@@ -634,17 +624,17 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
/// i1 add -> xor.
if (MaxRecurse && Op0->getType()->isIntegerTy(1))
- if (Value *V = SimplifyXorInst(Op0, Op1, TD, TLI, DT, MaxRecurse-1))
+ if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
return V;
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, TD, TLI, DT,
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, Q,
MaxRecurse))
return V;
// Mul distributes over Add. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul,
- TD, TLI, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// Threading Add over selects and phi nodes is pointless, so don't bother.
@@ -662,21 +652,114 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const TargetData *TD, const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, TD, TLI, DT, RecursionLimit);
+ return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT),
+ RecursionLimit);
+}
+
+/// \brief Accumulate the constant integer offset a GEP represents.
+///
+/// Given a getelementptr instruction/constantexpr, accumulate the constant
+/// offset from the base pointer into the provided APInt 'Offset'. Returns true
+/// if the GEP has all-constant indices. Returns false if any non-constant
+/// index is encountered, leaving 'Offset' in an undefined state. The
+/// 'Offset' APInt must have the same bit width as the target's pointer size.
+static bool accumulateGEPOffset(const TargetData &TD, GEPOperator *GEP,
+ APInt &Offset) {
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ assert(IntPtrWidth == Offset.getBitWidth());
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end(); I != E;
+ ++I, ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(*I);
+ if (!OpC) return false;
+ if (OpC->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ unsigned ElementIdx = OpC->getZExtValue();
+ const StructLayout *SL = TD.getStructLayout(STy);
+ Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
+ continue;
+ }
+
+ APInt TypeSize(IntPtrWidth, TD.getTypeAllocSize(GTI.getIndexedType()));
+ Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
+ }
+ return true;
+}
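
Concretely, for something like gep %struct.S* %p, i64 2, i32 1 the loop above adds a scaled term for the sequential index and a fixed field offset for the struct index. A standalone rendering of that arithmetic with an assumed 64-bit layout (sizeof(S) == 16 and offsetof(S, b) == 8 are illustrative numbers, not taken from the patch):

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint64_t SizeOfS   = 16;   // assumed alloc size of %struct.S
      const uint64_t OffsetOfB = 8;    // assumed offset of field index 1
      uint64_t Offset = 0;
      Offset += 2 * SizeOfS;           // sequential index: scaled by alloc size
      Offset += OffsetOfB;             // struct index: fixed field offset
      std::printf("%llu\n", (unsigned long long)Offset);  // prints 40
    }
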
+
+/// \brief Compute the base pointer and cumulative constant offsets for V.
+///
+/// This strips all constant offsets off of V, leaving it the base pointer, and
+/// accumulates the total constant offset applied in the returned constant. It
+/// returns null if V is not a pointer, and returns the constant '0' if there are
+/// no constant offsets applied.
+static Constant *stripAndComputeConstantOffsets(const TargetData &TD,
+ Value *&V) {
+ if (!V->getType()->isPointerTy())
+ return 0;
+
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ APInt Offset = APInt::getNullValue(IntPtrWidth);
+
+ // Even though we don't look through PHI nodes, we could be called on an
+ // instruction in an unreachable block, which may be on a cycle.
+ SmallPtrSet<Value *, 4> Visited;
+ Visited.insert(V);
+ do {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ if (!accumulateGEPOffset(TD, GEP, Offset))
+ break;
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ break;
+ V = GA->getAliasee();
+ } else {
+ break;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ } while (Visited.insert(V));
+
+ Type *IntPtrTy = TD.getIntPtrType(V->getContext());
+ return ConstantInt::get(IntPtrTy, Offset);
+}
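
The Visited set matters because, as the comment notes, value chains in unreachable code can loop back on themselves. A tiny cycle-guarded walk in the same shape (standalone C++, hypothetical Node type, not LLVM code):

    #include <set>

    struct Node { Node *next; };

    Node *stripToBase(Node *n) {
      std::set<Node *> visited;
      visited.insert(n);
      while (n->next && visited.insert(n->next).second)
        n = n->next;            // advance only while nodes are still fresh
      return n;                 // the base, or the point where a cycle closed
    }

    int main() {
      Node a = { 0 }, b = { &a }, c = { &b };
      a.next = &c;              // manufacture a cycle, as unreachable code can
      return stripToBase(&c) == &a ? 0 : 1;   // terminates, exits 0
    }
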
+
+/// \brief Compute the constant difference between two pointer values.
+/// If the difference is not a constant, returns null.
+static Constant *computePointerDifference(const TargetData &TD,
+ Value *LHS, Value *RHS) {
+ Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS);
+ if (!LHSOffset)
+ return 0;
+ Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS);
+ if (!RHSOffset)
+ return 0;
+
+ // If LHS and RHS are not related via constant offsets to the same base
+ // value, there is nothing we can do here.
+ if (LHS != RHS)
+ return 0;
+
+ // Otherwise, the difference of LHS - RHS can be computed as:
+ // LHS - RHS
+ // = (LHSOffset + Base) - (RHSOffset + Base)
+ // = LHSOffset - RHSOffset
+ return ConstantExpr::getSub(LHSOffset, RHSOffset);
}
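
The algebra in the comment is the whole trick: once both pointers are stripped to a common base, the base cancels and only the constant offsets survive. The same cancellation on ordinary C++ pointers (standalone, not LLVM code):

    #include <cassert>
    #include <cstdint>

    int main() {
      int arr[8];
      uintptr_t lhs = (uintptr_t)&arr[5];   // Base + 5*sizeof(int)
      uintptr_t rhs = (uintptr_t)&arr[2];   // Base + 2*sizeof(int)
      // (LHSOffset + Base) - (RHSOffset + Base) == LHSOffset - RHSOffset.
      assert(lhs - rhs == 5 * sizeof(int) - 2 * sizeof(int));
      return 0;
    }
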
/// SimplifySubInst - Given operands for a Sub, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0))
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(),
- Ops, TD, TLI);
+ Ops, Q.TD, Q.TLI);
}
// X - undef -> undef
@@ -704,19 +787,17 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Value *Y = 0, *Z = Op1;
if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z
// See if "V === Y - Z" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, TD, TLI, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1))
// It does! Now see if "X + V" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Add, X, V, TD, TLI, DT,
- MaxRecurse-1)) {
+ if (Value *W = SimplifyBinOp(Instruction::Add, X, V, Q, MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
}
// See if "V === X - Z" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, TLI, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1))
// It does! Now see if "Y + V" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, TD, TLI, DT,
- MaxRecurse-1)) {
+ if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, Q, MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
@@ -728,19 +809,17 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
X = Op0;
if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z)
// See if "V === X - Y" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, TD, TLI, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1))
// It does! Now see if "V - Z" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, TD, TLI, DT,
- MaxRecurse-1)) {
+ if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, Q, MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
}
// See if "V === X - Z" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, TLI, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1))
// It does! Now see if "V - Y" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, TD, TLI, DT,
- MaxRecurse-1)) {
+ if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, Q, MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
@@ -752,23 +831,39 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Z = Op0;
if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y)
// See if "V === Z - X" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, TD, TLI, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, Q, MaxRecurse-1))
// It does! Now see if "V + Y" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, TD, TLI, DT,
- MaxRecurse-1)) {
+ if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, Q, MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
}
+ // trunc(X) - trunc(Y) -> trunc(X - Y) if everything simplifies.
+ if (MaxRecurse && match(Op0, m_Trunc(m_Value(X))) &&
+ match(Op1, m_Trunc(m_Value(Y))))
+ if (X->getType() == Y->getType())
+ // See if "V === X - Y" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1))
+ // It does! Now see if "trunc V" simplifies.
+ if (Value *W = SimplifyTruncInst(V, Op0->getType(), Q, MaxRecurse-1))
+ // It does, return the simplified "trunc V".
+ return W;
+
+ // Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...).
+ if (Q.TD && match(Op0, m_PtrToInt(m_Value(X))) &&
+ match(Op1, m_PtrToInt(m_Value(Y))))
+ if (Constant *Result = computePointerDifference(*Q.TD, X, Y))
+ return ConstantExpr::getIntegerCast(Result, Op0->getType(), true);
+
// Mul distributes over Sub. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul,
- TD, TLI, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// i1 sub -> xor.
if (MaxRecurse && Op0->getType()->isIntegerTy(1))
- if (Value *V = SimplifyXorInst(Op0, Op1, TD, TLI, DT, MaxRecurse-1))
+ if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
return V;
// Threading Sub over selects and phi nodes is pointless, so don't bother.
@@ -784,22 +879,21 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
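
Of the two folds added above, the trunc one relies on truncation commuting with subtraction: subtracting and then truncating equals truncating and then subtracting. A brute-force check over a slice of the i16 -> i8 case (standalone C++, not LLVM code):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int x = -300; x <= 300; ++x)
        for (int y = -300; y <= 300; ++y) {
          uint8_t subOfTruncs = (uint8_t)(int16_t)x - (uint8_t)(int16_t)y;
          uint8_t truncOfSub  = (uint8_t)((int16_t)x - (int16_t)y);
          assert(subOfTruncs == truncOfSub);  // trunc(X) - trunc(Y) == trunc(X - Y)
        }
      return 0;
    }
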
Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD,
- const TargetLibraryInfo *TLI,
+ const TargetData *TD, const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, TD, TLI, DT, RecursionLimit);
+ return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT),
+ RecursionLimit);
}
/// SimplifyMulInst - Given operands for a Mul, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, unsigned MaxRecurse) {
+static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(),
- Ops, TD, TLI);
+ Ops, Q.TD, Q.TLI);
}
// Canonicalize the constant to the RHS.
@@ -826,30 +920,30 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
// i1 mul -> and.
if (MaxRecurse && Op0->getType()->isIntegerTy(1))
- if (Value *V = SimplifyAndInst(Op0, Op1, TD, TLI, DT, MaxRecurse-1))
+ if (Value *V = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1))
return V;
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, TD, TLI, DT,
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, Q,
MaxRecurse))
return V;
// Mul distributes over Add. Try some generic simplifications based on this.
if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add,
- TD, TLI, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, TD, TLI, DT,
+ if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, Q,
MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, TD, TLI, DT,
+ if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, Q,
MaxRecurse))
return V;
@@ -859,18 +953,17 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyMulInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
+ return ::SimplifyMulInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
/// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
- const TargetData *TD, const TargetLibraryInfo *TLI,
- const DominatorTree *DT, unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
if (Constant *C1 = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { C0, C1 };
- return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD, TLI);
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI);
}
}
@@ -923,15 +1016,13 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, TLI, DT,
- MaxRecurse))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, TLI, DT,
- MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
@@ -939,11 +1030,9 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
/// SimplifySDivInst - Given operands for an SDiv, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, unsigned MaxRecurse) {
- if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, TD, TLI, DT,
- MaxRecurse))
+static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
@@ -952,16 +1041,14 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifySDivInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
+ return ::SimplifySDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
/// SimplifyUDivInst - Given operands for a UDiv, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, unsigned MaxRecurse) {
- if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, TD, TLI, DT,
- MaxRecurse))
+static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
@@ -970,12 +1057,11 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyUDivInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
+ return ::SimplifyUDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
-static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *,
- const TargetLibraryInfo *,
- const DominatorTree *, unsigned) {
+static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned) {
// undef / X -> undef (the undef could be an SNaN).
if (match(Op0, m_Undef()))
return Op0;
@@ -990,18 +1076,17 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *,
Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD,
const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyFDivInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
+ return ::SimplifyFDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
/// SimplifyRem - Given operands for an SRem or URem, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
- const TargetData *TD, const TargetLibraryInfo *TLI,
- const DominatorTree *DT, unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
if (Constant *C1 = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { C0, C1 };
- return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD, TLI);
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI);
}
}
@@ -1036,13 +1121,13 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
@@ -1050,11 +1135,9 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
/// SimplifySRemInst - Given operands for an SRem, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- unsigned MaxRecurse) {
- if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, TD, TLI, DT, MaxRecurse))
+static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
@@ -1063,16 +1146,14 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifySRemInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
+ return ::SimplifySRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
/// SimplifyURemInst - Given operands for a URem, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
+static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q,
unsigned MaxRecurse) {
- if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, TD, TLI, DT, MaxRecurse))
+ if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
@@ -1081,12 +1162,10 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyURemInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
+ return ::SimplifyURemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
-static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *,
- const TargetLibraryInfo *,
- const DominatorTree *,
+static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &,
unsigned) {
// undef % X -> undef (the undef could be an SNaN).
if (match(Op0, m_Undef()))
@@ -1102,18 +1181,17 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *,
Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *TD,
const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyFRemInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
+ return ::SimplifyFRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
/// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
- const TargetData *TD, const TargetLibraryInfo *TLI,
- const DominatorTree *DT, unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
if (Constant *C1 = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { C0, C1 };
- return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD, TLI);
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI);
}
}
@@ -1138,13 +1216,13 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
@@ -1153,10 +1231,8 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
/// SimplifyShlInst - Given operands for an Shl, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, unsigned MaxRecurse) {
- if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, TD, TLI, DT, MaxRecurse))
+ const Query &Q, unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, Q, MaxRecurse))
return V;
// undef << X -> 0
@@ -1173,17 +1249,15 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const TargetData *TD, const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, TD, TLI, DT, RecursionLimit);
+ return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT),
+ RecursionLimit);
}
/// SimplifyLShrInst - Given operands for an LShr, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
- const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- unsigned MaxRecurse) {
- if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, TD, TLI, DT, MaxRecurse))
+ const Query &Q, unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, Q, MaxRecurse))
return V;
// undef >>l X -> 0
@@ -1203,17 +1277,15 @@ Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
const TargetData *TD,
const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyLShrInst(Op0, Op1, isExact, TD, TLI, DT, RecursionLimit);
+ return ::SimplifyLShrInst(Op0, Op1, isExact, Query (TD, TLI, DT),
+ RecursionLimit);
}
/// SimplifyAShrInst - Given operands for an AShr, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
- const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- unsigned MaxRecurse) {
- if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, TD, TLI, DT, MaxRecurse))
+ const Query &Q, unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, Q, MaxRecurse))
return V;
// all ones >>a X -> all ones
@@ -1237,20 +1309,19 @@ Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
const TargetData *TD,
const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyAShrInst(Op0, Op1, isExact, TD, TLI, DT, RecursionLimit);
+ return ::SimplifyAShrInst(Op0, Op1, isExact, Query (TD, TLI, DT),
+ RecursionLimit);
}
/// SimplifyAndInst - Given operands for an And, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
+static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::And, CLHS->getType(),
- Ops, TD, TLI);
+ Ops, Q.TD, Q.TLI);
}
// Canonicalize the constant to the RHS.
@@ -1292,43 +1363,43 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
// A & (-A) = A if A is a power of two or zero.
if (match(Op0, m_Neg(m_Specific(Op1))) ||
match(Op1, m_Neg(m_Specific(Op0)))) {
- if (isPowerOfTwo(Op0, TD, /*OrZero*/true))
+ if (isPowerOfTwo(Op0, Q.TD, /*OrZero*/true))
return Op0;
- if (isPowerOfTwo(Op1, TD, /*OrZero*/true))
+ if (isPowerOfTwo(Op1, Q.TD, /*OrZero*/true))
return Op1;
}
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, TD, TLI,
- DT, MaxRecurse))
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q,
+ MaxRecurse))
return V;
// And distributes over Or. Try some generic simplifications based on this.
if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or,
- TD, TLI, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// And distributes over Xor. Try some generic simplifications based on this.
if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor,
- TD, TLI, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// Or distributes over And. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or,
- TD, TLI, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, TD, TLI,
- DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, Q,
+ MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, TD, TLI, DT,
+ if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, Q,
MaxRecurse))
return V;
@@ -1338,19 +1409,18 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyAndInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
+ return ::SimplifyAndInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
/// SimplifyOrInst - Given operands for an Or, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, unsigned MaxRecurse) {
+static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(),
- Ops, TD, TLI);
+ Ops, Q.TD, Q.TLI);
}
// Canonicalize the constant to the RHS.
@@ -1400,32 +1470,31 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
return Constant::getAllOnesValue(Op0->getType());
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, TLI,
- DT, MaxRecurse))
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q,
+ MaxRecurse))
return V;
// Or distributes over And. Try some generic simplifications based on this.
- if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, TD,
- TLI, DT, MaxRecurse))
+ if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, Q,
+ MaxRecurse))
return V;
// And distributes over Or. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And,
- TD, TLI, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, TD, TLI, DT,
+ if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, Q,
MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, TD, TLI, DT,
- MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
@@ -1434,19 +1503,18 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyOrInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
+ return ::SimplifyOrInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
/// SimplifyXorInst - Given operands for a Xor, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, unsigned MaxRecurse) {
+static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(),
- Ops, TD, TLI);
+ Ops, Q.TD, Q.TLI);
}
// Canonicalize the constant to the RHS.
@@ -1471,13 +1539,13 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
return Constant::getAllOnesValue(Op0->getType());
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, TD, TLI,
- DT, MaxRecurse))
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q,
+ MaxRecurse))
return V;
// And distributes over Xor. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And,
- TD, TLI, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// Threading Xor over selects and phi nodes is pointless, so don't bother.
@@ -1495,7 +1563,7 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyXorInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
+ return ::SimplifyXorInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
static Type *GetCompareTy(Value *Op) {
@@ -1522,42 +1590,17 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
return 0;
}
-/// stripPointerAdjustments - This is like Value::stripPointerCasts, but also
-/// removes inbounds gep operations, regardless of their indices.
-static Value *stripPointerAdjustmentsImpl(Value *V,
- SmallPtrSet<GEPOperator*, 8> &VisitedGEPs) {
- GEPOperator *GEP = dyn_cast<GEPOperator>(V);
- if (GEP == 0 || !GEP->isInBounds())
- return V;
-
- // If we've already seen this GEP, we will end up infinitely looping. This
- // can happen in unreachable code.
- if (!VisitedGEPs.insert(GEP))
- return V;
-
- return stripPointerAdjustmentsImpl(GEP->getOperand(0)->stripPointerCasts(),
- VisitedGEPs);
-}
-
-static Value *stripPointerAdjustments(Value *V) {
- SmallPtrSet<GEPOperator*, 8> VisitedGEPs;
- return stripPointerAdjustmentsImpl(V, VisitedGEPs);
-}
-
/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD, TLI);
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.TD, Q.TLI);
// If we have a constant, make sure it is on the RHS.
std::swap(LHS, RHS);
@@ -1625,29 +1668,39 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Be more aggressive about stripping pointer adjustments when checking a
// comparison of an alloca address to another object. We can strip off all
// inbounds GEP operations, even when their indices are variable.
- LHSPtr = stripPointerAdjustments(LHSPtr);
+ LHSPtr = LHSPtr->stripInBoundsOffsets();
if (llvm::isIdentifiedObject(LHSPtr)) {
- RHSPtr = stripPointerAdjustments(RHSPtr);
+ RHSPtr = RHSPtr->stripInBoundsOffsets();
if (llvm::isKnownNonNull(LHSPtr) || llvm::isKnownNonNull(RHSPtr)) {
// If both sides are different identified objects, they aren't equal
// unless they're null.
- if (LHSPtr != RHSPtr && llvm::isIdentifiedObject(RHSPtr))
- return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred));
+ if (LHSPtr != RHSPtr && llvm::isIdentifiedObject(RHSPtr) &&
+ Pred == CmpInst::ICMP_EQ)
+ return ConstantInt::get(ITy, false);
// A local identified object (alloca or noalias call) can't equal any
// incoming argument, unless they're both null.
- if (isa<Instruction>(LHSPtr) && isa<Argument>(RHSPtr))
- return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred));
+ if (isa<Instruction>(LHSPtr) && isa<Argument>(RHSPtr) &&
+ Pred == CmpInst::ICMP_EQ)
+ return ConstantInt::get(ITy, false);
}
// Assume that the constant null is on the right.
- if (llvm::isKnownNonNull(LHSPtr) && isa<ConstantPointerNull>(RHSPtr))
- return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred));
+ if (llvm::isKnownNonNull(LHSPtr) && isa<ConstantPointerNull>(RHSPtr)) {
+ if (Pred == CmpInst::ICMP_EQ)
+ return ConstantInt::get(ITy, false);
+ else if (Pred == CmpInst::ICMP_NE)
+ return ConstantInt::get(ITy, true);
+ }
} else if (isa<Argument>(LHSPtr)) {
- RHSPtr = stripPointerAdjustments(RHSPtr);
+ RHSPtr = RHSPtr->stripInBoundsOffsets();
// An alloca can't be equal to an argument.
- if (isa<AllocaInst>(RHSPtr))
- return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred));
+ if (isa<AllocaInst>(RHSPtr)) {
+ if (Pred == CmpInst::ICMP_EQ)
+ return ConstantInt::get(ITy, false);
+ else if (Pred == CmpInst::ICMP_NE)
+ return ConstantInt::get(ITy, true);
+ }
}
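
The rewrite above deliberately narrows these folds from every isFalseWhenEqual predicate to eq/ne only: knowing two addresses denote distinct objects decides equality, but says nothing about which address is lower, so the ordered predicates must be left alone. In plain C++ terms (standalone, not LLVM code):

    #include <cstdio>

    int main() {
      int a, b;                 // two distinct locals
      // &a != &b is guaranteed, so eq/ne may be folded; but whether
      // &a < &b holds is unspecified and varies by platform and run,
      // so ult/ugt/slt/... must not be constant-folded.
      std::printf("%d\n", &a != &b);   // always prints 1
      return 0;
    }
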
// If we are comparing with zero then try hard since this is a common case.
@@ -1661,40 +1714,40 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getTrue(ITy);
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_ULE:
- if (isKnownNonZero(LHS, TD))
+ if (isKnownNonZero(LHS, Q.TD))
return getFalse(ITy);
break;
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGT:
- if (isKnownNonZero(LHS, TD))
+ if (isKnownNonZero(LHS, Q.TD))
return getTrue(ITy);
break;
case ICmpInst::ICMP_SLT:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
if (LHSKnownNegative)
return getTrue(ITy);
if (LHSKnownNonNegative)
return getFalse(ITy);
break;
case ICmpInst::ICMP_SLE:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
if (LHSKnownNegative)
return getTrue(ITy);
- if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.TD))
return getFalse(ITy);
break;
case ICmpInst::ICMP_SGE:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
if (LHSKnownNegative)
return getFalse(ITy);
if (LHSKnownNonNegative)
return getTrue(ITy);
break;
case ICmpInst::ICMP_SGT:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
if (LHSKnownNegative)
return getFalse(ITy);
- if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.TD))
return getTrue(ITy);
break;
}
@@ -1777,19 +1830,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
// if the integer type is the same size as the pointer type.
- if (MaxRecurse && TD && isa<PtrToIntInst>(LI) &&
- TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) {
+ if (MaxRecurse && Q.TD && isa<PtrToIntInst>(LI) &&
+ Q.TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) {
if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
// Transfer the cast to the constant.
if (Value *V = SimplifyICmpInst(Pred, SrcOp,
ConstantExpr::getIntToPtr(RHSC, SrcTy),
- TD, TLI, DT, MaxRecurse-1))
+ Q, MaxRecurse-1))
return V;
} else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
if (RI->getOperand(0)->getType() == SrcTy)
// Compare without the cast.
if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
- TD, TLI, DT, MaxRecurse-1))
+ Q, MaxRecurse-1))
return V;
}
}
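
This fold is only legal because a pointer-sized ptrtoint is a bijection, so comparing the casts and comparing the pointers agree. A quick standalone check of that equivalence (plain C++, not LLVM code):

    #include <cassert>
    #include <cstdint>

    int main() {
      assert(sizeof(uintptr_t) == sizeof(void *));  // pointer-sized integer
      int x;
      void *p = &x, *q = &x;
      assert(((uintptr_t)p == (uintptr_t)q) == (p == q));
      return 0;
    }
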
@@ -1801,7 +1854,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
// Compare X and Y. Note that signed predicates become unsigned.
if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
- SrcOp, RI->getOperand(0), TD, TLI, DT,
+ SrcOp, RI->getOperand(0), Q,
MaxRecurse-1))
return V;
}
@@ -1817,7 +1870,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// also a case of comparing two zero-extended values.
if (RExt == CI && MaxRecurse)
if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
- SrcOp, Trunc, TD, TLI, DT, MaxRecurse-1))
+ SrcOp, Trunc, Q, MaxRecurse-1))
return V;
// Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
@@ -1861,7 +1914,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
// Compare X and Y. Note that the predicate does not change.
if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
- TD, TLI, DT, MaxRecurse-1))
+ Q, MaxRecurse-1))
return V;
}
// Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
@@ -1875,8 +1928,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If the re-extended constant didn't change then this is effectively
// also a case of comparing two sign-extended values.
if (RExt == CI && MaxRecurse)
- if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, TD, TLI, DT,
- MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1))
return V;
// Otherwise the upper bits of LHS are all equal, while RHS has varying
@@ -1910,7 +1962,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse)
if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
Constant::getNullValue(SrcTy),
- TD, TLI, DT, MaxRecurse-1))
+ Q, MaxRecurse-1))
return V;
break;
case ICmpInst::ICMP_ULT:
@@ -1919,7 +1971,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse)
if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
Constant::getNullValue(SrcTy),
- TD, TLI, DT, MaxRecurse-1))
+ Q, MaxRecurse-1))
return V;
break;
}
@@ -1953,14 +2005,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if ((A == RHS || B == RHS) && NoLHSWrapProblem)
if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A,
Constant::getNullValue(RHS->getType()),
- TD, TLI, DT, MaxRecurse-1))
+ Q, MaxRecurse-1))
return V;
// icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
if ((C == LHS || D == LHS) && NoRHSWrapProblem)
if (Value *V = SimplifyICmpInst(Pred,
Constant::getNullValue(LHS->getType()),
- C == LHS ? D : C, TD, TLI, DT, MaxRecurse-1))
+ C == LHS ? D : C, Q, MaxRecurse-1))
return V;
// icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow.
@@ -1969,7 +2021,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Determine Y and Z in the form icmp (X+Y), (X+Z).
Value *Y = (A == C || A == D) ? B : A;
Value *Z = (C == A || C == B) ? D : C;
- if (Value *V = SimplifyICmpInst(Pred, Y, Z, TD, TLI, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse-1))
return V;
}
}
@@ -1981,7 +2033,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD);
if (!KnownNonNegative)
break;
// fall-through
@@ -1991,7 +2043,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getFalse(ITy);
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD);
if (!KnownNonNegative)
break;
// fall-through
@@ -2008,7 +2060,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD);
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD);
if (!KnownNonNegative)
break;
// fall-through
@@ -2018,7 +2070,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getTrue(ITy);
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD);
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD);
if (!KnownNonNegative)
break;
// fall-through
@@ -2052,7 +2104,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (!LBO->isExact() || !RBO->isExact())
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
- RBO->getOperand(0), TD, TLI, DT, MaxRecurse-1))
+ RBO->getOperand(0), Q, MaxRecurse-1))
return V;
break;
case Instruction::Shl: {
@@ -2063,7 +2115,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (!NSW && ICmpInst::isSigned(Pred))
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
- RBO->getOperand(0), TD, TLI, DT, MaxRecurse-1))
+ RBO->getOperand(0), Q, MaxRecurse-1))
return V;
break;
}
@@ -2117,7 +2169,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A EqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(EqP, A, B, TD, TLI, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1))
return V;
break;
case CmpInst::ICMP_NE:
@@ -2131,7 +2183,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A InvEqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, TLI, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1))
return V;
break;
}
@@ -2187,7 +2239,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A EqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(EqP, A, B, TD, TLI, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1))
return V;
break;
case CmpInst::ICMP_NE:
@@ -2201,7 +2253,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A InvEqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, TLI, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1))
return V;
break;
}
@@ -2283,13 +2335,13 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If the comparison is with the result of a select instruction, check whether
// comparing with either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
- if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse))
+ if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse))
return V;
// If the comparison is with the result of a phi instruction, check whether
// doing the compare with each incoming phi value yields a common result.
if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
- if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse))
+ if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
return V;
return 0;
@@ -2299,22 +2351,20 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const TargetData *TD,
const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyICmpInst(Predicate, LHS, RHS, TD, TLI, DT, RecursionLimit);
+ return ::SimplifyICmpInst(Predicate, LHS, RHS, Query(TD, TLI, DT),
+ RecursionLimit);
}
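
The recurring change in this file is that every simplifier now takes a single Query argument in place of the TD/TLI/DT triple. The struct itself is defined in an earlier hunk that is not shown here; a minimal sketch of the shape these call sites assume:

    // Hedged sketch only: the real definition lives earlier in
    // InstructionSimplify.cpp. Field names are taken from the Q.TD, Q.TLI
    // and Q.DT uses visible in this file's hunks.
    struct Query {
      const TargetData *TD;          // layout information, may be null
      const TargetLibraryInfo *TLI;  // library-call information, may be null
      const DominatorTree *DT;       // dominance information, may be null

      Query(const TargetData *td, const TargetLibraryInfo *tli,
            const DominatorTree *dt) : TD(td), TLI(tli), DT(dt) {}
    };

Bundling the three analyses keeps the recursive helpers' signatures stable if more context is added later.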
/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD, TLI);
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.TD, Q.TLI);
// If we have a constant, make sure it is on the RHS.
std::swap(LHS, RHS);
@@ -2382,13 +2432,13 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If the comparison is with the result of a select instruction, check whether
// comparing with either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
- if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse))
+ if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse))
return V;
// If the comparison is with the result of a phi instruction, check whether
// doing the compare with each incoming phi value yields a common result.
if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
- if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse))
+ if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
return V;
return 0;
@@ -2398,13 +2448,15 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const TargetData *TD,
const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyFCmpInst(Predicate, LHS, RHS, TD, TLI, DT, RecursionLimit);
+ return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query(TD, TLI, DT),
+ RecursionLimit);
}
/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
/// the result. If not, this returns null.
-Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
- const TargetData *TD, const DominatorTree *) {
+static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
+ Value *FalseVal, const Query &Q,
+ unsigned MaxRecurse) {
// select true, X, Y -> X
// select false, X, Y -> Y
if (ConstantInt *CB = dyn_cast<ConstantInt>(CondVal))
@@ -2427,10 +2479,17 @@ Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
return 0;
}
+Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Query(TD, TLI, DT),
+ RecursionLimit);
+}
+
/// SimplifyGEPInst - Given operands for a GetElementPtrInst, see if we can
/// fold the result. If not, this returns null.
-Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD,
- const DominatorTree *) {
+static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
// The type of the GEP pointer operand.
PointerType *PtrTy = dyn_cast<PointerType>(Ops[0]->getType());
// The GEP pointer operand is not a pointer, it's a vector of pointers.
@@ -2454,9 +2513,9 @@ Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD,
if (C->isZero())
return Ops[0];
// getelementptr P, N -> P if P points to a type of zero size.
- if (TD) {
+ if (Q.TD) {
Type *Ty = PtrTy->getElementType();
- if (Ty->isSized() && TD->getTypeAllocSize(Ty) == 0)
+ if (Ty->isSized() && Q.TD->getTypeAllocSize(Ty) == 0)
return Ops[0];
}
}
@@ -2469,12 +2528,17 @@ Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD,
return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1));
}
+Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyGEPInst(Ops, Query(TD, TLI, DT), RecursionLimit);
+}
+
/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
/// can fold the result. If not, this returns null.
-Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
- ArrayRef<unsigned> Idxs,
- const TargetData *,
- const DominatorTree *) {
+static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
+ ArrayRef<unsigned> Idxs, const Query &Q,
+ unsigned) {
if (Constant *CAgg = dyn_cast<Constant>(Agg))
if (Constant *CVal = dyn_cast<Constant>(Val))
return ConstantFoldInsertValueInstruction(CAgg, CVal, Idxs);
@@ -2499,8 +2563,17 @@ Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
return 0;
}
+Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
+ ArrayRef<unsigned> Idxs,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query(TD, TLI, DT),
+ RecursionLimit);
+}
+
/// SimplifyPHINode - See if we can fold the given phi. If not, returns null.
-static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) {
+static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
// If all of the PHI's incoming values are the same then replace the PHI node
// with the common value.
Value *CommonValue = 0;
@@ -2528,81 +2601,77 @@ static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) {
// instruction, we cannot return X as the result of the PHI node unless it
// dominates the PHI block.
if (HasUndefInput)
- return ValueDominatesPHI(CommonValue, PN, DT) ? CommonValue : 0;
+ return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : 0;
return CommonValue;
}
+static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) {
+ if (Constant *C = dyn_cast<Constant>(Op))
+ return ConstantFoldInstOperands(Instruction::Trunc, Ty, C, Q.TD, Q.TLI);
+
+ return 0;
+}
+
+Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyTruncInst(Op, Ty, Query(TD, TLI, DT), RecursionLimit);
+}
+
//=== Helper functions for higher up the class hierarchy.
/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
switch (Opcode) {
case Instruction::Add:
return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
- TD, TLI, DT, MaxRecurse);
+ Q, MaxRecurse);
case Instruction::Sub:
return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
- TD, TLI, DT, MaxRecurse);
- case Instruction::Mul: return SimplifyMulInst (LHS, RHS, TD, TLI, DT,
- MaxRecurse);
- case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, TLI, DT,
- MaxRecurse);
- case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, TLI, DT,
- MaxRecurse);
- case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, TLI, DT,
- MaxRecurse);
- case Instruction::SRem: return SimplifySRemInst(LHS, RHS, TD, TLI, DT,
- MaxRecurse);
- case Instruction::URem: return SimplifyURemInst(LHS, RHS, TD, TLI, DT,
- MaxRecurse);
- case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, TD, TLI, DT,
- MaxRecurse);
+ Q, MaxRecurse);
+ case Instruction::Mul: return SimplifyMulInst (LHS, RHS, Q, MaxRecurse);
+ case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::SRem: return SimplifySRemInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::URem: return SimplifyURemInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, Q, MaxRecurse);
case Instruction::Shl:
return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
- TD, TLI, DT, MaxRecurse);
+ Q, MaxRecurse);
case Instruction::LShr:
- return SimplifyLShrInst(LHS, RHS, /*isExact*/false, TD, TLI, DT,
- MaxRecurse);
+ return SimplifyLShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse);
case Instruction::AShr:
- return SimplifyAShrInst(LHS, RHS, /*isExact*/false, TD, TLI, DT,
- MaxRecurse);
- case Instruction::And: return SimplifyAndInst(LHS, RHS, TD, TLI, DT,
- MaxRecurse);
- case Instruction::Or: return SimplifyOrInst (LHS, RHS, TD, TLI, DT,
- MaxRecurse);
- case Instruction::Xor: return SimplifyXorInst(LHS, RHS, TD, TLI, DT,
- MaxRecurse);
+ return SimplifyAShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse);
+ case Instruction::And: return SimplifyAndInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::Or: return SimplifyOrInst (LHS, RHS, Q, MaxRecurse);
+ case Instruction::Xor: return SimplifyXorInst(LHS, RHS, Q, MaxRecurse);
default:
if (Constant *CLHS = dyn_cast<Constant>(LHS))
if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
Constant *COps[] = {CLHS, CRHS};
- return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, TD, TLI);
+ return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, Q.TD,
+ Q.TLI);
}
// If the operation is associative, try some generic simplifications.
if (Instruction::isAssociative(Opcode))
- if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, TD, TLI, DT,
- MaxRecurse))
+ if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, Q, MaxRecurse))
return V;
- // If the operation is with the result of a select instruction, check whether
+ // If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
- if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, TD, TLI, DT,
- MaxRecurse))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, Q, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
- if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, TD, TLI, DT,
- MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, Q, MaxRecurse))
return V;
return 0;
@@ -2612,28 +2681,26 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
const TargetData *TD, const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyBinOp(Opcode, LHS, RHS, TD, TLI, DT, RecursionLimit);
+ return ::SimplifyBinOp(Opcode, LHS, RHS, Query(TD, TLI, DT), RecursionLimit);
}
/// SimplifyCmpInst - Given operands for a CmpInst, see if we can
/// fold the result.
static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
- return SimplifyICmpInst(Predicate, LHS, RHS, TD, TLI, DT, MaxRecurse);
- return SimplifyFCmpInst(Predicate, LHS, RHS, TD, TLI, DT, MaxRecurse);
+ return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse);
+ return SimplifyFCmpInst(Predicate, LHS, RHS, Q, MaxRecurse);
}
Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const TargetData *TD, const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyCmpInst(Predicate, LHS, RHS, TD, TLI, DT, RecursionLimit);
+ return ::SimplifyCmpInst(Predicate, LHS, RHS, Query(TD, TLI, DT),
+ RecursionLimit);
}
-static Value *SimplifyCallInst(CallInst *CI) {
+static Value *SimplifyCallInst(CallInst *CI, const Query &) {
// call undef -> undef
if (isa<UndefValue>(CI->getCalledValue()))
return UndefValue::get(CI->getType());
@@ -2720,25 +2787,28 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD,
break;
case Instruction::Select:
Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
- I->getOperand(2), TD, DT);
+ I->getOperand(2), TD, TLI, DT);
break;
case Instruction::GetElementPtr: {
SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
- Result = SimplifyGEPInst(Ops, TD, DT);
+ Result = SimplifyGEPInst(Ops, TD, TLI, DT);
break;
}
case Instruction::InsertValue: {
InsertValueInst *IV = cast<InsertValueInst>(I);
Result = SimplifyInsertValueInst(IV->getAggregateOperand(),
IV->getInsertedValueOperand(),
- IV->getIndices(), TD, DT);
+ IV->getIndices(), TD, TLI, DT);
break;
}
case Instruction::PHI:
- Result = SimplifyPHINode(cast<PHINode>(I), DT);
+ Result = SimplifyPHINode(cast<PHINode>(I), Query(TD, TLI, DT));
break;
case Instruction::Call:
- Result = SimplifyCallInst(cast<CallInst>(I));
+ Result = SimplifyCallInst(cast<CallInst>(I), Query(TD, TLI, DT));
+ break;
+ case Instruction::Trunc:
+ Result = SimplifyTruncInst(I->getOperand(0), I->getType(), TD, TLI, DT);
break;
}
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 279d6a9..5ca2746 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -866,10 +866,11 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
// BBFrom to BBTo.
unsigned NumEdges = 0;
ConstantInt *EdgeVal = 0;
- for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i) {
- if (SI->getCaseSuccessor(i) != BBTo) continue;
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ if (i.getCaseSuccessor() != BBTo) continue;
if (NumEdges++) break;
- EdgeVal = SI->getCaseValue(i);
+ EdgeVal = i.getCaseValue();
}
assert(EdgeVal && "Missing successor?");
if (NumEdges == 1) {
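
The same index-to-iterator migration for SwitchInst cases recurs in BitcodeWriter.cpp and SparsePropagation.cpp further down. A minimal sketch of the new walk, assuming the 3.1-era API visible in this hunk; `visit` is a hypothetical callback:

    #include "llvm/Instructions.h"

    // Hedged sketch: `visit` is made up; the iterator API (case_begin,
    // case_end, getCaseValue, getCaseSuccessor) is the one used above.
    static void walkCases(llvm::SwitchInst *SI,
                          void (*visit)(llvm::ConstantInt *,
                                        llvm::BasicBlock *)) {
      for (llvm::SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
           i != e; ++i)
        visit(i.getCaseValue(), i.getCaseSuccessor());
    }

The iterator keeps a case value paired with its successor, so callers can no longer accidentally mix indices from different case lists.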
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 0e6bcbf..873a275 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -17,6 +17,7 @@
#include "llvm/GlobalAlias.h"
#include "llvm/GlobalVariable.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Operator.h"
using namespace llvm;
@@ -160,10 +161,15 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
/// MaxInstsToScan specifies the maximum instructions to scan in the block. If
/// it is set to 0, it will scan the whole block. You can also optionally
/// specify an alias analysis implementation, which makes this more precise.
+///
+/// If TBAATag is non-null and a load or store is found, the TBAA tag from the
+/// load or store is recorded there. If there is no TBAA tag or if no access
+/// is found, it is left unmodified.
Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan,
- AliasAnalysis *AA) {
+ AliasAnalysis *AA,
+ MDNode **TBAATag) {
if (MaxInstsToScan == 0) MaxInstsToScan = ~0U;
// If we're using alias analysis to disambiguate get the size of *Ptr.
@@ -191,15 +197,19 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
// (This is true even if the load is volatile or atomic, although
// those cases are unlikely.)
if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
- if (AreEquivalentAddressValues(LI->getOperand(0), Ptr))
+ if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) {
+ if (TBAATag) *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa);
return LI;
+ }
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// If this is a store through Ptr, the value is available!
// (This is true even if the store is volatile or atomic, although
// those cases are unlikely.)
- if (AreEquivalentAddressValues(SI->getOperand(1), Ptr))
+ if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) {
+ if (TBAATag) *TBAATag = SI->getMetadata(LLVMContext::MD_tbaa);
return SI->getOperand(0);
+ }
// If Ptr is an alloca and this is a store to a different alloca, ignore
// the store. This is a trivial form of alias analysis that is important
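
A caller-side sketch of the new out-parameter, following the doc comment above; LoadPtr, BB, ScanIt and AA are assumed to exist in the surrounding code:

    // Hedged fragment, not a complete function: shows how a caller is meant
    // to thread the optional TBAA tag through FindAvailableLoadedValue.
    MDNode *TBAATag = 0;
    if (Value *Avail = FindAvailableLoadedValue(LoadPtr, BB, ScanIt,
                                                6 /*MaxInstsToScan*/, AA,
                                                &TBAATag)) {
      // Avail can replace the load; if TBAATag is non-null it can be
      // re-attached to whatever new access is built from Avail.
    }

Existing callers presumably pass null (or rely on a default argument in the header) and keep the old behaviour.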
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index ca06300..38cb1c9 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -227,7 +227,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
return GEP;
// Simplify the GEP to handle 'gep x, 0' -> x etc.
- if (Value *V = SimplifyGEPInst(GEPOps, TD, DT)) {
+ if (Value *V = SimplifyGEPInst(GEPOps, TD, TLI, DT)) {
for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
RemoveInstInputs(GEPOps[i], InstInputs);
diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp
index 0c7d05f..c819666 100644
--- a/lib/Analysis/SparsePropagation.cpp
+++ b/lib/Analysis/SparsePropagation.cpp
@@ -194,9 +194,8 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI,
Succs.assign(TI.getNumSuccessors(), true);
return;
}
-
- unsigned CCase = SI.findCaseValue(cast<ConstantInt>(C));
- Succs[SI.resolveSuccessorIndex(CCase)] = true;
+ SwitchInst::CaseIt Case = SI.findCaseValue(cast<ConstantInt>(C));
+ Succs[Case.getSuccessorIndex()] = true;
}
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index b5811f2..01e00ca 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -41,6 +41,160 @@ static unsigned getBitWidth(Type *Ty, const TargetData *TD) {
return TD ? TD->getPointerSizeInBits() : 0;
}
+static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
+ const APInt &Mask,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2,
+ const TargetData *TD, unsigned Depth) {
+ if (!Add) {
+ if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) {
+ // We know that the top bits of C-X are clear if X contains fewer bits
+ // than C (i.e. no wrap-around can happen). For example, 20-X is
+ // positive if we can prove that X is >= 0 and < 16.
+ if (!CLHS->getValue().isNegative()) {
+ unsigned BitWidth = Mask.getBitWidth();
+ unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
+ // NLZ can't be BitWidth with no sign bit
+ APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+ llvm::ComputeMaskedBits(Op1, MaskV, KnownZero2, KnownOne2, TD, Depth+1);
+
+ // If all of the MaskV bits are known to be zero, then we know the
+ // output top bits are zero, because we now know that the output is
+ // from [0-C].
+ if ((KnownZero2 & MaskV) == MaskV) {
+ unsigned NLZ2 = CLHS->getValue().countLeadingZeros();
+ // Top bits known zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
+ }
+ }
+ }
+ }
+
+ unsigned BitWidth = Mask.getBitWidth();
+
+ // If one of the operands has trailing zeros, then the bits that the
+ // other operand has in those bit positions will be preserved in the
+ // result. For an add, this works with either operand. For a subtract,
+ // this only works if the known zeros are in the right operand.
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ APInt Mask2 = APInt::getLowBitsSet(BitWidth,
+ BitWidth - Mask.countLeadingZeros());
+ llvm::ComputeMaskedBits(Op0, Mask2, LHSKnownZero, LHSKnownOne, TD, Depth+1);
+ assert((LHSKnownZero & LHSKnownOne) == 0 &&
+ "Bits known to be one AND zero?");
+ unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes();
+
+ llvm::ComputeMaskedBits(Op1, Mask2, KnownZero2, KnownOne2, TD, Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes();
+
+ // Determine which operand has more trailing zeros, and use that
+ // many bits from the other operand.
+ if (LHSKnownZeroOut > RHSKnownZeroOut) {
+ if (Add) {
+ APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut);
+ KnownZero |= KnownZero2 & Mask;
+ KnownOne |= KnownOne2 & Mask;
+ } else {
+ // If the known zeros are in the left operand for a subtract,
+ // fall back to the minimum known zeros in both operands.
+ KnownZero |= APInt::getLowBitsSet(BitWidth,
+ std::min(LHSKnownZeroOut,
+ RHSKnownZeroOut));
+ }
+ } else if (RHSKnownZeroOut >= LHSKnownZeroOut) {
+ APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut);
+ KnownZero |= LHSKnownZero & Mask;
+ KnownOne |= LHSKnownOne & Mask;
+ }
+
+ // Are we still trying to solve for the sign bit?
+ if (Mask.isNegative() && !KnownZero.isNegative() && !KnownOne.isNegative()) {
+ if (NSW) {
+ if (Add) {
+ // Adding two positive numbers can't wrap into negative
+ if (LHSKnownZero.isNegative() && KnownZero2.isNegative())
+ KnownZero |= APInt::getSignBit(BitWidth);
+ // and adding two negative numbers can't wrap into positive.
+ else if (LHSKnownOne.isNegative() && KnownOne2.isNegative())
+ KnownOne |= APInt::getSignBit(BitWidth);
+ } else {
+ // Subtracting a negative number from a positive one can't wrap
+ if (LHSKnownZero.isNegative() && KnownOne2.isNegative())
+ KnownZero |= APInt::getSignBit(BitWidth);
+ // neither can subtracting a positive number from a negative one.
+ else if (LHSKnownOne.isNegative() && KnownZero2.isNegative())
+ KnownOne |= APInt::getSignBit(BitWidth);
+ }
+ }
+ }
+}
+
+static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW,
+ const APInt &Mask,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2,
+ const TargetData *TD, unsigned Depth) {
+ unsigned BitWidth = Mask.getBitWidth();
+ APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(Op1, Mask2, KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(Op0, Mask2, KnownZero2, KnownOne2, TD, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ bool isKnownNegative = false;
+ bool isKnownNonNegative = false;
+ // If the multiplication is known not to overflow, compute the sign bit.
+ if (Mask.isNegative() && NSW) {
+ if (Op0 == Op1) {
+ // The product of a number with itself is non-negative.
+ isKnownNonNegative = true;
+ } else {
+ bool isKnownNonNegativeOp1 = KnownZero.isNegative();
+ bool isKnownNonNegativeOp0 = KnownZero2.isNegative();
+ bool isKnownNegativeOp1 = KnownOne.isNegative();
+ bool isKnownNegativeOp0 = KnownOne2.isNegative();
+ // The product of two numbers with the same sign is non-negative.
+ isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
+ (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
+ // The product of a negative number and a non-negative number is either
+ // negative or zero.
+ if (!isKnownNonNegative)
+ isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
+ isKnownNonZero(Op0, TD, Depth)) ||
+ (isKnownNegativeOp0 && isKnownNonNegativeOp1 &&
+ isKnownNonZero(Op1, TD, Depth));
+ }
+ }
+
+ // If low bits are zero in either operand, output low known-0 bits.
+ // Also compute a conservative estimate for high known-0 bits.
+ // More trickiness is possible, but this is sufficient for the
+ // interesting case of alignment computation.
+ KnownOne.clearAllBits();
+ unsigned TrailZ = KnownZero.countTrailingOnes() +
+ KnownZero2.countTrailingOnes();
+ unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
+ KnownZero2.countLeadingOnes(),
+ BitWidth) - BitWidth;
+
+ TrailZ = std::min(TrailZ, BitWidth);
+ LeadZ = std::min(LeadZ, BitWidth);
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+ APInt::getHighBitsSet(BitWidth, LeadZ);
+ KnownZero &= Mask;
+
+ // Only make use of no-wrap flags if we failed to compute the sign bit
+ // directly. This matters if the multiplication always overflows, in
+ // which case we prefer to follow the result of the direct computation,
+ // though as the program is invoking undefined behaviour we can choose
+ // whatever we like here.
+ if (isKnownNonNegative && !KnownOne.isNegative())
+ KnownZero.setBit(BitWidth - 1);
+ else if (isKnownNegative && !KnownZero.isNegative())
+ KnownOne.setBit(BitWidth - 1);
+}
+
/// ComputeMaskedBits - Determine which of the bits specified in Mask are
/// known to be either zero or one and return them in the KnownZero/KnownOne
/// bit sets. This code only analyzes bits in Mask, in order to short-circuit
@@ -106,16 +260,18 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// The address of an aligned GlobalValue has trailing zeros.
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
unsigned Align = GV->getAlignment();
- if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) {
+ if (Align == 0 && TD) {
if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
Type *ObjectType = GVar->getType()->getElementType();
- // If the object is defined in the current Module, we'll be giving
- // it the preferred alignment. Otherwise, we have to assume that it
- // may only have the minimum ABI alignment.
- if (!GVar->isDeclaration() && !GVar->isWeakForLinker())
- Align = TD->getPreferredAlignment(GVar);
- else
- Align = TD->getABITypeAlignment(ObjectType);
+ if (ObjectType->isSized()) {
+ // If the object is defined in the current Module, we'll be giving
+ // it the preferred alignment. Otherwise, we have to assume that it
+ // may only have the minimum ABI alignment.
+ if (!GVar->isDeclaration() && !GVar->isWeakForLinker())
+ Align = TD->getPreferredAlignment(GVar);
+ else
+ Align = TD->getABITypeAlignment(ObjectType);
+ }
}
}
if (Align > 0)
@@ -203,68 +359,11 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
return;
}
case Instruction::Mul: {
- APInt Mask2 = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, TD,Depth+1);
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
- Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
- bool isKnownNegative = false;
- bool isKnownNonNegative = false;
- // If the multiplication is known not to overflow, compute the sign bit.
- if (Mask.isNegative() &&
- cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap()) {
- Value *Op1 = I->getOperand(1), *Op2 = I->getOperand(0);
- if (Op1 == Op2) {
- // The product of a number with itself is non-negative.
- isKnownNonNegative = true;
- } else {
- bool isKnownNonNegative1 = KnownZero.isNegative();
- bool isKnownNonNegative2 = KnownZero2.isNegative();
- bool isKnownNegative1 = KnownOne.isNegative();
- bool isKnownNegative2 = KnownOne2.isNegative();
- // The product of two numbers with the same sign is non-negative.
- isKnownNonNegative = (isKnownNegative1 && isKnownNegative2) ||
- (isKnownNonNegative1 && isKnownNonNegative2);
- // The product of a negative number and a non-negative number is either
- // negative or zero.
- if (!isKnownNonNegative)
- isKnownNegative = (isKnownNegative1 && isKnownNonNegative2 &&
- isKnownNonZero(Op2, TD, Depth)) ||
- (isKnownNegative2 && isKnownNonNegative1 &&
- isKnownNonZero(Op1, TD, Depth));
- }
- }
-
- // If low bits are zero in either operand, output low known-0 bits.
- // Also compute a conserative estimate for high known-0 bits.
- // More trickiness is possible, but this is sufficient for the
- // interesting case of alignment computation.
- KnownOne.clearAllBits();
- unsigned TrailZ = KnownZero.countTrailingOnes() +
- KnownZero2.countTrailingOnes();
- unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
- KnownZero2.countLeadingOnes(),
- BitWidth) - BitWidth;
-
- TrailZ = std::min(TrailZ, BitWidth);
- LeadZ = std::min(LeadZ, BitWidth);
- KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
- APInt::getHighBitsSet(BitWidth, LeadZ);
- KnownZero &= Mask;
-
- // Only make use of no-wrap flags if we failed to compute the sign bit
- // directly. This matters if the multiplication always overflows, in
- // which case we prefer to follow the result of the direct computation,
- // though as the program is invoking undefined behaviour we can choose
- // whatever we like here.
- if (isKnownNonNegative && !KnownOne.isNegative())
- KnownZero.setBit(BitWidth - 1);
- else if (isKnownNegative && !KnownZero.isNegative())
- KnownOne.setBit(BitWidth - 1);
-
- return;
+ bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ ComputeMaskedBitsMul(I->getOperand(0), I->getOperand(1), NSW,
+ Mask, KnownZero, KnownOne, KnownZero2, KnownOne2,
+ TD, Depth);
+ break;
}
case Instruction::UDiv: {
// For the purposes of computing leading zeros we can conservatively
@@ -422,91 +521,18 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
}
break;
case Instruction::Sub: {
- if (ConstantInt *CLHS = dyn_cast<ConstantInt>(I->getOperand(0))) {
- // We know that the top bits of C-X are clear if X contains less bits
- // than C (i.e. no wrap-around can happen). For example, 20-X is
- // positive if we can prove that X is >= 0 and < 16.
- if (!CLHS->getValue().isNegative()) {
- unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
- // NLZ can't be BitWidth with no sign bit
- APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- ComputeMaskedBits(I->getOperand(1), MaskV, KnownZero2, KnownOne2,
- TD, Depth+1);
-
- // If all of the MaskV bits are known to be zero, then we know the
- // output top bits are zero, because we now know that the output is
- // from [0-C].
- if ((KnownZero2 & MaskV) == MaskV) {
- unsigned NLZ2 = CLHS->getValue().countLeadingZeros();
- // Top bits known zero.
- KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
- }
- }
- }
+ bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ ComputeMaskedBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
+ Mask, KnownZero, KnownOne, KnownZero2, KnownOne2,
+ TD, Depth);
+ break;
}
- // fall through
case Instruction::Add: {
- // If one of the operands has trailing zeros, then the bits that the
- // other operand has in those bit positions will be preserved in the
- // result. For an add, this works with either operand. For a subtract,
- // this only works if the known zeros are in the right operand.
- APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- APInt Mask2 = APInt::getLowBitsSet(BitWidth,
- BitWidth - Mask.countLeadingZeros());
- ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD,
- Depth+1);
- assert((LHSKnownZero & LHSKnownOne) == 0 &&
- "Bits known to be one AND zero?");
- unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes();
-
- ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero2, KnownOne2, TD,
- Depth+1);
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
- unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes();
-
- // Determine which operand has more trailing zeros, and use that
- // many bits from the other operand.
- if (LHSKnownZeroOut > RHSKnownZeroOut) {
- if (I->getOpcode() == Instruction::Add) {
- APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut);
- KnownZero |= KnownZero2 & Mask;
- KnownOne |= KnownOne2 & Mask;
- } else {
- // If the known zeros are in the left operand for a subtract,
- // fall back to the minimum known zeros in both operands.
- KnownZero |= APInt::getLowBitsSet(BitWidth,
- std::min(LHSKnownZeroOut,
- RHSKnownZeroOut));
- }
- } else if (RHSKnownZeroOut >= LHSKnownZeroOut) {
- APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut);
- KnownZero |= LHSKnownZero & Mask;
- KnownOne |= LHSKnownOne & Mask;
- }
-
- // Are we still trying to solve for the sign bit?
- if (Mask.isNegative() && !KnownZero.isNegative() && !KnownOne.isNegative()){
- OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(I);
- if (OBO->hasNoSignedWrap()) {
- if (I->getOpcode() == Instruction::Add) {
- // Adding two positive numbers can't wrap into negative
- if (LHSKnownZero.isNegative() && KnownZero2.isNegative())
- KnownZero |= APInt::getSignBit(BitWidth);
- // and adding two negative numbers can't wrap into positive.
- else if (LHSKnownOne.isNegative() && KnownOne2.isNegative())
- KnownOne |= APInt::getSignBit(BitWidth);
- } else {
- // Subtracting a negative number from a positive one can't wrap
- if (LHSKnownZero.isNegative() && KnownOne2.isNegative())
- KnownZero |= APInt::getSignBit(BitWidth);
- // neither can subtracting a positive number from a negative one.
- else if (LHSKnownOne.isNegative() && KnownZero2.isNegative())
- KnownOne |= APInt::getSignBit(BitWidth);
- }
- }
- }
-
- return;
+ bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ ComputeMaskedBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
+ Mask, KnownZero, KnownOne, KnownZero2, KnownOne2,
+ TD, Depth);
+ break;
}
case Instruction::SRem:
if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
@@ -691,8 +717,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
if (P->hasConstantValue() == P)
break;
- KnownZero = APInt::getAllOnesValue(BitWidth);
- KnownOne = APInt::getAllOnesValue(BitWidth);
+ KnownZero = Mask;
+ KnownOne = Mask;
for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) {
// Skip direct self references.
if (P->getIncomingValue(i) == P) continue;
@@ -723,21 +749,51 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// If this call is undefined for 0, the result will be less than 2^n.
if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
LowBits -= 1;
- KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ KnownZero = Mask & APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
break;
}
case Intrinsic::ctpop: {
unsigned LowBits = Log2_32(BitWidth)+1;
- KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ KnownZero = Mask & APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
break;
}
case Intrinsic::x86_sse42_crc32_64_8:
case Intrinsic::x86_sse42_crc32_64_64:
- KnownZero = APInt::getHighBitsSet(64, 32);
+ KnownZero = Mask & APInt::getHighBitsSet(64, 32);
break;
}
}
break;
+ case Instruction::ExtractValue:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
+ ExtractValueInst *EVI = cast<ExtractValueInst>(I);
+ if (EVI->getNumIndices() != 1) break;
+ if (EVI->getIndices()[0] == 0) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ ComputeMaskedBitsAddSub(true, II->getArgOperand(0),
+ II->getArgOperand(1), false, Mask,
+ KnownZero, KnownOne, KnownZero2, KnownOne2,
+ TD, Depth);
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ ComputeMaskedBitsAddSub(false, II->getArgOperand(0),
+ II->getArgOperand(1), false, Mask,
+ KnownZero, KnownOne, KnownZero2, KnownOne2,
+ TD, Depth);
+ break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ ComputeMaskedBitsMul(II->getArgOperand(0), II->getArgOperand(1),
+ false, Mask, KnownZero, KnownOne,
+ KnownZero2, KnownOne2, TD, Depth);
+ break;
+ }
+ }
+ }
}
}
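
The Add/Sub/Mul known-bits logic is hoisted into the helpers above so the *.with.overflow intrinsics in the new ExtractValue case can reuse it. A standalone check of the trailing-zeros rule those helpers implement:

    #include <cassert>

    // Standalone demo (plain C++, no LLVM): if one operand of an add has its
    // low k bits known zero, the low k bits of the sum equal the low k bits
    // of the other operand, since no carry can enter them. This is the
    // countTrailingOnes/getLowBitsSet logic in ComputeMaskedBitsAddSub.
    int main() {
      const unsigned k = 4, low = (1u << k) - 1;
      for (unsigned lhs = 0; lhs < 256; ++lhs)
        for (unsigned rhs = 0; rhs < 256; rhs += 1u << k) // low k bits zero
          assert(((lhs + rhs) & low) == (lhs & low));
      return 0;
    }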
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 9376990..b25d2e9 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1142,9 +1142,10 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
Vals.push_back(VE.getTypeID(SI.getCondition()->getType()));
Vals.push_back(VE.getValueID(SI.getCondition()));
Vals.push_back(VE.getValueID(SI.getDefaultDest()));
- for (unsigned i = 0, e = SI.getNumCases(); i != e; ++i) {
- Vals.push_back(VE.getValueID(SI.getCaseValue(i)));
- Vals.push_back(VE.getValueID(SI.getCaseSuccessor(i)));
+ for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end();
+ i != e; ++i) {
+ Vals.push_back(VE.getValueID(i.getCaseValue()));
+ Vals.push_back(VE.getValueID(i.getCaseSuccessor()));
}
}
break;
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index d5926f9..dd3fb3b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1864,13 +1864,12 @@ static void EmitGlobalConstantLargeInt(const ConstantInt *CI,
static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
AsmPrinter &AP) {
- if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) {
- uint64_t Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+ const TargetData *TD = AP.TM.getTargetData();
+ uint64_t Size = TD->getTypeAllocSize(CV->getType());
+ if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
return AP.OutStreamer.EmitZeros(Size, AddrSpace);
- }
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
- unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
switch (Size) {
case 1:
case 2:
@@ -1891,7 +1890,6 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
return EmitGlobalConstantFP(CFP, AddrSpace, AP);
if (isa<ConstantPointerNull>(CV)) {
- unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
AP.OutStreamer.EmitIntValue(0, Size, AddrSpace);
return;
}
@@ -1905,20 +1903,28 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
return EmitGlobalConstantStruct(CVS, AddrSpace, AP);
- // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
- // vectors).
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV))
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
+ // vectors).
if (CE->getOpcode() == Instruction::BitCast)
return EmitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP);
+
+ if (Size > 8) {
+ // If the constant expression's size is greater than 64 bits, then we have
+ // to emit the value in chunks. Try to constant fold the value and emit it
+ // that way.
+ Constant *New = ConstantFoldConstantExpression(CE, TD);
+ if (New && New != CE)
+ return EmitGlobalConstantImpl(New, AddrSpace, AP);
+ }
+ }
if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
return EmitGlobalConstantVector(V, AddrSpace, AP);
// Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
// through the streamer with EmitValue.
- AP.OutStreamer.EmitValue(LowerConstant(CV, AP),
- AP.TM.getTargetData()->getTypeAllocSize(CV->getType()),
- AddrSpace);
+ AP.OutStreamer.EmitValue(LowerConstant(CV, AP), Size, AddrSpace);
}
/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
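
On the Size > 8 path added above: the single EmitValue/MCExpr route can describe at most 64 bits, so a wider ConstantExpr has to be folded to a plain integer first and then streamed out in chunks. A standalone sketch of such chunking, assuming a little-endian target and a made-up emitChunk:

    #include <cstdint>
    #include <cstdio>

    // Hedged demo, not LLVM code: emit a 128-bit value as two .quad chunks,
    // least-significant first, as a little-endian target would lay it out.
    static void emitChunk(uint64_t v) {
      std::printf(".quad 0x%016llx\n", (unsigned long long)v);
    }

    int main() {
      const uint64_t lo = 0x1122334455667788ULL; // low half of the i128
      const uint64_t hi = 0x99aabbccddeeff00ULL; // high half of the i128
      emitChunk(lo);
      emitChunk(hi);
      return 0;
    }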
@@ -2102,27 +2108,22 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
OutStreamer.EmitLabel(Syms[i]);
}
+ // Print some verbose block comments.
+ if (isVerbose()) {
+ if (const BasicBlock *BB = MBB->getBasicBlock())
+ if (BB->hasName())
+ OutStreamer.AddComment("%" + BB->getName());
+ EmitBasicBlockLoopComments(*MBB, LI, *this);
+ }
+
// Print the main label for the block.
if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) {
if (isVerbose() && OutStreamer.hasRawTextSupport()) {
- if (const BasicBlock *BB = MBB->getBasicBlock())
- if (BB->hasName())
- OutStreamer.AddComment("%" + BB->getName());
-
- EmitBasicBlockLoopComments(*MBB, LI, *this);
-
// NOTE: Want this comment at start of line, don't emit with AddComment.
OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" +
Twine(MBB->getNumber()) + ":");
}
} else {
- if (isVerbose()) {
- if (const BasicBlock *BB = MBB->getBasicBlock())
- if (BB->hasName())
- OutStreamer.AddComment("%" + BB->getName());
- EmitBasicBlockLoopComments(*MBB, LI, *this);
- }
-
OutStreamer.EmitLabel(MBB->getSymbol());
}
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 51c635e..3b383f6 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -156,13 +156,12 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) {
// Verify subprogram.
if (!SP.Verify())
return;
- // If the line number is 0, don't add it.
- if (SP.getLineNumber() == 0)
- return;
+ // If the line number is 0, don't add it.
unsigned Line = SP.getLineNumber();
- if (!SP.getContext().Verify())
+ if (Line == 0)
return;
+
unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(),
SP.getDirectory());
assert(FileID && "Invalid file id");
@@ -178,7 +177,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) {
return;
unsigned Line = Ty.getLineNumber();
- if (Line == 0 || !Ty.getContext().Verify())
+ if (Line == 0)
return;
unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(),
Ty.getDirectory());
@@ -870,11 +869,6 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
if (CTy.isAppleBlockExtension())
addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1);
- unsigned RLang = CTy.getRunTimeLang();
- if (RLang)
- addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
- dwarf::DW_FORM_data1, RLang);
-
DICompositeType ContainingType = CTy.getContainingType();
if (DIDescriptor(ContainingType).isCompositeType())
addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
@@ -922,6 +916,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// Add source line info if available.
if (!CTy.isForwardDecl())
addSourceLine(&Buffer, CTy);
+
+ // No harm in adding the runtime language to the declaration.
+ unsigned RLang = CTy.getRunTimeLang();
+ if (RLang)
+ addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
+ dwarf::DW_FORM_data1, RLang);
}
}
@@ -1006,6 +1006,9 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
// Add function template parameters.
addTemplateParams(*SPDie, SP.getTemplateParams());
+ // Unfortunately this code needs to stay here to work around a bug in
+ // older gdbs, which require the linkage name to resolve multiple
+ // template functions.
StringRef LinkageName = SP.getLinkageName();
if (!LinkageName.empty())
addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index fa62169..388cef4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -523,20 +523,19 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName,
DirName = "";
unsigned SrcId = SourceIdMap.size()+1;
- std::pair<std::string, std::string> SourceName =
- std::make_pair(FileName, DirName);
- std::pair<std::pair<std::string, std::string>, unsigned> Entry =
- make_pair(SourceName, SrcId);
- std::map<std::pair<std::string, std::string>, unsigned>::iterator I;
- bool NewlyInserted;
- llvm::tie(I, NewlyInserted) = SourceIdMap.insert(Entry);
- if (!NewlyInserted)
- return I->second;
+ // We look up the file/dir pair by concatenating them with a zero byte.
+ SmallString<128> NamePair;
+ NamePair += DirName;
+ NamePair += '\0'; // Zero bytes are not allowed in paths.
+ NamePair += FileName;
+
+ StringMapEntry<unsigned> &Ent = SourceIdMap.GetOrCreateValue(NamePair, SrcId);
+ if (Ent.getValue() != SrcId)
+ return Ent.getValue();
// Print out a .file directive to specify files for .loc directives.
- Asm->OutStreamer.EmitDwarfFileDirective(SrcId, Entry.first.second,
- Entry.first.first);
+ Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName);
return SrcId;
}
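
The NUL-joined key above is collision-free because a zero byte cannot occur inside a path, unlike a plain concatenation of directory and file name. A standalone check:

    #include <cassert>
    #include <string>

    // Plain C++ demo of the keying scheme: ("ab","c") and ("a","bc") collide
    // under naive concatenation ("abc" both times) but not with a NUL joint.
    int main() {
      std::string a = std::string("ab") + '\0' + "c";
      std::string b = std::string("a") + '\0' + "bc";
      assert(a != b);
      assert(std::string("ab") + "c" == std::string("a") + "bc"); // the bug
      return 0;
    }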
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 8b802d2..83f30f5 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -26,7 +26,6 @@
#include "llvm/ADT/UniqueVector.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/DebugLoc.h"
-#include <map>
namespace llvm {
@@ -209,9 +208,9 @@ class DwarfDebug {
///
std::vector<DIEAbbrev *> Abbreviations;
- /// SourceIdMap - Source id map, i.e. pair of source filename and directory
- /// mapped to a unique id.
- std::map<std::pair<std::string, std::string>, unsigned> SourceIdMap;
+ /// SourceIdMap - Source id map, i.e. pair of source filename and directory,
+ /// separated by a zero byte, mapped to a unique id.
+ StringMap<unsigned> SourceIdMap;
/// StringPool - A String->Symbol mapping of strings used by indirect
/// references.
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 59c92b3..f57f4a8 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -1019,12 +1019,27 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
return MBB2I->isCall() && !MBB1I->isCall();
}
+/// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch
+/// instructions in the block. In practice this returns the DebugLoc of the
+/// last non-debug-value instruction, provided that instruction is a branch;
+/// otherwise it returns an unknown DebugLoc.
+static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return DebugLoc();
+ --I;
+ while (I->isDebugValue() && I != MBB.begin())
+ --I;
+ if (I->isBranch())
+ return I->getDebugLoc();
+ return DebugLoc();
+}
+
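
The hunks below all follow one pattern: capture the dying branch's location before RemoveBranch, then stamp the replacement with it, instead of using the block-scope blank `dl` that is removed above. A hedged fragment; Target and Cond stand in for whatever the surrounding transform computed:

    // Fragment, not a complete function: the shape of the rewrites below.
    DebugLoc dl = getBranchDebugLoc(PrevBB);        // loc of the old branch
    TII->RemoveBranch(PrevBB);
    TII->InsertBranch(PrevBB, Target, 0, Cond, dl); // new branch keeps it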
/// OptimizeBlock - Analyze and optimize control flow related to the specified
/// block. This is never called on the entry block.
bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
bool MadeChange = false;
MachineFunction &MF = *MBB->getParent();
- DebugLoc dl; // FIXME: this is nowhere
ReoptimizeBlock:
MachineFunction::iterator FallThrough = MBB;
@@ -1073,6 +1088,7 @@ ReoptimizeBlock:
// destination, remove the branch, replacing it with an unconditional one or
// a fall-through.
if (PriorTBB && PriorTBB == PriorFBB) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
PriorCond.clear();
if (PriorTBB != MBB)
@@ -1130,6 +1146,7 @@ ReoptimizeBlock:
// If the prior block branches somewhere else on the condition and here if
// the condition is false, remove the uncond second branch.
if (PriorFBB == MBB) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl);
MadeChange = true;
@@ -1143,6 +1160,7 @@ ReoptimizeBlock:
if (PriorTBB == MBB) {
SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl);
MadeChange = true;
@@ -1180,6 +1198,7 @@ ReoptimizeBlock:
DEBUG(dbgs() << "\nMoving MBB: " << *MBB
<< "To make fallthrough to: " << *PriorTBB << "\n");
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl);
@@ -1209,6 +1228,7 @@ ReoptimizeBlock:
if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
SmallVector<MachineOperand, 4> NewCond(CurCond);
if (!TII->ReverseBranchCondition(NewCond)) {
+ DebugLoc dl = getBranchDebugLoc(*MBB);
TII->RemoveBranch(*MBB);
TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl);
MadeChange = true;
@@ -1222,6 +1242,7 @@ ReoptimizeBlock:
if (CurTBB && CurCond.empty() && CurFBB == 0 &&
IsBranchOnlyBlock(MBB) && CurTBB != MBB &&
!MBB->hasAddressTaken()) {
+ DebugLoc dl = getBranchDebugLoc(*MBB);
// This block may contain just an unconditional branch. Because there can
// be 'non-branch terminators' in the block, try removing the branch and
// then seeing if the block is empty.
@@ -1264,8 +1285,9 @@ ReoptimizeBlock:
assert(PriorFBB == 0 && "Machine CFG out of date!");
PriorFBB = MBB;
}
+ DebugLoc pdl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, dl);
+ TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl);
}
// Iterate through all the predecessors, revectoring each in-turn.
@@ -1289,9 +1311,10 @@ ReoptimizeBlock:
bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB,
NewCurFBB, NewCurCond, true);
if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
+ DebugLoc pdl = getBranchDebugLoc(*PMBB);
TII->RemoveBranch(*PMBB);
NewCurCond.clear();
- TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, dl);
+ TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, pdl);
MadeChange = true;
++NumBranchOpts;
PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false);
@@ -1351,7 +1374,7 @@ ReoptimizeBlock:
if (CurFallsThru) {
MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
CurCond.clear();
- TII->InsertBranch(*MBB, NextBB, 0, CurCond, dl);
+ TII->InsertBranch(*MBB, NextBB, 0, CurCond, DebugLoc());
}
MBB->moveAfter(PredBB);
MadeChange = true;
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 0362365..21729cd 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -80,7 +80,6 @@ add_llvm_library(LLVMCodeGen
RegisterScavenging.cpp
RenderMachineFunction.cpp
ScheduleDAG.cpp
- ScheduleDAGEmit.cpp
ScheduleDAGInstrs.cpp
ScheduleDAGPrinter.cpp
ScoreboardHazardRecognizer.cpp
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index c684cdc..bad5010 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -35,7 +35,8 @@ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) :
RegClassInfo(RCI),
Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)),
KillIndices(TRI->getNumRegs(), 0),
- DefIndices(TRI->getNumRegs(), 0) {}
+ DefIndices(TRI->getNumRegs(), 0),
+ KeepRegs(TRI->getNumRegs(), false) {}
CriticalAntiDepBreaker::~CriticalAntiDepBreaker() {
}
@@ -52,9 +53,9 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
}
// Clear "do not change" set.
- KeepRegs.clear();
+ KeepRegs.reset();
- bool IsReturnBlock = (!BB->empty() && BB->back().isReturn());
+ bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn());
// Determine the live-out physregs for this block.
if (IsReturnBlock) {
@@ -63,14 +64,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
E = MRI.liveout_end(); I != E; ++I) {
unsigned Reg = *I;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BB->size();
+ KillIndices[Reg] = BBSize;
DefIndices[Reg] = ~0u;
// Repeat, for all aliases.
for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BB->size();
+ KillIndices[AliasReg] = BBSize;
DefIndices[AliasReg] = ~0u;
}
}
@@ -85,14 +86,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
E = (*SI)->livein_end(); I != E; ++I) {
unsigned Reg = *I;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BB->size();
+ KillIndices[Reg] = BBSize;
DefIndices[Reg] = ~0u;
// Repeat, for all aliases.
for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BB->size();
+ KillIndices[AliasReg] = BBSize;
DefIndices[AliasReg] = ~0u;
}
}
@@ -106,14 +107,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
unsigned Reg = *I;
if (!IsReturnBlock && !Pristine.test(Reg)) continue;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BB->size();
+ KillIndices[Reg] = BBSize;
DefIndices[Reg] = ~0u;
// Repeat, for all aliases.
for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BB->size();
+ KillIndices[AliasReg] = BBSize;
DefIndices[AliasReg] = ~0u;
}
}
@@ -121,7 +122,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
void CriticalAntiDepBreaker::FinishBlock() {
RegRefs.clear();
- KeepRegs.clear();
+ KeepRegs.reset();
}
void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
@@ -233,10 +234,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
RegRefs.insert(std::make_pair(Reg, &MO));
if (MO.isUse() && Special) {
- if (KeepRegs.insert(Reg)) {
+ if (!KeepRegs.test(Reg)) {
+ KeepRegs.set(Reg);
for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg)
- KeepRegs.insert(*Subreg);
+ KeepRegs.set(*Subreg);
}
}
}
@@ -259,7 +261,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
if (MO.clobbersPhysReg(i)) {
DefIndices[i] = Count;
KillIndices[i] = ~0u;
- KeepRegs.erase(i);
+ KeepRegs.reset(i);
Classes[i] = 0;
RegRefs.erase(i);
}
@@ -276,7 +278,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
assert(((KillIndices[Reg] == ~0u) !=
(DefIndices[Reg] == ~0u)) &&
"Kill and Def maps aren't consistent for Reg!");
- KeepRegs.erase(Reg);
+ KeepRegs.reset(Reg);
Classes[Reg] = 0;
RegRefs.erase(Reg);
// Repeat, for all subregs.
@@ -285,7 +287,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
unsigned SubregReg = *Subreg;
DefIndices[SubregReg] = Count;
KillIndices[SubregReg] = ~0u;
- KeepRegs.erase(SubregReg);
+ KeepRegs.reset(SubregReg);
Classes[SubregReg] = 0;
RegRefs.erase(SubregReg);
}
@@ -551,7 +553,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
if (!RegClassInfo.isAllocatable(AntiDepReg))
// Don't break anti-dependencies on non-allocatable registers.
AntiDepReg = 0;
- else if (KeepRegs.count(AntiDepReg))
+ else if (KeepRegs.test(AntiDepReg))
// Don't break anti-dependencies if a use down below requires
// this exact register.
AntiDepReg = 0;
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 0710780..7746259 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -24,7 +24,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallSet.h"
#include <map>
namespace llvm {
@@ -66,7 +65,7 @@ class TargetRegisterInfo;
/// KeepRegs - A set of registers which are live and cannot be changed to
/// break anti-dependencies.
- SmallSet<unsigned, 4> KeepRegs;
+ BitVector KeepRegs;
public:
CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&);
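The data-structure swap above replaces a small heap-backed set with a flat bit array sized by the target's physical register count, so membership tests become constant-time bit probes. A minimal sketch of the replacement idiom, with a made-up register count standing in for TRI->getNumRegs():

#include "llvm/ADT/BitVector.h"

void keepRegsIdiom() {
  const unsigned NumRegs = 256;             // stand-in for TRI->getNumRegs()
  llvm::BitVector KeepRegs(NumRegs, false); // one bit per physical register
  unsigned Reg = 42;
  if (!KeepRegs.test(Reg))                  // was: if (KeepRegs.insert(Reg))
    KeepRegs.set(Reg);
  KeepRegs.reset(Reg);                      // was: KeepRegs.erase(Reg)
  KeepRegs.reset();                         // was: KeepRegs.clear(); clears all bits
}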
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index f0cf290..5ff641c 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -23,10 +23,10 @@
//
//===----------------------------------------------------------------------===//
-#include "ScheduleDAGInstrs.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
using namespace llvm;
@@ -103,15 +103,12 @@ void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) {
namespace {
// DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides
// Schedule method to build the dependence graph.
-//
-// ScheduleDAGInstrs has LLVM_LIBRARY_VISIBILITY so we have to reference it as
-// an opaque pointer in VLIWPacketizerList.
class DefaultVLIWScheduler : public ScheduleDAGInstrs {
public:
DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
MachineDominatorTree &MDT, bool IsPostRA);
// Schedule - Actual scheduling work.
- void Schedule();
+ void schedule();
};
} // end anonymous namespace
@@ -121,9 +118,9 @@ DefaultVLIWScheduler::DefaultVLIWScheduler(
ScheduleDAGInstrs(MF, MLI, MDT, IsPostRA) {
}
-void DefaultVLIWScheduler::Schedule() {
+void DefaultVLIWScheduler::schedule() {
// Build the scheduling graph.
- BuildSchedGraph(0);
+ buildSchedGraph(0);
}
// VLIWPacketizerList Ctor
@@ -137,7 +134,7 @@ VLIWPacketizerList::VLIWPacketizerList(
// VLIWPacketizerList Dtor
VLIWPacketizerList::~VLIWPacketizerList() {
- delete (DefaultVLIWScheduler *)SchedulerImpl;
+ delete SchedulerImpl;
delete ResourceTracker;
}
@@ -184,18 +181,15 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
MachineBasicBlock::iterator BeginItr,
MachineBasicBlock::iterator EndItr) {
- DefaultVLIWScheduler *Scheduler = (DefaultVLIWScheduler *)SchedulerImpl;
- Scheduler->Run(MBB, BeginItr, EndItr, MBB->size());
+ assert(MBB->end() == EndItr && "Bad EndIndex");
- // Remember scheduling units.
- SUnits = Scheduler->SUnits;
+ SchedulerImpl->enterRegion(MBB, BeginItr, EndItr, MBB->size());
- // Generate MI -> SU map.
- std::map <MachineInstr*, SUnit*> MIToSUnit;
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- SUnit *SU = &SUnits[i];
- MIToSUnit[SU->getInstr()] = SU;
- }
+ // Build the DAG without reordering instructions.
+ SchedulerImpl->schedule();
+
+ // Remember scheduling units.
+ SUnits = SchedulerImpl->SUnits;
// The main packetizer loop.
for (; BeginItr != EndItr; ++BeginItr) {
@@ -211,7 +205,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
continue;
}
- SUnit *SUI = MIToSUnit[MI];
+ SUnit *SUI = SchedulerImpl->getSUnit(MI);
assert(SUI && "Missing SUnit Info!");
// Ask DFA if machine resource is available for MI.
@@ -221,7 +215,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
VE = CurrentPacketMIs.end(); VI != VE; ++VI) {
MachineInstr *MJ = *VI;
- SUnit *SUJ = MIToSUnit[MJ];
+ SUnit *SUJ = SchedulerImpl->getSUnit(MJ);
assert(SUJ && "Missing SUnit Info!");
// Is it legal to packetize SUI and SUJ together.
@@ -245,4 +239,6 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
// End any packet left behind.
endPacket(MBB, EndItr);
+
+ SchedulerImpl->exitRegion();
}
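With the opaque SchedulerImpl pointer gone, the packetizer now drives the scheduler through the enterRegion/schedule/exitRegion protocol and the getSUnit lookup instead of the old monolithic Run call. A sketch of the calling sequence, assuming a ScheduleDAGInstrs-derived scheduler and a whole-block region:

static void packetizeRegion(ScheduleDAGInstrs *Sched, MachineBasicBlock *MBB,
                            MachineBasicBlock::iterator Begin,
                            MachineBasicBlock::iterator End) {
  Sched->enterRegion(MBB, Begin, End, MBB->size()); // announce the region
  Sched->schedule();                                // build the DAG; no reordering
  for (MachineBasicBlock::iterator I = Begin; I != End; ++I) {
    SUnit *SU = Sched->getSUnit(&*I);               // MI -> SUnit lookup
    (void)SU;                                       // ...consult dependencies...
  }
  Sched->exitRegion();                              // drop per-region state
}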
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 17633e2..97e6547 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -90,7 +90,7 @@ static void addPassesToHandleExceptions(TargetMachine *TM,
// removed from the parent invoke(s). This could happen when a landing
// pad is shared by multiple invokes and is also a target of a normal
// edge from elsewhere.
- PM.add(createSjLjEHPass(TM->getTargetLowering()));
+ PM.add(createSjLjEHPreparePass(TM->getTargetLowering()));
// FALLTHROUGH
case ExceptionHandling::DwarfCFI:
case ExceptionHandling::ARM:
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index 0578229..deab05a 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -84,11 +84,11 @@ void LatencyPriorityQueue::push(SUnit *SU) {
}
-// ScheduledNode - As nodes are scheduled, we look to see if there are any
+// scheduledNode - As nodes are scheduled, we look to see if there are any
// successor nodes that have a single unscheduled predecessor. If so, that
// single predecessor has a higher priority, since scheduling it will make
// the node available.
-void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
+void LatencyPriorityQueue::scheduledNode(SUnit *SU) {
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
AdjustPriorityOfUnscheduledPreds(I->getSUnit());
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index c35302a..2187833 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -226,7 +226,7 @@ public:
LiveInterval *LI, const VNInfo *VNI,
SmallVectorImpl<SlotIndex> *Kills,
LiveIntervals &LIS, MachineDominatorTree &MDT,
- UserValueScopes &UVS);
+ UserValueScopes &UVS);
/// addDefsFromCopies - The value in LI/LocNo may be copied to other
/// registers. Determine if any of the copies are available at the kill
@@ -486,7 +486,7 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
LiveInterval *LI, const VNInfo *VNI,
SmallVectorImpl<SlotIndex> *Kills,
LiveIntervals &LIS, MachineDominatorTree &MDT,
- UserValueScopes &UVS) {
+ UserValueScopes &UVS) {
SmallVector<SlotIndex, 16> Todo;
Todo.push_back(Idx);
do {
@@ -620,7 +620,7 @@ void
UserValue::computeIntervals(MachineRegisterInfo &MRI,
LiveIntervals &LIS,
MachineDominatorTree &MDT,
- UserValueScopes &UVS) {
+ UserValueScopes &UVS) {
SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs;
// Collect all defs to be extended (Skipping undefs).
@@ -841,7 +841,7 @@ bool
UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
bool DidChange = false;
// Split locations referring to OldReg. Iterate backwards so splitLocation can
- // safely erase unuused locations.
+ // safely erase unused locations.
for (unsigned i = locations.size(); i ; --i) {
unsigned LocNo = i-1;
const MachineOperand *Loc = &locations[LocNo];
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 70ed1c3..3ade660 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -1049,9 +1049,19 @@ public:
bool hasRegMaskOp = false;
collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
- moveAllEnteringFrom(OldIdx, Entering);
- moveAllInternalFrom(OldIdx, Internal);
- moveAllExitingFrom(OldIdx, Exiting);
+ // To keep the LiveRanges valid within an interval, move the ranges closest
+ // to the destination first. This prevents ranges from overlapping, so that
+ // APIs like removeRange still work.
+ if (NewIdx < OldIdx) {
+ moveAllEnteringFrom(OldIdx, Entering);
+ moveAllInternalFrom(OldIdx, Internal);
+ moveAllExitingFrom(OldIdx, Exiting);
+ }
+ else {
+ moveAllExitingFrom(OldIdx, Exiting);
+ moveAllInternalFrom(OldIdx, Internal);
+ moveAllEnteringFrom(OldIdx, Entering);
+ }
if (hasRegMaskOp)
updateRegMaskSlots(OldIdx);
@@ -1319,8 +1329,14 @@ private:
void moveEnteringDownFrom(SlotIndex OldIdx, IntRangePair& P) {
LiveInterval* LI = P.first;
LiveRange* LR = P.second;
+ // Extend the LiveRange if NewIdx is past the end.
if (NewIdx > LR->end) {
- moveKillFlags(LI->reg, LR->end, NewIdx);
+ // Move kill flags if OldIdx was not originally the end
+ // (otherwise LR->end points to an invalid slot).
+ if (LR->end.getRegSlot() != OldIdx.getRegSlot()) {
+ assert(LR->end > OldIdx && "LiveRange does not cover original slot");
+ moveKillFlags(LI->reg, LR->end, NewIdx);
+ }
LR->end = NewIdx.getRegSlot();
}
}
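The ordering argument above is easiest to see schematically. With slot indices growing left to right, the range nearest the destination always moves first:

//   NewIdx < OldIdx (moving up):    Entering, then Internal, then Exiting
//   NewIdx > OldIdx (moving down):  Exiting, then Internal, then Entering
// At no intermediate step do two ranges of the same interval overlap, so
// APIs such as removeRange keep working while the move is in progress.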
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 9c3d255..48e1e4c 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -109,6 +109,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
// Mark the variable known alive in this bb
VRInfo.AliveBlocks.set(BBNum);
+ assert(MBB != &MF->front() && "Can't find reaching def for virtreg");
WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend());
}
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 611b045..ca8a8e8 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -238,6 +238,18 @@ StringRef MachineBasicBlock::getName() const {
return "(null)";
}
+/// Return a hopefully unique identifier for this block.
+std::string MachineBasicBlock::getFullName() const {
+ std::string Name;
+ if (getParent())
+ Name = (getParent()->getFunction()->getName() + ":").str();
+ if (getBasicBlock())
+ Name += getBasicBlock()->getName();
+ else
+ Name += (Twine("BB") + Twine(getNumber())).str();
+ return Name;
+}
+
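A note on the format: getFullName produces "<function>:<block>", falling back to a BB<number> tag when the block has no IR-level name. Hypothetical outputs for a function named foo:

//   "foo:entry" -- IR basic block named "entry"
//   "foo:BB3"   -- no IR block name; block number 3 used instead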
void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
const MachineFunction *MF = getParent();
if (!MF) {
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index e9f9475..43af1ad 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -40,6 +40,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -481,7 +482,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
/// MCID NULL and no operands.
MachineInstr::MachineInstr()
: MCID(0), Flags(0), AsmPrinterFlags(0),
- MemRefs(0), MemRefsEnd(0),
+ NumMemRefs(0), MemRefs(0),
Parent(0) {
// Make sure that we get added to a machine basicblock
LeakDetector::addGarbageObject(this);
@@ -489,10 +490,10 @@ MachineInstr::MachineInstr()
void MachineInstr::addImplicitDefUseOperands() {
if (MCID->ImplicitDefs)
- for (const unsigned *ImpDefs = MCID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+ for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
addOperand(MachineOperand::CreateReg(*ImpDefs, true, true));
if (MCID->ImplicitUses)
- for (const unsigned *ImpUses = MCID->ImplicitUses; *ImpUses; ++ImpUses)
+ for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses)
addOperand(MachineOperand::CreateReg(*ImpUses, false, true));
}
@@ -501,7 +502,7 @@ void MachineInstr::addImplicitDefUseOperands() {
/// the MCInstrDesc.
MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp)
: MCID(&tid), Flags(0), AsmPrinterFlags(0),
- MemRefs(0), MemRefsEnd(0), Parent(0) {
+ NumMemRefs(0), MemRefs(0), Parent(0) {
unsigned NumImplicitOps = 0;
if (!NoImp)
NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
@@ -516,7 +517,7 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp)
MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
bool NoImp)
: MCID(&tid), Flags(0), AsmPrinterFlags(0),
- MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
+ NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) {
unsigned NumImplicitOps = 0;
if (!NoImp)
NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
@@ -532,7 +533,7 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
/// basic block.
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid)
: MCID(&tid), Flags(0), AsmPrinterFlags(0),
- MemRefs(0), MemRefsEnd(0), Parent(0) {
+ NumMemRefs(0), MemRefs(0), Parent(0) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
unsigned NumImplicitOps =
MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
@@ -548,7 +549,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid)
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
const MCInstrDesc &tid)
: MCID(&tid), Flags(0), AsmPrinterFlags(0),
- MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
+ NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
unsigned NumImplicitOps =
MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
@@ -563,7 +564,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
///
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
: MCID(&MI.getDesc()), Flags(0), AsmPrinterFlags(0),
- MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd),
+ NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs),
Parent(0), debugLoc(MI.getDebugLoc()) {
Operands.reserve(MI.getNumOperands());
@@ -738,28 +739,23 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
void MachineInstr::addMemOperand(MachineFunction &MF,
MachineMemOperand *MO) {
mmo_iterator OldMemRefs = MemRefs;
- mmo_iterator OldMemRefsEnd = MemRefsEnd;
+ uint16_t OldNumMemRefs = NumMemRefs;
- size_t NewNum = (MemRefsEnd - MemRefs) + 1;
+ uint16_t NewNum = NumMemRefs + 1;
mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum);
- mmo_iterator NewMemRefsEnd = NewMemRefs + NewNum;
- std::copy(OldMemRefs, OldMemRefsEnd, NewMemRefs);
+ std::copy(OldMemRefs, OldMemRefs + OldNumMemRefs, NewMemRefs);
NewMemRefs[NewNum - 1] = MO;
MemRefs = NewMemRefs;
- MemRefsEnd = NewMemRefsEnd;
+ NumMemRefs = NewNum;
}
-bool
-MachineInstr::hasProperty(unsigned MCFlag, QueryType Type) const {
- if (Type == IgnoreBundle || !isBundle())
- return getDesc().getFlags() & (1 << MCFlag);
-
+bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
const MachineBasicBlock *MBB = getParent();
MachineBasicBlock::const_instr_iterator MII = *this; ++MII;
while (MII != MBB->end() && MII->isInsideBundle()) {
- if (MII->getDesc().getFlags() & (1 << MCFlag)) {
+ if (MII->getDesc().getFlags() & Mask) {
if (Type == AnyInBundle)
return true;
} else {
@@ -1843,49 +1839,55 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
unsigned
MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
- unsigned Hash = MI->getOpcode() * 37;
+ // Build up a buffer of hash code components.
+ //
+ // FIXME: This is a total hack. We should have a hash_value overload for
+ // MachineOperand, but currently that doesn't work because there are many
+ // different ideas of "equality" and thus different sets of information that
+ // contribute to the hash code. This one happens to want to take a specific
+ // subset. And it's still not clear that this routine uses the *correct*
+ // subset of information when computing the hash code. The goal is to use the
+ // same inputs for the hash code here that MachineInstr::isIdenticalTo uses to
+ // test for equality when passed the 'IgnoreVRegDefs' filter flag. It would
+ // be very useful to factor the selection of relevant inputs out of the two
+ // functions and into a common routine, but it's not clear how that can be
+ // done.
+ SmallVector<size_t, 8> HashComponents;
+ HashComponents.reserve(MI->getNumOperands() + 1);
+ HashComponents.push_back(MI->getOpcode());
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- uint64_t Key = (uint64_t)MO.getType() << 32;
switch (MO.getType()) {
default: break;
case MachineOperand::MO_Register:
if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue; // Skip virtual register defs.
- Key |= MO.getReg();
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getReg()));
break;
case MachineOperand::MO_Immediate:
- Key |= MO.getImm();
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getImm()));
break;
case MachineOperand::MO_FrameIndex:
case MachineOperand::MO_ConstantPoolIndex:
case MachineOperand::MO_JumpTableIndex:
- Key |= MO.getIndex();
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getIndex()));
break;
case MachineOperand::MO_MachineBasicBlock:
- Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB());
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getMBB()));
break;
case MachineOperand::MO_GlobalAddress:
- Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal());
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getGlobal()));
break;
case MachineOperand::MO_BlockAddress:
- Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress());
+ HashComponents.push_back(hash_combine(MO.getType(),
+ MO.getBlockAddress()));
break;
case MachineOperand::MO_MCSymbol:
- Key |= DenseMapInfo<void*>::getHashValue(MO.getMCSymbol());
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getMCSymbol()));
break;
}
- Key += ~(Key << 32);
- Key ^= (Key >> 22);
- Key += ~(Key << 13);
- Key ^= (Key >> 8);
- Key += (Key << 3);
- Key ^= (Key >> 15);
- Key += ~(Key << 27);
- Key ^= (Key >> 31);
- Hash = (unsigned)Key + Hash * 37;
- }
- return Hash;
+ }
+ return hash_combine_range(HashComponents.begin(), HashComponents.end());
}
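The rewrite drops the hand-rolled 64-bit mixing steps in favor of the Hashing.h combinators, which handle distribution internally. A minimal sketch of the idiom on plain integers (the component values are made up):

#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallVector.h"

static size_t hashSketch() {
  llvm::SmallVector<size_t, 8> Parts;
  Parts.push_back(17);                        // e.g. an opcode
  Parts.push_back(llvm::hash_combine(1, 42)); // e.g. an (operand kind, value) pair
  // Mix every component into one well-distributed hash code.
  return llvm::hash_combine_range(Parts.begin(), Parts.end());
}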
void MachineInstr::emitError(StringRef Msg) const {
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
index d1f2df9..73489a7 100644
--- a/lib/CodeGen/MachineInstrBundle.cpp
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -229,6 +229,8 @@ bool llvm::finalizeBundles(MachineFunction &MF) {
"First instr cannot be inside bundle before finalization!");
MachineBasicBlock::instr_iterator MIE = MBB.instr_end();
+ if (MII == MIE)
+ continue;
for (++MII; MII != MIE; ) {
if (!MII->isInsideBundle())
++MII;
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 7d40e66..f140dec 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -161,9 +161,8 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
/// form, so there should only be one definition.
MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
// Since we are in SSA form, we can use the first definition.
- if (!def_empty(Reg))
- return &*def_begin(Reg);
- return 0;
+ def_iterator I = def_begin(Reg);
+ return !I.atEnd() ? &*I : 0;
}
bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const {
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 8a485e0..364a244 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -14,10 +14,10 @@
#define DEBUG_TYPE "misched"
-#include "ScheduleDAGInstrs.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachinePassRegistry.h"
+#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
@@ -25,25 +25,36 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/PriorityQueue.h"
#include <queue>
using namespace llvm;
+static cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
+ cl::desc("Force top-down list scheduling"));
+static cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
+ cl::desc("Force bottom-up list scheduling"));
+
+#ifndef NDEBUG
+static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
+ cl::desc("Pop up a window to show MISched dags after they are processed"));
+
+static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
+ cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
+#else
+static bool ViewMISchedDAGs = false;
+#endif // NDEBUG
+
//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//
namespace {
/// MachineScheduler runs after coalescing and before register allocation.
-class MachineScheduler : public MachineFunctionPass {
+class MachineScheduler : public MachineSchedContext,
+ public MachineFunctionPass {
public:
- MachineFunction *MF;
- const TargetInstrInfo *TII;
- const MachineLoopInfo *MLI;
- const MachineDominatorTree *MDT;
- LiveIntervals *LIS;
-
MachineScheduler();
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
@@ -71,7 +82,7 @@ INITIALIZE_PASS_END(MachineScheduler, "misched",
"Machine Instruction Scheduler", false, false)
MachineScheduler::MachineScheduler()
-: MachineFunctionPass(ID), MF(0), MLI(0), MDT(0) {
+: MachineFunctionPass(ID) {
initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
}
@@ -80,7 +91,7 @@ void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(MachineDominatorsID);
AU.addRequired<MachineLoopInfo>();
AU.addRequired<AliasAnalysis>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<TargetPassConfig>();
AU.addRequired<SlotIndexes>();
AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveIntervals>();
@@ -88,91 +99,226 @@ void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-namespace {
-/// MachineSchedRegistry provides a selection of available machine instruction
-/// schedulers.
-class MachineSchedRegistry : public MachinePassRegistryNode {
-public:
- typedef ScheduleDAGInstrs *(*ScheduleDAGCtor)(MachineScheduler *);
+MachinePassRegistry MachineSchedRegistry::Registry;
- // RegisterPassParser requires a (misnamed) FunctionPassCtor type.
- typedef ScheduleDAGCtor FunctionPassCtor;
+/// A dummy default scheduler factory, used to detect whether the scheduler
+/// has been overridden on the command line.
+static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
+ return 0;
+}
- static MachinePassRegistry Registry;
+/// MachineSchedOpt allows command line selection of the scheduler.
+static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
+ RegisterPassParser<MachineSchedRegistry> >
+MachineSchedOpt("misched",
+ cl::init(&useDefaultMachineSched), cl::Hidden,
+ cl::desc("Machine instruction scheduler to use"));
- MachineSchedRegistry(const char *N, const char *D, ScheduleDAGCtor C)
- : MachinePassRegistryNode(N, D, (MachinePassCtor)C) {
- Registry.Add(this);
- }
- ~MachineSchedRegistry() { Registry.Remove(this); }
+static MachineSchedRegistry
+DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
+ useDefaultMachineSched);
+
+/// Forward declare the standard machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
+
+/// Top-level MachineScheduler pass driver.
+///
+/// Visit blocks in function order. Divide each block into scheduling regions
+/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
+/// consistent with the DAG builder, which traverses the interior of the
+/// scheduling regions bottom-up.
+///
+/// This design avoids exposing scheduling boundaries to the DAG builder,
+/// simplifying the DAG builder's support for "special" target instructions.
+/// At the same time the design allows target schedulers to operate across
+/// scheduling boundaries, for example to bundle the boundary instructions
+/// without reordering them. This creates complexity, because the target
+/// scheduler must update the RegionBegin and RegionEnd positions cached by
+/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
+/// design would be to split blocks at scheduling boundaries, but LLVM has a
+/// general bias against block splitting purely for implementation simplicity.
+bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+ // Initialize the context of the pass.
+ MF = &mf;
+ MLI = &getAnalysis<MachineLoopInfo>();
+ MDT = &getAnalysis<MachineDominatorTree>();
+ PassConfig = &getAnalysis<TargetPassConfig>();
+ AA = &getAnalysis<AliasAnalysis>();
- // Accessors.
- //
- MachineSchedRegistry *getNext() const {
- return (MachineSchedRegistry *)MachinePassRegistryNode::getNext();
- }
- static MachineSchedRegistry *getList() {
- return (MachineSchedRegistry *)Registry.getList();
- }
- static ScheduleDAGCtor getDefault() {
- return (ScheduleDAGCtor)Registry.getDefault();
- }
- static void setDefault(ScheduleDAGCtor C) {
- Registry.setDefault((MachinePassCtor)C);
+ LIS = &getAnalysis<LiveIntervals>();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ // Select the scheduler, or set the default.
+ MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
+ if (Ctor == useDefaultMachineSched) {
+ // Get the default scheduler set by the target.
+ Ctor = MachineSchedRegistry::getDefault();
+ if (!Ctor) {
+ Ctor = createConvergingSched;
+ MachineSchedRegistry::setDefault(Ctor);
+ }
}
- static void setListener(MachinePassRegistryListener *L) {
- Registry.setListener(L);
+ // Instantiate the selected scheduler.
+ OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
+
+ // Visit all machine basic blocks.
+ for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
+ MBB != MBBEnd; ++MBB) {
+
+ Scheduler->startBlock(MBB);
+
+ // Break the block into scheduling regions [I, RegionEnd), and schedule each
+ // region as soon as it is discovered. RegionEnd points to the scheduling
+ // boundary at the bottom of the region. The DAG does not include RegionEnd,
+ // but the region does (i.e. the next RegionEnd is above the previous
+ // RegionBegin). If the current block has no terminator then RegionEnd ==
+ // MBB->end() for the bottom region.
+ //
+ // The Scheduler may insert instructions during either schedule() or
+ // exitRegion(), even for empty regions. So the local iterators 'I' and
+ // 'RegionEnd' are invalid across these calls.
+ unsigned RemainingCount = MBB->size();
+ for(MachineBasicBlock::iterator RegionEnd = MBB->end();
+ RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
+ // Avoid decrementing RegionEnd for blocks with no terminator.
+ if (RegionEnd != MBB->end()
+ || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
+ --RegionEnd;
+ // Count the boundary instruction.
+ --RemainingCount;
+ }
+
+ // The next region starts above the previous region. Look backward in the
+ // instruction stream until we find the nearest boundary.
+ MachineBasicBlock::iterator I = RegionEnd;
+ for(;I != MBB->begin(); --I, --RemainingCount) {
+ if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
+ break;
+ }
+ // Notify the scheduler of the region, even if we may skip scheduling
+ // it. Perhaps it still needs to be bundled.
+ Scheduler->enterRegion(MBB, I, RegionEnd, RemainingCount);
+
+ // Skip empty scheduling regions (0 or 1 schedulable instructions).
+ if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
+ // Close the current region. Bundle the terminator if needed.
+ // This invalidates 'RegionEnd' and 'I'.
+ Scheduler->exitRegion();
+ continue;
+ }
+ DEBUG(dbgs() << "MachineScheduling " << MF->getFunction()->getName()
+ << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: ";
+ if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
+ else dbgs() << "End";
+ dbgs() << " Remaining: " << RemainingCount << "\n");
+
+ // Schedule a region: possibly reorder instructions.
+ // This invalidates 'RegionEnd' and 'I'.
+ Scheduler->schedule();
+
+ // Close the current region.
+ Scheduler->exitRegion();
+
+ // Scheduling has invalidated the current iterator 'I'. Ask the
+ // scheduler for the top of its scheduled region.
+ RegionEnd = Scheduler->begin();
+ }
+ assert(RemainingCount == 0 && "Instruction count mismatch!");
+ Scheduler->finishBlock();
}
-};
-} // namespace
+ DEBUG(LIS->print(dbgs()));
+ return true;
+}
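A schematic of the region walk, assuming a block where CALL and the terminator are both reported by isSchedulingBoundary:

//   block:   I0 I1 I2  CALL  I3 I4  TERM
//   pass 1:  RegionEnd lands on TERM; region [I3, TERM) is scheduled
//   pass 2:  RegionEnd lands on CALL; region [I0, CALL) is scheduled
// Boundary instructions belong to a region but never to its DAG, and the
// loop re-reads Scheduler->begin() because scheduling may have replaced
// the instruction that previously started the region.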
-MachinePassRegistry MachineSchedRegistry::Registry;
+void MachineScheduler::print(raw_ostream &O, const Module* m) const {
+ // unimplemented
+}
-static ScheduleDAGInstrs *createDefaultMachineSched(MachineScheduler *P);
+//===----------------------------------------------------------------------===//
+// MachineSchedStrategy - Interface to a machine scheduling algorithm.
+//===----------------------------------------------------------------------===//
-/// MachineSchedOpt allows command line selection of the scheduler.
-static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
- RegisterPassParser<MachineSchedRegistry> >
-MachineSchedOpt("misched",
- cl::init(&createDefaultMachineSched), cl::Hidden,
- cl::desc("Machine instruction scheduler to use"));
+namespace {
+class ScheduleDAGMI;
+
+/// MachineSchedStrategy - Interface used by ScheduleDAGMI to drive the selected
+/// scheduling algorithm.
+///
+/// If this works well and targets wish to reuse ScheduleDAGMI, we may expose it
+/// in ScheduleDAGInstrs.h
+class MachineSchedStrategy {
+public:
+ virtual ~MachineSchedStrategy() {}
+
+ /// Initialize the strategy after building the DAG for a new region.
+ virtual void initialize(ScheduleDAGMI *DAG) = 0;
+
+ /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
+ /// schedule the node at the top of the unscheduled region. Otherwise it will
+ /// be scheduled at the bottom.
+ virtual SUnit *pickNode(bool &IsTopNode) = 0;
+
+ /// When all predecessor dependencies have been resolved, free this node for
+ /// top-down scheduling.
+ virtual void releaseTopNode(SUnit *SU) = 0;
+ /// When all successor dependencies have been resolved, free this node for
+ /// bottom-up scheduling.
+ virtual void releaseBottomNode(SUnit *SU) = 0;
+};
+} // namespace
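For illustration only, a minimal strategy satisfying this interface: it schedules strictly top-down, picking the most recently released node. This sketch is not part of the patch:

class TrivialTopDownStrategy : public MachineSchedStrategy {
  std::vector<SUnit*> Ready; // top-ready nodes, most recently released last
public:
  virtual void initialize(ScheduleDAGMI *DAG) { Ready.clear(); }
  virtual SUnit *pickNode(bool &IsTopNode) {
    if (Ready.empty()) return NULL;
    SUnit *SU = Ready.back();
    Ready.pop_back();
    IsTopNode = true; // always grow the scheduled zone from the top
    return SU;
  }
  virtual void releaseTopNode(SUnit *SU) { Ready.push_back(SU); }
  virtual void releaseBottomNode(SUnit *SU) {} // unused when scheduling top-down
};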
//===----------------------------------------------------------------------===//
-// Machine Instruction Scheduling Common Implementation
+// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
+// preservation.
//===----------------------------------------------------------------------===//
namespace {
-/// ScheduleTopDownLive is an implementation of ScheduleDAGInstrs that schedules
+/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that schedules
/// machine instructions while updating LiveIntervals.
-class ScheduleTopDownLive : public ScheduleDAGInstrs {
-protected:
- MachineScheduler *Pass;
+class ScheduleDAGMI : public ScheduleDAGInstrs {
+ AliasAnalysis *AA;
+ MachineSchedStrategy *SchedImpl;
+
+ /// The top of the unscheduled zone.
+ MachineBasicBlock::iterator CurrentTop;
+
+ /// The bottom of the unscheduled zone.
+ MachineBasicBlock::iterator CurrentBottom;
+
+ /// The number of instructions scheduled so far. Used to cut off the
+ /// scheduler at the point determined by misched-cutoff.
+ unsigned NumInstrsScheduled;
public:
- ScheduleTopDownLive(MachineScheduler *P):
- ScheduleDAGInstrs(*P->MF, *P->MLI, *P->MDT, /*IsPostRA=*/false, P->LIS),
- Pass(P) {}
+ ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S):
+ ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS),
+ AA(C->AA), SchedImpl(S), CurrentTop(), CurrentBottom(),
+ NumInstrsScheduled(0) {}
- /// ScheduleDAGInstrs callback.
- void Schedule();
+ ~ScheduleDAGMI() {
+ delete SchedImpl;
+ }
- /// Interface implemented by the selected top-down liveinterval scheduler.
- ///
- /// Pick the next node to schedule, or return NULL.
- virtual SUnit *pickNode() = 0;
+ MachineBasicBlock::iterator top() const { return CurrentTop; }
+ MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
- /// When all preceeding dependencies have been resolved, free this node for
- /// scheduling.
- virtual void releaseNode(SUnit *SU) = 0;
+ /// Implement ScheduleDAGInstrs interface.
+ void schedule();
protected:
+ void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
+ bool checkSchedLimit();
+
void releaseSucc(SUnit *SU, SDep *SuccEdge);
void releaseSuccessors(SUnit *SU);
+ void releasePred(SUnit *SU, SDep *PredEdge);
+ void releasePredecessors(SUnit *SU);
};
} // namespace
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
-void ScheduleTopDownLive::releaseSucc(SUnit *SU, SDep *SuccEdge) {
+void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
SUnit *SuccSU = SuccEdge->getSUnit();
#ifndef NDEBUG
@@ -185,164 +331,199 @@ void ScheduleTopDownLive::releaseSucc(SUnit *SU, SDep *SuccEdge) {
#endif
--SuccSU->NumPredsLeft;
if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
- releaseNode(SuccSU);
+ SchedImpl->releaseTopNode(SuccSU);
}
/// releaseSuccessors - Call releaseSucc on each of SU's successors.
-void ScheduleTopDownLive::releaseSuccessors(SUnit *SU) {
+void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
releaseSucc(SU, &*I);
}
}
-/// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
-/// time to do some work.
-void ScheduleTopDownLive::Schedule() {
- BuildSchedGraph(&Pass->getAnalysis<AliasAnalysis>());
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
+/// NumSuccsLeft reaches zero, release the predecessor node.
+void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
+ SchedImpl->releaseBottomNode(PredSU);
+}
+
+/// releasePredecessors - Call releasePred on each of SU's predecessors.
+void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ releasePred(SU, &*I);
+ }
+}
+
+void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
+ MachineBasicBlock::iterator InsertPos) {
+ // Fix RegionBegin if the first instruction moves down.
+ if (&*RegionBegin == MI)
+ RegionBegin = llvm::next(RegionBegin);
+ BB->splice(InsertPos, BB, MI);
+ LIS->handleMove(MI);
+ // Fix RegionBegin if another instruction moves above the first instruction.
+ if (RegionBegin == InsertPos)
+ RegionBegin = MI;
+}
+
+bool ScheduleDAGMI::checkSchedLimit() {
+#ifndef NDEBUG
+ if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
+ CurrentTop = CurrentBottom;
+ return false;
+ }
+ ++NumInstrsScheduled;
+#endif
+ return true;
+}
+
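The cutoff is a debugging aid: in a debug build, -misched-cutoff=N freezes the unscheduled zone after N scheduling decisions, so a miscompile can be bisected to the first harmful decision, e.g. (hypothetical invocation):

//   llc ... -misched-cutoff=10   # stop after 10 scheduled instructions
// Bisect on the cutoff value to isolate the first decision that breaks a test.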
+/// schedule - Called back from MachineScheduler::runOnMachineFunction
+/// after setting up the current scheduling region.
+void ScheduleDAGMI::schedule() {
+ buildSchedGraph(AA);
DEBUG(dbgs() << "********** MI Scheduling **********\n");
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
SUnits[su].dumpAll(this));
- // Release any successors of the special Entry node. It is currently unused,
- // but we keep up appearances.
+ if (ViewMISchedDAGs) viewGraph();
+
+ SchedImpl->initialize(this);
+
+ // Release edges from the special Entry node or to the special Exit node.
releaseSuccessors(&EntrySU);
+ releasePredecessors(&ExitSU);
// Release all DAG roots for scheduling.
for (std::vector<SUnit>::iterator I = SUnits.begin(), E = SUnits.end();
I != E; ++I) {
- // A SUnit is ready to schedule if it has no predecessors.
+ // A SUnit is ready for top-down scheduling if it has no predecessors.
if (I->Preds.empty())
- releaseNode(&(*I));
+ SchedImpl->releaseTopNode(&(*I));
+ // A SUnit is ready for bottom-up scheduling if it has no successors.
+ if (I->Succs.empty())
+ SchedImpl->releaseBottomNode(&(*I));
}
- InsertPos = Begin;
- while (SUnit *SU = pickNode()) {
- DEBUG(dbgs() << "*** Scheduling Instruction:\n"; SU->dump(this));
+ CurrentTop = RegionBegin;
+ CurrentBottom = RegionEnd;
+ bool IsTopNode = false;
+ while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+ DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
+ << " Scheduling Instruction:\n"; SU->dump(this));
+ if (!checkSchedLimit())
+ break;
// Move the instruction to its new location in the instruction stream.
MachineInstr *MI = SU->getInstr();
- if (&*InsertPos == MI)
- ++InsertPos;
+
+ if (IsTopNode) {
+ assert(SU->isTopReady() && "node still has unscheduled dependencies");
+ if (&*CurrentTop == MI)
+ ++CurrentTop;
+ else
+ moveInstruction(MI, CurrentTop);
+ // Release dependent instructions for scheduling.
+ releaseSuccessors(SU);
+ }
else {
- BB->splice(InsertPos, BB, MI);
- Pass->LIS->handleMove(MI);
- if (Begin == InsertPos)
- Begin = MI;
+ assert(SU->isBottomReady() && "node still has unscheduled dependencies");
+ if (&*llvm::prior(CurrentBottom) == MI)
+ --CurrentBottom;
+ else {
+ if (&*CurrentTop == MI)
+ CurrentTop = llvm::next(CurrentTop);
+ moveInstruction(MI, CurrentBottom);
+ CurrentBottom = MI;
+ }
+ // Release dependent instructions for scheduling.
+ releasePredecessors(SU);
}
-
- // Release dependent instructions for scheduling.
- releaseSuccessors(SU);
+ SU->isScheduled = true;
}
+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
}
-bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
- // Initialize the context of the pass.
- MF = &mf;
- MLI = &getAnalysis<MachineLoopInfo>();
- MDT = &getAnalysis<MachineDominatorTree>();
- LIS = &getAnalysis<LiveIntervals>();
- TII = MF->getTarget().getInstrInfo();
+//===----------------------------------------------------------------------===//
+// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
- // Select the scheduler, or set the default.
- MachineSchedRegistry::ScheduleDAGCtor Ctor =
- MachineSchedRegistry::getDefault();
- if (!Ctor) {
- Ctor = MachineSchedOpt;
- MachineSchedRegistry::setDefault(Ctor);
- }
- // Instantiate the selected scheduler.
- OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
+namespace {
+/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
+/// the schedule.
+class ConvergingScheduler : public MachineSchedStrategy {
+ ScheduleDAGMI *DAG;
- // Visit all machine basic blocks.
- for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
- MBB != MBBEnd; ++MBB) {
+ unsigned NumTopReady;
+ unsigned NumBottomReady;
- // Break the block into scheduling regions [I, RegionEnd), and schedule each
- // region as soon as it is discovered.
- unsigned RemainingCount = MBB->size();
- for(MachineBasicBlock::iterator RegionEnd = MBB->end();
- RegionEnd != MBB->begin();) {
- // The next region starts above the previous region. Look backward in the
- // instruction stream until we find the nearest boundary.
- MachineBasicBlock::iterator I = RegionEnd;
- for(;I != MBB->begin(); --I, --RemainingCount) {
- if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
- break;
- }
- if (I == RegionEnd) {
- // Skip empty scheduling regions.
- RegionEnd = llvm::prior(RegionEnd);
- --RemainingCount;
- continue;
- }
- // Skip regions with one instruction.
- if (I == llvm::prior(RegionEnd)) {
- RegionEnd = llvm::prior(RegionEnd);
- continue;
- }
- DEBUG(dbgs() << "MachineScheduling " << MF->getFunction()->getName()
- << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: ";
- if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
- else dbgs() << "End";
- dbgs() << " Remaining: " << RemainingCount << "\n");
+public:
+ virtual void initialize(ScheduleDAGMI *dag) {
+ DAG = dag;
- // Inform ScheduleDAGInstrs of the region being scheduled. It calls back
- // to our Schedule() method.
- Scheduler->Run(MBB, I, RegionEnd, MBB->size());
- RegionEnd = Scheduler->Begin;
- }
- assert(RemainingCount == 0 && "Instruction count mismatch!");
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
}
- return true;
-}
-void MachineScheduler::print(raw_ostream &O, const Module* m) const {
- // unimplemented
-}
+ virtual SUnit *pickNode(bool &IsTopNode) {
+ if (DAG->top() == DAG->bottom())
+ return NULL;
-//===----------------------------------------------------------------------===//
-// Placeholder for extending the machine instruction scheduler.
-//===----------------------------------------------------------------------===//
-
-namespace {
-class DefaultMachineScheduler : public ScheduleDAGInstrs {
- MachineScheduler *Pass;
-public:
- DefaultMachineScheduler(MachineScheduler *P):
- ScheduleDAGInstrs(*P->MF, *P->MLI, *P->MDT, /*IsPostRA=*/false, P->LIS),
- Pass(P) {}
+ // As an initial placeholder heuristic, schedule in the direction that has
+ // the fewest choices.
+ SUnit *SU;
+ if (ForceTopDown || (!ForceBottomUp && NumTopReady <= NumBottomReady)) {
+ SU = DAG->getSUnit(DAG->top());
+ IsTopNode = true;
+ }
+ else {
+ SU = DAG->getSUnit(llvm::prior(DAG->bottom()));
+ IsTopNode = false;
+ }
+ if (SU->isTopReady()) {
+ assert(NumTopReady > 0 && "bad ready count");
+ --NumTopReady;
+ }
+ if (SU->isBottomReady()) {
+ assert(NumBottomReady > 0 && "bad ready count");
+ --NumBottomReady;
+ }
+ return SU;
+ }
- /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
- /// time to do some work.
- void Schedule();
+ virtual void releaseTopNode(SUnit *SU) {
+ ++NumTopReady;
+ }
+ virtual void releaseBottomNode(SUnit *SU) {
+ ++NumBottomReady;
+ }
};
} // namespace
-static ScheduleDAGInstrs *createDefaultMachineSched(MachineScheduler *P) {
- return new DefaultMachineScheduler(P);
+/// Create the standard converging machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+ return new ScheduleDAGMI(C, new ConvergingScheduler());
}
static MachineSchedRegistry
-SchedDefaultRegistry("default", "Activate the scheduler pass, "
- "but don't reorder instructions",
- createDefaultMachineSched);
-
-
-/// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
-/// time to do some work.
-void DefaultMachineScheduler::Schedule() {
- BuildSchedGraph(&Pass->getAnalysis<AliasAnalysis>());
-
- DEBUG(dbgs() << "********** MI Scheduling **********\n");
- DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
- SUnits[su].dumpAll(this));
-
- // TODO: Put interesting things here.
- //
- // When this is fully implemented, it will become a subclass of
- // ScheduleTopDownLive. So this driver will disappear.
-}
+ConvergingSchedRegistry("converge", "Standard converging scheduler.",
+ createConvergingSched);
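Any strategy wrapped in a ScheduleDAGMI can be registered the same way. A hypothetical out-of-tree registration (names invented) would mirror the pattern above:

static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ConvergingScheduler()); // any strategy here
}
static MachineSchedRegistry
MySchedRegistry("my-sched", "An example scheduler registration.",
                createMySched);
// Selected from the command line with -misched=my-sched.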
//===----------------------------------------------------------------------===//
// Machine Instruction Shuffler for Correctness Testing
@@ -350,43 +531,83 @@ void DefaultMachineScheduler::Schedule() {
#ifndef NDEBUG
namespace {
-// Nodes with a higher number have higher priority. This way we attempt to
-// schedule the latest instructions earliest.
-//
-// TODO: Relies on the property of the BuildSchedGraph that results in SUnits
-// being ordered in sequence top-down.
-struct ShuffleSUnitOrder {
+/// Apply a less-than relation on the node order, which corresponds to the
+/// instruction order prior to scheduling. IsReverse implements greater-than.
+template<bool IsReverse>
+struct SUnitOrder {
bool operator()(SUnit *A, SUnit *B) const {
- return A->NodeNum < B->NodeNum;
+ if (IsReverse)
+ return A->NodeNum > B->NodeNum;
+ else
+ return A->NodeNum < B->NodeNum;
}
};
/// Reorder instructions as much as possible.
-class InstructionShuffler : public ScheduleTopDownLive {
- std::priority_queue<SUnit*, std::vector<SUnit*>, ShuffleSUnitOrder> Queue;
+class InstructionShuffler : public MachineSchedStrategy {
+ bool IsAlternating;
+ bool IsTopDown;
+
+ // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
+ // gives nodes with a higher number higher priority causing the latest
+ // instructions to be scheduled first.
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
+ TopQ;
+ // When scheduling bottom-up, use greater-than as the queue priority.
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
+ BottomQ;
public:
- InstructionShuffler(MachineScheduler *P):
- ScheduleTopDownLive(P) {}
+ InstructionShuffler(bool alternate, bool topdown)
+ : IsAlternating(alternate), IsTopDown(topdown) {}
- /// ScheduleTopDownLive Interface
+ virtual void initialize(ScheduleDAGMI *) {
+ TopQ.clear();
+ BottomQ.clear();
+ }
- virtual SUnit *pickNode() {
- if (Queue.empty()) return NULL;
- SUnit *SU = Queue.top();
- Queue.pop();
+ /// Implement MachineSchedStrategy interface.
+ /// -----------------------------------------
+
+ virtual SUnit *pickNode(bool &IsTopNode) {
+ SUnit *SU;
+ if (IsTopDown) {
+ do {
+ if (TopQ.empty()) return NULL;
+ SU = TopQ.top();
+ TopQ.pop();
+ } while (SU->isScheduled);
+ IsTopNode = true;
+ }
+ else {
+ do {
+ if (BottomQ.empty()) return NULL;
+ SU = BottomQ.top();
+ BottomQ.pop();
+ } while (SU->isScheduled);
+ IsTopNode = false;
+ }
+ if (IsAlternating)
+ IsTopDown = !IsTopDown;
return SU;
}
- virtual void releaseNode(SUnit *SU) {
- Queue.push(SU);
+ virtual void releaseTopNode(SUnit *SU) {
+ TopQ.push(SU);
+ }
+ virtual void releaseBottomNode(SUnit *SU) {
+ BottomQ.push(SU);
}
};
} // namespace
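The two comparator instantiations are what make the shuffle maximal in both directions. With four nodes numbered 0..3 in program order:

//   TopQ    (SUnitOrder<false>, less-than):    pops 3, 2, 1, 0 -- latest first
//   BottomQ (SUnitOrder<true>,  greater-than): pops 0, 1, 2, 3 -- earliest first
// The top of the region therefore fills with the latest instructions and the
// bottom with the earliest, stressing liveness updates from both ends.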
-static ScheduleDAGInstrs *createInstructionShuffler(MachineScheduler *P) {
- return new InstructionShuffler(P);
+static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
+ bool Alternate = !ForceTopDown && !ForceBottomUp;
+ bool TopDown = !ForceBottomUp;
+ assert((TopDown || !ForceTopDown) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+ return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
}
-static MachineSchedRegistry ShufflerRegistry("shuffle",
- "Shuffle machine instructions",
- createInstructionShuffler);
+static MachineSchedRegistry ShufflerRegistry(
+ "shuffle", "Shuffle machine instructions alternating directions",
+ createInstructionShuffler);
#endif // !NDEBUG
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 394a960..830a876 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -900,7 +900,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
void MachineVerifier::calcRegsPassed() {
// First push live-out regs to successors' vregsPassed. Remember the MBBs that
// have any vregsPassed.
- DenseSet<const MachineBasicBlock*> todo;
+ SmallPtrSet<const MachineBasicBlock*, 8> todo;
for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
MFI != MFE; ++MFI) {
const MachineBasicBlock &MBB(*MFI);
@@ -937,7 +937,7 @@ void MachineVerifier::calcRegsPassed() {
// similar to calcRegsPassed, only backwards.
void MachineVerifier::calcRegsRequired() {
// First push live-in regs to predecessors' vregsRequired.
- DenseSet<const MachineBasicBlock*> todo;
+ SmallPtrSet<const MachineBasicBlock*, 8> todo;
for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
MFI != MFE; ++MFI) {
const MachineBasicBlock &MBB(*MFI);
@@ -970,9 +970,10 @@ void MachineVerifier::calcRegsRequired() {
// Check PHI instructions at the beginning of MBB. It is assumed that
// calcRegsPassed has been run so BBInfo::isLiveOut is valid.
void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
+ SmallPtrSet<const MachineBasicBlock*, 8> seen;
for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end();
BBI != BBE && BBI->isPHI(); ++BBI) {
- DenseSet<const MachineBasicBlock*> seen;
+ seen.clear();
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
unsigned Reg = BBI->getOperand(i).getReg();
@@ -1013,8 +1014,17 @@ void MachineVerifier::visitMachineFunctionAfter() {
}
// Now check liveness info if available
- if (LiveVars || LiveInts)
- calcRegsRequired();
+ calcRegsRequired();
+
+ if (MRI->isSSA() && !MF->empty()) {
+ BBInfo &MInfo = MBBInfoMap[&MF->front()];
+ for (RegSet::iterator
+ I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E;
+ ++I)
+ report("Virtual register def doesn't dominate all uses.",
+ MRI->getVRegDef(*I));
+ }
+
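The new check leans on calcRegsRequired: a vreg lands in vregsRequired of the entry block only if some use can be reached from entry without crossing a def. A comment-form sketch of the failing shape:

//   entry:                 ; no def of %v
//     br loop
//   loop:
//     %u = add %v, 1       ; use of %v reaches entry with no def in between
// In SSA form that is precisely a def failing to dominate a use.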
if (LiveVars)
verifyLiveVariables();
if (LiveInts)
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index ec1f2b4..6246c21 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -564,7 +564,8 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
addPass(RegisterCoalescerID);
// PreRA instruction scheduling.
- addPass(MachineSchedulerID);
+ if (addPass(MachineSchedulerID) != &NoPassID)
+ printAndVerify("After Machine Scheduling");
// Add the selected register allocation pass.
PM.add(RegAllocPass);
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index e59aa9d..24d3e5a 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -23,7 +23,6 @@
#include "AggressiveAntiDepBreaker.h"
#include "CriticalAntiDepBreaker.h"
#include "RegisterClassInfo.h"
-#include "ScheduleDAGInstrs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
@@ -32,6 +31,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetLowering.h"
@@ -127,6 +127,9 @@ namespace {
/// LiveRegs - true if the register is live.
BitVector LiveRegs;
+ /// The schedule. Null SUnit*'s represent noop instructions.
+ std::vector<SUnit*> Sequence;
+
public:
SchedulePostRATDList(
MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
@@ -136,23 +139,34 @@ namespace {
~SchedulePostRATDList();
- /// StartBlock - Initialize register live-range state for scheduling in
+ /// startBlock - Initialize register live-range state for scheduling in
/// this block.
///
- void StartBlock(MachineBasicBlock *BB);
+ void startBlock(MachineBasicBlock *BB);
+
+ /// Initialize the scheduler state for the next scheduling region.
+ virtual void enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount);
+
+ /// Notify that the scheduler has finished scheduling the current region.
+ virtual void exitRegion();
/// Schedule - Schedule the instruction range using list scheduling.
///
- void Schedule();
+ void schedule();
+
+ void EmitSchedule();
/// Observe - Update liveness information to account for the current
/// instruction, which will not be scheduled.
///
void Observe(MachineInstr *MI, unsigned Count);
- /// FinishBlock - Clean up register live-range state.
+ /// finishBlock - Clean up register live-range state.
///
- void FinishBlock();
+ void finishBlock();
/// FixupKills - Fix register kill flags that have been made
/// invalid due to scheduling
@@ -170,6 +184,8 @@ namespace {
// adjustments may be made to the instruction if necessary. Return
// true if the operand has been deleted, false if not.
bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO);
+
+ void dumpSchedule() const;
};
}
@@ -202,6 +218,35 @@ SchedulePostRATDList::~SchedulePostRATDList() {
delete AntiDepBreak;
}
+/// Initialize state associated with the next scheduling region.
+void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount) {
+ ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
+ Sequence.clear();
+}
+
+/// Print the schedule before exiting the region.
+void SchedulePostRATDList::exitRegion() {
+ DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+ ScheduleDAGInstrs::exitRegion();
+}
+
+/// dumpSchedule - dump the scheduled Sequence.
+void SchedulePostRATDList::dumpSchedule() const {
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ SU->dump(this);
+ else
+ dbgs() << "**** NOOP ****\n";
+ }
+}
+
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
TII = Fn.getTarget().getInstrInfo();
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
@@ -256,7 +301,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
#endif
// Initialize register live-range state for scheduling in this block.
- Scheduler.StartBlock(MBB);
+ Scheduler.startBlock(MBB);
// Schedule each sequence of instructions not interrupted by a label
// or anything else that effectively needs to shut down scheduling.
@@ -268,7 +313,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
// post-ra we don't gain anything by scheduling across calls since we
// don't need to worry about register pressure.
if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) {
- Scheduler.Run(MBB, I, Current, CurrentCount);
+ Scheduler.enterRegion(MBB, I, Current, CurrentCount);
+ Scheduler.schedule();
+ Scheduler.exitRegion();
Scheduler.EmitSchedule();
Current = MI;
CurrentCount = Count - 1;
@@ -282,11 +329,13 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
assert(Count == 0 && "Instruction count mismatch!");
assert((MBB->begin() == Current || CurrentCount != 0) &&
"Instruction count mismatch!");
- Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount);
+ Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount);
+ Scheduler.schedule();
+ Scheduler.exitRegion();
Scheduler.EmitSchedule();
// Clean up register live-range state.
- Scheduler.FinishBlock();
+ Scheduler.finishBlock();
// Update register kills
Scheduler.FixupKills(MBB);
@@ -298,9 +347,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
/// StartBlock - Initialize register live-range state for scheduling in
/// this block.
///
-void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
+void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) {
// Call the superclass.
- ScheduleDAGInstrs::StartBlock(BB);
+ ScheduleDAGInstrs::startBlock(BB);
// Reset the hazard recognizer and anti-dep breaker.
HazardRec->Reset();
@@ -310,14 +359,14 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
/// Schedule - Schedule the instruction range using list scheduling.
///
-void SchedulePostRATDList::Schedule() {
+void SchedulePostRATDList::schedule() {
// Build the scheduling graph.
- BuildSchedGraph(AA);
+ buildSchedGraph(AA);
if (AntiDepBreak != NULL) {
unsigned Broken =
- AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos,
- InsertPosIndex, DbgValues);
+ AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd,
+ EndIndex, DbgValues);
if (Broken != 0) {
// We made changes. Update the dependency graph.
@@ -326,11 +375,8 @@ void SchedulePostRATDList::Schedule() {
// the def's anti-dependence *and* output-dependence edges due to
// that register, and add new anti-dependence and output-dependence
// edges based on the next live range of the register.
- SUnits.clear();
- Sequence.clear();
- EntrySU = SUnit();
- ExitSU = SUnit();
- BuildSchedGraph(AA);
+ ScheduleDAG::clearDAG();
+ buildSchedGraph(AA);
NumFixedAnti += Broken;
}
@@ -350,17 +396,17 @@ void SchedulePostRATDList::Schedule() {
///
void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
if (AntiDepBreak != NULL)
- AntiDepBreak->Observe(MI, Count, InsertPosIndex);
+ AntiDepBreak->Observe(MI, Count, EndIndex);
}
/// FinishBlock - Clean up register live-range state.
///
-void SchedulePostRATDList::FinishBlock() {
+void SchedulePostRATDList::finishBlock() {
if (AntiDepBreak != NULL)
AntiDepBreak->FinishBlock();
// Call the superclass.
- ScheduleDAGInstrs::FinishBlock();
+ ScheduleDAGInstrs::finishBlock();
}
/// StartBlockForKills - Initialize register live-range state for updating kills
@@ -589,7 +635,7 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
ReleaseSuccessors(SU);
SU->isScheduled = true;
- AvailableQueue.ScheduledNode(SU);
+ AvailableQueue.scheduledNode(SU);
}
/// ListScheduleTopDown - The main loop of list scheduling for top-down
@@ -703,6 +749,46 @@ void SchedulePostRATDList::ListScheduleTopDown() {
}
#ifndef NDEBUG
- VerifySchedule(/*isBottomUp=*/false);
-#endif
+ unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false);
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(Sequence.size() - Noops == ScheduledNodes &&
+ "The number of nodes scheduled doesn't match the expected number!");
+#endif // NDEBUG
+}
+
+// EmitSchedule - Emit the machine code in scheduled order.
+void SchedulePostRATDList::EmitSchedule() {
+ RegionBegin = RegionEnd;
+
+ // If first instruction was a DBG_VALUE then put it back.
+ if (FirstDbgValue)
+ BB->splice(RegionEnd, BB, FirstDbgValue);
+
+ // Then re-insert them according to the given schedule.
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ BB->splice(RegionEnd, BB, SU->getInstr());
+ else
+ // Null SUnit* is a noop.
+ TII->insertNoop(*BB, RegionEnd);
+
+ // Update the Begin iterator, as the first instruction in the block
+ // may have been scheduled later.
+ if (i == 0)
+ RegionBegin = prior(RegionEnd);
+ }
+
+ // Reinsert any remaining debug_values.
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+ DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+ std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
+ MachineInstr *DbgValue = P.first;
+ MachineBasicBlock::iterator OrigPrivMI = P.second;
+ BB->splice(++OrigPrivMI, BB, DbgValue);
+ }
+ DbgValues.clear();
+ FirstDbgValue = NULL;
}
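
EmitSchedule above rebuilds the block in place: each scheduled instruction is
spliced to the region end in schedule order, and a null SUnit becomes a noop.
The splice mechanics in portable C++, using std::list because its iterators,
like MachineBasicBlock iterators, survive the move:

    #include <iostream>
    #include <iterator>
    #include <list>
    #include <vector>

    int main() {
      std::list<char> Block = {'a', 'b', 'c'};  // original order
      using It = std::list<char>::iterator;
      It A = Block.begin(), B = std::next(A), C = std::next(B);
      std::vector<It> Sequence = {C, A, B};     // scheduled order

      It RegionEnd = Block.end();
      for (It SU : Sequence)
        Block.splice(RegionEnd, Block, SU);     // move before the region end

      for (char X : Block) std::cout << X;      // prints "cab"
      std::cout << '\n';
    }
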
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index c27a485..e09b7f8 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -1139,7 +1139,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
// Add the clobber lists for all the instructions we skipped earlier.
for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator
I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I)
- if (const unsigned *Defs = (*I)->getImplicitDefs())
+ if (const uint16_t *Defs = (*I)->getImplicitDefs())
while (*Defs)
MRI->setPhysRegUsed(*Defs++);
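
Implicit def lists in MCInstrDesc are zero-terminated arrays; this commit
narrows their element type from unsigned to uint16_t, presumably to shrink
the generated tables. The traversal idiom is unchanged:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // 0 terminates the list; real register numbers are nonzero.
      static const uint16_t ImplicitDefs[] = {11, 12, 0};
      for (const uint16_t *Defs = ImplicitDefs; *Defs; ++Defs)
        std::printf("marking phys reg %u used\n", (unsigned)*Defs);
    }
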
diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h
index ef0c508..310b933 100644
--- a/lib/CodeGen/RegisterCoalescer.h
+++ b/lib/CodeGen/RegisterCoalescer.h
@@ -47,7 +47,7 @@ namespace llvm {
/// CrossClass - True when both regs are virtual, and newRC is constrained.
bool CrossClass;
- /// Flipped - True when DstReg and SrcReg are reversed from the oriignal
+ /// Flipped - True when DstReg and SrcReg are reversed from the original
/// copy instruction.
bool Flipped;
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 94b28b6..8fd6426 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -46,42 +46,17 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf)
ScheduleDAG::~ScheduleDAG() {}
-/// getInstrDesc helper to handle SDNodes.
-const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
- if (!Node || !Node->isMachineOpcode()) return NULL;
- return &TII->get(Node->getMachineOpcode());
-}
-
-/// dump - dump the schedule.
-void ScheduleDAG::dumpSchedule() const {
- for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
- if (SUnit *SU = Sequence[i])
- SU->dump(this);
- else
- dbgs() << "**** NOOP ****\n";
- }
-}
-
-
-/// Run - perform scheduling.
-///
-void ScheduleDAG::Run(MachineBasicBlock *bb,
- MachineBasicBlock::iterator insertPos) {
- BB = bb;
- InsertPos = insertPos;
-
+/// Clear the DAG state (e.g. between scheduling regions).
+void ScheduleDAG::clearDAG() {
SUnits.clear();
- Sequence.clear();
EntrySU = SUnit();
ExitSU = SUnit();
+}
- Schedule();
-
- DEBUG({
- dbgs() << "*** Final schedule ***\n";
- dumpSchedule();
- dbgs() << '\n';
- });
+/// getInstrDesc helper to handle SDNodes.
+const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
+ if (!Node || !Node->isMachineOpcode()) return NULL;
+ return &TII->get(Node->getMachineOpcode());
}
/// addPred - This adds the specified edge as a pred of the current node if
@@ -346,13 +321,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
}
#ifndef NDEBUG
-/// VerifySchedule - Verify that all SUnits were scheduled and that
-/// their state is consistent.
+/// VerifyScheduledDAG - Verify that all SUnits were scheduled and that
+/// their state is consistent. Return the number of scheduled nodes.
///
-void ScheduleDAG::VerifySchedule(bool isBottomUp) {
+unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
bool AnyNotSched = false;
unsigned DeadNodes = 0;
- unsigned Noops = 0;
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
if (!SUnits[i].isScheduled) {
if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
@@ -393,12 +367,8 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) {
}
}
}
- for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
- if (!Sequence[i])
- ++Noops;
assert(!AnyNotSched);
- assert(Sequence.size() + DeadNodes - Noops == SUnits.size() &&
- "The number of nodes scheduled doesn't match the expected number!");
+ return SUnits.size() - DeadNodes;
}
#endif
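
VerifyScheduledDAG no longer knows about an emitted Sequence, so it returns
the number of scheduled nodes and leaves noop accounting to whichever
scheduler owns the sequence, as the post-RA hunk earlier in this patch does.
The resulting invariant, spelled out as a self-contained check:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct SUnit {};

    int main() {
      SUnit S1, S2;
      std::vector<SUnit*> Sequence = {&S1, nullptr, &S2}; // nullptr == noop

      std::size_t Noops = 0;
      for (SUnit *SU : Sequence)
        if (!SU) ++Noops;

      std::size_t ScheduledNodes = 2; // what VerifyScheduledDAG would return
      assert(Sequence.size() - Noops == ScheduledNodes);
    }
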
diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp
deleted file mode 100644
index f8b1bc7..0000000
--- a/lib/CodeGen/ScheduleDAGEmit.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This implements the Emit routines for the ScheduleDAG class, which creates
-// MachineInstrs according to the computed schedule.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "pre-RA-sched"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-using namespace llvm;
-
-void ScheduleDAG::EmitNoop() {
- TII->insertNoop(*BB, InsertPos);
-}
-
-void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
- DenseMap<SUnit*, unsigned> &VRBaseMap) {
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl()) continue; // ignore chain preds
- if (I->getSUnit()->CopyDstRC) {
- // Copy to physical register.
- DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
- assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
- // Find the destination physical register.
- unsigned Reg = 0;
- for (SUnit::const_succ_iterator II = SU->Succs.begin(),
- EE = SU->Succs.end(); II != EE; ++II) {
- if (II->isCtrl()) continue; // ignore chain preds
- if (II->getReg()) {
- Reg = II->getReg();
- break;
- }
- }
- BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
- .addReg(VRI->second);
- } else {
- // Copy from physical register.
- assert(I->getReg() && "Unknown physical register!");
- unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
- bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
- (void)isNew; // Silence compiler warning.
- assert(isNew && "Node emitted out of order - early");
- BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
- .addReg(I->getReg());
- }
- break;
- }
-}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index c0ccdb3..6be1ab7 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sched-instrs"
-#include "ScheduleDAGInstrs.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -22,6 +21,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -38,30 +38,15 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
bool IsPostRAFlag,
LiveIntervals *lis)
: ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
- InstrItins(mf.getTarget().getInstrItineraryData()), IsPostRA(IsPostRAFlag),
- LIS(lis), UnitLatencies(false), LoopRegs(MLI, MDT), FirstDbgValue(0) {
+ InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis),
+ IsPostRA(IsPostRAFlag), UnitLatencies(false), LoopRegs(MLI, MDT),
+ FirstDbgValue(0) {
assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
DbgValues.clear();
assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
"Virtual registers must be removed prior to PostRA scheduling");
}
-/// Run - perform scheduling.
-///
-void ScheduleDAGInstrs::Run(MachineBasicBlock *bb,
- MachineBasicBlock::iterator begin,
- MachineBasicBlock::iterator end,
- unsigned endcount) {
- BB = bb;
- Begin = begin;
- InsertPosIndex = endcount;
-
- // Check to see if the scheduler cares about latencies.
- UnitLatencies = ForceUnitLatencies();
-
- ScheduleDAG::Run(bb, end);
-}
-
/// getUnderlyingObjectFromInt - This is the function that does the work of
/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
@@ -141,28 +126,58 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
return 0;
}
-void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
+void ScheduleDAGInstrs::startBlock(MachineBasicBlock *BB) {
LoopRegs.Deps.clear();
if (MachineLoop *ML = MLI.getLoopFor(BB))
if (BB == ML->getLoopLatch())
LoopRegs.VisitLoop(ML);
}
+void ScheduleDAGInstrs::finishBlock() {
+ // Nothing to do.
+}
+
/// Initialize the map with the number of registers.
-void ScheduleDAGInstrs::Reg2SUnitsMap::setRegLimit(unsigned Limit) {
+void Reg2SUnitsMap::setRegLimit(unsigned Limit) {
PhysRegSet.setUniverse(Limit);
SUnits.resize(Limit);
}
/// Clear the map without deallocating storage.
-void ScheduleDAGInstrs::Reg2SUnitsMap::clear() {
+void Reg2SUnitsMap::clear() {
for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) {
SUnits[*I].clear();
}
PhysRegSet.clear();
}
-/// AddSchedBarrierDeps - Add dependencies from instructions in the current
+/// Initialize the DAG and common scheduler state for the current scheduling
+/// region. This does not actually create the DAG, only clears it. The
+/// scheduling driver may call buildSchedGraph multiple times per scheduling
+/// region.
+void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount) {
+ BB = bb;
+ RegionBegin = begin;
+ RegionEnd = end;
+ EndIndex = endcount;
+ MISUnitMap.clear();
+
+ // Check to see if the scheduler cares about latencies.
+ UnitLatencies = forceUnitLatencies();
+
+ ScheduleDAG::clearDAG();
+}
+
+/// Close the current scheduling region. Don't clear any state in case the
+/// driver wants to refer to the previous scheduling region.
+void ScheduleDAGInstrs::exitRegion() {
+ // Nothing to do.
+}
+
+/// addSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to scheduling barrier by adding
/// the exit SU to the register defs and use list. This is because we want to
/// make sure instructions which define registers that are either used by
@@ -170,8 +185,8 @@ void ScheduleDAGInstrs::Reg2SUnitsMap::clear() {
/// especially important when the definition latency of the return value(s)
/// are too high to be hidden by the branch or when the liveout registers
/// used by instructions in the fallthrough block.
-void ScheduleDAGInstrs::AddSchedBarrierDeps() {
- MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0;
+void ScheduleDAGInstrs::addSchedBarrierDeps() {
+ MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0;
ExitSU.setInstr(ExitMI);
bool AllDepKnown = ExitMI &&
(ExitMI->isCall() || ExitMI->isBarrier());
@@ -186,19 +201,21 @@ void ScheduleDAGInstrs::AddSchedBarrierDeps() {
if (TRI->isPhysicalRegister(Reg))
Uses[Reg].push_back(&ExitSU);
- else
+ else {
assert(!IsPostRA && "Virtual register encountered after regalloc.");
+ addVRegUseDeps(&ExitSU, i);
+ }
}
} else {
// For others, e.g. fallthrough, conditional branch, assume the exit
// uses all the registers that are livein to the successor blocks.
- SmallSet<unsigned, 8> Seen;
+ assert(Uses.empty() && "Uses in set before adding deps?");
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
unsigned Reg = *I;
- if (Seen.insert(Reg))
+ if (!Uses.contains(Reg))
Uses[Reg].push_back(&ExitSU);
}
}
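
With Uses backed by Reg2SUnitsMap, membership can be asked of the map itself,
so the temporary Seen set is gone while each live-in register still gets its
ExitSU entry at most once. The same shape, with std::map standing in for the
patch's Reg2SUnitsMap:

    #include <iostream>
    #include <map>
    #include <vector>

    struct SUnit {};

    int main() {
      SUnit ExitSU;
      std::map<unsigned, std::vector<SUnit*>> Uses;

      unsigned LiveIns[] = {5, 7, 5}; // reg 5 is live into two successors
      for (unsigned Reg : LiveIns)
        if (!Uses.count(Reg))         // Uses.contains(Reg) in the patch
          Uses[Reg].push_back(&ExitSU);

      std::cout << Uses.size() << " registers mapped\n"; // prints 2
    }
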
@@ -246,7 +263,7 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
// perform its own adjustments.
const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias);
if (!UnitLatencies) {
- ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
+ computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
}
UseSU->addPred(dep);
@@ -436,7 +453,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
if (!UnitLatencies) {
// Adjust the dependence latency using operand def/use information, then
// allow the target to perform its own adjustments.
- ComputeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
+ computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
}
@@ -455,20 +472,23 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
///
/// Map each real instruction to its SUnit.
///
-/// After initSUnits, the SUnits vector is cannot be resized and the scheduler
-/// may hang onto SUnit pointers. We may relax this in the future by using SUnit
-/// IDs instead of pointers.
+/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
+/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
+/// instead of pointers.
+///
+/// MachineScheduler relies on initSUnits numbering the nodes by their order in
+/// the original instruction list.
void ScheduleDAGInstrs::initSUnits() {
// We'll be allocating one SUnit for each real instruction in the region,
// which is contained within a basic block.
SUnits.reserve(BB->size());
- for (MachineBasicBlock::iterator I = Begin; I != InsertPos; ++I) {
+ for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
MachineInstr *MI = I;
if (MI->isDebugValue())
continue;
- SUnit *SU = NewSUnit(MI);
+ SUnit *SU = newSUnit(MI);
MISUnitMap[MI] = SU;
SU->isCall = MI->isCall();
@@ -478,11 +498,11 @@ void ScheduleDAGInstrs::initSUnits() {
if (UnitLatencies)
SU->Latency = 1;
else
- ComputeLatency(SU);
+ computeLatency(SU);
}
}
-void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
+void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
// Create an SUnit for each real instruction.
initSUnits();
@@ -517,11 +537,11 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// Model data dependencies between instructions being scheduled and the
// ExitSU.
- AddSchedBarrierDeps();
+ addSchedBarrierDeps();
// Walk the list of instructions, from bottom moving up.
MachineInstr *PrevMI = NULL;
- for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
+ for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
MII != MIE; --MII) {
MachineInstr *MI = prior(MII);
if (MI && PrevMI) {
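
The rewritten loop above walks the half-open region [RegionBegin, RegionEnd)
from the bottom, reading each instruction through prior() before decrementing
the iterator. The same idiom in portable C++, where std::prev is the standard
spelling of LLVM's prior():

    #include <iostream>
    #include <iterator>
    #include <list>

    int main() {
      std::list<int> Region = {1, 2, 3, 4};
      auto RegionBegin = Region.begin(), RegionEnd = Region.end();
      for (auto MII = RegionEnd, MIE = RegionBegin; MII != MIE; --MII)
        std::cout << *std::prev(MII) << ' '; // prints 4 3 2 1
      std::cout << '\n';
    }
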
@@ -712,14 +732,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
Uses.clear();
VRegDefs.clear();
PendingLoads.clear();
- MISUnitMap.clear();
-}
-
-void ScheduleDAGInstrs::FinishBlock() {
- // Nothing to do.
}
-void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
+void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
// Compute the latency for the node.
if (!InstrItins || InstrItins->isEmpty()) {
SU->Latency = 1;
@@ -733,7 +748,7 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
}
}
-void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
+void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
SDep& dep) const {
if (!InstrItins || InstrItins->isEmpty())
return;
@@ -808,37 +823,8 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
return oss.str();
}
-// EmitSchedule - Emit the machine code in scheduled order.
-MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
- Begin = InsertPos;
-
- // If first instruction was a DBG_VALUE then put it back.
- if (FirstDbgValue)
- BB->splice(InsertPos, BB, FirstDbgValue);
-
- // Then re-insert them according to the given schedule.
- for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
- if (SUnit *SU = Sequence[i])
- BB->splice(InsertPos, BB, SU->getInstr());
- else
- // Null SUnit* is a noop.
- EmitNoop();
-
- // Update the Begin iterator, as the first instruction in the block
- // may have been scheduled later.
- if (i == 0)
- Begin = prior(InsertPos);
- }
-
- // Reinsert any remaining debug_values.
- for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
- DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
- std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
- MachineInstr *DbgValue = P.first;
- MachineBasicBlock::iterator OrigPrivMI = P.second;
- BB->splice(++OrigPrivMI, BB, DbgValue);
- }
- DbgValues.clear();
- FirstDbgValue = NULL;
- return BB;
+/// Return the basic block label. It is not necessarily unique because a block
+/// may contain multiple scheduling regions. But it is fine for visualization.
+std::string ScheduleDAGInstrs::getDAGName() const {
+ return "dag." + BB->getFullName();
}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
deleted file mode 100644
index c7ffed9..0000000
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ /dev/null
@@ -1,306 +0,0 @@
-//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the ScheduleDAGInstrs class, which implements
-// scheduling for a MachineInstr-based dependency graph.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SCHEDULEDAGINSTRS_H
-#define SCHEDULEDAGINSTRS_H
-
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SparseSet.h"
-#include <map>
-
-namespace llvm {
- class MachineLoopInfo;
- class MachineDominatorTree;
- class LiveIntervals;
-
- /// LoopDependencies - This class analyzes loop-oriented register
- /// dependencies, which are used to guide scheduling decisions.
- /// For example, loop induction variable increments should be
- /// scheduled as soon as possible after the variable's last use.
- ///
- class LLVM_LIBRARY_VISIBILITY LoopDependencies {
- const MachineLoopInfo &MLI;
- const MachineDominatorTree &MDT;
-
- public:
- typedef std::map<unsigned, std::pair<const MachineOperand *, unsigned> >
- LoopDeps;
- LoopDeps Deps;
-
- LoopDependencies(const MachineLoopInfo &mli,
- const MachineDominatorTree &mdt) :
- MLI(mli), MDT(mdt) {}
-
- /// VisitLoop - Clear out any previous state and analyze the given loop.
- ///
- void VisitLoop(const MachineLoop *Loop) {
- assert(Deps.empty() && "stale loop dependencies");
-
- MachineBasicBlock *Header = Loop->getHeader();
- SmallSet<unsigned, 8> LoopLiveIns;
- for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(),
- LE = Header->livein_end(); LI != LE; ++LI)
- LoopLiveIns.insert(*LI);
-
- const MachineDomTreeNode *Node = MDT.getNode(Header);
- const MachineBasicBlock *MBB = Node->getBlock();
- assert(Loop->contains(MBB) &&
- "Loop does not contain header!");
- VisitRegion(Node, MBB, Loop, LoopLiveIns);
- }
-
- private:
- void VisitRegion(const MachineDomTreeNode *Node,
- const MachineBasicBlock *MBB,
- const MachineLoop *Loop,
- const SmallSet<unsigned, 8> &LoopLiveIns) {
- unsigned Count = 0;
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- const MachineInstr *MI = I;
- if (MI->isDebugValue())
- continue;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isUse())
- continue;
- unsigned MOReg = MO.getReg();
- if (LoopLiveIns.count(MOReg))
- Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count)));
- }
- ++Count; // Not every iteration due to dbg_value above.
- }
-
- const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
- for (std::vector<MachineDomTreeNode*>::const_iterator I =
- Children.begin(), E = Children.end(); I != E; ++I) {
- const MachineDomTreeNode *ChildNode = *I;
- MachineBasicBlock *ChildBlock = ChildNode->getBlock();
- if (Loop->contains(ChildBlock))
- VisitRegion(ChildNode, ChildBlock, Loop, LoopLiveIns);
- }
- }
- };
-
- /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
- /// MachineInstrs.
- class LLVM_LIBRARY_VISIBILITY ScheduleDAGInstrs : public ScheduleDAG {
- const MachineLoopInfo &MLI;
- const MachineDominatorTree &MDT;
- const MachineFrameInfo *MFI;
- const InstrItineraryData *InstrItins;
-
- /// isPostRA flag indicates vregs cannot be present.
- bool IsPostRA;
-
- /// Live Intervals provides reaching defs in preRA scheduling.
- LiveIntervals *LIS;
-
- DenseMap<MachineInstr*, SUnit*> MISUnitMap;
-
- /// UnitLatencies (misnamed) flag avoids computing def-use latencies, using
- /// the def-side latency only.
- bool UnitLatencies;
-
- /// Combine a SparseSet with a 1x1 vector to track physical registers.
- /// The SparseSet allows iterating over the (few) live registers for quickly
- /// comparing against a regmask or clearing the set.
- ///
- /// Storage for the map is allocated once for the pass. The map can be
- /// cleared between scheduling regions without freeing unused entries.
- class Reg2SUnitsMap {
- SparseSet<unsigned> PhysRegSet;
- std::vector<std::vector<SUnit*> > SUnits;
- public:
- typedef SparseSet<unsigned>::const_iterator const_iterator;
-
- // Allow iteration over register numbers (keys) in the map. If needed, we
- // can provide an iterator over SUnits (values) as well.
- const_iterator reg_begin() const { return PhysRegSet.begin(); }
- const_iterator reg_end() const { return PhysRegSet.end(); }
-
- /// Initialize the map with the number of registers.
- /// If the map is already large enough, no allocation occurs.
- /// For simplicity we expect the map to be empty().
- void setRegLimit(unsigned Limit);
-
- /// Returns true if the map is empty.
- bool empty() const { return PhysRegSet.empty(); }
-
- /// Clear the map without deallocating storage.
- void clear();
-
- bool contains(unsigned Reg) const { return PhysRegSet.count(Reg); }
-
- /// If this register is mapped, return its existing SUnits vector.
- /// Otherwise map the register and return an empty SUnits vector.
- std::vector<SUnit *> &operator[](unsigned Reg) {
- bool New = PhysRegSet.insert(Reg).second;
- assert((!New || SUnits[Reg].empty()) && "stale SUnits vector");
- (void)New;
- return SUnits[Reg];
- }
-
- /// Erase an existing element without freeing memory.
- void erase(unsigned Reg) {
- PhysRegSet.erase(Reg);
- SUnits[Reg].clear();
- }
- };
- /// Defs, Uses - Remember where defs and uses of each register are as we
- /// iterate upward through the instructions. This is allocated here instead
- /// of inside BuildSchedGraph to avoid the need for it to be initialized and
- /// destructed for each block.
- Reg2SUnitsMap Defs;
- Reg2SUnitsMap Uses;
-
- /// An individual mapping from virtual register number to SUnit.
- struct VReg2SUnit {
- unsigned VirtReg;
- SUnit *SU;
-
- VReg2SUnit(unsigned reg, SUnit *su): VirtReg(reg), SU(su) {}
-
- unsigned getSparseSetKey() const {
- return TargetRegisterInfo::virtReg2Index(VirtReg);
- }
- };
- /// Use SparseSet as a SparseMap by relying on the fact that it never
- /// compares ValueT's, only unsigned keys. This allows the set to be cleared
- /// between scheduling regions in constant time as long as ValueT does not
- /// require a destructor.
- typedef SparseSet<VReg2SUnit> VReg2SUnitMap;
- /// Track the last instructon in this region defining each virtual register.
- VReg2SUnitMap VRegDefs;
-
- /// PendingLoads - Remember where unknown loads are after the most recent
- /// unknown store, as we iterate. As with Defs and Uses, this is here
- /// to minimize construction/destruction.
- std::vector<SUnit *> PendingLoads;
-
- /// LoopRegs - Track which registers are used for loop-carried dependencies.
- ///
- LoopDependencies LoopRegs;
-
- protected:
-
- /// DbgValues - Remember instruction that preceeds DBG_VALUE.
- typedef std::vector<std::pair<MachineInstr *, MachineInstr *> >
- DbgValueVector;
- DbgValueVector DbgValues;
- MachineInstr *FirstDbgValue;
-
- public:
- MachineBasicBlock::iterator Begin; // The beginning of the range to
- // be scheduled. The range extends
- // to InsertPos.
- unsigned InsertPosIndex; // The index in BB of InsertPos.
-
- explicit ScheduleDAGInstrs(MachineFunction &mf,
- const MachineLoopInfo &mli,
- const MachineDominatorTree &mdt,
- bool IsPostRAFlag,
- LiveIntervals *LIS = 0);
-
- virtual ~ScheduleDAGInstrs() {}
-
- /// NewSUnit - Creates a new SUnit and return a ptr to it.
- ///
- SUnit *NewSUnit(MachineInstr *MI) {
-#ifndef NDEBUG
- const SUnit *Addr = SUnits.empty() ? 0 : &SUnits[0];
-#endif
- SUnits.push_back(SUnit(MI, (unsigned)SUnits.size()));
- assert((Addr == 0 || Addr == &SUnits[0]) &&
- "SUnits std::vector reallocated on the fly!");
- SUnits.back().OrigNode = &SUnits.back();
- return &SUnits.back();
- }
-
-
- /// Run - perform scheduling.
- ///
- void Run(MachineBasicBlock *bb,
- MachineBasicBlock::iterator begin,
- MachineBasicBlock::iterator end,
- unsigned endindex);
-
- /// BuildSchedGraph - Build SUnits from the MachineBasicBlock that we are
- /// input.
- virtual void BuildSchedGraph(AliasAnalysis *AA);
-
- /// AddSchedBarrierDeps - Add dependencies from instructions in the current
- /// list of instructions being scheduled to scheduling barrier. We want to
- /// make sure instructions which define registers that are either used by
- /// the terminator or are live-out are properly scheduled. This is
- /// especially important when the definition latency of the return value(s)
- /// are too high to be hidden by the branch or when the liveout registers
- /// used by instructions in the fallthrough block.
- void AddSchedBarrierDeps();
-
- /// ComputeLatency - Compute node latency.
- ///
- virtual void ComputeLatency(SUnit *SU);
-
- /// ComputeOperandLatency - Override dependence edge latency using
- /// operand use/def information
- ///
- virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
- SDep& dep) const;
-
- virtual MachineBasicBlock *EmitSchedule();
-
- /// StartBlock - Prepare to perform scheduling in the given block.
- ///
- virtual void StartBlock(MachineBasicBlock *BB);
-
- /// Schedule - Order nodes according to selected style, filling
- /// in the Sequence member.
- ///
- virtual void Schedule() = 0;
-
- /// FinishBlock - Clean up after scheduling in the given block.
- ///
- virtual void FinishBlock();
-
- virtual void dumpNode(const SUnit *SU) const;
-
- virtual std::string getGraphNodeLabel(const SUnit *SU) const;
-
- protected:
- SUnit *getSUnit(MachineInstr *MI) const {
- DenseMap<MachineInstr*, SUnit*>::const_iterator I = MISUnitMap.find(MI);
- if (I == MISUnitMap.end())
- return 0;
- return I->second;
- }
-
- void initSUnits();
- void addPhysRegDataDeps(SUnit *SU, const MachineOperand &MO);
- void addPhysRegDeps(SUnit *SU, unsigned OperIdx);
- void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
- void addVRegUseDeps(SUnit *SU, unsigned OperIdx);
-
- VReg2SUnitMap::iterator findVRegDef(unsigned VirtReg) {
- return VRegDefs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
- }
- };
-}
-
-#endif
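
The header is not gone; it moved to include/llvm/CodeGen (see the #include
change in ScheduleDAGInstrs.cpp above). Its Reg2SUnitsMap deserves a note: a
sparse set of live keys over a dense per-key value array, so clearing between
scheduling regions touches only the keys actually used. A self-contained
approximation of the pattern (simplified: LLVM's SparseSet gives O(1)
membership, while this stand-in scans the short live list):

    #include <cassert>
    #include <vector>

    template <typename T>
    class SparseKeyMap {
      std::vector<unsigned> Live;        // the (few) mapped keys
      std::vector<std::vector<T>> Slots; // one slot per possible key
    public:
      void setLimit(unsigned N) { Slots.resize(N); }
      bool contains(unsigned K) const {
        for (unsigned L : Live)
          if (L == K) return true;
        return false;
      }
      std::vector<T> &operator[](unsigned K) {
        if (!contains(K)) { Live.push_back(K); assert(Slots[K].empty()); }
        return Slots[K];
      }
      void clear() { // O(live keys); storage is kept for the next region
        for (unsigned K : Live) Slots[K].clear();
        Live.clear();
      }
    };

    int main() {
      SparseKeyMap<int> Defs;
      Defs.setLimit(256);
      Defs[42].push_back(1);
      assert(Defs.contains(42));
      Defs.clear(); // cheap between regions
      assert(!Defs.contains(42));
    }
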
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index 4251583..38feee9 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -41,12 +41,12 @@ namespace llvm {
static bool renderGraphFromBottomUp() {
return true;
}
-
+
static bool hasNodeAddressLabel(const SUnit *Node,
const ScheduleDAG *Graph) {
return true;
}
-
+
/// If you want to override the dot attributes printed for a particular
/// edge, override this method.
static std::string getEdgeAttributes(const SUnit *Node,
@@ -58,7 +58,7 @@ namespace llvm {
return "color=blue,style=dashed";
return "";
}
-
+
std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph);
static std::string getNodeAttributes(const SUnit *N,
@@ -81,18 +81,17 @@ std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
/// rendered using 'dot'.
///
-void ScheduleDAG::viewGraph() {
-// This code is only for debugging!
+void ScheduleDAG::viewGraph(const Twine &Name, const Twine &Title) {
+ // This code is only for debugging!
#ifndef NDEBUG
- if (BB->getBasicBlock())
- ViewGraph(this, "dag." + MF.getFunction()->getName(), false,
- "Scheduling-Units Graph for " + MF.getFunction()->getName() +
- ":" + BB->getBasicBlock()->getName());
- else
- ViewGraph(this, "dag." + MF.getFunction()->getName(), false,
- "Scheduling-Units Graph for " + MF.getFunction()->getName());
+ ViewGraph(this, Name, false, Title);
#else
errs() << "ScheduleDAG::viewGraph is only available in debug builds on "
<< "systems with Graphviz or gv!\n";
#endif // NDEBUG
}
+
+/// Out-of-line implementation with no arguments is handy for gdb.
+void ScheduleDAG::viewGraph() {
+ viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
+}
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index 9a79217..a6bdc3b 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -16,6 +16,7 @@ add_llvm_library(LLVMSelectionDAG
ScheduleDAGSDNodes.cpp
SelectionDAG.cpp
SelectionDAGBuilder.cpp
+ SelectionDAGDumper.cpp
SelectionDAGISel.cpp
SelectionDAGPrinter.cpp
ScheduleDAGVLIW.cpp
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1b148ad..7c4db97 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -80,7 +80,7 @@ namespace {
// visit, we pop off the order stack until we find an item that is
// also in the contents set. All operations are O(log N).
SmallPtrSet<SDNode*, 64> WorkListContents;
- std::vector<SDNode*> WorkListOrder;
+ SmallVector<SDNode*, 64> WorkListOrder;
// AA - Used for DAG load/store alias analysis.
AliasAnalysis &AA;
@@ -381,6 +381,7 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
/// specified expression for the same cost as the expression itself, or 2 if we
/// can compute the negated form more cheaply than the expression itself.
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
+ const TargetLowering &TLI,
const TargetOptions *Options,
unsigned Depth = 0) {
// No compile time optimizations on this type.
@@ -406,12 +407,17 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
// FIXME: determine better conditions for this xform.
if (!Options->UnsafeFPMath) return 0;
+ // After operation legalization, it might not be legal to create new FSUBs.
+ if (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType()))
+ return 0;
+
// fold (fsub (fadd A, B)) -> (fsub (fneg A), B)
- if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Options,
- Depth + 1))
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
+ Options, Depth + 1))
return V;
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
- return isNegatibleForFree(Op.getOperand(1), LegalOperations, Options,
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
Depth + 1);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
@@ -425,17 +431,17 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
if (Options->HonorSignDependentRoundingFPMath()) return 0;
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
- if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Options,
- Depth + 1))
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
+ Options, Depth + 1))
return V;
- return isNegatibleForFree(Op.getOperand(1), LegalOperations, Options,
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
Depth + 1);
case ISD::FP_EXTEND:
case ISD::FP_ROUND:
case ISD::FSIN:
- return isNegatibleForFree(Op.getOperand(0), LegalOperations, Options,
+ return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
Depth + 1);
}
}
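
isNegatibleForFree answers a three-way question: 0 means negation is not
free, 1 means the negated form costs the same, 2 means it is strictly
cheaper. Threading TLI through lets it return 0 when the FSUB it would
eventually force into existence is illegal after operation legalization. A
toy model of the 0/1/2 protocol; the node kinds and costs here are invented
for illustration:

    #include <iostream>

    struct Node {
      enum Kind { ConstFP, FNeg, Other } K;
      explicit Node(Kind K) : K(K) {}
    };

    // 0: not free to negate; 1: as cheap negated; 2: cheaper negated.
    static char isNegatibleForFree(const Node &N) {
      switch (N.K) {
      case Node::ConstFP: return 1; // just flip the constant's sign bit
      case Node::FNeg:    return 2; // stripping the fneg is strictly cheaper
      default:            return 0;
      }
    }

    int main() {
      std::cout << (int)isNegatibleForFree(Node(Node::FNeg)) << '\n';  // 2
      std::cout << (int)isNegatibleForFree(Node(Node::Other)) << '\n'; // 0
    }
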
@@ -464,6 +470,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+ DAG.getTargetLoweringInfo(),
&DAG.getTarget().Options, Depth+1))
return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
@@ -493,6 +500,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+ DAG.getTargetLoweringInfo(),
&DAG.getTarget().Options, Depth+1))
return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
@@ -997,8 +1005,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
// worklist *should* contain, and check the node we want to visit is should
// actually be visited.
do {
- N = WorkListOrder.back();
- WorkListOrder.pop_back();
+ N = WorkListOrder.pop_back_val();
} while (!WorkListContents.erase(N));
// If N has no uses, it is dead. Make sure to revisit all N's operands once
@@ -5507,11 +5514,13 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N1CFP->getValueAPF().isZero())
return N0;
// fold (fadd A, (fneg B)) -> (fsub A, B)
- if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options) == 2)
+ if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
+ isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
// fold (fadd (fneg A), B) -> (fsub B, A)
- if (isNegatibleForFree(N0, LegalOperations, &DAG.getTarget().Options) == 2)
+ if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
+ isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
GetNegatedExpression(N0, DAG, LegalOperations));
@@ -5549,16 +5558,33 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// fold (fsub 0, B) -> -B
if (DAG.getTarget().Options.UnsafeFPMath &&
N0CFP && N0CFP->getValueAPF().isZero()) {
- if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options))
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
- if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options))
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
+ // If 'unsafe math' is enabled, fold
+ // (fsub x, (fadd x, y)) -> (fneg y) &
+ // (fsub x, (fadd y, x)) -> (fneg y)
+ if (DAG.getTarget().Options.UnsafeFPMath) {
+ if (N1.getOpcode() == ISD::FADD) {
+ SDValue N10 = N1->getOperand(0);
+ SDValue N11 = N1->getOperand(1);
+
+ if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI,
+ &DAG.getTarget().Options))
+ return GetNegatedExpression(N11, DAG, LegalOperations);
+ else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI,
+ &DAG.getTarget().Options))
+ return GetNegatedExpression(N10, DAG, LegalOperations);
+ }
+ }
+
return SDValue();
}
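
The new fold rewrites (fsub x, (fadd x, y)) to (fneg y), which is sound
algebraically but not under IEEE-754 semantics, hence the UnsafeFPMath guard.
The cheapest counterexample is signed zero (rounding and NaN cases exist
too):

    #include <cmath>
    #include <cstdio>

    int main() {
      double X = 0.0, Y = 0.0;
      double Strict = X - (X + Y); // +0.0 under round-to-nearest
      double Folded = -Y;          // -0.0
      std::printf("%g vs %g, signbits %d vs %d\n", Strict, Folded,
                  std::signbit(Strict), std::signbit(Folded));
    }
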
@@ -5568,6 +5594,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// fold vector ops
if (VT.isVector()) {
@@ -5598,9 +5625,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
// fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations,
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
&DAG.getTarget().Options)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations,
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
&DAG.getTarget().Options)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
@@ -5628,6 +5655,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// fold vector ops
if (VT.isVector()) {
@@ -5641,9 +5669,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations,
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
&DAG.getTarget().Options)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations,
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
&DAG.getTarget().Options)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
@@ -5897,7 +5925,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (isNegatibleForFree(N0, LegalOperations, &DAG.getTarget().Options))
+ if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
+ &DAG.getTarget().Options))
return GetNegatedExpression(N0, DAG, LegalOperations);
// Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
@@ -6129,8 +6158,7 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that
/// uses N as its base pointer and that N may be folded in the load / store
-/// addressing mode. FIXME: This currently only looks for folding of
-/// [reg +/- imm] addressing modes.
+/// addressing mode.
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
SelectionDAG &DAG,
const TargetLowering &TLI) {
@@ -6150,15 +6178,19 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
if (N->getOpcode() == ISD::ADD) {
ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (Offset)
+ // [reg +/- imm]
AM.BaseOffs = Offset->getSExtValue();
else
- return false;
+ // [reg +/- reg]
+ AM.Scale = 1;
} else if (N->getOpcode() == ISD::SUB) {
ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (Offset)
+ // [reg +/- imm]
AM.BaseOffs = -Offset->getSExtValue();
else
- return false;
+ // [reg +/- reg]
+ AM.Scale = 1;
} else
return false;
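
Instead of giving up on a non-constant offset, the combine now describes it
as a scaled-register mode and lets the target's legality hook decide. A
cut-down mirror of the two cases; this AddrMode is a stand-in for
TargetLowering::AddrMode, not the real thing:

    #include <iostream>

    struct AddrMode {
      long long BaseOffs = 0; // immediate displacement
      int Scale = 0;          // nonzero: a second, scaled register operand
    };

    static AddrMode classify(bool HasConstOffset, long long Imm) {
      AddrMode AM;
      if (HasConstOffset)
        AM.BaseOffs = Imm; // [reg +/- imm]
      else
        AM.Scale = 1;      // [reg +/- reg]
      return AM;
    }

    int main() {
      AddrMode A = classify(true, -8), B = classify(false, 0);
      std::cout << A.BaseOffs << ' ' << A.Scale << '\n'; // -8 0
      std::cout << B.BaseOffs << ' ' << B.Scale << '\n'; // 0 1
    }
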
@@ -7187,6 +7219,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
EVT ExtVT = VT.getVectorElementType();
EVT LVT = ExtVT;
+ // If the result of load has to be truncated, then it's not necessarily
+ // profitable.
+ if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
+ return SDValue();
+
if (InVec.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
if (!InVec.hasOneUse())
@@ -7287,17 +7324,36 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// Note that this replacement assumes that the extractvalue is the only
// use of the load; that's okay because we don't want to perform this
// transformation in other cases anyway.
- SDValue Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
- LN0->getPointerInfo().getWithOffset(PtrOff),
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->isInvariant(), Align);
+ SDValue Load;
+ SDValue Chain;
+ if (NVT.bitsGT(LVT)) {
+ // If the result type of vextract is wider than the load, then issue an
+ // extending load instead.
+ ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
+ ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+ Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(),
+ NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
+ LVT, LN0->isVolatile(), LN0->isNonTemporal(), Align);
+ Chain = Load.getValue(1);
+ } else {
+ Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->isInvariant(), Align);
+ Chain = Load.getValue(1);
+ if (NVT.bitsLT(LVT))
+ Load = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Load);
+ else
+ Load = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NVT, Load);
+ }
WorkListRemover DeadNodes(*this);
SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
- SDValue To[] = { Load.getValue(0), Load.getValue(1) };
+ SDValue To[] = { Load, Chain };
DAG.ReplaceAllUsesOfValuesWith(From, To, 2, &DeadNodes);
// Since we're explcitly calling ReplaceAllUses, add the new node to the
// worklist explicitly as well.
AddToWorkList(Load.getNode());
+ AddUsersToWorkList(Load.getNode()); // Add users too
// Make sure to revisit this node to clean it up; it will usually be dead.
AddToWorkList(N);
return SDValue(N, 0);
@@ -7367,6 +7423,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
// will be type-legalized to complex code sequences.
// We perform this optimization only before the operation legalizer because we
// may introduce illegal operations.
+ // Create a new simpler BUILD_VECTOR sequence which other optimizations can
+ // turn into a single shuffle instruction.
if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) &&
ValidTypes) {
bool isLE = TLI.isLittleEndian();
@@ -7407,6 +7465,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
VecVT, &Ops[0], Ops.size());
+ // The new BUILD_VECTOR node has the potential to be further optimized.
+ AddToWorkList(BV.getNode());
// Bitcast to the desired type.
return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV);
}
@@ -7414,6 +7474,12 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
// at most two distinct vectors, turn this into a shuffle node.
+
+ // May only combine to shuffle after legalize if shuffle is legal.
+ if (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))
+ return SDValue();
+
SDValue VecIn1, VecIn2;
for (unsigned i = 0; i != NumInScalars; ++i) {
// Ignore undef inputs.
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index fd8ce78..9f4a44a 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -136,15 +136,8 @@ unsigned FastISel::getRegForValue(const Value *V) {
return 0;
}
- // Look up the value to see if we already have a register for it. We
- // cache values defined by Instructions across blocks, and other values
- // only locally. This is because Instructions already have the SSA
- // def-dominates-use requirement enforced.
- DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
- if (I != FuncInfo.ValueMap.end())
- return I->second;
-
- unsigned Reg = LocalValueMap[V];
+ // Look up the value to see if we already have a register for it.
+ unsigned Reg = lookUpRegForValue(V);
if (Reg != 0)
return Reg;
@@ -199,7 +192,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
uint32_t IntBitWidth = IntVT.getSizeInBits();
bool isExact;
(void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
- APFloat::rmTowardZero, &isExact);
+ APFloat::rmTowardZero, &isExact);
if (isExact) {
APInt IntVal(IntBitWidth, x);
@@ -577,12 +570,16 @@ bool FastISel::SelectCall(const User *I) {
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
if (!DIVariable(DI->getVariable()).Verify() ||
- !FuncInfo.MF->getMMI().hasDebugInfo())
+ !FuncInfo.MF->getMMI().hasDebugInfo()) {
+ DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
+ }
const Value *Address = DI->getAddress();
- if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address))
+ if (!Address || isa<UndefValue>(Address)) {
+ DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
+ }
unsigned Reg = 0;
unsigned Offset = 0;
@@ -590,16 +587,25 @@ bool FastISel::SelectCall(const User *I) {
// Some arguments' frame index is recorded during argument lowering.
Offset = FuncInfo.getArgumentFrameIndex(Arg);
if (Offset)
- Reg = TRI.getFrameRegister(*FuncInfo.MF);
+ Reg = TRI.getFrameRegister(*FuncInfo.MF);
}
if (!Reg)
- Reg = getRegForValue(Address);
+ Reg = lookUpRegForValue(Address);
+
+ if (!Reg && isa<Instruction>(Address) &&
+ (!isa<AllocaInst>(Address) ||
+ !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address))))
+ Reg = FuncInfo.InitializeRegForValue(Address);
if (Reg)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TargetOpcode::DBG_VALUE))
.addReg(Reg, RegState::Debug).addImm(Offset)
.addMetadata(DI->getVariable());
+ else
+ // We can't yet handle anything else here because it would require
+ // generating code, thus altering codegen because of debug info.
+ DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
}
case Intrinsic::dbg_value: {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 31df458..1b84b13 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3032,6 +3032,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Results[0].getValue(1));
break;
}
+ case ISD::FSUB: {
+ EVT VT = Node->getValueType(0);
+ assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
+ "Don't know how to expand this FP subtraction!");
+ Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
+ Results.push_back(Tmp1);
+ break;
+ }
case ISD::SUB: {
EVT VT = Node->getValueType(0);
assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
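
The new expansion just above rewrites x - y as x + (-y) when FSUB is illegal
but FADD and FNEG are not. IEEE-754 negation only flips the sign bit, so the
rewrite is bit-exact and needs no fast-math permission:

    #include <cassert>

    int main() {
      double X = 1.5, Y = 0.25;
      double ViaSub = X - Y;
      double ViaAddNeg = X + (-Y); // FNEG then FADD, as the expansion emits
      assert(ViaSub == ViaAddNeg); // holds for all non-NaN inputs
    }
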
@@ -3590,10 +3600,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp1, Tmp2, Node->getOperand(2)));
break;
}
+ case ISD::FDIV:
case ISD::FPOW: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
- Tmp3 = DAG.getNode(ISD::FPOW, dl, NVT, Tmp1, Tmp2);
+ Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
Tmp3, DAG.getIntPtrConstant(0)));
break;
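
Promotion widens both operands, performs the operation in the wider type, and
rounds the result back, and is now shared between FDIV and FPOW by
dispatching on Node->getOpcode(). In scalar form (float promoted to double;
for division the double significand is wide enough that the extra rounding
step does not change the result, while for pow there is no such guarantee):

    #include <cmath>
    #include <cstdio>

    int main() {
      float A = 1.0f, B = 3.0f;
      // FP_EXTEND both operands, operate in the wide type, FP_ROUND back.
      float Div = (float)((double)A / (double)B);
      float Pw  = (float)std::pow((double)A, (double)B);
      std::printf("%a %a\n", Div, Pw);
    }
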
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 1a27f3f..ff0136e 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -470,7 +470,7 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
/// Main resource tracking point.
-void ResourcePriorityQueue::ScheduledNode(SUnit *SU) {
+void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
// Use NULL entry as an event marker to reset
// the DFA state.
if (!SU) {
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 34ee1f3..24da432 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -43,7 +43,7 @@ namespace {
SmallVector<SUnit *, 16> Queue;
bool empty() const { return Queue.empty(); }
-
+
void push(SUnit *U) {
Queue.push_back(U);
}
@@ -101,8 +101,8 @@ private:
bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
void ListScheduleBottomUp();
- /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies.
- bool ForceUnitLatencies() const { return true; }
+ /// forceUnitLatencies - The fast scheduler doesn't care about real latencies.
+ bool forceUnitLatencies() const { return true; }
};
} // end anonymous namespace
@@ -112,7 +112,7 @@ void ScheduleDAGFast::Schedule() {
DEBUG(dbgs() << "********** List Scheduling **********\n");
NumLiveRegs = 0;
- LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegDefs.resize(TRI->getNumRegs(), NULL);
LiveRegCycles.resize(TRI->getNumRegs(), 0);
// Build the scheduling graph.
@@ -159,7 +159,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
ReleasePred(SU, &*I);
if (I->isAssignedRegDep()) {
// This is a physical register dependency and it's impossible or
- // expensive to copy the register. Make sure nothing that can
+ // expensive to copy the register. Make sure nothing that can
// clobber the register is scheduled between the predecessor and
// this node.
if (!LiveRegDefs[I->getReg()]) {
@@ -245,10 +245,10 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
SDValue(LoadNode, 1));
- SUnit *NewSU = NewSUnit(N);
+ SUnit *NewSU = newSUnit(N);
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NewSU->NodeNum);
-
+
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
@@ -268,7 +268,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
LoadSU = &SUnits[LoadNode->getNodeId()];
isNewLoad = false;
} else {
- LoadSU = NewSUnit(LoadNode);
+ LoadSU = newSUnit(LoadNode);
LoadNode->setNodeId(LoadSU->NodeNum);
}
@@ -329,7 +329,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
D.setSUnit(LoadSU);
AddPred(SuccDep, D);
}
- }
+ }
if (isNewLoad) {
AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency));
}
@@ -381,11 +381,11 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
const TargetRegisterClass *DestRC,
const TargetRegisterClass *SrcRC,
SmallVector<SUnit*, 2> &Copies) {
- SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL));
+ SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL));
CopyFromSU->CopySrcRC = SrcRC;
CopyFromSU->CopyDstRC = DestRC;
- SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL));
+ SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(NULL));
CopyToSU->CopySrcRC = DestRC;
CopyToSU->CopyDstRC = SrcRC;
@@ -425,7 +425,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
unsigned NumRes = MCID.getNumDefs();
- for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
if (Reg == *ImpDef)
break;
++NumRes;
@@ -508,7 +508,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
if (!MCID.ImplicitDefs)
continue;
- for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) {
+ for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
}
@@ -630,7 +630,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
std::reverse(Sequence.begin(), Sequence.end());
#ifndef NDEBUG
- VerifySchedule(/*isBottomUp=*/true);
+ VerifyScheduledSequence(/*isBottomUp=*/true);
#endif
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 1017d36..f44adfc 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -232,7 +232,7 @@ private:
/// Updates the topological ordering if required.
SUnit *CreateNewSUnit(SDNode *N) {
unsigned NumSUnits = SUnits.size();
- SUnit *NewNode = NewSUnit(N);
+ SUnit *NewNode = newSUnit(N);
// Update the topological ordering.
if (NewNode->NodeNum >= NumSUnits)
Topo.InitDAGTopologicalSorting();
@@ -250,9 +250,9 @@ private:
return NewNode;
}
- /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
+ /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
/// need actual latency information but the hybrid scheduler does.
- bool ForceUnitLatencies() const {
+ bool forceUnitLatencies() const {
return !NeedLatency;
}
};
@@ -327,6 +327,12 @@ void ScheduleDAGRRList::Schedule() {
ListScheduleBottomUp();
AvailableQueue->releaseState();
+
+ DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
}
//===----------------------------------------------------------------------===//
@@ -348,7 +354,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
#endif
--PredSU->NumSuccsLeft;
- if (!ForceUnitLatencies()) {
+ if (!forceUnitLatencies()) {
// Updating predecessor's height. This is now the cycle when the
// predecessor can be scheduled without causing a pipeline stall.
PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
@@ -695,7 +701,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
Sequence.push_back(SU);
- AvailableQueue->ScheduledNode(SU);
+ AvailableQueue->scheduledNode(SU);
// If HazardRec is disabled, and each inst counts as one cycle, then
// advance CurCycle before ReleasePredecessors to avoid useless pushes to
@@ -842,7 +848,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
else {
AvailableQueue->push(SU);
}
- AvailableQueue->UnscheduledNode(SU);
+ AvailableQueue->unscheduledNode(SU);
}
/// After backtracking, the hazard checker needs to be restored to a state
@@ -963,7 +969,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
LoadNode->setNodeId(LoadSU->NodeNum);
InitNumRegDefsLeft(LoadSU);
- ComputeLatency(LoadSU);
+ computeLatency(LoadSU);
}
SUnit *NewSU = CreateNewSUnit(N);
@@ -981,7 +987,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
NewSU->isCommutable = true;
InitNumRegDefsLeft(NewSU);
- ComputeLatency(NewSU);
+ computeLatency(NewSU);
// Record all the edges to and from the old SU, by category.
SmallVector<SDep, 4> ChainPreds;
@@ -1160,7 +1166,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
unsigned NumRes = MCID.getNumDefs();
- for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
if (Reg == *ImpDef)
break;
++NumRes;
@@ -1286,7 +1292,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
if (!MCID.ImplicitDefs)
continue;
- for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg)
+ for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
@@ -1475,7 +1481,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
std::reverse(Sequence.begin(), Sequence.end());
#ifndef NDEBUG
- VerifySchedule(/*isBottomUp=*/true);
+ VerifyScheduledSequence(/*isBottomUp=*/true);
#endif
}
@@ -1681,9 +1687,9 @@ public:
int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
- void ScheduledNode(SUnit *SU);
+ void scheduledNode(SUnit *SU);
- void UnscheduledNode(SUnit *SU);
+ void unscheduledNode(SUnit *SU);
protected:
bool canClobber(const SUnit *SU, const SUnit *Op);
@@ -1984,7 +1990,7 @@ int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
return PDiff;
}
-void RegReductionPQBase::ScheduledNode(SUnit *SU) {
+void RegReductionPQBase::scheduledNode(SUnit *SU) {
if (!TracksRegPressure)
return;
@@ -2053,7 +2059,7 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) {
dumpRegPressure();
}
-void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
+void RegReductionPQBase::unscheduledNode(SUnit *SU) {
if (!TracksRegPressure)
return;
@@ -2661,7 +2667,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
ScheduleDAGRRList *scheduleDAG,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- const unsigned *ImpDefs
+ const uint16_t *ImpDefs
= TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
const uint32_t *RegMask = getNodeRegMask(SU->getNode());
if(!ImpDefs && !RegMask)
@@ -2680,7 +2686,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
return true;
if (ImpDefs)
- for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
+ for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
// Return true if SU clobbers this physical register use and the
// definition of the register reaches from DepSU. IsReachable queries
// a topological forward sort of the DAG (following the successors).
@@ -2699,13 +2705,13 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
const TargetRegisterInfo *TRI) {
SDNode *N = SuccSU->getNode();
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
- const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+ const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
assert(ImpDefs && "Caller should check hasPhysRegDefs");
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getGluedNode()) {
if (!SUNode->isMachineOpcode())
continue;
- const unsigned *SUImpDefs =
+ const uint16_t *SUImpDefs =
TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
const uint32_t *SURegMask = getNodeRegMask(SUNode);
if (!SUImpDefs && !SURegMask)
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 71f07d6..69dd813 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -17,6 +17,8 @@
#include "ScheduleDAGSDNodes.h"
#include "InstrEmitter.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -44,20 +46,26 @@ static cl::opt<int> HighLatencyCycles(
"instructions take for targets with no itinerary"));
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
- : ScheduleDAG(mf),
+ : ScheduleDAG(mf), BB(0), DAG(0),
InstrItins(mf.getTarget().getInstrItineraryData()) {}
/// Run - perform scheduling.
///
-void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb,
- MachineBasicBlock::iterator insertPos) {
+void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) {
+ BB = bb;
DAG = dag;
- ScheduleDAG::Run(bb, insertPos);
+
+ // Clear the scheduler's SUnit DAG.
+ ScheduleDAG::clearDAG();
+ Sequence.clear();
+
+ // Invoke the target's selection of scheduler.
+ Schedule();
}
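Editorial note: Run() now resets the scheduler's own state and invokes the virtual Schedule() hook, rather than delegating through the base class with an insertion point. The control flow is a plain template-method pattern; a condensed sketch, with class and member names abbreviated from the code above:

    #include <cstdio>
    #include <vector>

    class SchedulerBase {
    public:
      virtual ~SchedulerBase() = default;
      // Driver: reset per-region state, then run the derived scheduling policy.
      void run() {
        sequence.clear();   // mirrors ScheduleDAG::clearDAG() / Sequence.clear()
        schedule();         // virtual hook, like ScheduleDAGSDNodes::Schedule()
      }
    protected:
      virtual void schedule() = 0;
      std::vector<int> sequence;
    };

    class BottomUpScheduler : public SchedulerBase {
      void schedule() override { sequence.push_back(1); std::puts("scheduled"); }
    };

    int main() { BottomUpScheduler().run(); }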
/// NewSUnit - Creates a new SUnit and return a ptr to it.
///
-SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
+SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) {
#ifndef NDEBUG
const SUnit *Addr = 0;
if (!SUnits.empty())
@@ -79,7 +87,7 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
}
SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
- SUnit *SU = NewSUnit(Old->getNode());
+ SUnit *SU = newSUnit(Old->getNode());
SU->OrigNode = Old->OrigNode;
SU->Latency = Old->Latency;
SU->isVRegCycle = Old->isVRegCycle;
@@ -302,7 +310,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// If this node has already been processed, stop now.
if (NI->getNodeId() != -1) continue;
- SUnit *NodeSUnit = NewSUnit(NI);
+ SUnit *NodeSUnit = newSUnit(NI);
// See if anything is glued to this node, if so, add them to glued
// nodes. Nodes can have at most one glue input and one glue output. Glue
@@ -360,7 +368,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
InitNumRegDefsLeft(NodeSUnit);
// Assign the Latency field of NodeSUnit using target-provided information.
- ComputeLatency(NodeSUnit);
+ computeLatency(NodeSUnit);
}
// Find all call operands.
@@ -382,7 +390,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
// Check to see if the scheduler cares about latencies.
- bool UnitLatencies = ForceUnitLatencies();
+ bool UnitLatencies = forceUnitLatencies();
// Pass 2: add the preds, succs, etc.
for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
@@ -448,7 +456,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
OpLatency, PhysReg);
if (!isChain && !UnitLatencies) {
- ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
+ computeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
}
@@ -541,7 +549,7 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
}
}
-void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
+void ScheduleDAGSDNodes::computeLatency(SUnit *SU) {
SDNode *N = SU->getNode();
// TokenFactor operands are considered zero latency, and some schedulers
@@ -553,7 +561,7 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
}
// Check to see if the scheduler cares about latencies.
- if (ForceUnitLatencies()) {
+ if (forceUnitLatencies()) {
SU->Latency = 1;
return;
}
@@ -575,10 +583,10 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
SU->Latency += TII->getInstrLatency(InstrItins, N);
}
-void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
+void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
unsigned OpIdx, SDep& dep) const{
// Check to see if the scheduler cares about latencies.
- if (ForceUnitLatencies())
+ if (forceUnitLatencies())
return;
if (dep.getKind() != SDep::Data)
@@ -621,6 +629,30 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
}
}
+void ScheduleDAGSDNodes::dumpSchedule() const {
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i) {
+ if (SUnit *SU = Sequence[i])
+ SU->dump(this);
+ else
+ dbgs() << "**** NOOP ****\n";
+ }
+}
+
+#ifndef NDEBUG
+/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
+/// their state is consistent with the nodes listed in Sequence.
+///
+void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
+ unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp);
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(Sequence.size() - Noops == ScheduledNodes &&
+ "The number of nodes scheduled doesn't match the expected number!");
+}
+#endif // NDEBUG
+
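Editorial note: VerifyScheduledSequence checks a simple counting invariant: every scheduled SUnit appears in Sequence exactly once, and the only extra entries are null noops. A standalone check of the same invariant (ScheduledNodes is passed in here rather than recomputed from the DAG):

    #include <cassert>
    #include <vector>

    struct SUnit { unsigned NodeNum; };

    static void verifyScheduledSequence(const std::vector<SUnit*> &Sequence,
                                        unsigned ScheduledNodes) {
      unsigned Noops = 0;
      for (const SUnit *SU : Sequence)
        if (!SU)
          ++Noops;
      assert(Sequence.size() - Noops == ScheduledNodes &&
             "The number of nodes scheduled doesn't match the expected number!");
    }

    int main() {
      SUnit A{0}, B{1};
      std::vector<SUnit*> Seq{&A, nullptr, &B}; // one noop
      verifyScheduledSequence(Seq, 2);          // passes: 3 entries - 1 noop == 2
    }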
namespace {
struct OrderSorter {
bool operator()(const std::pair<unsigned, MachineInstr*> &A,
@@ -686,9 +718,48 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
}
+void ScheduleDAGSDNodes::
+EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+ MachineBasicBlock::iterator InsertPos) {
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ if (I->getSUnit()->CopyDstRC) {
+ // Copy to physical register.
+ DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
+ assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
+ // Find the destination physical register.
+ unsigned Reg = 0;
+ for (SUnit::const_succ_iterator II = SU->Succs.begin(),
+ EE = SU->Succs.end(); II != EE; ++II) {
+ if (II->isCtrl()) continue; // ignore chain succs
+ if (II->getReg()) {
+ Reg = II->getReg();
+ break;
+ }
+ }
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
+ .addReg(VRI->second);
+ } else {
+ // Copy from physical register.
+ assert(I->getReg() && "Unknown physical register!");
+ unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
+ bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
+ .addReg(I->getReg());
+ }
+ break;
+ }
+}
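Editorial note: EmitPhysRegCopy relies on the map-insert return value to assert emission order: inserting a (SUnit, vreg) pair must succeed exactly once. The same idiom, shown with std::unordered_map standing in for DenseMap:

    #include <cassert>
    #include <unordered_map>

    int main() {
      std::unordered_map<const void*, unsigned> VRBaseMap;
      int SU;                 // stand-in for an SUnit
      unsigned VRBase = 100;  // stand-in for a freshly created virtual register

      bool isNew = VRBaseMap.insert({&SU, VRBase}).second;
      (void)isNew; // silence -Wunused-variable in release builds
      assert(isNew && "Node emitted out of order - early");

      // A second insert for the same key fails; the assert above would catch
      // the equivalent double emission.
      assert(!VRBaseMap.insert({&SU, VRBase}).second);
    }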
-/// EmitSchedule - Emit the machine code in scheduled order.
-MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
+/// EmitSchedule - Emit the machine code in scheduled order. Return the new
+/// InsertPos and MachineBasicBlock that contains this insertion
+/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
+/// not necessarily refer to the returned BB. The emitter may split blocks.
+MachineBasicBlock *ScheduleDAGSDNodes::
+EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
InstrEmitter Emitter(BB, InsertPos);
DenseMap<SDValue, unsigned> VRBaseMap;
DenseMap<SUnit*, unsigned> CopyVRBaseMap;
@@ -711,7 +782,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
SUnit *SU = Sequence[i];
if (!SU) {
// Null SUnit* is a noop.
- EmitNoop();
+ TII->insertNoop(*Emitter.getBlock(), InsertPos);
continue;
}
@@ -719,7 +790,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
// SDNode and any glued SDNodes and append them to the block.
if (!SU->getNode()) {
// Emit a copy.
- EmitPhysRegCopy(SU, CopyVRBaseMap);
+ EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos);
continue;
}
@@ -784,19 +855,24 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
}
// Add trailing DbgValue's before the terminator. FIXME: May want to add
// some of them before one or more conditional branches?
+ SmallVector<MachineInstr*, 8> DbgMIs;
while (DI != DE) {
- MachineBasicBlock *InsertBB = Emitter.getBlock();
- MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator();
- if (!(*DI)->isInvalidated()) {
- MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, VRBaseMap);
- if (DbgMI)
- InsertBB->insert(Pos, DbgMI);
- }
+ if (!(*DI)->isInvalidated())
+ if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
+ DbgMIs.push_back(DbgMI);
++DI;
}
+
+ MachineBasicBlock *InsertBB = Emitter.getBlock();
+ MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
+ InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());
}
- BB = Emitter.getBlock();
InsertPos = Emitter.getInsertPos();
- return BB;
+ return Emitter.getBlock();
+}
+
+/// Return the basic block label.
+std::string ScheduleDAGSDNodes::getDAGName() const {
+ return "sunit-dag." + BB->getFullName();
}
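Editorial note: EmitSchedule now returns the (possibly new) block and hands the caller the updated insertion point through a reference parameter, since the emitter may split blocks. A minimal shape of that contract, with the iterator and block types reduced to a std::list:

    #include <cstdio>
    #include <list>

    using Block = std::list<int>;

    // Emit into Blk, advancing InsertPos past what was emitted; return the block
    // that now contains the insertion point (here always Blk itself).
    static Block *emitSchedule(Block &Blk, Block::iterator &InsertPos) {
      InsertPos = Blk.insert(InsertPos, 42); // emit one "instruction"
      ++InsertPos;                           // leave InsertPos just after it
      return &Blk;
    }

    int main() {
      Block B{1, 2};
      Block::iterator Pos = B.begin();
      Block *Out = emitSchedule(B, Pos);
      std::printf("size=%zu, next=%d\n", Out->size(), *Pos); // size=3, next=1
    }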
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 17b4901..75940ec 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -35,17 +35,20 @@ namespace llvm {
///
class ScheduleDAGSDNodes : public ScheduleDAG {
public:
+ MachineBasicBlock *BB;
SelectionDAG *DAG; // DAG of the current basic block
const InstrItineraryData *InstrItins;
+ /// The schedule. Null SUnit*'s represent noop instructions.
+ std::vector<SUnit*> Sequence;
+
explicit ScheduleDAGSDNodes(MachineFunction &mf);
virtual ~ScheduleDAGSDNodes() {}
/// Run - perform scheduling.
///
- void Run(SelectionDAG *dag, MachineBasicBlock *bb,
- MachineBasicBlock::iterator insertPos);
+ void Run(SelectionDAG *dag, MachineBasicBlock *bb);
/// isPassiveNode - Return true if the node is a non-scheduled leaf.
///
@@ -68,7 +71,7 @@ namespace llvm {
/// NewSUnit - Creates a new SUnit and return a ptr to it.
///
- SUnit *NewSUnit(SDNode *N);
+ SUnit *newSUnit(SDNode *N);
/// Clone - Creates a clone of the specified SUnit. It does not copy the
/// predecessors / successors info nor the temporary scheduling states.
@@ -79,7 +82,7 @@ namespace llvm {
/// are input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
/// flagged together nodes with a single SUnit.
- virtual void BuildSchedGraph(AliasAnalysis *AA);
+ void BuildSchedGraph(AliasAnalysis *AA);
/// InitVRegCycleFlag - Set isVRegCycle if this node's single use is
/// CopyToReg and its only active data operands are CopyFromReg within a
@@ -91,30 +94,41 @@ namespace llvm {
///
void InitNumRegDefsLeft(SUnit *SU);
- /// ComputeLatency - Compute node latency.
+ /// computeLatency - Compute node latency.
///
- virtual void ComputeLatency(SUnit *SU);
+ virtual void computeLatency(SUnit *SU);
- /// ComputeOperandLatency - Override dependence edge latency using
+ /// computeOperandLatency - Override dependence edge latency using
/// operand use/def information
///
- virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+ virtual void computeOperandLatency(SUnit *Def, SUnit *Use,
SDep& dep) const { }
- virtual void ComputeOperandLatency(SDNode *Def, SDNode *Use,
+ virtual void computeOperandLatency(SDNode *Def, SDNode *Use,
unsigned OpIdx, SDep& dep) const;
- virtual MachineBasicBlock *EmitSchedule();
-
/// Schedule - Order nodes according to selected style, filling
/// in the Sequence member.
///
virtual void Schedule() = 0;
+ /// VerifyScheduledSequence - Verify that all SUnits are scheduled and
+ /// consistent with the Sequence of scheduled instructions.
+ void VerifyScheduledSequence(bool isBottomUp);
+
+ /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
+ /// according to the order specified in Sequence.
+ ///
+ MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+
virtual void dumpNode(const SUnit *SU) const;
+ void dumpSchedule() const;
+
virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+ virtual std::string getDAGName() const;
+
virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
/// RegDefIter - In place iteration over the values defined by an
@@ -160,6 +174,9 @@ namespace llvm {
/// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
void BuildSchedUnits();
void AddSchedEdges();
+
+ void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+ MachineBasicBlock::iterator InsertPos);
};
}
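Editorial note: with BB and Sequence hoisted into ScheduleDAGSDNodes, a caller drives the scheduler in two steps: Run, then EmitSchedule. A hypothetical driver against exactly the interface declared above (this patch does not show the real call sites; SelectionDAGISel does something similar, and the sketch assumes an LLVM tree at this revision):

    // Hypothetical driver; "Scheduler" is any concrete ScheduleDAGSDNodes
    // subclass created by the target's scheduler factory.
    void scheduleAndEmit(ScheduleDAGSDNodes *Scheduler, SelectionDAG *CurDAG,
                         MachineBasicBlock *&BB,
                         MachineBasicBlock::iterator &InsertPos) {
      Scheduler->Run(CurDAG, BB);              // builds SUnits, fills Sequence;
                                               // Schedule() verifies in +Asserts
      BB = Scheduler->EmitSchedule(InsertPos); // may split blocks; BB updated
    }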
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 7d12509..c851291 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -158,7 +158,7 @@ void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
releaseSuccessors(SU);
SU->isScheduled = true;
- AvailableQueue->ScheduledNode(SU);
+ AvailableQueue->scheduledNode(SU);
}
/// listScheduleTopDown - The main loop of list scheduling for top-down
@@ -202,7 +202,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() {
// don't advance the hazard recognizer.
if (AvailableQueue->empty()) {
// Reset DFA state.
- AvailableQueue->ScheduledNode(0);
+ AvailableQueue->scheduledNode(0);
++CurCycle;
continue;
}
@@ -261,7 +261,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() {
}
#ifndef NDEBUG
- VerifySchedule(/*isBottomUp=*/false);
+ VerifyScheduledSequence(/*isBottomUp=*/false);
#endif
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 796abf4..e3a7305 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -124,20 +124,29 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) {
if (i == e) return false;
// Do not accept build_vectors that aren't all constants or which have non-~0
- // elements.
+ // elements. We have to be a bit careful here, as the type of the constant
+ // may not be the same as the type of the vector elements due to type
+ // legalization (the elements are promoted to a legal type for the target,
+ // and a vector type may be legal even when its element type is not).
+ // We only need to check enough bits to cover the vector elements, because
+ // we care whether the resultant vector is all ones, not whether the
+ // individual constants are.
SDValue NotZero = N->getOperand(i);
+ unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
if (isa<ConstantSDNode>(NotZero)) {
- if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue())
+ if (cast<ConstantSDNode>(NotZero)->getAPIntValue().countTrailingOnes() <
+ EltSize)
return false;
} else if (isa<ConstantFPSDNode>(NotZero)) {
- if (!cast<ConstantFPSDNode>(NotZero)->getValueAPF().
- bitcastToAPInt().isAllOnesValue())
+ if (cast<ConstantFPSDNode>(NotZero)->getValueAPF()
+ .bitcastToAPInt().countTrailingOnes() < EltSize)
return false;
} else
return false;
// Okay, we have at least one ~0 value, check to see if the rest match or are
- // undefs.
+ // undefs. Even with the above element type twiddling, this should be OK, as
+ // the same type legalization should have applied to all the elements.
for (++i; i != e; ++i)
if (N->getOperand(i) != NotZero &&
N->getOperand(i).getOpcode() != ISD::UNDEF)
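Editorial note: the new check accepts a constant whose low EltSize bits are all ones even when its own type is wider than the vector element type, a by-product of type legalization. The bit test reduces to counting trailing ones; a standalone illustration with 64-bit values in place of APInt (countTrailingOnes is hand-rolled here):

    #include <cstdint>
    #include <cstdio>

    static unsigned countTrailingOnes(uint64_t V) {
      unsigned N = 0;
      while (V & 1) { ++N; V >>= 1; }
      return N;
    }

    // A promoted constant still counts as "all ones" for an EltSize-bit element
    // if at least its low EltSize bits are set.
    static bool isAllOnesElement(uint64_t C, unsigned EltSize) {
      return countTrailingOnes(C) >= EltSize;
    }

    int main() {
      // i8 element promoted to i32: 0xFF is all-ones for EltSize == 8 even
      // though it is not the all-ones i32 value; 0xFE is not.
      std::printf("%d %d\n", isAllOnesElement(0xFF, 8), isAllOnesElement(0xFE, 8));
      // prints: 1 0
    }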
@@ -5904,571 +5913,6 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
}
-std::string SDNode::getOperationName(const SelectionDAG *G) const {
- switch (getOpcode()) {
- default:
- if (getOpcode() < ISD::BUILTIN_OP_END)
- return "<<Unknown DAG Node>>";
- if (isMachineOpcode()) {
- if (G)
- if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
- if (getMachineOpcode() < TII->getNumOpcodes())
- return TII->getName(getMachineOpcode());
- return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
- }
- if (G) {
- const TargetLowering &TLI = G->getTargetLoweringInfo();
- const char *Name = TLI.getTargetNodeName(getOpcode());
- if (Name) return Name;
- return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
- }
- return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
-
-#ifndef NDEBUG
- case ISD::DELETED_NODE:
- return "<<Deleted Node!>>";
-#endif
- case ISD::PREFETCH: return "Prefetch";
- case ISD::MEMBARRIER: return "MemBarrier";
- case ISD::ATOMIC_FENCE: return "AtomicFence";
- case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
- case ISD::ATOMIC_SWAP: return "AtomicSwap";
- case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
- case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
- case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
- case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
- case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
- case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
- case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
- case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
- case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
- case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
- case ISD::ATOMIC_LOAD: return "AtomicLoad";
- case ISD::ATOMIC_STORE: return "AtomicStore";
- case ISD::PCMARKER: return "PCMarker";
- case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
- case ISD::SRCVALUE: return "SrcValue";
- case ISD::MDNODE_SDNODE: return "MDNode";
- case ISD::EntryToken: return "EntryToken";
- case ISD::TokenFactor: return "TokenFactor";
- case ISD::AssertSext: return "AssertSext";
- case ISD::AssertZext: return "AssertZext";
-
- case ISD::BasicBlock: return "BasicBlock";
- case ISD::VALUETYPE: return "ValueType";
- case ISD::Register: return "Register";
- case ISD::RegisterMask: return "RegisterMask";
- case ISD::Constant: return "Constant";
- case ISD::ConstantFP: return "ConstantFP";
- case ISD::GlobalAddress: return "GlobalAddress";
- case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
- case ISD::FrameIndex: return "FrameIndex";
- case ISD::JumpTable: return "JumpTable";
- case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
- case ISD::RETURNADDR: return "RETURNADDR";
- case ISD::FRAMEADDR: return "FRAMEADDR";
- case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
- case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
- case ISD::LSDAADDR: return "LSDAADDR";
- case ISD::EHSELECTION: return "EHSELECTION";
- case ISD::EH_RETURN: return "EH_RETURN";
- case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
- case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
- case ISD::ConstantPool: return "ConstantPool";
- case ISD::ExternalSymbol: return "ExternalSymbol";
- case ISD::BlockAddress: return "BlockAddress";
- case ISD::INTRINSIC_WO_CHAIN:
- case ISD::INTRINSIC_VOID:
- case ISD::INTRINSIC_W_CHAIN: {
- unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
- unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
- if (IID < Intrinsic::num_intrinsics)
- return Intrinsic::getName((Intrinsic::ID)IID);
- else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
- return TII->getName(IID);
- llvm_unreachable("Invalid intrinsic ID");
- }
-
- case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
- case ISD::TargetConstant: return "TargetConstant";
- case ISD::TargetConstantFP:return "TargetConstantFP";
- case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
- case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
- case ISD::TargetFrameIndex: return "TargetFrameIndex";
- case ISD::TargetJumpTable: return "TargetJumpTable";
- case ISD::TargetConstantPool: return "TargetConstantPool";
- case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
- case ISD::TargetBlockAddress: return "TargetBlockAddress";
-
- case ISD::CopyToReg: return "CopyToReg";
- case ISD::CopyFromReg: return "CopyFromReg";
- case ISD::UNDEF: return "undef";
- case ISD::MERGE_VALUES: return "merge_values";
- case ISD::INLINEASM: return "inlineasm";
- case ISD::EH_LABEL: return "eh_label";
- case ISD::HANDLENODE: return "handlenode";
-
- // Unary operators
- case ISD::FABS: return "fabs";
- case ISD::FNEG: return "fneg";
- case ISD::FSQRT: return "fsqrt";
- case ISD::FSIN: return "fsin";
- case ISD::FCOS: return "fcos";
- case ISD::FTRUNC: return "ftrunc";
- case ISD::FFLOOR: return "ffloor";
- case ISD::FCEIL: return "fceil";
- case ISD::FRINT: return "frint";
- case ISD::FNEARBYINT: return "fnearbyint";
- case ISD::FEXP: return "fexp";
- case ISD::FEXP2: return "fexp2";
- case ISD::FLOG: return "flog";
- case ISD::FLOG2: return "flog2";
- case ISD::FLOG10: return "flog10";
-
- // Binary operators
- case ISD::ADD: return "add";
- case ISD::SUB: return "sub";
- case ISD::MUL: return "mul";
- case ISD::MULHU: return "mulhu";
- case ISD::MULHS: return "mulhs";
- case ISD::SDIV: return "sdiv";
- case ISD::UDIV: return "udiv";
- case ISD::SREM: return "srem";
- case ISD::UREM: return "urem";
- case ISD::SMUL_LOHI: return "smul_lohi";
- case ISD::UMUL_LOHI: return "umul_lohi";
- case ISD::SDIVREM: return "sdivrem";
- case ISD::UDIVREM: return "udivrem";
- case ISD::AND: return "and";
- case ISD::OR: return "or";
- case ISD::XOR: return "xor";
- case ISD::SHL: return "shl";
- case ISD::SRA: return "sra";
- case ISD::SRL: return "srl";
- case ISD::ROTL: return "rotl";
- case ISD::ROTR: return "rotr";
- case ISD::FADD: return "fadd";
- case ISD::FSUB: return "fsub";
- case ISD::FMUL: return "fmul";
- case ISD::FDIV: return "fdiv";
- case ISD::FMA: return "fma";
- case ISD::FREM: return "frem";
- case ISD::FCOPYSIGN: return "fcopysign";
- case ISD::FGETSIGN: return "fgetsign";
- case ISD::FPOW: return "fpow";
-
- case ISD::FPOWI: return "fpowi";
- case ISD::SETCC: return "setcc";
- case ISD::SELECT: return "select";
- case ISD::VSELECT: return "vselect";
- case ISD::SELECT_CC: return "select_cc";
- case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
- case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
- case ISD::CONCAT_VECTORS: return "concat_vectors";
- case ISD::INSERT_SUBVECTOR: return "insert_subvector";
- case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
- case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
- case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
- case ISD::CARRY_FALSE: return "carry_false";
- case ISD::ADDC: return "addc";
- case ISD::ADDE: return "adde";
- case ISD::SADDO: return "saddo";
- case ISD::UADDO: return "uaddo";
- case ISD::SSUBO: return "ssubo";
- case ISD::USUBO: return "usubo";
- case ISD::SMULO: return "smulo";
- case ISD::UMULO: return "umulo";
- case ISD::SUBC: return "subc";
- case ISD::SUBE: return "sube";
- case ISD::SHL_PARTS: return "shl_parts";
- case ISD::SRA_PARTS: return "sra_parts";
- case ISD::SRL_PARTS: return "srl_parts";
-
- // Conversion operators.
- case ISD::SIGN_EXTEND: return "sign_extend";
- case ISD::ZERO_EXTEND: return "zero_extend";
- case ISD::ANY_EXTEND: return "any_extend";
- case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
- case ISD::TRUNCATE: return "truncate";
- case ISD::FP_ROUND: return "fp_round";
- case ISD::FLT_ROUNDS_: return "flt_rounds";
- case ISD::FP_ROUND_INREG: return "fp_round_inreg";
- case ISD::FP_EXTEND: return "fp_extend";
-
- case ISD::SINT_TO_FP: return "sint_to_fp";
- case ISD::UINT_TO_FP: return "uint_to_fp";
- case ISD::FP_TO_SINT: return "fp_to_sint";
- case ISD::FP_TO_UINT: return "fp_to_uint";
- case ISD::BITCAST: return "bitcast";
- case ISD::FP16_TO_FP32: return "fp16_to_fp32";
- case ISD::FP32_TO_FP16: return "fp32_to_fp16";
-
- case ISD::CONVERT_RNDSAT: {
- switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
- default: llvm_unreachable("Unknown cvt code!");
- case ISD::CVT_FF: return "cvt_ff";
- case ISD::CVT_FS: return "cvt_fs";
- case ISD::CVT_FU: return "cvt_fu";
- case ISD::CVT_SF: return "cvt_sf";
- case ISD::CVT_UF: return "cvt_uf";
- case ISD::CVT_SS: return "cvt_ss";
- case ISD::CVT_SU: return "cvt_su";
- case ISD::CVT_US: return "cvt_us";
- case ISD::CVT_UU: return "cvt_uu";
- }
- }
-
- // Control flow instructions
- case ISD::BR: return "br";
- case ISD::BRIND: return "brind";
- case ISD::BR_JT: return "br_jt";
- case ISD::BRCOND: return "brcond";
- case ISD::BR_CC: return "br_cc";
- case ISD::CALLSEQ_START: return "callseq_start";
- case ISD::CALLSEQ_END: return "callseq_end";
-
- // Other operators
- case ISD::LOAD: return "load";
- case ISD::STORE: return "store";
- case ISD::VAARG: return "vaarg";
- case ISD::VACOPY: return "vacopy";
- case ISD::VAEND: return "vaend";
- case ISD::VASTART: return "vastart";
- case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
- case ISD::EXTRACT_ELEMENT: return "extract_element";
- case ISD::BUILD_PAIR: return "build_pair";
- case ISD::STACKSAVE: return "stacksave";
- case ISD::STACKRESTORE: return "stackrestore";
- case ISD::TRAP: return "trap";
-
- // Bit manipulation
- case ISD::BSWAP: return "bswap";
- case ISD::CTPOP: return "ctpop";
- case ISD::CTTZ: return "cttz";
- case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef";
- case ISD::CTLZ: return "ctlz";
- case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef";
-
- // Trampolines
- case ISD::INIT_TRAMPOLINE: return "init_trampoline";
- case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
-
- case ISD::CONDCODE:
- switch (cast<CondCodeSDNode>(this)->get()) {
- default: llvm_unreachable("Unknown setcc condition!");
- case ISD::SETOEQ: return "setoeq";
- case ISD::SETOGT: return "setogt";
- case ISD::SETOGE: return "setoge";
- case ISD::SETOLT: return "setolt";
- case ISD::SETOLE: return "setole";
- case ISD::SETONE: return "setone";
-
- case ISD::SETO: return "seto";
- case ISD::SETUO: return "setuo";
- case ISD::SETUEQ: return "setue";
- case ISD::SETUGT: return "setugt";
- case ISD::SETUGE: return "setuge";
- case ISD::SETULT: return "setult";
- case ISD::SETULE: return "setule";
- case ISD::SETUNE: return "setune";
-
- case ISD::SETEQ: return "seteq";
- case ISD::SETGT: return "setgt";
- case ISD::SETGE: return "setge";
- case ISD::SETLT: return "setlt";
- case ISD::SETLE: return "setle";
- case ISD::SETNE: return "setne";
-
- case ISD::SETTRUE: return "settrue";
- case ISD::SETTRUE2: return "settrue2";
- case ISD::SETFALSE: return "setfalse";
- case ISD::SETFALSE2: return "setfalse2";
- }
- }
-}
-
-const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
- switch (AM) {
- default:
- return "";
- case ISD::PRE_INC:
- return "<pre-inc>";
- case ISD::PRE_DEC:
- return "<pre-dec>";
- case ISD::POST_INC:
- return "<post-inc>";
- case ISD::POST_DEC:
- return "<post-dec>";
- }
-}
-
-std::string ISD::ArgFlagsTy::getArgFlagsString() {
- std::string S = "< ";
-
- if (isZExt())
- S += "zext ";
- if (isSExt())
- S += "sext ";
- if (isInReg())
- S += "inreg ";
- if (isSRet())
- S += "sret ";
- if (isByVal())
- S += "byval ";
- if (isNest())
- S += "nest ";
- if (getByValAlign())
- S += "byval-align:" + utostr(getByValAlign()) + " ";
- if (getOrigAlign())
- S += "orig-align:" + utostr(getOrigAlign()) + " ";
- if (getByValSize())
- S += "byval-size:" + utostr(getByValSize()) + " ";
- return S + ">";
-}
-
-void SDNode::dump() const { dump(0); }
-void SDNode::dump(const SelectionDAG *G) const {
- print(dbgs(), G);
- dbgs() << '\n';
-}
-
-void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
- OS << (void*)this << ": ";
-
- for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
- if (i) OS << ",";
- if (getValueType(i) == MVT::Other)
- OS << "ch";
- else
- OS << getValueType(i).getEVTString();
- }
- OS << " = " << getOperationName(G);
-}
-
-void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
- if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
- if (!MN->memoperands_empty()) {
- OS << "<";
- OS << "Mem:";
- for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
- e = MN->memoperands_end(); i != e; ++i) {
- OS << **i;
- if (llvm::next(i) != e)
- OS << " ";
- }
- OS << ">";
- }
- } else if (const ShuffleVectorSDNode *SVN =
- dyn_cast<ShuffleVectorSDNode>(this)) {
- OS << "<";
- for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
- int Idx = SVN->getMaskElt(i);
- if (i) OS << ",";
- if (Idx < 0)
- OS << "u";
- else
- OS << Idx;
- }
- OS << ">";
- } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
- OS << '<' << CSDN->getAPIntValue() << '>';
- } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
- if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
- OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
- else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
- OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
- else {
- OS << "<APFloat(";
- CSDN->getValueAPF().bitcastToAPInt().dump();
- OS << ")>";
- }
- } else if (const GlobalAddressSDNode *GADN =
- dyn_cast<GlobalAddressSDNode>(this)) {
- int64_t offset = GADN->getOffset();
- OS << '<';
- WriteAsOperand(OS, GADN->getGlobal());
- OS << '>';
- if (offset > 0)
- OS << " + " << offset;
- else
- OS << " " << offset;
- if (unsigned int TF = GADN->getTargetFlags())
- OS << " [TF=" << TF << ']';
- } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
- OS << "<" << FIDN->getIndex() << ">";
- } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
- OS << "<" << JTDN->getIndex() << ">";
- if (unsigned int TF = JTDN->getTargetFlags())
- OS << " [TF=" << TF << ']';
- } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
- int offset = CP->getOffset();
- if (CP->isMachineConstantPoolEntry())
- OS << "<" << *CP->getMachineCPVal() << ">";
- else
- OS << "<" << *CP->getConstVal() << ">";
- if (offset > 0)
- OS << " + " << offset;
- else
- OS << " " << offset;
- if (unsigned int TF = CP->getTargetFlags())
- OS << " [TF=" << TF << ']';
- } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
- OS << "<";
- const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
- if (LBB)
- OS << LBB->getName() << " ";
- OS << (const void*)BBDN->getBasicBlock() << ">";
- } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
- OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0);
- } else if (const ExternalSymbolSDNode *ES =
- dyn_cast<ExternalSymbolSDNode>(this)) {
- OS << "'" << ES->getSymbol() << "'";
- if (unsigned int TF = ES->getTargetFlags())
- OS << " [TF=" << TF << ']';
- } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
- if (M->getValue())
- OS << "<" << M->getValue() << ">";
- else
- OS << "<null>";
- } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
- if (MD->getMD())
- OS << "<" << MD->getMD() << ">";
- else
- OS << "<null>";
- } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
- OS << ":" << N->getVT().getEVTString();
- }
- else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
- OS << "<" << *LD->getMemOperand();
-
- bool doExt = true;
- switch (LD->getExtensionType()) {
- default: doExt = false; break;
- case ISD::EXTLOAD: OS << ", anyext"; break;
- case ISD::SEXTLOAD: OS << ", sext"; break;
- case ISD::ZEXTLOAD: OS << ", zext"; break;
- }
- if (doExt)
- OS << " from " << LD->getMemoryVT().getEVTString();
-
- const char *AM = getIndexedModeName(LD->getAddressingMode());
- if (*AM)
- OS << ", " << AM;
-
- OS << ">";
- } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
- OS << "<" << *ST->getMemOperand();
-
- if (ST->isTruncatingStore())
- OS << ", trunc to " << ST->getMemoryVT().getEVTString();
-
- const char *AM = getIndexedModeName(ST->getAddressingMode());
- if (*AM)
- OS << ", " << AM;
-
- OS << ">";
- } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
- OS << "<" << *M->getMemOperand() << ">";
- } else if (const BlockAddressSDNode *BA =
- dyn_cast<BlockAddressSDNode>(this)) {
- OS << "<";
- WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
- OS << ", ";
- WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
- OS << ">";
- if (unsigned int TF = BA->getTargetFlags())
- OS << " [TF=" << TF << ']';
- }
-
- if (G)
- if (unsigned Order = G->GetOrdering(this))
- OS << " [ORD=" << Order << ']';
-
- if (getNodeId() != -1)
- OS << " [ID=" << getNodeId() << ']';
-
- DebugLoc dl = getDebugLoc();
- if (G && !dl.isUnknown()) {
- DIScope
- Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext()));
- OS << " dbg:";
- // Omit the directory, since it's usually long and uninteresting.
- if (Scope.Verify())
- OS << Scope.getFilename();
- else
- OS << "<unknown>";
- OS << ':' << dl.getLine();
- if (dl.getCol() != 0)
- OS << ':' << dl.getCol();
- }
-}
-
-void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
- print_types(OS, G);
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- if (i) OS << ", "; else OS << " ";
- OS << (void*)getOperand(i).getNode();
- if (unsigned RN = getOperand(i).getResNo())
- OS << ":" << RN;
- }
- print_details(OS, G);
-}
-
-static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
- const SelectionDAG *G, unsigned depth,
- unsigned indent) {
- if (depth == 0)
- return;
-
- OS.indent(indent);
-
- N->print(OS, G);
-
- if (depth < 1)
- return;
-
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- // Don't follow chain operands.
- if (N->getOperand(i).getValueType() == MVT::Other)
- continue;
- OS << '\n';
- printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2);
- }
-}
-
-void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
- unsigned depth) const {
- printrWithDepthHelper(OS, this, G, depth, 0);
-}
-
-void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
- // Don't print impossibly deep things.
- printrWithDepth(OS, G, 10);
-}
-
-void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
- printrWithDepth(dbgs(), G, depth);
-}
-
-void SDNode::dumprFull(const SelectionDAG *G) const {
- // Don't print impossibly deep things.
- dumprWithDepth(G, 10);
-}
-
-static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- if (N->getOperand(i).getNode()->hasOneUse())
- DumpNodes(N->getOperand(i).getNode(), indent+2, G);
- else
- dbgs() << "\n" << std::string(indent+2, ' ')
- << (void*)N->getOperand(i).getNode() << ": <multiple use>";
-
-
- dbgs() << "\n";
- dbgs().indent(indent);
- N->dump(G);
-}
-
SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
assert(N->getNumValues() == 1 &&
"Can't unroll a vector with multiple results!");
@@ -6625,74 +6069,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
return 0;
}
-void SelectionDAG::dump() const {
- dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
-
- for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
- I != E; ++I) {
- const SDNode *N = I;
- if (!N->hasOneUse() && N != getRoot().getNode())
- DumpNodes(N, 2, this);
- }
-
- if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
-
- dbgs() << "\n\n";
-}
-
-void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
- print_types(OS, G);
- print_details(OS, G);
-}
-
-typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
-static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
- const SelectionDAG *G, VisitedSDNodeSet &once) {
- if (!once.insert(N)) // If we've been here before, return now.
- return;
-
- // Dump the current SDNode, but don't end the line yet.
- OS.indent(indent);
- N->printr(OS, G);
-
- // Having printed this SDNode, walk the children:
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- const SDNode *child = N->getOperand(i).getNode();
-
- if (i) OS << ",";
- OS << " ";
-
- if (child->getNumOperands() == 0) {
- // This child has no grandchildren; print it inline right here.
- child->printr(OS, G);
- once.insert(child);
- } else { // Just the address. FIXME: also print the child's opcode.
- OS << (void*)child;
- if (unsigned RN = N->getOperand(i).getResNo())
- OS << ":" << RN;
- }
- }
-
- OS << "\n";
-
- // Dump children that have grandchildren on their own line(s).
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- const SDNode *child = N->getOperand(i).getNode();
- DumpNodesr(OS, child, indent+2, G, once);
- }
-}
-
-void SDNode::dumpr() const {
- VisitedSDNodeSet once;
- DumpNodesr(dbgs(), this, 0, 0, once);
-}
-
-void SDNode::dumpr(const SelectionDAG *G) const {
- VisitedSDNodeSet once;
- DumpNodesr(dbgs(), this, 0, G, once);
-}
-
-
// getAddressSpace - Return the address space this GlobalAddress belongs to.
unsigned GlobalAddressSDNode::getAddressSpace() const {
return getGlobal()->getType()->getAddressSpace();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 4e4aa11..2ac9655 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2411,14 +2411,14 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
BranchProbabilityInfo *BPI = FuncInfo.BPI;
// Start with "simple" cases
- for (size_t i = 0; i < SI.getNumCases(); ++i) {
- BasicBlock *SuccBB = SI.getCaseSuccessor(i);
+ for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
+ i != e; ++i) {
+ const BasicBlock *SuccBB = i.getCaseSuccessor();
MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0;
- Cases.push_back(Case(SI.getCaseValue(i),
- SI.getCaseValue(i),
+ Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(),
SMBB, ExtraWeight));
}
std::sort(Cases.begin(), Cases.end(), CaseCmp());
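Editorial note: Clusterify moves from index-based case access to the new SwitchInst case iterators, which bundle the case value and its successor together. The shape of that migration, sketched over a plain vector of (value, successor) records with hypothetical names:

    #include <cstdio>
    #include <vector>

    struct CaseRec { int Value; const char *SuccBB; };

    int main() {
      std::vector<CaseRec> Cases{{1, "bb1"}, {7, "bb7"}};

      // Old style: index into parallel accessors, as with getCaseValue(i) and
      // getCaseSuccessor(i) on the removed lines.
      for (size_t i = 0; i < Cases.size(); ++i)
        std::printf("case %d -> %s\n", Cases[i].Value, Cases[i].SuccBB);

      // New style: one iterator yields both pieces, like SwitchInst::ConstCaseIt.
      for (auto i = Cases.begin(), e = Cases.end(); i != e; ++i)
        std::printf("case %d -> %s\n", i->Value, i->SuccBB);
    }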
@@ -4561,8 +4561,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
MDNode *Variable = DI.getVariable();
const Value *Address = DI.getAddress();
- if (!Address || !DIVariable(Variable).Verify())
+ if (!Address || !DIVariable(Variable).Verify()) {
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
return 0;
+ }
// Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder
// but do not always have a corresponding SDNode built. The SDNodeOrder
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
new file mode 100644
index 0000000..f981afb
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -0,0 +1,631 @@
+//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::dump method and friends.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+std::string SDNode::getOperationName(const SelectionDAG *G) const {
+ switch (getOpcode()) {
+ default:
+ if (getOpcode() < ISD::BUILTIN_OP_END)
+ return "<<Unknown DAG Node>>";
+ if (isMachineOpcode()) {
+ if (G)
+ if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
+ if (getMachineOpcode() < TII->getNumOpcodes())
+ return TII->getName(getMachineOpcode());
+ return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
+ }
+ if (G) {
+ const TargetLowering &TLI = G->getTargetLoweringInfo();
+ const char *Name = TLI.getTargetNodeName(getOpcode());
+ if (Name) return Name;
+ return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
+ }
+ return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
+
+#ifndef NDEBUG
+ case ISD::DELETED_NODE: return "<<Deleted Node!>>";
+#endif
+ case ISD::PREFETCH: return "Prefetch";
+ case ISD::MEMBARRIER: return "MemBarrier";
+ case ISD::ATOMIC_FENCE: return "AtomicFence";
+ case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
+ case ISD::ATOMIC_SWAP: return "AtomicSwap";
+ case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
+ case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
+ case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
+ case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
+ case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
+ case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
+ case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
+ case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
+ case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
+ case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
+ case ISD::ATOMIC_LOAD: return "AtomicLoad";
+ case ISD::ATOMIC_STORE: return "AtomicStore";
+ case ISD::PCMARKER: return "PCMarker";
+ case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+ case ISD::SRCVALUE: return "SrcValue";
+ case ISD::MDNODE_SDNODE: return "MDNode";
+ case ISD::EntryToken: return "EntryToken";
+ case ISD::TokenFactor: return "TokenFactor";
+ case ISD::AssertSext: return "AssertSext";
+ case ISD::AssertZext: return "AssertZext";
+
+ case ISD::BasicBlock: return "BasicBlock";
+ case ISD::VALUETYPE: return "ValueType";
+ case ISD::Register: return "Register";
+ case ISD::RegisterMask: return "RegisterMask";
+ case ISD::Constant: return "Constant";
+ case ISD::ConstantFP: return "ConstantFP";
+ case ISD::GlobalAddress: return "GlobalAddress";
+ case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
+ case ISD::FrameIndex: return "FrameIndex";
+ case ISD::JumpTable: return "JumpTable";
+ case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
+ case ISD::RETURNADDR: return "RETURNADDR";
+ case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
+ case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
+ case ISD::LSDAADDR: return "LSDAADDR";
+ case ISD::EHSELECTION: return "EHSELECTION";
+ case ISD::EH_RETURN: return "EH_RETURN";
+ case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
+ case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
+ case ISD::ConstantPool: return "ConstantPool";
+ case ISD::ExternalSymbol: return "ExternalSymbol";
+ case ISD::BlockAddress: return "BlockAddress";
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
+ unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
+ if (IID < Intrinsic::num_intrinsics)
+ return Intrinsic::getName((Intrinsic::ID)IID);
+ else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
+ return TII->getName(IID);
+ llvm_unreachable("Invalid intrinsic ID");
+ }
+
+ case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
+ case ISD::TargetConstant: return "TargetConstant";
+ case ISD::TargetConstantFP: return "TargetConstantFP";
+ case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
+ case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
+ case ISD::TargetFrameIndex: return "TargetFrameIndex";
+ case ISD::TargetJumpTable: return "TargetJumpTable";
+ case ISD::TargetConstantPool: return "TargetConstantPool";
+ case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
+ case ISD::TargetBlockAddress: return "TargetBlockAddress";
+
+ case ISD::CopyToReg: return "CopyToReg";
+ case ISD::CopyFromReg: return "CopyFromReg";
+ case ISD::UNDEF: return "undef";
+ case ISD::MERGE_VALUES: return "merge_values";
+ case ISD::INLINEASM: return "inlineasm";
+ case ISD::EH_LABEL: return "eh_label";
+ case ISD::HANDLENODE: return "handlenode";
+
+ // Unary operators
+ case ISD::FABS: return "fabs";
+ case ISD::FNEG: return "fneg";
+ case ISD::FSQRT: return "fsqrt";
+ case ISD::FSIN: return "fsin";
+ case ISD::FCOS: return "fcos";
+ case ISD::FTRUNC: return "ftrunc";
+ case ISD::FFLOOR: return "ffloor";
+ case ISD::FCEIL: return "fceil";
+ case ISD::FRINT: return "frint";
+ case ISD::FNEARBYINT: return "fnearbyint";
+ case ISD::FEXP: return "fexp";
+ case ISD::FEXP2: return "fexp2";
+ case ISD::FLOG: return "flog";
+ case ISD::FLOG2: return "flog2";
+ case ISD::FLOG10: return "flog10";
+
+ // Binary operators
+ case ISD::ADD: return "add";
+ case ISD::SUB: return "sub";
+ case ISD::MUL: return "mul";
+ case ISD::MULHU: return "mulhu";
+ case ISD::MULHS: return "mulhs";
+ case ISD::SDIV: return "sdiv";
+ case ISD::UDIV: return "udiv";
+ case ISD::SREM: return "srem";
+ case ISD::UREM: return "urem";
+ case ISD::SMUL_LOHI: return "smul_lohi";
+ case ISD::UMUL_LOHI: return "umul_lohi";
+ case ISD::SDIVREM: return "sdivrem";
+ case ISD::UDIVREM: return "udivrem";
+ case ISD::AND: return "and";
+ case ISD::OR: return "or";
+ case ISD::XOR: return "xor";
+ case ISD::SHL: return "shl";
+ case ISD::SRA: return "sra";
+ case ISD::SRL: return "srl";
+ case ISD::ROTL: return "rotl";
+ case ISD::ROTR: return "rotr";
+ case ISD::FADD: return "fadd";
+ case ISD::FSUB: return "fsub";
+ case ISD::FMUL: return "fmul";
+ case ISD::FDIV: return "fdiv";
+ case ISD::FMA: return "fma";
+ case ISD::FREM: return "frem";
+ case ISD::FCOPYSIGN: return "fcopysign";
+ case ISD::FGETSIGN: return "fgetsign";
+ case ISD::FPOW: return "fpow";
+
+ case ISD::FPOWI: return "fpowi";
+ case ISD::SETCC: return "setcc";
+ case ISD::SELECT: return "select";
+ case ISD::VSELECT: return "vselect";
+ case ISD::SELECT_CC: return "select_cc";
+ case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
+ case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
+ case ISD::CONCAT_VECTORS: return "concat_vectors";
+ case ISD::INSERT_SUBVECTOR: return "insert_subvector";
+ case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
+ case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
+ case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
+ case ISD::CARRY_FALSE: return "carry_false";
+ case ISD::ADDC: return "addc";
+ case ISD::ADDE: return "adde";
+ case ISD::SADDO: return "saddo";
+ case ISD::UADDO: return "uaddo";
+ case ISD::SSUBO: return "ssubo";
+ case ISD::USUBO: return "usubo";
+ case ISD::SMULO: return "smulo";
+ case ISD::UMULO: return "umulo";
+ case ISD::SUBC: return "subc";
+ case ISD::SUBE: return "sube";
+ case ISD::SHL_PARTS: return "shl_parts";
+ case ISD::SRA_PARTS: return "sra_parts";
+ case ISD::SRL_PARTS: return "srl_parts";
+
+ // Conversion operators.
+ case ISD::SIGN_EXTEND: return "sign_extend";
+ case ISD::ZERO_EXTEND: return "zero_extend";
+ case ISD::ANY_EXTEND: return "any_extend";
+ case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+ case ISD::TRUNCATE: return "truncate";
+ case ISD::FP_ROUND: return "fp_round";
+ case ISD::FLT_ROUNDS_: return "flt_rounds";
+ case ISD::FP_ROUND_INREG: return "fp_round_inreg";
+ case ISD::FP_EXTEND: return "fp_extend";
+
+ case ISD::SINT_TO_FP: return "sint_to_fp";
+ case ISD::UINT_TO_FP: return "uint_to_fp";
+ case ISD::FP_TO_SINT: return "fp_to_sint";
+ case ISD::FP_TO_UINT: return "fp_to_uint";
+ case ISD::BITCAST: return "bitcast";
+ case ISD::FP16_TO_FP32: return "fp16_to_fp32";
+ case ISD::FP32_TO_FP16: return "fp32_to_fp16";
+
+ case ISD::CONVERT_RNDSAT: {
+ switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
+ default: llvm_unreachable("Unknown cvt code!");
+ case ISD::CVT_FF: return "cvt_ff";
+ case ISD::CVT_FS: return "cvt_fs";
+ case ISD::CVT_FU: return "cvt_fu";
+ case ISD::CVT_SF: return "cvt_sf";
+ case ISD::CVT_UF: return "cvt_uf";
+ case ISD::CVT_SS: return "cvt_ss";
+ case ISD::CVT_SU: return "cvt_su";
+ case ISD::CVT_US: return "cvt_us";
+ case ISD::CVT_UU: return "cvt_uu";
+ }
+ }
+
+ // Control flow instructions
+ case ISD::BR: return "br";
+ case ISD::BRIND: return "brind";
+ case ISD::BR_JT: return "br_jt";
+ case ISD::BRCOND: return "brcond";
+ case ISD::BR_CC: return "br_cc";
+ case ISD::CALLSEQ_START: return "callseq_start";
+ case ISD::CALLSEQ_END: return "callseq_end";
+
+ // Other operators
+ case ISD::LOAD: return "load";
+ case ISD::STORE: return "store";
+ case ISD::VAARG: return "vaarg";
+ case ISD::VACOPY: return "vacopy";
+ case ISD::VAEND: return "vaend";
+ case ISD::VASTART: return "vastart";
+ case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
+ case ISD::EXTRACT_ELEMENT: return "extract_element";
+ case ISD::BUILD_PAIR: return "build_pair";
+ case ISD::STACKSAVE: return "stacksave";
+ case ISD::STACKRESTORE: return "stackrestore";
+ case ISD::TRAP: return "trap";
+
+ // Bit manipulation
+ case ISD::BSWAP: return "bswap";
+ case ISD::CTPOP: return "ctpop";
+ case ISD::CTTZ: return "cttz";
+ case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef";
+ case ISD::CTLZ: return "ctlz";
+ case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef";
+
+ // Trampolines
+ case ISD::INIT_TRAMPOLINE: return "init_trampoline";
+ case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
+
+ case ISD::CONDCODE:
+ switch (cast<CondCodeSDNode>(this)->get()) {
+ default: llvm_unreachable("Unknown setcc condition!");
+ case ISD::SETOEQ: return "setoeq";
+ case ISD::SETOGT: return "setogt";
+ case ISD::SETOGE: return "setoge";
+ case ISD::SETOLT: return "setolt";
+ case ISD::SETOLE: return "setole";
+ case ISD::SETONE: return "setone";
+
+ case ISD::SETO: return "seto";
+ case ISD::SETUO: return "setuo";
+ case ISD::SETUEQ: return "setue";
+ case ISD::SETUGT: return "setugt";
+ case ISD::SETUGE: return "setuge";
+ case ISD::SETULT: return "setult";
+ case ISD::SETULE: return "setule";
+ case ISD::SETUNE: return "setune";
+
+ case ISD::SETEQ: return "seteq";
+ case ISD::SETGT: return "setgt";
+ case ISD::SETGE: return "setge";
+ case ISD::SETLT: return "setlt";
+ case ISD::SETLE: return "setle";
+ case ISD::SETNE: return "setne";
+
+ case ISD::SETTRUE: return "settrue";
+ case ISD::SETTRUE2: return "settrue2";
+ case ISD::SETFALSE: return "setfalse";
+ case ISD::SETFALSE2: return "setfalse2";
+ }
+ }
+}
+
+const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
+ switch (AM) {
+ default: return "";
+ case ISD::PRE_INC: return "<pre-inc>";
+ case ISD::PRE_DEC: return "<pre-dec>";
+ case ISD::POST_INC: return "<post-inc>";
+ case ISD::POST_DEC: return "<post-dec>";
+ }
+}
+
+void SDNode::dump() const { dump(0); }
+void SDNode::dump(const SelectionDAG *G) const {
+ print(dbgs(), G);
+ dbgs() << '\n';
+}
+
+void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
+ OS << (void*)this << ": ";
+
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+ if (i) OS << ",";
+ if (getValueType(i) == MVT::Other)
+ OS << "ch";
+ else
+ OS << getValueType(i).getEVTString();
+ }
+ OS << " = " << getOperationName(G);
+}
+
+void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
+ if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
+ if (!MN->memoperands_empty()) {
+ OS << "<";
+ OS << "Mem:";
+ for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
+ e = MN->memoperands_end(); i != e; ++i) {
+ OS << **i;
+ if (llvm::next(i) != e)
+ OS << " ";
+ }
+ OS << ">";
+ }
+ } else if (const ShuffleVectorSDNode *SVN =
+ dyn_cast<ShuffleVectorSDNode>(this)) {
+ OS << "<";
+ for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (i) OS << ",";
+ if (Idx < 0)
+ OS << "u";
+ else
+ OS << Idx;
+ }
+ OS << ">";
+ } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+ OS << '<' << CSDN->getAPIntValue() << '>';
+ } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
+ if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
+ OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
+ else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
+ OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
+ else {
+ OS << "<APFloat(";
+ CSDN->getValueAPF().bitcastToAPInt().dump();
+ OS << ")>";
+ }
+ } else if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(this)) {
+ int64_t offset = GADN->getOffset();
+ OS << '<';
+ WriteAsOperand(OS, GADN->getGlobal());
+ OS << '>';
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = GADN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
+ OS << "<" << FIDN->getIndex() << ">";
+ } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
+ OS << "<" << JTDN->getIndex() << ">";
+ if (unsigned int TF = JTDN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
+ int offset = CP->getOffset();
+ if (CP->isMachineConstantPoolEntry())
+ OS << "<" << *CP->getMachineCPVal() << ">";
+ else
+ OS << "<" << *CP->getConstVal() << ">";
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = CP->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
+ OS << "<";
+ const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+ if (LBB)
+ OS << LBB->getName() << " ";
+ OS << (const void*)BBDN->getBasicBlock() << ">";
+ } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
+ OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0);
+ } else if (const ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(this)) {
+ OS << "'" << ES->getSymbol() << "'";
+ if (unsigned int TF = ES->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
+ if (M->getValue())
+ OS << "<" << M->getValue() << ">";
+ else
+ OS << "<null>";
+ } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
+ if (MD->getMD())
+ OS << "<" << MD->getMD() << ">";
+ else
+ OS << "<null>";
+ } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
+ OS << ":" << N->getVT().getEVTString();
+ } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
+ OS << "<" << *LD->getMemOperand();
+
+ bool doExt = true;
+ switch (LD->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << LD->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(LD->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
+ OS << "<" << *ST->getMemOperand();
+
+ if (ST->isTruncatingStore())
+ OS << ", trunc to " << ST->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(ST->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
+ OS << "<" << *M->getMemOperand() << ">";
+ } else if (const BlockAddressSDNode *BA =
+ dyn_cast<BlockAddressSDNode>(this)) {
+ OS << "<";
+ WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
+ OS << ", ";
+ WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
+ OS << ">";
+ if (unsigned int TF = BA->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ }
+
+ if (G)
+ if (unsigned Order = G->GetOrdering(this))
+ OS << " [ORD=" << Order << ']';
+
+ if (getNodeId() != -1)
+ OS << " [ID=" << getNodeId() << ']';
+
+ DebugLoc dl = getDebugLoc();
+ if (G && !dl.isUnknown()) {
+ DIScope
+ Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext()));
+ OS << " dbg:";
+ // Omit the directory, since it's usually long and uninteresting.
+ if (Scope.Verify())
+ OS << Scope.getFilename();
+ else
+ OS << "<unknown>";
+ OS << ':' << dl.getLine();
+ if (dl.getCol() != 0)
+ OS << ':' << dl.getCol();
+ }
+}
+
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getNode()->hasOneUse())
+ DumpNodes(N->getOperand(i).getNode(), indent+2, G);
+ else
+ dbgs() << "\n" << std::string(indent+2, ' ')
+ << (void*)N->getOperand(i).getNode() << ": <multiple use>";
+
+ dbgs() << '\n';
+ dbgs().indent(indent);
+ N->dump(G);
+}
+
+void SelectionDAG::dump() const {
+ dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
+
+ for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
+ I != E; ++I) {
+ const SDNode *N = I;
+ if (!N->hasOneUse() && N != getRoot().getNode())
+ DumpNodes(N, 2, this);
+ }
+
+ if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
+ dbgs() << "\n\n";
+}
+
+void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ print_details(OS, G);
+}
+
+typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
+static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
+ const SelectionDAG *G, VisitedSDNodeSet &once) {
+ if (!once.insert(N)) // If we've been here before, return now.
+ return;
+
+ // Dump the current SDNode, but don't end the line yet.
+ OS.indent(indent);
+ N->printr(OS, G);
+
+ // Having printed this SDNode, walk the children:
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+
+ if (i) OS << ",";
+ OS << " ";
+
+ if (child->getNumOperands() == 0) {
+ // This child has no grandchildren; print it inline right here.
+ child->printr(OS, G);
+ once.insert(child);
+ } else { // Just the address. FIXME: also print the child's opcode.
+ OS << (void*)child;
+ if (unsigned RN = N->getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ }
+
+ OS << "\n";
+
+ // Dump children that have grandchildren on their own line(s).
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+ DumpNodesr(OS, child, indent+2, G, once);
+ }
+}
+
+void SDNode::dumpr() const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, 0, once);
+}
+
+void SDNode::dumpr(const SelectionDAG *G) const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, G, once);
+}
+
+static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
+ const SelectionDAG *G, unsigned depth,
+ unsigned indent) {
+ if (depth == 0)
+ return;
+
+ OS.indent(indent);
+
+ N->print(OS, G);
+
+ if (depth < 1)
+ return;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ // Don't follow chain operands.
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ continue;
+ OS << '\n';
+ printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2);
+ }
+}
+
+void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
+ unsigned depth) const {
+ printrWithDepthHelper(OS, this, G, depth, 0);
+}
+
+void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ printrWithDepth(OS, G, 10);
+}
+
+void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
+ printrWithDepth(dbgs(), G, depth);
+}
+
+void SDNode::dumprFull(const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ dumprWithDepth(G, 10);
+}
+
+void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ if (i) OS << ", "; else OS << " ";
+ OS << (void*)getOperand(i).getNode();
+ if (unsigned RN = getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ print_details(OS, G);
+}
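
DumpNodesr above dedupes shared operands with a visited set so that each node is printed only once even when the graph is a DAG rather than a tree. A minimal sketch of that traversal pattern, assuming only an SDNode graph reachable from a root node (not code from this patch):

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/CodeGen/SelectionDAGNodes.h"
    using namespace llvm;

    // Visit every node reachable from N exactly once, pre-order; in this
    // era SmallPtrSet::insert() returns false when N was already present.
    static void visitOnce(const SDNode *N,
                          SmallPtrSet<const SDNode *, 128> &Seen) {
      if (!Seen.insert(N))
        return;
      // ... print or inspect N here, as printr() does ...
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
        visitOnce(N->getOperand(i).getNode(), Seen);
    }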
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 2173d8d..8aabc02 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -673,7 +673,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("Instruction Scheduling", GroupName,
TimePassesIsEnabled);
- Scheduler->Run(CurDAG, FuncInfo->MBB, FuncInfo->InsertPt);
+ Scheduler->Run(CurDAG, FuncInfo->MBB);
}
if (ViewSUnitDAGs) Scheduler->viewGraph();
@@ -684,8 +684,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled);
- LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule();
- FuncInfo->InsertPt = Scheduler->InsertPos;
+ // FuncInfo->InsertPt is passed by reference and set to the end of the
+ // scheduled instructions.
+ LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt);
}
// If the block was split, make sure we update any references that are used to
@@ -774,7 +775,7 @@ void SelectionDAGISel::PrepareEHLandingPad() {
// Assign the call site to the landing pad's begin label.
MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
-
+
const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
.addSym(Label);
@@ -934,9 +935,9 @@ static void collectFailStats(const Instruction *I) {
case Instruction::FPToSI: NumFastIselFailFPToSI++; return;
case Instruction::UIToFP: NumFastIselFailUIToFP++; return;
case Instruction::SIToFP: NumFastIselFailSIToFP++; return;
- case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return;
+ case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return;
case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return;
- case Instruction::BitCast: NumFastIselFailBitCast++; return;
+ case Instruction::BitCast: NumFastIselFailBitCast++; return;
// Other instructions...
case Instruction::ICmp: NumFastIselFailICmp++; return;
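
The net effect of the scheduler hunks above: Run() no longer takes an insertion point, and EmitSchedule() reports the final position through an iterator reference rather than the old InsertPos member. A hedged sketch of the new call sequence, with Scheduler, CurDAG and FuncInfo as in the code above:

    // Schedule, then emit; InsertPt is updated in place by EmitSchedule().
    Scheduler->Run(CurDAG, FuncInfo->MBB);
    FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt);
    // FuncInfo->MBB may now be a different block if emission split it.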
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 5412c97..9a86f32 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -1,4 +1,4 @@
-//===- SjLjEHPass.cpp - Eliminate Invoke & Unwind instructions -----------===//
+//===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -42,7 +42,7 @@ STATISTIC(NumInvokes, "Number of invokes replaced");
STATISTIC(NumSpilled, "Number of registers live across unwind edges");
namespace {
- class SjLjEHPass : public FunctionPass {
+ class SjLjEHPrepare : public FunctionPass {
const TargetLowering *TLI;
Type *FunctionContextTy;
Constant *RegisterFn;
@@ -58,7 +58,7 @@ namespace {
AllocaInst *FuncCtx;
public:
static char ID; // Pass identification, replacement for typeid
- explicit SjLjEHPass(const TargetLowering *tli = NULL)
+ explicit SjLjEHPrepare(const TargetLowering *tli = NULL)
: FunctionPass(ID), TLI(tli) { }
bool doInitialization(Module &M);
bool runOnFunction(Function &F);
@@ -79,15 +79,15 @@ namespace {
};
} // end anonymous namespace
-char SjLjEHPass::ID = 0;
+char SjLjEHPrepare::ID = 0;
-// Public Interface To the SjLjEHPass pass.
-FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) {
- return new SjLjEHPass(TLI);
+// Public Interface To the SjLjEHPrepare pass.
+FunctionPass *llvm::createSjLjEHPreparePass(const TargetLowering *TLI) {
+ return new SjLjEHPrepare(TLI);
}
// doInitialization - Set up declarations and types needed to process
// exceptions.
-bool SjLjEHPass::doInitialization(Module &M) {
+bool SjLjEHPrepare::doInitialization(Module &M) {
// Build the function context structure.
// builtin_setjmp uses a five word jbuf
Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
@@ -123,7 +123,7 @@ bool SjLjEHPass::doInitialization(Module &M) {
/// insertCallSiteStore - Insert a store of the call-site value to the
/// function context
-void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number) {
+void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) {
IRBuilder<> Builder(I);
// Get a reference to the call_site field.
@@ -151,8 +151,8 @@ static void MarkBlocksLiveIn(BasicBlock *BB,
/// substituteLPadValues - Substitute the values returned by the landingpad
/// instruction with those returned by the personality function.
-void SjLjEHPass::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
- Value *SelVal) {
+void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
+ Value *SelVal) {
SmallVector<Value*, 8> UseWorkList(LPI->use_begin(), LPI->use_end());
while (!UseWorkList.empty()) {
Value *Val = UseWorkList.pop_back_val();
@@ -183,7 +183,7 @@ void SjLjEHPass::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
/// setupFunctionContext - Allocate the function context on the stack and fill
/// it with all of the data that we know at this point.
-Value *SjLjEHPass::
+Value *SjLjEHPrepare::
setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
BasicBlock *EntryBB = F.begin();
@@ -251,7 +251,7 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
/// specially, we lower each arg to a copy instruction in the entry block. This
/// ensures that the argument value itself cannot be live out of the entry
/// block.
-void SjLjEHPass::lowerIncomingArguments(Function &F) {
+void SjLjEHPrepare::lowerIncomingArguments(Function &F) {
BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin();
while (isa<AllocaInst>(AfterAllocaInsPt) &&
isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize()))
@@ -295,8 +295,8 @@ void SjLjEHPass::lowerIncomingArguments(Function &F) {
/// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind
/// edge and spill them.
-void SjLjEHPass::lowerAcrossUnwindEdges(Function &F,
- ArrayRef<InvokeInst*> Invokes) {
+void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
+ ArrayRef<InvokeInst*> Invokes) {
// Finally, scan the code looking for instructions with bad live ranges.
for (Function::iterator
BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
@@ -393,7 +393,7 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F,
/// setupEntryBlockAndCallSites - Setup the entry block by creating and filling
/// the function context and marking the call sites with the appropriate
/// values. These values are used by the DWARF EH emitter.
-bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) {
+bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
SmallVector<ReturnInst*, 16> Returns;
SmallVector<InvokeInst*, 16> Invokes;
SmallSetVector<LandingPadInst*, 16> LPads;
@@ -519,7 +519,7 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) {
return true;
}
-bool SjLjEHPass::runOnFunction(Function &F) {
+bool SjLjEHPrepare::runOnFunction(Function &F) {
bool Res = setupEntryBlockAndCallSites(F);
return Res;
}
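
Only the factory spelling changes for clients of the renamed pass. A minimal usage sketch, assuming PM is a caller's PassManager and TLI its TargetLowering (neither appears in this patch):

    #include "llvm/PassManager.h"
    #include "llvm/CodeGen/Passes.h"

    // Previously: PM.add(createSjLjEHPass(TLI));
    PM.add(llvm::createSjLjEHPreparePass(TLI));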
diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt
index 58caae8..cb11bfe 100644
--- a/lib/ExecutionEngine/CMakeLists.txt
+++ b/lib/ExecutionEngine/CMakeLists.txt
@@ -1,3 +1,5 @@
+
+
add_llvm_library(LLVMExecutionEngine
ExecutionEngine.cpp
ExecutionEngineBindings.cpp
@@ -8,3 +10,11 @@ add_subdirectory(Interpreter)
add_subdirectory(JIT)
add_subdirectory(MCJIT)
add_subdirectory(RuntimeDyld)
+
+if( LLVM_USE_OPROFILE )
+ add_subdirectory(OProfileJIT)
+endif( LLVM_USE_OPROFILE )
+
+if( LLVM_USE_INTEL_JITEVENTS )
+ add_subdirectory(IntelJITEvents)
+endif( LLVM_USE_INTEL_JITEVENTS )
diff --git a/lib/ExecutionEngine/EventListenerCommon.h b/lib/ExecutionEngine/EventListenerCommon.h
new file mode 100644
index 0000000..1c07c94
--- /dev/null
+++ b/lib/ExecutionEngine/EventListenerCommon.h
@@ -0,0 +1,67 @@
+//===-- EventListenerCommon.h - Shared JIT event listener code ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Common functionality for JITEventListener implementations
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef EVENT_LISTENER_COMMON_H
+#define EVENT_LISTENER_COMMON_H
+
+#include "llvm/Metadata.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/Path.h"
+
+namespace llvm {
+
+namespace jitprofiling {
+
+class FilenameCache {
+ // Holds the filename of each Scope, so that we can pass a null-terminated
+ // string into the profiler. Use an AssertingVH rather than a ValueMap as we
+ // shouldn't be modifying any MDNodes while this map is alive.
+ DenseMap<AssertingVH<MDNode>, std::string> Filenames;
+ DenseMap<AssertingVH<MDNode>, std::string> Paths;
+
+ public:
+ const char *getFilename(MDNode *Scope) {
+ std::string &Filename = Filenames[Scope];
+ if (Filename.empty()) {
+ DIScope DIScope(Scope);
+ Filename = DIScope.getFilename();
+ }
+ return Filename.c_str();
+ }
+
+ const char *getFullPath(MDNode *Scope) {
+ std::string &P = Paths[Scope];
+ if (P.empty()) {
+ DIScope DIScope(Scope);
+ StringRef DirName = DIScope.getDirectory();
+ StringRef FileName = DIScope.getFilename();
+ SmallString<256> FullPath;
+ if (DirName != "." && DirName != "") {
+ FullPath = DirName;
+ }
+ if (FileName != "") {
+ sys::path::append(FullPath, FileName);
+ }
+ P = FullPath.str();
+ }
+ return P.c_str();
+ }
+};
+
+} // namespace jitprofiling
+
+} // namespace llvm
+
+#endif // EVENT_LISTENER_COMMON_H
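
A short usage sketch for the cache above, assuming Scope is a debug-info MDNode taken from a DebugLoc. The point of the cache is that the returned pointers are null-terminated and stay valid for the cache's lifetime, which is what the profilers' C APIs need:

    using namespace llvm::jitprofiling;

    FilenameCache Cache;
    const char *File = Cache.getFilename(Scope);  // e.g. "foo.c"
    const char *Path = Cache.getFullPath(Scope);  // e.g. "/src/foo.c"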
diff --git a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
new file mode 100644
index 0000000..7d67d0d
--- /dev/null
+++ b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
@@ -0,0 +1,11 @@
+
+include_directories( ${LLVM_INTEL_JITEVENTS_INCDIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+set(system_libs
+ ${system_libs}
+ jitprofiling
+ )
+
+add_llvm_library(LLVMIntelJITEvents
+ IntelJITEventListener.cpp
+ )
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
new file mode 100644
index 0000000..5dfa78f
--- /dev/null
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -0,0 +1,183 @@
+//===-- IntelJITEventListener.cpp - Tell Intel profiler about JITed code --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITEventListener object to tell Intel(R) VTune(TM)
+// Amplifier XE 2011 about JITted functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+
+#define DEBUG_TYPE "amplifier-jit-event-listener"
+#include "llvm/Function.h"
+#include "llvm/Metadata.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ExecutionEngine/IntelJITEventsWrapper.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Errno.h"
+#include "llvm/Support/ValueHandle.h"
+#include "EventListenerCommon.h"
+
+using namespace llvm;
+using namespace llvm::jitprofiling;
+
+namespace {
+
+class IntelJITEventListener : public JITEventListener {
+ typedef DenseMap<void*, unsigned int> MethodIDMap;
+
+ IntelJITEventsWrapper& Wrapper;
+ MethodIDMap MethodIDs;
+ FilenameCache Filenames;
+
+public:
+ IntelJITEventListener(IntelJITEventsWrapper& libraryWrapper)
+ : Wrapper(libraryWrapper) {
+ }
+
+ ~IntelJITEventListener() {
+ }
+
+ virtual void NotifyFunctionEmitted(const Function &F,
+ void *FnStart, size_t FnSize,
+ const EmittedFunctionDetails &Details);
+
+ virtual void NotifyFreeingMachineCode(void *OldPtr);
+};
+
+static LineNumberInfo LineStartToIntelJITFormat(
+ uintptr_t StartAddress,
+ uintptr_t Address,
+ DebugLoc Loc) {
+ LineNumberInfo Result;
+
+ Result.Offset = Address - StartAddress;
+ Result.LineNumber = Loc.getLine();
+
+ return Result;
+}
+
+static iJIT_Method_Load FunctionDescToIntelJITFormat(
+ IntelJITEventsWrapper& Wrapper,
+ const char* FnName,
+ uintptr_t FnStart,
+ size_t FnSize) {
+ iJIT_Method_Load Result;
+ memset(&Result, 0, sizeof(iJIT_Method_Load));
+
+ Result.method_id = Wrapper.iJIT_GetNewMethodID();
+ Result.method_name = const_cast<char*>(FnName);
+ Result.method_load_address = reinterpret_cast<void*>(FnStart);
+ Result.method_size = FnSize;
+
+ Result.class_id = 0;
+ Result.class_file_name = NULL;
+ Result.user_data = NULL;
+ Result.user_data_size = 0;
+ Result.env = iJDE_JittingAPI;
+
+ return Result;
+}
+
+// Adds the just-emitted function to the symbol table.
+void IntelJITEventListener::NotifyFunctionEmitted(
+ const Function &F, void *FnStart, size_t FnSize,
+ const EmittedFunctionDetails &Details) {
+ iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(Wrapper,
+ F.getName().data(),
+ reinterpret_cast<uintptr_t>(FnStart),
+ FnSize);
+
+ std::vector<LineNumberInfo> LineInfo;
+
+ if (!Details.LineStarts.empty()) {
+ // Now convert the line number information from the address/DebugLoc
+ // format in Details to the offset/lineno in Intel JIT API format.
+
+ LineInfo.reserve(Details.LineStarts.size() + 1);
+
+ DebugLoc FirstLoc = Details.LineStarts[0].Loc;
+ assert(!FirstLoc.isUnknown()
+ && "LineStarts should not contain unknown DebugLocs");
+
+ MDNode *FirstLocScope = FirstLoc.getScope(F.getContext());
+ DISubprogram FunctionDI = getDISubprogram(FirstLocScope);
+ if (FunctionDI.Verify()) {
+ FunctionMessage.source_file_name = const_cast<char*>(
+ Filenames.getFullPath(FirstLocScope));
+
+ LineNumberInfo FirstLine;
+ FirstLine.Offset = 0;
+ FirstLine.LineNumber = FunctionDI.getLineNumber();
+ LineInfo.push_back(FirstLine);
+ }
+
+ for (std::vector<EmittedFunctionDetails::LineStart>::const_iterator I =
+ Details.LineStarts.begin(), E = Details.LineStarts.end();
+ I != E; ++I) {
+ // This implementation ignores the DebugLoc filename because the Intel
+ // JIT API does not support multiple source files associated with a single
+ // JIT function.
+ LineInfo.push_back(LineStartToIntelJITFormat(
+ reinterpret_cast<uintptr_t>(FnStart),
+ I->Address,
+ I->Loc));
+
+ // If we have no file name yet for the function, use the filename from
+ // the first instruction that has one
+ if (FunctionMessage.source_file_name == 0) {
+ MDNode *scope = I->Loc.getScope(
+ Details.MF->getFunction()->getContext());
+ FunctionMessage.source_file_name = const_cast<char*>(
+ Filenames.getFullPath(scope));
+ }
+ }
+
+ FunctionMessage.line_number_size = LineInfo.size();
+ FunctionMessage.line_number_table = &*LineInfo.begin();
+ } else {
+ FunctionMessage.line_number_size = 0;
+ FunctionMessage.line_number_table = 0;
+ }
+
+ Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
+ &FunctionMessage);
+ MethodIDs[FnStart] = FunctionMessage.method_id;
+}
+
+void IntelJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
+ MethodIDMap::iterator I = MethodIDs.find(FnStart);
+ if (I != MethodIDs.end()) {
+ Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START, &I->second);
+ MethodIDs.erase(I);
+ }
+}
+
+} // anonymous namespace.
+
+namespace llvm {
+JITEventListener *JITEventListener::createIntelJITEventListener() {
+ static OwningPtr<IntelJITEventsWrapper> JITProfilingWrapper(
+ new IntelJITEventsWrapper);
+ return new IntelJITEventListener(*JITProfilingWrapper);
+}
+
+// for testing
+JITEventListener *JITEventListener::createIntelJITEventListener(
+ IntelJITEventsWrapper* TestImpl) {
+ return new IntelJITEventListener(*TestImpl);
+}
+
+} // namespace llvm
+
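
Hooking the new listener into a JIT follows the usual JITEventListener pattern; a hedged sketch, with EE standing for an ExecutionEngine created elsewhere (e.g. by EngineBuilder):

    #include "llvm/ExecutionEngine/JITEventListener.h"

    // VTune is then notified as functions are emitted and later freed.
    EE->RegisterJITEventListener(
        llvm::JITEventListener::createIntelJITEventListener());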
diff --git a/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt
new file mode 100644
index 0000000..80d2273
--- /dev/null
+++ b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt -------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+
+[component_0]
+type = Library
+name = IntelJITEvents
+parent = ExecutionEngine
diff --git a/lib/ExecutionEngine/IntelJITEvents/Makefile b/lib/ExecutionEngine/IntelJITEvents/Makefile
new file mode 100644
index 0000000..ba75ac6
--- /dev/null
+++ b/lib/ExecutionEngine/IntelJITEvents/Makefile
@@ -0,0 +1,17 @@
+##===- lib/ExecutionEngine/IntelJITEvents/Makefile ---------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMIntelJITEvents
+
+include $(LEVEL)/Makefile.config
+
+SOURCES := IntelJITEventListener.cpp
+CPPFLAGS += -I$(INTEL_JITEVENTS_INCDIR) -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 3dce3b3..af47be9 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -650,12 +650,10 @@ void Interpreter::visitSwitchInst(SwitchInst &I) {
// Check to see if any of the cases match...
BasicBlock *Dest = 0;
- unsigned NumCases = I.getNumCases();
- // Skip the first item since that's the default case.
- for (unsigned i = 0; i < NumCases; ++i) {
- GenericValue CaseVal = getOperandValue(I.getCaseValue(i), SF);
+ for (SwitchInst::CaseIt i = I.case_begin(), e = I.case_end(); i != e; ++i) {
+ GenericValue CaseVal = getOperandValue(i.getCaseValue(), SF);
if (executeICMP_EQ(CondVal, CaseVal, ElTy).IntVal != 0) {
- Dest = cast<BasicBlock>(I.getCaseSuccessor(i));
+ Dest = cast<BasicBlock>(i.getCaseSuccessor());
break;
}
}
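
The hunk above moves the interpreter to the SwitchInst::CaseIt API, which hides the operand layout (and the default case) instead of relying on manual index arithmetic. A standalone sketch of the same loop shape, with SI assumed to be a SwitchInst*:

    for (SwitchInst::CaseIt it = SI->case_begin(), ie = SI->case_end();
         it != ie; ++it) {
      ConstantInt *CaseVal = it.getCaseValue();   // never the default case
      BasicBlock *Succ = it.getCaseSuccessor();
      // ... compare CaseVal with the condition; branch to Succ on a match ...
    }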
diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt
index dcef08c..52bb389 100644
--- a/lib/ExecutionEngine/JIT/CMakeLists.txt
+++ b/lib/ExecutionEngine/JIT/CMakeLists.txt
@@ -2,10 +2,8 @@
add_definitions(-DENABLE_X86_JIT)
add_llvm_library(LLVMJIT
- Intercept.cpp
JIT.cpp
JITDwarfEmitter.cpp
JITEmitter.cpp
JITMemoryManager.cpp
- OProfileJITEventListener.cpp
)
diff --git a/lib/ExecutionEngine/JIT/Intercept.cpp b/lib/ExecutionEngine/JIT/Intercept.cpp
deleted file mode 100644
index 2251a8e..0000000
--- a/lib/ExecutionEngine/JIT/Intercept.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-//===-- Intercept.cpp - System function interception routines -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// If a function call occurs to an external function, the JIT is designed to use
-// the dynamic loader interface to find a function to call. This is useful for
-// calling system calls and library functions that are not available in LLVM.
-// Some system calls, however, need to be handled specially. For this reason,
-// we intercept some of them here and use our own stubs to handle them.
-//
-//===----------------------------------------------------------------------===//
-
-#include "JIT.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Config/config.h"
-using namespace llvm;
-
-// AtExitHandlers - List of functions to call when the program exits,
-// registered with the atexit() library function.
-static std::vector<void (*)()> AtExitHandlers;
-
-/// runAtExitHandlers - Run any functions registered by the program's
-/// calls to atexit(3), which we intercept and store in
-/// AtExitHandlers.
-///
-static void runAtExitHandlers() {
- while (!AtExitHandlers.empty()) {
- void (*Fn)() = AtExitHandlers.back();
- AtExitHandlers.pop_back();
- Fn();
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Function stubs that are invoked instead of certain library calls
-//===----------------------------------------------------------------------===//
-
-// Force the following functions to be linked in to anything that uses the
-// JIT. This is a hack designed to work around the all-too-clever Glibc
-// strategy of making these functions work differently when inlined vs. when
-// not inlined, and hiding their real definitions in a separate archive file
-// that the dynamic linker can't see. For more info, search for
-// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
-#if defined(__linux__)
-#if defined(HAVE_SYS_STAT_H)
-#include <sys/stat.h>
-#endif
-#include <fcntl.h>
-#include <unistd.h>
-/* stat functions are redirecting to __xstat with a version number. On x86-64
- * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
- * available as an exported symbol, so we have to add it explicitly.
- */
-namespace {
-class StatSymbols {
-public:
- StatSymbols() {
- sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
- sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
- sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
- sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
- sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
- sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
- sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
- sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
- sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
- sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
- sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
- }
-};
-}
-static StatSymbols initStatSymbols;
-#endif // __linux__
-
-// jit_exit - Used to intercept the "exit" library call.
-static void jit_exit(int Status) {
- runAtExitHandlers(); // Run atexit handlers...
- exit(Status);
-}
-
-// jit_atexit - Used to intercept the "atexit" library call.
-static int jit_atexit(void (*Fn)()) {
- AtExitHandlers.push_back(Fn); // Take note of atexit handler...
- return 0; // Always successful
-}
-
-static int jit_noop() {
- return 0;
-}
-
-//===----------------------------------------------------------------------===//
-//
-/// getPointerToNamedFunction - This method returns the address of the specified
-/// function by using the dynamic loader interface. As such it is only useful
-/// for resolving library symbols, not code generated symbols.
-///
-void *JIT::getPointerToNamedFunction(const std::string &Name,
- bool AbortOnFailure) {
- if (!isSymbolSearchingDisabled()) {
- // Check to see if this is one of the functions we want to intercept. Note,
- // we cast to intptr_t here to silence a -pedantic warning that complains
- // about casting a function pointer to a normal pointer.
- if (Name == "exit") return (void*)(intptr_t)&jit_exit;
- if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
-
- // We should not invoke parent's ctors/dtors from generated main()!
- // On Mingw and Cygwin, the symbol __main is resolved to
- // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors
- // (and register wrong callee's dtors with atexit(3)).
- // We expect ExecutionEngine::runStaticConstructorsDestructors()
- // is called before ExecutionEngine::runFunctionAsMain() is called.
- if (Name == "__main") return (void*)(intptr_t)&jit_noop;
-
- const char *NameStr = Name.c_str();
- // If this is an asm specifier, skip the sentinal.
- if (NameStr[0] == 1) ++NameStr;
-
- // If it's an external function, look it up in the process image...
- void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
- if (Ptr) return Ptr;
-
- // If it wasn't found and if it starts with an underscore ('_') character,
- // and has an asm specifier, try again without the underscore.
- if (Name[0] == 1 && NameStr[0] == '_') {
- Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
- if (Ptr) return Ptr;
- }
-
- // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These
- // are references to hidden visibility symbols that dlsym cannot resolve.
- // If we have one of these, strip off $LDBLStub and try again.
-#if defined(__APPLE__) && defined(__ppc__)
- if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
- memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
- // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
- // This mirrors logic in libSystemStubs.a.
- std::string Prefix = std::string(Name.begin(), Name.end()-9);
- if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
- return Ptr;
- if (void *Ptr = getPointerToNamedFunction(Prefix, false))
- return Ptr;
- }
-#endif
- }
-
- /// If a LazyFunctionCreator is installed, use it to get/create the function.
- if (LazyFunctionCreator)
- if (void *RP = LazyFunctionCreator(Name))
- return RP;
-
- if (AbortOnFailure) {
- report_fatal_error("Program used external function '"+Name+
- "' which could not be resolved!");
- }
- return 0;
-}
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index f715f6f..16b8ee2 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineCodeInfo.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetJITInfo.h"
@@ -267,9 +268,9 @@ extern "C" {
}
JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
- JITMemoryManager *JMM, bool GVsWithCode)
- : ExecutionEngine(M), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode),
- isAlreadyCodeGenerating(false) {
+ JITMemoryManager *jmm, bool GVsWithCode)
+ : ExecutionEngine(M), TM(tm), TJI(tji), JMM(jmm),
+ AllocateGVsWithCode(GVsWithCode), isAlreadyCodeGenerating(false) {
setTargetData(TM.getTargetData());
jitstate = new JITState(M);
@@ -711,6 +712,27 @@ void *JIT::getPointerToBasicBlock(BasicBlock *BB) {
}
}
+void *JIT::getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure){
+ if (!isSymbolSearchingDisabled()) {
+ void *ptr = JMM->getPointerToNamedFunction(Name, false);
+ if (ptr)
+ return ptr;
+ }
+
+ /// If a LazyFunctionCreator is installed, use it to get/create the function.
+ if (LazyFunctionCreator)
+ if (void *RP = LazyFunctionCreator(Name))
+ return RP;
+
+ if (AbortOnFailure) {
+ report_fatal_error("Program used external function '"+Name+
+ "' which could not be resolved!");
+ }
+ return 0;
+}
+
+
/// getOrEmitGlobalVariable - Return the address of the specified global
/// variable, possibly emitting it to memory if needed. This is used by the
/// Emitter.
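
The new JIT::getPointerToNamedFunction keeps the old resolution order: the memory manager's dlsym-style lookup first, then any installed LazyFunctionCreator, then the fatal-error path. A hedged sketch of that hook; the symbol and helper below are hypothetical, not part of this patch:

    extern "C" void myRuntimeHook();   // hypothetical host-side helper

    static void *resolveExtra(const std::string &Name) {
      if (Name == "my_runtime_hook")   // hypothetical symbol name
        return (void*)(intptr_t)&myRuntimeHook;
      return 0;  // not ours; the JIT falls through to its error path
    }
    // ... after creating the ExecutionEngine EE:
    EE->InstallLazyFunctionCreator(resolveExtra);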
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h
index 17d33fe..c557981 100644
--- a/lib/ExecutionEngine/JIT/JIT.h
+++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -58,6 +58,7 @@ class JIT : public ExecutionEngine {
TargetMachine &TM; // The current target we are compiling to
TargetJITInfo &TJI; // The JITInfo for the target we are compiling to
JITCodeEmitter *JCE; // JCE object
+ JITMemoryManager *JMM;
std::vector<JITEventListener*> EventListeners;
/// AllocateGVsWithCode - Some applications require that global variables and
diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
index efd570d..d404d0c 100644
--- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
+++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -314,6 +314,17 @@ namespace {
/// should allocate a separate slab.
static const size_t DefaultSizeThreshold;
+ /// getPointerToNamedFunction - This method returns the address of the
+ /// specified function by using the dlsym function call. As such it is only
+ /// useful for resolving library symbols, not code generated symbols.
+ ///
+ /// If AbortOnFailure is false and no function with the given name is
+ /// found, this function silently returns a null pointer. Otherwise,
+ /// it prints a message to stderr and aborts.
+ ///
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true);
+
void AllocateGOT();
// Testing methods.
@@ -757,6 +768,148 @@ bool DefaultJITMemoryManager::CheckInvariants(std::string &ErrorStr) {
return true;
}
+//===----------------------------------------------------------------------===//
+// getPointerToNamedFunction() implementation.
+//===----------------------------------------------------------------------===//
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Config/config.h"
+
+// AtExitHandlers - List of functions to call when the program exits,
+// registered with the atexit() library function.
+static std::vector<void (*)()> AtExitHandlers;
+
+/// runAtExitHandlers - Run any functions registered by the program's
+/// calls to atexit(3), which we intercept and store in
+/// AtExitHandlers.
+///
+static void runAtExitHandlers() {
+ while (!AtExitHandlers.empty()) {
+ void (*Fn)() = AtExitHandlers.back();
+ AtExitHandlers.pop_back();
+ Fn();
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Function stubs that are invoked instead of certain library calls
+//===----------------------------------------------------------------------===//
+
+// Force the following functions to be linked in to anything that uses the
+// JIT. This is a hack designed to work around the all-too-clever Glibc
+// strategy of making these functions work differently when inlined vs. when
+// not inlined, and hiding their real definitions in a separate archive file
+// that the dynamic linker can't see. For more info, search for
+// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
+#if defined(__linux__)
+#if defined(HAVE_SYS_STAT_H)
+#include <sys/stat.h>
+#endif
+#include <fcntl.h>
+#include <unistd.h>
+/* stat functions are redirecting to __xstat with a version number. On x86-64
+ * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
+ * available as an exported symbol, so we have to add it explicitly.
+ */
+namespace {
+class StatSymbols {
+public:
+ StatSymbols() {
+ sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
+ sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
+ sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
+ sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
+ sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
+ sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
+ sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
+ sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
+ sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
+ sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
+ sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
+ }
+};
+}
+static StatSymbols initStatSymbols;
+#endif // __linux__
+
+// jit_exit - Used to intercept the "exit" library call.
+static void jit_exit(int Status) {
+ runAtExitHandlers(); // Run atexit handlers...
+ exit(Status);
+}
+
+// jit_atexit - Used to intercept the "atexit" library call.
+static int jit_atexit(void (*Fn)()) {
+ AtExitHandlers.push_back(Fn); // Take note of atexit handler...
+ return 0; // Always successful
+}
+
+static int jit_noop() {
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+//
+/// getPointerToNamedFunction - This method returns the address of the specified
+/// function by using the dynamic loader interface. As such it is only useful
+/// for resolving library symbols, not code generated symbols.
+///
+void *DefaultJITMemoryManager::getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure) {
+ // Check to see if this is one of the functions we want to intercept. Note,
+ // we cast to intptr_t here to silence a -pedantic warning that complains
+ // about casting a function pointer to a normal pointer.
+ if (Name == "exit") return (void*)(intptr_t)&jit_exit;
+ if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
+
+ // We should not invoke parent's ctors/dtors from generated main()!
+ // On Mingw and Cygwin, the symbol __main is resolved to
+ // the callee's (e.g. tools/lli), invoking the wrong duplicated ctors
+ // (and registering the wrong callee's dtors with atexit(3)).
+ // We expect ExecutionEngine::runStaticConstructorsDestructors()
+ // is called before ExecutionEngine::runFunctionAsMain() is called.
+ if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+
+ const char *NameStr = Name.c_str();
+ // If this is an asm specifier, skip the sentinel.
+ if (NameStr[0] == 1) ++NameStr;
+
+ // If it's an external function, look it up in the process image...
+ void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
+ if (Ptr) return Ptr;
+
+ // If it wasn't found and if it starts with an underscore ('_') character,
+ // try again without the underscore.
+ if (NameStr[0] == '_') {
+ Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
+ if (Ptr) return Ptr;
+ }
+
+ // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These
+ // are references to hidden visibility symbols that dlsym cannot resolve.
+ // If we have one of these, strip off $LDBLStub and try again.
+#if defined(__APPLE__) && defined(__ppc__)
+ if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
+ memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
+ // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
+ // This mirrors logic in libSystemStubs.a.
+ std::string Prefix = std::string(Name.begin(), Name.end()-9);
+ if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
+ return Ptr;
+ if (void *Ptr = getPointerToNamedFunction(Prefix, false))
+ return Ptr;
+ }
+#endif
+
+ if (AbortOnFailure) {
+ report_fatal_error("Program used external function '"+Name+
+ "' which could not be resolved!");
+ }
+ return 0;
+}
+
+
+
JITMemoryManager *JITMemoryManager::CreateDefaultMemManager() {
return new DefaultJITMemoryManager();
}
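
Stub interception aside, the lookup that now lives in DefaultJITMemoryManager reduces to the existing sys::DynamicLibrary API plus the \1 asm-name sentinel convention; a minimal sketch:

    #include "llvm/Support/DynamicLibrary.h"
    #include <string>

    static void *lookupInProcess(const std::string &Name) {
      const char *S = Name.c_str();
      if (S[0] == 1) ++S;   // skip the \1 "literal symbol name" sentinel
      return llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(S);
    }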
diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt
index d426969..1f94a4f 100644
--- a/lib/ExecutionEngine/LLVMBuild.txt
+++ b/lib/ExecutionEngine/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = Interpreter JIT MCJIT RuntimeDyld
+subdirectories = Interpreter JIT MCJIT RuntimeDyld IntelJITEvents OProfileJIT
[component_0]
type = Library
diff --git a/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/lib/ExecutionEngine/MCJIT/CMakeLists.txt
index 2c0f8d6..fef7176 100644
--- a/lib/ExecutionEngine/MCJIT/CMakeLists.txt
+++ b/lib/ExecutionEngine/MCJIT/CMakeLists.txt
@@ -1,5 +1,4 @@
add_llvm_library(LLVMMCJIT
MCJIT.cpp
MCJITMemoryManager.cpp
- Intercept.cpp
)
diff --git a/lib/ExecutionEngine/MCJIT/Intercept.cpp b/lib/ExecutionEngine/MCJIT/Intercept.cpp
deleted file mode 100644
index f83f428..0000000
--- a/lib/ExecutionEngine/MCJIT/Intercept.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-//===-- Intercept.cpp - System function interception routines -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// If a function call occurs to an external function, the JIT is designed to use
-// the dynamic loader interface to find a function to call. This is useful for
-// calling system calls and library functions that are not available in LLVM.
-// Some system calls, however, need to be handled specially. For this reason,
-// we intercept some of them here and use our own stubs to handle them.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCJIT.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Config/config.h"
-using namespace llvm;
-
-// AtExitHandlers - List of functions to call when the program exits,
-// registered with the atexit() library function.
-static std::vector<void (*)()> AtExitHandlers;
-
-/// runAtExitHandlers - Run any functions registered by the program's
-/// calls to atexit(3), which we intercept and store in
-/// AtExitHandlers.
-///
-static void runAtExitHandlers() {
- while (!AtExitHandlers.empty()) {
- void (*Fn)() = AtExitHandlers.back();
- AtExitHandlers.pop_back();
- Fn();
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Function stubs that are invoked instead of certain library calls
-//===----------------------------------------------------------------------===//
-
-// Force the following functions to be linked in to anything that uses the
-// JIT. This is a hack designed to work around the all-too-clever Glibc
-// strategy of making these functions work differently when inlined vs. when
-// not inlined, and hiding their real definitions in a separate archive file
-// that the dynamic linker can't see. For more info, search for
-// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
-#if defined(__linux__)
-#if defined(HAVE_SYS_STAT_H)
-#include <sys/stat.h>
-#endif
-#include <fcntl.h>
-#include <unistd.h>
-/* stat functions are redirecting to __xstat with a version number. On x86-64
- * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
- * available as an exported symbol, so we have to add it explicitly.
- */
-namespace {
-class StatSymbols {
-public:
- StatSymbols() {
- sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
- sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
- sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
- sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
- sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
- sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
- sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
- sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
- sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
- sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
- sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
- }
-};
-}
-static StatSymbols initStatSymbols;
-#endif // __linux__
-
-// jit_exit - Used to intercept the "exit" library call.
-static void jit_exit(int Status) {
- runAtExitHandlers(); // Run atexit handlers...
- exit(Status);
-}
-
-// jit_atexit - Used to intercept the "atexit" library call.
-static int jit_atexit(void (*Fn)()) {
- AtExitHandlers.push_back(Fn); // Take note of atexit handler...
- return 0; // Always successful
-}
-
-static int jit_noop() {
- return 0;
-}
-
-//===----------------------------------------------------------------------===//
-//
-/// getPointerToNamedFunction - This method returns the address of the specified
-/// function by using the dynamic loader interface. As such it is only useful
-/// for resolving library symbols, not code generated symbols.
-///
-void *MCJIT::getPointerToNamedFunction(const std::string &Name,
- bool AbortOnFailure) {
- if (!isSymbolSearchingDisabled()) {
- // Check to see if this is one of the functions we want to intercept. Note,
- // we cast to intptr_t here to silence a -pedantic warning that complains
- // about casting a function pointer to a normal pointer.
- if (Name == "exit") return (void*)(intptr_t)&jit_exit;
- if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
-
- // We should not invoke parent's ctors/dtors from generated main()!
- // On Mingw and Cygwin, the symbol __main is resolved to
- // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors
- // (and register wrong callee's dtors with atexit(3)).
- // We expect ExecutionEngine::runStaticConstructorsDestructors()
- // is called before ExecutionEngine::runFunctionAsMain() is called.
- if (Name == "__main") return (void*)(intptr_t)&jit_noop;
-
- const char *NameStr = Name.c_str();
- // If this is an asm specifier, skip the sentinal.
- if (NameStr[0] == 1) ++NameStr;
-
- // If it's an external function, look it up in the process image...
- void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
- if (Ptr) return Ptr;
-
- // If it wasn't found and if it starts with an underscore ('_') character,
- // and has an asm specifier, try again without the underscore.
- if (Name[0] == 1 && NameStr[0] == '_') {
- Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
- if (Ptr) return Ptr;
- }
-
- // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These
- // are references to hidden visibility symbols that dlsym cannot resolve.
- // If we have one of these, strip off $LDBLStub and try again.
-#if defined(__APPLE__) && defined(__ppc__)
- if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
- memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
- // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
- // This mirrors logic in libSystemStubs.a.
- std::string Prefix = std::string(Name.begin(), Name.end()-9);
- if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
- return Ptr;
- if (void *Ptr = getPointerToNamedFunction(Prefix, false))
- return Ptr;
- }
-#endif
- }
-
- /// If a LazyFunctionCreator is installed, use it to get/create the function.
- if (LazyFunctionCreator)
- if (void *RP = LazyFunctionCreator(Name))
- return RP;
-
- if (AbortOnFailure) {
- report_fatal_error("Program used external function '"+Name+
- "' which could not be resolved!");
- }
- return 0;
-}
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index 5f93a8d..cbb23d3 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -215,3 +215,23 @@ GenericValue MCJIT::runFunction(Function *F,
llvm_unreachable("Full-featured argument passing not supported yet!");
}
+
+void *MCJIT::getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure){
+ if (!isSymbolSearchingDisabled()) {
+ void *ptr = MemMgr->getPointerToNamedFunction(Name, false);
+ if (ptr)
+ return ptr;
+ }
+
+ /// If a LazyFunctionCreator is installed, use it to get/create the function.
+ if (LazyFunctionCreator)
+ if (void *RP = LazyFunctionCreator(Name))
+ return RP;
+
+ if (AbortOnFailure) {
+ report_fatal_error("Program used external function '"+Name+
+ "' which could not be resolved!");
+ }
+ return 0;
+}
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h
index 7f4ae77..2b3df98 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -67,6 +67,7 @@ public:
///
virtual void *getPointerToNamedFunction(const std::string &Name,
bool AbortOnFailure = true);
+
/// mapSectionAddress - map a section to its target address space value.
/// Map the address of a JIT section as returned from the memory manager
/// to the address in the target process as the running code will see it.
diff --git a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h
index ac8c155..dac8b26 100644
--- a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h
+++ b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h
@@ -33,46 +33,17 @@ public:
uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID) {
- return JMM->allocateDataSection(Size, Alignment, SectionID);
+ return JMM->allocateSpace(Size, Alignment);
}
uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID) {
- return JMM->allocateCodeSection(Size, Alignment, SectionID);
+ return JMM->allocateSpace(Size, Alignment);
}
- // Allocate ActualSize bytes, or more, for the named function. Return
- // a pointer to the allocated memory and update Size to reflect how much
- // memory was acutally allocated.
- uint8_t *startFunctionBody(const char *Name, uintptr_t &Size) {
- // FIXME: This should really reference the MCAsmInfo to get the global
- // prefix.
- if (Name[0] == '_') ++Name;
- Function *F = M->getFunction(Name);
- // Some ObjC names have a prefixed \01 in the IR. If we failed to find
- // the symbol and it's of the ObjC conventions (starts with "-" or
- // "+"), try prepending a \01 and see if we can find it that way.
- if (!F && (Name[0] == '-' || Name[0] == '+'))
- F = M->getFunction((Twine("\1") + Name).str());
- assert(F && "No matching function in JIT IR Module!");
- return JMM->startFunctionBody(F, Size);
- }
-
- // Mark the end of the function, including how much of the allocated
- // memory was actually used.
- void endFunctionBody(const char *Name, uint8_t *FunctionStart,
- uint8_t *FunctionEnd) {
- // FIXME: This should really reference the MCAsmInfo to get the global
- // prefix.
- if (Name[0] == '_') ++Name;
- Function *F = M->getFunction(Name);
- // Some ObjC names have a prefixed \01 in the IR. If we failed to find
- // the symbol and it's of the ObjC conventions (starts with "-" or
- // "+"), try prepending a \01 and see if we can find it that way.
- if (!F && (Name[0] == '-' || Name[0] == '+'))
- F = M->getFunction((Twine("\1") + Name).str());
- assert(F && "No matching function in JIT IR Module!");
- JMM->endFunctionBody(F, FunctionStart, FunctionEnd);
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true) {
+ return JMM->getPointerToNamedFunction(Name, AbortOnFailure);
}
};
diff --git a/lib/ExecutionEngine/Makefile b/lib/ExecutionEngine/Makefile
index 9a649a5..c26e0ad 100644
--- a/lib/ExecutionEngine/Makefile
+++ b/lib/ExecutionEngine/Makefile
@@ -8,6 +8,17 @@
##===----------------------------------------------------------------------===##
LEVEL = ../..
LIBRARYNAME = LLVMExecutionEngine
+
+include $(LEVEL)/Makefile.config
+
PARALLEL_DIRS = Interpreter JIT MCJIT RuntimeDyld
-include $(LEVEL)/Makefile.common
+ifeq ($(USE_INTEL_JITEVENTS), 1)
+PARALLEL_DIRS += IntelJITEvents
+endif
+
+ifeq ($(USE_OPROFILE), 1)
+PARALLEL_DIRS += OProfileJIT
+endif
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/lib/ExecutionEngine/OProfileJIT/CMakeLists.txt b/lib/ExecutionEngine/OProfileJIT/CMakeLists.txt
new file mode 100644
index 0000000..d585136
--- /dev/null
+++ b/lib/ExecutionEngine/OProfileJIT/CMakeLists.txt
@@ -0,0 +1,7 @@
+
+include_directories( ${LLVM_OPROFILE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMOProfileJIT
+ OProfileJITEventListener.cpp
+ OProfileWrapper.cpp
+ )
diff --git a/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt
new file mode 100644
index 0000000..4516dfa
--- /dev/null
+++ b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+
+[component_0]
+type = Library
+name = OProfileJIT
+parent = ExecutionEngine
diff --git a/lib/ExecutionEngine/OProfileJIT/Makefile b/lib/ExecutionEngine/OProfileJIT/Makefile
new file mode 100644
index 0000000..fd3adce
--- /dev/null
+++ b/lib/ExecutionEngine/OProfileJIT/Makefile
@@ -0,0 +1,18 @@
+##===- lib/ExecutionEngine/OProfileJIT/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMOProfileJIT
+
+include $(LEVEL)/Makefile.config
+
+SOURCES += OProfileJITEventListener.cpp \
+ OProfileWrapper.cpp
+CPPFLAGS += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
index 9a9ed6d..e6142e3 100644
--- a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
@@ -7,51 +7,55 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines a JITEventListener object that calls into OProfile to tell
-// it about JITted functions. For now, we only record function names and sizes,
-// but eventually we'll also record line number information.
-//
-// See http://oprofile.sourceforge.net/doc/devel/jit-interface.html for the
-// definition of the interface we're using.
+// This file defines a JITEventListener object that uses OProfileWrapper to tell
+// oprofile about JITted functions, including source line information.
//
//===----------------------------------------------------------------------===//
+#include "llvm/Config/config.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+
#define DEBUG_TYPE "oprofile-jit-event-listener"
#include "llvm/Function.h"
-#include "llvm/Metadata.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/OProfileWrapper.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Errno.h"
-#include "llvm/Config/config.h"
-#include <stddef.h>
-using namespace llvm;
+#include "EventListenerCommon.h"
-#if USE_OPROFILE
+#include <dirent.h>
+#include <fcntl.h>
-#include <opagent.h>
+using namespace llvm;
+using namespace llvm::jitprofiling;
namespace {
class OProfileJITEventListener : public JITEventListener {
- op_agent_t Agent;
+ OProfileWrapper& Wrapper;
+
+ void initialize();
+
public:
- OProfileJITEventListener();
+ OProfileJITEventListener(OProfileWrapper& LibraryWrapper)
+ : Wrapper(LibraryWrapper) {
+ initialize();
+ }
+
~OProfileJITEventListener();
virtual void NotifyFunctionEmitted(const Function &F,
- void *FnStart, size_t FnSize,
- const EmittedFunctionDetails &Details);
+ void *FnStart, size_t FnSize,
+ const JITEvent_EmittedFunctionDetails &Details);
+
virtual void NotifyFreeingMachineCode(void *OldPtr);
};
-OProfileJITEventListener::OProfileJITEventListener()
- : Agent(op_open_agent()) {
- if (Agent == NULL) {
+void OProfileJITEventListener::initialize() {
+ if (!Wrapper.op_open_agent()) {
const std::string err_str = sys::StrError();
DEBUG(dbgs() << "Failed to connect to OProfile agent: " << err_str << "\n");
} else {
@@ -60,8 +64,8 @@ OProfileJITEventListener::OProfileJITEventListener()
}
OProfileJITEventListener::~OProfileJITEventListener() {
- if (Agent != NULL) {
- if (op_close_agent(Agent) == -1) {
+ if (Wrapper.isAgentAvailable()) {
+ if (Wrapper.op_close_agent() == -1) {
const std::string err_str = sys::StrError();
DEBUG(dbgs() << "Failed to disconnect from OProfile agent: "
<< err_str << "\n");
@@ -71,22 +75,6 @@ OProfileJITEventListener::~OProfileJITEventListener() {
}
}
-class FilenameCache {
- // Holds the filename of each Scope, so that we can pass a null-terminated
- // string into oprofile. Use an AssertingVH rather than a ValueMap because we
- // shouldn't be modifying any MDNodes while this map is alive.
- DenseMap<AssertingVH<MDNode>, std::string> Filenames;
-
- public:
- const char *getFilename(MDNode *Scope) {
- std::string &Filename = Filenames[Scope];
- if (Filename.empty()) {
- Filename = DIScope(Scope).getFilename();
- }
- return Filename.c_str();
- }
-};
-
static debug_line_info LineStartToOProfileFormat(
const MachineFunction &MF, FilenameCache &Filenames,
uintptr_t Address, DebugLoc Loc) {
@@ -103,9 +91,9 @@ static debug_line_info LineStartToOProfileFormat(
// Adds the just-emitted function to the symbol table.
void OProfileJITEventListener::NotifyFunctionEmitted(
const Function &F, void *FnStart, size_t FnSize,
- const EmittedFunctionDetails &Details) {
+ const JITEvent_EmittedFunctionDetails &Details) {
assert(F.hasName() && FnStart != 0 && "Bad symbol to add");
- if (op_write_native_code(Agent, F.getName().data(),
+ if (Wrapper.op_write_native_code(F.getName().data(),
reinterpret_cast<uint64_t>(FnStart),
FnStart, FnSize) == -1) {
DEBUG(dbgs() << "Failed to tell OProfile about native function "
@@ -151,8 +139,8 @@ void OProfileJITEventListener::NotifyFunctionEmitted(
// line info's address to include the start of the function.
LineInfo[0].vma = reinterpret_cast<uintptr_t>(FnStart);
- if (op_write_debug_line_info(Agent, FnStart,
- LineInfo.size(), &*LineInfo.begin()) == -1) {
+ if (Wrapper.op_write_debug_line_info(FnStart, LineInfo.size(),
+ &*LineInfo.begin()) == -1) {
DEBUG(dbgs()
<< "Failed to tell OProfile about line numbers for native function "
<< F.getName() << " at ["
@@ -164,7 +152,7 @@ void OProfileJITEventListener::NotifyFunctionEmitted(
// Removes the being-deleted function from the symbol table.
void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
assert(FnStart && "Invalid function pointer");
- if (op_unload_native_code(Agent, reinterpret_cast<uint64_t>(FnStart)) == -1) {
+ if (Wrapper.op_unload_native_code(reinterpret_cast<uint64_t>(FnStart)) == -1) {
DEBUG(dbgs()
<< "Failed to tell OProfile about unload of native function at "
<< FnStart << "\n");
@@ -174,19 +162,16 @@ void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
} // anonymous namespace.
namespace llvm {
-JITEventListener *createOProfileJITEventListener() {
- return new OProfileJITEventListener;
-}
+JITEventListener *JITEventListener::createOProfileJITEventListener() {
+ static OwningPtr<OProfileWrapper> JITProfilingWrapper(new OProfileWrapper);
+ return new OProfileJITEventListener(*JITProfilingWrapper);
}
-#else // USE_OPROFILE
-
-namespace llvm {
-// By defining this to return NULL, we can let clients call it unconditionally,
-// even if they haven't configured with the OProfile libraries.
-JITEventListener *createOProfileJITEventListener() {
- return NULL;
+// For testing: allows a unit test to supply its own (e.g. mock) wrapper.
+JITEventListener *JITEventListener::createOProfileJITEventListener(
+ OProfileWrapper* TestImpl) {
+ return new OProfileJITEventListener(*TestImpl);
}
-} // namespace llvm
-#endif // USE_OPROFILE
+} // namespace llvm
+
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
new file mode 100644
index 0000000..d67f537
--- /dev/null
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
@@ -0,0 +1,263 @@
+//===-- OProfileWrapper.cpp - OProfile JIT API Wrapper implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interface in OProfileWrapper.h. It is responsible
+// for loading the opagent dynamic library when the first call to an op_
+// function occurs.
+//
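+// A rough usage sketch, mirroring how OProfileJITEventListener drives this
+// class (the first op_ call triggers the lazy initialize()):
+//
+//   OProfileWrapper Wrapper;
+//   if (Wrapper.op_open_agent()) {
+//     Wrapper.op_write_native_code(Name, Addr, Code, Size);
+//     Wrapper.op_close_agent();
+//   }
+//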
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/OProfileWrapper.h"
+
+#define DEBUG_TYPE "oprofile-wrapper"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/ADT/SmallString.h"
+
+#include <sstream>
+#include <cstring>
+#include <stddef.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+namespace {
+
+// Global mutex to ensure a single thread initializes oprofile agent.
+llvm::sys::Mutex OProfileInitializationMutex;
+
+} // anonymous namespace
+
+namespace llvm {
+
+OProfileWrapper::OProfileWrapper()
+: Agent(0),
+ OpenAgentFunc(0),
+ CloseAgentFunc(0),
+ WriteNativeCodeFunc(0),
+ WriteDebugLineInfoFunc(0),
+ UnloadNativeCodeFunc(0),
+ MajorVersionFunc(0),
+ MinorVersionFunc(0),
+ IsOProfileRunningFunc(0),
+ Initialized(false) {
+}
+
+bool OProfileWrapper::initialize() {
+ using namespace llvm;
+ using namespace llvm::sys;
+
+ MutexGuard Guard(OProfileInitializationMutex);
+
+ if (Initialized)
+ return OpenAgentFunc != 0;
+
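+ // Record the attempt up front so a failed load is not retried on every
+ // subsequent op_ call; the early return above then reports the cached
+ // result.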
+ Initialized = true;
+
+ // If the oprofile daemon is not running, don't load the opagent library
+ if (!isOProfileRunning()) {
+ DEBUG(dbgs() << "OProfile daemon is not detected.\n");
+ return false;
+ }
+
+ std::string error;
+ if (DynamicLibrary::LoadLibraryPermanently("libopagent.so", &error)) {
+ DEBUG(dbgs()
+ << "OProfile connector library libopagent.so could not be loaded: "
+ << error << "\n");
+ }
+
+ // Get the addresses of the opagent functions
+ OpenAgentFunc = (op_open_agent_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_open_agent");
+ CloseAgentFunc = (op_close_agent_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_close_agent");
+ WriteNativeCodeFunc = (op_write_native_code_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_write_native_code");
+ WriteDebugLineInfoFunc = (op_write_debug_line_info_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_write_debug_line_info");
+ UnloadNativeCodeFunc = (op_unload_native_code_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_unload_native_code");
+ MajorVersionFunc = (op_major_version_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_major_version");
+ MinorVersionFunc = (op_minor_version_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_minor_version");
+
+ // With missing functions, we can do nothing
+ if (!OpenAgentFunc
+ || !CloseAgentFunc
+ || !WriteNativeCodeFunc
+ || !WriteDebugLineInfoFunc
+ || !UnloadNativeCodeFunc) {
+ OpenAgentFunc = 0;
+ CloseAgentFunc = 0;
+ WriteNativeCodeFunc = 0;
+ WriteDebugLineInfoFunc = 0;
+ UnloadNativeCodeFunc = 0;
+ return false;
+ }
+
+ return true;
+}
+
+bool OProfileWrapper::isOProfileRunning() {
+ if (IsOProfileRunningFunc != 0)
+ return IsOProfileRunningFunc();
+ return checkForOProfileProcEntry();
+}
+
+bool OProfileWrapper::checkForOProfileProcEntry() {
+ DIR* ProcDir;
+
+ ProcDir = opendir("/proc");
+ if (!ProcDir)
+ return false;
+
+ // Walk the /proc tree looking for the oprofile daemon
+ struct dirent* Entry;
+ while (0 != (Entry = readdir(ProcDir))) {
+ if (Entry->d_type == DT_DIR) {
+ // Build a path from the current entry name
+ SmallString<256> CmdLineFName;
+ raw_svector_ostream(CmdLineFName) << "/proc/" << Entry->d_name
+ << "/cmdline";
+
+ // Open the cmdline file
+ int CmdLineFD = open(CmdLineFName.c_str(), O_RDONLY);
+ if (CmdLineFD != -1) {
+ char ExeName[PATH_MAX+1];
+ char* BaseName = 0;
+
+ // Read the cmdline file
+ ssize_t NumRead = read(CmdLineFD, ExeName, PATH_MAX+1);
+ close(CmdLineFD);
+ ssize_t Idx = 0;
+
+ // Find the terminator for the first string
+ while (Idx < NumRead-1 && ExeName[Idx] != 0) {
+ Idx++;
+ }
+
+ // Go back to the last non-null character
+ Idx--;
+
+ // Find the last path separator in the first string
+ while (Idx > 0) {
+ if (ExeName[Idx] == '/') {
+ BaseName = ExeName + Idx + 1;
+ break;
+ }
+ Idx--;
+ }
+
+ // Test this to see if it is the oprofile daemon
+ if (BaseName != 0 && !strcmp("oprofiled", BaseName)) {
+ // If it is, we're done
+ closedir(ProcDir);
+ return true;
+ }
+ }
+ }
+ }
+
+ // We've looked through all the files and didn't find the daemon
+ closedir(ProcDir);
+ return false;
+}
+
+bool OProfileWrapper::op_open_agent() {
+ if (!Initialized)
+ initialize();
+
+ if (OpenAgentFunc != 0) {
+ Agent = OpenAgentFunc();
+ return Agent != 0;
+ }
+
+ return false;
+}
+
+int OProfileWrapper::op_close_agent() {
+ if (!Initialized)
+ initialize();
+
+ int ret = -1;
+ if (Agent && CloseAgentFunc) {
+ ret = CloseAgentFunc(Agent);
+ if (ret == 0) {
+ Agent = 0;
+ }
+ }
+ return ret;
+}
+
+bool OProfileWrapper::isAgentAvailable() {
+ return Agent != 0;
+}
+
+int OProfileWrapper::op_write_native_code(const char* Name,
+ uint64_t Addr,
+ void const* Code,
+ const unsigned int Size) {
+ if (!Initialized)
+ initialize();
+
+ if (Agent && WriteNativeCodeFunc)
+ return WriteNativeCodeFunc(Agent, Name, Addr, Code, Size);
+
+ return -1;
+}
+
+int OProfileWrapper::op_write_debug_line_info(
+ void const* Code,
+ size_t NumEntries,
+ struct debug_line_info const* Info) {
+ if (!Initialized)
+ initialize();
+
+ if (Agent && WriteDebugLineInfoFunc)
+ return WriteDebugLineInfoFunc(Agent, Code, NumEntries, Info);
+
+ return -1;
+}
+
+int OProfileWrapper::op_major_version() {
+ if (!Initialized)
+ initialize();
+
+ if (Agent && MajorVersionFunc)
+ return MajorVersionFunc();
+
+ return -1;
+}
+
+int OProfileWrapper::op_minor_version() {
+ if (!Initialized)
+ initialize();
+
+ if (Agent && MinorVersionFunc)
+ return MinorVersionFunc();
+
+ return -1;
+}
+
+int OProfileWrapper::op_unload_native_code(uint64_t Addr) {
+ if (!Initialized)
+ initialize();
+
+ if (Agent && UnloadNativeCodeFunc)
+ return UnloadNativeCodeFunc(Agent, Addr);
+
+ return -1;
+}
+
+} // namespace llvm
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 2896c2d..ff4a2c8 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -26,45 +26,290 @@ RuntimeDyldImpl::~RuntimeDyldImpl() {}
namespace llvm {
-void RuntimeDyldImpl::extractFunction(StringRef Name, uint8_t *StartAddress,
- uint8_t *EndAddress) {
- // FIXME: DEPRECATED in favor of by-section allocation.
- // Allocate memory for the function via the memory manager.
- uintptr_t Size = EndAddress - StartAddress + 1;
- uintptr_t AllocSize = Size;
- uint8_t *Mem = MemMgr->startFunctionBody(Name.data(), AllocSize);
- assert(Size >= (uint64_t)(EndAddress - StartAddress + 1) &&
- "Memory manager failed to allocate enough memory!");
- // Copy the function payload into the memory block.
- memcpy(Mem, StartAddress, Size);
- MemMgr->endFunctionBody(Name.data(), Mem, Mem + Size);
- // Remember where we put it.
- unsigned SectionID = Sections.size();
- Sections.push_back(sys::MemoryBlock(Mem, Size));
- // Default the assigned address for this symbol to wherever this
- // allocated it.
- SymbolTable[Name] = SymbolLoc(SectionID, 0);
- DEBUG(dbgs() << " allocated to [" << Mem << ", " << Mem + Size << "]\n");
-}
// Resolve the relocations for all symbols we currently know about.
void RuntimeDyldImpl::resolveRelocations() {
+ // First, resolve relocations associated with external symbols.
+ resolveSymbols();
+
// Just iterate over the sections we have and resolve all the relocations
// in them. Gross overkill, but it gets the job done.
for (int i = 0, e = Sections.size(); i != e; ++i) {
- reassignSectionAddress(i, SectionLoadAddress[i]);
+ reassignSectionAddress(i, Sections[i].LoadAddress);
}
}
void RuntimeDyldImpl::mapSectionAddress(void *LocalAddress,
uint64_t TargetAddress) {
- assert(SectionLocalMemToID.count(LocalAddress) &&
- "Attempting to remap address of unknown section!");
- unsigned SectionID = SectionLocalMemToID[LocalAddress];
- reassignSectionAddress(SectionID, TargetAddress);
+ for (unsigned i = 0, e = Sections.size(); i != e; ++i) {
+ if (Sections[i].Address == LocalAddress) {
+ reassignSectionAddress(i, TargetAddress);
+ return;
+ }
+ }
+ llvm_unreachable("Attempting to remap address of unknown section!");
+}
+
+bool RuntimeDyldImpl::loadObject(const MemoryBuffer *InputBuffer) {
+ // FIXME: ObjectFile doesn't modify the MemoryBuffer; it should take a
+ // const MemoryBuffer as its parameter.
+ ObjectFile *obj = ObjectFile::
+ createObjectFile(const_cast<MemoryBuffer*>(InputBuffer));
+
+ Arch = (Triple::ArchType)obj->getArch();
+
+ LocalSymbolMap LocalSymbols; // Functions and data symbols from the
+ // object file.
+ ObjSectionToIDMap LocalSections; // Used sections from the object file
+
+ error_code err;
+
+ // Parse symbols
+ DEBUG(dbgs() << "Parse symbols:\n");
+ for (symbol_iterator it = obj->begin_symbols(), itEnd = obj->end_symbols();
+ it != itEnd; it.increment(err)) {
+ if (err) break;
+ object::SymbolRef::Type SymType;
+ StringRef Name;
+ if ((bool)(err = it->getType(SymType))) break;
+ if ((bool)(err = it->getName(Name))) break;
+
+ if (SymType == object::SymbolRef::ST_Function ||
+ SymType == object::SymbolRef::ST_Data) {
+ uint64_t FileOffset;
+ uint32_t flags;
+ StringRef sData;
+ section_iterator sIt = obj->end_sections();
+ if ((bool)(err = it->getFileOffset(FileOffset))) break;
+ if ((bool)(err = it->getFlags(flags))) break;
+ if ((bool)(err = it->getSection(sIt))) break;
+ if (sIt == obj->end_sections()) continue;
+ if ((bool)(err = sIt->getContents(sData))) break;
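+ // Both the symbol's FileOffset and the section contents point into
+ // InputBuffer, so raw pointer arithmetic yields the symbol's offset
+ // within its section.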
+ const uint8_t* SymPtr = (const uint8_t*)InputBuffer->getBufferStart() +
+ (uintptr_t)FileOffset;
+ uintptr_t SectOffset = (uintptr_t)(SymPtr - (const uint8_t*)sData.begin());
+ unsigned SectionID =
+ findOrEmitSection(*sIt,
+ SymType == object::SymbolRef::ST_Function,
+ LocalSections);
+ bool isGlobal = flags & SymbolRef::SF_Global;
+ LocalSymbols[Name.data()] = SymbolLoc(SectionID, SectOffset);
+ DEBUG(dbgs() << "\tFileOffset: " << format("%p", (uintptr_t)FileOffset)
+ << " flags: " << flags
+ << " SID: " << SectionID
+ << " Offset: " << format("%p", SectOffset));
+ if (isGlobal)
+ SymbolTable[Name] = SymbolLoc(SectionID, SectOffset);
+ }
+ DEBUG(dbgs() << "\tType: " << SymType << " Name: " << Name << "\n");
+ }
+ if (err) {
+ report_fatal_error(err.message());
+ }
+
+ // Parse and process relocations
+ DEBUG(dbgs() << "Parse relocations:\n");
+ for (section_iterator sIt = obj->begin_sections(),
+ sItEnd = obj->end_sections(); sIt != sItEnd; sIt.increment(err)) {
+ if (err) break;
+ bool isFirstRelocation = true;
+ unsigned SectionID = 0;
+ StubMap Stubs;
+
+ for (relocation_iterator it = sIt->begin_relocations(),
+ itEnd = sIt->end_relocations(); it != itEnd; it.increment(err)) {
+ if (err) break;
+
+ // If it's the first relocation in this section, find its SectionID.
+ if (isFirstRelocation) {
+ SectionID = findOrEmitSection(*sIt, true, LocalSections);
+ DEBUG(dbgs() << "\tSectionID: " << SectionID << "\n");
+ isFirstRelocation = false;
+ }
+
+ ObjRelocationInfo RI;
+ RI.SectionID = SectionID;
+ if ((bool)(err = it->getAdditionalInfo(RI.AdditionalInfo))) break;
+ if ((bool)(err = it->getOffset(RI.Offset))) break;
+ if ((bool)(err = it->getSymbol(RI.Symbol))) break;
+ if ((bool)(err = it->getType(RI.Type))) break;
+
+ DEBUG(dbgs() << "\t\tAddend: " << RI.AdditionalInfo
+ << " Offset: " << format("%p", (uintptr_t)RI.Offset)
+ << " Type: " << (uint32_t)(RI.Type & 0xffffffffL)
+ << "\n");
+ processRelocationRef(RI, *obj, LocalSections, LocalSymbols, Stubs);
+ }
+ if (err) {
+ report_fatal_error(err.message());
+ }
+ }
+ return false;
+}
+
+unsigned RuntimeDyldImpl::emitSection(const SectionRef &Section,
+ bool IsCode) {
+
+ unsigned StubBufSize = 0,
+ StubSize = getMaxStubSize();
+ error_code err;
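+ // Conservatively reserve one stub slot per relocation in the section;
+ // stubs are later carved out of the area that follows the section data.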
+ if (StubSize > 0) {
+ for (relocation_iterator it = Section.begin_relocations(),
+ itEnd = Section.end_relocations(); it != itEnd; it.increment(err))
+ StubBufSize += StubSize;
+ }
+ StringRef data;
+ uint64_t Alignment64;
+ if ((bool)(err = Section.getContents(data))) report_fatal_error(err.message());
+ if ((bool)(err = Section.getAlignment(Alignment64)))
+ report_fatal_error(err.message());
+
+ unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL;
+ unsigned DataSize = data.size();
+ unsigned Allocate = DataSize + StubBufSize;
+ unsigned SectionID = Sections.size();
+ const char *pData = data.data();
+ uint8_t *Addr = IsCode
+ ? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID)
+ : MemMgr->allocateDataSection(Allocate, Alignment, SectionID);
+
+ memcpy(Addr, pData, DataSize);
+ DEBUG(dbgs() << "emitSection SectionID: " << SectionID
+ << " obj addr: " << format("%p", pData)
+ << " new addr: " << format("%p", Addr)
+ << " DataSize: " << DataSize
+ << " StubBufSize: " << StubBufSize
+ << " Allocate: " << Allocate
+ << "\n");
+ Sections.push_back(SectionEntry(Addr, Allocate, DataSize, (uintptr_t)pData));
+ return SectionID;
+}
+
+unsigned RuntimeDyldImpl::
+findOrEmitSection(const SectionRef &Section, bool IsCode,
+ ObjSectionToIDMap &LocalSections) {
+
+ unsigned SectionID = 0;
+ ObjSectionToIDMap::iterator sIDIt = LocalSections.find(Section);
+ if (sIDIt != LocalSections.end())
+ SectionID = sIDIt->second;
+ else {
+ SectionID = emitSection(Section, IsCode);
+ LocalSections[Section] = SectionID;
+ }
+ return SectionID;
+}
+
+void RuntimeDyldImpl::AddRelocation(const RelocationValueRef &Value,
+ unsigned SectionID, uintptr_t Offset,
+ uint32_t RelType) {
+ DEBUG(dbgs() << "AddRelocation SymNamePtr: " << format("%p", Value.SymbolName)
+ << " SID: " << Value.SectionID
+ << " Addend: " << format("%p", Value.Addend)
+ << " Offset: " << format("%p", Offset)
+ << " RelType: " << format("%x", RelType)
+ << "\n");
+
+ if (Value.SymbolName == 0) {
+ Relocations[Value.SectionID].push_back(RelocationEntry(
+ SectionID,
+ Offset,
+ RelType,
+ Value.Addend));
+ } else
+ SymbolRelocations[Value.SymbolName].push_back(RelocationEntry(
+ SectionID,
+ Offset,
+ RelType,
+ Value.Addend));
+}
+
+uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) {
+ // TODO: There is only an ARM far stub now. We should add a Thumb stub,
+ // and stubs for Thumb-to-ARM and ARM-to-Thumb branches.
+ if (Arch == Triple::arm) {
+ uint32_t *StubAddr = (uint32_t*)Addr;
+ *StubAddr = 0xe51ff004; // ldr pc,<label>
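+    // 0xe51ff004 is "ldr pc, [pc, #-4]", which loads the word immediately
+    // after the instruction into pc; the caller writes the target address
+    // at the returned pointer.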
+ return (uint8_t*)++StubAddr;
+ }
+ else
+ return Addr;
}
+// Assign an address to a section and resolve all the relocations
+// associated with it.
+void RuntimeDyldImpl::reassignSectionAddress(unsigned SectionID,
+ uint64_t Addr) {
+ // The address to use for relocation resolution is not
+ // the address of the local section buffer. We must be doing
+ // a remote execution environment of some sort. Re-apply any
+ // relocations referencing this section with the given address.
+ //
+ // Addr is a uint64_t because we can't assume the pointer width
+ // of the target is the same as that of the host. Just use a generic
+ // "big enough" type.
+ Sections[SectionID].LoadAddress = Addr;
+ DEBUG(dbgs() << "Resolving relocations Section #" << SectionID
+ << "\t" << format("%p", (uint8_t *)Addr)
+ << "\n");
+ resolveRelocationList(Relocations[SectionID], Addr);
+}
+
+void RuntimeDyldImpl::resolveRelocationEntry(const RelocationEntry &RE,
+ uint64_t Value) {
+ uint8_t *Target = Sections[RE.SectionID].Address + RE.Offset;
+ DEBUG(dbgs() << "\tSectionID: " << RE.SectionID
+ << " + " << RE.Offset << " (" << format("%p", Target) << ")"
+ << " Data: " << RE.Data
+ << " Addend: " << RE.Addend
+ << "\n");
+
+ resolveRelocation(Target, Sections[RE.SectionID].LoadAddress + RE.Offset,
+ Value, RE.Data, RE.Addend);
+}
+
+void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs,
+ uint64_t Value) {
+ for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+ resolveRelocationEntry(Relocs[i], Value);
+ }
+}
+
+// resolveSymbols - Resolve any relocations to the specified symbols, if
+// we know where they live.
+void RuntimeDyldImpl::resolveSymbols() {
+ StringMap<RelocationList>::iterator it = SymbolRelocations.begin(),
+ itEnd = SymbolRelocations.end();
+ for (; it != itEnd; it++) {
+ StringRef Name = it->first();
+ RelocationList &Relocs = it->second;
+ StringMap<SymbolLoc>::const_iterator Loc = SymbolTable.find(Name);
+ if (Loc == SymbolTable.end()) {
+ // This is an external symbol; try to get its address from the
+ // MemoryManager.
+ uint8_t *Addr = (uint8_t*) MemMgr->getPointerToNamedFunction(Name.data(),
+ true);
+ DEBUG(dbgs() << "Resolving relocations Name: " << Name
+ << "\t" << format("%p", Addr)
+ << "\n");
+ resolveRelocationList(Relocs, (uintptr_t)Addr);
+ } else {
+ // Change the relocation to be section relative rather than symbol
+ // relative and move it to the resolved relocation list.
+ DEBUG(dbgs() << "Resolving symbol '" << Name << "'\n");
+ for (int i = 0, e = Relocs.size(); i != e; ++i) {
+ RelocationEntry Entry = Relocs[i];
+ Entry.Addend += Loc->second.second;
+ Relocations[Loc->second.first].push_back(Entry);
+ }
+ Relocs.clear();
+ }
+ }
+}
+
+
//===----------------------------------------------------------------------===//
// RuntimeDyld class implementation
RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) {
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index e15b200..9351b6c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -25,222 +25,58 @@ using namespace llvm::object;
namespace llvm {
-namespace {
-// FIXME: this function should probably not live here...
-//
-// Returns the name and address of an unrelocated symbol in an ELF section
-void getSymbolInfo(symbol_iterator Sym, uint64_t &Addr, StringRef &Name) {
- //FIXME: error checking here required to catch corrupt ELF objects...
- error_code Err = Sym->getName(Name);
-
- uint64_t AddrInSection;
- Err = Sym->getAddress(AddrInSection);
-
- SectionRef empty_section;
- section_iterator Section(empty_section);
- Err = Sym->getSection(Section);
-
- StringRef SectionContents;
- Section->getContents(SectionContents);
-
- Addr = reinterpret_cast<uint64_t>(SectionContents.data()) + AddrInSection;
-}
-
-}
-
-bool RuntimeDyldELF::loadObject(MemoryBuffer *InputBuffer) {
- if (!isCompatibleFormat(InputBuffer))
- return true;
-
- OwningPtr<ObjectFile> Obj(ObjectFile::createELFObjectFile(InputBuffer));
-
- Arch = Obj->getArch();
-
- // Map address in the Object file image to function names
- IntervalMap<uint64_t, StringRef>::Allocator A;
- IntervalMap<uint64_t, StringRef> FuncMap(A);
-
- // This is a bit of a hack. The ObjectFile we've just loaded reports
- // section addresses as 0 and doesn't provide access to the section
- // offset (from which we could calculate the address. Instead,
- // we're storing the address when it comes up in the ST_Debug case
- // below.
- //
- StringMap<uint64_t> DebugSymbolMap;
-
- symbol_iterator SymEnd = Obj->end_symbols();
- error_code Err;
- for (symbol_iterator Sym = Obj->begin_symbols();
- Sym != SymEnd; Sym.increment(Err)) {
- SymbolRef::Type Type;
- Sym->getType(Type);
- if (Type == SymbolRef::ST_Function) {
- StringRef Name;
- uint64_t Addr;
- getSymbolInfo(Sym, Addr, Name);
-
- uint64_t Size;
- Err = Sym->getSize(Size);
-
- uint8_t *Start;
- uint8_t *End;
- Start = reinterpret_cast<uint8_t*>(Addr);
- End = reinterpret_cast<uint8_t*>(Addr + Size - 1);
-
- extractFunction(Name, Start, End);
- FuncMap.insert(Addr, Addr + Size - 1, Name);
- } else if (Type == SymbolRef::ST_Debug) {
- // This case helps us find section addresses
- StringRef Name;
- uint64_t Addr;
- getSymbolInfo(Sym, Addr, Name);
- DebugSymbolMap[Name] = Addr;
- }
- }
-
- // Iterate through the relocations for this object
- section_iterator SecEnd = Obj->end_sections();
- for (section_iterator Sec = Obj->begin_sections();
- Sec != SecEnd; Sec.increment(Err)) {
- StringRef SecName;
- uint64_t SecAddr;
- Sec->getName(SecName);
- // Ignore sections that aren't in our map
- if (DebugSymbolMap.find(SecName) == DebugSymbolMap.end()) {
- continue;
- }
- SecAddr = DebugSymbolMap[SecName];
- relocation_iterator RelEnd = Sec->end_relocations();
- for (relocation_iterator Rel = Sec->begin_relocations();
- Rel != RelEnd; Rel.increment(Err)) {
- uint64_t RelOffset;
- uint64_t RelType;
- int64_t RelAddend;
- SymbolRef RelSym;
- StringRef SymName;
- uint64_t SymAddr;
- uint64_t SymOffset;
-
- Rel->getAddress(RelOffset);
- Rel->getType(RelType);
- Rel->getAdditionalInfo(RelAddend);
- Rel->getSymbol(RelSym);
- RelSym.getName(SymName);
- RelSym.getAddress(SymAddr);
- RelSym.getFileOffset(SymOffset);
-
- // If this relocation is inside a function, we want to store the
- // function name and a function-relative offset
- IntervalMap<uint64_t, StringRef>::iterator ContainingFunc
- = FuncMap.find(SecAddr + RelOffset);
- if (ContainingFunc.valid()) {
- // Re-base the relocation to make it relative to the target function
- RelOffset = (SecAddr + RelOffset) - ContainingFunc.start();
- Relocations[SymName].push_back(RelocationEntry(ContainingFunc.value(),
- RelOffset,
- RelType,
- RelAddend,
- true));
- } else {
- Relocations[SymName].push_back(RelocationEntry(SecName,
- RelOffset,
- RelType,
- RelAddend,
- false));
- }
- }
- }
- return false;
-}
-
-void RuntimeDyldELF::resolveRelocations() {
- // FIXME: deprecated. should be changed to use the by-section
- // allocation and relocation scheme.
-
- // Just iterate over the symbols in our symbol table and assign their
- // addresses.
- StringMap<SymbolLoc>::iterator i = SymbolTable.begin();
- StringMap<SymbolLoc>::iterator e = SymbolTable.end();
- for (;i != e; ++i) {
- assert (i->getValue().second == 0 && "non-zero offset in by-function sym!");
- reassignSymbolAddress(i->getKey(),
- (uint8_t*)Sections[i->getValue().first].base());
- }
-}
-
-void RuntimeDyldELF::resolveX86_64Relocation(StringRef Name,
- uint8_t *Addr,
- const RelocationEntry &RE) {
- uint8_t *TargetAddr;
- if (RE.IsFunctionRelative) {
- StringMap<SymbolLoc>::const_iterator Loc = SymbolTable.find(RE.Target);
- assert(Loc != SymbolTable.end() && "Function for relocation not found");
- TargetAddr =
- reinterpret_cast<uint8_t*>(Sections[Loc->second.first].base()) +
- Loc->second.second + RE.Offset;
- } else {
- // FIXME: Get the address of the target section and add that to RE.Offset
- llvm_unreachable("Non-function relocation not implemented yet!");
- }
-
- switch (RE.Type) {
- default: llvm_unreachable("Relocation type not implemented yet!");
+void RuntimeDyldELF::resolveX86_64Relocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend) {
+ switch (Type) {
+ default:
+ llvm_unreachable("Relocation type not implemented yet!");
+ break;
case ELF::R_X86_64_64: {
- uint8_t **Target = reinterpret_cast<uint8_t**>(TargetAddr);
- *Target = Addr + RE.Addend;
+ uint64_t *Target = (uint64_t*)(LocalAddress);
+ *Target = Value + Addend;
break;
}
case ELF::R_X86_64_32:
case ELF::R_X86_64_32S: {
- uint64_t Value = reinterpret_cast<uint64_t>(Addr) + RE.Addend;
+ Value += Addend;
// FIXME: Handle the possibility of this assertion failing
- assert((RE.Type == ELF::R_X86_64_32 && !(Value & 0xFFFFFFFF00000000ULL)) ||
- (RE.Type == ELF::R_X86_64_32S &&
+ assert((Type == ELF::R_X86_64_32 && !(Value & 0xFFFFFFFF00000000ULL)) ||
+ (Type == ELF::R_X86_64_32S &&
(Value & 0xFFFFFFFF00000000ULL) == 0xFFFFFFFF00000000ULL));
uint32_t TruncatedAddr = (Value & 0xFFFFFFFF);
- uint32_t *Target = reinterpret_cast<uint32_t*>(TargetAddr);
+ uint32_t *Target = reinterpret_cast<uint32_t*>(LocalAddress);
*Target = TruncatedAddr;
break;
}
case ELF::R_X86_64_PC32: {
- uint32_t *Placeholder = reinterpret_cast<uint32_t*>(TargetAddr);
- uint64_t RealOffset = *Placeholder +
- reinterpret_cast<uint64_t>(Addr) +
- RE.Addend - reinterpret_cast<uint64_t>(TargetAddr);
- assert((RealOffset & 0xFFFFFFFF) == RealOffset);
- uint32_t TruncOffset = (RealOffset & 0xFFFFFFFF);
+ uint32_t *Placeholder = reinterpret_cast<uint32_t*>(LocalAddress);
+ int64_t RealOffset = *Placeholder + Value + Addend - FinalAddress;
+ assert(RealOffset <= 2147483647LL && RealOffset >= -2147483648LL);
+ int32_t TruncOffset = (RealOffset & 0xFFFFFFFF);
*Placeholder = TruncOffset;
break;
}
}
}
-void RuntimeDyldELF::resolveX86Relocation(StringRef Name,
- uint8_t *Addr,
- const RelocationEntry &RE) {
- uint8_t *TargetAddr;
- if (RE.IsFunctionRelative) {
- StringMap<SymbolLoc>::const_iterator Loc = SymbolTable.find(RE.Target);
- assert(Loc != SymbolTable.end() && "Function for relocation not found");
- TargetAddr =
- reinterpret_cast<uint8_t*>(Sections[Loc->second.first].base()) +
- Loc->second.second + RE.Offset;
- } else {
- // FIXME: Get the address of the target section and add that to RE.Offset
- llvm_unreachable("Non-function relocation not implemented yet!");
- }
-
- switch (RE.Type) {
+void RuntimeDyldELF::resolveX86Relocation(uint8_t *LocalAddress,
+ uint32_t FinalAddress,
+ uint32_t Value,
+ uint32_t Type,
+ int32_t Addend) {
+ switch (Type) {
case ELF::R_386_32: {
- uint8_t **Target = reinterpret_cast<uint8_t**>(TargetAddr);
- *Target = Addr + RE.Addend;
+ uint32_t *Target = (uint32_t*)(LocalAddress);
+ *Target = Value + Addend;
break;
}
case ELF::R_386_PC32: {
- uint32_t *Placeholder = reinterpret_cast<uint32_t*>(TargetAddr);
- uint32_t RealOffset = *Placeholder + reinterpret_cast<uintptr_t>(Addr) +
- RE.Addend - reinterpret_cast<uintptr_t>(TargetAddr);
+ uint32_t *Placeholder = reinterpret_cast<uint32_t*>(LocalAddress);
+ uint32_t RealOffset = *Placeholder + Value + Addend - FinalAddress;
*Placeholder = RealOffset;
break;
}
@@ -248,57 +84,173 @@ void RuntimeDyldELF::resolveX86Relocation(StringRef Name,
// There are other relocation types, but it appears these are the
// only ones currently used by the LLVM ELF object writer
llvm_unreachable("Relocation type not implemented yet!");
+ break;
}
}
-void RuntimeDyldELF::resolveArmRelocation(StringRef Name,
- uint8_t *Addr,
- const RelocationEntry &RE) {
+void RuntimeDyldELF::resolveARMRelocation(uint8_t *LocalAddress,
+ uint32_t FinalAddress,
+ uint32_t Value,
+ uint32_t Type,
+ int32_t Addend) {
+ // TODO: Add Thumb relocations.
+ uint32_t* TargetPtr = (uint32_t*)LocalAddress;
+ Value += Addend;
+
+ DEBUG(dbgs() << "resolveARMRelocation, LocalAddress: " << LocalAddress
+ << " FinalAddress: " << format("%p",FinalAddress)
+ << " Value: " << format("%x",Value)
+ << " Type: " << format("%x",Type)
+ << " Addend: " << format("%x",Addend)
+ << "\n");
+
+ switch(Type) {
+ default:
+ llvm_unreachable("Not implemented relocation type!");
+
+ // Just write the 32-bit value to the relocation address.
+ case ELF::R_ARM_ABS32 :
+ *TargetPtr = Value;
+ break;
+
+ // Write the low 16 bits of the 32-bit value into the movw instruction;
+ // the top 4 bits of the halfword go into the instruction's imm4 field.
+ case ELF::R_ARM_MOVW_ABS_NC :
+ Value = Value & 0xFFFF;
+ *TargetPtr |= Value & 0xFFF;
+ *TargetPtr |= ((Value >> 12) & 0xF) << 16;
+ break;
+
+ // Write the high 16 bits of the 32-bit value into the movt instruction;
+ // the top 4 bits of the halfword go into the instruction's imm4 field.
+ case ELF::R_ARM_MOVT_ABS :
+ Value = (Value >> 16) & 0xFFFF;
+ *TargetPtr |= Value & 0xFFF;
+ *TargetPtr |= ((Value >> 12) & 0xF) << 16;
+ break;
+
+ // Write a 24-bit PC-relative value into the branch instruction.
+ case ELF::R_ARM_PC24 : // Fall through.
+ case ELF::R_ARM_CALL : // Fall through.
+ case ELF::R_ARM_JUMP24 :
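+    // The ARM pipeline makes pc read as the branch address + 8, hence the
+    // extra -8; the byte offset is then scaled down to a word offset for
+    // the 24-bit immediate field.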
+ int32_t RelValue = static_cast<int32_t>(Value - FinalAddress - 8);
+ RelValue = (RelValue & 0x03FFFFFC) >> 2;
+ *TargetPtr &= 0xFF000000;
+ *TargetPtr |= RelValue;
+ break;
+ }
}
-void RuntimeDyldELF::resolveRelocation(StringRef Name,
- uint8_t *Addr,
- const RelocationEntry &RE) {
+void RuntimeDyldELF::resolveRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend) {
switch (Arch) {
case Triple::x86_64:
- resolveX86_64Relocation(Name, Addr, RE);
+ resolveX86_64Relocation(LocalAddress, FinalAddress, Value, Type, Addend);
break;
case Triple::x86:
- resolveX86Relocation(Name, Addr, RE);
+ resolveX86Relocation(LocalAddress, (uint32_t)(FinalAddress & 0xffffffffL),
+ (uint32_t)(Value & 0xffffffffL), Type,
+ (uint32_t)(Addend & 0xffffffffL));
break;
- case Triple::arm:
- resolveArmRelocation(Name, Addr, RE);
+ case Triple::arm: // Fall through.
+ case Triple::thumb:
+ resolveARMRelocation(LocalAddress, (uint32_t)(FinalAddress & 0xffffffffL),
+ (uint32_t)(Value & 0xffffffffL), Type,
+ (uint32_t)(Addend & 0xffffffffL));
break;
default: llvm_unreachable("Unsupported CPU type!");
}
}
-void RuntimeDyldELF::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
- // FIXME: deprecated. switch to reassignSectionAddress() instead.
- //
- // Actually moving the symbol address requires by-section mapping.
- assert(Sections[SymbolTable.lookup(Name).first].base() == (void*)Addr &&
- "Unable to relocate section in by-function JIT allocation model!");
-
- RelocationList &Relocs = Relocations[Name];
- for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
- RelocationEntry &RE = Relocs[i];
- resolveRelocation(Name, Addr, RE);
+void RuntimeDyldELF::
+processRelocationRef(const ObjRelocationInfo &Rel, const ObjectFile &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ LocalSymbolMap &Symbols, StubMap &Stubs) {
+
+ uint32_t RelType = (uint32_t)(Rel.Type & 0xffffffffL);
+ intptr_t Addend = (intptr_t)Rel.AdditionalInfo;
+ RelocationValueRef Value;
+ StringRef TargetName;
+ const SymbolRef &Symbol = Rel.Symbol;
+ Symbol.getName(TargetName);
+ DEBUG(dbgs() << "\t\tRelType: " << RelType
+ << " Addend: " << Addend
+ << " TargetName: " << TargetName
+ << "\n");
+ // First, look the symbol up among the object file's local symbols.
+ LocalSymbolMap::iterator it = Symbols.find(TargetName.data());
+ if (it != Symbols.end()) {
+ Value.SectionID = it->second.first;
+ Value.Addend = it->second.second;
+ } else {
+ // Otherwise, look it up in the global symbol table.
+ StringMap<SymbolLoc>::iterator itS = SymbolTable.find(TargetName.data());
+ if (itS != SymbolTable.end()) {
+ Value.SectionID = itS->second.first;
+ Value.Addend = itS->second.second;
+ } else {
+ SymbolRef::Type SymType;
+ Symbol.getType(SymType);
+ switch (SymType) {
+ case SymbolRef::ST_Debug: {
+ // TODO: For ELF, SymbolRef::ST_Debug currently corresponds to
+ // STT_SECTION; this is not obvious and could be changed by other
+ // developers. The cleanest fix is probably to add a new symbol type
+ // ST_Section to SymbolRef and use it here.
+ section_iterator sIt = Obj.end_sections();
+ Symbol.getSection(sIt);
+ if (sIt == Obj.end_sections())
+ llvm_unreachable("Symbol section not found, bad object file format!");
+ DEBUG(dbgs() << "\t\tThis is section symbol\n");
+ Value.SectionID = findOrEmitSection((*sIt), true, ObjSectionToID);
+ Value.Addend = Addend;
+ break;
+ }
+ case SymbolRef::ST_Unknown: {
+ Value.SymbolName = TargetName.data();
+ Value.Addend = Addend;
+ break;
+ }
+ default:
+ llvm_unreachable("Unresolved symbol type!");
+ break;
+ }
+ }
}
-}
-
-// Assign an address to a symbol name and resolve all the relocations
-// associated with it.
-void RuntimeDyldELF::reassignSectionAddress(unsigned SectionID, uint64_t Addr) {
- // The address to use for relocation resolution is not
- // the address of the local section buffer. We must be doing
- // a remote execution environment of some sort. Re-apply any
- // relocations referencing this section with the given address.
- //
- // Addr is a uint64_t because we can't assume the pointer width
- // of the target is the same as that of the host. Just use a generic
- // "big enough" type.
- assert(0);
+ DEBUG(dbgs() << "\t\tRel.SectionID: " << Rel.SectionID
+ << " Rel.Offset: " << Rel.Offset
+ << "\n");
+ if (Arch == Triple::arm &&
+ (RelType == ELF::R_ARM_PC24 ||
+ RelType == ELF::R_ARM_CALL ||
+ RelType == ELF::R_ARM_JUMP24)) {
+ // This is an ARM branch relocation; we need to use a stub function.
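+    // A 24-bit branch immediate cannot reach an arbitrary 32-bit address,
+    // so the branch is pointed at a stub in this section and the stub
+    // carries the full target address.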
+ DEBUG(dbgs() << "\t\tThis is an ARM branch relocation.");
+ SectionEntry &Section = Sections[Rel.SectionID];
+ uint8_t *Target = Section.Address + Rel.Offset;
+
+ // Look for an existing stub.
+ StubMap::const_iterator stubIt = Stubs.find(Value);
+ if (stubIt != Stubs.end()) {
+ resolveRelocation(Target, Section.LoadAddress, (uint64_t)Section.Address +
+ stubIt->second, RelType, 0);
+ DEBUG(dbgs() << " Stub function found\n");
+ } else {
+ // Create a new stub function.
+ DEBUG(dbgs() << " Create a new stub function\n");
+ Stubs[Value] = Section.StubOffset;
+ uint8_t *StubTargetAddr = createStubFunction(Section.Address +
+ Section.StubOffset);
+ AddRelocation(Value, Rel.SectionID,
+ StubTargetAddr - Section.Address, ELF::R_ARM_ABS32);
+ resolveRelocation(Target, Section.LoadAddress, (uint64_t)Section.Address +
+ Section.StubOffset, RelType, 0);
+ Section.StubOffset += getMaxStubSize();
+ }
+ } else
+ AddRelocation(Value, Rel.SectionID, Rel.Offset, RelType);
}
bool RuntimeDyldELF::isCompatibleFormat(const MemoryBuffer *InputBuffer) const {
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index e0f7d54..36566da 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -21,158 +21,42 @@ using namespace llvm;
namespace llvm {
class RuntimeDyldELF : public RuntimeDyldImpl {
- // For each symbol, keep a list of relocations based on it. Anytime
- // its address is reassigned (the JIT re-compiled the function, e.g.),
- // the relocations get re-resolved.
- struct RelocationEntry {
- // Function or section this relocation is contained in.
- std::string Target;
- // Offset into the target function or section for the relocation.
- uint32_t Offset;
- // Relocation type
- uint32_t Type;
- // Addend encoded in the instruction itself, if any.
- int32_t Addend;
- // Has the relocation been recalcuated as an offset within a function?
- bool IsFunctionRelative;
- // Has this relocation been resolved previously?
- bool isResolved;
-
- RelocationEntry(StringRef t,
- uint32_t offset,
- uint32_t type,
- int32_t addend,
- bool isFunctionRelative)
- : Target(t)
- , Offset(offset)
- , Type(type)
- , Addend(addend)
- , IsFunctionRelative(isFunctionRelative)
- , isResolved(false) { }
- };
- typedef SmallVector<RelocationEntry, 4> RelocationList;
- StringMap<RelocationList> Relocations;
- unsigned Arch;
-
- void resolveRelocations();
-
- void resolveX86_64Relocation(StringRef Name,
- uint8_t *Addr,
- const RelocationEntry &RE);
-
- void resolveX86Relocation(StringRef Name,
- uint8_t *Addr,
- const RelocationEntry &RE);
-
- void resolveArmRelocation(StringRef Name,
- uint8_t *Addr,
- const RelocationEntry &RE);
-
- void resolveRelocation(StringRef Name,
- uint8_t *Addr,
- const RelocationEntry &RE);
-
-public:
- RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
-
- bool loadObject(MemoryBuffer *InputBuffer);
-
- void reassignSymbolAddress(StringRef Name, uint8_t *Addr);
- void reassignSectionAddress(unsigned SectionID, uint64_t Addr);
-
- bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const;
-};
-
-} // end namespace llvm
-
-#endif
-
-//===-- RuntimeDyldELF.h - Run-time dynamic linker for MC-JIT ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// ELF support for MC-JIT runtime dynamic linker.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_RUNTIME_DYLD_ELF_H
-#define LLVM_RUNTIME_DYLD_ELF_H
-
-#include "RuntimeDyldImpl.h"
-
-using namespace llvm;
-
-
-namespace llvm {
-class RuntimeDyldELF : public RuntimeDyldImpl {
- // For each symbol, keep a list of relocations based on it. Anytime
- // its address is reassigned (the JIT re-compiled the function, e.g.),
- // the relocations get re-resolved.
- struct RelocationEntry {
- // Function or section this relocation is contained in.
- std::string Target;
- // Offset into the target function or section for the relocation.
- uint32_t Offset;
- // Relocation type
- uint32_t Type;
- // Addend encoded in the instruction itself, if any.
- int32_t Addend;
- // Has the relocation been recalcuated as an offset within a function?
- bool IsFunctionRelative;
- // Has this relocation been resolved previously?
- bool isResolved;
-
- RelocationEntry(StringRef t,
- uint32_t offset,
- uint32_t type,
- int32_t addend,
- bool isFunctionRelative)
- : Target(t)
- , Offset(offset)
- , Type(type)
- , Addend(addend)
- , IsFunctionRelative(isFunctionRelative)
- , isResolved(false) { }
- };
- typedef SmallVector<RelocationEntry, 4> RelocationList;
- StringMap<RelocationList> Relocations;
- unsigned Arch;
-
- void resolveRelocations();
-
- void resolveX86_64Relocation(StringRef Name,
- uint8_t *Addr,
- const RelocationEntry &RE);
-
- void resolveX86Relocation(StringRef Name,
- uint8_t *Addr,
- const RelocationEntry &RE);
-
- void resolveArmRelocation(StringRef Name,
- uint8_t *Addr,
- const RelocationEntry &RE);
-
- void resolveRelocation(StringRef Name,
- uint8_t *Addr,
- const RelocationEntry &RE);
+protected:
+ void resolveX86_64Relocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend);
+
+ void resolveX86Relocation(uint8_t *LocalAddress,
+ uint32_t FinalAddress,
+ uint32_t Value,
+ uint32_t Type,
+ int32_t Addend);
+
+ void resolveARMRelocation(uint8_t *LocalAddress,
+ uint32_t FinalAddress,
+ uint32_t Value,
+ uint32_t Type,
+ int32_t Addend);
+
+ virtual void resolveRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend);
+
+ virtual void processRelocationRef(const ObjRelocationInfo &Rel,
+ const ObjectFile &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ LocalSymbolMap &Symbols, StubMap &Stubs);
public:
RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
- bool loadObject(MemoryBuffer *InputBuffer);
-
- void reassignSymbolAddress(StringRef Name, uint8_t *Addr);
- void reassignSectionAddress(unsigned SectionID, uint64_t Addr);
-
bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const;
};
} // end namespace llvm
-#endif
-
+#endif
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index 28e99be..d6430a9 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -15,45 +15,125 @@
#define LLVM_RUNTIME_DYLD_IMPL_H
#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/Support/Memory.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/system_error.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/Triple.h"
+#include <map>
+#include "llvm/Support/Format.h"
using namespace llvm;
+using namespace llvm::object;
namespace llvm {
+
+class SectionEntry {
+public:
+ uint8_t* Address;
+ size_t Size;
+ uint64_t LoadAddress;  // The address the section will be considered to
+                        // live at for relocations; the same as Address
+                        // for hosted JITs.
+ uintptr_t StubOffset;  // Offset of the stub area; used on architectures
+                        // that need stub functions for far relocations,
+                        // such as ARM.
+ uintptr_t ObjAddress;  // Section address in the object file; used to
+                        // calculate the MachO relocation addend.
+ SectionEntry(uint8_t* address, size_t size, uintptr_t stubOffset,
+ uintptr_t objAddress)
+ : Address(address), Size(size), LoadAddress((uintptr_t)address),
+ StubOffset(stubOffset), ObjAddress(objAddress) {}
+};
+
+class RelocationEntry {
+public:
+ unsigned SectionID; // Section the relocation is contained in.
+ uintptr_t Offset; // Offset into the section for the relocation.
+ uint32_t Data;    // Relocation data: the relocation type plus any other
+                   // flags and parameters from the original relocation.
+ intptr_t Addend; // Addend encoded in the instruction itself, if any,
+ // plus the offset into the source section for
+ // the symbol once the relocation is resolvable.
+ RelocationEntry(unsigned id, uint64_t offset, uint32_t data, int64_t addend)
+ : SectionID(id), Offset(offset), Data(data), Addend(addend) {}
+};
+
+// Raw relocation data from object file
+class ObjRelocationInfo {
+public:
+ unsigned SectionID;
+ uint64_t Offset;
+ SymbolRef Symbol;
+ uint64_t Type;
+ int64_t AdditionalInfo;
+};
+
+class RelocationValueRef {
+public:
+ unsigned SectionID;
+ intptr_t Addend;
+ const char *SymbolName;
+ RelocationValueRef(): SectionID(0), Addend(0), SymbolName(0) {}
+
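+  // Equality and ordering are defined via memcmp over the raw bytes of the
+  // struct, so they depend on every field; accordingly, the constructor
+  // above zero-initializes all members.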
+ inline bool operator==(const RelocationValueRef &Other) const {
+ return std::memcmp(this, &Other, sizeof(RelocationValueRef)) == 0;
+ }
+ inline bool operator <(const RelocationValueRef &Other) const {
+ return std::memcmp(this, &Other, sizeof(RelocationValueRef)) < 0;
+ }
+};
+
class RuntimeDyldImpl {
protected:
- unsigned CPUType;
- unsigned CPUSubtype;
-
// The MemoryManager to load objects into.
RTDyldMemoryManager *MemMgr;
- // For each section, we have a MemoryBlock of it's data.
- // Indexed by SectionID.
- SmallVector<sys::MemoryBlock, 32> Sections;
- // For each section, the address it will be considered to live at for
- // relocations. The same as the pointer to the above memory block for hosted
- // JITs. Indexed by SectionID.
- SmallVector<uint64_t, 32> SectionLoadAddress;
+ // A list of emitted sections.
+ typedef SmallVector<SectionEntry, 64> SectionList;
+ SectionList Sections;
- // Keep a map of starting local address to the SectionID which references it.
- // Lookup function for when we assign virtual addresses.
- DenseMap<void *, unsigned> SectionLocalMemToID;
+ // Keep a map of sections from object file to the SectionID which
+ // references it.
+ typedef std::map<SectionRef, unsigned> ObjSectionToIDMap;
// Master symbol table. As modules are loaded and external symbols are
// resolved, their addresses are stored here as a SectionID/Offset pair.
- typedef std::pair<unsigned, uint64_t> SymbolLoc;
+ typedef std::pair<unsigned, uintptr_t> SymbolLoc;
StringMap<SymbolLoc> SymbolTable;
+ typedef DenseMap<const char*, SymbolLoc> LocalSymbolMap;
+
+ // For each symbol, keep a list of relocations based on it. Anytime
+ // its address is reassigned (the JIT re-compiled the function, e.g.),
+ // the relocations get re-resolved.
+ // The symbol (or section) the relocation is sourced from is the Key
+ // in the relocation list where it's stored.
+ typedef SmallVector<RelocationEntry, 64> RelocationList;
+ // Relocations to sections already loaded. Indexed by SectionID which is the
+ // source of the address. The target where the address will be written is
+ // SectionID/Offset in the relocation itself.
+ DenseMap<unsigned, RelocationList> Relocations;
+ // Relocations to external symbols that are not yet resolved.
+ // Indexed by symbol name.
+ StringMap<RelocationList> SymbolRelocations;
+
+ typedef std::map<RelocationValueRef, uintptr_t> StubMap;
+
+ Triple::ArchType Arch;
+
+ inline unsigned getMaxStubSize() {
+ if (Arch == Triple::arm || Arch == Triple::thumb)
+ return 8; // 32-bit instruction and 32-bit address
+ else
+ return 0;
+ }
bool HasError;
std::string ErrorStr;
@@ -66,17 +146,62 @@ protected:
}
uint8_t *getSectionAddress(unsigned SectionID) {
- return (uint8_t*)Sections[SectionID].base();
+ return (uint8_t*)Sections[SectionID].Address;
}
- void extractFunction(StringRef Name, uint8_t *StartAddress,
- uint8_t *EndAddress);
+ /// \brief Emits section data from the object file to the MemoryManager.
+ /// \param IsCode If true, allocateCodeSection() is used for the
+ ///        allocation; otherwise allocateDataSection() is used.
+ /// \return SectionID.
+ unsigned emitSection(const SectionRef &Section, bool IsCode);
+
+ /// \brief Find Section in LocalSections. If the section is not found, emit
+ ///        it and store it in LocalSections.
+ /// \param IsCode If true, allocateCodeSection() is used for the
+ ///        allocation; otherwise allocateDataSection() is used.
+ /// \return SectionID.
+ unsigned findOrEmitSection(const SectionRef &Section, bool IsCode,
+ ObjSectionToIDMap &LocalSections);
+
+ /// \brief If Value.SymbolName is NULL, store the relocation in
+ /// Relocations; otherwise store it in SymbolRelocations.
+ void AddRelocation(const RelocationValueRef &Value, unsigned SectionID,
+ uintptr_t Offset, uint32_t RelType);
+
+ /// \brief Emits a long-jump stub at Addr.
+ /// \return Pointer to the memory area where the target address should
+ ///         be written.
+ uint8_t* createStubFunction(uint8_t *Addr);
+
+ /// \brief Resolves the relocations in Relocs using Value as the target address.
+ void resolveRelocationList(const RelocationList &Relocs, uint64_t Value);
+ void resolveRelocationEntry(const RelocationEntry &RE, uint64_t Value);
+
+ /// \brief An object-file-specific relocation resolver.
+ /// \param LocalAddress The address in host memory where the relocation
+ ///        is applied.
+ /// \param FinalAddress The target-process address of the patched location,
+ ///        used for PC-relative relocations.
+ /// \param Value Target symbol address used to compute the relocation.
+ /// \param Type Object-file-specific relocation type.
+ /// \param Addend A constant addend used to compute the value to be stored
+ ///        into the relocatable field.
+ virtual void resolveRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend) = 0;
+
+ /// \brief Parses an object-file relocation and stores it in Relocations
+ /// or SymbolRelocations; the handling depends on the object file type.
+ virtual void processRelocationRef(const ObjRelocationInfo &Rel,
+ const ObjectFile &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ LocalSymbolMap &Symbols, StubMap &Stubs) = 0;
+
+ void resolveSymbols();
public:
RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {}
virtual ~RuntimeDyldImpl();
- virtual bool loadObject(MemoryBuffer *InputBuffer) = 0;
+ bool loadObject(const MemoryBuffer *InputBuffer);
void *getSymbolAddress(StringRef Name) {
// FIXME: Just look up as a function for now. Overly simple of course.
@@ -87,9 +212,9 @@ public:
return getSectionAddress(Loc.first) + Loc.second;
}
- virtual void resolveRelocations();
+ void resolveRelocations();
- virtual void reassignSectionAddress(unsigned SectionID, uint64_t Addr) = 0;
+ void reassignSectionAddress(unsigned SectionID, uint64_t Addr);
void mapSectionAddress(void *LocalAddress, uint64_t TargetAddress);
@@ -103,6 +228,7 @@ public:
StringRef getErrorString() { return ErrorStr; }
virtual bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const = 0;
+
};
} // end namespace llvm
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index c11b2c3..24437e0 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -21,30 +21,64 @@ using namespace llvm::object;
namespace llvm {
-bool RuntimeDyldMachO::
-resolveRelocation(uint8_t *Address, uint64_t Value, bool isPCRel,
- unsigned Type, unsigned Size, int64_t Addend) {
+void RuntimeDyldMachO::resolveRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend) {
+ bool isPCRel = (Type >> 24) & 1;
+ unsigned MachoType = (Type >> 28) & 0xf;
+ unsigned Size = 1 << ((Type >> 25) & 3);
+
+ DEBUG(dbgs() << "resolveRelocation LocalAddress: " << format("%p", LocalAddress)
+ << " FinalAddress: " << format("%p", FinalAddress)
+ << " Value: " << format("%p", Value)
+ << " Addend: " << Addend
+ << " isPCRel: " << isPCRel
+ << " MachoType: " << MachoType
+ << " Size: " << Size
+ << "\n");
+
// This just dispatches to the proper target specific routine.
- switch (CPUType) {
+ switch (Arch) {
default: llvm_unreachable("Unsupported CPU type!");
- case mach::CTM_x86_64:
- return resolveX86_64Relocation((uintptr_t)Address, (uintptr_t)Value,
- isPCRel, Type, Size, Addend);
- case mach::CTM_ARM:
- return resolveARMRelocation((uintptr_t)Address, (uintptr_t)Value,
- isPCRel, Type, Size, Addend);
+ case Triple::x86_64: // Fall through.
+ case Triple::x86:
+ resolveX86_64Relocation(LocalAddress,
+ FinalAddress,
+ (uintptr_t)Value,
+ isPCRel,
+ MachoType,
+ Size,
+ Addend);
+ break;
+ case Triple::arm: // Fall through.
+ case Triple::thumb:
+ resolveARMRelocation(LocalAddress,
+ FinalAddress,
+ (uintptr_t)Value,
+ isPCRel,
+ MachoType,
+ Size,
+ Addend);
+ break;
}
}
bool RuntimeDyldMachO::
-resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
- unsigned Type, unsigned Size, int64_t Addend) {
+resolveX86_64Relocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ bool isPCRel,
+ unsigned Type,
+ unsigned Size,
+ int64_t Addend) {
// If the relocation is PC-relative, the value to be encoded is the
// pointer difference.
if (isPCRel)
// FIXME: It seems this value needs to be adjusted by 4 for an effective PC
// address. Is that expected? Only for branches, perhaps?
- Value -= Address + 4;
+ Value -= FinalAddress + 4;
switch(Type) {
default:
@@ -58,7 +92,7 @@ resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
Value += Addend;
// Mask in the target value a byte at a time (we don't have an alignment
// guarantee for the target address, so this is safest).
- uint8_t *p = (uint8_t*)Address;
+ uint8_t *p = (uint8_t*)LocalAddress;
for (unsigned i = 0; i < Size; ++i) {
*p++ = (uint8_t)Value;
Value >>= 8;
@@ -74,12 +108,17 @@ resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
}
bool RuntimeDyldMachO::
-resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
- unsigned Type, unsigned Size, int64_t Addend) {
+resolveARMRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ bool isPCRel,
+ unsigned Type,
+ unsigned Size,
+ int64_t Addend) {
// If the relocation is PC-relative, the value to be encoded is the
// pointer difference.
if (isPCRel) {
- Value -= Address;
+ Value -= FinalAddress;
// ARM PCRel relocations have an effective-PC offset of two instructions
// (four bytes in Thumb mode, 8 bytes in ARM mode).
// FIXME: For now, assume ARM mode.
@@ -92,7 +131,7 @@ resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
case macho::RIT_Vanilla: {
// Mask in the target value a byte at a time (we don't have an alignment
// guarantee for the target address, so this is safest).
- uint8_t *p = (uint8_t*)Address;
+ uint8_t *p = (uint8_t*)LocalAddress;
for (unsigned i = 0; i < Size; ++i) {
*p++ = (uint8_t)Value;
Value >>= 8;
@@ -102,7 +141,7 @@ resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
case macho::RIT_ARM_Branch24Bit: {
// Mask the value into the target address. We know instructions are
// 32-bit aligned, so we can do it all at once.
- uint32_t *p = (uint32_t*)Address;
+ uint32_t *p = (uint32_t*)LocalAddress;
// The low two bits of the value are not encoded.
Value >>= 2;
// Mask the value to 24 bits.
@@ -128,463 +167,83 @@ resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
return false;
}
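
The RIT_ARM_Branch24Bit case patches the imm24 field of an ARM B/BL instruction: the byte offset is divided by four (branch targets are word-aligned, so the low two bits are not encoded) and the low 24 bits are merged under the condition and opcode bits. A sketch of that encoding step, assuming ByteOffset already holds the adjusted PC-relative offset and is a multiple of four:

    #include <cstdint>

    // Merge a PC-relative byte offset into an ARM B/BL instruction word.
    static uint32_t encodeARMBranch24(uint32_t Insn, int64_t ByteOffset) {
      // Two's-complement low 24 bits of (offset / 4).
      uint32_t Imm24 = (static_cast<uint64_t>(ByteOffset) >> 2) & 0xffffff;
      return (Insn & 0xff000000) | Imm24; // keep cond + opcode, swap imm24
    }
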
-bool RuntimeDyldMachO::
-loadSegment32(const MachOObject *Obj,
- const MachOObject::LoadCommandInfo *SegmentLCI,
- const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
- // FIXME: This should really be combined w/ loadSegment64. Templatized
- // function on the 32/64 datatypes maybe?
- InMemoryStruct<macho::SegmentLoadCommand> SegmentLC;
- Obj->ReadSegmentLoadCommand(*SegmentLCI, SegmentLC);
- if (!SegmentLC)
- return Error("unable to load segment load command");
-
-
- SmallVector<unsigned, 16> SectionMap;
- for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) {
- InMemoryStruct<macho::Section> Sect;
- Obj->ReadSection(*SegmentLCI, SectNum, Sect);
- if (!Sect)
- return Error("unable to load section: '" + Twine(SectNum) + "'");
-
- // Allocate memory via the MM for the section.
- uint8_t *Buffer;
- uint32_t SectionID = Sections.size();
- if (Sect->Flags == 0x80000400)
- Buffer = MemMgr->allocateCodeSection(Sect->Size, Sect->Align, SectionID);
- else
- Buffer = MemMgr->allocateDataSection(Sect->Size, Sect->Align, SectionID);
-
- DEBUG(dbgs() << "Loading "
- << ((Sect->Flags == 0x80000400) ? "text" : "data")
- << " (ID #" << SectionID << ")"
- << " '" << Sect->SegmentName << ","
- << Sect->Name << "' of size " << Sect->Size
- << " to address " << Buffer << ".\n");
-
- // Copy the payload from the object file into the allocated buffer.
- uint8_t *Base = (uint8_t*)Obj->getData(SegmentLC->FileOffset,
- SegmentLC->FileSize).data();
- memcpy(Buffer, Base + Sect->Address, Sect->Size);
-
- // Remember what got allocated for this SectionID.
- Sections.push_back(sys::MemoryBlock(Buffer, Sect->Size));
- SectionLocalMemToID[Buffer] = SectionID;
-
- // By default, the load address of a section is its memory buffer.
- SectionLoadAddress.push_back((uint64_t)Buffer);
-
- // Keep a map of object file section numbers to corresponding SectionIDs
- // while processing the file.
- SectionMap.push_back(SectionID);
- }
-
- // Process the symbol table.
- SmallVector<StringRef, 64> SymbolNames;
- processSymbols32(Obj, SectionMap, SymbolNames, SymtabLC);
-
- // Process the relocations for each section we're loading.
- Relocations.grow(Relocations.size() + SegmentLC->NumSections);
- for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) {
- InMemoryStruct<macho::Section> Sect;
- Obj->ReadSection(*SegmentLCI, SectNum, Sect);
- if (!Sect)
- return Error("unable to load section: '" + Twine(SectNum) + "'");
- for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
- InMemoryStruct<macho::RelocationEntry> RE;
- Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
- if (RE->Word0 & macho::RF_Scattered)
- return Error("NOT YET IMPLEMENTED: scattered relocations.");
- // Word0 of the relocation is the offset into the section where the
- // relocation should be applied. We need to translate that into an
- // offset into a function since that's our atom.
- uint32_t Offset = RE->Word0;
- bool isExtern = (RE->Word1 >> 27) & 1;
-
- // FIXME: Get the relocation addend from the target address.
- // FIXME: VERY imporant for internal relocations.
-
- // Figure out the source symbol of the relocation. If isExtern is true,
- // this relocation references the symbol table, otherwise it references
- // a section in the same object, numbered from 1 through NumSections
- // (SectionBases is [0, NumSections-1]).
- uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
- if (!isExtern) {
- assert(SourceNum > 0 && "Invalid relocation section number!");
- unsigned SectionID = SectionMap[SourceNum - 1];
- unsigned TargetID = SectionMap[SectNum];
- DEBUG(dbgs() << "Internal relocation at Section #"
- << TargetID << " + " << Offset
- << " from Section #"
- << SectionID << " (Word1: "
- << format("0x%x", RE->Word1) << ")\n");
-
- // Store the relocation information. It will get resolved when
- // the section addresses are assigned.
- Relocations[SectionID].push_back(RelocationEntry(TargetID,
- Offset,
- RE->Word1,
- 0 /*Addend*/));
- } else {
- StringRef SourceName = SymbolNames[SourceNum];
-
- // Now store the relocation information. Associate it with the source
- // symbol. Just add it to the unresolved list and let the general
- // path post-load resolve it if we know where the symbol is.
- UnresolvedRelocations[SourceName].push_back(RelocationEntry(SectNum,
- Offset,
- RE->Word1,
- 0 /*Addend*/));
- DEBUG(dbgs() << "Relocation at Section #" << SectNum << " + " << Offset
- << " from '" << SourceName << "(Word1: "
- << format("0x%x", RE->Word1) << ")\n");
- }
+void RuntimeDyldMachO::
+processRelocationRef(const ObjRelocationInfo &Rel, const ObjectFile &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ LocalSymbolMap &Symbols, StubMap &Stubs) {
+
+ uint32_t RelType = (uint32_t) (Rel.Type & 0xffffffffL);
+ RelocationValueRef Value;
+ SectionEntry &Section = Sections[Rel.SectionID];
+ uint8_t *Target = Section.Address + Rel.Offset;
+
+ bool isExtern = (RelType >> 27) & 1;
+ if (isExtern) {
+ StringRef TargetName;
+ const SymbolRef &Symbol = Rel.Symbol;
+ Symbol.getName(TargetName);
+    // First, look the symbol up in the object file's own symbols.
+ LocalSymbolMap::iterator it = Symbols.find(TargetName.data());
+ if (it != Symbols.end()) {
+ Value.SectionID = it->second.first;
+ Value.Addend = it->second.second;
+ } else {
+      // Failing that, look it up in the global symbol table.
+ StringMap<SymbolLoc>::iterator itS = SymbolTable.find(TargetName.data());
+ if (itS != SymbolTable.end()) {
+ Value.SectionID = itS->second.first;
+ Value.Addend = itS->second.second;
+ } else
+ Value.SymbolName = TargetName.data();
}
- }
-
- // Resolve the addresses of any symbols that were defined in this segment.
- for (int i = 0, e = SymbolNames.size(); i != e; ++i)
- resolveSymbol(SymbolNames[i]);
-
- return false;
-}
-
-
-bool RuntimeDyldMachO::
-loadSegment64(const MachOObject *Obj,
- const MachOObject::LoadCommandInfo *SegmentLCI,
- const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
- InMemoryStruct<macho::Segment64LoadCommand> Segment64LC;
- Obj->ReadSegment64LoadCommand(*SegmentLCI, Segment64LC);
- if (!Segment64LC)
- return Error("unable to load segment load command");
-
-
- SmallVector<unsigned, 16> SectionMap;
- for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) {
- InMemoryStruct<macho::Section64> Sect;
- Obj->ReadSection64(*SegmentLCI, SectNum, Sect);
- if (!Sect)
- return Error("unable to load section: '" + Twine(SectNum) + "'");
-
- // Allocate memory via the MM for the section.
- uint8_t *Buffer;
- uint32_t SectionID = Sections.size();
- unsigned Align = 1 << Sect->Align; // .o file has log2 alignment.
- if (Sect->Flags == 0x80000400)
- Buffer = MemMgr->allocateCodeSection(Sect->Size, Align, SectionID);
- else
- Buffer = MemMgr->allocateDataSection(Sect->Size, Align, SectionID);
-
- DEBUG(dbgs() << "Loading "
- << ((Sect->Flags == 0x80000400) ? "text" : "data")
- << " (ID #" << SectionID << ")"
- << " '" << Sect->SegmentName << ","
- << Sect->Name << "' of size " << Sect->Size
- << " (align " << Align << ")"
- << " to address " << Buffer << ".\n");
-
- // Copy the payload from the object file into the allocated buffer.
- uint8_t *Base = (uint8_t*)Obj->getData(Segment64LC->FileOffset,
- Segment64LC->FileSize).data();
- memcpy(Buffer, Base + Sect->Address, Sect->Size);
-
- // Remember what got allocated for this SectionID.
- Sections.push_back(sys::MemoryBlock(Buffer, Sect->Size));
- SectionLocalMemToID[Buffer] = SectionID;
-
- // By default, the load address of a section is its memory buffer.
- SectionLoadAddress.push_back((uint64_t)Buffer);
-
- // Keep a map of object file section numbers to corresponding SectionIDs
- // while processing the file.
- SectionMap.push_back(SectionID);
- }
-
- // Process the symbol table.
- SmallVector<StringRef, 64> SymbolNames;
- processSymbols64(Obj, SectionMap, SymbolNames, SymtabLC);
-
- // Process the relocations for each section we're loading.
- Relocations.grow(Relocations.size() + Segment64LC->NumSections);
- for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) {
- InMemoryStruct<macho::Section64> Sect;
- Obj->ReadSection64(*SegmentLCI, SectNum, Sect);
- if (!Sect)
- return Error("unable to load section: '" + Twine(SectNum) + "'");
- for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
- InMemoryStruct<macho::RelocationEntry> RE;
- Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
- if (RE->Word0 & macho::RF_Scattered)
- return Error("NOT YET IMPLEMENTED: scattered relocations.");
- // Word0 of the relocation is the offset into the section where the
- // relocation should be applied. We need to translate that into an
- // offset into a function since that's our atom.
- uint32_t Offset = RE->Word0;
- bool isExtern = (RE->Word1 >> 27) & 1;
-
- // FIXME: Get the relocation addend from the target address.
- // FIXME: VERY imporant for internal relocations.
-
- // Figure out the source symbol of the relocation. If isExtern is true,
- // this relocation references the symbol table, otherwise it references
- // a section in the same object, numbered from 1 through NumSections
- // (SectionBases is [0, NumSections-1]).
- uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
- if (!isExtern) {
- assert(SourceNum > 0 && "Invalid relocation section number!");
- unsigned SectionID = SectionMap[SourceNum - 1];
- unsigned TargetID = SectionMap[SectNum];
- DEBUG(dbgs() << "Internal relocation at Section #"
- << TargetID << " + " << Offset
- << " from Section #"
- << SectionID << " (Word1: "
- << format("0x%x", RE->Word1) << ")\n");
-
- // Store the relocation information. It will get resolved when
- // the section addresses are assigned.
- Relocations[SectionID].push_back(RelocationEntry(TargetID,
- Offset,
- RE->Word1,
- 0 /*Addend*/));
- } else {
- StringRef SourceName = SymbolNames[SourceNum];
-
- // Now store the relocation information. Associate it with the source
- // symbol. Just add it to the unresolved list and let the general
- // path post-load resolve it if we know where the symbol is.
- UnresolvedRelocations[SourceName].push_back(RelocationEntry(SectNum,
- Offset,
- RE->Word1,
- 0 /*Addend*/));
- DEBUG(dbgs() << "Relocation at Section #" << SectNum << " + " << Offset
- << " from '" << SourceName << "(Word1: "
- << format("0x%x", RE->Word1) << ")\n");
- }
+ } else {
+ error_code err;
+ uint8_t sIdx = static_cast<uint8_t>(RelType & 0xFF);
+ section_iterator sIt = Obj.begin_sections(),
+ sItEnd = Obj.end_sections();
+ for (uint8_t i = 1; i < sIdx; i++) {
+      sIt.increment(err);
+ if (sIt == sItEnd)
+ break;
}
- }
-
- // Resolve the addresses of any symbols that were defined in this segment.
- for (int i = 0, e = SymbolNames.size(); i != e; ++i)
- resolveSymbol(SymbolNames[i]);
-
- return false;
-}
-
-bool RuntimeDyldMachO::
-processSymbols32(const MachOObject *Obj,
- SmallVectorImpl<unsigned> &SectionMap,
- SmallVectorImpl<StringRef> &SymbolNames,
- const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
- // FIXME: Combine w/ processSymbols64. Factor 64/32 datatype and such.
- for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
- InMemoryStruct<macho::SymbolTableEntry> STE;
- Obj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE);
- if (!STE)
- return Error("unable to read symbol: '" + Twine(i) + "'");
- // Get the symbol name.
- StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
- SymbolNames.push_back(Name);
-
- // FIXME: Check the symbol type and flags.
- if (STE->Type != 0xF) // external, defined in this segment.
- continue;
- // Flags in the upper nibble we don't care about.
- if ((STE->Flags & 0xf) != 0x0)
- continue;
-
- // Remember the symbol.
- uint32_t SectionID = SectionMap[STE->SectionIndex - 1];
- SymbolTable[Name] = SymbolLoc(SectionID, STE->Value);
-
- DEBUG(dbgs() << "Symbol: '" << Name << "' @ "
- << (getSectionAddress(SectionID) + STE->Value)
- << "\n");
- }
- return false;
-}
-
-bool RuntimeDyldMachO::
-processSymbols64(const MachOObject *Obj,
- SmallVectorImpl<unsigned> &SectionMap,
- SmallVectorImpl<StringRef> &SymbolNames,
- const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
- for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
- InMemoryStruct<macho::Symbol64TableEntry> STE;
- Obj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE);
- if (!STE)
- return Error("unable to read symbol: '" + Twine(i) + "'");
- // Get the symbol name.
- StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
- SymbolNames.push_back(Name);
-
- // FIXME: Check the symbol type and flags.
- if (STE->Type != 0xF) // external, defined in this segment.
- continue;
- // Flags in the upper nibble we don't care about.
- if ((STE->Flags & 0xf) != 0x0)
- continue;
-
- // Remember the symbol.
- uint32_t SectionID = SectionMap[STE->SectionIndex - 1];
- SymbolTable[Name] = SymbolLoc(SectionID, STE->Value);
-
- DEBUG(dbgs() << "Symbol: '" << Name << "' @ "
- << (getSectionAddress(SectionID) + STE->Value)
- << "\n");
- }
- return false;
-}
-
-// resolveSymbol - Resolve any relocations to the specified symbol if
-// we know where it lives.
-void RuntimeDyldMachO::resolveSymbol(StringRef Name) {
- StringMap<SymbolLoc>::const_iterator Loc = SymbolTable.find(Name);
- if (Loc == SymbolTable.end())
- return;
-
- RelocationList &Relocs = UnresolvedRelocations[Name];
- DEBUG(dbgs() << "Resolving symbol '" << Name << "'\n");
- for (int i = 0, e = Relocs.size(); i != e; ++i) {
- // Change the relocation to be section relative rather than symbol
- // relative and move it to the resolved relocation list.
- RelocationEntry Entry = Relocs[i];
- Entry.Addend += Loc->second.second;
- Relocations[Loc->second.first].push_back(Entry);
- }
- // FIXME: Keep a worklist of the relocations we've added so that we can
- // resolve more selectively later.
- Relocs.clear();
-}
-
-bool RuntimeDyldMachO::loadObject(MemoryBuffer *InputBuffer) {
- // If the linker is in an error state, don't do anything.
- if (hasError())
- return true;
- // Load the Mach-O wrapper object.
- std::string ErrorStr;
- OwningPtr<MachOObject> Obj(
- MachOObject::LoadFromBuffer(InputBuffer, &ErrorStr));
- if (!Obj)
- return Error("unable to load object: '" + ErrorStr + "'");
-
- // Get the CPU type information from the header.
- const macho::Header &Header = Obj->getHeader();
-
- // FIXME: Error checking that the loaded object is compatible with
- // the system we're running on.
- CPUType = Header.CPUType;
- CPUSubtype = Header.CPUSubtype;
-
- // Validate that the load commands match what we expect.
- const MachOObject::LoadCommandInfo *SegmentLCI = 0, *SymtabLCI = 0,
- *DysymtabLCI = 0;
- for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
- const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i);
- switch (LCI.Command.Type) {
- case macho::LCT_Segment:
- case macho::LCT_Segment64:
- if (SegmentLCI)
- return Error("unexpected input object (multiple segments)");
- SegmentLCI = &LCI;
- break;
- case macho::LCT_Symtab:
- if (SymtabLCI)
- return Error("unexpected input object (multiple symbol tables)");
- SymtabLCI = &LCI;
- break;
- case macho::LCT_Dysymtab:
- if (DysymtabLCI)
- return Error("unexpected input object (multiple symbol tables)");
- DysymtabLCI = &LCI;
- break;
- default:
- return Error("unexpected input object (unexpected load command");
+ assert(sIt != sItEnd && "No section containing relocation!");
+ Value.SectionID = findOrEmitSection(*sIt, true, ObjSectionToID);
+ Value.Addend = *(const intptr_t *)Target;
+ if (Value.Addend) {
+      // The MachO addend is an offset from the current section; rebase it
+      // as an offset from the destination section.
+ Value.Addend += Section.ObjAddress - Sections[Value.SectionID].ObjAddress;
}
}
- if (!SymtabLCI)
- return Error("no symbol table found in object");
- if (!SegmentLCI)
- return Error("no segments found in object");
-
- // Read and register the symbol table data.
- InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
- Obj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
- if (!SymtabLC)
- return Error("unable to load symbol table load command");
- Obj->RegisterStringTable(*SymtabLC);
-
- // Read the dynamic link-edit information, if present (not present in static
- // objects).
- if (DysymtabLCI) {
- InMemoryStruct<macho::DysymtabLoadCommand> DysymtabLC;
- Obj->ReadDysymtabLoadCommand(*DysymtabLCI, DysymtabLC);
- if (!DysymtabLC)
- return Error("unable to load dynamic link-exit load command");
-
- // FIXME: We don't support anything interesting yet.
-// if (DysymtabLC->LocalSymbolsIndex != 0)
-// return Error("NOT YET IMPLEMENTED: local symbol entries");
-// if (DysymtabLC->ExternalSymbolsIndex != 0)
-// return Error("NOT YET IMPLEMENTED: non-external symbol entries");
-// if (DysymtabLC->UndefinedSymbolsIndex != SymtabLC->NumSymbolTableEntries)
-// return Error("NOT YET IMPLEMENTED: undefined symbol entries");
- }
-
- // Load the segment load command.
- if (SegmentLCI->Command.Type == macho::LCT_Segment) {
- if (loadSegment32(Obj.get(), SegmentLCI, SymtabLC))
- return true;
- } else {
- if (loadSegment64(Obj.get(), SegmentLCI, SymtabLC))
- return true;
- }
-
- // Assign the addresses of the sections from the object so that any
- // relocations to them get set properly.
- // FIXME: This is done directly from the client at the moment. We should
- // default the values to the local storage, at least when the target arch
- // is the same as the host arch.
-
- return false;
+ if (Arch == Triple::arm && RelType == macho::RIT_ARM_Branch24Bit) {
+    // This is an ARM branch relocation, which needs to go through a
+    // stub function.
+
+    // Look for an existing stub.
+ StubMap::const_iterator stubIt = Stubs.find(Value);
+ if (stubIt != Stubs.end())
+ resolveRelocation(Target, (uint64_t)Target,
+ (uint64_t)Section.Address + stubIt->second,
+ RelType, 0);
+ else {
+ // Create a new stub function.
+ Stubs[Value] = Section.StubOffset;
+ uint8_t *StubTargetAddr = createStubFunction(Section.Address +
+ Section.StubOffset);
+ AddRelocation(Value, Rel.SectionID, StubTargetAddr - Section.Address,
+ macho::RIT_Vanilla);
+ resolveRelocation(Target, (uint64_t)Target,
+ (uint64_t)Section.Address + Section.StubOffset,
+ RelType, 0);
+ Section.StubOffset += getMaxStubSize();
+ }
+ } else
+ AddRelocation(Value, Rel.SectionID, Rel.Offset, RelType);
}
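
Background on the stub path above: a direct ARM branch immediate only reaches about +/-32 MiB, so a branch whose final target may land farther away is pointed at a stub emitted at the end of the same section, and a Vanilla relocation against the stub body makes it carry the real target address once addresses are assigned. The lookup-or-create step, restated with stand-in types rather than the RuntimeDyld ones:

    #include <cstdint>
    #include <map>

    // Return the address of the stub for Key, emitting a new one if needed.
    // SectionBase/StubOffset mirror SectionEntry's Address and StubOffset.
    static uintptr_t getOrCreateStub(std::map<int, uint64_t> &Stubs, int Key,
                                     uintptr_t SectionBase,
                                     uint64_t &StubOffset,
                                     uint64_t MaxStubSize) {
      std::map<int, uint64_t>::iterator It = Stubs.find(Key);
      if (It != Stubs.end())
        return SectionBase + It->second; // Reuse the existing stub.
      Stubs[Key] = StubOffset;           // Record where the new stub lives.
      uintptr_t StubAddr = SectionBase + StubOffset;
      StubOffset += MaxStubSize;         // Reserve room past the new stub.
      return StubAddr;
    }
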
-// Assign an address to a symbol name and resolve all the relocations
-// associated with it.
-void RuntimeDyldMachO::reassignSectionAddress(unsigned SectionID,
- uint64_t Addr) {
- // The address to use for relocation resolution is not
- // the address of the local section buffer. We must be doing
- // a remote execution environment of some sort. Re-apply any
- // relocations referencing this section with the given address.
- //
- // Addr is a uint64_t because we can't assume the pointer width
- // of the target is the same as that of the host. Just use a generic
- // "big enough" type.
-
- SectionLoadAddress[SectionID] = Addr;
-
- RelocationList &Relocs = Relocations[SectionID];
- for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
- RelocationEntry &RE = Relocs[i];
- uint8_t *Target = (uint8_t*)Sections[RE.SectionID].base() + RE.Offset;
- bool isPCRel = (RE.Data >> 24) & 1;
- unsigned Type = (RE.Data >> 28) & 0xf;
- unsigned Size = 1 << ((RE.Data >> 25) & 3);
-
- DEBUG(dbgs() << "Resolving relocation at Section #" << RE.SectionID
- << " + " << RE.Offset << " (" << format("%p", Target) << ")"
- << " from Section #" << SectionID << " (" << format("%p", Addr) << ")"
- << "(" << (isPCRel ? "pcrel" : "absolute")
- << ", type: " << Type << ", Size: " << Size << ", Addend: "
- << RE.Addend << ").\n");
-
- resolveRelocation(Target, Addr, isPCRel, Type, Size, RE.Addend);
- }
-}
-bool RuntimeDyldMachO::isKnownFormat(const MemoryBuffer *InputBuffer) {
+bool RuntimeDyldMachO::isCompatibleFormat(const MemoryBuffer *InputBuffer) const {
StringRef Magic = InputBuffer->getBuffer().slice(0, 4);
if (Magic == "\xFE\xED\xFA\xCE") return true;
if (Magic == "\xCE\xFA\xED\xFE") return true;
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
index 5798981..36b39dd 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
@@ -25,68 +25,37 @@ using namespace llvm::object;
namespace llvm {
class RuntimeDyldMachO : public RuntimeDyldImpl {
-
- // For each symbol, keep a list of relocations based on it. Anytime
- // its address is reassigned (the JIT re-compiled the function, e.g.),
- // the relocations get re-resolved.
- // The symbol (or section) the relocation is sourced from is the Key
- // in the relocation list where it's stored.
- struct RelocationEntry {
- unsigned SectionID; // Section the relocation is contained in.
- uint64_t Offset; // Offset into the section for the relocation.
- uint32_t Data; // Second word of the raw macho relocation entry.
- int64_t Addend; // Addend encoded in the instruction itself, if any,
- // plus the offset into the source section for
- // the symbol once the relocation is resolvable.
-
- RelocationEntry(unsigned id, uint64_t offset, uint32_t data, int64_t addend)
- : SectionID(id), Offset(offset), Data(data), Addend(addend) {}
- };
- typedef SmallVector<RelocationEntry, 4> RelocationList;
- // Relocations to sections already loaded. Indexed by SectionID which is the
- // source of the address. The target where the address will be writen is
- // SectionID/Offset in the relocation itself.
- IndexedMap<RelocationList> Relocations;
- // Relocations to symbols that are not yet resolved. Must be external
- // relocations by definition. Indexed by symbol name.
- StringMap<RelocationList> UnresolvedRelocations;
-
- bool resolveRelocation(uint8_t *Address, uint64_t Value, bool isPCRel,
- unsigned Type, unsigned Size, int64_t Addend);
- bool resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
- unsigned Type, unsigned Size, int64_t Addend);
- bool resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
- unsigned Type, unsigned Size, int64_t Addend);
-
- bool loadSegment32(const MachOObject *Obj,
- const MachOObject::LoadCommandInfo *SegmentLCI,
- const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
- bool loadSegment64(const MachOObject *Obj,
- const MachOObject::LoadCommandInfo *SegmentLCI,
- const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
- bool processSymbols32(const MachOObject *Obj,
- SmallVectorImpl<unsigned> &SectionMap,
- SmallVectorImpl<StringRef> &SymbolNames,
- const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
- bool processSymbols64(const MachOObject *Obj,
- SmallVectorImpl<unsigned> &SectionMap,
- SmallVectorImpl<StringRef> &SymbolNames,
- const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
-
- void resolveSymbol(StringRef Name);
+protected:
+ bool resolveX86_64Relocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ bool isPCRel,
+ unsigned Type,
+ unsigned Size,
+ int64_t Addend);
+ bool resolveARMRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ bool isPCRel,
+ unsigned Type,
+ unsigned Size,
+ int64_t Addend);
+
+ virtual void processRelocationRef(const ObjRelocationInfo &Rel,
+ const ObjectFile &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ LocalSymbolMap &Symbols, StubMap &Stubs);
public:
+ virtual void resolveRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend);
+
RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
- bool loadObject(MemoryBuffer *InputBuffer);
-
- void reassignSectionAddress(unsigned SectionID, uint64_t Addr);
-
- static bool isKnownFormat(const MemoryBuffer *InputBuffer);
-
- bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const {
- return isKnownFormat(InputBuffer);
- }
+ bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const;
};
} // end namespace llvm
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index bd5956f..ab5ddaf 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -1528,6 +1528,11 @@ bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc,
}
Lex();
}
+ // If there weren't any arguments, erase the token vector so everything
+ // else knows that. Leaving around the vestigal empty token list confuses
+ // things.
+ if (MacroArguments.size() == 1 && MacroArguments.back().empty())
+ MacroArguments.clear();
// Macro instantiation is lexical, unfortunately. We construct a new buffer
// to hold the macro body with substitutions.
@@ -1624,6 +1629,8 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef) {
return Error(EqualLoc, "Recursive use of '" + Name + "'");
else if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable())
; // Allow redefinitions of undefined symbols only used in directives.
+ else if (Sym->isVariable() && !Sym->isUsed() && allow_redef)
+ ; // Allow redefinitions of variables that haven't yet been used.
else if (!Sym->isUndefined() && (!Sym->isVariable() || !allow_redef))
return Error(EqualLoc, "redefinition of '" + Name + "'");
else if (!Sym->isVariable())
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
index c2fad167..e013e77 100644
--- a/lib/MC/MCSymbol.cpp
+++ b/lib/MC/MCSymbol.cpp
@@ -54,17 +54,14 @@ const MCSymbol &MCSymbol::AliasedSymbol() const {
void MCSymbol::setVariableValue(const MCExpr *Value) {
assert(!IsUsed && "Cannot set a variable that has already been used.");
assert(Value && "Invalid variable value!");
- assert((isUndefined() || (isAbsolute() && isa<MCConstantExpr>(Value))) &&
- "Invalid redefinition!");
this->Value = Value;
// Variables should always be marked as in the same "section" as the value.
const MCSection *Section = Value->FindAssociatedSection();
- if (Section) {
+ if (Section)
setSection(*Section);
- } else {
+ else
setUndefined();
- }
}
void MCSymbol::print(raw_ostream &OS) const {
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index 7144e68..f706cac 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -783,9 +783,22 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
}
if (Sec->Relocations.size() > 0) {
- Sec->Header.NumberOfRelocations = Sec->Relocations.size();
+ bool RelocationsOverflow = Sec->Relocations.size() >= 0xffff;
+
+ if (RelocationsOverflow) {
+        // Signal overflow by setting NumberOfRelocations to max value. Actual
+ // size is found in reloc #0. Microsoft tools understand this.
+ Sec->Header.NumberOfRelocations = 0xffff;
+ } else {
+ Sec->Header.NumberOfRelocations = Sec->Relocations.size();
+ }
Sec->Header.PointerToRelocations = offset;
+ if (RelocationsOverflow) {
+ // Reloc #0 will contain actual count, so make room for it.
+ offset += COFF::RelocationSize;
+ }
+
offset += COFF::RelocationSize * Sec->Relocations.size();
for (relocations::iterator cr = Sec->Relocations.begin(),
@@ -820,8 +833,12 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
MCAssembler::const_iterator j, je;
for (i = Sections.begin(), ie = Sections.end(); i != ie; i++)
- if ((*i)->Number != -1)
+ if ((*i)->Number != -1) {
+ if ((*i)->Relocations.size() >= 0xffff) {
+ (*i)->Header.Characteristics |= COFF::IMAGE_SCN_LNK_NRELOC_OVFL;
+ }
WriteSectionHeader((*i)->Header);
+ }
for (i = Sections.begin(), ie = Sections.end(),
j = Asm.begin(), je = Asm.end();
@@ -841,6 +858,16 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
assert(OS.tell() == (*i)->Header.PointerToRelocations &&
"Section::PointerToRelocations is insane!");
+ if ((*i)->Relocations.size() >= 0xffff) {
+        // In case of overflow, write the actual relocation count as the first
+        // relocation; the count includes the synthetic reloc itself (+ 1).
+ COFF::relocation r;
+ r.VirtualAddress = (*i)->Relocations.size() + 1;
+ r.SymbolTableIndex = 0;
+ r.Type = 0;
+ WriteRelocation(r);
+ }
+
for (relocations::const_iterator k = (*i)->Relocations.begin(),
ke = (*i)->Relocations.end();
k != ke; k++) {
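
The scheme above follows the PE/COFF convention for sections with 0xffff or more relocations: NumberOfRelocations, a 16-bit field, is pinned at 0xffff, IMAGE_SCN_LNK_NRELOC_OVFL is set in the section characteristics, and the true count travels in the VirtualAddress field of a synthetic relocation #0 that counts itself (hence the + 1). A reader-side sketch of recovering the count; the struct is a stand-in for the real header type:

    #include <cstdint>

    struct CoffSectionHeader {
      uint16_t NumberOfRelocations;
      uint32_t Characteristics;
    };

    static const uint32_t IMAGE_SCN_LNK_NRELOC_OVFL = 0x01000000;

    // Recover the real relocation count, given the header and, when the
    // overflow flag is set, the VirtualAddress of relocation #0.
    static uint32_t relocCount(const CoffSectionHeader &H, uint32_t Reloc0VA) {
      if ((H.Characteristics & IMAGE_SCN_LNK_NRELOC_OVFL) &&
          H.NumberOfRelocations == 0xffff)
        return Reloc0VA - 1; // Reloc #0 counts itself; exclude it.
      return H.NumberOfRelocations;
    }
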
diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp
index b67377c..c5f15ba 100644
--- a/lib/Object/Archive.cpp
+++ b/lib/Object/Archive.cpp
@@ -174,7 +174,7 @@ error_code Archive::Child::getAsBinary(OwningPtr<Binary> &Result) const {
}
Archive::Archive(MemoryBuffer *source, error_code &ec)
- : Binary(Binary::isArchive, source) {
+ : Binary(Binary::ID_Archive, source) {
// Check for sufficient magic.
if (!source || source->getBufferSize()
< (8 + sizeof(ArchiveMemberHeader) + 2) // Smallest archive.
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index a3fdd5b..b8ba905 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Object/COFF.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
@@ -300,24 +301,7 @@ error_code COFFObjectFile::getSectionNext(DataRefImpl Sec,
error_code COFFObjectFile::getSectionName(DataRefImpl Sec,
StringRef &Result) const {
const coff_section *sec = toSec(Sec);
- StringRef name;
- if (sec->Name[7] == 0)
- // Null terminated, let ::strlen figure out the length.
- name = sec->Name;
- else
- // Not null terminated, use all 8 bytes.
- name = StringRef(sec->Name, 8);
-
- // Check for string table entry. First byte is '/'.
- if (name[0] == '/') {
- uint32_t Offset;
- name.substr(1).getAsInteger(10, Offset);
- if (error_code ec = getString(Offset, name))
- return ec;
- }
-
- Result = name;
- return object_error::success;
+ return getSectionName(sec, Result);
}
error_code COFFObjectFile::getSectionAddress(DataRefImpl Sec,
@@ -337,16 +321,10 @@ error_code COFFObjectFile::getSectionSize(DataRefImpl Sec,
error_code COFFObjectFile::getSectionContents(DataRefImpl Sec,
StringRef &Result) const {
const coff_section *sec = toSec(Sec);
- // The only thing that we need to verify is that the contents is contained
- // within the file bounds. We don't need to make sure it doesn't cover other
- // data, as there's nothing that says that is not allowed.
- uintptr_t con_start = uintptr_t(base()) + sec->PointerToRawData;
- uintptr_t con_end = con_start + sec->SizeOfRawData;
- if (con_end > uintptr_t(Data->getBufferEnd()))
- return object_error::parse_failed;
- Result = StringRef(reinterpret_cast<const char*>(con_start),
- sec->SizeOfRawData);
- return object_error::success;
+ ArrayRef<uint8_t> Res;
+ error_code EC = getSectionContents(sec, Res);
+ Result = StringRef(reinterpret_cast<const char*>(Res.data()), Res.size());
+ return EC;
}
error_code COFFObjectFile::getSectionAlignment(DataRefImpl Sec,
@@ -421,7 +399,7 @@ relocation_iterator COFFObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
}
COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
- : ObjectFile(Binary::isCOFF, Object, ec)
+ : ObjectFile(Binary::ID_COFF, Object, ec)
, Header(0)
, SectionTable(0)
, SymbolTable(0)
@@ -630,6 +608,43 @@ error_code COFFObjectFile::getSymbolName(const coff_symbol *symbol,
return object_error::success;
}
+error_code COFFObjectFile::getSectionName(const coff_section *Sec,
+ StringRef &Res) const {
+ StringRef Name;
+ if (Sec->Name[7] == 0)
+ // Null terminated, let ::strlen figure out the length.
+ Name = Sec->Name;
+ else
+ // Not null terminated, use all 8 bytes.
+ Name = StringRef(Sec->Name, 8);
+
+ // Check for string table entry. First byte is '/'.
+ if (Name[0] == '/') {
+ uint32_t Offset;
+ if (Name.substr(1).getAsInteger(10, Offset))
+ return object_error::parse_failed;
+ if (error_code ec = getString(Offset, Name))
+ return ec;
+ }
+
+ Res = Name;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSectionContents(const coff_section *Sec,
+ ArrayRef<uint8_t> &Res) const {
+  // The only thing we need to verify is that the contents are contained
+  // within the file bounds. We don't need to check that they avoid other
+  // data, as nothing forbids that.
+ uintptr_t ConStart = uintptr_t(base()) + Sec->PointerToRawData;
+ uintptr_t ConEnd = ConStart + Sec->SizeOfRawData;
+ if (ConEnd > uintptr_t(Data->getBufferEnd()))
+ return object_error::parse_failed;
+ Res = ArrayRef<uint8_t>(reinterpret_cast<const unsigned char*>(ConStart),
+ Sec->SizeOfRawData);
+ return object_error::success;
+}
+
const coff_relocation *COFFObjectFile::toRel(DataRefImpl Rel) const {
return reinterpret_cast<const coff_relocation*>(Rel.p);
}
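
The long-section-name handling factored out above follows the COFF rule that a name longer than the 8 header bytes is stored in the string table, with the header field carrying '/' followed by a decimal offset. A condensed sketch of the decode, with the string table reduced to a raw pointer and no error handling:

    #include <cstdlib>
    #include <string>

    // Decode an 8-byte COFF section name field; StrTab stands in for the
    // object file's string table.
    static std::string decodeSectionName(const char Name[8],
                                         const char *StrTab) {
      // Null-terminated short name, or all 8 bytes if not terminated.
      std::string N = Name[7] ? std::string(Name, 8) : std::string(Name);
      if (!N.empty() && N[0] == '/') // Long name: "/<offset>" into StrTab.
        return StrTab + std::strtoul(N.c_str() + 1, 0, 10);
      return N;
    }
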
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 655c40a..819409e 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -30,7 +30,7 @@ namespace object {
MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO,
error_code &ec)
- : ObjectFile(Binary::isMachO, Object, ec),
+ : ObjectFile(Binary::ID_MachO, Object, ec),
MachOObj(MOO),
RegisteredStringTable(std::numeric_limits<uint32_t>::max()) {
DataRefImpl DRI;
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 031bbb8..9b81fe7 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -457,16 +457,6 @@ APInt APInt::XorSlowCase(const APInt& RHS) const {
return APInt(val, getBitWidth()).clearUnusedBits();
}
-bool APInt::operator !() const {
- if (isSingleWord())
- return !VAL;
-
- for (unsigned i = 0; i < getNumWords(); ++i)
- if (pVal[i])
- return false;
- return true;
-}
-
APInt APInt::operator*(const APInt& RHS) const {
assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
if (isSingleWord())
@@ -494,12 +484,6 @@ APInt APInt::operator-(const APInt& RHS) const {
return Result.clearUnusedBits();
}
-bool APInt::operator[](unsigned bitPosition) const {
- assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
- return (maskBit(bitPosition) &
- (isSingleWord() ? VAL : pVal[whichWord(bitPosition)])) != 0;
-}
-
bool APInt::EqualSlowCase(const APInt& RHS) const {
// Get some facts about the number of bits used in the two operands.
unsigned n1 = getActiveBits();
@@ -722,20 +706,9 @@ unsigned APInt::countLeadingZerosSlowCase() const {
return Count;
}
-static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
- unsigned Count = 0;
- if (skip)
- V <<= skip;
- while (V && (V & (1ULL << 63))) {
- Count++;
- V <<= 1;
- }
- return Count;
-}
-
unsigned APInt::countLeadingOnes() const {
if (isSingleWord())
- return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
+ return CountLeadingOnes_64(VAL << (APINT_BITS_PER_WORD - BitWidth));
unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
unsigned shift;
@@ -746,13 +719,13 @@ unsigned APInt::countLeadingOnes() const {
shift = APINT_BITS_PER_WORD - highWordBits;
}
int i = getNumWords() - 1;
- unsigned Count = countLeadingOnes_64(pVal[i], shift);
+ unsigned Count = CountLeadingOnes_64(pVal[i] << shift);
if (Count == highWordBits) {
for (i--; i >= 0; --i) {
if (pVal[i] == -1ULL)
Count += APINT_BITS_PER_WORD;
else {
- Count += countLeadingOnes_64(pVal[i], 0);
+ Count += CountLeadingOnes_64(pVal[i]);
break;
}
}
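
The deleted static helper shifted its argument left by skip before counting, so CountLeadingOnes_64(V << shift) is an exact drop-in. A small self-checking sketch of the pattern as applied to a narrow value stored in the low bits of a word:

    #include <cassert>
    #include <cstdint>

    // Portable reference: count leading one bits of a 64-bit value.
    static unsigned countLeadingOnes64(uint64_t V) {
      unsigned Count = 0;
      while (V & (1ULL << 63)) { ++Count; V <<= 1; }
      return Count;
    }

    int main() {
      uint64_t Val = 0xE;       // a 4-bit value, bits 1110
      unsigned BitWidth = 4;
      // Shift the unused high bits away first, then count.
      assert(countLeadingOnes64(Val << (64 - BitWidth)) == 3);
      return 0;
    }
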
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index d1ec4b0..e6fdf16 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -1191,7 +1191,7 @@ printOptionNoValue(const Option &O, size_t GlobalWidth) const {
static int OptNameCompare(const void *LHS, const void *RHS) {
typedef std::pair<const char *, Option*> pair_ty;
- return strcmp(((pair_ty*)LHS)->first, ((pair_ty*)RHS)->first);
+ return strcmp(((const pair_ty*)LHS)->first, ((const pair_ty*)RHS)->first);
}
// Copy Options into a vector so we can sort them as we like.
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
index 0dba28a..32126ec 100644
--- a/lib/Support/GraphWriter.cpp
+++ b/lib/Support/GraphWriter.cpp
@@ -11,12 +11,16 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
#include "llvm/Config/config.h"
using namespace llvm;
+static cl::opt<bool> ViewBackground("view-background", cl::Hidden,
+ cl::desc("Execute graph viewer in the background. Creates tmp file litter."));
+
std::string llvm::DOT::EscapeString(const std::string &Label) {
std::string Str(Label);
for (unsigned i = 0; i != Str.length(); ++i)
@@ -49,10 +53,28 @@ std::string llvm::DOT::EscapeString(const std::string &Label) {
return Str;
}
-
+// Execute the graph viewer. Return true if successful.
+static bool LLVM_ATTRIBUTE_UNUSED
+ExecGraphViewer(const sys::Path &ExecPath, std::vector<const char*> &args,
+ const sys::Path &Filename, bool wait, std::string &ErrMsg) {
+ if (wait) {
+ if (sys::Program::ExecuteAndWait(ExecPath, &args[0],0,0,0,0,&ErrMsg)) {
+ errs() << "Error: " << ErrMsg << "\n";
+ return false;
+ }
+ Filename.eraseFromDisk();
+ errs() << " done. \n";
+ }
+ else {
+ sys::Program::ExecuteNoWait(ExecPath, &args[0],0,0,0,&ErrMsg);
+ errs() << "Remember to erase graph file: " << Filename.str() << "\n";
+ }
+ return true;
+}
void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
GraphProgram::Name program) {
+ wait &= !ViewBackground;
std::string ErrMsg;
#if HAVE_GRAPHVIZ
sys::Path Graphviz(LLVM_PATH_GRAPHVIZ);
@@ -61,14 +83,10 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
args.push_back(Graphviz.c_str());
args.push_back(Filename.c_str());
args.push_back(0);
-
+
errs() << "Running 'Graphviz' program... ";
- if (sys::Program::ExecuteAndWait(Graphviz, &args[0],0,0,0,0,&ErrMsg)) {
- errs() << "Error: " << ErrMsg << "\n";
+ if (!ExecGraphViewer(Graphviz, args, Filename, wait, ErrMsg))
return;
- }
- Filename.eraseFromDisk();
- errs() << " done. \n";
#elif HAVE_XDOT_PY
std::vector<const char*> args;
@@ -83,17 +101,12 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
case GraphProgram::CIRCO: args.push_back("-f"); args.push_back("circo");break;
default: errs() << "Unknown graph layout name; using default.\n";
}
-
+
args.push_back(0);
errs() << "Running 'xdot.py' program... ";
- if (sys::Program::ExecuteAndWait(sys::Path(LLVM_PATH_XDOT_PY),
- &args[0],0,0,0,0,&ErrMsg)) {
- errs() << "Error: " << ErrMsg << "\n";
+ if (!ExecGraphViewer(sys::Path(LLVM_PATH_XDOT_PY), args, Filename, wait, ErrMsg))
return;
- }
- Filename.eraseFromDisk();
- errs() << " done. \n";
#elif (HAVE_GV && (HAVE_DOT || HAVE_FDP || HAVE_NEATO || \
HAVE_TWOPI || HAVE_CIRCO))
@@ -150,14 +163,11 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
args.push_back("-o");
args.push_back(PSFilename.c_str());
args.push_back(0);
-
+
errs() << "Running '" << prog.str() << "' program... ";
- if (sys::Program::ExecuteAndWait(prog, &args[0], 0, 0, 0, 0, &ErrMsg)) {
- errs() << "Error: " << ErrMsg << "\n";
+ if (!ExecGraphViewer(prog, args, Filename, wait, ErrMsg))
return;
- }
- errs() << " done. \n";
sys::Path gv(LLVM_PATH_GV);
args.clear();
@@ -165,19 +175,11 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
args.push_back(PSFilename.c_str());
args.push_back("--spartan");
args.push_back(0);
-
+
ErrMsg.clear();
- if (wait) {
- if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg))
- errs() << "Error: " << ErrMsg << "\n";
- Filename.eraseFromDisk();
- PSFilename.eraseFromDisk();
- }
- else {
- sys::Program::ExecuteNoWait(gv, &args[0],0,0,0,&ErrMsg);
- errs() << "Remember to erase graph files: " << Filename.str() << " "
- << PSFilename.str() << "\n";
- }
+ if (!ExecGraphViewer(gv, args, PSFilename, wait, ErrMsg))
+ return;
+
#elif HAVE_DOTTY
sys::Path dotty(LLVM_PATH_DOTTY);
@@ -185,16 +187,13 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
args.push_back(dotty.c_str());
args.push_back(Filename.c_str());
args.push_back(0);
-
- errs() << "Running 'dotty' program... ";
- if (sys::Program::ExecuteAndWait(dotty, &args[0],0,0,0,0,&ErrMsg)) {
- errs() << "Error: " << ErrMsg << "\n";
- } else {
+
// Dotty spawns another app and doesn't wait until it returns
#if defined (__MINGW32__) || defined (_WINDOWS)
- return;
+ wait = false;
#endif
- Filename.eraseFromDisk();
- }
+ errs() << "Running 'dotty' program... ";
+ if (!ExecGraphViewer(dotty, args, Filename, wait, ErrMsg))
+ return;
#endif
}
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 4b15587..911a03f 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -336,7 +336,11 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
// Error while reading.
return error_code(errno, posix_category());
}
- assert(NumRead != 0 && "fstat reported an invalid file size.");
+ if (NumRead == 0) {
+ assert(0 && "We got inaccurate FileSize value or fstat reported an "
+ "invalid file size.");
+ break;
+ }
BytesLeft -= NumRead;
BufPtr += NumRead;
}
diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp
index 997ce0b..68d9c29 100644
--- a/lib/Support/SmallPtrSet.cpp
+++ b/lib/Support/SmallPtrSet.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/MathExtras.h"
+#include <algorithm>
#include <cstdlib>
using namespace llvm;
@@ -223,6 +224,56 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
NumTombstones = RHS.NumTombstones;
}
+void SmallPtrSetImpl::swap(SmallPtrSetImpl &RHS) {
+ if (this == &RHS) return;
+
+ // We can only avoid copying elements if neither set is small.
+ if (!this->isSmall() && !RHS.isSmall()) {
+ std::swap(this->CurArray, RHS.CurArray);
+ std::swap(this->CurArraySize, RHS.CurArraySize);
+ std::swap(this->NumElements, RHS.NumElements);
+ std::swap(this->NumTombstones, RHS.NumTombstones);
+ return;
+ }
+
+ // FIXME: From here on we assume that both sets have the same small size.
+
+ // If only RHS is small, copy the small elements into LHS and move the pointer
+ // from LHS to RHS.
+ if (!this->isSmall() && RHS.isSmall()) {
+ std::copy(RHS.SmallArray, RHS.SmallArray+RHS.CurArraySize,
+ this->SmallArray);
+ std::swap(this->NumElements, RHS.NumElements);
+ std::swap(this->CurArraySize, RHS.CurArraySize);
+ RHS.CurArray = this->CurArray;
+ RHS.NumTombstones = this->NumTombstones;
+ this->CurArray = this->SmallArray;
+ this->NumTombstones = 0;
+ return;
+ }
+
+ // If only LHS is small, copy the small elements into RHS and move the pointer
+ // from RHS to LHS.
+ if (this->isSmall() && !RHS.isSmall()) {
+ std::copy(this->SmallArray, this->SmallArray+this->CurArraySize,
+ RHS.SmallArray);
+ std::swap(RHS.NumElements, this->NumElements);
+ std::swap(RHS.CurArraySize, this->CurArraySize);
+ this->CurArray = RHS.CurArray;
+ this->NumTombstones = RHS.NumTombstones;
+ RHS.CurArray = RHS.SmallArray;
+ RHS.NumTombstones = 0;
+ return;
+ }
+
+  // Both are small; just swap the small elements.
+ assert(this->isSmall() && RHS.isSmall());
+ assert(this->CurArraySize == RHS.CurArraySize);
+ std::swap_ranges(this->SmallArray, this->SmallArray+this->CurArraySize,
+ RHS.SmallArray);
+ std::swap(this->NumElements, RHS.NumElements);
+}
+
SmallPtrSetImpl::~SmallPtrSetImpl() {
if (!isSmall())
free(CurArray);
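
Usage of the new swap follows the usual contract: an O(1) pointer exchange once both sets have spilled to the heap, with element copies only while a side is still small. A minimal sketch, assuming the matching SmallPtrSet.h change (not shown in this hunk) exposes swap() on the public template:

    #include "llvm/ADT/SmallPtrSet.h"

    void swapExample(int *A, int *B) {
      llvm::SmallPtrSet<int*, 4> X, Y;
      X.insert(A);
      Y.insert(B);
      X.swap(Y); // Constant time if both are big; copies otherwise.
    }
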
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index 1c28bf8..abe570f 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -285,8 +285,8 @@ static unsigned GetAutoSenseRadix(StringRef &Str) {
/// GetAsUnsignedInteger - Workhorse method that converts a integer character
/// sequence of radix up to 36 to an unsigned long long value.
-static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
- unsigned long long &Result) {
+bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
+ unsigned long long &Result) {
// Autosense radix if not specified.
if (Radix == 0)
Radix = GetAutoSenseRadix(Str);
@@ -326,17 +326,13 @@ static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
return false;
}
-bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const {
- return GetAsUnsignedInteger(*this, Radix, Result);
-}
-
-
-bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
+bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
+ long long &Result) {
unsigned long long ULLVal;
// Handle positive strings first.
- if (empty() || front() != '-') {
- if (GetAsUnsignedInteger(*this, Radix, ULLVal) ||
+ if (Str.empty() || Str.front() != '-') {
+ if (getAsUnsignedInteger(Str, Radix, ULLVal) ||
// Check for value so large it overflows a signed value.
(long long)ULLVal < 0)
return true;
@@ -345,7 +341,7 @@ bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
}
// Get the positive part of the value.
- if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) ||
+ if (getAsUnsignedInteger(Str.substr(1), Radix, ULLVal) ||
// Reject values so large they'd overflow as negative signed, but allow
// "-0". This negates the unsigned so that the negative isn't undefined
// on signed overflow.
@@ -356,24 +352,6 @@ bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
return false;
}
-bool StringRef::getAsInteger(unsigned Radix, int &Result) const {
- long long Val;
- if (getAsInteger(Radix, Val) ||
- (int)Val != Val)
- return true;
- Result = Val;
- return false;
-}
-
-bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const {
- unsigned long long Val;
- if (getAsInteger(Radix, Val) ||
- (unsigned)Val != Val)
- return true;
- Result = Val;
- return false;
-}
-
bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
StringRef Str = *this;
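
With the workhorse promoted from a file-static helper to llvm::getAsUnsignedInteger / llvm::getAsSignedInteger, callers can parse integers without routing through a StringRef member; presumably the removed int/unsigned member overloads become a template in StringRef.h that forwards here (that header change is not part of this hunk). A usage sketch, noting that these functions return true on error:

    #include "llvm/ADT/StringRef.h"

    // Parse a decimal port number; returns true on failure, matching the
    // error convention of getAsUnsignedInteger.
    bool parsePort(llvm::StringRef S, unsigned &Port) {
      unsigned long long V;
      if (llvm::getAsUnsignedInteger(S, /*Radix=*/10, V) || V > 65535)
        return true;
      Port = static_cast<unsigned>(V);
      return false;
    }
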
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 94333a3..d261c53 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -29,6 +29,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case msp430: return "msp430";
case ppc64: return "powerpc64";
case ppc: return "powerpc";
+ case r600: return "r600";
case sparc: return "sparc";
case sparcv9: return "sparcv9";
case tce: return "tce";
@@ -63,6 +64,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case hexagon: return "hexagon";
+ case r600: return "r600";
+
case sparcv9:
case sparc: return "sparc";
@@ -145,6 +148,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("ppc32", ppc)
.Case("ppc", ppc)
.Case("mblaze", mblaze)
+ .Case("r600", r600)
.Case("hexagon", hexagon)
.Case("sparc", sparc)
.Case("sparcv9", sparcv9)
@@ -184,6 +188,7 @@ Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) {
// This is derived from the driver driver.
.Cases("arm", "armv4t", "armv5", "armv6", Triple::arm)
.Cases("armv7", "armv7f", "armv7k", "armv7s", "xscale", Triple::arm)
+ .Case("r600", Triple::r600)
.Case("ptx32", Triple::ptx32)
.Case("ptx64", Triple::ptx64)
.Case("amdil", Triple::amdil)
@@ -206,6 +211,7 @@ const char *Triple::getArchNameForAssembler() {
.Cases("armv5", "armv5e", "thumbv5", "thumbv5e", "armv5")
.Cases("armv6", "thumbv6", "armv6")
.Cases("armv7", "thumbv7", "armv7")
+ .Case("r600", "r600")
.Case("ptx32", "ptx32")
.Case("ptx64", "ptx64")
.Case("le32", "le32")
@@ -234,6 +240,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Cases("mipsel", "mipsallegrexel", Triple::mipsel)
.Cases("mips64", "mips64eb", Triple::mips64)
.Case("mips64el", Triple::mips64el)
+ .Case("r600", Triple::r600)
.Case("hexagon", Triple::hexagon)
.Case("sparc", Triple::sparc)
.Case("sparcv9", Triple::sparcv9)
@@ -641,6 +648,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::mipsel:
case llvm::Triple::ppc:
case llvm::Triple::ptx32:
+ case llvm::Triple::r600:
case llvm::Triple::sparc:
case llvm::Triple::tce:
case llvm::Triple::thumb:
@@ -689,6 +697,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::mipsel:
case Triple::ppc:
case Triple::ptx32:
+ case Triple::r600:
case Triple::sparc:
case Triple::tce:
case Triple::thumb:
@@ -718,6 +727,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::le32:
case Triple::mblaze:
case Triple::msp430:
+ case Triple::r600:
case Triple::tce:
case Triple::thumb:
case Triple::xcore:
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index 12d1b1a..93eed24 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -1726,12 +1726,6 @@ void Record::setName(Init *NewName) {
} // Otherwise this isn't yet registered.
Name = NewName;
checkName();
- // Since the Init for the name was changed, see if we can resolve
- // any of it using members of the Record.
- Init *ComputedName = Name->resolveReferences(*this, 0);
- if (ComputedName != Name) {
- setName(ComputedName);
- }
// DO NOT resolve record values to the name at this point because
// there might be default values for arguments of this def. Those
// arguments might not have been resolved yet so we don't want to
@@ -1754,6 +1748,8 @@ void Record::setName(const std::string &Name) {
/// references.
void Record::resolveReferencesTo(const RecordVal *RV) {
for (unsigned i = 0, e = Values.size(); i != e; ++i) {
+    if (RV == &Values[i]) // Don't resolve the given field against itself.
+ continue;
if (Init *V = Values[i].getValue())
Values[i].setValue(V->resolveReferences(*this, RV));
}
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index acb57f7..2a1e8e4 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -18,9 +18,7 @@
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
-#include <cassert>
namespace llvm {
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 4ec19cc..ca30716 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "asm-printer"
-#include "ARM.h"
#include "ARMAsmPrinter.h"
+#include "ARM.h"
#include "ARMBuildAttrs.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 75b796e..366e2fa 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -935,6 +935,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
}
} else
llvm_unreachable("Unknown reg class!");
@@ -953,6 +955,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
} else
llvm_unreachable("Unknown reg class!");
break;
@@ -2756,24 +2760,24 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD4q8oddPseudo_UPD:
case ARM::VLD4q16oddPseudo_UPD:
case ARM::VLD4q32oddPseudo_UPD:
- case ARM::VLD1DUPq8Pseudo:
- case ARM::VLD1DUPq16Pseudo:
- case ARM::VLD1DUPq32Pseudo:
- case ARM::VLD1DUPq8PseudoWB_fixed:
- case ARM::VLD1DUPq16PseudoWB_fixed:
- case ARM::VLD1DUPq32PseudoWB_fixed:
- case ARM::VLD1DUPq8PseudoWB_register:
- case ARM::VLD1DUPq16PseudoWB_register:
- case ARM::VLD1DUPq32PseudoWB_register:
- case ARM::VLD2DUPd8Pseudo:
- case ARM::VLD2DUPd16Pseudo:
- case ARM::VLD2DUPd32Pseudo:
- case ARM::VLD2DUPd8PseudoWB_fixed:
- case ARM::VLD2DUPd16PseudoWB_fixed:
- case ARM::VLD2DUPd32PseudoWB_fixed:
- case ARM::VLD2DUPd8PseudoWB_register:
- case ARM::VLD2DUPd16PseudoWB_register:
- case ARM::VLD2DUPd32PseudoWB_register:
+ case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq16:
+ case ARM::VLD1DUPq32:
+ case ARM::VLD1DUPq8wb_fixed:
+ case ARM::VLD1DUPq16wb_fixed:
+ case ARM::VLD1DUPq32wb_fixed:
+ case ARM::VLD1DUPq8wb_register:
+ case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_register:
+ case ARM::VLD2DUPd8:
+ case ARM::VLD2DUPd16:
+ case ARM::VLD2DUPd32:
+ case ARM::VLD2DUPd8wb_fixed:
+ case ARM::VLD2DUPd16wb_fixed:
+ case ARM::VLD2DUPd32wb_fixed:
+ case ARM::VLD2DUPd8wb_register:
+ case ARM::VLD2DUPd16wb_register:
+ case ARM::VLD2DUPd32wb_register:
case ARM::VLD4DUPd8Pseudo:
case ARM::VLD4DUPd16Pseudo:
case ARM::VLD4DUPd32Pseudo:
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index d2aff9a..291369f 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMBaseRegisterInfo.h"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
-#include "ARMBaseRegisterInfo.h"
#include "ARMFrameLowering.h"
#include "ARMInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
@@ -79,6 +79,7 @@ getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(ARM::SP);
Reserved.set(ARM::PC);
+ Reserved.set(ARM::FPSCR);
if (TFI->hasFP(MF))
Reserved.set(FramePtr);
if (hasBasePointer(MF))
@@ -492,8 +493,7 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
// When outgoing call frames are so large that we adjust the stack pointer
// around the call, we can no longer use the stack pointer to reach the
// emergency spill slot.
- if (needsStackRealignment(MF) && (MFI->hasVarSizedObjects() ||
- !TFI->hasReservedCallFrame(MF)))
+ if (needsStackRealignment(MF) && !TFI->hasReservedCallFrame(MF))
return true;
// Thumb has trouble with negative offsets from the FP. Thumb2 has a limited
@@ -517,7 +517,6 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
}
bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// We can't realign the stack if:
@@ -532,8 +531,9 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
// register allocation with frame pointer elimination, it is too late now.
if (!MRI->canReserveReg(FramePtr))
return false;
- // We may also need a base pointer if there are dynamic allocas.
- if (!MFI->hasVarSizedObjects())
+ // We may also need a base pointer if there are dynamic allocas or stack
+ // pointer adjustments around calls.
+ if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF))
return true;
if (!EnableBasePointer)
return false;
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index 437b4c7..2b9c55d 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -15,13 +15,13 @@
#ifndef ARMCALLINGCONV_H
#define ARMCALLINGCONV_H
-#include "llvm/CallingConv.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
-#include "ARM.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetInstrInfo.h"
namespace llvm {
@@ -29,7 +29,7 @@ namespace llvm {
static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
CCState &State, bool CanFail) {
- static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+ static const uint16_t RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
// Try to get the first register.
if (unsigned Reg = State.AllocateReg(RegList, 4))
@@ -72,9 +72,9 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
CCState &State, bool CanFail) {
- static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
- static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
- static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };
+ static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
+ static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
+ static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 };
unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
if (Reg == 0) {
@@ -118,8 +118,8 @@ static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo, CCState &State) {
- static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
- static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+ static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
+ static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
if (Reg == 0)
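
The Hi/Lo/Shadow lists above encode AAPCS's rule that an f64 occupies an aligned pair of core registers, so allocating one half must also consume (shadow) the other. A minimal sketch of that pairing decision, with hypothetical names standing in for the CCState bookkeeping:

static unsigned assignF64Pair(bool R0R1Free, bool R2R3Free) {
  if (R0R1Free) return 0;  // halves land in {R0, R1}
  if (R2R3Free) return 2;  // halves land in {R2, R3}
  return ~0u;              // no aligned pair left: value goes on the stack
}
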
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index c4ab99d..c2b7816 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -99,8 +99,8 @@ namespace {
// Entries for NEON load/store information table. The table is sorted by
// PseudoOpc for fast binary-search lookups.
struct NEONLdStTableEntry {
- unsigned PseudoOpc;
- unsigned RealOpc;
+ uint16_t PseudoOpc;
+ uint16_t RealOpc;
bool IsLoad;
bool isUpdating;
bool hasWritebackOperand;
@@ -129,16 +129,6 @@ namespace {
}
static const NEONLdStTableEntry NEONLdStTable[] = {
-{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, false, SingleSpc, 2, 4,false},
-{ ARM::VLD1DUPq16PseudoWB_fixed, ARM::VLD1DUPq16wb_fixed, true, true, true, SingleSpc, 2, 4,false},
-{ ARM::VLD1DUPq16PseudoWB_register, ARM::VLD1DUPq16wb_register, true, true, true, SingleSpc, 2, 4,false},
-{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, false, SingleSpc, 2, 2,false},
-{ ARM::VLD1DUPq32PseudoWB_fixed, ARM::VLD1DUPq32wb_fixed, true, true, false, SingleSpc, 2, 2,false},
-{ ARM::VLD1DUPq32PseudoWB_register, ARM::VLD1DUPq32wb_register, true, true, true, SingleSpc, 2, 2,false},
-{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, false, SingleSpc, 2, 8,false},
-{ ARM::VLD1DUPq8PseudoWB_fixed, ARM::VLD1DUPq8wb_fixed, true, true, false, SingleSpc, 2, 8,false},
-{ ARM::VLD1DUPq8PseudoWB_register, ARM::VLD1DUPq8wb_register, true, true, true, SingleSpc, 2, 8,false},
-
{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true},
{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true},
{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2 ,true},
@@ -149,16 +139,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false},
{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false},
-{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, false, SingleSpc, 2, 4,false},
-{ ARM::VLD2DUPd16PseudoWB_fixed, ARM::VLD2DUPd16wb_fixed, true, true, false, SingleSpc, 2, 4,false},
-{ ARM::VLD2DUPd16PseudoWB_register, ARM::VLD2DUPd16wb_register, true, true, true, SingleSpc, 2, 4,false},
-{ ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, false, SingleSpc, 2, 2,false},
-{ ARM::VLD2DUPd32PseudoWB_fixed, ARM::VLD2DUPd32wb_fixed, true, true, false, SingleSpc, 2, 2,false},
-{ ARM::VLD2DUPd32PseudoWB_register, ARM::VLD2DUPd32wb_register, true, true, true, SingleSpc, 2, 2,false},
-{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, false, SingleSpc, 2, 8,false},
-{ ARM::VLD2DUPd8PseudoWB_fixed, ARM::VLD2DUPd8wb_fixed, true, true, false, SingleSpc, 2, 8,false},
-{ ARM::VLD2DUPd8PseudoWB_register, ARM::VLD2DUPd8wb_register, true, true, true, SingleSpc, 2, 8,false},
-
{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true},
{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true},
{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true},
@@ -345,7 +325,7 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
/// load or store pseudo instruction.
static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
- unsigned NumEntries = array_lengthof(NEONLdStTable);
+ const unsigned NumEntries = array_lengthof(NEONLdStTable);
#ifndef NDEBUG
// Make sure the table is sorted.
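
The sorted-by-PseudoOpc invariant asserted under !NDEBUG is what permits a logarithmic lookup. A lower_bound-style sketch of what LookupNEONLdSt does, reusing NEONLdStTable and NumEntries from the code above:

static const NEONLdStTableEntry *lookupSketch(unsigned Opcode) {
  unsigned Lo = 0, Hi = NumEntries;   // table is sorted by PseudoOpc
  while (Lo < Hi) {
    unsigned Mid = Lo + (Hi - Lo) / 2;
    if (NEONLdStTable[Mid].PseudoOpc < Opcode)
      Lo = Mid + 1;
    else
      Hi = Mid;
  }
  if (Lo != NumEntries && NEONLdStTable[Lo].PseudoOpc == Opcode)
    return &NEONLdStTable[Lo];
  return 0;                           // not a NEON ld/st pseudo
}
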
@@ -1090,24 +1070,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD4q8oddPseudo_UPD:
case ARM::VLD4q16oddPseudo_UPD:
case ARM::VLD4q32oddPseudo_UPD:
- case ARM::VLD1DUPq8Pseudo:
- case ARM::VLD1DUPq16Pseudo:
- case ARM::VLD1DUPq32Pseudo:
- case ARM::VLD1DUPq8PseudoWB_fixed:
- case ARM::VLD1DUPq16PseudoWB_fixed:
- case ARM::VLD1DUPq32PseudoWB_fixed:
- case ARM::VLD1DUPq8PseudoWB_register:
- case ARM::VLD1DUPq16PseudoWB_register:
- case ARM::VLD1DUPq32PseudoWB_register:
- case ARM::VLD2DUPd8Pseudo:
- case ARM::VLD2DUPd16Pseudo:
- case ARM::VLD2DUPd32Pseudo:
- case ARM::VLD2DUPd8PseudoWB_fixed:
- case ARM::VLD2DUPd16PseudoWB_fixed:
- case ARM::VLD2DUPd32PseudoWB_fixed:
- case ARM::VLD2DUPd8PseudoWB_register:
- case ARM::VLD2DUPd16PseudoWB_register:
- case ARM::VLD2DUPd32PseudoWB_register:
case ARM::VLD3DUPd8Pseudo:
case ARM::VLD3DUPd16Pseudo:
case ARM::VLD3DUPd32Pseudo:
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 818b202..a24eab4 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -1384,7 +1384,10 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
SrcVT == MVT::i1) {
const APInt &CIVal = ConstInt->getValue();
Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
- if (Imm < 0) {
+ // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
+    // than a cmn, because there is no way to represent 2147483648 as a
+ // signed 32-bit int.
+ if (Imm < 0 && Imm != (int)0x80000000) {
isNegativeImm = true;
Imm = -Imm;
}
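
The guard exists because the cmp-to-cmn rewrite negates the immediate, and INT_MIN is the one negative value whose negation does not fit in a signed 32-bit int. A standalone statement of the condition:

#include <climits>
// cmp rN, #-Imm may become cmn rN, #Imm only if -Imm is representable;
// -INT_MIN would be 2147483648, which exceeds INT_MAX.
static bool canUseCmn(int Imm) {
  return Imm < 0 && Imm != INT_MIN;
}
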
@@ -1475,7 +1478,6 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool ARMFastISel::SelectCmp(const Instruction *I) {
const CmpInst *CI = cast<CmpInst>(I);
- Type *Ty = CI->getOperand(0)->getType();
// Get the compare predicate.
ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
@@ -1495,11 +1497,10 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
unsigned DestReg = createResultReg(RC);
Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
unsigned ZeroReg = TargetMaterializeConstant(Zero);
- bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
- unsigned CondReg = isFloat ? ARM::FPSCR : ARM::CPSR;
+  // ARMEmitCmp emits an FMSTAT when necessary, so it's always safe to use CPSR.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
.addReg(ZeroReg).addImm(1)
- .addImm(ARMPred).addReg(CondReg);
+ .addImm(ARMPred).addReg(ARM::CPSR);
UpdateValueMap(I, DestReg);
return true;
@@ -1851,6 +1852,48 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));
+ // Check that we can handle all of the arguments. If we can't, then bail out
+ // now before we add code to the MBB.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // We don't handle NEON/vector parameters yet.
+ if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
+ return false;
+
+    // Check that we can handle the argument in its assigned location.
+ if (VA.isRegLoc() && !VA.needsCustom()) {
+ continue;
+ } else if (VA.needsCustom()) {
+ // TODO: We need custom lowering for vector (v2f64) args.
+ if (VA.getLocVT() != MVT::f64 ||
+ // TODO: Only handle register args for now.
+ !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
+ return false;
+ } else {
+ switch (static_cast<EVT>(ArgVT).getSimpleVT().SimpleTy) {
+ default:
+ return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ break;
+ case MVT::f32:
+ if (!Subtarget->hasVFP2())
+ return false;
+ break;
+ case MVT::f64:
+ if (!Subtarget->hasVFP2())
+ return false;
+ break;
+ }
+ }
+ }
+
+  // At this point, we know we can handle the call's arguments in fast isel.
+
// Get a count of how many bytes are to be pushed on the stack.
NumBytes = CCInfo.getNextStackOffset();
@@ -1866,9 +1909,8 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
unsigned Arg = ArgRegs[VA.getValNo()];
MVT ArgVT = ArgVTs[VA.getValNo()];
- // We don't handle NEON/vector parameters yet.
- if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
- return false;
+ assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
+ "We don't handle NEON/vector parameters yet.");
// Handle arg promotion, etc.
switch (VA.getLocInfo()) {
@@ -1908,12 +1950,13 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
RegArgs.push_back(VA.getLocReg());
} else if (VA.needsCustom()) {
// TODO: We need custom lowering for vector (v2f64) args.
- if (VA.getLocVT() != MVT::f64) return false;
+ assert(VA.getLocVT() == MVT::f64 &&
+ "Custom lowering for v2f64 args not available");
CCValAssign &NextVA = ArgLocs[++i];
- // TODO: Only handle register args for now.
- if(!(VA.isRegLoc() && NextVA.isRegLoc())) return false;
+ assert(VA.isRegLoc() && NextVA.isRegLoc() &&
+ "We only handle register args!");
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::VMOVRRD), VA.getLocReg())
@@ -1929,9 +1972,11 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
Addr.Base.Reg = ARM::SP;
Addr.Offset = VA.getLocMemOffset();
- if (!ARMEmitStore(ArgVT, Arg, Addr)) return false;
+ bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
+ assert(EmitRet && "Could not emit a store for argument!");
}
}
+
return true;
}
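
The effect of the new pre-pass is that every bail-out is decided before the first BuildMI, so the emission loop can assert instead of returning. A sketch of the two-pass shape, with placeholder types and helpers standing in for the real checks and emission calls:

#include <cassert>
#include <vector>
struct ArgInfo { /* value type, assigned location, flags (placeholder) */ };
bool canHandleArg(const ArgInfo &); // stands in for the checks above
bool emitArg(const ArgInfo &);      // stands in for the BuildMI calls

bool processArgs(const std::vector<ArgInfo> &Args) {
  for (size_t i = 0, e = Args.size(); i != e; ++i)   // pass 1: checks only
    if (!canHandleArg(Args[i]))
      return false;                  // nothing emitted yet, safe to bail
  for (size_t i = 0, e = Args.size(); i != e; ++i) { // pass 2: emission
    bool OK = emitArg(Args[i]);
    (void)OK;
    assert(OK && "argument was vetted in pass 1");
  }
  return true;
}
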
@@ -2136,7 +2181,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
// TODO: Turn this into the table of arm call ops.
MachineInstrBuilder MIB;
unsigned CallOpc = ARMSelectCallOp(NULL);
- if(isThumb2)
+ if (isThumb2)
// Explicitly adding the predicate here.
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(CallOpc)))
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 0fd6025..bd4b2a9 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -501,7 +501,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
// SP can move around if there are allocas. We may also lose track of SP
// when emergency spilling inside a non-reserved call frame setup.
- bool hasMovingSP = MFI->hasVarSizedObjects() || !hasReservedCallFrame(MF);
+ bool hasMovingSP = !hasReservedCallFrame(MF);
// When dynamically realigning the stack, use the frame pointer for
// parameters, and the stack/base pointer for locals.
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index c99db98..ffb9acb 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1589,9 +1589,9 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
- case ARM::VLD2DUPd8PseudoWB_fixed: return ARM::VLD2DUPd8PseudoWB_register;
- case ARM::VLD2DUPd16PseudoWB_fixed: return ARM::VLD2DUPd16PseudoWB_register;
- case ARM::VLD2DUPd32PseudoWB_fixed: return ARM::VLD2DUPd32PseudoWB_register;
+ case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
+ case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
+ case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
}
return Opc; // If not one we handle, return it unchanged.
}
@@ -2891,8 +2891,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD2DUP: {
- unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd16Pseudo,
- ARM::VLD2DUPd32Pseudo };
+ unsigned Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
+ ARM::VLD2DUPd32 };
return SelectVLDDup(N, false, 2, Opcodes);
}
@@ -2909,9 +2909,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD2DUP_UPD: {
- unsigned Opcodes[] = { ARM::VLD2DUPd8PseudoWB_fixed,
- ARM::VLD2DUPd16PseudoWB_fixed,
- ARM::VLD2DUPd32PseudoWB_fixed };
+ unsigned Opcodes[] = { ARM::VLD2DUPd8wb_fixed, ARM::VLD2DUPd16wb_fixed,
+ ARM::VLD2DUPd32wb_fixed };
return SelectVLDDup(N, true, 2, Opcodes);
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 477b5f4..e26dd22 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -13,10 +13,10 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-isel"
+#include "ARMISelLowering.h"
#include "ARM.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
-#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
@@ -49,7 +49,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include <sstream>
using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
@@ -87,7 +86,7 @@ namespace {
}
// The APCS parameter registers.
-static const unsigned GPRArgRegs[] = {
+static const uint16_t GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
@@ -456,6 +455,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
}
+ setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
+
if (Subtarget->hasNEON()) {
addDRTypeForNEON(MVT::v2f32);
addDRTypeForNEON(MVT::v8i8);
@@ -3673,6 +3674,27 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
return Result;
}
+SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const {
+ if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16())
+ return SDValue();
+
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
+ assert(Op.getValueType() == MVT::f32 &&
+ "ConstantFP custom lowering should only occur for f32.");
+
+ APFloat FPVal = CFP->getValueAPF();
+ int ImmVal = ARM_AM::getFP32Imm(FPVal);
+ if (ImmVal == -1)
+ return SDValue();
+
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
+ SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, NewVal);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
+ DAG.getConstant(0, MVT::i32));
+}
+
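
ARM_AM::getFP32Imm returns -1 unless the float has the form ±(16..31)/16 × 2^e with e in [-3, 4], the set that VMOV's 8-bit immediate can encode. A representability check equivalent to that test (a sketch; the actual imm8 bit packing is omitted):

#include <cmath>
static bool fitsVMOVImm(float F) {
  if (F == 0.0f || !std::isfinite(F))
    return false;
  int Exp;
  float Mant = std::frexp(std::fabs(F), &Exp); // |F| = Mant * 2^Exp, Mant in [0.5,1)
  Exp -= 1;                                    // significand now in [1,2)
  if (Exp < -3 || Exp > 4)
    return false;
  float Sig = Mant * 32.0f;                    // scaled significand in [16,32)
  return Sig == (float)(int)Sig;               // at most 4 fraction bits
}
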
/// isNEONModifiedImm - Check if the specified splat value corresponds to a
/// valid vector constant for a NEON instruction with a "modified immediate"
/// operand (e.g., VMOV). If so, return the encoded value.
@@ -5109,6 +5131,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
case ISD::SETCC: return LowerVSETCC(Op, DAG);
+ case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
@@ -6842,33 +6865,63 @@ static SDValue PerformMULCombine(SDNode *N,
if (!C)
return SDValue();
- uint64_t MulAmt = C->getZExtValue();
+ int64_t MulAmt = C->getSExtValue();
unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
+
ShiftAmt = ShiftAmt & (32 - 1);
SDValue V = N->getOperand(0);
DebugLoc DL = N->getDebugLoc();
SDValue Res;
MulAmt >>= ShiftAmt;
- if (isPowerOf2_32(MulAmt - 1)) {
- // (mul x, 2^N + 1) => (add (shl x, N), x)
- Res = DAG.getNode(ISD::ADD, DL, VT,
- V, DAG.getNode(ISD::SHL, DL, VT,
- V, DAG.getConstant(Log2_32(MulAmt-1),
- MVT::i32)));
- } else if (isPowerOf2_32(MulAmt + 1)) {
- // (mul x, 2^N - 1) => (sub (shl x, N), x)
- Res = DAG.getNode(ISD::SUB, DL, VT,
- DAG.getNode(ISD::SHL, DL, VT,
- V, DAG.getConstant(Log2_32(MulAmt+1),
- MVT::i32)),
- V);
- } else
- return SDValue();
+
+ if (MulAmt >= 0) {
+ if (isPowerOf2_32(MulAmt - 1)) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ Res = DAG.getNode(ISD::ADD, DL, VT,
+ V,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmt - 1),
+ MVT::i32)));
+ } else if (isPowerOf2_32(MulAmt + 1)) {
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmt + 1),
+ MVT::i32)),
+ V);
+ } else
+ return SDValue();
+ } else {
+ uint64_t MulAmtAbs = -MulAmt;
+ if (isPowerOf2_32(MulAmtAbs + 1)) {
+ // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ V,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmtAbs + 1),
+ MVT::i32)));
+ } else if (isPowerOf2_32(MulAmtAbs - 1)) {
+ // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
+ Res = DAG.getNode(ISD::ADD, DL, VT,
+ V,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmtAbs-1),
+ MVT::i32)));
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, MVT::i32),Res);
+
+ } else
+ return SDValue();
+ }
if (ShiftAmt != 0)
- Res = DAG.getNode(ISD::SHL, DL, VT, Res,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ Res = DAG.getNode(ISD::SHL, DL, VT,
+ Res, DAG.getConstant(ShiftAmt, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
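
The four identities the combine now covers, in directly checkable form (the operands here are small enough that no signed overflow occurs):

#include <cassert>
#include <cstdint>
int main() {
  int32_t x = 1234;
  int n = 3;
  assert(x * ((1 << n) + 1) == (x << n) + x);     // mul by  2^n + 1
  assert(x * ((1 << n) - 1) == (x << n) - x);     // mul by  2^n - 1
  assert(x * -((1 << n) - 1) == x - (x << n));    // mul by -(2^n - 1)
  assert(x * -((1 << n) + 1) == -((x << n) + x)); // mul by -(2^n + 1)
  return 0;
}
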
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 7f12293..a71b74e 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -15,6 +15,7 @@
#ifndef ARMISELLOWERING_H
#define ARMISELLOWERING_H
+#include "ARM.h"
#include "ARMSubtarget.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -434,6 +435,8 @@ namespace llvm {
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const;
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index 7bedf30..72af535 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -14,11 +14,11 @@
#ifndef ARMINSTRUCTIONINFO_H
#define ARMINSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
+#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
-#include "ARM.h"
+#include "llvm/Target/TargetInstrInfo.h"
namespace llvm {
class ARMSubtarget;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 0b1406e..8196582 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -637,6 +637,7 @@ def BitfieldAsmOperand : AsmOperandClass {
let Name = "Bitfield";
let ParserMethod = "parseBitfield";
}
+
def bf_inv_mask_imm : Operand<i32>,
PatLeaf<(imm), [{
return ARM::isBitFieldInvertedMask(N->getZExtValue());
@@ -4084,74 +4085,43 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
[/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $Rd">;
-let isCodeGenOnly = 1 in {
// Conditional instructions
-multiclass AsI1_bincc_irs<bits<4> opcod, string opc,
- InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> {
- def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
- iii, opc, "\t$Rd, $Rn, $imm", []>,
- RegConstraint<"$Rn = $Rd"> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> imm;
- let Inst{25} = 1;
- let Inst{19-16} = Rn;
- let Inst{15-12} = Rd;
- let Inst{11-0} = imm;
- }
- def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
- iir, opc, "\t$Rd, $Rn, $Rm", []>,
- RegConstraint<"$Rn = $Rd"> {
- bits<4> Rd;
- bits<4> Rn;
- bits<4> Rm;
- let Inst{25} = 0;
- let Inst{19-16} = Rn;
- let Inst{15-12} = Rd;
- let Inst{11-4} = 0b00000000;
- let Inst{3-0} = Rm;
- }
-
- def rsi : AsI1<opcod, (outs GPR:$Rd),
- (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
- iis, opc, "\t$Rd, $Rn, $shift", []>,
- RegConstraint<"$Rn = $Rd"> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> shift;
- let Inst{25} = 0;
- let Inst{19-16} = Rn;
- let Inst{15-12} = Rd;
- let Inst{11-5} = shift{11-5};
- let Inst{4} = 0;
- let Inst{3-0} = shift{3-0};
- }
-
- def rsr : AsI1<opcod, (outs GPR:$Rd),
- (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
- iis, opc, "\t$Rd, $Rn, $shift", []>,
- RegConstraint<"$Rn = $Rd"> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> shift;
- let Inst{25} = 0;
- let Inst{19-16} = Rn;
- let Inst{15-12} = Rd;
- let Inst{11-8} = shift{11-8};
- let Inst{7} = 0;
- let Inst{6-5} = shift{6-5};
- let Inst{4} = 1;
- let Inst{3-0} = shift{3-0};
- }
-} // AsI1_bincc_irs
-
-defm ANDCC : AsI1_bincc_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
-defm ORRCC : AsI1_bincc_irs<0b1100, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
-defm EORCC : AsI1_bincc_irs<0b0001, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+multiclass AsI1_bincc_irs<Instruction iri, Instruction irr, Instruction irsi,
+ Instruction irsr,
+ InstrItinClass iii, InstrItinClass iir,
+ InstrItinClass iis> {
+ def ri : ARMPseudoExpand<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s),
+ 4, iii, [],
+ (iri GPR:$Rd, GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+ def rr : ARMPseudoExpand<(outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, iir, [],
+ (irr GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+ def rsi : ARMPseudoExpand<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s),
+ 4, iis, [],
+ (irsi GPR:$Rd, GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+ def rsr : ARMPseudoExpand<(outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s),
+ 4, iis, [],
+ (irsr GPR:$Rd, GPR:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+}
+
+defm ANDCC : AsI1_bincc_irs<ANDri, ANDrr, ANDrsi, ANDrsr,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+defm ORRCC : AsI1_bincc_irs<ORRri, ORRrr, ORRrsi, ORRrsr,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+defm EORCC : AsI1_bincc_irs<EORri, EORrr, EORrsi, EORrsr,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
-} // isCodeGenOnly
} // neverHasSideEffects
+
//===----------------------------------------------------------------------===//
// Atomic operations intrinsics
//
@@ -4605,10 +4575,16 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>;
+def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
+ (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0, pred:$p)>;
def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
(outs GPR:$Rt),
(ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
imm0_7:$opc2), []>;
+def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm",
+ (MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0, pred:$p)>;
def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
(MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
@@ -4642,10 +4618,16 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>;
+def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm",
+ (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
(outs GPR:$Rt),
(ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
imm0_7:$opc2), []>;
+def : ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm",
+ (MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
imm:$CRm, imm:$opc2),
@@ -5252,6 +5234,20 @@ def : ARMInstAlias<"mul${s}${p} $Rn, $Rm",
def : ARMInstAlias<"neg${s}${p} $Rd, $Rm",
(RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>;
+// Pre-v6, 'mov r0, r0' was used as a NOP encoding.
+def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg)>,
+ Requires<[IsARM, NoV6]>;
+
+// UMULL/SMULL are available on all arches, but the instruction definitions
+// need different constraints pre-v6. Use these aliases for the assembly
+// parsing on pre-v6.
+def : InstAlias<"smull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+ (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+ (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+
// 'it' blocks in ARM mode just validate the predicates. The IT itself
// is discarded.
def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 8684ce1..f61eb2b 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -94,7 +94,7 @@ def VecListDPairAsmOperand : AsmOperandClass {
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
-def VecListDPair : RegisterOperand<DPair, "printVectorListDPair"> {
+def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
@@ -121,7 +121,7 @@ def VecListDPairSpacedAsmOperand : AsmOperandClass {
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
-def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListDPairSpaced"> {
+def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
@@ -153,23 +153,24 @@ def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
-def VecListTwoDAllLanesAsmOperand : AsmOperandClass {
- let Name = "VecListTwoDAllLanes";
+def VecListDPairAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListDPairAllLanes";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
-def VecListTwoDAllLanes : RegisterOperand<DPR, "printVectorListTwoAllLanes"> {
- let ParserMatchClass = VecListTwoDAllLanesAsmOperand;
+def VecListDPairAllLanes : RegisterOperand<DPair,
+ "printVectorListTwoAllLanes"> {
+ let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
-def VecListTwoQAllLanesAsmOperand : AsmOperandClass {
- let Name = "VecListTwoQAllLanes";
+def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListDPairSpacedAllLanes";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
-def VecListTwoQAllLanes : RegisterOperand<DPR,
+def VecListDPairSpacedAllLanes : RegisterOperand<DPair,
"printVectorListTwoSpacedAllLanes"> {
- let ParserMatchClass = VecListTwoQAllLanesAsmOperand;
+ let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
@@ -1276,39 +1277,32 @@ class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
}
-class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
- let Pattern = [(set QPR:$dst,
- (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))];
-}
-
def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
-def VLD1DUPq8Pseudo : VLD1QDUPPseudo<v16i8, extloadi8>;
-def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
-def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;
-
def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
(VLD1DUPd32 addrmode6:$addr)>;
-def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
- (VLD1DUPq32Pseudo addrmode6:$addr)>;
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
-
-class VLD1QDUP<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListTwoDAllLanes:$Vd),
+class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
(ins addrmode6dup:$Rn), IIC_VLD1dup,
- "vld1", Dt, "$Vd, $Rn", "", []> {
+ "vld1", Dt, "$Vd, $Rn", "",
+ [(set VecListDPairAllLanes:$Vd,
+ (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
}
-def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8">;
-def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
-def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;
+def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>;
+def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>;
+def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>;
+def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+ (VLD1DUPq32 addrmode6:$addr)>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
@@ -1333,7 +1327,7 @@ multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
}
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
- (outs VecListTwoDAllLanes:$Vd, GPR:$wb),
+ (outs VecListDPairAllLanes:$Vd, GPR:$wb),
(ins addrmode6dup:$Rn), IIC_VLD1dupu,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
@@ -1343,7 +1337,7 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
let AsmMatchConverter = "cvtVLDwbFixed";
}
def _register : NLdSt<1, 0b10, 0b1100, op7_4,
- (outs VecListTwoDAllLanes:$Vd, GPR:$wb),
+ (outs VecListDPairAllLanes:$Vd, GPR:$wb),
(ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
@@ -1361,13 +1355,6 @@ defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">;
defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;
-def VLD1DUPq8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
-def VLD1DUPq16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
-def VLD1DUPq32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
-def VLD1DUPq8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
-def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
-def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
-
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
: NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
@@ -1378,18 +1365,14 @@ class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
let DecoderMethod = "DecodeVLD2DupInstruction";
}
-def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListTwoDAllLanes>;
-def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListTwoDAllLanes>;
-def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListTwoDAllLanes>;
+def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes>;
+def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>;
+def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>;
-def VLD2DUPd8Pseudo : VLDQPseudo<IIC_VLD2dup>;
-def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>;
-def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>;
-
-// ...with double-spaced registers (not used for codegen):
-def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListTwoQAllLanes>;
-def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListTwoQAllLanes>;
-def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListTwoQAllLanes>;
+// ...with double-spaced registers
+def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes>;
+def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
+def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
// ...with address register writeback:
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
@@ -1414,20 +1397,13 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
}
}
-defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListTwoDAllLanes>;
-defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListTwoDAllLanes>;
-defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListTwoDAllLanes>;
-
-defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListTwoQAllLanes>;
-defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListTwoQAllLanes>;
-defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListTwoQAllLanes>;
+defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes>;
+defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>;
+defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>;
-def VLD2DUPd8PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>;
-def VLD2DUPd8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>;
-def VLD2DUPd16PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>;
-def VLD2DUPd16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>;
-def VLD2DUPd32PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>;
-def VLD2DUPd32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>;
+defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes>;
+defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
+defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index e8984e1..1f7edc1 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -574,7 +574,7 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
cc_out:$s)>;
// and with the optional destination operand, too.
- def : t2InstAlias<!strconcat(opc, "${s}${p}.ri", " $Rdn, $imm"),
+ def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rdn, $imm"),
(!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
t2_so_imm:$imm, pred:$p,
cc_out:$s)>;
@@ -2952,45 +2952,36 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
(ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>,
RegConstraint<"$false = $Rd">;
+} // isCodeGenOnly = 1
-multiclass T2I_bincc_irs<bits<4> opcod, string opc,
+multiclass T2I_bincc_irs<Instruction iri, Instruction irr, Instruction irs,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> {
// shifted imm
- def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
- iii, opc, ".w\t$Rd, $Rn, $imm", []>,
- RegConstraint<"$Rn = $Rd"> {
- let Inst{31-27} = 0b11110;
- let Inst{25} = 0;
- let Inst{24-21} = opcod;
- let Inst{15} = 0;
- }
+ def ri : t2PseudoExpand<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s),
+ 4, iii, [],
+ (iri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
// register
- def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
- iir, opc, ".w\t$Rd, $Rn, $Rm", []>,
- RegConstraint<"$Rn = $Rd"> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = opcod;
- let Inst{14-12} = 0b000; // imm3
- let Inst{7-6} = 0b00; // imm2
- let Inst{5-4} = 0b00; // type
- }
+ def rr : t2PseudoExpand<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s),
+ 4, iir, [],
+ (irr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
// shifted register
- def rs : T2sTwoRegShiftedReg<(outs rGPR:$Rd),
- (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
- iis, opc, ".w\t$Rd, $Rn, $ShiftedRm", []>,
- RegConstraint<"$Rn = $Rd"> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = opcod;
- }
+ def rs : t2PseudoExpand<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s),
+ 4, iis, [],
+ (irs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
} // T2I_bincc_irs
-defm t2ANDCC : T2I_bincc_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
-defm t2ORRCC : T2I_bincc_irs<0b0010, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
-defm t2EORCC : T2I_bincc_irs<0b0100, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
-
-} // isCodeGenOnly = 1
+defm t2ANDCC : T2I_bincc_irs<t2ANDri, t2ANDrr, t2ANDrs,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
+defm t2ORRCC : T2I_bincc_irs<t2ORRri, t2ORRrr, t2ORRrs,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
+defm t2EORCC : T2I_bincc_irs<t2EORri, t2EORrr, t2EORrs,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
} // neverHasSideEffects
//===----------------------------------------------------------------------===//
@@ -3768,20 +3759,32 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>;
+def : t2InstAlias<"mcr $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,
(outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>;
+def : t2InstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
/* from coprocessor to ARM core register */
def t2MRC : t2MovRCopro<0b1110, "mrc", 1,
(outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2), []>;
+def : t2InstAlias<"mrc $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1,
(outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2), []>;
+def : t2InstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
(t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index aa10af7..e9d5720 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -206,6 +206,14 @@ def : InstAlias<"vpop${p} $r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>,
Requires<[HasVFP2]>;
def : InstAlias<"vpop${p} $r", (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>,
Requires<[HasVFP2]>;
+defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
+ (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
+ (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
+ (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
+ (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>;
// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
@@ -286,7 +294,7 @@ def : Pat<(fmul (fneg SPR:$a), SPR:$b),
(VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
// These are encoded as unary instructions.
-let Defs = [FPSCR] in {
+let Defs = [FPSCR_NZCV] in {
def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
@@ -315,7 +323,7 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-} // Defs = [FPSCR]
+} // Defs = [FPSCR_NZCV]
//===----------------------------------------------------------------------===//
// FP Unary Operations.
@@ -335,7 +343,7 @@ def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
let D = VFPNeonA8Domain;
}
-let Defs = [FPSCR] in {
+let Defs = [FPSCR_NZCV] in {
def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins DPR:$Dd),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
@@ -376,7 +384,7 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-} // Defs = [FPSCR]
+} // Defs = [FPSCR_NZCV]
def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
@@ -810,7 +818,29 @@ let Constraints = "$a = $dst" in {
// FP to Fixed-Point:
-def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
+// Single Precision register
+class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
+ bits<5> dst;
+ // if dp_operation then UInt(D:Vd) else UInt(Vd:D);
+ let Inst{22} = dst{0};
+ let Inst{15-12} = dst{4-1};
+}
+
+// Double Precision register
+class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
+ bits<5> dst;
+ // if dp_operation then UInt(D:Vd) else UInt(Vd:D);
+ let Inst{22} = dst{4};
+ let Inst{15-12} = dst{3-0};
+}
+
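
The two classes differ only in where the fifth register bit sits: single-precision encodes the index as Vd:D (the D bit is the LSB), double-precision as D:Vd (the D bit is the MSB). A sketch of the packing into bits 22 and 15-12:

#include <cstdint>
static uint32_t encodeS(unsigned S) {  // S0..S31: Vd = S >> 1, D = S & 1
  return ((S & 1u) << 22) | (((S >> 1) & 0xFu) << 12);
}
static uint32_t encodeD(unsigned D) {  // D0..D31: D bit is the index MSB
  return (((D >> 4) & 1u) << 22) | ((D & 0xFu) << 12);
}
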
+def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
@@ -818,7 +848,7 @@ def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
let D = VFPNeonA8Domain;
}
-def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
+def VTOUHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
@@ -826,7 +856,7 @@ def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
let D = VFPNeonA8Domain;
}
-def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
+def VTOSLS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
@@ -834,7 +864,7 @@ def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
let D = VFPNeonA8Domain;
}
-def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
+def VTOULS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
@@ -842,25 +872,25 @@ def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
let D = VFPNeonA8Domain;
}
-def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
+def VTOSHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>;
-def VTOUHD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 0,
+def VTOUHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>;
-def VTOSLD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 1,
+def VTOSLD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>;
-def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1,
+def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>;
// Fixed-Point to FP:
-def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
+def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
@@ -868,7 +898,7 @@ def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
let D = VFPNeonA8Domain;
}
-def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
+def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
@@ -876,7 +906,7 @@ def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
let D = VFPNeonA8Domain;
}
-def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
+def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
@@ -884,7 +914,7 @@ def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
let D = VFPNeonA8Domain;
}
-def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
+def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
@@ -892,19 +922,19 @@ def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
let D = VFPNeonA8Domain;
}
-def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
+def VSHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>;
-def VUHTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 0,
+def VUHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>;
-def VSLTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 1,
+def VSLTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>;
-def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
+def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>;
@@ -1166,9 +1196,9 @@ class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
// APSR is the application-level alias of CPSR. This moves the FPSCR
// N, Z, C, V flags to APSR.
-let Defs = [CPSR], Uses = [FPSCR], Rt = 0b1111 /* apsr_nzcv */ in
+let Defs = [CPSR], Uses = [FPSCR_NZCV], Rt = 0b1111 /* apsr_nzcv */ in
def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
- "vmrs", "\tapsr_nzcv, fpscr", [(arm_fmstat)]>;
+ "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>;
// Application level FPSCR -> GPR
let hasSideEffects = 1, Uses = [FPSCR] in
@@ -1182,6 +1212,10 @@ let Uses = [FPSCR] in {
"vmrs", "\t$Rt, fpexc", []>;
def VMRS_FPSID : MovFromVFP<0b0000 /* fpsid */, (outs GPR:$Rt), (ins),
"vmrs", "\t$Rt, fpsid", []>;
+ def VMRS_MVFR0 : MovFromVFP<0b0111 /* mvfr0 */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, mvfr0", []>;
+ def VMRS_MVFR1 : MovFromVFP<0b0110 /* mvfr1 */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, mvfr1", []>;
}
//===----------------------------------------------------------------------===//
@@ -1304,6 +1338,13 @@ def : VFP2MnemonicAlias<"fcmps", "vcmp.f32">;
def : VFP2MnemonicAlias<"fcmpd", "vcmp.f64">;
def : VFP2MnemonicAlias<"fdivs", "vdiv.f32">;
def : VFP2MnemonicAlias<"fdivd", "vdiv.f64">;
+def : VFP2MnemonicAlias<"fmrx", "vmrs">;
+def : VFP2MnemonicAlias<"fmxr", "vmsr">;
+
+// Be friendly and accept the old form of zero-compare
+def : VFP2InstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>;
+def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>;
+
def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index afbe0e4..753e578 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -62,7 +62,7 @@ extern "C" {
// concerned, so we can't just preserve the callee saved regs.
"stmdb sp!, {r0, r1, r2, r3, lr}\n"
#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
- "fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+ "vstmdb sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
#endif
// The LR contains the address of the stub function on entry.
// pass it as the argument to the C part of the callback
@@ -86,7 +86,7 @@ extern "C" {
//
#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
// Restore VFP caller-saved registers.
- "fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+ "vldmia sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
#endif
//
// We need to exchange the values in slots 0 and 1 so we can
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index 1f83762..6f3819a 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMRegisterInfo.h"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
-#include "ARMRegisterInfo.h"
using namespace llvm;
void ARMRegisterInfo::anchor() { }
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index 65ed95d..8a24842 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -15,13 +15,12 @@
#define ARMREGISTERINFO_H
#include "ARM.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "ARMBaseRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class ARMBaseInstrInfo;
- class Type;
struct ARMRegisterInfo : public ARMBaseRegisterInfo {
virtual void anchor();
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index b16a12c..1327fb8 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -153,14 +153,21 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>;
}
// Current Program Status Register.
-def CPSR : ARMReg<0, "cpsr">;
-def APSR : ARMReg<1, "apsr">;
-def SPSR : ARMReg<2, "spsr">;
-def FPSCR : ARMReg<3, "fpscr">;
-def ITSTATE : ARMReg<4, "itstate">;
+// We model fpscr with two registers: FPSCR models the control bits and will be
+// reserved. FPSCR_NZCV models the flag bits and will be unreserved.
+def CPSR : ARMReg<0, "cpsr">;
+def APSR : ARMReg<1, "apsr">;
+def SPSR : ARMReg<2, "spsr">;
+def FPSCR : ARMReg<3, "fpscr">;
+def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> {
+ let Aliases = [FPSCR];
+}
+def ITSTATE : ARMReg<4, "itstate">;
// Special Registers - only available in privileged mode.
def FPSID : ARMReg<0, "fpsid">;
+def MVFR1 : ARMReg<6, "mvfr1">;
+def MVFR0 : ARMReg<7, "mvfr0">;
def FPEXC : ARMReg<8, "fpexc">;
// Register classes.
@@ -304,7 +311,8 @@ def TuplesOE2D : RegisterTuples<[dsub_0, dsub_1],
// Register class representing a pair of consecutive D registers.
// Use the Q registers for the even-odd pairs.
-def DPair : RegisterClass<"ARM", [v2i64], 128, (interleave QPR, TuplesOE2D)> {
+def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (interleave QPR, TuplesOE2D)> {
// Allocate starting at non-VFP2 registers D16-D31 first.
let AltOrders = [(rotl DPair, 16)];
let AltOrderSelect = [{ return 1; }];
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 2045482..911eb13 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -1101,13 +1101,8 @@ public:
return VectorList.Count == 4;
}
- bool isVecListTwoQ() const {
- if (!isDoubleSpacedVectorList()) return false;
- return VectorList.Count == 2;
- }
-
bool isVecListDPairSpaced() const {
- if (!isSingleSpacedVectorList()) return false;
+ if (isSingleSpacedVectorList()) return false;
return (ARMMCRegisterClasses[ARM::DPairSpcRegClassID]
.contains(VectorList.RegNum));
}
@@ -1133,12 +1128,13 @@ public:
return VectorList.Count == 1;
}
- bool isVecListTwoDAllLanes() const {
+ bool isVecListDPairAllLanes() const {
if (!isSingleSpacedVectorAllLanes()) return false;
- return VectorList.Count == 2;
+ return (ARMMCRegisterClasses[ARM::DPairRegClassID]
+ .contains(VectorList.RegNum));
}
- bool isVecListTwoQAllLanes() const {
+ bool isVecListDPairSpacedAllLanes() const {
if (!isDoubleSpacedVectorAllLanes()) return false;
return VectorList.Count == 2;
}
@@ -2858,8 +2854,12 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!RC->contains(Reg))
return Error(RegLoc, "invalid register in register list");
// List must be monotonically increasing.
- if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg))
- return Error(RegLoc, "register list not in ascending order");
+ if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg)) {
+ if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+ Warning(RegLoc, "register list not in ascending order");
+ else
+ return Error(RegLoc, "register list not in ascending order");
+ }
if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) {
Warning(RegLoc, "duplicated register (" + RegTok.getString() +
") in register list");
@@ -2905,6 +2905,12 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
Parser.Lex(); // Eat the ']'.
return MatchOperand_Success;
}
+
+ // There's an optional '#' token here. Normally there wouldn't be, but
+  // inline assembly puts one in, and it's friendly to accept that.
+ if (Parser.getTok().is(AsmToken::Hash))
+ Parser.Lex(); // Eat the '#'
+
const MCExpr *LaneIndex;
SMLoc Loc = Parser.getTok().getLoc();
if (getParser().ParseExpression(LaneIndex)) {
@@ -2981,12 +2987,13 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
case NoLanes:
E = Parser.getTok().getLoc();
Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
- &ARMMCRegisterClasses[ARM::DPairRegClassID]);
-
+ &ARMMCRegisterClasses[ARM::DPairRegClassID]);
Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E));
break;
case AllLanes:
E = Parser.getTok().getLoc();
+ Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
+ &ARMMCRegisterClasses[ARM::DPairRegClassID]);
Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false,
S, E));
break;
@@ -3152,7 +3159,7 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
switch (LaneKind) {
case NoLanes:
- // Non-lane two-register operands have been converted to the
+ // Two-register operands have been converted to the
// composite register classes.
if (Count == 2) {
const MCRegisterClass *RC = (Spacing == 1) ?
@@ -3165,6 +3172,14 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
(Spacing == 2), S, E));
break;
case AllLanes:
+ // Two-register operands have been converted to the
+ // composite register classes.
+ if (Count == 2) {
+ const MCRegisterClass *RC = (Spacing == 1) ?
+ &ARMMCRegisterClasses[ARM::DPairRegClassID] :
+ &ARMMCRegisterClasses[ARM::DPairSpcRegClassID];
+ FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC);
+ }
Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count,
(Spacing == 2),
S, E));
@@ -3253,7 +3268,8 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (isMClass()) {
// See ARMv6-M 10.1.1
- unsigned FlagsVal = StringSwitch<unsigned>(Mask)
+ std::string Name = Mask.lower();
+ unsigned FlagsVal = StringSwitch<unsigned>(Name)
.Case("apsr", 0)
.Case("iapsr", 1)
.Case("eapsr", 2)
@@ -4427,10 +4443,11 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
else if (Res == -1) // irrecoverable error
return true;
// If this is VMRS, check for the apsr_nzcv operand.
- if (Mnemonic == "vmrs" && Parser.getTok().getString() == "apsr_nzcv") {
+ if (Mnemonic == "vmrs" &&
+ Parser.getTok().getString().equals_lower("apsr_nzcv")) {
S = Parser.getTok().getLoc();
Parser.Lex();
- Operands.push_back(ARMOperand::CreateToken("apsr_nzcv", S));
+ Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S));
return false;
}
@@ -4598,7 +4615,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "vrsqrts" || Mnemonic == "srs" || Mnemonic == "flds" ||
Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" ||
Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" ||
- Mnemonic == "fmuls" || Mnemonic == "fcmps" ||
+ Mnemonic == "fmuls" || Mnemonic == "fcmps" || Mnemonic == "fcmpzs" ||
(Mnemonic == "movs" && isThumb()))) {
Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
CarrySetting = true;
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 4101f59..ce4587b 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -849,7 +849,7 @@ extern "C" void LLVMInitializeARMDisassembler() {
createThumbDisassembler);
}
-static const unsigned GPRDecoderTable[] = {
+static const uint16_t GPRDecoderTable[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R4, ARM::R5, ARM::R6, ARM::R7,
ARM::R8, ARM::R9, ARM::R10, ARM::R11,
@@ -869,8 +869,14 @@ static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
static DecodeStatus
DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
- if (RegNo == 15) return MCDisassembler::Fail;
- return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (RegNo == 15)
+ S = MCDisassembler::SoftFail;
+
+ Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
+
+ return S;
}
static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
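
// Editor's note: the Check() helper these decoders thread their status
// through is not shown in this patch; a sketch consistent with how it is
// called above: Success keeps the current status, SoftFail records the
// downgrade but lets decoding continue (the instruction is UNPREDICTABLE
// rather than undecodable), and a hard Fail aborts.
//
//   static bool Check(DecodeStatus &Out, DecodeStatus In) {
//     switch (In) {
//     case MCDisassembler::Success:  return true;            // keep status
//     case MCDisassembler::SoftFail: Out = In; return true;  // note, go on
//     case MCDisassembler::Fail:     Out = In; return false; // abort
//     }
//     return false;
//   }
//
// So DecodeGPRnopcRegisterClass now flags r15 as UNPREDICTABLE instead of
// refusing to decode the instruction at all.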
@@ -916,7 +922,7 @@ static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
}
-static const unsigned SPRDecoderTable[] = {
+static const uint16_t SPRDecoderTable[] = {
ARM::S0, ARM::S1, ARM::S2, ARM::S3,
ARM::S4, ARM::S5, ARM::S6, ARM::S7,
ARM::S8, ARM::S9, ARM::S10, ARM::S11,
@@ -937,7 +943,7 @@ static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static const unsigned DPRDecoderTable[] = {
+static const uint16_t DPRDecoderTable[] = {
ARM::D0, ARM::D1, ARM::D2, ARM::D3,
ARM::D4, ARM::D5, ARM::D6, ARM::D7,
ARM::D8, ARM::D9, ARM::D10, ARM::D11,
@@ -973,7 +979,7 @@ DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder);
}
-static const unsigned QPRDecoderTable[] = {
+static const uint16_t QPRDecoderTable[] = {
ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7,
ARM::Q8, ARM::Q9, ARM::Q10, ARM::Q11,
@@ -992,7 +998,7 @@ static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static const unsigned DPairDecoderTable[] = {
+static const uint16_t DPairDecoderTable[] = {
ARM::Q0, ARM::D1_D2, ARM::Q1, ARM::D3_D4, ARM::Q2, ARM::D5_D6,
ARM::Q3, ARM::D7_D8, ARM::Q4, ARM::D9_D10, ARM::Q5, ARM::D11_D12,
ARM::Q6, ARM::D13_D14, ARM::Q7, ARM::D15_D16, ARM::Q8, ARM::D17_D18,
@@ -1011,7 +1017,7 @@ static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static const unsigned DPairSpacedDecoderTable[] = {
+static const uint16_t DPairSpacedDecoderTable[] = {
ARM::D0_D2, ARM::D1_D3, ARM::D2_D4, ARM::D3_D5,
ARM::D4_D6, ARM::D5_D7, ARM::D6_D8, ARM::D7_D9,
ARM::D8_D10, ARM::D9_D11, ARM::D10_D12, ARM::D11_D13,
@@ -2001,27 +2007,15 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
// First output register
switch (Inst.getOpcode()) {
- case ARM::VLD1q16:
- case ARM::VLD1q32:
- case ARM::VLD1q64:
- case ARM::VLD1q8:
- case ARM::VLD1q16wb_fixed:
- case ARM::VLD1q16wb_register:
- case ARM::VLD1q32wb_fixed:
- case ARM::VLD1q32wb_register:
- case ARM::VLD1q64wb_fixed:
- case ARM::VLD1q64wb_register:
- case ARM::VLD1q8wb_fixed:
- case ARM::VLD1q8wb_register:
- case ARM::VLD2d16:
- case ARM::VLD2d32:
- case ARM::VLD2d8:
- case ARM::VLD2d16wb_fixed:
- case ARM::VLD2d16wb_register:
- case ARM::VLD2d32wb_fixed:
- case ARM::VLD2d32wb_register:
- case ARM::VLD2d8wb_fixed:
- case ARM::VLD2d8wb_register:
+ case ARM::VLD1q16: case ARM::VLD1q32: case ARM::VLD1q64: case ARM::VLD1q8:
+ case ARM::VLD1q16wb_fixed: case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_fixed: case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_fixed: case ARM::VLD1q64wb_register:
+ case ARM::VLD1q8wb_fixed: case ARM::VLD1q8wb_register:
+ case ARM::VLD2d16: case ARM::VLD2d32: case ARM::VLD2d8:
+ case ARM::VLD2d16wb_fixed: case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_fixed: case ARM::VLD2d32wb_register:
+ case ARM::VLD2d8wb_fixed: case ARM::VLD2d8wb_register:
if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
break;
@@ -2325,6 +2319,8 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
case ARM::VST2b8wb_register:
case ARM::VST2b16wb_register:
case ARM::VST2b32wb_register:
+ Inst.addOperand(MCOperand::CreateImm(0));
+ break;
case ARM::VST3d8_UPD:
case ARM::VST3d16_UPD:
case ARM::VST3d32_UPD:
@@ -2366,6 +2362,23 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
case ARM::VST1q16wb_fixed:
case ARM::VST1q32wb_fixed:
case ARM::VST1q64wb_fixed:
+ case ARM::VST1d8Twb_fixed:
+ case ARM::VST1d16Twb_fixed:
+ case ARM::VST1d32Twb_fixed:
+ case ARM::VST1d64Twb_fixed:
+ case ARM::VST1d8Qwb_fixed:
+ case ARM::VST1d16Qwb_fixed:
+ case ARM::VST1d32Qwb_fixed:
+ case ARM::VST1d64Qwb_fixed:
+ case ARM::VST2d8wb_fixed:
+ case ARM::VST2d16wb_fixed:
+ case ARM::VST2d32wb_fixed:
+ case ARM::VST2q8wb_fixed:
+ case ARM::VST2q16wb_fixed:
+ case ARM::VST2q32wb_fixed:
+ case ARM::VST2b8wb_fixed:
+ case ARM::VST2b16wb_fixed:
+ case ARM::VST2b32wb_fixed:
break;
}
@@ -2525,8 +2538,19 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
align *= (1 << size);
- if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
- return MCDisassembler::Fail;
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1DUPq16: case ARM::VLD1DUPq32: case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq16wb_fixed: case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_fixed: case ARM::VLD1DUPq32wb_register:
+ case ARM::VLD1DUPq8wb_fixed: case ARM::VLD1DUPq8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ }
if (Rm != 0xF) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2556,18 +2580,33 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
unsigned align = fieldFromInstruction32(Insn, 4, 1);
unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2);
- unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
+ unsigned pred = fieldFromInstruction32(Insn, 22, 4);
align *= 2*size;
- if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
- return MCDisassembler::Fail;
- if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder)))
- return MCDisassembler::Fail;
- if (Rm != 0xF) {
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ switch (Inst.getOpcode()) {
+ case ARM::VLD2DUPd16: case ARM::VLD2DUPd32: case ARM::VLD2DUPd8:
+ case ARM::VLD2DUPd16wb_fixed: case ARM::VLD2DUPd16wb_register:
+ case ARM::VLD2DUPd32wb_fixed: case ARM::VLD2DUPd32wb_register:
+ case ARM::VLD2DUPd8wb_fixed: case ARM::VLD2DUPd8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD2DUPd16x2: case ARM::VLD2DUPd32x2: case ARM::VLD2DUPd8x2:
+ case ARM::VLD2DUPd16x2wb_fixed: case ARM::VLD2DUPd16x2wb_register:
+ case ARM::VLD2DUPd32x2wb_fixed: case ARM::VLD2DUPd32x2wb_register:
+ case ARM::VLD2DUPd8x2wb_fixed: case ARM::VLD2DUPd8x2wb_register:
+ if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
+ break;
}
+ if (Rm != 0xF)
+ Inst.addOperand(MCOperand::CreateImm(0));
+
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateImm(align));
@@ -2579,6 +2618,9 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
}
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
return S;
}
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index bae4e78..2b994df 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -1026,15 +1026,6 @@ void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- // Normally, it's not safe to use register enum values directly with
- // addition to get the next register, but for VFP registers, the
- // sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "}";
-}
-
-void ARMInstPrinter::printVectorListDPair(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
@@ -1042,9 +1033,9 @@ void ARMInstPrinter::printVectorListDPair(const MCInst *MI, unsigned OpNum,
O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}";
}
-void ARMInstPrinter::printVectorListDPairSpaced(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
@@ -1081,11 +1072,10 @@ void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
- // Normally, it's not safe to use register enum values directly with
- // addition to get the next register, but for VFP registers, the
- // sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[]}";
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
+ O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}";
}
void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI,
@@ -1111,23 +1101,13 @@ void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI,
<< getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "[]}";
}
-void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- // Normally, it's not safe to use register enum values directly with
- // addition to get the next register, but for VFP registers, the
- // sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}";
-}
-
void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
- // Normally, it's not safe to use register enum values directly with
- // addition to get the next register, but for VFP registers, the
- // sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}";
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
+ O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}";
}
void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI,
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 1037161..e9cd407 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -134,9 +134,8 @@ public:
void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printVectorListDPair(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printVectorListDPairSpaced(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum,
@@ -147,8 +146,6 @@ public:
raw_ostream &O);
void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
- void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index d3a3d3a..25849ee 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -167,6 +167,7 @@ static unsigned getRelaxedOpcode(unsigned Op) {
case ARM::tBcc: return ARM::t2Bcc;
case ARM::tLDRpciASM: return ARM::t2LDRpci;
case ARM::tADR: return ARM::t2ADR;
+ case ARM::tB: return ARM::t2B;
}
}
@@ -181,6 +182,16 @@ bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
const MCInstFragment *DF,
const MCAsmLayout &Layout) const {
switch ((unsigned)Fixup.getKind()) {
+ case ARM::fixup_arm_thumb_br: {
+ // Relaxing tB to t2B. tB has a signed 12-bit displacement with the
+ // low bit being an implied zero. There's an implied +4 offset for the
+ // branch, so we adjust the other way here to determine what's
+ // encodable.
+ //
+ // Relax if the adjusted offset is outside tB's signed 12-bit range.
+ int64_t Offset = int64_t(Value) - 4;
+ return Offset > 2046 || Offset < -2048;
+ }
case ARM::fixup_arm_thumb_bcc: {
// Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the
// low bit being an implied zero. There's an implied +4 offset for the
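
// Editor's note: a standalone check of the arithmetic above, assuming
// Value is the resolved fixup value and Thumb PC reads as the branch
// address plus 4:
//
//   #include <cassert>
//   #include <cstdint>
//
//   // tB stores a signed 12-bit displacement whose low bit is an implied
//   // zero, so after undoing the +4 pipeline offset the encodable range
//   // is [-2048, 2046] in steps of 2.
//   static bool fitsInTB(uint64_t Value) {
//     int64_t Offset = int64_t(Value) - 4;
//     return Offset >= -2048 && Offset <= 2046;
//   }
//
//   int main() {
//     assert(fitsInTB(2050));  // adjusts to 2046: still encodable as tB
//     assert(!fitsInTB(2052)); // adjusts to 2048: relax to t2B
//     return 0;
//   }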
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 06eb4e5..ae11be8 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -187,21 +187,37 @@ inline static unsigned getARMRegisterNumbering(unsigned Reg) {
case S31: case D31: return 31;
// Composite registers use the regnum of the first register in the list.
- case D1_D2: return 1;
- case D3_D5: return 3;
- case D5_D7: return 5;
- case D7_D9: return 7;
- case D9_D10: return 9;
- case D11_D12: return 11;
- case D13_D14: return 13;
- case D15_D16: return 15;
- case D17_D18: return 17;
- case D19_D20: return 19;
- case D21_D22: return 21;
- case D23_D24: return 23;
- case D25_D26: return 25;
- case D27_D28: return 27;
- case D29_D30: return 29;
+ /* Q0 */ case D0_D2: return 0;
+ case D1_D2: case D1_D3: return 1;
+ /* Q1 */ case D2_D4: return 2;
+ case D3_D4: case D3_D5: return 3;
+ /* Q2 */ case D4_D6: return 4;
+ case D5_D6: case D5_D7: return 5;
+ /* Q3 */ case D6_D8: return 6;
+ case D7_D8: case D7_D9: return 7;
+ /* Q4 */ case D8_D10: return 8;
+ case D9_D10: case D9_D11: return 9;
+ /* Q5 */ case D10_D12: return 10;
+ case D11_D12: case D11_D13: return 11;
+ /* Q6 */ case D12_D14: return 12;
+ case D13_D14: case D13_D15: return 13;
+ /* Q7 */ case D14_D16: return 14;
+ case D15_D16: case D15_D17: return 15;
+ /* Q8 */ case D16_D18: return 16;
+ case D17_D18: case D17_D19: return 17;
+ /* Q9 */ case D18_D20: return 18;
+ case D19_D20: case D19_D21: return 19;
+ /* Q10 */ case D20_D22: return 20;
+ case D21_D22: case D21_D23: return 21;
+ /* Q11 */ case D22_D24: return 22;
+ case D23_D24: case D23_D25: return 23;
+ /* Q12 */ case D24_D26: return 24;
+ case D25_D26: case D25_D27: return 25;
+ /* Q13 */ case D26_D28: return 26;
+ case D27_D28: case D27_D29: return 27;
+ /* Q14 */ case D28_D30: return 28;
+ case D29_D30: case D29_D31: return 29;
+ /* Q15 */
}
}
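
// Editor's note: the invariant of the table above, made concrete: every
// composite register reports the encoding number of its first D register,
// whether the pair is adjacent or spaced. For example:
//
//   assert(getARMRegisterNumbering(ARM::D5_D6) == 5); // adjacent pair
//   assert(getARMRegisterNumbering(ARM::D5_D7) == 5); // spaced pair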
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 1606b92..ed27f9f 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -151,13 +151,13 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
Triple TheTriple(TT);
if (TheTriple.isOSDarwin())
- return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
+ return createMachOStreamer(Ctx, MAB, OS, Emitter, false);
if (TheTriple.isOSWindows()) {
llvm_unreachable("ARM does not support Windows COFF format");
}
- return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+ return createELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack);
}
static MCInstPrinter *createARMMCInstPrinter(const Target &T,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index faf73ac..9d3da14 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -34,12 +34,12 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter {
MCValue Target,
unsigned Log2Size,
uint64_t &FixedValue);
- void RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup, MCValue Target,
- uint64_t &FixedValue);
+ void RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue);
public:
ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
@@ -102,34 +102,47 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
Log2Size = llvm::Log2_32(4);
return true;
+ // movw/movt r_type relocations always have a PAIR relocation following
+ // them, and their r_length bits are used differently. The encoding of
+ // the r_length is as
+ // follows:
+ // low bit of r_length:
+ // 0 - :lower16: for movw instructions
+ // 1 - :upper16: for movt instructions
+ // high bit of r_length:
+ // 0 - arm instructions
+ // 1 - thumb instructions
case ARM::fixup_arm_movt_hi16:
case ARM::fixup_arm_movt_hi16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ Log2Size = 1;
+ return true;
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movt_hi16_pcrel:
- RelocType = unsigned(macho::RIT_ARM_HalfDifference);
- // Report as 'long', even though that is not quite accurate.
- Log2Size = llvm::Log2_32(4);
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ Log2Size = 3;
return true;
case ARM::fixup_arm_movw_lo16:
case ARM::fixup_arm_movw_lo16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ Log2Size = 0;
+ return true;
case ARM::fixup_t2_movw_lo16:
case ARM::fixup_t2_movw_lo16_pcrel:
RelocType = unsigned(macho::RIT_ARM_Half);
- // Report as 'long', even though that is not quite accurate.
- Log2Size = llvm::Log2_32(4);
+ Log2Size = 2;
return true;
}
}
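
// Editor's note: the Log2Size values chosen above are not sizes at all
// here; they pack two flags into r_length. A sketch of the mapping,
// matching the four cases in the hunk:
//
//   // r_length for RIT_ARM_Half relocations:
//   //   bit 0: 0 = :lower16: (movw), 1 = :upper16: (movt)
//   //   bit 1: 0 = ARM encoding,     1 = Thumb encoding
//   static unsigned halfRLength(bool IsMovt, bool IsThumb) {
//     return (unsigned(IsThumb) << 1) | unsigned(IsMovt);
//   }
//   // arm movw -> 0, arm movt -> 1, t2 movw -> 2, t2 movt -> 3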
void ARMMachObjectWriter::
-RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup,
- MCValue Target,
- uint64_t &FixedValue) {
+RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Type = macho::RIT_ARM_Half;
@@ -313,10 +326,9 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// scattered relocation entry. Differences always require scattered
// relocations.
if (Target.getSymB()) {
- if (RelocType == macho::RIT_ARM_Half ||
- RelocType == macho::RIT_ARM_HalfDifference)
- return RecordARMMovwMovtRelocation(Writer, Asm, Layout, Fragment, Fixup,
- Target, FixedValue);
+ if (RelocType == macho::RIT_ARM_Half)
+ return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment,
+ Fixup, Target, FixedValue);
return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
Target, Log2Size, FixedValue);
}
@@ -391,6 +403,30 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
(Log2Size << 25) |
(IsExtern << 27) |
(Type << 28));
+
+ // Even when it's not a scattered relocation, movw/movt always uses
+ // a PAIR relocation.
+ if (Type == macho::RIT_ARM_Half) {
+ // The other-half value only gets populated for the movt relocation.
+ uint32_t Value = 0;
+ switch ((unsigned)Fixup.getKind()) {
+ default: break;
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ Value = FixedValue;
+ break;
+ }
+ macho::RelocationEntry MREPair;
+ MREPair.Word0 = Value;
+ MREPair.Word1 = ((0xffffff) |
+ (Log2Size << 25) |
+ (macho::RIT_Pair << 28));
+
+ Writer->addRelocation(Fragment->getParent(), MREPair);
+ }
+
Writer->addRelocation(Fragment->getParent(), MRE);
}
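
// Editor's note: a distilled sketch of the trailing PAIR entry built
// above. ARM_RELOC_PAIR is 1 per <mach-o/reloc.h>, and the all-ones
// address field is what identifies the entry as a pair:
//
//   #include <cstdint>
//
//   struct RelocWords { uint32_t Word0, Word1; };
//
//   static RelocWords makeHalfPair(uint32_t OtherHalf, unsigned Log2Size) {
//     const uint32_t RIT_Pair = 1;  // ARM_RELOC_PAIR
//     RelocWords Pair;
//     Pair.Word0 = OtherHalf;       // populated only for movt fixups
//     Pair.Word1 = 0xffffff         // dummy r_address of all-ones
//                | (Log2Size << 25) // same r_length bits as the Half entry
//                | (RIT_Pair << 28);
//     return Pair;
//   }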
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index a89a663..edd73c2 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "Thumb1FrameLowering.h"
-#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index adaccdd..8cf7cac 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -13,7 +13,6 @@
#include "Thumb1InstrInfo.h"
#include "ARM.h"
-#include "ARMMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 4d97626..27fce9b 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -14,10 +14,10 @@
#ifndef THUMB1INSTRUCTIONINFO_H
#define THUMB1INSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "ARM.h"
-#include "ARMInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
#include "Thumb1RegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
namespace llvm {
class ARMSubtarget;
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 6b8bf0e..ef77bbd 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -12,12 +12,11 @@
//
//===----------------------------------------------------------------------===//
+#include "Thumb1RegisterInfo.h"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
-#include "Thumb1InstrInfo.h"
-#include "Thumb1RegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 9060e59..6971842 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -16,13 +16,12 @@
#define THUMB1REGISTERINFO_H
#include "ARM.h"
-#include "ARMRegisterInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class ARMBaseInstrInfo;
- class Type;
struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
public:
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 6cb182a..2fe4b85 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -15,7 +15,6 @@
#include "ARM.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
-#include "Thumb2InstrInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index a754649..1ae2ef1 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -14,10 +14,10 @@
#ifndef THUMB2INSTRUCTIONINFO_H
#define THUMB2INSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "ARM.h"
#include "ARMInstrInfo.h"
#include "Thumb2RegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
namespace llvm {
class ARMSubtarget;
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 6d210fe..29a87d0 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -12,10 +12,10 @@
//
//===----------------------------------------------------------------------===//
+#include "Thumb2RegisterInfo.h"
#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
-#include "Thumb2InstrInfo.h"
-#include "Thumb2RegisterInfo.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
index 824378a..6b397e8 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ b/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -16,13 +16,12 @@
#define THUMB2REGISTERINFO_H
#include "ARM.h"
-#include "ARMRegisterInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class ARMBaseInstrInfo;
- class Type;
struct Thumb2RegisterInfo : public ARMBaseRegisterInfo {
public:
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 5ee5f42..fb9d93b 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -39,9 +39,9 @@ namespace {
/// ReduceTable - A static table with information on mapping from wide
/// opcodes to narrow
struct ReduceEntry {
- unsigned WideOpc; // Wide opcode
- unsigned NarrowOpc1; // Narrow opcode to transform to
- unsigned NarrowOpc2; // Narrow opcode when it's two-address
+ uint16_t WideOpc; // Wide opcode
+ uint16_t NarrowOpc1; // Narrow opcode to transform to
+ uint16_t NarrowOpc2; // Narrow opcode when it's two-address
uint8_t Imm1Limit; // Limit of immediate field (bits)
uint8_t Imm2Limit; // Limit of immediate field when it's two-address
unsigned LowRegs1 : 1; // Only possible if low-registers are used
@@ -189,7 +189,7 @@ Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
}
static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
- for (const unsigned *Regs = MCID.ImplicitDefs; *Regs; ++Regs)
+ for (const uint16_t *Regs = MCID.getImplicitDefs(); *Regs; ++Regs)
if (*Regs == ARM::CPSR)
return true;
return false;
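
// Editor's note: the loop works because an MCInstrDesc's implicit-def
// list is a 0-terminated array (now of uint16_t register numbers), so the
// sentinel ends the walk:
//
//   static bool listContains(const uint16_t *Regs, uint16_t Reg) {
//     for (; *Regs; ++Regs)  // register 0 is the terminator, never real
//       if (*Regs == Reg)
//         return true;
//     return false;
//   }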
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 80973b7..b6b209e 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -2392,17 +2392,17 @@ void CWriter::visitSwitchInst(SwitchInst &SI) {
printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2);
Out << ";\n";
- unsigned NumCases = SI.getNumCases();
// Skip the first item since that's the default case.
- for (unsigned i = 0; i < NumCases; ++i) {
- ConstantInt* CaseVal = SI.getCaseValue(i);
- BasicBlock* Succ = SI.getCaseSuccessor(i);
+ for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
+ ConstantInt* CaseVal = i.getCaseValue();
+ BasicBlock* Succ = i.getCaseSuccessor();
Out << " case ";
writeOperand(CaseVal);
Out << ":\n";
printPHICopiesForSuccessor (SI.getParent(), Succ, 2);
printBranchToBlock(SI.getParent(), Succ, 2);
- if (Function::iterator(Succ) == llvm::next(Function::iterator(SI.getParent())))
+ if (Function::iterator(Succ) ==
+ llvm::next(Function::iterator(SI.getParent())))
Out << " break;\n";
}
diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp
index 916f9ba..fac806e 100644
--- a/lib/Target/CellSPU/SPUFrameLowering.cpp
+++ b/lib/Target/CellSPU/SPUFrameLowering.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "SPU.h"
#include "SPUFrameLowering.h"
+#include "SPU.h"
#include "SPUInstrBuilder.h"
#include "SPUInstrInfo.h"
#include "llvm/Function.h"
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 3d2b32d..55b3f72 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -31,14 +31,10 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include <map>
using namespace llvm;
-// Used in getTargetNodeName() below
namespace {
- std::map<unsigned, const char *> node_names;
-
// Byte offset of the preferred slot (counted from the MSB)
int prefslotOffset(EVT VT) {
int retval=0;
@@ -481,40 +477,34 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setSchedulingPreference(Sched::RegPressure);
}
-const char *
-SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
-{
- if (node_names.empty()) {
- node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
- node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
- node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
- node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
- node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
- node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
- node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
- node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
- node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
- node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
- node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
- node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
- node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
- node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS";
- node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES";
- node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
- node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
- node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
- node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
- "SPUISD::ROTBYTES_LEFT_BITS";
- node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
- node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
- node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
- node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
- node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
- }
-
- std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
-
- return ((i != node_names.end()) ? i->second : 0);
+const char *SPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case SPUISD::RET_FLAG: return "SPUISD::RET_FLAG";
+ case SPUISD::Hi: return "SPUISD::Hi";
+ case SPUISD::Lo: return "SPUISD::Lo";
+ case SPUISD::PCRelAddr: return "SPUISD::PCRelAddr";
+ case SPUISD::AFormAddr: return "SPUISD::AFormAddr";
+ case SPUISD::IndirectAddr: return "SPUISD::IndirectAddr";
+ case SPUISD::LDRESULT: return "SPUISD::LDRESULT";
+ case SPUISD::CALL: return "SPUISD::CALL";
+ case SPUISD::SHUFB: return "SPUISD::SHUFB";
+ case SPUISD::SHUFFLE_MASK: return "SPUISD::SHUFFLE_MASK";
+ case SPUISD::CNTB: return "SPUISD::CNTB";
+ case SPUISD::PREFSLOT2VEC: return "SPUISD::PREFSLOT2VEC";
+ case SPUISD::VEC2PREFSLOT: return "SPUISD::VEC2PREFSLOT";
+ case SPUISD::SHL_BITS: return "SPUISD::SHL_BITS";
+ case SPUISD::SHL_BYTES: return "SPUISD::SHL_BYTES";
+ case SPUISD::VEC_ROTL: return "SPUISD::VEC_ROTL";
+ case SPUISD::VEC_ROTR: return "SPUISD::VEC_ROTR";
+ case SPUISD::ROTBYTES_LEFT: return "SPUISD::ROTBYTES_LEFT";
+ case SPUISD::ROTBYTES_LEFT_BITS: return "SPUISD::ROTBYTES_LEFT_BITS";
+ case SPUISD::SELECT_MASK: return "SPUISD::SELECT_MASK";
+ case SPUISD::SELB: return "SPUISD::SELB";
+ case SPUISD::ADD64_MARKER: return "SPUISD::ADD64_MARKER";
+ case SPUISD::SUB64_MARKER: return "SPUISD::SUB64_MARKER";
+ case SPUISD::MUL64_MARKER: return "SPUISD::MUL64_MARKER";
+ }
}
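
// Editor's note: the switch form is more than a style change. The old
// version lazily populated a global std::map, which allocates on first
// use and races if two threads call getTargetNodeName() concurrently.
// A distilled sketch of the hazard being removed (hypothetical names):
//
//   #include <map>
//
//   const char *lookupLazy(unsigned Op) {
//     static std::map<unsigned, const char *> Names; // shared mutable state
//     if (Names.empty())        // two threads can both see empty()...
//       Names[1] = "NODE_ONE";  // ...and race on this insertion
//     std::map<unsigned, const char *>::const_iterator I = Names.find(Op);
//     return I != Names.end() ? I->second : 0;
//   }
//
// The stateless switch has neither problem and lets the compiler lower
// the dense opcode range to a jump table.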
//===----------------------------------------------------------------------===//
@@ -1216,7 +1206,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
if (isVarArg) {
// FIXME: we should be able to query the argument registers from
// tablegen generated code.
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
@@ -1230,7 +1220,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
};
// size of ArgRegs array
- unsigned NumArgRegs = 77;
+ const unsigned NumArgRegs = 77;
// We will spill (79-3)+1 registers to the stack
SmallVector<SDValue, 79-3+1> MemOps;
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index e28e2a4..25c5355 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -15,9 +15,9 @@
#ifndef SPU_ISELLOWERING_H
#define SPU_ISELLOWERING_H
+#include "SPU.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "SPU.h"
namespace llvm {
namespace SPUISD {
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index f0d21ad..85e5821 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -15,8 +15,8 @@
#define SPU_INSTRUCTIONINFO_H
#include "SPU.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "SPURegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "SPUGenInstrInfo.inc"
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index 92983e1..1b2da5f 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "reginfo"
-#include "SPU.h"
#include "SPURegisterInfo.h"
+#include "SPU.h"
#include "SPUInstrBuilder.h"
#include "SPUSubtarget.h"
#include "SPUMachineFunction.h"
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index e43f5ad..21f6b25 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "SPU.h"
#include "SPUTargetMachine.h"
+#include "SPU.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/Support/DynamicLibrary.h"
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index c179292..3e5d38c 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -23,9 +23,6 @@
#include "llvm/Target/TargetData.h"
namespace llvm {
-class PassManager;
-class GlobalValue;
-class TargetFrameLowering;
/// SPUTargetMachine
///
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 76b5e9c..107c6cc 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -1090,10 +1090,10 @@ void CppWriter::printInstruction(const Instruction *I,
<< getOpName(SI->getDefaultDest()) << ", "
<< SI->getNumCases() << ", " << bbname << ");";
nl(Out);
- unsigned NumCases = SI->getNumCases();
- for (unsigned i = 0; i < NumCases; ++i) {
- const ConstantInt* CaseVal = SI->getCaseValue(i);
- const BasicBlock *BB = SI->getCaseSuccessor(i);
+ for (SwitchInst::ConstCaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ const ConstantInt* CaseVal = i.getCaseValue();
+ const BasicBlock *BB = i.getCaseSuccessor();
Out << iName << "->addCase("
<< getOpName(CaseVal) << ", "
<< getOpName(BB) << ");";
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
index bbefcaf..270c7a7 100644
--- a/lib/Target/Hexagon/Hexagon.h
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -15,7 +15,6 @@
#ifndef TARGET_Hexagon_H
#define TARGET_Hexagon_H
-#include <cassert>
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/Target/TargetLowering.h"
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 688b8e3..bf333b7 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -32,11 +32,11 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -46,8 +46,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
index 71787de..46c20e9 100644
--- a/lib/Target/Hexagon/HexagonCallingConvLower.cpp
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
@@ -14,13 +14,13 @@
//===----------------------------------------------------------------------===//
#include "HexagonCallingConvLower.h"
+#include "Hexagon.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "Hexagon.h"
using namespace llvm;
Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg,
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 49c6cdf..e8a6924 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -8,13 +8,13 @@
//
//===----------------------------------------------------------------------===//
+#include "HexagonFrameLowering.h"
#include "Hexagon.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
#include "HexagonMachineFunctionInfo.h"
-#include "HexagonFrameLowering.h"
#include "llvm/Function.h"
#include "llvm/Type.h"
#include "llvm/ADT/BitVector.h"
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 04ea4ed..57772a5 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -27,6 +27,8 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "hwloops"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
#include "llvm/Constants.h"
#include "llvm/PassSupport.h"
#include "llvm/ADT/DenseMap.h"
@@ -43,8 +45,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include <algorithm>
-#include "Hexagon.h"
-#include "HexagonTargetMachine.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index ed4b840..d6da0d0 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -28,17 +28,16 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "HexagonMachineFunctionInfo.h"
#include "llvm/Support/CommandLine.h"
+using namespace llvm;
const unsigned Hexagon_MAX_RET_SIZE = 64;
-using namespace llvm;
static cl::opt<bool>
EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden,
@@ -159,7 +158,7 @@ static bool CC_Hexagon32(unsigned ValNo, MVT ValVT,
MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
- static const unsigned RegList[] = {
+ static const uint16_t RegList[] = {
Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
Hexagon::R5
};
@@ -182,10 +181,10 @@ static bool CC_Hexagon64(unsigned ValNo, MVT ValVT,
return false;
}
- static const unsigned RegList1[] = {
+ static const uint16_t RegList1[] = {
Hexagon::D1, Hexagon::D2
};
- static const unsigned RegList2[] = {
+ static const uint16_t RegList2[] = {
Hexagon::R1, Hexagon::R3
};
if (unsigned Reg = State.AllocateReg(RegList1, RegList2, 2)) {
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 5396486..4208bcb 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -15,10 +15,10 @@
#ifndef Hexagon_ISELLOWERING_H
#define Hexagon_ISELLOWERING_H
+#include "Hexagon.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
-#include "Hexagon.h"
namespace llvm {
namespace HexagonISD {
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 07872d4..3d7ace5 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "HexagonRegisterInfo.h"
#include "HexagonInstrInfo.h"
+#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "Hexagon.h"
#include "llvm/ADT/STLExtras.h"
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index eb088c3..7306870 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -14,10 +14,10 @@
#ifndef HexagonINSTRUCTIONINFO_H
#define HexagonINSTRUCTIONINFO_H
+#include "HexagonRegisterInfo.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetFrameLowering.h"
-#include "HexagonRegisterInfo.h"
#define GET_INSTRINFO_HEADER
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index 06c732f..55cbc09 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -36,6 +36,8 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "hexagon-peephole"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
#include "llvm/Constants.h"
#include "llvm/PassSupport.h"
#include "llvm/ADT/DenseMap.h"
@@ -45,16 +47,13 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include <algorithm>
-#include "Hexagon.h"
-#include "HexagonTargetMachine.h"
-
-#include "llvm/Support/CommandLine.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index c481270..2a9de92 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "Hexagon.h"
#include "HexagonRegisterInfo.h"
+#include "Hexagon.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
#include "HexagonMachineFunctionInfo.h"
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index fc65305..6cf727b 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -16,9 +16,10 @@
#define HexagonREGISTERINFO_H
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/MC/MachineLocation.h"
+
#define GET_REGINFO_HEADER
#include "HexagonGenRegisterInfo.inc"
-#include "llvm/MC/MachineLocation.h"
//
// We try not to hard code the reserved registers in our code,
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 319eab2..b9e6894 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -50,7 +50,7 @@ extern "C" void LLVMInitializeHexagonTarget() {
///
HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- TargetOptions Options,
+ const TargetOptions &Options,
Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL)
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index 70bea56..0336965 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -14,13 +14,13 @@
#ifndef HexagonTARGETMACHINE_H
#define HexagonTARGETMACHINE_H
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonISelLowering.h"
#include "HexagonSelectionDAGInfo.h"
#include "HexagonFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
namespace llvm {
@@ -37,8 +37,9 @@ class HexagonTargetMachine : public LLVMTargetMachine {
public:
HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU,
- StringRef FS, TargetOptions Options, Reloc::Model RM,
- CodeModel::Model CM, CodeGenOpt::Level OL);
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual const HexagonInstrInfo *getInstrInfo() const {
return &InstrInfo;
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index d3ce5a6..32cc709 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -11,6 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "HexagonTargetObjectFile.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Target/TargetData.h"
@@ -18,9 +21,6 @@
#include "llvm/MC/MCContext.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/CommandLine.h"
-#include "HexagonSubtarget.h"
-#include "HexagonTargetObjectFile.h"
-#include "HexagonTargetMachine.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 74abc56..3cfa4fd 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- HexagonMCTargetDesc.cpp - Cell Hexagon Target Descriptions --------===//
+//===-- HexagonMCTargetDesc.cpp - Hexagon Target Descriptions -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file provides Cell Hexagon specific target descriptions.
+// This file provides Hexagon specific target descriptions.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 364841f..b18d23a 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -1,4 +1,4 @@
-//===-- SPUMCTargetDesc.h - Hexagon Target Descriptions ---------*- C++ -*-===//
+//===-- HexagonMCTargetDesc.h - Hexagon Target Descriptions -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SPUMCTARGETDESC_H
-#define SPUMCTARGETDESC_H
+#ifndef HEXAGONMCTARGETDESC_H
+#define HEXAGONMCTARGETDESC_H
namespace llvm {
class MCSubtargetInfo;
diff --git a/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
index 1114d99..73c7e01 100644
--- a/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
+;===- ./lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
diff --git a/lib/Target/Hexagon/MCTargetDesc/Makefile b/lib/Target/Hexagon/MCTargetDesc/Makefile
index 67be2bc..885be2d 100644
--- a/lib/Target/Hexagon/MCTargetDesc/Makefile
+++ b/lib/Target/Hexagon/MCTargetDesc/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/CellSPU/TargetDesc/Makefile --------------*- Makefile -*-===##
+##===- lib/Target/Hexagon/TargetDesc/Makefile --------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
index adedf93..6b958c8 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -34,9 +34,9 @@ extern const MCInstrDesc MBlazeInsts[];
using namespace llvm;
-const unsigned UNSUPPORTED = -1;
+const uint16_t UNSUPPORTED = -1;
-static const unsigned mblazeBinary2Opcode[] = {
+static const uint16_t mblazeBinary2Opcode[] = {
MBlaze::ADD, MBlaze::RSUB, MBlaze::ADDC, MBlaze::RSUBC, //00,01,02,03
MBlaze::ADDK, MBlaze::RSUBK, MBlaze::ADDKC, MBlaze::RSUBKC, //04,05,06,07
MBlaze::ADDI, MBlaze::RSUBI, MBlaze::ADDIC, MBlaze::RSUBIC, //08,09,0A,0B
diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.h b/lib/Target/MBlaze/MBlazeFrameLowering.h
index 8be15bf..01e6578 100644
--- a/lib/Target/MBlaze/MBlazeFrameLowering.h
+++ b/lib/Target/MBlaze/MBlazeFrameLowering.h
@@ -15,11 +15,10 @@
#define MBLAZE_FRAMEINFO_H
#include "MBlaze.h"
-#include "MBlazeSubtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
- class MBlazeSubtarget;
+class MBlazeSubtarget;
class MBlazeFrameLowering : public TargetFrameLowering {
protected:
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 23c8e13..9ef6bb6 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -657,7 +657,7 @@ static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
MBlaze::R5, MBlaze::R6, MBlaze::R7,
MBlaze::R8, MBlaze::R9, MBlaze::R10
};
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h
index 168694b..6a79fc1 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.h
+++ b/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -15,11 +15,11 @@
#ifndef MBlazeISELLOWERING_H
#define MBlazeISELLOWERING_H
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
-#include "MBlaze.h"
-#include "MBlazeSubtarget.h"
namespace llvm {
namespace MBlazeCC {
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h
index a309d2b..5252147 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.h
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -15,9 +15,9 @@
#define MBLAZEINSTRUCTIONINFO_H
#include "MBlaze.h"
+#include "MBlazeRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "MBlazeRegisterInfo.h"
#define GET_INSTRINFO_HEADER
#include "MBlazeGenInstrInfo.inc"
diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.h b/lib/Target/MBlaze/MBlazeMCInstLower.h
index bb77ed4..7b97744 100644
--- a/lib/Target/MBlaze/MBlazeMCInstLower.h
+++ b/lib/Target/MBlaze/MBlazeMCInstLower.h
@@ -14,7 +14,6 @@
namespace llvm {
class AsmPrinter;
- class MCAsmInfo;
class MCContext;
class MCInst;
class MCOperand;
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index 6801a1a..46f5207 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -14,9 +14,9 @@
#define DEBUG_TYPE "mblaze-frame-info"
+#include "MBlazeRegisterInfo.h"
#include "MBlaze.h"
#include "MBlazeSubtarget.h"
-#include "MBlazeRegisterInfo.h"
#include "MBlazeMachineFunction.h"
#include "llvm/Constants.h"
#include "llvm/Type.h"
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 5c07424..dd7de9b 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MBlaze.h"
#include "MBlazeTargetMachine.h"
+#include "MBlaze.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/FormattedStream.h"
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index fd5de34..c03ba47 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MSP430.h"
#include "MSP430InstrInfo.h"
+#include "MSP430.h"
#include "MSP430MachineFunctionInfo.h"
#include "MSP430TargetMachine.h"
#include "llvm/Function.h"
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index fe2a75c..04f339b 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -14,8 +14,8 @@
#ifndef LLVM_TARGET_MSP430INSTRINFO_H
#define LLVM_TARGET_MSP430INSTRINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "MSP430RegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "MSP430GenInstrInfo.inc"
diff --git a/lib/Target/MSP430/MSP430MCInstLower.h b/lib/Target/MSP430/MSP430MCInstLower.h
index 297efd2..24151e2 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.h
+++ b/lib/Target/MSP430/MSP430MCInstLower.h
@@ -14,7 +14,6 @@
namespace llvm {
class AsmPrinter;
- class MCAsmInfo;
class MCContext;
class MCInst;
class MCOperand;
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index f9ddfb3..51ec71a 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -13,9 +13,9 @@
#define DEBUG_TYPE "msp430-reg-info"
+#include "MSP430RegisterInfo.h"
#include "MSP430.h"
#include "MSP430MachineFunctionInfo.h"
-#include "MSP430RegisterInfo.h"
#include "MSP430TargetMachine.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h
index e7bebbd..4d8792e 100644
--- a/lib/Target/MSP430/MSP430Subtarget.h
+++ b/lib/Target/MSP430/MSP430Subtarget.h
@@ -15,12 +15,11 @@
#define LLVM_TARGET_MSP430_SUBTARGET_H
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
#define GET_SUBTARGETINFO_HEADER
#include "MSP430GenSubtargetInfo.inc"
-#include <string>
-
namespace llvm {
class StringRef;
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index af62e48..9f2eda1 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MSP430.h"
#include "MSP430TargetMachine.h"
+#include "MSP430.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index d69570b..9d5a2f1 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -92,25 +92,42 @@ public:
if (!Value)
return; // Doesn't change encoding.
+ // Where do we start in the object
unsigned Offset = Fixup.getOffset();
- // FIXME: The below code will not work across endian models
- // How many bytes/bits are we fixing up?
- unsigned NumBytes = ((getFixupKindInfo(Kind).TargetSize-1)/8)+1;
- uint64_t Mask = ((uint64_t)1 << getFixupKindInfo(Kind).TargetSize) - 1;
+ // Number of bytes we need to fix up.
+ unsigned NumBytes = (getFixupKindInfo(Kind).TargetSize + 7) / 8;
+ // Full size in bytes of the encoded instruction; used to mirror byte indices on big-endian targets.
+ unsigned FullSize;
+
+ switch ((unsigned)Kind) {
+ case Mips::fixup_Mips_16:
+ FullSize = 2;
+ break;
+ case Mips::fixup_Mips_64:
+ FullSize = 8;
+ break;
+ default:
+ FullSize = 4;
+ break;
+ }
// Grab current value, if any, from bits.
uint64_t CurVal = 0;
- for (unsigned i = 0; i != NumBytes; ++i)
- CurVal |= ((uint8_t)Data[Offset + i]) << (i * 8);
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ unsigned Idx = IsLittle ? i : (FullSize - 1 - i);
+ CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i*8);
+ }
+
+ uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize));
CurVal = (CurVal & ~Mask) | ((CurVal + Value) & Mask);
- // Write out the bytes back to the code/data bits.
- // First the unaffected bits and then the fixup.
+ // Write out the fixed up bytes back to the code/data bits.
for (unsigned i = 0; i != NumBytes; ++i) {
- Data[Offset + i] = uint8_t((CurVal >> (i * 8)) & 0xff);
+ unsigned Idx = IsLittle ? i : (FullSize - 1 - i);
+ Data[Offset + Idx] = (uint8_t)((CurVal >> (i*8)) & 0xff);
}
-}
+ }
unsigned getNumFixupKinds() const { return Mips::NumTargetFixupKinds; }
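The rewritten fixup application walks the affected bytes through an endian-aware index: on little-endian targets byte i of the value lands at Data[Offset + i], while on big-endian targets it lands at the mirrored position inside the FullSize-byte instruction word. A minimal standalone sketch of the write loop, reusing the patch's names (Data, Offset, NumBytes, FullSize, IsLittle) outside their MCAsmBackend context:

    #include <cstdint>
    #include <cstdio>

    // Write NumBytes of CurVal into Data at Offset, honoring endianness.
    static void writeFixup(uint8_t *Data, unsigned Offset, uint64_t CurVal,
                           unsigned NumBytes, unsigned FullSize, bool IsLittle) {
      for (unsigned i = 0; i != NumBytes; ++i) {
        // Little endian: value byte i goes to position i. Big endian: it goes
        // to the mirrored position within the FullSize-byte word.
        unsigned Idx = IsLittle ? i : (FullSize - 1 - i);
        Data[Offset + Idx] = (uint8_t)((CurVal >> (i * 8)) & 0xff);
      }
    }

    int main() {
      uint8_t Buf[4] = {0, 0, 0, 0};
      writeFixup(Buf, 0, 0x12345678, 4, 4, /*IsLittle=*/false);
      printf("%02x %02x %02x %02x\n", Buf[0], Buf[1], Buf[2], Buf[3]); // 12 34 56 78
      return 0;
    }

The read loop in the patch is the exact inverse, assembling CurVal from the same mirrored indices before the masked addition is applied.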
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index b039678..9ebb6d2 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -49,9 +49,9 @@ public:
void EmitInstruction(uint64_t Val, unsigned Size, raw_ostream &OS) const {
// Output the instruction encoding in the target's byte order.
- for (unsigned i = 0; i != Size; ++i) {
- EmitByte(Val & 255, OS);
- Val >>= 8;
+ for (unsigned i = 0; i < Size; ++i) {
+ unsigned Shift = IsLittleEndian ? i * 8 : (Size - 1 - i) * 8;
+ EmitByte((Val >> Shift) & 0xff, OS);
}
}
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index bacecf2..bafadc8 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -21,8 +21,6 @@
namespace llvm {
class MipsTargetMachine;
class FunctionPass;
- class MachineCodeEmitter;
- class formatted_raw_ostream;
FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.cpp b/lib/Target/Mips/MipsAnalyzeImmediate.cpp
index 31b669a..dc8fbd0 100644
--- a/lib/Target/Mips/MipsAnalyzeImmediate.cpp
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.cpp
@@ -26,28 +26,28 @@ void MipsAnalyzeImmediate::AddInstr(InstSeqLs &SeqLs, const Inst &I) {
Iter->push_back(I);
}
-void MipsAnalyzeImmediate::GetInstSeqLsADDiu(int64_t Imm, unsigned RemSize,
+void MipsAnalyzeImmediate::GetInstSeqLsADDiu(uint64_t Imm, unsigned RemSize,
InstSeqLs &SeqLs) {
- GetInstSeqLs((Imm + 0x8000) & ~0xffff, RemSize, SeqLs);
- AddInstr(SeqLs, Inst(ADDiu, Imm & 0xffff));
+ GetInstSeqLs((Imm + 0x8000ULL) & 0xffffffffffff0000ULL, RemSize, SeqLs);
+ AddInstr(SeqLs, Inst(ADDiu, Imm & 0xffffULL));
}
-void MipsAnalyzeImmediate::GetInstSeqLsORi(int64_t Imm, unsigned RemSize,
+void MipsAnalyzeImmediate::GetInstSeqLsORi(uint64_t Imm, unsigned RemSize,
InstSeqLs &SeqLs) {
- GetInstSeqLs(Imm & ~0xffff, RemSize, SeqLs);
- AddInstr(SeqLs, Inst(ORi, Imm & 0xffff));
+ GetInstSeqLs(Imm & 0xffffffffffff0000ULL, RemSize, SeqLs);
+ AddInstr(SeqLs, Inst(ORi, Imm & 0xffffULL));
}
-void MipsAnalyzeImmediate::GetInstSeqLsSLL(int64_t Imm, unsigned RemSize,
+void MipsAnalyzeImmediate::GetInstSeqLsSLL(uint64_t Imm, unsigned RemSize,
InstSeqLs &SeqLs) {
unsigned Shamt = CountTrailingZeros_64(Imm);
GetInstSeqLs(Imm >> Shamt, RemSize - Shamt, SeqLs);
AddInstr(SeqLs, Inst(SLL, Shamt));
}
-void MipsAnalyzeImmediate::GetInstSeqLs(int64_t Imm, unsigned RemSize,
+void MipsAnalyzeImmediate::GetInstSeqLs(uint64_t Imm, unsigned RemSize,
InstSeqLs &SeqLs) {
- int64_t MaskedImm = Imm & (((uint64_t)-1) >> (64 - Size));
+ uint64_t MaskedImm = Imm & (0xffffffffffffffffULL >> (64 - Size));
// Do nothing if Imm is 0.
if (!MaskedImm)
@@ -122,7 +122,7 @@ void MipsAnalyzeImmediate::GetShortestSeq(InstSeqLs &SeqLs, InstSeq &Insts) {
}
const MipsAnalyzeImmediate::InstSeq
-&MipsAnalyzeImmediate::Analyze(int64_t Imm, unsigned Size,
+&MipsAnalyzeImmediate::Analyze(uint64_t Imm, unsigned Size,
bool LastInstrIsADDiu) {
this->Size = Size;
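The move from int64_t to uint64_t is not cosmetic: these helpers shift and mask Imm directly, and on a signed operand >> is an arithmetic shift that drags the sign bit along, while the new explicit 64-bit mask constants avoid mixed-width sign-extension surprises. A hypothetical standalone check of the difference (right-shifting a negative signed value is implementation-defined before C++20, though mainstream compilers sign-fill):

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t  S = INT64_MIN;      // bit pattern 0x8000000000000000
      uint64_t U = (uint64_t)S;
      // Logical shift on the unsigned value zero-fills from the left.
      assert((U >> 16) == 0x0000800000000000ULL);
      // Arithmetic shift on the signed value replicates the sign bit.
      assert((S >> 16) == (int64_t)0xFFFF800000000000ULL);
      return 0;
    }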
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.h b/lib/Target/Mips/MipsAnalyzeImmediate.h
index 24e6e5f..a094dda 100644
--- a/lib/Target/Mips/MipsAnalyzeImmediate.h
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.h
@@ -25,7 +25,7 @@ namespace llvm {
/// Analyze - Get an instruction sequence to load immediate Imm. The last
/// instruction in the sequence must be an ADDiu if LastInstrIsADDiu is
/// true.
- const InstSeq &Analyze(int64_t Imm, unsigned Size, bool LastInstrIsADDiu);
+ const InstSeq &Analyze(uint64_t Imm, unsigned Size, bool LastInstrIsADDiu);
private:
typedef SmallVector<InstSeq, 5> InstSeqLs;
@@ -34,18 +34,18 @@ namespace llvm {
/// GetInstSeqLsADDiu - Get instruction sequences which end with an ADDiu to
/// load immediate Imm.
- void GetInstSeqLsADDiu(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+ void GetInstSeqLsADDiu(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
/// GetInstSeqLsORi - Get instruction sequences which end with an ORi to
/// load immediate Imm.
- void GetInstSeqLsORi(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+ void GetInstSeqLsORi(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
/// GetInstSeqLsSLL - Get instruction sequences which end with an SLL to
/// load immediate Imm.
- void GetInstSeqLsSLL(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+ void GetInstSeqLsSLL(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
/// GetInstSeqLs - Get instruction sequences to load immediate Imm.
- void GetInstSeqLs(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+ void GetInstSeqLs(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
/// ReplaceADDiuSLLWithLUi - Replace an ADDiu & SLL pair with a LUi.
void ReplaceADDiuSLLWithLUi(InstSeq &Seq);
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index aeabc0f..f2b842a 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-asm-printer"
-#include "Mips.h"
#include "MipsAsmPrinter.h"
+#include "Mips.h"
#include "MipsInstrInfo.h"
#include "MipsMachineFunction.h"
#include "MipsMCInstLower.h"
@@ -34,8 +34,6 @@
#include "llvm/Instructions.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index 8502db2..473da7e 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -22,9 +22,9 @@
namespace llvm {
class MCStreamer;
class MachineInstr;
-class raw_ostream;
class MachineBasicBlock;
class Module;
+class raw_ostream;
class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter {
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index e83c64e..ebfbb4a 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsAnalyzeImmediate.h"
#include "MipsFrameLowering.h"
+#include "MipsAnalyzeImmediate.h"
#include "MipsInstrInfo.h"
#include "MipsMachineFunction.h"
#include "MCTargetDesc/MipsBaseInfo.h"
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 782d203..536879e 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -99,6 +99,8 @@ private:
return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
}
+ void ProcessFunctionAfterISel(MachineFunction &MF);
+ bool ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
void InitGlobalBaseReg(MachineFunction &MF);
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
@@ -181,10 +183,57 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
}
}
+bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI,
+ const MachineInstr& MI) {
+ unsigned DstReg = 0, ZeroReg = 0;
+
+ // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
+ if ((MI.getOpcode() == Mips::ADDiu) &&
+ (MI.getOperand(1).getReg() == Mips::ZERO) &&
+ (MI.getOperand(2).getImm() == 0)) {
+ DstReg = MI.getOperand(0).getReg();
+ ZeroReg = Mips::ZERO;
+ } else if ((MI.getOpcode() == Mips::DADDiu) &&
+ (MI.getOperand(1).getReg() == Mips::ZERO_64) &&
+ (MI.getOperand(2).getImm() == 0)) {
+ DstReg = MI.getOperand(0).getReg();
+ ZeroReg = Mips::ZERO_64;
+ }
+
+ if (!DstReg)
+ return false;
+
+ // Replace uses with ZeroReg.
+ for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
+ E = MRI->use_end(); U != E; ++U) {
+ MachineOperand &MO = U.getOperand();
+ MachineInstr *MI = MO.getParent();
+
+ // Do not replace if it is a phi's operand or is tied to def operand.
+ if (MI->isPHI() || MI->isRegTiedToDefOperand(U.getOperandNo()))
+ continue;
+
+ MO.setReg(ZeroReg);
+ }
+
+ return true;
+}
+
+void MipsDAGToDAGISel::ProcessFunctionAfterISel(MachineFunction &MF) {
+ InitGlobalBaseReg(MF);
+
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+
+ for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE;
+ ++MFI)
+ for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
+ ReplaceUsesWithZeroReg(MRI, *I);
+}
+
bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
bool Ret = SelectionDAGISel::runOnMachineFunction(MF);
- InitGlobalBaseReg(MF);
+ ProcessFunctionAfterISel(MF);
return Ret;
}
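ReplaceUsesWithZeroReg folds reads of a register that was just defined as "(d)addiu $dst, $zero, 0" into direct reads of the hardwired zero register, skipping PHI operands and operands tied to a def, exactly as the loop above does over MachineRegisterInfo's use list. A toy model of that rewrite over a simplified instruction representation (the structs here are illustrative stand-ins, not LLVM types):

    #include <cstdio>
    #include <vector>

    struct Op   { int Reg; bool IsDef; bool TiedToDef; };
    struct Inst { bool IsPHI; std::vector<Op> Ops; };

    // Rewrite every eligible use of Dst so it reads Zero directly.
    static void replaceUsesWithZero(std::vector<Inst> &MF, int Dst, int Zero) {
      for (auto &I : MF) {
        if (I.IsPHI) continue;                 // PHI operands keep their reg.
        for (auto &O : I.Ops)
          if (!O.IsDef && !O.TiedToDef && O.Reg == Dst)
            O.Reg = Zero;                      // Read $zero instead of the copy.
      }
    }

    int main() {
      std::vector<Inst> MF = {
        {false, {{100, true, false}}},                      // %100 = addiu $zero, 0
        {false, {{101, true, false}, {100, false, false}}}, // ... = use %100
      };
      replaceUsesWithZero(MF, 100, /*Zero=*/0);
      printf("use now reads reg %d\n", MF[1].Ops[1].Reg);   // prints 0
      return 0;
    }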
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index dc894d9..ecde5b6 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -18,13 +18,13 @@
#include "MipsTargetMachine.h"
#include "MipsTargetObjectFile.h"
#include "MipsSubtarget.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
-#include "InstPrinter/MipsInstPrinter.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -130,22 +130,32 @@ MipsTargetLowering(MipsTargetMachine &TM)
// Mips Custom Operations
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
- setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
- setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
- setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
- setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
- setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::f64, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ if (HasMips64) {
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::i64, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
+ }
setOperationAction(ISD::SDIV, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i32, Expand);
@@ -185,8 +195,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
@@ -214,9 +222,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
-
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
@@ -246,11 +251,11 @@ MipsTargetLowering(MipsTargetMachine &TM)
setTargetDAGCombine(ISD::SUBE);
setTargetDAGCombine(ISD::SDIVREM);
setTargetDAGCombine(ISD::UDIVREM);
- setTargetDAGCombine(ISD::SETCC);
+ setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
- setMinFunctionAlignment(2);
+ setMinFunctionAlignment(HasMips64 ? 3 : 2);
setStackPointerRegisterToSaveRestore(IsN64 ? Mips::SP_64 : Mips::SP);
computeRegisterProperties();
@@ -559,21 +564,37 @@ static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True,
True.getValueType(), True, False, Cond);
}
-static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG& DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget* Subtarget) {
+static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
- SDValue Cond = CreateFPCmp(DAG, SDValue(N, 0));
+ SDValue SetCC = N->getOperand(0);
- if (Cond.getOpcode() != MipsISD::FPCmp)
+ if ((SetCC.getOpcode() != ISD::SETCC) ||
+ !SetCC.getOperand(0).getValueType().isInteger())
return SDValue();
- SDValue True = DAG.getConstant(1, MVT::i32);
- SDValue False = DAG.getConstant(0, MVT::i32);
+ SDValue False = N->getOperand(2);
+ EVT FalseTy = False.getValueType();
- return CreateCMovFP(DAG, Cond, True, False, N->getDebugLoc());
+ if (!FalseTy.isInteger())
+ return SDValue();
+
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(False);
+
+ if (!CN || CN->getZExtValue())
+ return SDValue();
+
+ const DebugLoc DL = N->getDebugLoc();
+ ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
+ SDValue True = N->getOperand(1);
+
+ SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0),
+ SetCC.getOperand(1), ISD::getSetCCInverse(CC, true));
+
+ return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True);
}
static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG,
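PerformSELECTCombine replaces PerformSETCCCombine: instead of turning every floating-point setcc into a conditional move, it targets integer selects whose false operand is the constant zero, rewriting (select (setcc lhs, rhs, cc), True, 0) as (select (setcc lhs, rhs, inverse(cc)), 0, True), so that MIPS conditional moves can source the zero straight from $zero. The identity it relies on, checked exhaustively over a small range:

    #include <cassert>

    int main() {
      for (int c = 0; c <= 1; ++c)
        for (int t = -5; t <= 5; ++t)
          assert((c ? t : 0) == (!c ? 0 : t)); // inverted condition, swapped arms
      return 0;
    }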
@@ -684,8 +705,8 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
case ISD::SDIVREM:
case ISD::UDIVREM:
return PerformDivRemCombine(N, DAG, DCI, Subtarget);
- case ISD::SETCC:
- return PerformSETCCCombine(N, DAG, DCI, Subtarget);
+ case ISD::SELECT:
+ return PerformSELECTCombine(N, DAG, DCI, Subtarget);
case ISD::AND:
return PerformANDCombine(N, DAG, DCI, Subtarget);
case ISD::OR:
@@ -708,6 +729,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
@@ -1475,6 +1497,18 @@ LowerSELECT(SDValue Op, SelectionDAG &DAG) const
Op.getDebugLoc());
}
+SDValue MipsTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = CreateFPCmp(DAG, Op);
+
+ assert(Cond.getOpcode() == MipsISD::FPCmp &&
+ "Floating point operand expected.");
+
+ SDValue True = DAG.getConstant(1, MVT::i32);
+ SDValue False = DAG.getConstant(0, MVT::i32);
+
+ return CreateCMovFP(DAG, Cond, True, False, Op.getDebugLoc());
+}
+
SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
// FIXME there isn't actually debug info here
@@ -1841,13 +1875,13 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
static const unsigned IntRegsSize=4, FloatRegsSize=2;
- static const unsigned IntRegs[] = {
+ static const uint16_t IntRegs[] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
};
- static const unsigned F32Regs[] = {
+ static const uint16_t F32Regs[] = {
Mips::F12, Mips::F14
};
- static const unsigned F64Regs[] = {
+ static const uint16_t F64Regs[] = {
Mips::D6, Mips::D7
};
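This hunk begins a change repeated throughout the section: static register tables move from unsigned to uint16_t, presumably because physical register numbers comfortably fit in 16 bits, halving each table's footprint. A quick sanity sketch of the size win:

    #include <cstdint>
    #include <cstdio>

    int main() {
      static const unsigned Wide[8]   = {1, 2, 3, 4, 5, 6, 7, 8};
      static const uint16_t Narrow[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      // Same contents, half the static data.
      printf("%zu vs %zu bytes\n", sizeof(Wide), sizeof(Narrow)); // 32 vs 16
      return 0;
    }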
@@ -1926,10 +1960,10 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
return false; // CC must always match
}
-static const unsigned Mips64IntRegs[8] =
+static const uint16_t Mips64IntRegs[8] =
{Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64,
Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64};
-static const unsigned Mips64DPRegs[8] =
+static const uint16_t Mips64DPRegs[8] =
{Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64,
Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64};
@@ -1996,7 +2030,7 @@ AnalyzeMips64CallOperands(CCState &CCInfo,
static const unsigned O32IntRegsSize = 4;
-static const unsigned O32IntRegs[] = {
+static const uint16_t O32IntRegs[] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
};
@@ -2115,9 +2149,9 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
if (!IsRegLoc)
LocMemOffset = VA.getLocMemOffset();
else {
- const unsigned *Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8,
+ const uint16_t *Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8,
VA.getLocReg());
- const unsigned *RegEnd = Mips64IntRegs + 8;
+ const uint16_t *RegEnd = Mips64IntRegs + 8;
// Copy double words to registers.
for (; (Reg != RegEnd) && (ByValSize >= Offset + 8); ++Reg, Offset += 8) {
@@ -2540,7 +2574,7 @@ CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl,
MachineFrameInfo *MFI, bool IsRegLoc,
SmallVectorImpl<SDValue> &InVals, MipsFunctionInfo *MipsFI,
EVT PtrTy) {
- const unsigned *Reg = Mips64IntRegs + 8;
+ const uint16_t *Reg = Mips64IntRegs + 8;
int FOOffset; // Frame object offset from virtual frame pointer.
if (IsRegLoc) {
@@ -2709,7 +2743,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
if (isVarArg) {
unsigned NumOfRegs = IsO32 ? 4 : 8;
- const unsigned *ArgRegs = IsO32 ? O32IntRegs : Mips64IntRegs;
+ const uint16_t *ArgRegs = IsO32 ? O32IntRegs : Mips64IntRegs;
unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumOfRegs);
int FirstRegSlotOffset = IsO32 ? 0 : -64 ; // offset of $a0's slot.
const TargetRegisterClass *RC
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 621bbec..66f45cd 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -15,10 +15,10 @@
#ifndef MipsISELLOWERING_H
#define MipsISELLOWERING_H
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLowering.h"
#include "Mips.h"
#include "MipsSubtarget.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
namespace llvm {
namespace MipsISD {
@@ -128,6 +128,7 @@ namespace llvm {
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 10caf30..4be727d 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -15,9 +15,9 @@
#define MIPSINSTRUCTIONINFO_H
#include "Mips.h"
+#include "MipsRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "MipsRegisterInfo.h"
#define GET_INSTRINFO_HEADER
#include "MipsGenInstrInfo.inc"
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index be65298..0d51298 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -12,9 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#include "MipsMCInstLower.h"
#include "MipsAsmPrinter.h"
#include "MipsInstrInfo.h"
-#include "MipsMCInstLower.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index cbd5264..20bb338 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -14,11 +14,9 @@
#include "llvm/Support/Compiler.h"
namespace llvm {
- class MCAsmInfo;
class MCContext;
class MCInst;
class MCOperand;
- class MCSymbol;
class MachineInstr;
class MachineFunction;
class Mangler;
@@ -38,7 +36,7 @@ public:
void LowerCPLOAD(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts);
void LowerCPRESTORE(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts);
void LowerUnalignedLoadStore(const MachineInstr *MI,
- SmallVector<MCInst, 4>& MCInsts);
+ SmallVector<MCInst, 4>& MCInsts);
void LowerSETGP01(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts);
private:
MCOperand LowerSymbolOperand(const MachineOperand &MO,
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index 57ff069..abb5404 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -14,10 +14,10 @@
#ifndef MIPS_MACHINE_FUNCTION_INFO_H
#define MIPS_MACHINE_FUNCTION_INFO_H
-#include <utility>
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include <utility>
namespace llvm {
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index e0ecba2..5cfda34 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -13,10 +13,10 @@
#define DEBUG_TYPE "mips-reg-info"
+#include "MipsRegisterInfo.h"
#include "Mips.h"
#include "MipsAnalyzeImmediate.h"
#include "MipsSubtarget.h"
-#include "MipsRegisterInfo.h"
#include "MipsMachineFunction.h"
#include "llvm/Constants.h"
#include "llvm/Type.h"
@@ -83,12 +83,12 @@ MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const
BitVector MipsRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
- static const unsigned ReservedCPURegs[] = {
+ static const uint16_t ReservedCPURegs[] = {
Mips::ZERO, Mips::AT, Mips::K0, Mips::K1,
Mips::SP, Mips::FP, Mips::RA
};
- static const unsigned ReservedCPU64Regs[] = {
+ static const uint16_t ReservedCPU64Regs[] = {
Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64,
Mips::SP_64, Mips::FP_64, Mips::RA_64
};
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 8806aaf..ad02231 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "Mips.h"
#include "MipsTargetMachine.h"
+#include "Mips.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 19ae142..80c00e8 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -14,15 +14,15 @@
#ifndef MIPSTARGETMACHINE_H
#define MIPSTARGETMACHINE_H
-#include "MipsSubtarget.h"
+#include "MipsFrameLowering.h"
#include "MipsInstrInfo.h"
#include "MipsISelLowering.h"
-#include "MipsFrameLowering.h"
+#include "MipsJITInfo.h"
#include "MipsSelectionDAGInfo.h"
+#include "MipsSubtarget.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
-#include "MipsJITInfo.h"
namespace llvm {
class formatted_raw_ostream;
diff --git a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
index 77a298d..a3e0f32 100644
--- a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
+++ b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
@@ -17,9 +17,9 @@
#ifndef PTXBASEINFO_H
#define PTXBASEINFO_H
+#include "PTXMCTargetDesc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "PTXMCTargetDesc.h"
namespace llvm {
namespace PTXStateSpace {
diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h
index 7d46cce..ffb92cb 100644
--- a/lib/Target/PTX/PTX.h
+++ b/lib/Target/PTX/PTX.h
@@ -1,4 +1,3 @@
-//===-- PTX.h - Top-level interface for PTX representation ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp
index 58ac5f2..0b6ac7b 100644
--- a/lib/Target/PTX/PTXAsmPrinter.cpp
+++ b/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -14,8 +14,8 @@
#define DEBUG_TYPE "ptx-asm-printer"
-#include "PTX.h"
#include "PTXAsmPrinter.h"
+#include "PTX.h"
#include "PTXMachineFunctionInfo.h"
#include "PTXParamManager.h"
#include "PTXRegisterInfo.h"
diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp
index e5d4edc..db1c953 100644
--- a/lib/Target/PTX/PTXISelLowering.cpp
+++ b/lib/Target/PTX/PTXISelLowering.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PTX.h"
#include "PTXISelLowering.h"
+#include "PTX.h"
#include "PTXMachineFunctionInfo.h"
#include "PTXRegisterInfo.h"
#include "PTXSubtarget.h"
diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h
index fd20982..33220f4 100644
--- a/lib/Target/PTX/PTXISelLowering.h
+++ b/lib/Target/PTX/PTXISelLowering.h
@@ -18,8 +18,6 @@
#include "llvm/Target/TargetLowering.h"
namespace llvm {
-class PTXSubtarget;
-class PTXTargetMachine;
namespace PTXISD {
enum NodeType {
diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp
index 9d6cbf1..443cd54 100644
--- a/lib/Target/PTX/PTXInstrInfo.cpp
+++ b/lib/Target/PTX/PTXInstrInfo.cpp
@@ -13,8 +13,8 @@
#define DEBUG_TYPE "ptx-instrinfo"
-#include "PTX.h"
#include "PTXInstrInfo.h"
+#include "PTX.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
diff --git a/lib/Target/PTX/PTXParamManager.cpp b/lib/Target/PTX/PTXParamManager.cpp
index 74538e6..cc1cc71 100644
--- a/lib/Target/PTX/PTXParamManager.cpp
+++ b/lib/Target/PTX/PTXParamManager.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PTX.h"
#include "PTXParamManager.h"
+#include "PTX.h"
#include "llvm/ADT/StringExtras.h"
using namespace llvm;
diff --git a/lib/Target/PTX/PTXParamManager.h b/lib/Target/PTX/PTXParamManager.h
index 32342f7..92e7728 100644
--- a/lib/Target/PTX/PTXParamManager.h
+++ b/lib/Target/PTX/PTXParamManager.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include <string>
namespace llvm {
diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp
index 3f087cd..b6ffd38 100644
--- a/lib/Target/PTX/PTXRegisterInfo.cpp
+++ b/lib/Target/PTX/PTXRegisterInfo.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PTX.h"
#include "PTXRegisterInfo.h"
+#include "PTX.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp
index 9305377..40835d0 100644
--- a/lib/Target/PTX/PTXTargetMachine.cpp
+++ b/lib/Target/PTX/PTXTargetMachine.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PTX.h"
#include "PTXTargetMachine.h"
+#include "PTX.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
@@ -26,6 +26,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
@@ -37,8 +38,6 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 02dad45..9c6eefe 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -7,9 +7,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCAsmBackend.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 5dc1863..24a7178 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -25,14 +25,11 @@
namespace llvm {
class PPCTargetMachine;
class FunctionPass;
- class formatted_raw_ostream;
class JITCodeEmitter;
- class Target;
class MachineInstr;
class AsmPrinter;
class MCInst;
- class TargetMachine;
-
+
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 591ae02..4abb469 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -20,6 +20,7 @@
#include "PPC.h"
#include "PPCTargetMachine.h"
#include "PPCSubtarget.h"
+#include "InstPrinter/PPCInstPrinter.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
@@ -53,7 +54,6 @@
#include "llvm/Support/ELF.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/SmallString.h"
-#include "InstPrinter/PPCInstPrinter.h"
using namespace llvm;
namespace {
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index 8efc9c1..9883c2e 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -130,3 +130,34 @@ def CC_PPC_SVR4_ByVal : CallingConv<[
CCCustom<"CC_PPC_SVR4_Custom_Dummy">
]>;
+def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28,
+ R29, R30, R31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4,
+ V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
+
+def CSR_SVR432 : CalleeSavedRegs<(add R14, R15, R16, R17, R18, R19, R20, VRSAVE,
+ R21, R22, R23, R24, R25, R26, R27, R28,
+ R29, R30, R31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4,
+ V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
+
+def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20,
+ X21, X22, X23, X24, X25, X26, X27, X28,
+ X29, X30, X31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4,
+ V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
+
+def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAVE,
+ X21, X22, X23, X24, X25, X26, X27, X28,
+ X29, X30, X31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4,
+ V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 6d612f7..b77a80b 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -38,7 +38,7 @@ using namespace llvm;
/// VRRegNo - Map from a numbered VR register to its enum value.
///
-static const unsigned short VRRegNo[] = {
+static const uint16_t VRRegNo[] = {
PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index 95d0d64..d80a385 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -14,10 +14,10 @@
#ifndef PPCHAZRECS_H
#define PPCHAZRECS_H
+#include "PPCInstrInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "PPCInstrInfo.h"
namespace llvm {
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index bfed7ba..85b5bc1 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -16,6 +16,11 @@
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -24,16 +29,11 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
@@ -1547,7 +1547,7 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
@@ -1574,7 +1574,7 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
@@ -1598,8 +1598,8 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
/// GetFPR - Get the set of FP registers that should be allocated for arguments,
/// on Darwin.
-static const unsigned *GetFPR() {
- static const unsigned FPR[] = {
+static const uint16_t *GetFPR() {
+ static const uint16_t FPR[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
};
@@ -1780,13 +1780,13 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- static const unsigned GPArgRegs[] = {
+ static const uint16_t GPArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
- static const unsigned FPArgRegs[] = {
+ static const uint16_t FPArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
@@ -1879,18 +1879,18 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
// Area that is at least reserved in caller of this function.
unsigned MinReservedArea = ArgOffset;
- static const unsigned GPR_32[] = { // 32-bit registers.
+ static const uint16_t GPR_32[] = { // 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
- static const unsigned GPR_64[] = { // 64-bit registers.
+ static const uint16_t GPR_64[] = { // 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const unsigned *FPR = GetFPR();
+ static const uint16_t *FPR = GetFPR();
- static const unsigned VR[] = {
+ static const uint16_t VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
@@ -1901,7 +1901,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+ const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
// In 32-bit non-varargs functions, the stack space for vectors is after the
// stack space for non-vectors. We do not use this space unless we have
@@ -2769,6 +2769,12 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
(CallConv == CallingConv::Fast &&
getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -3141,17 +3147,17 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- static const unsigned GPR_32[] = { // 32-bit registers.
+ static const uint16_t GPR_32[] = { // 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
- static const unsigned GPR_64[] = { // 64-bit registers.
+ static const uint16_t GPR_64[] = { // 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const unsigned *FPR = GetFPR();
+ static const uint16_t *FPR = GetFPR();
- static const unsigned VR[] = {
+ static const uint16_t VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
@@ -3159,7 +3165,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
const unsigned NumFPRs = 13;
const unsigned NumVRs = array_lengthof(VR);
- const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+ const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 3534e9c..2e046c4 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -15,10 +15,10 @@
#ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/CodeGen/SelectionDAG.h"
#include "PPC.h"
#include "PPCSubtarget.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
namespace llvm {
namespace PPCISD {
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 02bffed..78f3596 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -64,13 +64,7 @@ let Defs = [LR8] in
PPC970_Unit_BRU;
// Darwin ABI Calls.
-let isCall = 1, PPC970_Unit = 7,
- // All calls clobber the PPC64 non-callee saved registers.
- Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
- F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
- V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
- LR8,CTR8,
- CR0,CR1,CR5,CR6,CR7,CARRY] in {
+let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL8_Darwin : IForm<18, 0, 1,
@@ -90,13 +84,7 @@ let isCall = 1, PPC970_Unit = 7,
// ELF 64 ABI Calls = Darwin ABI Calls
// Used to define BL8_ELF and BLA8_ELF
-let isCall = 1, PPC970_Unit = 7,
- // All calls clobber the PPC64 non-callee saved registers.
- Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
- F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
- V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
- LR8,CTR8,
- CR0,CR1,CR5,CR6,CR7,CARRY] in {
+let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL8_ELF : IForm<18, 0, 1,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index e5f171d..7d49aa1 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -15,8 +15,8 @@
#define POWERPC_INSTRUCTIONINFO_H
#include "PPC.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "PPCRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "PPCGenInstrInfo.inc"
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index e234012..939b71a 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -438,13 +438,7 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
}
// Darwin ABI Calls.
-let isCall = 1, PPC970_Unit = 7,
- // All calls clobber the non-callee saved registers...
- Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
- F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
- V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
- LR,CTR,
- CR0,CR1,CR5,CR6,CR7,CARRY] in {
+let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL_Darwin : IForm<18, 0, 1,
@@ -463,13 +457,7 @@ let isCall = 1, PPC970_Unit = 7,
}
// SVR4 ABI Calls.
-let isCall = 1, PPC970_Unit = 7,
- // All calls clobber the non-callee saved registers...
- Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
- F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
- V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
- LR,CTR,
- CR0,CR1,CR5,CR6,CR7,CARRY] in {
+let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL_SVR4 : IForm<18, 0, 1,
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 306cc1f..2976f01 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -13,10 +13,10 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "reginfo"
+#include "PPCRegisterInfo.h"
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
-#include "PPCRegisterInfo.h"
#include "PPCFrameLowering.h"
#include "PPCSubtarget.h"
#include "llvm/CallingConv.h"
@@ -100,104 +100,20 @@ PPCRegisterInfo::getPointerRegClass(unsigned Kind) const {
const uint16_t*
PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- // 32-bit Darwin calling convention.
- static const uint16_t Darwin32_CalleeSavedRegs[] = {
- PPC::R13, PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31,
-
- PPC::F14, PPC::F15, PPC::F16, PPC::F17,
- PPC::F18, PPC::F19, PPC::F20, PPC::F21,
- PPC::F22, PPC::F23, PPC::F24, PPC::F25,
- PPC::F26, PPC::F27, PPC::F28, PPC::F29,
- PPC::F30, PPC::F31,
-
- PPC::CR2, PPC::CR3, PPC::CR4,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31,
-
- PPC::LR, 0
- };
-
- // 32-bit SVR4 calling convention.
- static const uint16_t SVR4_CalleeSavedRegs[] = {
- PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31,
-
- PPC::F14, PPC::F15, PPC::F16, PPC::F17,
- PPC::F18, PPC::F19, PPC::F20, PPC::F21,
- PPC::F22, PPC::F23, PPC::F24, PPC::F25,
- PPC::F26, PPC::F27, PPC::F28, PPC::F29,
- PPC::F30, PPC::F31,
-
- PPC::CR2, PPC::CR3, PPC::CR4,
-
- PPC::VRSAVE,
-
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31,
-
- 0
- };
- // 64-bit Darwin calling convention.
- static const uint16_t Darwin64_CalleeSavedRegs[] = {
- PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31,
-
- PPC::F14, PPC::F15, PPC::F16, PPC::F17,
- PPC::F18, PPC::F19, PPC::F20, PPC::F21,
- PPC::F22, PPC::F23, PPC::F24, PPC::F25,
- PPC::F26, PPC::F27, PPC::F28, PPC::F29,
- PPC::F30, PPC::F31,
-
- PPC::CR2, PPC::CR3, PPC::CR4,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31,
-
- PPC::LR8, 0
- };
-
- // 64-bit SVR4 calling convention.
- static const uint16_t SVR4_64_CalleeSavedRegs[] = {
- PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31,
-
- PPC::F14, PPC::F15, PPC::F16, PPC::F17,
- PPC::F18, PPC::F19, PPC::F20, PPC::F21,
- PPC::F22, PPC::F23, PPC::F24, PPC::F25,
- PPC::F26, PPC::F27, PPC::F28, PPC::F29,
- PPC::F30, PPC::F31,
-
- PPC::CR2, PPC::CR3, PPC::CR4,
-
- PPC::VRSAVE,
+ if (Subtarget.isDarwinABI())
+ return Subtarget.isPPC64() ? CSR_Darwin64_SaveList :
+ CSR_Darwin32_SaveList;
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+ return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList;
+}
- 0
- };
-
+const unsigned*
+PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
if (Subtarget.isDarwinABI())
- return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegs :
- Darwin32_CalleeSavedRegs;
+ return Subtarget.isPPC64() ? CSR_Darwin64_RegMask :
+ CSR_Darwin32_RegMask;
- return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs;
+ return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask;
}
BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
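The generated CSR_*_RegMask arrays pack one bit per physical register, a set bit meaning the register is preserved across the call; FinishCall above attaches such a mask to every call node in place of the old explicit clobber lists. A hypothetical helper showing how a mask like this would be queried (LLVM's own query lives on MachineOperand; this only illustrates the bit layout):

    #include <cstdint>
    #include <cstdio>

    // Bit Reg of the mask array is 1 iff register Reg survives the call.
    static bool isPreserved(const uint32_t *Mask, unsigned Reg) {
      return (Mask[Reg / 32] >> (Reg % 32)) & 1;
    }

    int main() {
      uint32_t Mask[2] = {0, 0};          // room for 64 registers
      Mask[40 / 32] |= 1u << (40 % 32);   // mark register 40 preserved
      printf("reg 40: %d, reg 3: %d\n",
             isPreserved(Mask, 40), isPreserved(Mask, 3)); // 1, 0
      return 0;
    }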
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 6ce90bc..b1e6a72 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -42,6 +42,7 @@ public:
/// Code Generation virtual methods...
const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+ const unsigned *getCallPreservedMask(CallingConv::ID CC) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index da20274..ba9c779 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
#include "PPCTargetMachine.h"
+#include "PPC.h"
#include "llvm/PassManager.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/CodeGen/Passes.h"
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 6dd11c9..7da2b0c 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -24,8 +24,6 @@
#include "llvm/Target/TargetData.h"
namespace llvm {
-class PassManager;
-class GlobalValue;
/// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
///
diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp
index 1423b1e..9a729bd 100644
--- a/lib/Target/Sparc/FPMover.cpp
+++ b/lib/Target/Sparc/FPMover.cpp
@@ -59,19 +59,19 @@ FunctionPass *llvm::createSparcFPMoverPass(TargetMachine &tm) {
/// registers that correspond to it.
static void getDoubleRegPair(unsigned DoubleReg, unsigned &EvenReg,
unsigned &OddReg) {
- static const unsigned EvenHalvesOfPairs[] = {
+ static const uint16_t EvenHalvesOfPairs[] = {
SP::F0, SP::F2, SP::F4, SP::F6, SP::F8, SP::F10, SP::F12, SP::F14,
SP::F16, SP::F18, SP::F20, SP::F22, SP::F24, SP::F26, SP::F28, SP::F30
};
- static const unsigned OddHalvesOfPairs[] = {
+ static const uint16_t OddHalvesOfPairs[] = {
SP::F1, SP::F3, SP::F5, SP::F7, SP::F9, SP::F11, SP::F13, SP::F15,
SP::F17, SP::F19, SP::F21, SP::F23, SP::F25, SP::F27, SP::F29, SP::F31
};
- static const unsigned DoubleRegsInOrder[] = {
+ static const uint16_t DoubleRegsInOrder[] = {
SP::D0, SP::D1, SP::D2, SP::D3, SP::D4, SP::D5, SP::D6, SP::D7, SP::D8,
SP::D9, SP::D10, SP::D11, SP::D12, SP::D13, SP::D14, SP::D15
};
- for (unsigned i = 0; i < sizeof(DoubleRegsInOrder)/sizeof(unsigned); ++i)
+ for (unsigned i = 0; i < array_lengthof(DoubleRegsInOrder); ++i)
if (DoubleRegsInOrder[i] == DoubleReg) {
EvenReg = EvenHalvesOfPairs[i];
OddReg = OddHalvesOfPairs[i];
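Alongside the uint16_t change, the loop bound swaps sizeof(DoubleRegsInOrder)/sizeof(unsigned) for array_lengthof: the hand-written divisor silently yields half the real count the moment the element type shrinks below unsigned. A minimal equivalent of llvm::array_lengthof and the failure it prevents:

    #include <cstdint>
    #include <cstdio>

    // Deduces the element count from the array type itself.
    template <typename T, unsigned N>
    unsigned array_lengthof(T (&)[N]) { return N; }

    int main() {
      static const uint16_t Regs[16] = {0};
      printf("%zu\n", sizeof(Regs) / sizeof(unsigned)); // 8  -- stale divisor
      printf("%u\n",  array_lengthof(Regs));            // 16 -- correct
      return 0;
    }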
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index a6b63fb..ee12633 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -50,7 +50,7 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
MVT &LocVT, CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State)
{
- static const unsigned RegList[] = {
+ static const uint16_t RegList[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
};
//Try to get first reg
@@ -301,11 +301,11 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
// Store remaining ArgRegs to the stack if this is a varargs function.
if (isVarArg) {
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
};
unsigned NumAllocated = CCInfo.getFirstUnallocated(ArgRegs, 6);
- const unsigned *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
+ const uint16_t *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
unsigned ArgOffset = CCInfo.getNextStackOffset();
if (NumAllocated == 6)
ArgOffset += StackOffset;
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 4a7c479..f483c96 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -15,8 +15,8 @@
#ifndef SPARC_ISELLOWERING_H
#define SPARC_ISELLOWERING_H
-#include "llvm/Target/TargetLowering.h"
#include "Sparc.h"
+#include "llvm/Target/TargetLowering.h"
namespace llvm {
namespace SPISD {
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index 4932531..204f698 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -14,8 +14,8 @@
#ifndef SPARCINSTRUCTIONINFO_H
#define SPARCINSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "SparcRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "SparcGenInstrInfo.inc"
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index c392fcc..6357468 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -11,15 +11,15 @@
//
//===----------------------------------------------------------------------===//
-#include "Sparc.h"
#include "SparcRegisterInfo.h"
+#include "Sparc.h"
#include "SparcSubtarget.h"
+#include "llvm/Type.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Type.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 80a3be6..6f31356 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#include "Sparc.h"
#include "SparcTargetMachine.h"
+#include "Sparc.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index d91830f..9e88472 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -67,11 +67,11 @@ private:
MCStreamer &Out);
/// isSrcOp - Returns true if operand is either (%rsi) or %ds:(%rsi)
- /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
+ /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode.
bool isSrcOp(X86Operand &Op);
- /// isDstOp - Returns true if operand is either %es:(%rdi) in 64bit mode
- /// or %es:(%edi) in 32bit mode.
+ /// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi)
+ /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
bool isDstOp(X86Operand &Op);
bool is64BitMode() const {
@@ -468,7 +468,8 @@ bool X86AsmParser::isSrcOp(X86Operand &Op) {
bool X86AsmParser::isDstOp(X86Operand &Op) {
unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
- return Op.isMem() && Op.Mem.SegReg == X86::ES &&
+ return Op.isMem() &&
+ (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) &&
isa<MCConstantExpr>(Op.Mem.Disp) &&
cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
@@ -838,6 +839,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
// If we reached here, then we just ate the ( of the memory operand. Process
// the rest of the memory operand.
unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
+ SMLoc IndexLoc;
if (getLexer().is(AsmToken::Percent)) {
SMLoc StartLoc, EndLoc;
@@ -851,6 +853,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
if (getLexer().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the comma.
+ IndexLoc = Parser.getTok().getLoc();
// Following the comma we should have either an index register, or a scale
// value. We don't support the latter form, but we want to parse it
@@ -876,8 +879,10 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
SMLoc Loc = Parser.getTok().getLoc();
int64_t ScaleVal;
- if (getParser().ParseAbsoluteExpression(ScaleVal))
+ if (getParser().ParseAbsoluteExpression(ScaleVal)){
+ Error(Loc, "expected scale expression");
return 0;
+ }
// Validate the scale amount.
if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
@@ -910,6 +915,23 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
SMLoc MemEnd = Parser.getTok().getLoc();
Parser.Lex(); // Eat the ')'.
+ // If we have both a base register and an index register, make sure they are
+ // both 64-bit or 32-bit registers.
+ if (BaseReg != 0 && IndexReg != 0) {
+ if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
+ !X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) &&
+ IndexReg != X86::RIZ) {
+ Error(IndexLoc, "index register is 32-bit, but base register is 64-bit");
+ return 0;
+ }
+ if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
+ !X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) &&
+ IndexReg != X86::EIZ) {
+ Error(IndexLoc, "index register is 64-bit, but base register is 32-bit");
+ return 0;
+ }
+ }
+
return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
MemStart, MemEnd);
}
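For illustration (the operands below are assumed, not taken from the patch), the new width check should reject mixed addressing such as movl $0, (%rax,%ecx,4) with "index register is 32-bit, but base register is 64-bit" reported at the index register's location, while consistently sized forms like (%rax,%rcx,4) or (%eax,%ecx,4) still parse; %riz and %eiz remain exempt as pseudo index registers.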
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index b0e66f0..fbd81d2 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -312,6 +312,15 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (consumeByte(insn, &byte))
return -1;
+
+ /*
+ * If the first byte is a LOCK prefix, break and let it be disassembled
+ * as a lock "instruction" by creating an <MCInst #xxxx LOCK_PREFIX>.
+ * FIXME: there is currently no way to get the disassembler to print the
+ * lock prefix if it is not the first byte.
+ */
+ if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
+ break;
switch (byte) {
case 0xf0: /* LOCK */
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 30a847f..f532019 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -29,7 +29,7 @@ using namespace llvm;
void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
const char *(*getRegName)(unsigned)) {
// If this is a shuffle operation, the switch should fill in this state.
- SmallVector<unsigned, 8> ShuffleMask;
+ SmallVector<int, 8> ShuffleMask;
const char *DestName = 0, *Src1Name = 0, *Src2Name = 0;
switch (MI->getOpcode()) {
@@ -500,7 +500,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
if (Src1Name == Src2Name) {
for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
- ShuffleMask[i] >= e) // From second mask.
+ ShuffleMask[i] >= (int)e) // From second mask.
ShuffleMask[i] -= e;
}
}
@@ -518,13 +518,13 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// Otherwise, it must come from src1 or src2. Print the span of elements
// that comes from this src.
- bool isSrc1 = ShuffleMask[i] < ShuffleMask.size();
+ bool isSrc1 = ShuffleMask[i] < (int)ShuffleMask.size();
const char *SrcName = isSrc1 ? Src1Name : Src2Name;
OS << (SrcName ? SrcName : "mem") << '[';
bool IsFirst = true;
while (i != e &&
(int)ShuffleMask[i] >= 0 &&
- (ShuffleMask[i] < ShuffleMask.size()) == isSrc1) {
+ (ShuffleMask[i] < (int)ShuffleMask.size()) == isSrc1) {
if (!IsFirst)
OS << ',';
else
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 9ccbf1c..3f770f7 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -7,10 +7,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCAsmBackend.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 37727b6..80990e5 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -46,6 +46,11 @@ public:
return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
}
+ bool is32BitMode() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & X86::Mode64Bit) == 0;
+ }
+
static unsigned GetX86RegNum(const MCOperand &MO) {
return X86_MC::getX86RegNum(MO.getReg());
}
@@ -154,9 +159,8 @@ static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
return MCFixup::getKindForSize(Size, isPCRel);
}
-/// Is32BitMemOperand - Return true if the specified instruction with a memory
-/// operand should emit the 0x67 prefix byte in 64-bit mode due to a 32-bit
-/// memory operand. Op specifies the operand # of the memoperand.
+/// Is32BitMemOperand - Return true if the specified instruction has
+/// a 32-bit memory operand. Op specifies the operand # of the memoperand.
static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
@@ -169,6 +173,36 @@ static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
return false;
}
+/// Is64BitMemOperand - Return true if the specified instruction has
+/// a 64-bit memory operand. Op specifies the operand # of the memoperand.
+#ifndef NDEBUG
+static bool Is64BitMemOperand(const MCInst &MI, unsigned Op) {
+ const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg.getReg())))
+ return true;
+ return false;
+}
+#endif
+
+/// Is16BitMemOperand - Return true if the specified instruction has
+/// a 16-bit memory operand. Op specifies the operand # of the memoperand.
+static bool Is16BitMemOperand(const MCInst &MI, unsigned Op) {
+ const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg.getReg())))
+ return true;
+ return false;
+}
+
/// StartsWithGlobalOffsetTable - Check if this expression starts with
/// _GLOBAL_OFFSET_TABLE_ and if it is of the form
/// _GLOBAL_OFFSET_TABLE_-symbol. This is needed to support PIC on ELF
@@ -817,8 +851,22 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
EmitByte(0xF3, CurByte, OS);
// Emit the address size opcode prefix as needed.
- if ((TSFlags & X86II::AdSize) ||
- (MemOperand != -1 && is64BitMode() && Is32BitMemOperand(MI, MemOperand)))
+ bool need_address_override;
+ if (TSFlags & X86II::AdSize) {
+ need_address_override = true;
+ } else if (MemOperand == -1) {
+ need_address_override = false;
+ } else if (is64BitMode()) {
+ assert(!Is16BitMemOperand(MI, MemOperand));
+ need_address_override = Is32BitMemOperand(MI, MemOperand);
+ } else if (is32BitMode()) {
+ assert(!Is64BitMemOperand(MI, MemOperand));
+ need_address_override = Is16BitMemOperand(MI, MemOperand);
+ } else {
+ need_address_override = false;
+ }
+
+ if (need_address_override)
EmitByte(0x67, CurByte, OS);
// Emit the operand size opcode prefix as needed.
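A worked example of the restructured prefix logic (instruction choices assumed, not from the patch): in 64-bit mode, movl (%eax), %ebx has a 32-bit memory operand and therefore gets the 0x67 address-size override, while in 32-bit mode a 16-bit operand such as (%bx) triggers it; the new asserts encode that 16-bit addressing cannot occur in 64-bit mode, nor 64-bit addressing in 32-bit mode.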
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index a581993..624e56f 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -922,3 +922,22 @@ _test2: ## @test2
The insertps's of $0 are pointless complex copies.
//===---------------------------------------------------------------------===//
+
+[UNSAFE FP]
+
+void foo(double, double, double);
+void norm(double x, double y, double z) {
+ double scale = __builtin_sqrt(x*x + y*y + z*z);
+ foo(x/scale, y/scale, z/scale);
+}
+
+We currently generate an sqrtsd and 3 divsd instructions. This is bad: fp div is
+slow and not pipelined. In -ffast-math mode we could compute "1.0/scale" first
+and emit 3 mulsd in place of the divs. This can be done as a target-independent
+transform.
+
+If we're dealing with floats instead of doubles, we could even replace the sqrtss
+and inversion with an rsqrtss instruction, which computes 1/sqrt faster at the
+cost of reduced accuracy.
+
+//===---------------------------------------------------------------------===//
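A minimal sketch of the rewrite this entry proposes, assuming -ffast-math semantics (norm_fast is an illustrative name, not part of the entry):

  void foo(double, double, double);

  void norm_fast(double x, double y, double z) {
    double scale = __builtin_sqrt(x*x + y*y + z*z);
    double inv = 1.0 / scale;       /* one divsd instead of three */
    foo(x*inv, y*inv, z*inv);       /* three mulsd */
  }

For floats, the reciprocal square root could instead come from rsqrtss, trading accuracy for speed as the entry notes.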
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index f4b85ae..32c722a 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -20,7 +20,7 @@
namespace llvm {
-void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
// Defaults to copying the dest value.
ShuffleMask.push_back(0);
ShuffleMask.push_back(1);
@@ -44,8 +44,7 @@ void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
}
// <3,1> or <6,7,2,3>
-void DecodeMOVHLPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
for (unsigned i = NElts/2; i != NElts; ++i)
ShuffleMask.push_back(NElts+i);
@@ -54,8 +53,7 @@ void DecodeMOVHLPSMask(unsigned NElts,
}
// <0,2> or <0,1,4,5>
-void DecodeMOVLHPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
for (unsigned i = 0; i != NElts/2; ++i)
ShuffleMask.push_back(i);
@@ -66,8 +64,7 @@ void DecodeMOVLHPSMask(unsigned NElts,
/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*.
/// VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
-void DecodePSHUFMask(EVT VT, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits() / 128;
@@ -83,8 +80,7 @@ void DecodePSHUFMask(EVT VT, unsigned Imm,
}
}
-void DecodePSHUFHWMask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePSHUFHWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
ShuffleMask.push_back(0);
ShuffleMask.push_back(1);
ShuffleMask.push_back(2);
@@ -95,8 +91,7 @@ void DecodePSHUFHWMask(unsigned Imm,
}
}
-void DecodePSHUFLWMask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
for (unsigned i = 0; i != 4; ++i) {
ShuffleMask.push_back((Imm & 3));
Imm >>= 2;
@@ -110,8 +105,7 @@ void DecodePSHUFLWMask(unsigned Imm,
/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
/// the type of the vector allowing it to handle different datatypes and vector
/// widths.
-void DecodeSHUFPMask(EVT VT, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits() / 128;
@@ -136,7 +130,7 @@ void DecodeSHUFPMask(EVT VT, unsigned Imm,
/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
/// and punpckh*. VT indicates the type of the vector allowing it to handle
/// different datatypes and vector widths.
-void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -156,7 +150,7 @@ void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
/// and punpckl*. VT indicates the type of the vector allowing it to handle
/// different datatypes and vector widths.
-void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -174,7 +168,7 @@ void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
}
void DecodeVPERM2X128Mask(EVT VT, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+ SmallVectorImpl<int> &ShuffleMask) {
unsigned HalfSize = VT.getVectorNumElements()/2;
unsigned FstHalfBegin = (Imm & 0x3) * HalfSize;
unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize;
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 877c9bd..5b8c6ef 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -24,47 +24,41 @@
namespace llvm {
enum {
- SM_SentinelZero = ~0U
+ SM_SentinelZero = -1
};
-void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
// <3,1> or <6,7,2,3>
-void DecodeMOVHLPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
// <0,2> or <0,1,4,5>
-void DecodeMOVLHPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
-void DecodePSHUFMask(EVT VT, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
-void DecodePSHUFHWMask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodePSHUFHWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
-void DecodePSHUFLWMask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
/// the type of the vector allowing it to handle different datatypes and vector
/// widths.
-void DecodeSHUFPMask(EVT VT, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
/// and punpckh*. VT indicates the type of the vector allowing it to handle
/// different datatypes and vector widths.
-void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<int> &ShuffleMask);
/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
/// and punpckl*. VT indicates the type of the vector allowing it to handle
/// different datatypes and vector widths.
-void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<int> &ShuffleMask);
void DecodeVPERM2X128Mask(EVT VT, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
+ SmallVectorImpl<int> &ShuffleMask);
} // llvm namespace
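With the element type now signed, SM_SentinelZero (-1) can appear alongside ordinary indices in a decoded mask, matching the convention ShuffleVectorSDNode already uses. A hypothetical usage sketch (the v4i32 type and the 0x1B immediate are illustrative only):

  SmallVector<int, 8> Mask;
  DecodePSHUFMask(MVT::v4i32, 0x1B, Mask); // pshufd $0x1b decodes to <3,2,1,0>
  // Elements equal to SM_SentinelZero (-1) denote zeroed lanes.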
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 81e9422..ecc7b59 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -24,8 +24,6 @@ namespace llvm {
class FunctionPass;
class JITCodeEmitter;
-class MachineCodeEmitter;
-class Target;
class X86TargetMachine;
/// createX86ISelDag - This pass converts a legalized DAG into a
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 268cbf4..f1cedf3 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -13,13 +13,13 @@
//===----------------------------------------------------------------------===//
#include "X86AsmPrinter.h"
-#include "InstPrinter/X86ATTInstPrinter.h"
-#include "InstPrinter/X86IntelInstPrinter.h"
#include "X86MCInstLower.h"
#include "X86.h"
#include "X86COFFMachineModuleInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "InstPrinter/X86IntelInstPrinter.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index 1058df5..a6ed9ba 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -24,11 +24,7 @@
namespace llvm {
-class MachineJumpTableInfo;
-class MCContext;
-class MCInst;
class MCStreamer;
-class MCSymbol;
class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
const X86Subtarget *Subtarget;
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index 63c08f1..0cec95a 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -14,9 +14,9 @@
#ifndef X86COFF_MACHINEMODULEINFO_H
#define X86COFF_MACHINEMODULEINFO_H
+#include "X86MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/ADT/DenseSet.h"
-#include "X86MachineFunctionInfo.h"
namespace llvm {
class X86MachineFunctionInfo;
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index f90764e..3d63b7e 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1779,7 +1779,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
// Count the number of XMM registers allocated.
- static const unsigned XMMArgRegs[] = {
+ static const uint16_t XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 32de194..936df27 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -26,6 +26,7 @@
#define DEBUG_TYPE "x86-codegen"
#include "X86.h"
#include "X86InstrInfo.h"
+#include "llvm/InlineAsm.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -37,7 +38,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/InlineAsm.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -570,8 +570,8 @@ void FPS::finishBlockStack() {
namespace {
struct TableEntry {
- unsigned from;
- unsigned to;
+ uint16_t from;
+ uint16_t to;
bool operator<(const TableEntry &TE) const { return from < TE.from; }
friend bool operator<(const TableEntry &TE, unsigned V) {
return TE.from < V;
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index aa508b8..9405c2f 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -21,7 +21,6 @@
#include "X86TargetMachine.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -32,6 +31,7 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -1654,7 +1654,7 @@ enum AtomicSz {
AtomicSzEnd
};
-static const unsigned int AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
+static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
{
X86::LOCK_OR8mi,
X86::LOCK_OR8mr,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index cae9aad..88f3829 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1927,17 +1927,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
// FIXME: We should really autogenerate these arrays
- static const unsigned GPR64ArgRegsWin64[] = {
+ static const uint16_t GPR64ArgRegsWin64[] = {
X86::RCX, X86::RDX, X86::R8, X86::R9
};
- static const unsigned GPR64ArgRegs64Bit[] = {
+ static const uint16_t GPR64ArgRegs64Bit[] = {
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
};
- static const unsigned XMMArgRegs64Bit[] = {
+ static const uint16_t XMMArgRegs64Bit[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
- const unsigned *GPR64ArgRegs;
+ const uint16_t *GPR64ArgRegs;
unsigned NumXMMRegs = 0;
if (IsWin64) {
@@ -2326,7 +2326,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// registers used and is in the range 0 - 8 inclusive.
// Count the number of XMM registers allocated.
- static const unsigned XMMArgRegs[] = {
+ static const uint16_t XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
@@ -2910,7 +2910,7 @@ static bool isTargetShuffle(unsigned Opcode) {
}
static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
- SDValue V1, SelectionDAG &DAG) {
+ SDValue V1, SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::MOVSHDUP:
@@ -2921,7 +2921,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
}
static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
- SDValue V1, unsigned TargetMask, SelectionDAG &DAG) {
+ SDValue V1, unsigned TargetMask,
+ SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::PSHUFD:
@@ -2933,7 +2934,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
}
static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
- SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG) {
+ SDValue V1, SDValue V2, unsigned TargetMask,
+ SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::PALIGN:
@@ -3712,6 +3714,8 @@ static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT,
bool V2IsSplat = false, bool V2IsUndef = false) {
unsigned NumOps = VT.getVectorNumElements();
+ if (VT.getSizeInBits() == 256)
+ return false;
if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
return false;
@@ -4342,9 +4346,81 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]);
}
+/// getTargetShuffleMask - Calculates the shuffle mask corresponding to the
+/// target specific opcode. Returns true if the Mask could be calculated.
+/// Sets IsUnary to true if the shuffle uses only one source.
+static bool getTargetShuffleMask(SDNode *N, EVT VT,
+ SmallVectorImpl<int> &Mask, bool &IsUnary) {
+ unsigned NumElems = VT.getVectorNumElements();
+ SDValue ImmN;
+
+ IsUnary = false;
+ switch(N->getOpcode()) {
+ case X86ISD::SHUFP:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ break;
+ case X86ISD::UNPCKH:
+ DecodeUNPCKHMask(VT, Mask);
+ break;
+ case X86ISD::UNPCKL:
+ DecodeUNPCKLMask(VT, Mask);
+ break;
+ case X86ISD::MOVHLPS:
+ DecodeMOVHLPSMask(NumElems, Mask);
+ break;
+ case X86ISD::MOVLHPS:
+ DecodeMOVLHPSMask(NumElems, Mask);
+ break;
+ case X86ISD::PSHUFD:
+ case X86ISD::VPERMILP:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = true;
+ break;
+ case X86ISD::PSHUFHW:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = true;
+ break;
+ case X86ISD::PSHUFLW:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = true;
+ break;
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD: {
+ // The index 0 always comes from the first element of the second source;
+ // this is why MOVSS and MOVSD are used in the first place. The other
+ // elements come from the other positions of the first source vector.
+ Mask.push_back(NumElems);
+ for (unsigned i = 1; i != NumElems; ++i) {
+ Mask.push_back(i);
+ }
+ break;
+ }
+ case X86ISD::VPERM2X128:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ break;
+ case X86ISD::MOVDDUP:
+ case X86ISD::MOVLHPD:
+ case X86ISD::MOVLPD:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP:
+ case X86ISD::PALIGN:
+ // Not yet implemented
+ return false;
+ default: llvm_unreachable("unknown target shuffle node");
+ }
+
+ return true;
+}
+
/// getShuffleScalarElt - Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
-static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
+static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
unsigned Depth) {
if (Depth == 6)
return SDValue(); // Limit search depth.
@@ -4355,89 +4431,34 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
// Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
- Index = SV->getMaskElt(Index);
+ int Elt = SV->getMaskElt(Index);
- if (Index < 0)
+ if (Elt < 0)
return DAG.getUNDEF(VT.getVectorElementType());
unsigned NumElems = VT.getVectorNumElements();
- SDValue NewV = (Index < (int)NumElems) ? SV->getOperand(0)
- : SV->getOperand(1);
- return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG, Depth+1);
+ SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
+ : SV->getOperand(1);
+ return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
}
// Recurse into target specific vector shuffles to find scalars.
if (isTargetShuffle(Opcode)) {
unsigned NumElems = VT.getVectorNumElements();
- SmallVector<unsigned, 16> ShuffleMask;
+ SmallVector<int, 16> ShuffleMask;
SDValue ImmN;
+ bool IsUnary;
- switch(Opcode) {
- case X86ISD::SHUFP:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::UNPCKH:
- DecodeUNPCKHMask(VT, ShuffleMask);
- break;
- case X86ISD::UNPCKL:
- DecodeUNPCKLMask(VT, ShuffleMask);
- break;
- case X86ISD::MOVHLPS:
- DecodeMOVHLPSMask(NumElems, ShuffleMask);
- break;
- case X86ISD::MOVLHPS:
- DecodeMOVLHPSMask(NumElems, ShuffleMask);
- break;
- case X86ISD::PSHUFD:
- case X86ISD::VPERMILP:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::PSHUFHW:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::PSHUFLW:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::MOVSS:
- case X86ISD::MOVSD: {
- // The index 0 always comes from the first element of the second source,
- // this is why MOVSS and MOVSD are used in the first place. The other
- // elements come from the other positions of the first source vector.
- unsigned OpNum = (Index == 0) ? 1 : 0;
- return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG,
- Depth+1);
- }
- case X86ISD::VPERM2X128:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::MOVDDUP:
- case X86ISD::MOVLHPD:
- case X86ISD::MOVLPD:
- case X86ISD::MOVLPS:
- case X86ISD::MOVSHDUP:
- case X86ISD::MOVSLDUP:
- case X86ISD::PALIGN:
- return SDValue(); // Not yet implemented.
- default: llvm_unreachable("unknown target shuffle node");
- }
-
- Index = ShuffleMask[Index];
- if (Index < 0)
+ if (!getTargetShuffleMask(N, VT, ShuffleMask, IsUnary))
+ return SDValue();
+
+ int Elt = ShuffleMask[Index];
+ if (Elt < 0)
return DAG.getUNDEF(VT.getVectorElementType());
- SDValue NewV = (Index < (int)NumElems) ? N->getOperand(0)
+ SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0)
: N->getOperand(1);
- return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG,
+ return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
Depth+1);
}
@@ -4453,7 +4474,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
return (Index == 0) ? V.getOperand(0)
- : DAG.getUNDEF(VT.getVectorElementType());
+ : DAG.getUNDEF(VT.getVectorElementType());
if (V.getOpcode() == ISD::BUILD_VECTOR)
return V.getOperand(Index);
@@ -4465,38 +4486,37 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
/// shuffle operation which come consecutively from a zero. The
/// search can start in two different directions, from left or right.
static
-unsigned getNumOfConsecutiveZeros(SDNode *N, int NumElems,
+unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, unsigned NumElems,
bool ZerosFromLeft, SelectionDAG &DAG) {
- int i = 0;
-
- while (i < NumElems) {
+ unsigned i;
+ for (i = 0; i != NumElems; ++i) {
unsigned Index = ZerosFromLeft ? i : NumElems-i-1;
- SDValue Elt = getShuffleScalarElt(N, Index, DAG, 0);
+ SDValue Elt = getShuffleScalarElt(SVOp, Index, DAG, 0);
if (!(Elt.getNode() &&
(Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt))))
break;
- ++i;
}
return i;
}
-/// isShuffleMaskConsecutive - Check if the shuffle mask indicies from MaskI to
-/// MaskE correspond consecutively to elements from one of the vector operands,
+/// isShuffleMaskConsecutive - Check if the shuffle mask indices [MaskI, MaskE)
+/// correspond consecutively to elements from one of the vector operands,
/// starting from its index OpIdx. Also sets OpNum to indicate which source operand was used.
static
-bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, int MaskI, int MaskE,
- int OpIdx, int NumElems, unsigned &OpNum) {
+bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp,
+ unsigned MaskI, unsigned MaskE, unsigned OpIdx,
+ unsigned NumElems, unsigned &OpNum) {
bool SeenV1 = false;
bool SeenV2 = false;
- for (int i = MaskI; i <= MaskE; ++i, ++OpIdx) {
+ for (unsigned i = MaskI; i != MaskE; ++i, ++OpIdx) {
int Idx = SVOp->getMaskElt(i);
// Ignore undef indices
if (Idx < 0)
continue;
- if (Idx < NumElems)
+ if (Idx < (int)NumElems)
SeenV1 = true;
else
SeenV2 = true;
@@ -4531,7 +4551,7 @@ static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
//
if (!isShuffleMaskConsecutive(SVOp,
0, // Mask Start Index
- NumElems-NumZeros-1, // Mask End Index
+ NumElems-NumZeros, // Mask End Index (exclusive)
NumZeros, // Where to start looking in the src vector
NumElems, // Number of elements in vector
OpSrc)) // Which source operand ?
@@ -4564,7 +4584,7 @@ static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
//
if (!isShuffleMaskConsecutive(SVOp,
NumZeros, // Mask Start Index
- NumElems-1, // Mask End Index
+ NumElems, // Mask End Index (exclusive)
0, // Where to start looking in the src vector
NumElems, // Number of elements in vector
OpSrc)) // Which source operand ?
@@ -6080,88 +6100,6 @@ static bool RelaxedMayFoldVectorLoad(SDValue V) {
return false;
}
-/// CanFoldShuffleIntoVExtract - Check if the current shuffle is used by
-/// a vector extract, and if both can be later optimized into a single load.
-/// This is done in visitEXTRACT_VECTOR_ELT and the conditions are checked
-/// here because otherwise a target specific shuffle node is going to be
-/// emitted for this shuffle, and the optimization not done.
-/// FIXME: This is probably not the best approach, but fix the problem
-/// until the right path is decided.
-static
-bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG,
- const TargetLowering &TLI) {
- EVT VT = V.getValueType();
- ShuffleVectorSDNode *SVOp = dyn_cast<ShuffleVectorSDNode>(V);
-
- // Be sure that the vector shuffle is present in a pattern like this:
- // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), c) -> (f32 load $addr)
- if (!V.hasOneUse())
- return false;
-
- SDNode *N = *V.getNode()->use_begin();
- if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
- return false;
-
- SDValue EltNo = N->getOperand(1);
- if (!isa<ConstantSDNode>(EltNo))
- return false;
-
- // If the bit convert changed the number of elements, it is unsafe
- // to examine the mask.
- bool HasShuffleIntoBitcast = false;
- if (V.getOpcode() == ISD::BITCAST) {
- EVT SrcVT = V.getOperand(0).getValueType();
- if (SrcVT.getVectorNumElements() != VT.getVectorNumElements())
- return false;
- V = V.getOperand(0);
- HasShuffleIntoBitcast = true;
- }
-
- // Select the input vector, guarding against out of range extract vector.
- unsigned NumElems = VT.getVectorNumElements();
- unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- int Idx = (Elt > NumElems) ? -1 : SVOp->getMaskElt(Elt);
- V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1);
-
- // If we are accessing the upper part of a YMM register
- // then the EXTRACT_VECTOR_ELT is likely to be legalized to a sequence of
- // EXTRACT_SUBVECTOR + EXTRACT_VECTOR_ELT, which are not detected at this point
- // because the legalization of N did not happen yet.
- if (Idx >= (int)NumElems/2 && VT.getSizeInBits() == 256)
- return false;
-
- // Skip one more bit_convert if necessary
- if (V.getOpcode() == ISD::BITCAST) {
- if (!V.hasOneUse())
- return false;
- V = V.getOperand(0);
- }
-
- if (!ISD::isNormalLoad(V.getNode()))
- return false;
-
- // Is the original load suitable?
- LoadSDNode *LN0 = cast<LoadSDNode>(V);
-
- if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
- return false;
-
- if (!HasShuffleIntoBitcast)
- return true;
-
- // If there's a bitcast before the shuffle, check if the load type and
- // alignment is valid.
- unsigned Align = LN0->getAlignment();
- unsigned NewAlign =
- TLI.getTargetData()->getABITypeAlignment(
- VT.getTypeForEVT(*DAG.getContext()));
-
- if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
- return false;
-
- return true;
-}
-
static
SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
@@ -6282,12 +6220,6 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
if (SVOp->isSplat()) {
unsigned NumElem = VT.getVectorNumElements();
int Size = VT.getSizeInBits();
- // Special case, this is the only place now where it's allowed to return
- // a vector_shuffle operation without using a target specific node, because
- // *hopefully* it will be optimized away by the dag combiner. FIXME: should
- // this be moved to DAGCombine instead?
- if (NumElem <= 4 && CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
- return Op;
// Use vbroadcast whenever the splat comes from a foldable load
SDValue LD = isVectorBroadcast(Op, Subtarget);
@@ -13005,11 +12937,109 @@ SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// XFormVExtractWithShuffleIntoLoad - Check if a vector extract from a target
+/// specific shuffle of a load can be folded into a single element load.
+/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
+/// shuffles have been custom lowered so we need to handle those here.
+static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue InVec = N->getOperand(0);
+ SDValue EltNo = N->getOperand(1);
+
+ if (!isa<ConstantSDNode>(EltNo))
+ return SDValue();
+
+ EVT VT = InVec.getValueType();
+
+ bool HasShuffleIntoBitcast = false;
+ if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+ EVT BCVT = InVec.getOperand(0).getValueType();
+ if (BCVT.getVectorNumElements() != VT.getVectorNumElements())
+ return SDValue();
+ InVec = InVec.getOperand(0);
+ HasShuffleIntoBitcast = true;
+ }
+
+ if (!isTargetShuffle(InVec.getOpcode()))
+ return SDValue();
+
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ SmallVector<int, 16> ShuffleMask;
+ bool UnaryShuffle;
+ if (!getTargetShuffleMask(InVec.getNode(), VT, ShuffleMask, UnaryShuffle))
+ return SDValue();
+
+ // Select the input vector, guarding against out of range extract vector.
+ unsigned NumElems = VT.getVectorNumElements();
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ int Idx = (Elt > (int)NumElems) ? -1 : ShuffleMask[Elt];
+ SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0)
+ : InVec.getOperand(1);
+
+ // If both shuffle inputs are the same operand, then allow 2 uses of the load
+ unsigned AllowedUses = InVec.getOperand(0) == InVec.getOperand(1) ? 2 : 1;
+
+ if (LdNode.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!LdNode.getNode()->hasNUsesOfValue(AllowedUses, 0))
+ return SDValue();
+
+ AllowedUses = 1; // only allow 1 load use if we have a bitcast
+ LdNode = LdNode.getOperand(0);
+ }
+
+ if (!ISD::isNormalLoad(LdNode.getNode()))
+ return SDValue();
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(LdNode);
+
+ if (!LN0 || !LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
+ return SDValue();
+
+ if (HasShuffleIntoBitcast) {
+ // If there's a bitcast before the shuffle, check if the load type and
+ // alignment is valid.
+ unsigned Align = LN0->getAlignment();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned NewAlign = TLI.getTargetData()->
+ getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
+ return SDValue();
+ }
+
+ // All checks match, so transform back to vector_shuffle so that the DAG
+ // combiner can finish the job.
+ DebugLoc dl = N->getDebugLoc();
+
+ // Create shuffle node taking into account the case that it's a unary shuffle
+ SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(VT) : InVec.getOperand(1);
+ Shuffle = DAG.getVectorShuffle(InVec.getValueType(), dl,
+ InVec.getOperand(0), Shuffle,
+ &ShuffleMask[0]);
+ Shuffle = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
+ EltNo);
+}
+
/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
/// generation and convert it from being a bunch of shuffles and extracts
/// to a simple store and scalar loads to extract the elements.
static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI);
+ if (NewOp.getNode())
+ return NewOp;
+
SDValue InputVector = N->getOperand(0);
// Only operate on vectors of 4 elements, where the alternative shuffling
@@ -13070,6 +13100,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
unsigned EltSize =
InputVector.getValueType().getVectorElementType().getSizeInBits()/8;
uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
@@ -13093,6 +13124,8 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
+
+
DebugLoc DL = N->getDebugLoc();
SDValue Cond = N->getOperand(0);
// Get the LHS/RHS of the select.
@@ -14897,7 +14930,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
switch (N->getOpcode()) {
default: break;
case ISD::EXTRACT_VECTOR_ELT:
- return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
+ return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI);
case ISD::VSELECT:
case ISD::SELECT: return PerformSELECTCombine(N, DAG, DCI, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
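The deleted CanXFormVExtractWithShuffleIntoLoad and the new XFormVExtractWithShuffleIntoLoad implement the same fold, e.g. (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), c) -> (f32 load $addr) as the old comment put it, but the check now runs as an EXTRACT_VECTOR_ELT combine after operation legalization instead of bailing out of shuffle lowering, and the AllowedUses logic additionally permits the shared-operand case where both shuffle inputs are the same load.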
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index ac49232..42a5014 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -153,7 +153,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
"ret\t#eh_return, addr: $addr",
- [(X86ehret GR32:$addr)]>;
+ [(X86ehret GR32:$addr)], IIC_RET>;
}
@@ -161,7 +161,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
"ret\t#eh_return, addr: $addr",
- [(X86ehret GR64:$addr)]>;
+ [(X86ehret GR64:$addr)], IIC_RET>;
}
@@ -193,7 +193,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
isCodeGenOnly = 1 in {
def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
- [(set GR8:$dst, 0)]>;
+ [(set GR8:$dst, 0)], IIC_ALU_NONMEM>;
// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
// encoding and avoids a partial-register update sometimes, but doing so
@@ -202,11 +202,11 @@ def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
// to an MCInst.
def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
"",
- [(set GR16:$dst, 0)]>, OpSize;
+ [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize;
// FIXME: Set encoding to pseudo.
def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
- [(set GR32:$dst, 0)]>;
+ [(set GR32:$dst, 0)], IIC_ALU_NONMEM>;
}
// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
@@ -218,7 +218,7 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
let Defs = [EFLAGS], isCodeGenOnly=1,
AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
- [(set GR64:$dst, 0)]>;
+ [(set GR64:$dst, 0)], IIC_ALU_NONMEM>;
// Materialize i64 constant where top 32-bits are zero. This could theoretically
// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
@@ -226,7 +226,8 @@ def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
isCodeGenOnly = 1 in
def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
- "", [(set GR64:$dst, i64immZExt32:$src)]>;
+ "", [(set GR64:$dst, i64immZExt32:$src)],
+ IIC_ALU_NONMEM>;
// Use sbb to materialize carry bit.
let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in {
@@ -236,14 +237,18 @@ let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in {
// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces
// X86CodeEmitter.
def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "",
- [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+ [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+ IIC_ALU_NONMEM>;
def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "",
- [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>,
+ [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+ IIC_ALU_NONMEM>,
OpSize;
def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "",
- [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+ [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+ IIC_ALU_NONMEM>;
def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "",
- [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+ [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+ IIC_ALU_NONMEM>;
} // isCodeGenOnly
@@ -297,32 +302,32 @@ def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
//
let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
- [(X86rep_movs i8)]>, REP;
+ [(X86rep_movs i8)], IIC_REP_MOVS>, REP;
def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
- [(X86rep_movs i16)]>, REP, OpSize;
+ [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize;
def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
- [(X86rep_movs i32)]>, REP;
+ [(X86rep_movs i32)], IIC_REP_MOVS>, REP;
}
let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
- [(X86rep_movs i64)]>, REP;
+ [(X86rep_movs i64)], IIC_REP_MOVS>, REP;
// FIXME: Should use "(X86rep_stos AL)" as the pattern.
let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in
def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
- [(X86rep_stos i8)]>, REP;
+ [(X86rep_stos i8)], IIC_REP_STOS>, REP;
let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in
def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
- [(X86rep_stos i16)]>, REP, OpSize;
+ [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize;
let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in
def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
- [(X86rep_stos i32)]>, REP;
+ [(X86rep_stos i32)], IIC_REP_STOS>, REP;
let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
- [(X86rep_stos i64)]>, REP;
+ [(X86rep_stos i64)], IIC_REP_STOS>, REP;
//===----------------------------------------------------------------------===//
@@ -571,7 +576,7 @@ let isCodeGenOnly = 1, Defs = [EFLAGS] in
def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
"lock\n\t"
"or{l}\t{$zero, $dst|$dst, $zero}",
- []>, Requires<[In32BitMode]>, LOCK;
+ [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK;
let hasSideEffects = 1 in
def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
@@ -591,72 +596,72 @@ def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
!strconcat("lock\n\t", mnemonic, "{b}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_NONMEM>, LOCK;
def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
!strconcat("lock\n\t", mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, OpSize, LOCK;
+ [], IIC_ALU_NONMEM>, OpSize, LOCK;
def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
!strconcat("lock\n\t", mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_NONMEM>, LOCK;
def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
!strconcat("lock\n\t", mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_NONMEM>, LOCK;
def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
!strconcat("lock\n\t", mnemonic, "{b}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
!strconcat("lock\n\t", mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
!strconcat("lock\n\t", mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
!strconcat("lock\n\t", mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
!strconcat("lock\n\t", mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
!strconcat("lock\n\t", mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
!strconcat("lock\n\t", mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
}
@@ -673,29 +678,29 @@ let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
"lock\n\t"
- "inc{b}\t$dst", []>, LOCK;
+ "inc{b}\t$dst", [], IIC_UNARY_MEM>, LOCK;
def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
"lock\n\t"
- "inc{w}\t$dst", []>, OpSize, LOCK;
+ "inc{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK;
def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
"lock\n\t"
- "inc{l}\t$dst", []>, LOCK;
+ "inc{l}\t$dst", [], IIC_UNARY_MEM>, LOCK;
def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
"lock\n\t"
- "inc{q}\t$dst", []>, LOCK;
+ "inc{q}\t$dst", [], IIC_UNARY_MEM>, LOCK;
def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
"lock\n\t"
- "dec{b}\t$dst", []>, LOCK;
+ "dec{b}\t$dst", [], IIC_UNARY_MEM>, LOCK;
def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
"lock\n\t"
- "dec{w}\t$dst", []>, OpSize, LOCK;
+ "dec{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK;
def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
"lock\n\t"
- "dec{l}\t$dst", []>, LOCK;
+ "dec{l}\t$dst", [], IIC_UNARY_MEM>, LOCK;
def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
"lock\n\t"
- "dec{q}\t$dst", []>, LOCK;
+ "dec{q}\t$dst", [], IIC_UNARY_MEM>, LOCK;
}
// Atomic compare and swap.
@@ -704,42 +709,42 @@ let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
"lock\n\t"
"cmpxchg8b\t$ptr",
- [(X86cas8 addr:$ptr)]>, TB, LOCK;
+ [(X86cas8 addr:$ptr)], IIC_CMPX_LOCK_8B>, TB, LOCK;
let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
isCodeGenOnly = 1 in
def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr),
"lock\n\t"
"cmpxchg16b\t$ptr",
- [(X86cas16 addr:$ptr)]>, TB, LOCK,
+ [(X86cas16 addr:$ptr)], IIC_CMPX_LOCK_16B>, TB, LOCK,
Requires<[HasCmpxchg16b]>;
let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in {
def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
"lock\n\t"
"cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;
+ [(X86cas addr:$ptr, GR8:$swap, 1)], IIC_CMPX_LOCK_8>, TB, LOCK;
}
let Defs = [AX, EFLAGS], Uses = [AX], isCodeGenOnly = 1 in {
def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap),
"lock\n\t"
"cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK;
+ [(X86cas addr:$ptr, GR16:$swap, 2)], IIC_CMPX_LOCK>, TB, OpSize, LOCK;
}
let Defs = [EAX, EFLAGS], Uses = [EAX], isCodeGenOnly = 1 in {
def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
"lock\n\t"
"cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
+ [(X86cas addr:$ptr, GR32:$swap, 4)], IIC_CMPX_LOCK>, TB, LOCK;
}
let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in {
def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
"lock\n\t"
"cmpxchg{q}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK;
+ [(X86cas addr:$ptr, GR64:$swap, 8)], IIC_CMPX_LOCK>, TB, LOCK;
}
// Atomic exchange and add
@@ -747,22 +752,26 @@ let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
"lock\n\t"
"xadd{b}\t{$val, $ptr|$ptr, $val}",
- [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>,
+ [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))],
+ IIC_XADD_LOCK_MEM8>,
TB, LOCK;
def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr),
"lock\n\t"
"xadd{w}\t{$val, $ptr|$ptr, $val}",
- [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>,
+ [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))],
+ IIC_XADD_LOCK_MEM>,
TB, OpSize, LOCK;
def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr),
"lock\n\t"
"xadd{l}\t{$val, $ptr|$ptr, $val}",
- [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>,
+ [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))],
+ IIC_XADD_LOCK_MEM>,
TB, LOCK;
def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr),
"lock\n\t"
"xadd{q}\t{$val, $ptr|$ptr, $val}",
- [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
+ [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))],
+ IIC_XADD_LOCK_MEM>,
TB, LOCK;
}
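Each def here gains a trailing instruction itinerary class (the IIC_* values) so subtarget scheduling models can attach latencies to pseudo and locked instructions; this presumes the X86 I/Ii8/RI TableGen classes take an optional itinerary parameter defaulting to NoItinerary, which is why defs not touched by this patch still build.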
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 4f9f089..ae3ed1b 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -218,6 +218,11 @@ def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
+// Like 'X86vzload', but always requires 128-bit vector alignment.
+def alignedX86vzload : PatFrag<(ops node:$ptr), (X86vzload node:$ptr), [{
+ return cast<MemSDNode>(N)->getAlignment() >= 16;
+}]>;
+
// Like 'load', but always requires 256-bit vector alignment.
def alignedload256 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 32;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 5a479f0..307c96b 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -25,13 +25,13 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/MC/MCAsmInfo.h"
#include <limits>
#define GET_INSTRINFO_CTOR
@@ -82,6 +82,12 @@ enum {
TB_FOLDED_STORE = 1 << 19
};
+struct X86OpTblEntry {
+ uint16_t RegOp;
+ uint16_t MemOp;
+ uint32_t Flags;
+};
+
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
: X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
? X86::ADJCALLSTACKDOWN64
@@ -91,7 +97,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
: X86::ADJCALLSTACKUP32)),
TM(tm), RI(tm, *this) {
- static const unsigned OpTbl2Addr[][3] = {
+ static const X86OpTblEntry OpTbl2Addr[] = {
{ X86::ADC32ri, X86::ADC32mi, 0 },
{ X86::ADC32ri8, X86::ADC32mi8, 0 },
{ X86::ADC32rr, X86::ADC32mr, 0 },
@@ -259,16 +265,16 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
};
for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
- unsigned RegOp = OpTbl2Addr[i][0];
- unsigned MemOp = OpTbl2Addr[i][1];
- unsigned Flags = OpTbl2Addr[i][2];
+ unsigned RegOp = OpTbl2Addr[i].RegOp;
+ unsigned MemOp = OpTbl2Addr[i].MemOp;
+ unsigned Flags = OpTbl2Addr[i].Flags;
AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable,
RegOp, MemOp,
// Index 0, folded load and store, no alignment requirement.
Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
}
- static const unsigned OpTbl0[][3] = {
+ static const X86OpTblEntry OpTbl0[] = {
{ X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD },
{ X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD },
{ X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD },
@@ -370,14 +376,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
};
for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
- unsigned RegOp = OpTbl0[i][0];
- unsigned MemOp = OpTbl0[i][1];
- unsigned Flags = OpTbl0[i][2];
+ unsigned RegOp = OpTbl0[i].RegOp;
+ unsigned MemOp = OpTbl0[i].MemOp;
+ unsigned Flags = OpTbl0[i].Flags;
AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable,
RegOp, MemOp, TB_INDEX_0 | Flags);
}
- static const unsigned OpTbl1[][3] = {
+ static const X86OpTblEntry OpTbl1[] = {
{ X86::CMP16rr, X86::CMP16rm, 0 },
{ X86::CMP32rr, X86::CMP32rm, 0 },
{ X86::CMP64rr, X86::CMP64rm, 0 },
@@ -555,16 +561,16 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
- unsigned RegOp = OpTbl1[i][0];
- unsigned MemOp = OpTbl1[i][1];
- unsigned Flags = OpTbl1[i][2];
+ unsigned RegOp = OpTbl1[i].RegOp;
+ unsigned MemOp = OpTbl1[i].MemOp;
+ unsigned Flags = OpTbl1[i].Flags;
AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable,
RegOp, MemOp,
// Index 1, folded load
Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
}
- static const unsigned OpTbl2[][3] = {
+ static const X86OpTblEntry OpTbl2[] = {
{ X86::ADC32rr, X86::ADC32rm, 0 },
{ X86::ADC64rr, X86::ADC64rm, 0 },
{ X86::ADD16rr, X86::ADD16rm, 0 },
@@ -1108,9 +1114,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
};
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
- unsigned RegOp = OpTbl2[i][0];
- unsigned MemOp = OpTbl2[i][1];
- unsigned Flags = OpTbl2[i][2];
+ unsigned RegOp = OpTbl2[i].RegOp;
+ unsigned MemOp = OpTbl2[i].MemOp;
+ unsigned Flags = OpTbl2[i].Flags;
AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable,
RegOp, MemOp,
// Index 2, folded load
@@ -3627,7 +3633,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
// These are the replaceable SSE instructions. Some of these have Int variants
// that we don't include here. We don't want to replace instructions selected
// by intrinsics.
-static const unsigned ReplaceableInstrs[][3] = {
+static const uint16_t ReplaceableInstrs[][3] = {
//PackedSingle PackedDouble PackedInt
{ X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
{ X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
@@ -3667,7 +3673,7 @@ static const unsigned ReplaceableInstrs[][3] = {
{ X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr }
};
-static const unsigned ReplaceableInstrsAVX2[][3] = {
+static const uint16_t ReplaceableInstrsAVX2[][3] = {
//PackedSingle PackedDouble PackedInt
{ X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNYrm },
{ X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNYrr },
@@ -3688,14 +3694,14 @@ static const unsigned ReplaceableInstrsAVX2[][3] = {
// FIXME: Some shuffle and unpack instructions have equivalents in different
// domains, but they require a bit more work than just switching opcodes.
-static const unsigned *lookup(unsigned opcode, unsigned domain) {
+static const uint16_t *lookup(unsigned opcode, unsigned domain) {
for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
if (ReplaceableInstrs[i][domain-1] == opcode)
return ReplaceableInstrs[i];
return 0;
}
-static const unsigned *lookupAVX2(unsigned opcode, unsigned domain) {
+static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) {
for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i)
if (ReplaceableInstrsAVX2[i][domain-1] == opcode)
return ReplaceableInstrsAVX2[i];
@@ -3718,7 +3724,7 @@ void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
assert(Domain>0 && Domain<4 && "Invalid execution domain");
uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
assert(dom && "Not an SSE instruction");
- const unsigned *table = lookup(MI->getOpcode(), dom);
+ const uint16_t *table = lookup(MI->getOpcode(), dom);
if (!table) { // try the other table
assert((TM.getSubtarget<X86Subtarget>().hasAVX2() || Domain < 3) &&
"256-bit vector operations only available in AVX2");
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index d065d2d..b23d756 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -14,10 +14,10 @@
#ifndef X86INSTRUCTIONINFO_H
#define X86INSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "X86.h"
#include "X86RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "X86GenInstrInfo.inc"
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index f585b47..dd7cf50 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1856,19 +1856,19 @@ def : InstAlias<"outl $port", (OUT32ir i8imm:$port)>;
def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem)>;
// shld/shrd op,op -> shld op, op, CL
-def : InstAlias<"shldw $r1, $r2", (SHLD16rrCL GR16:$r1, GR16:$r2)>;
-def : InstAlias<"shldl $r1, $r2", (SHLD32rrCL GR32:$r1, GR32:$r2)>;
-def : InstAlias<"shldq $r1, $r2", (SHLD64rrCL GR64:$r1, GR64:$r2)>;
-def : InstAlias<"shrdw $r1, $r2", (SHRD16rrCL GR16:$r1, GR16:$r2)>;
-def : InstAlias<"shrdl $r1, $r2", (SHRD32rrCL GR32:$r1, GR32:$r2)>;
-def : InstAlias<"shrdq $r1, $r2", (SHRD64rrCL GR64:$r1, GR64:$r2)>;
-
-def : InstAlias<"shldw $mem, $reg", (SHLD16mrCL i16mem:$mem, GR16:$reg)>;
-def : InstAlias<"shldl $mem, $reg", (SHLD32mrCL i32mem:$mem, GR32:$reg)>;
-def : InstAlias<"shldq $mem, $reg", (SHLD64mrCL i64mem:$mem, GR64:$reg)>;
-def : InstAlias<"shrdw $mem, $reg", (SHRD16mrCL i16mem:$mem, GR16:$reg)>;
-def : InstAlias<"shrdl $mem, $reg", (SHRD32mrCL i32mem:$mem, GR32:$reg)>;
-def : InstAlias<"shrdq $mem, $reg", (SHRD64mrCL i64mem:$mem, GR64:$reg)>;
+def : InstAlias<"shldw $r2, $r1", (SHLD16rrCL GR16:$r1, GR16:$r2)>;
+def : InstAlias<"shldl $r2, $r1", (SHLD32rrCL GR32:$r1, GR32:$r2)>;
+def : InstAlias<"shldq $r2, $r1", (SHLD64rrCL GR64:$r1, GR64:$r2)>;
+def : InstAlias<"shrdw $r2, $r1", (SHRD16rrCL GR16:$r1, GR16:$r2)>;
+def : InstAlias<"shrdl $r2, $r1", (SHRD32rrCL GR32:$r1, GR32:$r2)>;
+def : InstAlias<"shrdq $r2, $r1", (SHRD64rrCL GR64:$r1, GR64:$r2)>;
+
+def : InstAlias<"shldw $reg, $mem", (SHLD16mrCL i16mem:$mem, GR16:$reg)>;
+def : InstAlias<"shldl $reg, $mem", (SHLD32mrCL i32mem:$mem, GR32:$reg)>;
+def : InstAlias<"shldq $reg, $mem", (SHLD64mrCL i64mem:$mem, GR64:$reg)>;
+def : InstAlias<"shrdw $reg, $mem", (SHRD16mrCL i16mem:$mem, GR16:$reg)>;
+def : InstAlias<"shrdl $reg, $mem", (SHRD32mrCL i32mem:$mem, GR32:$reg)>;
+def : InstAlias<"shrdq $reg, $mem", (SHRD64mrCL i64mem:$mem, GR64:$reg)>;
/* FIXME: This is disabled because the asm matcher is currently incapable of
* matching a fixed immediate like $1.
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index c6d1d19..df42627 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -641,7 +641,7 @@ let Predicates = [HasAVX] in {
(VMOVSDrr (v2i64 (V_SET0)),
(EXTRACT_SUBREG (v4i64 VR256:$src), sub_sd)), sub_xmm)>;
-// Extract and store.
+ // Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
(VMOVSSmr addr:$dst,
@@ -2306,7 +2306,7 @@ let Defs = [EFLAGS] in {
"comisd", SSEPackedDouble>, TB, OpSize;
} // Defs = [EFLAGS]
-// sse12_cmp_packed - sse 1 & 2 compared packed instructions
+// sse12_cmp_packed - sse 1 & 2 compare packed instructions
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
Intrinsic Int, string asm, string asm_alt,
Domain d> {
@@ -4820,8 +4820,10 @@ let Predicates = [HasSSE2], AddedComplexity = 20 in {
}
let Predicates = [HasAVX] in {
-def : Pat<(v4i64 (X86vzload addr:$src)),
+def : Pat<(v4i64 (alignedX86vzload addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVAPSrm addr:$src), sub_xmm)>;
+def : Pat<(v4i64 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i32 0), (VMOVUPSrm addr:$src), sub_xmm)>;
}
//===---------------------------------------------------------------------===//
@@ -7307,6 +7309,24 @@ def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
[]>, VEX;
}
+// Extract and store.
+let Predicates = [HasAVX] in {
+ def : Pat<(alignedstore (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+ def : Pat<(alignedstore (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+ def : Pat<(alignedstore (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+
+ def : Pat<(int_x86_sse_storeu_ps addr:$dst, (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2)),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+ def : Pat<(int_x86_sse2_storeu_pd addr:$dst, (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2)),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+ def : Pat<(int_x86_sse2_storeu_dq addr:$dst, (bc_v16i8 (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2))),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+}
+
+// AVX1 patterns
let Predicates = [HasAVX] in {
def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
(VEXTRACTF128rr VR256:$src1, imm:$src2)>;
@@ -7314,6 +7334,31 @@ def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
(VEXTRACTF128rr VR256:$src1, imm:$src2)>;
def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
(VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4f32 (VEXTRACTF128rr
+ (v8f32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2f64 (VEXTRACTF128rr
+ (v4f64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2i64 (VEXTRACTF128rr
+ (v4i64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4i32 (VEXTRACTF128rr
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v8i16 (VEXTRACTF128rr
+ (v16i16 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v16i8 (VEXTRACTF128rr
+ (v32i8 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
}
//===----------------------------------------------------------------------===//
@@ -7711,7 +7756,7 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
(int_x86_avx2_vinserti128 VR256:$src1, (memopv2i64 addr:$src2),
imm:$src3))]>, VEX_4V;
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2], AddedComplexity = 1 in {
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
(i32 imm)),
(VINSERTI128rr VR256:$src1, VR128:$src2,
@@ -7756,6 +7801,19 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
(i32 imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
+
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
+ (i32 imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
+ (i32 imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
+ (i32 imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
}
//===----------------------------------------------------------------------===//
@@ -7791,34 +7849,6 @@ def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
}
-// AVX1 patterns
-let Predicates = [HasAVX] in {
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v4f32 (VEXTRACTF128rr
- (v8f32 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v2f64 (VEXTRACTF128rr
- (v4f64 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v2i64 (VEXTRACTF128rr
- (v4i64 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v4i32 (VEXTRACTF128rr
- (v8i32 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v8i16 (VEXTRACTF128rr
- (v16i16 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v16i8 (VEXTRACTF128rr
- (v32i8 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-}
-
//===----------------------------------------------------------------------===//
// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
//
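
The new pattern pair above selects the aligned VMOVAPS form only when alignedX86vzload's predicate holds, and otherwise falls back to the unaligned VMOVUPS form. A conceptual C++ analogue using SSE intrinsics (illustrative only; the pattern tables resolve this choice statically during selection):

    #include <immintrin.h>
    #include <cstdint>

    static __m128 load128(const float *P) {
      if ((reinterpret_cast<std::uintptr_t>(P) % 16) == 0)
        return _mm_load_ps(P);   // movaps: faults on a misaligned address
      return _mm_loadu_ps(P);    // movups: no alignment requirement
    }
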
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 8843848..bddba6c 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -45,17 +45,17 @@ def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
-def SYSRETL : I<0x07, RawFrm, (outs), (ins), "sysretl", []>, TB;
-def SYSRETQ :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB,
+def SYSRET : I<0x07, RawFrm, (outs), (ins), "sysret{l}", []>, TB;
+def SYSRET64 :RI<0x07, RawFrm, (outs), (ins), "sysret{q}", []>, TB,
Requires<[In64BitMode]>;
def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB;
-def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexitl", []>, TB;
-def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexitq", []>, TB,
+def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit{l}", []>, TB;
+def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit{q}", []>, TB,
Requires<[In64BitMode]>;
-def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iretw", []>, OpSize;
+def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize;
def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", []>;
def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>,
Requires<[In64BitMode]>;
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index a7a5c56..b578e8d 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -12,10 +12,11 @@
//
//===----------------------------------------------------------------------===//
-#include "InstPrinter/X86ATTInstPrinter.h"
#include "X86MCInstLower.h"
#include "X86AsmPrinter.h"
#include "X86COFFMachineModuleInfo.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "llvm/Type.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -26,7 +27,6 @@
#include "llvm/Target/Mangler.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/Type.h"
using namespace llvm;
X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf,
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 93e2744..b56025f 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#include "X86.h"
#include "X86RegisterInfo.h"
+#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index d6d0149..17f4efd 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -114,6 +114,9 @@ def IIC_MOVZX : InstrItinClass;
def IIC_MOVZX_R16_R8 : InstrItinClass;
def IIC_MOVZX_R16_M8 : InstrItinClass;
+def IIC_REP_MOVS : InstrItinClass;
+def IIC_REP_STOS : InstrItinClass;
+
// SSE scalar/parallel binary operations
def IIC_SSE_ALU_F32S_RR : InstrItinClass;
def IIC_SSE_ALU_F32S_RM : InstrItinClass;
@@ -250,6 +253,14 @@ def IIC_SSE_CVT_SS2SI64_RR : InstrItinClass;
def IIC_SSE_CVT_SD2SI_RM : InstrItinClass;
def IIC_SSE_CVT_SD2SI_RR : InstrItinClass;
+def IIC_CMPX_LOCK : InstrItinClass;
+def IIC_CMPX_LOCK_8 : InstrItinClass;
+def IIC_CMPX_LOCK_8B : InstrItinClass;
+def IIC_CMPX_LOCK_16B : InstrItinClass;
+
+def IIC_XADD_LOCK_MEM : InstrItinClass;
+def IIC_XADD_LOCK_MEM8 : InstrItinClass;
+
//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index e8cf72a..77d4e56 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -144,6 +144,9 @@ def AtomItineraries : ProcessorItineraries<
InstrItinData<IIC_MOVZX_R16_R8, [InstrStage<2, [Port0, Port1]>] >,
InstrItinData<IIC_MOVZX_R16_M8, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_REP_MOVS, [InstrStage<75, [Port0, Port1]>] >,
+ InstrItinData<IIC_REP_STOS, [InstrStage<74, [Port0, Port1]>] >,
+
// SSE binary operations
// arithmetic fp scalar
InstrItinData<IIC_SSE_ALU_F32S_RR, [InstrStage<5, [Port1]>] >,
@@ -289,6 +292,14 @@ def AtomItineraries : ProcessorItineraries<
InstrItinData<IIC_SSE_CVT_SS2SI64_RR, [InstrStage<9, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_CVT_SS2SI64_RM, [InstrStage<10, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_CVT_SD2SI_RR, [InstrStage<8, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_CVT_SD2SI_RM, [InstrStage<9, [Port0, Port1]>] >
-]>;
+ InstrItinData<IIC_SSE_CVT_SD2SI_RM, [InstrStage<9, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_CMPX_LOCK, [InstrStage<14, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPX_LOCK_8, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPX_LOCK_8B, [InstrStage<18, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPX_LOCK_16B, [InstrStage<22, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_XADD_LOCK_MEM8, [InstrStage<3, [Port0, Port1]>] >
+ ]>;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index a36d0d8..7fd832b 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -14,9 +14,9 @@
#ifndef X86SUBTARGET_H
#define X86SUBTARGET_H
+#include "llvm/CallingConv.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/CallingConv.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 143caba..8e935af 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -28,7 +28,6 @@
namespace llvm {
-class formatted_raw_ostream;
class StringRef;
class X86TargetMachine : public LLVMTargetMachine {
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index ceb7a4a..a02a368 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -15,7 +15,6 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
namespace llvm {
- class X86TargetMachine;
/// X8664_MachoTargetObjectFile - This TLOF implementation is used for Darwin
/// x86-64.
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index 4d8ef74..50fda58 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "XCore.h"
#include "XCoreFrameLowering.h"
+#include "XCore.h"
#include "XCoreInstrInfo.h"
#include "XCoreMachineFunctionInfo.h"
#include "llvm/Function.h"
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index c2d2a5d..593cebc 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -1152,7 +1152,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
if (isVarArg) {
/* Argument registers */
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
XCore::R0, XCore::R1, XCore::R2, XCore::R3
};
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index f5a6822..5cd3e67 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -15,9 +15,9 @@
#ifndef XCOREISELLOWERING_H
#define XCOREISELLOWERING_H
+#include "XCore.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
-#include "XCore.h"
namespace llvm {
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index f930623..0a3008d 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "XCoreMachineFunctionInfo.h"
#include "XCoreInstrInfo.h"
+#include "XCoreMachineFunctionInfo.h"
#include "XCore.h"
#include "llvm/MC/MCContext.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index e47d212..42eeed8 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -14,8 +14,8 @@
#ifndef XCOREINSTRUCTIONINFO_H
#define XCOREINSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "XCoreRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "XCoreGenInstrInfo.inc"
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 8730282..f3b4b4c 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -14,6 +14,8 @@
#include "XCoreRegisterInfo.h"
#include "XCoreMachineFunctionInfo.h"
#include "XCore.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -24,8 +26,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Type.h"
-#include "llvm/Function.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
@@ -54,20 +54,6 @@ static inline bool isImmU16(unsigned val) {
return val < (1 << 16);
}
-static const unsigned XCore_ArgRegs[] = {
- XCore::R0, XCore::R1, XCore::R2, XCore::R3
-};
-
-const unsigned * XCoreRegisterInfo::getArgRegs(const MachineFunction *MF)
-{
- return XCore_ArgRegs;
-}
-
-unsigned XCoreRegisterInfo::getNumArgRegs(const MachineFunction *MF)
-{
- return array_lengthof(XCore_ArgRegs);
-}
-
bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) {
return MF.getMMI().hasDebugInfo() ||
MF.getFunction()->needsUnwindTableEntry();
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index ab6ce56..7391cfd 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -62,15 +62,6 @@ public:
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
- //! Return the array of argument passing registers
- /*!
- \note The size of this array is returned by getArgRegsSize().
- */
- static const unsigned *getArgRegs(const MachineFunction *MF = 0);
-
- //! Return the size of the argument passing register array
- static unsigned getNumArgRegs(const MachineFunction *MF = 0);
-
//! Return whether to emit frame moves
static bool needsFrameMoves(const MachineFunction &MF);
};
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 2c174f4..2546681 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -14,13 +14,13 @@
#ifndef XCORETARGETMACHINE_H
#define XCORETARGETMACHINE_H
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
#include "XCoreFrameLowering.h"
#include "XCoreSubtarget.h"
#include "XCoreInstrInfo.h"
#include "XCoreISelLowering.h"
#include "XCoreSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
namespace llvm {
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 58ab567..a32e550 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2561,11 +2561,6 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
return false;
delete ValueStack.pop_back_val();
InstResult = RetVal;
-
- if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
- NextBB = II->getNormalDest();
- return true;
- }
}
} else if (isa<TerminatorInst>(CurInst)) {
if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
@@ -2582,8 +2577,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
ConstantInt *Val =
dyn_cast<ConstantInt>(getVal(SI->getCondition()));
if (!Val) return false; // Cannot determine.
- unsigned ValTISucc = SI->resolveSuccessorIndex(SI->findCaseValue(Val));
- NextBB = SI->getSuccessor(ValTISucc);
+ NextBB = SI->findCaseValue(Val).getCaseSuccessor();
} else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
Value *Val = getVal(IBI->getAddress())->stripPointerCasts();
if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
@@ -2611,6 +2605,12 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
setVal(CurInst, InstResult);
}
+ // If we just processed an invoke, we finished evaluating the block.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
+ NextBB = II->getNormalDest();
+ return true;
+ }
+
// Advance program counter.
++CurInst;
}
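
The GlobalOpt change moves the invoke handling so that it runs after the instruction's result has been recorded via setVal; returning before recording left later uses of the invoke's value unmapped. A small sketch of the record-then-branch ordering (hypothetical types, not LLVM's Evaluator):

    struct Inst { bool IsInvoke; int NormalDest; };

    // Returns true when the block is finished (control transfers).
    static bool evalStep(const Inst &I, int &NextBB,
                         void (*recordResult)(const Inst &)) {
      recordResult(I);        // map the result first (setVal equivalent)
      if (I.IsInvoke) {       // only then end the block
        NextBB = I.NormalDest;
        return true;
      }
      return false;           // otherwise fall through to the next inst
    }
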
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 749a3fa..3c7fac6 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -32,8 +32,6 @@ namespace {
// AlwaysInliner only inlines functions that are marked as "always inline".
class AlwaysInliner : public Inliner {
- // Functions that are never inlined
- SmallPtrSet<const Function*, 16> NeverInline;
InlineCostAnalyzer CA;
public:
// Use extremely low threshold.
@@ -46,7 +44,22 @@ namespace {
}
static char ID; // Pass identification, replacement for typeid
InlineCost getInlineCost(CallSite CS) {
- return CA.getInlineCost(CS, NeverInline);
+ Function *Callee = CS.getCalledFunction();
+ // We assume indirect calls aren't calling an always-inline function.
+ if (!Callee) return InlineCost::getNever();
+
+ // We can't inline calls to external functions.
+ // FIXME: We shouldn't even get here.
+ if (Callee->isDeclaration()) return InlineCost::getNever();
+
+ // Return never for anything not marked as always inline.
+ if (!Callee->hasFnAttr(Attribute::AlwaysInline))
+ return InlineCost::getNever();
+
+ // We still have to check the inline cost in case there are reasons to
+ // not inline which trump the always-inline attribute such as setjmp and
+ // indirectbr.
+ return CA.getInlineCost(CS);
}
float getInlineFudgeFactor(CallSite CS) {
return CA.getInlineFudgeFactor(CS);
@@ -58,7 +71,7 @@ namespace {
CA.growCachedCostInfo(Caller, Callee);
}
virtual bool doFinalization(CallGraph &CG) {
- return removeDeadFunctions(CG, &NeverInline);
+ return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/true);
}
virtual bool doInitialization(CallGraph &CG);
void releaseMemory() {
@@ -84,12 +97,5 @@ Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) {
// been annotated with the "always inline" attribute.
bool AlwaysInliner::doInitialization(CallGraph &CG) {
CA.setTargetData(getAnalysisIfAvailable<TargetData>());
-
- Module &M = CG.getModule();
-
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (!I->isDeclaration() && !I->hasFnAttr(Attribute::AlwaysInline))
- NeverInline.insert(I);
-
return false;
}
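
The rewritten getInlineCost filters callsites with a chain of early-outs before consulting the cost analyzer. A sketch of that gating order under a hypothetical callee descriptor (LLVM's real CallSite/Function API differs):

    struct Callee {
      bool IsIndirect;
      bool IsDeclaration;
      bool HasAlwaysInline;
    };

    enum class Verdict { Never, AskAnalyzer };

    static Verdict classify(const Callee &C) {
      if (C.IsIndirect)       return Verdict::Never;  // unknown target
      if (C.IsDeclaration)    return Verdict::Never;  // no body to inline
      if (!C.HasAlwaysInline) return Verdict::Never;  // pass ignores the rest
      return Verdict::AskAnalyzer;  // may still block on setjmp/indirectbr
    }
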
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index b3421eb..03032e6 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -23,15 +23,12 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
namespace {
class SimpleInliner : public Inliner {
- // Functions that are never inlined
- SmallPtrSet<const Function*, 16> NeverInline;
InlineCostAnalyzer CA;
public:
SimpleInliner() : Inliner(ID) {
@@ -43,7 +40,7 @@ namespace {
}
static char ID; // Pass identification, replacement for typeid
InlineCost getInlineCost(CallSite CS) {
- return CA.getInlineCost(CS, NeverInline);
+ return CA.getInlineCost(CS);
}
float getInlineFudgeFactor(CallSite CS) {
return CA.getInlineFudgeFactor(CS);
@@ -78,44 +75,6 @@ Pass *llvm::createFunctionInliningPass(int Threshold) {
// annotated with the noinline attribute.
bool SimpleInliner::doInitialization(CallGraph &CG) {
CA.setTargetData(getAnalysisIfAvailable<TargetData>());
-
- Module &M = CG.getModule();
-
- for (Module::iterator I = M.begin(), E = M.end();
- I != E; ++I)
- if (!I->isDeclaration() && I->hasFnAttr(Attribute::NoInline))
- NeverInline.insert(I);
-
- // Get llvm.noinline
- GlobalVariable *GV = M.getNamedGlobal("llvm.noinline");
-
- if (GV == 0)
- return false;
-
- // Don't crash on invalid code
- if (!GV->hasDefinitiveInitializer())
- return false;
-
- const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
-
- if (InitList == 0)
- return false;
-
- // Iterate over each element and add to the NeverInline set
- for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
-
- // Get Source
- const Constant *Elt = InitList->getOperand(i);
-
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Elt))
- if (CE->getOpcode() == Instruction::BitCast)
- Elt = CE->getOperand(0);
-
- // Insert into set of functions to never inline
- if (const Function *F = dyn_cast<Function>(Elt))
- NeverInline.insert(F);
- }
-
return false;
}
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 1f7625d..9975333 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -19,6 +19,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -243,13 +244,20 @@ bool Inliner::shouldInline(CallSite CS) {
return false;
}
- // Try to detect the case where the current inlining candidate caller
- // (call it B) is a static function and is an inlining candidate elsewhere,
- // and the current candidate callee (call it C) is large enough that
- // inlining it into B would make B too big to inline later. In these
- // circumstances it may be best not to inline C into B, but to inline B
- // into its callers.
- if (Caller->hasLocalLinkage()) {
+ // Try to detect the case where the current inlining candidate caller (call
+ // it B) is a static or linkonce-ODR function and is an inlining candidate
+ // elsewhere, and the current candidate callee (call it C) is large enough
+ // that inlining it into B would make B too big to inline later. In these
+ // circumstances it may be best not to inline C into B, but to inline B into
+ // its callers.
+ //
+ // This only applies to static and linkonce-ODR functions because those are
+ // expected to be available for inlining in the translation units where they
+ // are used. Thus we will always have the opportunity to make local inlining
+ // decisions. Importantly the linkonce-ODR linkage covers inline functions
+ // and templates in C++.
+ if (Caller->hasLocalLinkage() ||
+ Caller->getLinkage() == GlobalValue::LinkOnceODRLinkage) {
int TotalSecondaryCost = 0;
bool outerCallsFound = false;
// This bool tracks what happens if we do NOT inline C into B.
@@ -327,6 +335,37 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
return false;
}
+/// \brief Simplify arguments going into a particular callsite.
+///
+/// This is important to do each time we add a callsite due to inlining so that
+/// constants and other entities which feed into inline cost estimation are
+/// properly recognized when analyzing the new callsite. Consider:
+/// void outer(int x) {
+/// if (x < 42)
+/// return inner(42 - x);
+/// ...
+/// }
+/// void inner(int x) {
+/// ...
+/// }
+///
+/// The inliner gives calls to 'outer' with a constant argument a bonus because
+/// it will delete one side of a branch. But the resulting call to 'inner'
+/// will, after inlining, also have a constant operand. We need to do just
+/// enough constant folding to expose this for callsite arguments. The rest
+/// will be taken care of after the inliner finishes running.
+static void simplifyCallSiteArguments(const TargetData *TD, CallSite CS) {
+ // FIXME: It would be nice to avoid this smallvector if RAUW doesn't
+ // invalidate operand iterators in any cases.
+ SmallVector<std::pair<Value *, Value*>, 4> SimplifiedArgs;
+ for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I)
+ if (Instruction *Inst = dyn_cast<Instruction>(*I))
+ if (Value *SimpleArg = SimplifyInstruction(Inst, TD))
+ SimplifiedArgs.push_back(std::make_pair(Inst, SimpleArg));
+ for (unsigned Idx = 0, Size = SimplifiedArgs.size(); Idx != Size; ++Idx)
+ SimplifiedArgs[Idx].first->replaceAllUsesWith(SimplifiedArgs[Idx].second);
+}
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraph>();
@@ -455,7 +494,9 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
for (unsigned i = 0, e = InlineInfo.InlinedCalls.size();
i != e; ++i) {
Value *Ptr = InlineInfo.InlinedCalls[i];
- CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
+ CallSite NewCS = Ptr;
+ simplifyCallSiteArguments(TD, NewCS);
+ CallSites.push_back(std::make_pair(NewCS, NewHistoryID));
}
}
@@ -515,25 +556,27 @@ bool Inliner::doFinalization(CallGraph &CG) {
/// removeDeadFunctions - Remove dead functions that are not included in
/// DNR (Do Not Remove) list.
-bool Inliner::removeDeadFunctions(CallGraph &CG,
- SmallPtrSet<const Function *, 16> *DNR) {
- SmallPtrSet<CallGraphNode*, 16> FunctionsToRemove;
+bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
+ SmallVector<CallGraphNode*, 16> FunctionsToRemove;
// Scan for all of the functions, looking for ones that should now be removed
// from the program. Insert the dead ones in the FunctionsToRemove list.
for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) {
CallGraphNode *CGN = I->second;
- if (CGN->getFunction() == 0)
- continue;
-
Function *F = CGN->getFunction();
-
+ if (!F || F->isDeclaration())
+ continue;
+
+ // Handle the case when this method is called and we only care about
+ // always-inline functions. This is a bit of a hack to share code
+ // between here and the InlineAlways pass.
+ if (AlwaysInlineOnly && !F->hasFnAttr(Attribute::AlwaysInline))
+ continue;
+
// If the only remaining users of the function are dead constants, remove
// them.
F->removeDeadConstantUsers();
- if (DNR && DNR->count(F))
- continue;
if (!F->isDefTriviallyDead())
continue;
@@ -546,24 +589,28 @@ bool Inliner::removeDeadFunctions(CallGraph &CG,
CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
// Removing the node for callee from the call graph and delete it.
- FunctionsToRemove.insert(CGN);
+ FunctionsToRemove.push_back(CGN);
}
+ if (FunctionsToRemove.empty())
+ return false;
// Now that we know which functions to delete, do so. We didn't want to do
// this inline, because that would invalidate our CallGraph::iterator
// objects. :(
//
- // Note that it doesn't matter that we are iterating over a non-stable set
+ // Note that it doesn't matter that we are iterating over a non-stable order
// here to do this, it doesn't matter which order the functions are deleted
// in.
- bool Changed = false;
- for (SmallPtrSet<CallGraphNode*, 16>::iterator I = FunctionsToRemove.begin(),
- E = FunctionsToRemove.end(); I != E; ++I) {
+ std::sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
+ FunctionsToRemove.erase(std::unique(FunctionsToRemove.begin(),
+ FunctionsToRemove.end()),
+ FunctionsToRemove.end());
+ for (SmallVectorImpl<CallGraphNode *>::iterator I = FunctionsToRemove.begin(),
+ E = FunctionsToRemove.end();
+ I != E; ++I) {
resetCachedCostInfo((*I)->getFunction());
delete CG.removeFunctionFromModule(*I);
++NumDeleted;
- Changed = true;
}
-
- return Changed;
+ return true;
}
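
removeDeadFunctions now collects call graph nodes in a SmallVector, which, unlike the old SmallPtrSet, can hold duplicates; hence the sort/unique pass before deletion. A minimal sketch of that dedup idiom:

    #include <algorithm>
    #include <vector>

    // Deduplicate before deleting so no node is freed twice.
    static void dedup(std::vector<int> &Nodes) {
      std::sort(Nodes.begin(), Nodes.end());
      Nodes.erase(std::unique(Nodes.begin(), Nodes.end()), Nodes.end());
    }
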
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index cc8f5bf..1165660 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1929,8 +1929,11 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
}
// Canonicalize xor to the RHS.
- if (match(Op0, m_Xor(m_Value(), m_Value())))
+ bool SwappedForXor = false;
+ if (match(Op0, m_Xor(m_Value(), m_Value()))) {
std::swap(Op0, Op1);
+ SwappedForXor = true;
+ }
// A | ( A ^ B) -> A | B
// A | (~A ^ B) -> A | ~B
@@ -1961,6 +1964,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
return BinaryOperator::CreateOr(Not, Op0);
}
+ if (SwappedForXor)
+ std::swap(Op0, Op1);
+
if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
if (Value *Res = FoldOrOfICmps(LHS, RHS))
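
The visitOr fix records whether the xor canonicalization swapped the operands and restores the original order before the icmp folds run, since those folds re-read I.getOperand(0)/(1). A sketch of the swap-and-restore pattern:

    #include <utility>

    static void foldOr(int &Op0, int &Op1, bool Op0IsXor) {
      bool Swapped = false;
      if (Op0IsXor) {            // canonicalize xor to the RHS
        std::swap(Op0, Op1);
        Swapped = true;
      }
      // ... xor-on-the-RHS folds would run here ...
      if (Swapped)
        std::swap(Op0, Op1);     // restore before order-sensitive folds
    }
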
diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h
index 32009c3..99a02fc 100644
--- a/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ b/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -55,9 +55,9 @@ public:
Worklist.reserve(NumEntries+16);
WorklistMap.resize(NumEntries);
DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
- for (; NumEntries; --NumEntries) {
+ for (unsigned Idx = 0; NumEntries; --NumEntries) {
Instruction *I = List[NumEntries-1];
- WorklistMap.insert(std::make_pair(I, Worklist.size()));
+ WorklistMap.insert(std::make_pair(I, Idx++));
Worklist.push_back(I);
}
}
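
AddInitialGroup seeds the worklist map so each instruction is keyed to the index it will occupy; the fix keeps a running counter rather than re-reading the container size on every insertion. A sketch of the invariant with standard containers (the real code uses DenseMap and seeds an empty worklist):

    #include <unordered_map>
    #include <vector>

    static void addAll(const std::vector<int *> &List,
                       std::vector<int *> &Worklist,
                       std::unordered_map<int *, unsigned> &Map) {
      unsigned Idx = static_cast<unsigned>(Worklist.size());
      // Walk List back to front, as the hunk above does.
      for (auto It = List.rbegin(); It != List.rend(); ++It) {
        Map.emplace(*It, Idx++);   // index the element is about to get
        Worklist.push_back(*It);
      }
    }
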
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 318256a..349ba83 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1245,15 +1245,15 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
if (I->getOpcode() == Instruction::Add)
if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
// change 'switch (X+4) case 1:' into 'switch (X) case -3'
- unsigned NumCases = SI.getNumCases();
// The case list visited below does not include the default case.
- for (unsigned i = 0; i < NumCases; ++i) {
- ConstantInt* CaseVal = SI.getCaseValue(i);
+ for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end();
+ i != e; ++i) {
+ ConstantInt* CaseVal = i.getCaseValue();
Constant* NewCaseVal = ConstantExpr::getSub(cast<Constant>(CaseVal),
AddRHS);
assert(isa<ConstantInt>(NewCaseVal) &&
"Result of expression should be constant");
- SI.setCaseValue(i, cast<ConstantInt>(NewCaseVal));
+ i.setValue(cast<ConstantInt>(NewCaseVal));
}
SI.setCondition(I->getOperand(0));
Worklist.Add(I);
@@ -1873,9 +1873,10 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
// See if this is an explicit destination.
- for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i)
- if (SI->getCaseValue(i) == Cond) {
- BasicBlock *ReachableBB = SI->getCaseSuccessor(i);
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i)
+ if (i.getCaseValue() == Cond) {
+ BasicBlock *ReachableBB = i.getCaseSuccessor();
Worklist.push_back(ReachableBB);
continue;
}
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 123e399..b43b9e5 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "asan"
+#include "FunctionBlackList.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallSet.h"
@@ -29,8 +30,6 @@
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/IRBuilder.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
#include "llvm/Target/TargetData.h"
@@ -126,21 +125,6 @@ static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug max inst"),
namespace {
-// Blacklisted functions are not instrumented.
-// The blacklist file contains one or more lines like this:
-// ---
-// fun:FunctionWildCard
-// ---
-// This is similar to the "ignore" feature of ThreadSanitizer.
-// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores
-class BlackList {
- public:
- BlackList(const std::string &Path);
- bool isIn(const Function &F);
- private:
- Regex *Functions;
-};
-
/// AddressSanitizer: instrument the code in module to find memory bugs.
struct AddressSanitizer : public ModulePass {
AddressSanitizer();
@@ -195,7 +179,7 @@ struct AddressSanitizer : public ModulePass {
Function *AsanCtorFunction;
Function *AsanInitFunction;
Instruction *CtorInsertBefore;
- OwningPtr<BlackList> BL;
+ OwningPtr<FunctionBlackList> BL;
};
} // namespace
@@ -470,7 +454,7 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
GlobalVariable *G = GlobalsToChange[i];
PointerType *PtrTy = cast<PointerType>(G->getType());
Type *Ty = PtrTy->getElementType();
- uint64_t SizeInBytes = TD->getTypeStoreSizeInBits(Ty) / 8;
+ uint64_t SizeInBytes = TD->getTypeAllocSize(Ty);
uint64_t RightRedzoneSize = RedzoneSize +
(RedzoneSize - (SizeInBytes % RedzoneSize));
Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
@@ -551,7 +535,7 @@ bool AddressSanitizer::runOnModule(Module &M) {
TD = getAnalysisIfAvailable<TargetData>();
if (!TD)
return false;
- BL.reset(new BlackList(ClBlackListFile));
+ BL.reset(new FunctionBlackList(ClBlackListFile));
CurrentModule = &M;
C = &(M.getContext());
@@ -595,18 +579,23 @@ bool AddressSanitizer::runOnModule(Module &M) {
if (ClGlobals)
Res |= insertGlobalRedzones(M);
- // Tell the run-time the current values of mapping offset and scale.
- GlobalValue *asan_mapping_offset =
- new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
- ConstantInt::get(IntptrTy, MappingOffset),
- kAsanMappingOffsetName);
- GlobalValue *asan_mapping_scale =
- new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
- ConstantInt::get(IntptrTy, MappingScale),
- kAsanMappingScaleName);
- // Read these globals, otherwise they may be optimized away.
- IRB.CreateLoad(asan_mapping_scale, true);
- IRB.CreateLoad(asan_mapping_offset, true);
+ if (ClMappingOffsetLog >= 0) {
+ // Tell the run-time the current values of mapping offset and scale.
+ GlobalValue *asan_mapping_offset =
+ new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
+ ConstantInt::get(IntptrTy, MappingOffset),
+ kAsanMappingOffsetName);
+ // Read the global, otherwise it may be optimized away.
+ IRB.CreateLoad(asan_mapping_offset, true);
+ }
+ if (ClMappingScale) {
+ GlobalValue *asan_mapping_scale =
+ new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
+ ConstantInt::get(IntptrTy, MappingScale),
+ kAsanMappingScaleName);
+ // Read the global, otherwise it may be optimized away.
+ IRB.CreateLoad(asan_mapping_scale, true);
+ }
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
@@ -946,54 +935,3 @@ bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) {
return true;
}
-
-BlackList::BlackList(const std::string &Path) {
- Functions = NULL;
- const char *kFunPrefix = "fun:";
- if (!ClBlackListFile.size()) return;
- std::string Fun;
-
- OwningPtr<MemoryBuffer> File;
- if (error_code EC = MemoryBuffer::getFile(ClBlackListFile.c_str(), File)) {
- report_fatal_error("Can't open blacklist file " + ClBlackListFile + ": " +
- EC.message());
- }
- MemoryBuffer *Buff = File.take();
- const char *Data = Buff->getBufferStart();
- size_t DataLen = Buff->getBufferSize();
- SmallVector<StringRef, 16> Lines;
- SplitString(StringRef(Data, DataLen), Lines, "\n\r");
- for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) {
- if (Lines[i].startswith(kFunPrefix)) {
- std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix));
- std::string ThisFuncRE;
- // add ThisFunc replacing * with .*
- for (size_t j = 0, n = ThisFunc.size(); j < n; j++) {
- if (ThisFunc[j] == '*')
- ThisFuncRE += '.';
- ThisFuncRE += ThisFunc[j];
- }
- // Check that the regexp is valid.
- Regex CheckRE(ThisFuncRE);
- std::string Error;
- if (!CheckRE.isValid(Error))
- report_fatal_error("malformed blacklist regex: " + ThisFunc +
- ": " + Error);
- // Append to the final regexp.
- if (Fun.size())
- Fun += "|";
- Fun += ThisFuncRE;
- }
- }
- if (Fun.size()) {
- Functions = new Regex(Fun);
- }
-}
-
-bool BlackList::isIn(const Function &F) {
- if (Functions) {
- bool Res = Functions->match(F.getName());
- return Res;
- }
- return false;
-}
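
Two things change in the sanitizer hunks: the global's size now comes from getTypeAllocSize (which includes padding) rather than a bit size divided by eight, and the mapping globals are only emitted when the corresponding flags request them. The right-redzone formula is worth spelling out; a sketch:

    #include <cstdint>

    // Pads the object to the next RedzoneSize boundary and then adds one
    // full redzone; the result is always in (RedzoneSize, 2*RedzoneSize].
    static uint64_t rightRedzone(uint64_t SizeInBytes, uint64_t RedzoneSize) {
      return RedzoneSize + (RedzoneSize - (SizeInBytes % RedzoneSize));
    }
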
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index f8dbca3..e4c8cf1 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -1,6 +1,7 @@
add_llvm_library(LLVMInstrumentation
AddressSanitizer.cpp
EdgeProfiling.cpp
+ FunctionBlackList.cpp
GCOVProfiling.cpp
Instrumentation.cpp
OptimalEdgeProfiling.cpp
diff --git a/lib/Transforms/Instrumentation/FunctionBlackList.cpp b/lib/Transforms/Instrumentation/FunctionBlackList.cpp
new file mode 100644
index 0000000..188ea4d
--- /dev/null
+++ b/lib/Transforms/Instrumentation/FunctionBlackList.cpp
@@ -0,0 +1,79 @@
+//===-- FunctionBlackList.cpp - blacklist of functions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility class for instrumentation passes (like AddressSanitizer
+// or ThreadSanitizer) to avoid instrumenting some functions based on
+// user-supplied blacklist.
+//
+//===----------------------------------------------------------------------===//
+
+#include "FunctionBlackList.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Function.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+
+namespace llvm {
+
+FunctionBlackList::FunctionBlackList(const std::string &Path) {
+ Functions = NULL;
+ const char *kFunPrefix = "fun:";
+ if (!Path.size()) return;
+ std::string Fun;
+
+ OwningPtr<MemoryBuffer> File;
+ if (error_code EC = MemoryBuffer::getFile(Path.c_str(), File)) {
+ report_fatal_error("Can't open blacklist file " + Path + ": " +
+ EC.message());
+ }
+ MemoryBuffer *Buff = File.take();
+ const char *Data = Buff->getBufferStart();
+ size_t DataLen = Buff->getBufferSize();
+ SmallVector<StringRef, 16> Lines;
+ SplitString(StringRef(Data, DataLen), Lines, "\n\r");
+ for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) {
+ if (Lines[i].startswith(kFunPrefix)) {
+ std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix));
+ std::string ThisFuncRE;
+ // add ThisFunc replacing * with .*
+ for (size_t j = 0, n = ThisFunc.size(); j < n; j++) {
+ if (ThisFunc[j] == '*')
+ ThisFuncRE += '.';
+ ThisFuncRE += ThisFunc[j];
+ }
+ // Check that the regexp is valid.
+ Regex CheckRE(ThisFuncRE);
+ std::string Error;
+ if (!CheckRE.isValid(Error))
+ report_fatal_error("malformed blacklist regex: " + ThisFunc +
+ ": " + Error);
+ // Append to the final regexp.
+ if (Fun.size())
+ Fun += "|";
+ Fun += ThisFuncRE;
+ }
+ }
+ if (Fun.size()) {
+ Functions = new Regex(Fun);
+ }
+}
+
+bool FunctionBlackList::isIn(const Function &F) {
+ if (Functions) {
+ bool Res = Functions->match(F.getName());
+ return Res;
+ }
+ return false;
+}
+
+} // namespace llvm
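
The constructor above turns each "fun:" line into a regex by expanding '*' to '.*' and or-ing all patterns together. A standalone sketch of just the expansion step (note that, as in the code above, other regex metacharacters are not escaped):

    #include <string>

    static std::string wildcardToRegex(const std::string &Pat) {
      std::string RE;
      for (char C : Pat) {
        if (C == '*')
          RE += '.';   // '*' becomes '.*'
        RE += C;
      }
      return RE;
    }
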
diff --git a/lib/Transforms/Instrumentation/FunctionBlackList.h b/lib/Transforms/Instrumentation/FunctionBlackList.h
new file mode 100644
index 0000000..c1239b9
--- /dev/null
+++ b/lib/Transforms/Instrumentation/FunctionBlackList.h
@@ -0,0 +1,37 @@
+//===-- FunctionBlackList.h - blacklist of functions ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+//
+// This is a utility class for instrumentation passes (like AddressSanitizer
+// or ThreadSanitizer) to avoid instrumenting some functions based on
+// user-supplied blacklist.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include <string>
+
+namespace llvm {
+class Function;
+class Regex;
+
+// Blacklisted functions are not instrumented.
+// The blacklist file contains one or more lines like this:
+// ---
+// fun:FunctionWildCard
+// ---
+// This is similar to the "ignore" feature of ThreadSanitizer.
+// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores
+class FunctionBlackList {
+ public:
+ FunctionBlackList(const std::string &Path);
+ bool isIn(const Function &F);
+ private:
+ Regex *Functions;
+};
+
+} // namespace llvm
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index d822535..85fda30 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -21,12 +21,14 @@
#define DEBUG_TYPE "tsan"
+#include "FunctionBlackList.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Intrinsics.h"
#include "llvm/Function.h"
#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
@@ -37,6 +39,9 @@
using namespace llvm;
+static cl::opt<std::string> ClBlackListFile("tsan-blacklist",
+ cl::desc("Blacklist file"), cl::Hidden);
+
namespace {
/// ThreadSanitizer: instrument the code in module to find races.
struct ThreadSanitizer : public FunctionPass {
@@ -48,6 +53,7 @@ struct ThreadSanitizer : public FunctionPass {
private:
TargetData *TD;
+ OwningPtr<FunctionBlackList> BL;
// Callbacks to run-time library are computed in doInitialization.
Value *TsanFuncEntry;
Value *TsanFuncExit;
@@ -76,6 +82,8 @@ bool ThreadSanitizer::doInitialization(Module &M) {
TD = getAnalysisIfAvailable<TargetData>();
if (!TD)
return false;
+ BL.reset(new FunctionBlackList(ClBlackListFile));
+
// Always insert a call to __tsan_init into the module's CTORs.
IRBuilder<> IRB(M.getContext());
Value *TsanInit = M.getOrInsertFunction("__tsan_init",
@@ -102,6 +110,7 @@ bool ThreadSanitizer::doInitialization(Module &M) {
bool ThreadSanitizer::runOnFunction(Function &F) {
if (!TD) return false;
+ if (BL->isIn(F)) return false;
SmallVector<Instruction*, 8> RetVec;
SmallVector<Instruction*, 8> LoadsAndStores;
bool Res = false;
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index aad3a92..020ec57 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -579,6 +579,15 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
return true;
}
+ if (II && TLI) {
+ SmallVector<Value*, 2> PtrOps;
+ Type *AccessTy;
+ if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
+ while (!PtrOps.empty())
+ if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
+ return true;
+ }
+
// From here on out we're working with named functions.
if (CI->getCalledFunction() == 0) return false;
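
The CodeGenPrepare hunk drains the collected pointer operands with pop_back_val, returning as soon as one memory-instruction optimization succeeds. A sketch of that drain loop (hypothetical callback in place of OptimizeMemoryInst):

    #include <vector>

    static bool drainAndTry(std::vector<int *> &PtrOps,
                            bool (*tryOne)(int *)) {
      while (!PtrOps.empty()) {
        int *P = PtrOps.back();   // pop_back_val equivalent
        PtrOps.pop_back();
        if (tryOne(P))
          return true;            // stop at the first success
      }
      return false;
    }
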
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index e275268..9b0aadb 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -28,6 +28,7 @@ STATISTIC(NumPhis, "Number of phis propagated");
STATISTIC(NumSelects, "Number of selects propagated");
STATISTIC(NumMemAccess, "Number of memory access targets propagated");
STATISTIC(NumCmps, "Number of comparisons propagated");
+STATISTIC(NumDeadCases, "Number of switch cases removed");
namespace {
class CorrelatedValuePropagation : public FunctionPass {
@@ -37,6 +38,7 @@ namespace {
bool processPHI(PHINode *P);
bool processMemAccess(Instruction *I);
bool processCmp(CmpInst *C);
+ bool processSwitch(SwitchInst *SI);
public:
static char ID;
@@ -110,7 +112,8 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) {
Changed = true;
}
- ++NumPhis;
+ if (Changed)
+ ++NumPhis;
return Changed;
}
@@ -173,6 +176,86 @@ bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
return true;
}
+/// processSwitch - Simplify a switch instruction by removing cases which can
+/// never fire. If the uselessness of a case could be determined locally then
+/// constant propagation would already have figured it out. Instead, walk the
+/// predecessors and statically evaluate cases based on information available
+/// on that edge. Cases that cannot fire no matter which edge is taken can
+/// safely be removed. If a case fires on every incoming edge then the entire
+/// switch can be removed and replaced with a branch to the case destination.
+bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) {
+ Value *Cond = SI->getCondition();
+ BasicBlock *BB = SI->getParent();
+
+ // If the condition was defined in same block as the switch then LazyValueInfo
+ // currently won't say anything useful about it, though in theory it could.
+ if (isa<Instruction>(Cond) && cast<Instruction>(Cond)->getParent() == BB)
+ return false;
+
+ // If the switch is unreachable then trying to improve it is a waste of time.
+ pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
+ if (PB == PE) return false;
+
+ // Analyse each switch case in turn. This is done in reverse order so that
+ // removing a case doesn't cause trouble for the iteration.
+ bool Changed = false;
+ for (SwitchInst::CaseIt CI = SI->case_end(), CE = SI->case_begin(); CI-- != CE;
+ ) {
+ ConstantInt *Case = CI.getCaseValue();
+
+ // Check to see if the switch condition is equal to/not equal to the case
+ // value on every incoming edge, equal/not equal being the same each time.
+ LazyValueInfo::Tristate State = LazyValueInfo::Unknown;
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ // Is the switch condition equal to the case value?
+ LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ,
+ Cond, Case, *PI, BB);
+ // Give up on this case if nothing is known.
+ if (Value == LazyValueInfo::Unknown) {
+ State = LazyValueInfo::Unknown;
+ break;
+ }
+
+ // If this was the first edge to be visited, record that all other edges
+ // need to give the same result.
+ if (PI == PB) {
+ State = Value;
+ continue;
+ }
+
+ // If this case is known to fire for some edges and known not to fire for
+ // others then there is nothing we can do - give up.
+ if (Value != State) {
+ State = LazyValueInfo::Unknown;
+ break;
+ }
+ }
+
+ if (State == LazyValueInfo::False) {
+ // This case never fires - remove it.
+ CI.getCaseSuccessor()->removePredecessor(BB);
+ SI->removeCase(CI); // Does not invalidate the iterator.
+ ++NumDeadCases;
+ Changed = true;
+ } else if (State == LazyValueInfo::True) {
+ // This case always fires. Arrange for the switch to be turned into an
+ // unconditional branch by replacing the switch condition with the case
+ // value.
+ SI->setCondition(Case);
+ NumDeadCases += SI->getNumCases();
+ Changed = true;
+ break;
+ }
+ }
+
+ if (Changed)
+ // If the switch has been simplified to the point where it can be replaced
+ // by a branch then do so now.
+ ConstantFoldTerminator(BB);
+
+ return Changed;
+}
+
bool CorrelatedValuePropagation::runOnFunction(Function &F) {
LVI = &getAnalysis<LazyValueInfo>();
@@ -200,6 +283,13 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
}
}
+ Instruction *Term = FI->getTerminator();
+ switch (Term->getOpcode()) {
+ case Instruction::Switch:
+ BBChanged |= processSwitch(cast<SwitchInst>(Term));
+ break;
+ }
+
FnChanged |= BBChanged;
}
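
processSwitch walks the cases from last to first so that removeCase never disturbs positions still to be visited. A sketch of the same reverse-walk-while-erasing idiom on a plain vector:

    #include <cstddef>
    #include <vector>

    static void eraseNegatives(std::vector<int> &V) {
      // I-- != 0 is well defined for the unsigned index and visits
      // V.size()-1 down to 0.
      for (std::size_t I = V.size(); I-- != 0; ) {
        if (V[I] < 0)
          V.erase(V.begin() + I);  // only indices < I remain to visit
      }
    }
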
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index fe05e35..ac80c48 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -2158,10 +2158,11 @@ bool GVN::processInstruction(Instruction *I) {
Value *SwitchCond = SI->getCondition();
BasicBlock *Parent = SI->getParent();
bool Changed = false;
- for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i) {
- BasicBlock *Dst = SI->getCaseSuccessor(i);
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ BasicBlock *Dst = i.getCaseSuccessor();
if (isOnlyReachableViaThisEdge(Parent, Dst, DT))
- Changed |= propagateEquality(SwitchCond, SI->getCaseValue(i), Dst);
+ Changed |= propagateEquality(SwitchCond, i.getCaseValue(), Dst);
}
return Changed;
}
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index d1e57e1..490617a 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -450,8 +450,10 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
}
// Add a new IVUsers entry for the newly-created integer PHI.
- if (IU)
- IU->AddUsersIfInteresting(NewPHI);
+ if (IU) {
+ SmallPtrSet<Loop*, 16> SimplifiedLoopNests;
+ IU->AddUsersIfInteresting(NewPHI, SimplifiedLoopNests);
+ }
Changed = true;
}
@@ -1967,8 +1969,11 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// loop exit test instruction.
if (IU && NewICmp) {
ICmpInst *NewICmpInst = dyn_cast<ICmpInst>(NewICmp);
- if (NewICmpInst)
- IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0)));
+ if (NewICmpInst) {
+ SmallPtrSet<Loop*, 16> SimplifiedLoopNests;
+ IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0)),
+ SimplifiedLoopNests);
+ }
}
// Clean up dead instructions.
Changed |= DeleteDeadPHIs(L->getHeader());
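
AddUsersIfInteresting() now takes a caller-owned scratch set; reading this
hunk together with the SimplifyIndVar.cpp one below, it records loop nests
already checked for simplified form so repeated calls need not re-verify
them. A hedged wrapper showing the calling convention (the helper is ours):

  #include "llvm/Analysis/IVUsers.h"
  #include "llvm/ADT/SmallPtrSet.h"
  using namespace llvm;

  // Sketch, not part of the patch: one scratch set can serve a whole
  // batch of queries within a single pass invocation.
  static void addInterestingUsers(IVUsers *IU, Instruction *I) {
    if (!IU)
      return;
    SmallPtrSet<Loop*, 16> SimplifiedLoopNests;
    IU->AddUsersIfInteresting(I, SimplifiedLoopNests);
  }
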
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index fa25a8f..429b61b 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -857,6 +857,9 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (BBIt != LoadBB->begin())
return false;
+ // If all of the loads and stores that feed the value have the same TBAA tag,
+ // then we can propagate it onto any newly inserted loads.
+ MDNode *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa);
SmallPtrSet<BasicBlock*, 8> PredsScanned;
typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
@@ -875,11 +878,16 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Scan the predecessor to see if the value is available in the pred.
BBIt = PredBB->end();
- Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6);
+ MDNode *ThisTBAATag = 0;
+ Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6,
+ 0, &ThisTBAATag);
if (!PredAvailable) {
OneUnavailablePred = PredBB;
continue;
}
+
+    // If the TBAA tags disagree or are not present, forget about them.
+ if (TBAATag != ThisTBAATag) TBAATag = 0;
// If so, this load is partially redundant. Remember this info so that we
// can create a PHI node.
@@ -939,6 +947,9 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
LI->getAlignment(),
UnavailablePred->getTerminator());
NewVal->setDebugLoc(LI->getDebugLoc());
+ if (TBAATag)
+ NewVal->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
}
@@ -1087,8 +1098,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
- unsigned ValCase = SI->findCaseValue(cast<ConstantInt>(Val));
- DestBB = SI->getSuccessor(SI->resolveSuccessorIndex(ValCase));
+ DestBB = SI->findCaseValue(cast<ConstantInt>(Val)).getCaseSuccessor();
} else {
assert(isa<IndirectBrInst>(BB->getTerminator())
&& "Unexpected terminator");
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 6768860..82d918e 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -4534,22 +4534,25 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
if (!L->isLoopSimplifyForm())
return;
+ // If there's no interesting work to be done, bail early.
+ if (IU.empty()) return;
+
+#ifndef NDEBUG
// All dominating loops must have preheaders, or SCEVExpander may not be able
// to materialize an AddRecExpr whose Start is an outer AddRecExpr.
//
- // FIXME: This is a little absurd. I think LoopSimplify should be taught
- // to create a preheader under any circumstance.
+ // IVUsers analysis should only create users that are dominated by simple loop
+ // headers. Since this loop should dominate all of its users, its user list
+ // should be empty if this loop itself is not within a simple loop nest.
for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
Rung; Rung = Rung->getIDom()) {
BasicBlock *BB = Rung->getBlock();
const Loop *DomLoop = LI.getLoopFor(BB);
if (DomLoop && DomLoop->getHeader() == BB) {
- if (!DomLoop->getLoopPreheader())
- return;
+ assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest");
}
}
- // If there's no interesting work to be done, bail early.
- if (IU.empty()) return;
+#endif // NDEBUG
DEBUG(dbgs() << "\nLSR on loop ";
WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 2c75f63..053eb0c 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -32,7 +32,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
-#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -445,8 +445,9 @@ bool LoopUnswitch::processCurrentLoop() {
// Do not process same value again and again.
// At this point we have some cases already unswitched and
// some not yet unswitched. Let's find the first not yet unswitched one.
- for (unsigned i = 0; i < NumCases; ++i) {
- Constant* UnswitchValCandidate = SI->getCaseValue(i);
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ Constant* UnswitchValCandidate = i.getCaseValue();
if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) {
UnswitchVal = UnswitchValCandidate;
break;
@@ -574,12 +575,13 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
// this.
// Note that we can't trivially unswitch on the default case or
// on already unswitched cases.
- for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i) {
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
BasicBlock* LoopExitCandidate;
if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
- SI->getCaseSuccessor(i)))) {
+ i.getCaseSuccessor()))) {
// Okay, we found a trivial case, remember the value that is trivial.
- ConstantInt* CaseVal = SI->getCaseValue(i);
+ ConstantInt* CaseVal = i.getCaseValue();
// Check that it was not unswitched before, since already unswitched
       // trivial vals look trivial too.
@@ -1117,16 +1119,16 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
SwitchInst *SI = dyn_cast<SwitchInst>(U);
if (SI == 0 || !isa<ConstantInt>(Val)) continue;
- unsigned DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
+ SwitchInst::CaseIt DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
// Default case is live for multiple values.
- if (DeadCase == SwitchInst::ErrorIndex) continue;
+ if (DeadCase == SI->case_default()) continue;
// Found a dead case value. Don't remove PHI nodes in the
// successor if they become single-entry, those PHI nodes may
// be in the Users list.
BasicBlock *Switch = SI->getParent();
- BasicBlock *SISucc = SI->getCaseSuccessor(DeadCase);
+ BasicBlock *SISucc = DeadCase.getCaseSuccessor();
BasicBlock *Latch = L->getLoopLatch();
BranchesInfo.setUnswitched(SI, Val);
@@ -1146,7 +1148,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// Compute the successors instead of relying on the return value
// of SplitEdge, since it may have split the switch successor
// after PHI nodes.
- BasicBlock *NewSISucc = SI->getCaseSuccessor(DeadCase);
+ BasicBlock *NewSISucc = DeadCase.getCaseSuccessor();
BasicBlock *OldSISucc = *succ_begin(NewSISucc);
// Create an "unreachable" destination.
BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
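
These LoopUnswitch hunks show the new miss contract: findCaseValue() returns
case_default() rather than the old ErrorIndex sentinel when the value has no
dedicated case. A sketch of a lookup written against that contract (the
helper is illustrative):

  #include "llvm/Instructions.h"
  using namespace llvm;

  // Sketch, not part of the patch: null means Val merely falls through
  // to the default destination.
  static BasicBlock *explicitCaseDest(SwitchInst *SI, ConstantInt *Val) {
    SwitchInst::CaseIt Case = SI->findCaseValue(Val);
    if (Case == SI->case_default())
      return 0;
    return Case.getCaseSuccessor();
  }

The SCCP hunks below lean on the complementary property: calling
getCaseSuccessor() on the default iterator yields the default destination,
which is why the explicit fallback loop there could be deleted outright.
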
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 1c7f036..9fdea8d 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -2929,11 +2929,17 @@ ComputePostOrders(Function &F,
Visited.clear();
// Compute the exits, which are the starting points for reverse-CFG DFS.
+ // This includes blocks where all the successors are backedges that
+ // we're skipping.
SmallVector<BasicBlock *, 4> Exits;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
BasicBlock *BB = I;
- if (cast<TerminatorInst>(&BB->back())->getNumSuccessors() == 0)
- Exits.push_back(BB);
+ TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ for (succ_iterator SI(TI), SE(TI, true); SI != SE; ++SI)
+ if (!Backedges.count(std::make_pair(BB, *SI)))
+ goto HasNonBackedgeSucc;
+ Exits.push_back(BB);
+ HasNonBackedgeSucc:;
}
// Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
@@ -3035,7 +3041,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
// but our releases will never depend on it, because they must be
// paired with retains from before the invoke.
InsertPts[0] = II->getNormalDest()->getFirstInsertionPt();
- InsertPts[1] = II->getUnwindDest()->getFirstInsertionPt();
+ if (!II->getMetadata(NoObjCARCExceptionsMDKind))
+ InsertPts[1] = II->getUnwindDest()->getFirstInsertionPt();
} else {
// Insert code immediately after the last use.
InsertPts[0] = llvm::next(BasicBlock::iterator(LastUse));
@@ -4017,36 +4024,40 @@ bool ObjCARCContract::runOnFunction(Function &F) {
Use &U = UI.getUse();
unsigned OperandNo = UI.getOperandNo();
++UI; // Increment UI now, because we may unlink its element.
- if (Instruction *UserInst = dyn_cast<Instruction>(U.getUser()))
- if (Inst != UserInst && DT->dominates(Inst, UserInst)) {
- Changed = true;
- Instruction *Replacement = Inst;
- Type *UseTy = U.get()->getType();
- if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) {
- // For PHI nodes, insert the bitcast in the predecessor block.
- unsigned ValNo =
- PHINode::getIncomingValueNumForOperand(OperandNo);
- BasicBlock *BB =
- PHI->getIncomingBlock(ValNo);
- if (Replacement->getType() != UseTy)
- Replacement = new BitCastInst(Replacement, UseTy, "",
- &BB->back());
- for (unsigned i = 0, e = PHI->getNumIncomingValues();
- i != e; ++i)
- if (PHI->getIncomingBlock(i) == BB) {
- // Keep the UI iterator valid.
- if (&PHI->getOperandUse(
- PHINode::getOperandNumForIncomingValue(i)) ==
- &UI.getUse())
- ++UI;
- PHI->setIncomingValue(i, Replacement);
- }
- } else {
- if (Replacement->getType() != UseTy)
- Replacement = new BitCastInst(Replacement, UseTy, "", UserInst);
- U.set(Replacement);
- }
+ Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
+ if (!UserInst)
+ continue;
+ // FIXME: dominates should return true for unreachable UserInst.
+ if (!DT->isReachableFromEntry(UserInst->getParent()) ||
+ DT->dominates(Inst, UserInst)) {
+ Changed = true;
+ Instruction *Replacement = Inst;
+ Type *UseTy = U.get()->getType();
+ if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) {
+ // For PHI nodes, insert the bitcast in the predecessor block.
+ unsigned ValNo =
+ PHINode::getIncomingValueNumForOperand(OperandNo);
+ BasicBlock *BB =
+ PHI->getIncomingBlock(ValNo);
+ if (Replacement->getType() != UseTy)
+ Replacement = new BitCastInst(Replacement, UseTy, "",
+ &BB->back());
+ for (unsigned i = 0, e = PHI->getNumIncomingValues();
+ i != e; ++i)
+ if (PHI->getIncomingBlock(i) == BB) {
+ // Keep the UI iterator valid.
+ if (&PHI->getOperandUse(
+ PHINode::getOperandNumForIncomingValue(i)) ==
+ &UI.getUse())
+ ++UI;
+ PHI->setIncomingValue(i, Replacement);
+ }
+ } else {
+ if (Replacement->getType() != UseTy)
+ Replacement = new BitCastInst(Replacement, UseTy, "", UserInst);
+ U.set(Replacement);
}
+ }
}
// If Arg is a no-op casted pointer, strip one level of casts and
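
The exits computation above uses ObjCARC's own successor iterator (built
from the terminator, with an end flag) and a goto to short-circuit. Below is
a goto-free restatement using the generic CFG successor iterator and an
assumed edge-set type, as a sketch only:

  #include "llvm/Function.h"
  #include "llvm/Support/CFG.h"
  #include "llvm/ADT/DenseSet.h"
  #include <utility>
  using namespace llvm;

  // Sketch, not part of the patch: the pass's real iterator can skip
  // certain edges, which this generic version does not model.
  typedef DenseSet<std::pair<BasicBlock*, BasicBlock*> > EdgeSet;

  // A block seeds the reverse-CFG DFS when every outgoing edge is a
  // skipped backedge; successor-less blocks qualify vacuously.
  static bool isReverseCFGExit(BasicBlock *BB, const EdgeSet &Backedges) {
    for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
         SI != SE; ++SI)
      if (!Backedges.count(std::make_pair(BB, *SI)))
        return false;
    return true;
  }
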
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 4274b50..5ce82b9 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -564,7 +564,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
return;
}
- Succs[SI->resolveSuccessorIndex(SI->findCaseValue(CI))] = true;
+ Succs[SI->findCaseValue(CI).getSuccessorIndex()] = true;
return;
}
@@ -623,14 +623,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
if (CI == 0)
return !SCValue.isUndefined();
- // Make sure to skip the "default value" which isn't a value
- for (unsigned i = 0, E = SI->getNumCases(); i != E; ++i)
- if (SI->getCaseValue(i) == CI) // Found the taken branch.
- return SI->getCaseSuccessor(i) == To;
-
- // If the constant value is not equal to any of the branches, we must
- // execute default branch.
- return SI->getDefaultDest() == To;
+ return SI->findCaseValue(CI).getCaseSuccessor() == To;
}
// Just mark all destinations executable!
@@ -1495,12 +1488,12 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// If the input to SCCP is actually switch on undef, fix the undef to
// the first constant.
if (isa<UndefValue>(SI->getCondition())) {
- SI->setCondition(SI->getCaseValue(0));
- markEdgeExecutable(BB, SI->getCaseSuccessor(0));
+ SI->setCondition(SI->case_begin().getCaseValue());
+ markEdgeExecutable(BB, SI->case_begin().getCaseSuccessor());
return true;
}
- markForcedConstant(SI->getCondition(), SI->getCaseValue(0));
+ markForcedConstant(SI->getCondition(), SI->case_begin().getCaseValue());
return true;
}
}
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index d23263f..d36a18f 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -574,8 +574,9 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// transform it into a store of the expanded constant value.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
assert(MSI->getRawDest() == Ptr && "Consistency error!");
- unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
- if (NumBytes != 0) {
+      int64_t SNumBytes = cast<ConstantInt>(MSI->getLength())->getSExtValue();
+ if (SNumBytes > 0) {
+ unsigned NumBytes = static_cast<unsigned>(SNumBytes);
unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
// Compute the value replicated the right number of times.
@@ -1517,6 +1518,9 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
if (Length == 0)
return MarkUnsafe(Info, User);
+ if (Length->isNegative())
+ return MarkUnsafe(Info, User);
+
isSafeMemAccess(Offset, Length->getZExtValue(), 0,
UI.getOperandNo() == 0, Info, MI,
true /*AllowWholeAccess*/);
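
Both ScalarReplAggregates hunks close the same hole: a constant memory
intrinsic length is signed in spirit, and a negative value read through
getZExtValue() turns into an enormous byte count. A defensive sketch of the
shared check (helper name is ours):

  #include "llvm/Constants.h"
  #include "llvm/Support/DataTypes.h"
  using namespace llvm;

  // Sketch, not part of the patch: accept only known, non-negative
  // constant lengths; anything else is treated as unsafe.
  static bool getUsableLength(ConstantInt *Length, uint64_t &NumBytes) {
    if (Length == 0 || Length->isNegative())
      return false;
    NumBytes = Length->getZExtValue();
    return true;
  }
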
diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp
deleted file mode 100644
index 50c91b6..0000000
--- a/lib/Transforms/Utils/BasicInliner.cpp
+++ /dev/null
@@ -1,182 +0,0 @@
-//===- BasicInliner.cpp - Basic function level inliner --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a simple function based inliner that does not use
-// call graph information.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "basicinliner"
-#include "llvm/Module.h"
-#include "llvm/Function.h"
-#include "llvm/Transforms/Utils/BasicInliner.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include <vector>
-
-using namespace llvm;
-
-static cl::opt<unsigned>
-BasicInlineThreshold("basic-inline-threshold", cl::Hidden, cl::init(200),
- cl::desc("Control the amount of basic inlining to perform (default = 200)"));
-
-namespace llvm {
-
- /// BasicInlinerImpl - BasicInliner implemantation class. This hides
- /// container info, used by basic inliner, from public interface.
- struct BasicInlinerImpl {
-
- BasicInlinerImpl(const BasicInlinerImpl&); // DO NOT IMPLEMENT
- void operator=(const BasicInlinerImpl&); // DO NO IMPLEMENT
- public:
- BasicInlinerImpl(TargetData *T) : TD(T) {}
-
- /// addFunction - Add function into the list of functions to process.
- /// All functions must be inserted using this interface before invoking
- /// inlineFunctions().
- void addFunction(Function *F) {
- Functions.push_back(F);
- }
-
- /// neverInlineFunction - Sometimes a function is never to be inlined
- /// because of one or other reason.
- void neverInlineFunction(Function *F) {
- NeverInline.insert(F);
- }
-
- /// inlineFuctions - Walk all call sites in all functions supplied by
- /// client. Inline as many call sites as possible. Delete completely
- /// inlined functions.
- void inlineFunctions();
-
- private:
- TargetData *TD;
- std::vector<Function *> Functions;
- SmallPtrSet<const Function *, 16> NeverInline;
- SmallPtrSet<Function *, 8> DeadFunctions;
- InlineCostAnalyzer CA;
- };
-
-/// inlineFuctions - Walk all call sites in all functions supplied by
-/// client. Inline as many call sites as possible. Delete completely
-/// inlined functions.
-void BasicInlinerImpl::inlineFunctions() {
-
- // Scan through and identify all call sites ahead of time so that we only
- // inline call sites in the original functions, not call sites that result
- // from inlining other functions.
- std::vector<CallSite> CallSites;
-
- for (std::vector<Function *>::iterator FI = Functions.begin(),
- FE = Functions.end(); FI != FE; ++FI) {
- Function *F = *FI;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
- CallSite CS(cast<Value>(I));
- if (CS && CS.getCalledFunction()
- && !CS.getCalledFunction()->isDeclaration())
- CallSites.push_back(CS);
- }
- }
-
- DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
-
- // Inline call sites.
- bool Changed = false;
- do {
- Changed = false;
- for (unsigned index = 0; index != CallSites.size() && !CallSites.empty();
- ++index) {
- CallSite CS = CallSites[index];
- if (Function *Callee = CS.getCalledFunction()) {
-
- // Eliminate calls that are never inlinable.
- if (Callee->isDeclaration() ||
- CS.getInstruction()->getParent()->getParent() == Callee) {
- CallSites.erase(CallSites.begin() + index);
- --index;
- continue;
- }
- InlineCost IC = CA.getInlineCost(CS, NeverInline);
- if (IC.isAlways()) {
- DEBUG(dbgs() << " Inlining: cost=always"
- <<", call: " << *CS.getInstruction());
- } else if (IC.isNever()) {
- DEBUG(dbgs() << " NOT Inlining: cost=never"
- <<", call: " << *CS.getInstruction());
- continue;
- } else {
- int Cost = IC.getValue();
-
- if (Cost >= (int) BasicInlineThreshold) {
- DEBUG(dbgs() << " NOT Inlining: cost = " << Cost
- << ", call: " << *CS.getInstruction());
- continue;
- } else {
- DEBUG(dbgs() << " Inlining: cost = " << Cost
- << ", call: " << *CS.getInstruction());
- }
- }
-
- // Inline
- InlineFunctionInfo IFI(0, TD);
- if (InlineFunction(CS, IFI)) {
- Callee->removeDeadConstantUsers();
- if (Callee->isDefTriviallyDead())
- DeadFunctions.insert(Callee);
- Changed = true;
- CallSites.erase(CallSites.begin() + index);
- --index;
- }
- }
- }
- } while (Changed);
-
- // Remove completely inlined functions from module.
- for(SmallPtrSet<Function *, 8>::iterator I = DeadFunctions.begin(),
- E = DeadFunctions.end(); I != E; ++I) {
- Function *D = *I;
- Module *M = D->getParent();
- M->getFunctionList().remove(D);
- }
-}
-
-BasicInliner::BasicInliner(TargetData *TD) {
- Impl = new BasicInlinerImpl(TD);
-}
-
-BasicInliner::~BasicInliner() {
- delete Impl;
-}
-
-/// addFunction - Add function into the list of functions to process.
-/// All functions must be inserted using this interface before invoking
-/// inlineFunctions().
-void BasicInliner::addFunction(Function *F) {
- Impl->addFunction(F);
-}
-
-/// neverInlineFunction - Sometimes a function is never to be inlined because
-/// of one or other reason.
-void BasicInliner::neverInlineFunction(Function *F) {
- Impl->neverInlineFunction(F);
-}
-
-/// inlineFuctions - Walk all call sites in all functions supplied by
-/// client. Inline as many call sites as possible. Delete completely
-/// inlined functions.
-void BasicInliner::inlineFunctions() {
- Impl->inlineFunctions();
-}
-
-}
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index d1aa599..7f5cb5e 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -1,7 +1,6 @@
add_llvm_library(LLVMTransformUtils
AddrModeMatcher.cpp
BasicBlockUtils.cpp
- BasicInliner.cpp
BreakCriticalEdges.cpp
BuildLibCalls.cpp
CloneFunction.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 04ef7d7..1b28c35 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -313,8 +313,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
Cond = dyn_cast_or_null<ConstantInt>(V);
}
if (Cond) { // Constant fold to uncond branch!
- unsigned CaseIndex = SI->findCaseValue(Cond);
- BasicBlock *Dest = SI->getSuccessor(SI->resolveSuccessorIndex(CaseIndex));
+ SwitchInst::ConstCaseIt Case = SI->findCaseValue(Cond);
+ BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor());
VMap[OldTI] = BranchInst::Create(Dest, NewBB);
ToClone.push_back(Dest);
TerminatorDone = true;
@@ -424,10 +424,6 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
//
BasicBlock::iterator I = NewBB->begin();
- DebugLoc TheCallDL;
- if (TheCall)
- TheCallDL = TheCall->getDebugLoc();
-
// Handle PHI nodes specially, as we have to remove references to dead
// blocks.
if (PHINode *PN = dyn_cast<PHINode>(I)) {
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 429919b..e8c0b80 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -617,7 +617,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
// of the other successors.
TheSwitch->setCondition(call);
TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks));
- TheSwitch->removeCase(NumExitBlocks-1); // Remove redundant case
+ // Remove redundant case
+ TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1));
break;
}
}
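
CodeExtractor still computes the victim case numerically, so the patch
builds a CaseIt directly from an index at the call site. The bridge in
isolation (wrapper is illustrative):

  #include "llvm/Instructions.h"
  using namespace llvm;

  // Sketch, not part of the patch: removeCase() now takes an iterator,
  // but an index converts on the spot.
  static void removeCaseByIndex(SwitchInst *SI, unsigned Idx) {
    SI->removeCase(SwitchInst::CaseIt(SI, Idx));
  }
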
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 336d8f6..5f895eb 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -106,31 +106,32 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
// If we are switching on a constant, we can convert the switch into a
// single branch instruction!
ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition());
- BasicBlock *TheOnlyDest = SI->getDefaultDest(); // The default dest
+ BasicBlock *TheOnlyDest = SI->getDefaultDest();
BasicBlock *DefaultDest = TheOnlyDest;
// Figure out which case it goes to.
- for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i) {
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
// Found case matching a constant operand?
- if (SI->getCaseValue(i) == CI) {
- TheOnlyDest = SI->getCaseSuccessor(i);
+ if (i.getCaseValue() == CI) {
+ TheOnlyDest = i.getCaseSuccessor();
break;
}
// Check to see if this branch is going to the same place as the default
// dest. If so, eliminate it as an explicit compare.
- if (SI->getCaseSuccessor(i) == DefaultDest) {
+ if (i.getCaseSuccessor() == DefaultDest) {
// Remove this entry.
DefaultDest->removePredecessor(SI->getParent());
SI->removeCase(i);
- --i; --e; // Don't skip an entry...
+ --i; --e;
continue;
}
// Otherwise, check to see if the switch only branches to one destination.
   // We do this by resetting "TheOnlyDest" to null when we find two non-equal
// destinations.
- if (SI->getCaseSuccessor(i) != TheOnlyDest) TheOnlyDest = 0;
+ if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = 0;
}
if (CI && !TheOnlyDest) {
@@ -167,11 +168,13 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
if (SI->getNumCases() == 1) {
// Otherwise, we can fold this switch into a conditional branch
// instruction if it has only one non-default destination.
+ SwitchInst::CaseIt FirstCase = SI->case_begin();
Value *Cond = Builder.CreateICmpEQ(SI->getCondition(),
- SI->getCaseValue(0), "cond");
+ FirstCase.getCaseValue(), "cond");
// Insert the new branch.
- Builder.CreateCondBr(Cond, SI->getCaseSuccessor(0), SI->getDefaultDest());
+ Builder.CreateCondBr(Cond, FirstCase.getCaseSuccessor(),
+ SI->getDefaultDest());
// Delete the old switch.
SI->eraseFromParent();
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 4376265..0bc185d 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -386,7 +386,7 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
this);
} else {
SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader",
+ SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader",
".split-lp", this, NewBBs);
PreheaderBB = NewBBs[0];
}
@@ -538,8 +538,7 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
///
Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
BasicBlock *Preheader) {
- // Don't try to separate loops without a preheader (this excludes
- // loop headers which are targeted by an indirectbr).
+ // Don't try to separate loops without a preheader.
if (!Preheader)
return 0;
@@ -554,11 +553,15 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
// handles the case when a PHI node has multiple instances of itself as
// arguments.
SmallVector<BasicBlock*, 8> OuterLoopPreds;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
if (PN->getIncomingValue(i) != PN ||
- !L->contains(PN->getIncomingBlock(i)))
+ !L->contains(PN->getIncomingBlock(i))) {
+ // We can't split indirectbr edges.
+ if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
+ return 0;
OuterLoopPreds.push_back(PN->getIncomingBlock(i));
-
+ }
+ }
DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
// If ScalarEvolution is around and knows anything about values in
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
index df8d68e..c70ced1 100644
--- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -73,16 +73,16 @@ bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) {
LLVMContext &Context = CI->getContext();
Type *Int32Ty = Type::getInt32Ty(Context);
- unsigned caseNo = SI->findCaseValue(ExpectedValue);
+ SwitchInst::CaseIt Case = SI->findCaseValue(ExpectedValue);
std::vector<Value *> Vec;
unsigned n = SI->getNumCases();
Vec.resize(n + 1 + 1); // +1 for MDString and +1 for default case
Vec[0] = MDString::get(Context, "branch_weights");
- Vec[1] = ConstantInt::get(Int32Ty, SwitchInst::ErrorIndex == caseNo ?
+ Vec[1] = ConstantInt::get(Int32Ty, Case == SI->case_default() ?
LikelyBranchWeight : UnlikelyBranchWeight);
for (unsigned i = 0; i < n; ++i) {
- Vec[i + 1 + 1] = ConstantInt::get(Int32Ty, i == caseNo ?
+ Vec[i + 1 + 1] = ConstantInt::get(Int32Ty, i == Case.getCaseIndex() ?
LikelyBranchWeight : UnlikelyBranchWeight);
}
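
The branch_weights node HandleSwitchExpect assembles has the layout (kind
string, default-destination weight, one weight per case in order). A sketch
of the construction for the path where the expected value has an explicit
case, using this pass's default weights of 64 (likely) and 4 (unlikely);
the helper and its hard-coded numbers are illustrative:

  #include "llvm/Constants.h"
  #include "llvm/LLVMContext.h"
  #include "llvm/Metadata.h"
  #include "llvm/Type.h"
  #include <vector>
  using namespace llvm;

  // Sketch, not part of the patch: operand 0 names the kind, operand 1
  // weights the default destination, operands 2..n+1 weight the cases.
  static MDNode *makeBranchWeights(LLVMContext &Ctx, unsigned NumCases,
                                   unsigned LikelyCaseIdx) {
    Type *Int32Ty = Type::getInt32Ty(Ctx);
    std::vector<Value*> Vec(NumCases + 2);
    Vec[0] = MDString::get(Ctx, "branch_weights");
    Vec[1] = ConstantInt::get(Int32Ty, 4);
    for (unsigned i = 0; i != NumCases; ++i)
      Vec[i + 2] = ConstantInt::get(Int32Ty, i == LikelyCaseIdx ? 64 : 4);
    return MDNode::get(Ctx, Vec);
  }
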
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 424f564..a16130d 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -237,10 +237,10 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
unsigned numCmps = 0;
// Start with "simple" cases
- for (unsigned i = 0; i < SI->getNumCases(); ++i)
- Cases.push_back(CaseRange(SI->getCaseValue(i),
- SI->getCaseValue(i),
- SI->getCaseSuccessor(i)));
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i)
+ Cases.push_back(CaseRange(i.getCaseValue(), i.getCaseValue(),
+ i.getCaseSuccessor()));
+
std::sort(Cases.begin(), Cases.end(), CaseCmp());
// Merge case into clusters
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index a9853a4..d53a46e 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -480,9 +480,9 @@ GetValueEqualityComparisonCases(TerminatorInst *TI,
BasicBlock*> > &Cases) {
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Cases.reserve(SI->getNumCases());
- for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i)
- Cases.push_back(std::make_pair(SI->getCaseValue(i),
- SI->getCaseSuccessor(i)));
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i)
+ Cases.push_back(std::make_pair(i.getCaseValue(),
+ i.getCaseSuccessor()));
return SI->getDefaultDest();
}
@@ -605,10 +605,10 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
<< "Through successor TI: " << *TI);
- for (unsigned i = SI->getNumCases(); i != 0;) {
+ for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
--i;
- if (DeadCases.count(SI->getCaseValue(i))) {
- SI->getCaseSuccessor(i)->removePredecessor(TI->getParent());
+ if (DeadCases.count(i.getCaseValue())) {
+ i.getCaseSuccessor()->removePredecessor(TI->getParent());
SI->removeCase(i);
}
}
@@ -2009,10 +2009,8 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
// Find the relevant condition and destinations.
Value *Condition = Select->getCondition();
- unsigned TrueCase = SI->findCaseValue(TrueVal);
- unsigned FalseCase = SI->findCaseValue(FalseVal);
- BasicBlock *TrueBB = SI->getSuccessor(SI->resolveSuccessorIndex(TrueCase));
- BasicBlock *FalseBB = SI->getSuccessor(SI->resolveSuccessorIndex(FalseCase));
+ BasicBlock *TrueBB = SI->findCaseValue(TrueVal).getCaseSuccessor();
+ BasicBlock *FalseBB = SI->findCaseValue(FalseVal).getCaseSuccessor();
// Perform the actual simplification.
return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB);
@@ -2096,7 +2094,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
// Ok, the block is reachable from the default dest. If the constant we're
// comparing exists in one of the other edges, then we can constant fold ICI
// and zap it.
- if (SI->findCaseValue(Cst) != SwitchInst::ErrorIndex) {
+ if (SI->findCaseValue(Cst) != SI->case_default()) {
Value *V;
if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
V = ConstantInt::getFalse(BB->getContext());
@@ -2423,8 +2421,9 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
}
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i)
- if (SI->getCaseSuccessor(i) == BB) {
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i)
+ if (i.getCaseSuccessor() == BB) {
BB->removePredecessor(SI->getParent());
SI->removeCase(i);
--i; --e;
@@ -2434,12 +2433,13 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
// destination and make it the default.
if (SI->getDefaultDest() == BB) {
std::map<BasicBlock*, std::pair<unsigned, unsigned> > Popularity;
- for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i) {
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
std::pair<unsigned, unsigned> &entry =
- Popularity[SI->getCaseSuccessor(i)];
+ Popularity[i.getCaseSuccessor()];
if (entry.first == 0) {
entry.first = 1;
- entry.second = i;
+ entry.second = i.getCaseIndex();
} else {
entry.first++;
}
@@ -2470,8 +2470,9 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
for (unsigned i = 0; i != MaxPop-1; ++i)
MaxBlock->removePredecessor(SI->getParent());
- for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i)
- if (SI->getCaseSuccessor(i) == MaxBlock) {
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i)
+ if (i.getCaseSuccessor() == MaxBlock) {
SI->removeCase(i);
--i; --e;
}
@@ -2517,11 +2518,13 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
// Make sure all cases point to the same destination and gather the values.
SmallVector<ConstantInt *, 16> Cases;
- Cases.push_back(SI->getCaseValue(0));
- for (unsigned I = 1, E = SI->getNumCases(); I != E; ++I) {
- if (SI->getCaseSuccessor(I-1) != SI->getCaseSuccessor(I))
+ SwitchInst::CaseIt I = SI->case_begin();
+ Cases.push_back(I.getCaseValue());
+ SwitchInst::CaseIt PrevI = I++;
+ for (SwitchInst::CaseIt E = SI->case_end(); I != E; PrevI = I++) {
+ if (PrevI.getCaseSuccessor() != I.getCaseSuccessor())
return false;
- Cases.push_back(SI->getCaseValue(I));
+ Cases.push_back(I.getCaseValue());
}
assert(Cases.size() == SI->getNumCases() && "Not all cases gathered");
@@ -2539,10 +2542,11 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
if (!Offset->isNullValue())
Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
- Builder.CreateCondBr(Cmp, SI->getCaseSuccessor(0), SI->getDefaultDest());
+ Builder.CreateCondBr(
+ Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest());
// Prune obsolete incoming values off the successor's PHI nodes.
- for (BasicBlock::iterator BBI = SI->getCaseSuccessor(0)->begin();
+ for (BasicBlock::iterator BBI = SI->case_begin().getCaseSuccessor()->begin();
isa<PHINode>(BBI); ++BBI) {
for (unsigned I = 0, E = SI->getNumCases()-1; I != E; ++I)
cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
@@ -2562,22 +2566,22 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) {
// Gather dead cases.
SmallVector<ConstantInt*, 8> DeadCases;
- for (unsigned I = 0, E = SI->getNumCases(); I != E; ++I) {
- if ((SI->getCaseValue(I)->getValue() & KnownZero) != 0 ||
- (SI->getCaseValue(I)->getValue() & KnownOne) != KnownOne) {
- DeadCases.push_back(SI->getCaseValue(I));
+ for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) {
+ if ((I.getCaseValue()->getValue() & KnownZero) != 0 ||
+ (I.getCaseValue()->getValue() & KnownOne) != KnownOne) {
+ DeadCases.push_back(I.getCaseValue());
DEBUG(dbgs() << "SimplifyCFG: switch case '"
- << SI->getCaseValue(I)->getValue() << "' is dead.\n");
+                 << I.getCaseValue()->getValue() << "' is dead.\n");
}
}
// Remove dead cases from the switch.
for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) {
- unsigned Case = SI->findCaseValue(DeadCases[I]);
- assert(Case != SwitchInst::ErrorIndex &&
+ SwitchInst::CaseIt Case = SI->findCaseValue(DeadCases[I]);
+ assert(Case != SI->case_default() &&
"Case was not found. Probably mistake in DeadCases forming.");
// Prune unused values from PHI nodes.
- SI->getCaseSuccessor(Case)->removePredecessor(SI->getParent());
+ Case.getCaseSuccessor()->removePredecessor(SI->getParent());
SI->removeCase(Case);
}
@@ -2626,9 +2630,9 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
typedef DenseMap<PHINode*, SmallVector<int,4> > ForwardingNodesMap;
ForwardingNodesMap ForwardingNodes;
- for (unsigned I = 0; I < SI->getNumCases(); ++I) { // 0 is the default case.
- ConstantInt *CaseValue = SI->getCaseValue(I);
- BasicBlock *CaseDest = SI->getCaseSuccessor(I);
+ for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) {
+ ConstantInt *CaseValue = I.getCaseValue();
+ BasicBlock *CaseDest = I.getCaseSuccessor();
int PhiIndex;
PHINode *PHI = FindPHIForConditionForwarding(CaseValue, CaseDest,
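
TurnSwitchRangeIntoICmp relies on a standard unsigned-wraparound trick: once
every case shares one destination and the values form a contiguous run
[Lo, Lo+N), the whole dispatch reduces to one subtract and one unsigned
compare. The arithmetic in plain C++:

  #include "llvm/Support/DataTypes.h"

  // Sketch, not part of the patch: (X - Lo) < N iff Lo <= X < Lo + N,
  // since any X below Lo wraps to a huge value and fails the compare.
  static bool inContiguousRange(uint64_t X, uint64_t Lo, uint64_t N) {
    return (X - Lo) < N;
  }
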
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 20eef3c..e00565d 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -231,8 +231,10 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
// Inform IVUsers about the new users.
if (IU) {
- if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
- IU->AddUsersIfInteresting(I);
+ if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) {
+ SmallPtrSet<Loop*, 16> SimplifiedLoopNests;
+ IU->AddUsersIfInteresting(I, SimplifiedLoopNests);
+ }
}
DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
++NumElimRem;
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 6874906..7b39efb 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -1731,12 +1731,12 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ", ";
writeOperand(SI.getDefaultDest(), true);
Out << " [";
- unsigned NumCases = SI.getNumCases();
- for (unsigned i = 0; i < NumCases; ++i) {
+ for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end();
+ i != e; ++i) {
Out << "\n ";
- writeOperand(SI.getCaseValue(i), true);
+ writeOperand(i.getCaseValue(), true);
Out << ", ";
- writeOperand(SI.getCaseSuccessor(i), true);
+ writeOperand(i.getCaseSuccessor(), true);
}
Out << "\n ]";
} else if (isa<IndirectBrInst>(I)) {
diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h
index a7277f6..8903a8f 100644
--- a/lib/VMCore/ConstantsContext.h
+++ b/lib/VMCore/ConstantsContext.h
@@ -16,6 +16,7 @@
#define LLVM_CONSTANTSCONTEXT_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/Operator.h"
@@ -656,48 +657,18 @@ private:
return ConstantClassInfo::getTombstoneKey();
}
static unsigned getHashValue(const ConstantClass *CP) {
- // This is adapted from SuperFastHash by Paul Hsieh.
- unsigned Hash = TypeClassInfo::getHashValue(CP->getType());
- for (unsigned I = 0, E = CP->getNumOperands(); I < E; ++I) {
- unsigned Data = ConstantInfo::getHashValue(CP->getOperand(I));
- Hash += Data & 0xFFFF;
- unsigned Tmp = ((Data >> 16) << 11) ^ Hash;
- Hash = (Hash << 16) ^ Tmp;
- Hash += Hash >> 11;
- }
-
- // Force "avalanching" of final 127 bits.
- Hash ^= Hash << 3;
- Hash += Hash >> 5;
- Hash ^= Hash << 4;
- Hash += Hash >> 17;
- Hash ^= Hash << 25;
- Hash += Hash >> 6;
- return Hash;
+ SmallVector<Constant*, 8> CPOperands;
+ CPOperands.reserve(CP->getNumOperands());
+ for (unsigned I = 0, E = CP->getNumOperands(); I < E; ++I)
+ CPOperands.push_back(CP->getOperand(I));
+ return getHashValue(LookupKey(CP->getType(), CPOperands));
}
static bool isEqual(const ConstantClass *LHS, const ConstantClass *RHS) {
return LHS == RHS;
}
static unsigned getHashValue(const LookupKey &Val) {
- // This is adapted from SuperFastHash by Paul Hsieh.
- unsigned Hash = TypeClassInfo::getHashValue(Val.first);
- for (Operands::const_iterator
- I = Val.second.begin(), E = Val.second.end(); I != E; ++I) {
- unsigned Data = ConstantInfo::getHashValue(*I);
- Hash += Data & 0xFFFF;
- unsigned Tmp = ((Data >> 16) << 11) ^ Hash;
- Hash = (Hash << 16) ^ Tmp;
- Hash += Hash >> 11;
- }
-
- // Force "avalanching" of final 127 bits.
- Hash ^= Hash << 3;
- Hash += Hash >> 5;
- Hash ^= Hash << 4;
- Hash += Hash >> 17;
- Hash ^= Hash << 25;
- Hash += Hash >> 6;
- return Hash;
+ return hash_combine(Val.first, hash_combine_range(Val.second.begin(),
+ Val.second.end()));
}
static bool isEqual(const LookupKey &LHS, const ConstantClass *RHS) {
if (RHS == getEmptyKey() || RHS == getTombstoneKey())
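
Both getHashValue() overloads now defer to llvm/ADT/Hashing.h instead of
carrying hand-rolled copies of SuperFastHash, and the pointer overload
funnels through the LookupKey one so the two can never drift apart. Usage
sketch of the two entry points (the types here are placeholders):

  #include "llvm/ADT/Hashing.h"
  #include <vector>
  using namespace llvm;

  // Sketch, not part of the patch: hash_combine mixes a fixed list of
  // hashable values; hash_combine_range folds a sequence; the resulting
  // hash_code converts implicitly to size_t.
  static size_t hashKey(void *TypeKey, const std::vector<void*> &Operands) {
    return hash_combine(TypeKey,
                        hash_combine_range(Operands.begin(),
                                           Operands.end()));
  }
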
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 11fd5b6..8db6ac9 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -3159,13 +3159,16 @@ void SwitchInst::addCase(ConstantInt *OnVal, BasicBlock *Dest) {
// Initialize some new operands.
assert(OpNo+1 < ReservedSpace && "Growing didn't work!");
NumOperands = OpNo+2;
- setCaseValue(NewCaseIdx, OnVal);
- setCaseSuccessor(NewCaseIdx, Dest);
+ CaseIt Case(this, NewCaseIdx);
+ Case.setValue(OnVal);
+ Case.setSuccessor(Dest);
}
/// removeCase - This method removes the specified case and its successor
/// from the switch instruction.
-void SwitchInst::removeCase(unsigned idx) {
+void SwitchInst::removeCase(CaseIt i) {
+ unsigned idx = i.getCaseIndex();
+
assert(2 + idx*2 < getNumOperands() && "Case index out of range!!!");
unsigned NumOps = getNumOperands();
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index 0bc4f74..c6f3558 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -185,16 +185,7 @@ bool Type::isSizedDerivedType() const {
if (!this->isStructTy())
return false;
- // Opaque structs have no size.
- if (cast<StructType>(this)->isOpaque())
- return false;
-
- // Okay, our struct is sized if all of the elements are.
- for (subtype_iterator I = subtype_begin(), E = subtype_end(); I != E; ++I)
- if (!(*I)->isSized())
- return false;
-
- return true;
+ return cast<StructType>(this)->isSized();
}
//===----------------------------------------------------------------------===//
@@ -579,6 +570,26 @@ StructType *StructType::create(StringRef Name, Type *type, ...) {
return llvm::StructType::create(Ctx, StructFields, Name);
}
+bool StructType::isSized() const {
+ if ((getSubclassData() & SCDB_IsSized) != 0)
+ return true;
+ if (isOpaque())
+ return false;
+
+ // Okay, our struct is sized if all of the elements are, but if one of the
+ // elements is opaque, the struct isn't sized *yet*, but may become sized in
+ // the future, so just bail out without caching.
+ for (element_iterator I = element_begin(), E = element_end(); I != E; ++I)
+ if (!(*I)->isSized())
+ return false;
+
+ // Here we cheat a bit and cast away const-ness. The goal is to memoize when
+ // we find a sized type, as types can only move from opaque to sized, not the
+ // other way.
+ const_cast<StructType*>(this)->setSubclassData(
+ getSubclassData() | SCDB_IsSized);
+ return true;
+}
StringRef StructType::getName() const {
assert(!isLiteral() && "Literal structs never have names");
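
StructType::isSized() memoizes in a spare subclass-data bit, which is legal
only because sizedness is monotonic: an opaque body can be filled in later,
but a sized struct never becomes unsized. The shape of the trick with the
LLVM specifics stripped away:

  // Sketch, not part of the patch: a sticky cache for a monotonic
  // property. Only the "true" answer is stable enough to remember.
  class MonotonicFlag {
    bool Cached;
    bool (*Compute)();               // stands in for the expensive walk
  public:
    explicit MonotonicFlag(bool (*F)()) : Cached(false), Compute(F) {}
    bool get() {
      if (Cached) return true;       // fast path once proven
      if (!Compute()) return false;  // may flip to true later; don't cache
      return Cached = true;          // memoize the stable result
    }
  };
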
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 207c06d..41cc38c 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -317,20 +317,40 @@ void Value::replaceAllUsesWith(Value *New) {
BB->replaceSuccessorsPhiUsesWith(cast<BasicBlock>(New));
}
-Value *Value::stripPointerCasts() {
- if (!getType()->isPointerTy())
- return this;
+namespace {
+// Various metrics for how much to strip off of pointers.
+enum PointerStripKind {
+ PSK_ZeroIndices,
+ PSK_InBoundsConstantIndices,
+ PSK_InBounds
+};
+
+template <PointerStripKind StripKind>
+static Value *stripPointerCastsAndOffsets(Value *V) {
+ if (!V->getType()->isPointerTy())
+ return V;
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
SmallPtrSet<Value *, 4> Visited;
- Value *V = this;
Visited.insert(V);
do {
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- if (!GEP->hasAllZeroIndices())
- return V;
+ switch (StripKind) {
+ case PSK_ZeroIndices:
+ if (!GEP->hasAllZeroIndices())
+ return V;
+ break;
+ case PSK_InBoundsConstantIndices:
+ if (!GEP->hasAllConstantIndices())
+ return V;
+ // fallthrough
+ case PSK_InBounds:
+ if (!GEP->isInBounds())
+ return V;
+ break;
+ }
V = GEP->getPointerOperand();
} else if (Operator::getOpcode(V) == Instruction::BitCast) {
V = cast<Operator>(V)->getOperand(0);
@@ -346,6 +366,19 @@ Value *Value::stripPointerCasts() {
return V;
}
+} // namespace
+
+Value *Value::stripPointerCasts() {
+ return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this);
+}
+
+Value *Value::stripInBoundsConstantOffsets() {
+ return stripPointerCastsAndOffsets<PSK_InBoundsConstantIndices>(this);
+}
+
+Value *Value::stripInBoundsOffsets() {
+ return stripPointerCastsAndOffsets<PSK_InBounds>(this);
+}
/// isDereferenceablePointer - Test if this value is always a pointer to
/// allocated and suitably aligned memory for a simple load or store.
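
The template folds three nearly identical strippers into one loop; the
public wrappers differ only in which GEPs they are willing to look through.
What each entry point peels off, assuming the declarations this patch adds
to Value.h:

  #include "llvm/Value.h"
  using namespace llvm;

  // Sketch, not part of the patch.
  static void stripDemo(Value *V) {
    // Bitcasts plus GEPs whose indices are all zero (no offset at all).
    Value *A = V->stripPointerCasts();
    // Bitcasts plus inbounds GEPs whose indices are all constants.
    Value *B = V->stripInBoundsConstantOffsets();
    // Bitcasts plus any inbounds GEP, constant indices or not.
    Value *C = V->stripInBoundsOffsets();
    (void)A; (void)B; (void)C;
  }
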
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index dcf86d2..5b9b2a5 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -813,11 +813,11 @@ void Verifier::visitSwitchInst(SwitchInst &SI) {
// have the same type as the switched-on value.
Type *SwitchTy = SI.getCondition()->getType();
SmallPtrSet<ConstantInt*, 32> Constants;
- for (unsigned i = 0, e = SI.getNumCases(); i != e; ++i) {
- Assert1(SI.getCaseValue(i)->getType() == SwitchTy,
+ for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
+ Assert1(i.getCaseValue()->getType() == SwitchTy,
"Switch constants must all be same type as switch value!", &SI);
- Assert2(Constants.insert(SI.getCaseValue(i)),
- "Duplicate integer as switch case", &SI, SI.getCaseValue(i));
+ Assert2(Constants.insert(i.getCaseValue()),
+ "Duplicate integer as switch case", &SI, i.getCaseValue());
}
visitTerminatorInst(SI);
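
The Assert2 above folds the membership test and the insertion into one step:
SmallPtrSet::insert returns false when the pointer is already present, which
is exactly the duplicate-case condition being rejected. A standalone sketch
(the helper is ours):

  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/Constants.h"
  using namespace llvm;

  // Sketch, not part of the patch: ConstantInts are uniqued, so pointer
  // identity doubles as value identity here.
  static bool hasDuplicateCases(ConstantInt *const *Vals, unsigned N) {
    SmallPtrSet<ConstantInt*, 32> Seen;
    for (unsigned i = 0; i != N; ++i)
      if (!Seen.insert(Vals[i]))
        return true;
    return false;
  }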